2. Data Acquisition and Resilience
The foundation of any robust model is a reliable data source. Following the
unexpected shutdown of advanced data feeds on FBref in January 2026, this
project successfully pivoted its data infrastructure to the American Soccer
Analysis (ASA) API, demonstrating data resilience and adaptability—key traits
for a modern data scientist.
# --- 2.1 Data Extraction (ASA API - Final Verified Version) ---
library(itscalledsoccer)
library(tidyverse)
library(gt)
# 1. Create the client object that every ASA request below goes through
asa_client <- AmericanSoccerAnalysis$new()
# 2. Download game-level xG rows for MLS. No season argument is passed in
#    this call, so the 2025 restriction is applied locally in step 3.
mls_match_data_raw <- asa_client$get_game_xgoals(leagues = "mls")
## INFO: New data found. Clearing session cache and refreshing `AmericanSoccerAnalysis` class.
# 3. Keep the rows whose UTC kickoff timestamp contains the literal text
#    "2025", then keep (and rename) only the columns used downstream
mls_match_data <- mls_match_data_raw %>%
  filter(grepl("2025", as.character(date_time_utc), fixed = TRUE)) %>%
  select(all_of(c(
    Date       = "date_time_utc",
    Home_ID    = "home_team_id",
    Away_ID    = "away_team_id",
    Home_Goals = "home_goals",
    Away_Goals = "away_goals",
    Home_xG    = "home_team_xgoals",
    Away_xG    = "away_team_xgoals",
    Home_xPts  = "home_xpoints",
    Away_xPts  = "away_xpoints"
  )))
# NOTE(review): the run recorded below reports 540 rows. Confirm whether that
# count includes postseason games, because the Actual_Points entered by hand
# later in this file appear to be regular-season totals - TODO confirm.
# 4. Team IDs are still raw ASA identifiers at this point. An optional join
#    against the ASA team list would attach readable names, for example:
# teams <- asa_client$get_teams(leagues = "mls")
# mls_match_data <- mls_match_data %>%
# left_join(teams %>% select(team_id, Home_Name = team_name), by = c("Home_ID" = "team_id"))
print(sprintf(
  "Successfully extracted %d matches with xPts!",
  nrow(mls_match_data)
))
## [1] "Successfully extracted 540 matches with xPts!"
mls_match_data %>% head()
# --- 2.2 Mapping Team IDs to Names ---
# 1. Fetch the MLS team records from ASA
teams_raw <- asa_client$get_teams(leagues = "mls")
# 2. Reduce them to a two-column id -> name lookup
team_map <- select(teams_raw, team_id, team_name)
# 3. Attach home and away names to each match. The lookup is renamed before
#    each join so the name column arrives under its final label directly.
mls_match_data_final <- mls_match_data %>%
  left_join(
    rename(team_map, Home_ID = team_id, Home_Team = team_name),
    by = "Home_ID"
  ) %>%
  left_join(
    rename(team_map, Away_ID = team_id, Away_Team = team_name),
    by = "Away_ID"
  )
# 4. Sum 2025 xPts per team.
#    Each match becomes two rows (one per side); Location records which side
#    the row describes so the matching xPts column can be picked.
#    Total_xPts_2025 is the term weighted 0.6 in the prediction formulas
#    further down this file.
total_xpts_2025 <- mls_match_data_final %>%
  pivot_longer(
    cols = c(Home_Team, Away_Team),
    names_to = "Location",
    values_to = "Team"
  ) %>%
  mutate(
    xPts_Earned = if_else(Location == "Home_Team", Home_xPts, Away_xPts)
  ) %>%
  group_by(Team) %>%
  summarise(
    Total_xPts_2025 = sum(xPts_Earned, na.rm = TRUE),
    .groups = "drop"
  )
print("Total xPts for 2025 calculated successfully!")
## [1] "Total xPts for 2025 calculated successfully!"
total_xpts_2025 %>%
  arrange(desc(Total_xPts_2025)) %>%
  head()
# --- 4. Final 2026 Prediction Model ---
# 1. Hand-entered 2025 standings for six example teams.
#    Actual_Rank is the rank within each conference (the last three teams
#    are West), which is why the values 1-3 appear twice.
actual_standings_2025 <- tribble(
  ~Team,                    ~Actual_Points, ~Actual_Rank,
  "Philadelphia Union",     66,             1,
  "FC Cincinnati",          65,             2,
  "Inter Miami CF",         65,             3,
  "San Diego FC",           63,             1,
  "Vancouver Whitecaps FC", 63,             2,
  "Los Angeles FC",         60,             3
)
# 2. Hand-entered qualitative adjustments for the same six teams.
#    Author's stated scale: Roster -5 to +5, Schedule (SoS) -2 to +2.
qualitative_adjustments <- tribble(
  ~Team,                    ~Roster_Adj, ~SoS_Adj,
  "Philadelphia Union",     -1,          0,
  "FC Cincinnati",          0,           0,
  "Inter Miami CF",         4,           1, # Miami: +4 roster strength
  "San Diego FC",           1,           0,
  "Vancouver Whitecaps FC", 2,           0,
  "Los Angeles FC",         1,           -1 # LAFC: -1 for a hard schedule
)
# 3. Join the three inputs on Team and score each club.
#    Predicted points = 0.6 * xPts + 0.3 * actual points
#                       + 2 * roster adjustment + 1 * schedule adjustment.
#    NOTE(review): the two weights sum to 0.9 here, while the league-wide
#    projection later in this file uses 0.6 / 0.4 - confirm this is intended.
final_2026_prediction <- total_xpts_2025 %>%
  inner_join(actual_standings_2025, by = "Team") %>%
  inner_join(qualitative_adjustments, by = "Team") %>%
  mutate(
    xPts_Delta = Actual_Points - Total_xPts_2025,
    Predicted_Points_2026 = 0.6 * Total_xPts_2025 +
      0.3 * Actual_Points +
      2 * Roster_Adj +
      SoS_Adj
  ) %>%
  arrange(desc(Predicted_Points_2026)) %>%
  mutate(Predicted_Rank_2026 = seq_len(n()))
# 4. Render the result as a gt table with two-decimal numeric columns
final_2026_prediction %>%
  select(
    Team,
    Actual_Points,
    Total_xPts_2025,
    xPts_Delta,
    Predicted_Points_2026,
    Predicted_Rank_2026
  ) %>%
  gt() %>%
  tab_header(title = "MLS 2026: Predicted Performance Model") %>%
  fmt_number(
    columns = c(Total_xPts_2025, xPts_Delta, Predicted_Points_2026),
    decimals = 2
  )
MLS 2026: Predicted Performance Model

| Team | Actual_Points | Total_xPts_2025 | xPts_Delta | Predicted_Points_2026 | Predicted_Rank_2026 |
|---|---|---|---|---|---|
| Inter Miami CF | 65 | 57.60 | 7.40 | 63.06 | 1 |
| Vancouver Whitecaps FC | 63 | 61.05 | 1.95 | 59.53 | 2 |
| Los Angeles FC | 60 | 63.59 | −3.59 | 57.15 | 3 |
| Philadelphia Union | 66 | 60.04 | 5.96 | 53.83 | 4 |
| San Diego FC | 63 | 52.67 | 10.33 | 52.50 | 5 |
| FC Cincinnati | 65 | 42.48 | 22.52 | 44.99 | 6 |
# --- FINAL 30-TEAM INTEGRATION ---
# 1. Get the full team-name list from ASA.
#    NOTE(review): all_teams is not referenced again in the visible part of
#    this file; it is kept in case later code relies on it.
all_teams <- asa_client$get_teams(leagues = "mls") %>%
  select(Team = team_name)
# 2. Hand-entered 2025 standings for all 30 teams.
#    The vectors are positional: the first 15 entries are the East teams and
#    the last 15 are the West teams, which is what the Conference column
#    built with rep() relies on.
full_actual_2025 <- tibble(
  Team = c(
    "Philadelphia Union", "FC Cincinnati", "Inter Miami CF", "Charlotte FC",
    "New York City FC", "Nashville SC", "Columbus Crew", "Chicago Fire FC",
    "Orlando City SC", "New York Red Bulls", "New England Revolution",
    "Toronto FC", "CF Montréal", "Atlanta United FC", "D.C. United",
    "San Diego FC", "Vancouver Whitecaps FC", "Los Angeles FC",
    "Minnesota United FC", "Seattle Sounders FC", "Austin FC", "FC Dallas",
    "Portland Timbers", "Real Salt Lake", "San Jose Earthquakes",
    "Colorado Rapids", "Houston Dynamo FC", "St. Louis City SC", "LA Galaxy",
    "Sporting Kansas City"
  ),
  Actual_Points = c(
    66, 65, 65, 59, 56, 54, 54, 53, 53, 43, 36, 32, 28, 28, 26,
    63, 63, 60, 58, 55, 47, 44, 44, 41, 41, 41, 37, 32, 30, 28
  ),
  Conference = c(rep("East", 15), rep("West", 15))
)
# 3. Surface name mismatches before joining.
#    inner_join() keeps only teams whose name matches exactly in both tables,
#    so a hand-typed name that differs from the ASA name would otherwise
#    vanish from the projection without any message.
unmatched_teams <- setdiff(full_actual_2025$Team, total_xpts_2025$Team)
if (length(unmatched_teams) > 0) {
  warning(
    "Teams in full_actual_2025 with no match in total_xpts_2025 ",
    "(dropped by the join): ",
    paste(unmatched_teams, collapse = ", "),
    call. = FALSE
  )
}
# 4. Apply the baseline model to the full league.
#    No Roster/SoS adjustment is applied in this block; the baseline is
#    0.6 * xPts + 0.4 * actual points.
#    arrange() sorts the whole table by predicted points (it ignores the
#    grouping), and row_number() then numbers rows within each conference,
#    giving a per-conference rank.
final_30_team_prediction <- total_xpts_2025 %>%
  inner_join(full_actual_2025, by = "Team") %>%
  mutate(
    xPts_Delta = Actual_Points - Total_xPts_2025,
    Predicted_Points_2026 = (Total_xPts_2025 * 0.6) + (Actual_Points * 0.4)
  ) %>%
  group_by(Conference) %>%
  arrange(desc(Predicted_Points_2026)) %>%
  mutate(Predicted_Rank_2026 = row_number()) %>%
  ungroup()
# 5. Generate the master table, ordered by conference and predicted rank
final_30_team_prediction %>%
  select(
    Conference,
    Predicted_Rank_2026,
    Team,
    Actual_Points,
    Total_xPts_2025,
    xPts_Delta,
    Predicted_Points_2026
  ) %>%
  arrange(Conference, Predicted_Rank_2026) %>%
  gt() %>%
  tab_header(
    title = "MLS 2026: Complete League Projections",
    subtitle = "Based on 2025 xPts Performance & Historical Regression"
  ) %>%
  fmt_number(
    columns = c(Total_xPts_2025, xPts_Delta, Predicted_Points_2026),
    decimals = 2
  )
MLS 2026: Complete League Projections
Based on 2025 xPts Performance & Historical Regression

| Conference | Predicted_Rank_2026 | Team | Actual_Points | Total_xPts_2025 | xPts_Delta | Predicted_Points_2026 |
|---|---|---|---|---|---|---|
| East | 1 | Philadelphia Union | 66 | 60.04 | 5.96 | 62.43 |
| East | 2 | Inter Miami CF | 65 | 57.60 | 7.40 | 60.56 |
| East | 3 | Nashville SC | 54 | 59.53 | −5.53 | 57.32 |
| East | 4 | Orlando City SC | 53 | 57.91 | −4.91 | 55.94 |
| East | 5 | Columbus Crew | 54 | 51.43 | 2.57 | 52.46 |
| East | 6 | Chicago Fire FC | 53 | 50.71 | 2.29 | 51.63 |
| East | 7 | New York City FC | 56 | 48.54 | 7.46 | 51.53 |
| East | 8 | FC Cincinnati | 65 | 42.48 | 22.52 | 51.49 |
| East | 9 | Charlotte FC | 59 | 43.02 | 15.98 | 49.41 |
| East | 10 | New York Red Bulls | 43 | 43.00 | 0.00 | 43.00 |
| East | 11 | New England Revolution | 36 | 37.92 | −1.92 | 37.15 |
| East | 12 | Atlanta United FC | 28 | 43.15 | −15.15 | 37.09 |
| East | 13 | Toronto FC | 32 | 36.91 | −4.91 | 34.95 |
| East | 14 | CF Montréal | 28 | 39.48 | −11.48 | 34.89 |
| East | 15 | D.C. United | 26 | 40.49 | −14.49 | 34.69 |
| West | 1 | Los Angeles FC | 60 | 63.59 | −3.59 | 62.15 |
| West | 2 | Vancouver Whitecaps FC | 63 | 61.05 | 1.95 | 61.83 |
| West | 3 | San Diego FC | 63 | 52.67 | 10.33 | 56.80 |
| West | 4 | Seattle Sounders FC | 55 | 54.91 | 0.09 | 54.95 |
| West | 5 | Minnesota United FC | 58 | 48.71 | 9.29 | 52.43 |
| West | 6 | San Jose Earthquakes | 41 | 53.30 | −12.30 | 48.38 |
| West | 7 | Austin FC | 47 | 42.89 | 4.11 | 44.53 |
| West | 8 | FC Dallas | 44 | 43.34 | 0.66 | 43.60 |
| West | 9 | Colorado Rapids | 41 | 44.85 | −3.85 | 43.31 |
| West | 10 | Real Salt Lake | 41 | 41.66 | −0.66 | 41.40 |
| West | 11 | Houston Dynamo FC | 37 | 44.14 | −7.14 | 41.29 |
| West | 12 | St. Louis City SC | 32 | 42.99 | −10.99 | 38.59 |
| West | 13 | LA Galaxy | 30 | 41.23 | −11.23 | 36.74 |
| West | 14 | Sporting Kansas City | 28 | 31.96 | −3.96 | 30.38 |
# --- FINAL 30-TEAM FIX ---
# 1. Align the hand-typed Portland name with the name used in the ASA-derived
#    xPts table, so the join on Team can match it
full_actual_2025 <- full_actual_2025 %>%
  mutate(Team = if_else(Team == "Portland Timbers", "Portland Timbers FC", Team))
# 2. Re-run the join and the baseline calculation
#    (0.6 * xPts + 0.4 * actual points, ranked within each conference)
final_30_team_prediction <- total_xpts_2025 %>%
  inner_join(full_actual_2025, by = "Team") %>%
  mutate(
    xPts_Delta = Actual_Points - Total_xPts_2025,
    Predicted_Points_2026 = (Total_xPts_2025 * 0.6) + (Actual_Points * 0.4)
  ) %>%
  group_by(Conference) %>%
  arrange(desc(Predicted_Points_2026)) %>%
  mutate(Predicted_Rank_2026 = row_number()) %>%
  ungroup()
# 3. Verify that no team was lost in the join.
#    Printing the count alone cannot fail, so the check below stops the
#    script and names any team from full_actual_2025 that is still missing.
still_missing <- setdiff(full_actual_2025$Team, final_30_team_prediction$Team)
if (length(still_missing) > 0) {
  stop(
    "Teams still missing from final_30_team_prediction after the name fix: ",
    paste(still_missing, collapse = ", "),
    call. = FALSE
  )
}
print(paste("Final Team Count:", nrow(final_30_team_prediction)))
## [1] "Final Team Count: 30"
# 4. Render the complete table, ordered by conference and predicted rank
final_30_team_prediction %>%
  select(
    Conference,
    Predicted_Rank_2026,
    Team,
    Actual_Points,
    Total_xPts_2025,
    xPts_Delta,
    Predicted_Points_2026
  ) %>%
  arrange(Conference, Predicted_Rank_2026) %>%
  gt() %>%
  tab_header(
    title = "MLS 2026: Complete League Projections",
    subtitle = "The 'inData' Model: Regression Analysis & Performance Baseline"
  ) %>%
  fmt_number(
    columns = c(Total_xPts_2025, xPts_Delta, Predicted_Points_2026),
    decimals = 2
  )
MLS 2026: Complete League Projections
The 'inData' Model: Regression Analysis & Performance Baseline

| Conference | Predicted_Rank_2026 | Team | Actual_Points | Total_xPts_2025 | xPts_Delta | Predicted_Points_2026 |
|---|---|---|---|---|---|---|
| East | 1 | Philadelphia Union | 66 | 60.04 | 5.96 | 62.43 |
| East | 2 | Inter Miami CF | 65 | 57.60 | 7.40 | 60.56 |
| East | 3 | Nashville SC | 54 | 59.53 | −5.53 | 57.32 |
| East | 4 | Orlando City SC | 53 | 57.91 | −4.91 | 55.94 |
| East | 5 | Columbus Crew | 54 | 51.43 | 2.57 | 52.46 |
| East | 6 | Chicago Fire FC | 53 | 50.71 | 2.29 | 51.63 |
| East | 7 | New York City FC | 56 | 48.54 | 7.46 | 51.53 |
| East | 8 | FC Cincinnati | 65 | 42.48 | 22.52 | 51.49 |
| East | 9 | Charlotte FC | 59 | 43.02 | 15.98 | 49.41 |
| East | 10 | New York Red Bulls | 43 | 43.00 | 0.00 | 43.00 |
| East | 11 | New England Revolution | 36 | 37.92 | −1.92 | 37.15 |
| East | 12 | Atlanta United FC | 28 | 43.15 | −15.15 | 37.09 |
| East | 13 | Toronto FC | 32 | 36.91 | −4.91 | 34.95 |
| East | 14 | CF Montréal | 28 | 39.48 | −11.48 | 34.89 |
| East | 15 | D.C. United | 26 | 40.49 | −14.49 | 34.69 |
| West | 1 | Los Angeles FC | 60 | 63.59 | −3.59 | 62.15 |
| West | 2 | Vancouver Whitecaps FC | 63 | 61.05 | 1.95 | 61.83 |
| West | 3 | San Diego FC | 63 | 52.67 | 10.33 | 56.80 |
| West | 4 | Seattle Sounders FC | 55 | 54.91 | 0.09 | 54.95 |
| West | 5 | Minnesota United FC | 58 | 48.71 | 9.29 | 52.43 |
| West | 6 | San Jose Earthquakes | 41 | 53.30 | −12.30 | 48.38 |
| West | 7 | Austin FC | 47 | 42.89 | 4.11 | 44.53 |
| West | 8 | FC Dallas | 44 | 43.34 | 0.66 | 43.60 |
| West | 9 | Colorado Rapids | 41 | 44.85 | −3.85 | 43.31 |
| West | 10 | Real Salt Lake | 41 | 41.66 | −0.66 | 41.40 |
| West | 11 | Houston Dynamo FC | 37 | 44.14 | −7.14 | 41.29 |
| West | 12 | Portland Timbers FC | 44 | 38.12 | 5.88 | 40.47 |
| West | 13 | St. Louis City SC | 32 | 42.99 | −10.99 | 38.59 |
| West | 14 | LA Galaxy | 30 | 41.23 | −11.23 | 36.74 |
| West | 15 | Sporting Kansas City | 28 | 31.96 | −3.96 | 30.38 |