1. Introduction: The ‘inData’ Philosophy

This report details the methodology and results of the @MLSinData 2026 MLS Regular Season prediction model. Our philosophy is simple: look past actual results to the underlying performance metrics in order to identify teams due for regression toward the mean, whether upward or downward. This approach is crucial for a professional sports analytics portfolio.

2. Data Acquisition and Resilience

The foundation of any robust model is a reliable data source. Following the unexpected shutdown of advanced data feeds on FBref in January 2026, this project successfully pivoted its data infrastructure to the American Soccer Analysis (ASA) API, demonstrating data resilience and adaptability—key traits for a modern data scientist.

# --- 2.1 Data Extraction (ASA API - Final Verified Version) ---
library(itscalledsoccer)
library(tidyverse)

# Season and competition stage the model is built on. The actual points used
# later in this report are regular-season points, so the xPts sample has to
# cover the same set of matches.
season_year <- "2025"
season_stage <- "Regular Season"

# 1. Initialize the ASA API client
asa_client <- AmericanSoccerAnalysis$new()

# 2. Extract 2025 MLS regular-season match data.
# Season and stage are filtered server-side so that playoff matches do not
# inflate the xPts totals. The previous calendar-year text match returned 540
# rows, which is more than a 30-team regular season should produce
# (30 teams x 34 matches / 2 = 510 -- confirm on re-run).
mls_match_data_raw <- asa_client$get_game_xgoals(
  leagues = "mls",
  season_name = season_year,
  stage_name = season_stage
)
##   INFO: New data found. Clearing session cache and refreshing `AmericanSoccerAnalysis` class.
# 3. Guard on the kick-off year and keep only the columns the model uses
mls_match_data <- mls_match_data_raw %>%
  # Anchor on the leading year of the timestamp text instead of searching for
  # "2025" anywhere inside it.
  filter(startsWith(as.character(date_time_utc), season_year)) %>%
  select(
    Date = date_time_utc,
    Home_ID = home_team_id,
    Away_ID = away_team_id,
    Home_Goals = home_goals,
    Away_Goals = away_goals,
    Home_xG = home_team_xgoals,
    Away_xG = away_team_xgoals,
    Home_xPts = home_xpoints,
    Away_xPts = away_xpoints
  )

# An empty extract would otherwise flow silently into every later table.
if (nrow(mls_match_data) == 0) {
  stop(
    "No ", season_year, " ", season_stage,
    " MLS matches were returned by the ASA API.",
    call. = FALSE
  )
}

# 4. Map Team IDs to Names (Optional but recommended for clarity)
# ASA usually provides a separate function for team names if needed:
# teams <- asa_client$get_teams(leagues = "mls")
# mls_match_data <- mls_match_data %>% 
#   left_join(teams %>% select(team_id, Home_Name = team_name), by = c("Home_ID" = "team_id"))

print(paste("Successfully extracted", nrow(mls_match_data), "matches with xPts!"))
# NOTE: the output line below was captured before the regular-season filter
# was added; it counts every match dated in 2025, playoffs included.
## [1] "Successfully extracted 540 matches with xPts!"
head(mls_match_data)
# --- 2.2 Mapping Team IDs to Names ---

# 1. Fetch the MLS team list from ASA
teams_raw <- asa_client$get_teams(leagues = "mls")

# 2. Reduce it to an id -> name lookup table
team_map <- select(teams_raw, team_id, team_name)

# 3. Attach the home and away team names to every match.
# The lookup's name column is renamed before each join so the joined column
# arrives with its final name.
mls_match_data_final <- mls_match_data %>%
  left_join(
    rename(team_map, Home_Team = team_name),
    by = c("Home_ID" = "team_id")
  ) %>%
  left_join(
    rename(team_map, Away_Team = team_name),
    by = c("Away_ID" = "team_id")
  )

# 4. Total 2025 xPts per team.
# Each match is split into one row per participating team; the home row is
# credited with Home_xPts and the away row with Away_xPts.
total_xpts_2025 <- mls_match_data_final %>%
  pivot_longer(
    cols = c(Home_Team, Away_Team),
    names_to = "Location",
    values_to = "Team"
  ) %>%
  group_by(Team) %>%
  summarise(
    Total_xPts_2025 = sum(
      if_else(Location == "Home_Team", Home_xPts, Away_xPts),
      na.rm = TRUE
    ),
    .groups = "drop"
  )

print("Total xPts for 2025 calculated successfully!")
## [1] "Total xPts for 2025 calculated successfully!"
total_xpts_2025 %>%
  arrange(desc(Total_xPts_2025)) %>%
  head()
# --- 4. Final 2026 Prediction Model ---

# Teams with hand-entered inputs. Defined once so the standings table and the
# adjustments table cannot drift out of alignment.
scouted_teams <- c(
  "Philadelphia Union", "FC Cincinnati", "Inter Miami CF",
  "San Diego FC", "Vancouver Whitecaps FC", "Los Angeles FC"
)

# 1. Verified 2025 actual points for the scouted teams
actual_standings_2025 <- tibble(
  Team = scouted_teams,
  Actual_Points = c(66, 65, 65, 63, 63, 60),
  Actual_Rank = c(1, 2, 3, 1, 2, 3) # Note: San Diego/Vancouver/LAFC are West
)

# 2. Qualitative adjustments (roster and schedule)
# Scale: -5 to +5 for Roster, -2 to +2 for Schedule
qualitative_adjustments <- tibble(
  Team = scouted_teams,
  Roster_Adj = c(-1, 0, 4, 1, 2, 1), # e.g., Miami +4 for roster strength
  SoS_Adj = c(0, 0, 1, 0, 0, -1)     # e.g., LAFC -1 for hard schedule
)

# The inner joins below drop any team whose name does not match the ASA
# spelling exactly, so report mismatches instead of losing rows silently.
unmatched_teams <- setdiff(scouted_teams, total_xpts_2025$Team)
if (length(unmatched_teams) > 0) {
  warning(
    "Teams missing from total_xpts_2025 (check spelling against ASA names): ",
    paste(unmatched_teams, collapse = ", "),
    call. = FALSE
  )
}

# 3. Combine and calculate 2026 predicted points
final_2026_prediction <- total_xpts_2025 %>%
  inner_join(actual_standings_2025, by = "Team") %>%
  inner_join(qualitative_adjustments, by = "Team") %>%
  mutate(
    # Positive delta = the team collected more points than its xPts implied.
    xPts_Delta = Actual_Points - Total_xPts_2025,
    # The 60/30/10 Formula:
    # 60% weight on xPts, 30% on Actual, plus Roster (x2) and SoS (x1) points.
    # NOTE(review): the two weights sum to 0.9 and the adjustments are raw
    # points, so a team with zero adjustments lands below the 0.6/0.4 blend
    # used for the full-league table. Confirm this is intended.
    Predicted_Points_2026 = (Total_xPts_2025 * 0.6) +
      (Actual_Points * 0.3) +
      (Roster_Adj * 2) +
      (SoS_Adj * 1)
  ) %>%
  arrange(desc(Predicted_Points_2026)) %>%
  mutate(Predicted_Rank_2026 = row_number())

# 4. Render the results table.
# gt functions are namespace-qualified because this script never attaches gt.
final_2026_prediction %>%
  select(
    Team, Actual_Points, Total_xPts_2025, xPts_Delta,
    Predicted_Points_2026, Predicted_Rank_2026
  ) %>%
  gt::gt() %>%
  gt::tab_header(title = "MLS 2026: Predicted Performance Model") %>%
  gt::fmt_number(
    columns = c(Total_xPts_2025, xPts_Delta, Predicted_Points_2026),
    decimals = 2
  )
MLS 2026: Predicted Performance Model
Team Actual_Points Total_xPts_2025 xPts_Delta Predicted_Points_2026 Predicted_Rank_2026
Inter Miami CF 65 57.60 7.40 63.06 1
Vancouver Whitecaps FC 63 61.05 1.95 59.53 2
Los Angeles FC 60 63.59 −3.59 57.15 3
Philadelphia Union 66 60.04 5.96 53.83 4
San Diego FC 63 52.67 10.33 52.50 5
FC Cincinnati 65 42.48 22.52 44.99 6
# --- FINAL 30-TEAM INTEGRATION ---

# 1. Get the full team list from ASA
all_teams <- asa_client$get_teams(leagues = "mls") %>%
  select(Team = team_name)

# 2. Verified 2025 actual standings (15 East teams, then 15 West teams).
# Team names must match the ASA spelling exactly because they are the join
# key; ASA lists Portland as "Portland Timbers FC".
full_actual_2025 <- tibble(
  Team = c("Philadelphia Union", "FC Cincinnati", "Inter Miami CF", "Charlotte FC", "New York City FC", 
           "Nashville SC", "Columbus Crew", "Chicago Fire FC", "Orlando City SC", "New York Red Bulls", 
           "New England Revolution", "Toronto FC", "CF Montréal", "Atlanta United FC", "D.C. United",
           "San Diego FC", "Vancouver Whitecaps FC", "Los Angeles FC", "Minnesota United FC", "Seattle Sounders FC", 
           "Austin FC", "FC Dallas", "Portland Timbers FC", "Real Salt Lake", "San Jose Earthquakes", 
           "Colorado Rapids", "Houston Dynamo FC", "St. Louis City SC", "LA Galaxy", "Sporting Kansas City"),
  Actual_Points = c(66, 65, 65, 59, 56, 54, 54, 53, 53, 43, 36, 32, 28, 28, 26, 
                    63, 63, 60, 58, 55, 47, 44, 44, 41, 41, 41, 37, 32, 30, 28),
  Conference = c(rep("East", 15), rep("West", 15))
)

# The inner join below drops any team whose name is not found in the xPts
# table, so report mismatches instead of losing rows silently.
unmatched_teams <- setdiff(full_actual_2025$Team, total_xpts_2025$Team)
if (length(unmatched_teams) > 0) {
  warning(
    "Teams missing from total_xpts_2025 (check spelling against ASA names): ",
    paste(unmatched_teams, collapse = ", "),
    call. = FALSE
  )
}

# 3. Apply the 2026 model to the full league.
# No Roster/SoS adjustments are applied here; this is the unadjusted baseline.
final_30_team_prediction <- total_xpts_2025 %>%
  inner_join(full_actual_2025, by = "Team") %>%
  mutate(
    # Positive delta = the team collected more points than its xPts implied.
    xPts_Delta = Actual_Points - Total_xPts_2025,
    # Baseline blend: 60% xPts, 40% actual points.
    Predicted_Points_2026 = (Total_xPts_2025 * 0.6) + (Actual_Points * 0.4)
  ) %>%
  group_by(Conference) %>%
  # Rows are sorted league-wide; row_number() then ranks within each conference.
  arrange(desc(Predicted_Points_2026)) %>%
  mutate(Predicted_Rank_2026 = row_number()) %>%
  ungroup()

# 4. Generate the final master table.
# gt functions are namespace-qualified because this script never attaches gt.
final_30_team_prediction %>%
  select(Conference, Predicted_Rank_2026, Team, Actual_Points, Total_xPts_2025, xPts_Delta, Predicted_Points_2026) %>%
  arrange(Conference, Predicted_Rank_2026) %>%
  gt::gt() %>%
  gt::tab_header(title = "MLS 2026: Complete League Projections",
                 subtitle = "Based on 2025 xPts Performance & Historical Regression") %>%
  gt::fmt_number(columns = c(Total_xPts_2025, xPts_Delta, Predicted_Points_2026), decimals = 2)
MLS 2026: Complete League Projections
Based on 2025 xPts Performance & Historical Regression
Conference Predicted_Rank_2026 Team Actual_Points Total_xPts_2025 xPts_Delta Predicted_Points_2026
East 1 Philadelphia Union 66 60.04 5.96 62.43
East 2 Inter Miami CF 65 57.60 7.40 60.56
East 3 Nashville SC 54 59.53 −5.53 57.32
East 4 Orlando City SC 53 57.91 −4.91 55.94
East 5 Columbus Crew 54 51.43 2.57 52.46
East 6 Chicago Fire FC 53 50.71 2.29 51.63
East 7 New York City FC 56 48.54 7.46 51.53
East 8 FC Cincinnati 65 42.48 22.52 51.49
East 9 Charlotte FC 59 43.02 15.98 49.41
East 10 New York Red Bulls 43 43.00 0.00 43.00
East 11 New England Revolution 36 37.92 −1.92 37.15
East 12 Atlanta United FC 28 43.15 −15.15 37.09
East 13 Toronto FC 32 36.91 −4.91 34.95
East 14 CF Montréal 28 39.48 −11.48 34.89
East 15 D.C. United 26 40.49 −14.49 34.69
West 1 Los Angeles FC 60 63.59 −3.59 62.15
West 2 Vancouver Whitecaps FC 63 61.05 1.95 61.83
West 3 San Diego FC 63 52.67 10.33 56.80
West 4 Seattle Sounders FC 55 54.91 0.09 54.95
West 5 Minnesota United FC 58 48.71 9.29 52.43
West 6 San Jose Earthquakes 41 53.30 −12.30 48.38
West 7 Austin FC 47 42.89 4.11 44.53
West 8 FC Dallas 44 43.34 0.66 43.60
West 9 Colorado Rapids 41 44.85 −3.85 43.31
West 10 Real Salt Lake 41 41.66 −0.66 41.40
West 11 Houston Dynamo FC 37 44.14 −7.14 41.29
West 12 St. Louis City SC 32 42.99 −10.99 38.59
West 13 LA Galaxy 30 41.23 −11.23 36.74
West 14 Sporting Kansas City 28 31.96 −3.96 30.38
# --- FINAL 30-TEAM FIX ---

# 1. Align Portland's name with the ASA spelling used as the join key.
# This is a no-op when the standings table already uses "Portland Timbers FC".
full_actual_2025 <- full_actual_2025 %>%
  mutate(Team = if_else(Team == "Portland Timbers", "Portland Timbers FC", Team))

# 2. Re-run the join and calculation
final_30_team_prediction <- total_xpts_2025 %>%
  inner_join(full_actual_2025, by = "Team") %>%
  mutate(
    # Positive delta = the team collected more points than its xPts implied.
    xPts_Delta = Actual_Points - Total_xPts_2025,
    # Baseline blend: 60% xPts, 40% actual points.
    Predicted_Points_2026 = (Total_xPts_2025 * 0.6) + (Actual_Points * 0.4)
  ) %>%
  group_by(Conference) %>%
  # Rows are sorted league-wide; row_number() then ranks within each conference.
  arrange(desc(Predicted_Points_2026)) %>%
  mutate(Predicted_Rank_2026 = row_number()) %>%
  ungroup()

# 3. Verify that every team in the standings survived the join
print(paste("Final Team Count:", nrow(final_30_team_prediction)))
## [1] "Final Team Count: 30"
# Fail with the offending names rather than rendering an incomplete table.
dropped_teams <- setdiff(full_actual_2025$Team, final_30_team_prediction$Team)
if (length(dropped_teams) > 0) {
  stop(
    "Teams dropped by the join (check spelling against ASA names): ",
    paste(dropped_teams, collapse = ", "),
    call. = FALSE
  )
}

# 4. View the complete table.
# gt functions are namespace-qualified because this script never attaches gt.
final_30_team_prediction %>%
  select(Conference, Predicted_Rank_2026, Team, Actual_Points, Total_xPts_2025, xPts_Delta, Predicted_Points_2026) %>%
  arrange(Conference, Predicted_Rank_2026) %>%
  gt::gt() %>%
  gt::tab_header(title = "MLS 2026: Complete League Projections",
                 subtitle = "The 'inData' Model: Regression Analysis & Performance Baseline") %>%
  gt::fmt_number(columns = c(Total_xPts_2025, xPts_Delta, Predicted_Points_2026), decimals = 2)
MLS 2026: Complete League Projections
The 'inData' Model: Regression Analysis & Performance Baseline
Conference Predicted_Rank_2026 Team Actual_Points Total_xPts_2025 xPts_Delta Predicted_Points_2026
East 1 Philadelphia Union 66 60.04 5.96 62.43
East 2 Inter Miami CF 65 57.60 7.40 60.56
East 3 Nashville SC 54 59.53 −5.53 57.32
East 4 Orlando City SC 53 57.91 −4.91 55.94
East 5 Columbus Crew 54 51.43 2.57 52.46
East 6 Chicago Fire FC 53 50.71 2.29 51.63
East 7 New York City FC 56 48.54 7.46 51.53
East 8 FC Cincinnati 65 42.48 22.52 51.49
East 9 Charlotte FC 59 43.02 15.98 49.41
East 10 New York Red Bulls 43 43.00 0.00 43.00
East 11 New England Revolution 36 37.92 −1.92 37.15
East 12 Atlanta United FC 28 43.15 −15.15 37.09
East 13 Toronto FC 32 36.91 −4.91 34.95
East 14 CF Montréal 28 39.48 −11.48 34.89
East 15 D.C. United 26 40.49 −14.49 34.69
West 1 Los Angeles FC 60 63.59 −3.59 62.15
West 2 Vancouver Whitecaps FC 63 61.05 1.95 61.83
West 3 San Diego FC 63 52.67 10.33 56.80
West 4 Seattle Sounders FC 55 54.91 0.09 54.95
West 5 Minnesota United FC 58 48.71 9.29 52.43
West 6 San Jose Earthquakes 41 53.30 −12.30 48.38
West 7 Austin FC 47 42.89 4.11 44.53
West 8 FC Dallas 44 43.34 0.66 43.60
West 9 Colorado Rapids 41 44.85 −3.85 43.31
West 10 Real Salt Lake 41 41.66 −0.66 41.40
West 11 Houston Dynamo FC 37 44.14 −7.14 41.29
West 12 Portland Timbers FC 44 38.12 5.88 40.47
West 13 St. Louis City SC 32 42.99 −10.99 38.59
West 14 LA Galaxy 30 41.23 −11.23 36.74
West 15 Sporting Kansas City 28 31.96 −3.96 30.38