Background

The AFLW Season finals is about to kick off, finishing up their home and away season. What better way to see which team is predicted to win the grand final based off this model. Using the fitzRoy data package, we focused on the last 4 years, from 2022 as that is when all 18 AFL teams began competeting.

Predictions are based on the AFLW ladder and Elo ratings as of the end of Round 11. The final round results may change the finals matchups and thus affect the predicted probabilities.

Load Libraries

library(fitzRoy)
library(tidyverse)
library(dplyr)
library(ggplot2)
library(corrplot)
library(elo)
library(PlayerRatings)

Import data

We will be using data from 2022 - 2025 Season

seasons <- 2022:2025

AFLW <- purrr::map_df(seasons, ~fetch_results(comp = "AFLW", season = .x))

names(AFLW)
##  [1] "k2kSponsored"                        "match.name"                         
##  [3] "match.date"                          "match.status"                       
##  [5] "match.matchId"                       "match.venue"                        
##  [7] "match.utcStartTime"                  "match.homeTeamId"                   
##  [9] "match.awayTeamId"                    "match.round"                        
## [11] "match.venueLocalStartTime"           "match.abbr"                         
## [13] "match.twitterHashTag"                "match.homeTeam.name"                
## [15] "match.homeTeam.timeZone"             "match.homeTeam.teamId"              
## [17] "match.homeTeam.abbr"                 "match.homeTeam.nickname"            
## [19] "match.awayTeam.name"                 "match.awayTeam.timeZone"            
## [21] "match.awayTeam.teamId"               "match.awayTeam.abbr"                
## [23] "match.awayTeam.nickname"             "venue.address"                      
## [25] "venue.name"                          "venue.state"                        
## [27] "venue.timeZone"                      "venue.venueId"                      
## [29] "venue.abbreviation"                  "venue.capacity"                     
## [31] "venue.groundDimension"               "venue.latitude"                     
## [33] "venue.longitude"                     "venue.landOwner"                    
## [35] "round.name"                          "round.year"                         
## [37] "round.roundId"                       "round.abbreviation"                 
## [39] "round.competitionId"                 "round.roundNumber"                  
## [41] "status"                              "matchId"                            
## [43] "scoreWorm"                           "scoreMap"                           
## [45] "lastUpdated"                         "homeTeamScore.periodScore"          
## [47] "homeTeamScore.rushedBehinds"         "homeTeamScore.minutesInFront"       
## [49] "homeTeamScore.matchScore.totalScore" "homeTeamScore.matchScore.goals"     
## [51] "homeTeamScore.matchScore.behinds"    "homeTeamScore.matchScore.superGoals"
## [53] "awayTeamScore.periodScore"           "awayTeamScore.rushedBehinds"        
## [55] "awayTeamScore.minutesInFront"        "awayTeamScore.matchScore.totalScore"
## [57] "awayTeamScore.matchScore.goals"      "awayTeamScore.matchScore.behinds"   
## [59] "awayTeamScore.matchScore.superGoals" "matchClock.periods"                 
## [61] "weather.description"                 "weather.tempInCelsius"              
## [63] "weather.weatherType"                 "homeTeamScoreChart.goals"           
## [65] "homeTeamScoreChart.leftBehinds"      "homeTeamScoreChart.rightBehinds"    
## [67] "homeTeamScoreChart.leftPosters"      "homeTeamScoreChart.rightPosters"    
## [69] "homeTeamScoreChart.rushedBehinds"    "homeTeamScoreChart.touchedBehinds"  
## [71] "awayTeamScoreChart.goals"            "awayTeamScoreChart.leftBehinds"     
## [73] "awayTeamScoreChart.rightBehinds"     "awayTeamScoreChart.leftPosters"     
## [75] "awayTeamScoreChart.rightPosters"     "awayTeamScoreChart.rushedBehinds"   
## [77] "awayTeamScoreChart.touchedBehinds"

Clean the data

As the dataset contains 77 variables, we need to clean the data and choose which variables are relevant to use and which are not.

#Choose Variables
aflw_clean <- AFLW %>%
  select(match.date,
         match.matchId,
         home_team = match.homeTeam.name,
         away_team = match.awayTeam.name,
         home_score = homeTeamScore.matchScore.totalScore,
         away_score = awayTeamScore.matchScore.totalScore, 
         venue.name,
         round.name, 
         round.year,
         home_mins_front = homeTeamScore.minutesInFront,
         away_mins_front = awayTeamScore.minutesInFront, 
         weather.tempInCelsius, 
         weather.weatherType)

#Rename some variables
aflw_clean <-  aflw_clean %>% 
  rename(match_id = match.matchId,
         venue_name = venue.name, 
         round_name = round.name,
         round_year = round.year, 
         weather_celsius = weather.tempInCelsius,
         weather_type = weather.weatherType)

#Create a new variable Margin & Home win (if the home team won)
aflw_clean <- aflw_clean %>% 
  mutate(margin = home_score - away_score, 
         home_win = ifelse(home_score > away_score, 1, 0))

aflw_clean <- aflw_clean %>%
  mutate(match_id = str_remove(match_id, "^CD_M"))

#Change all character variables to factors
aflw_clean$match_number <- seq_len(nrow(aflw_clean))

aflw_clean <- aflw_clean %>%
  mutate(
    round_name = str_replace(round_name, "^Week", "Round"), 
    round_name = str_replace(round_name, "Finals Week 1", "Qualifying Final")
  )

Exploratory Data Analysis

We can now have a look at the data to learn what we are working with, identify any issues like missing data and help us decide what modelling procedure to take.

str(aflw_clean)
## tibble [486 × 16] (S3: tbl_df/tbl/data.frame)
##  $ match.date     : POSIXct[1:486], format: "2022-01-07 08:15:00" "2022-01-08 06:10:00" ...
##  $ match_id       : chr [1:486] "20222640101" "20222640102" "20222640103" "20222640104" ...
##  $ home_team      : chr [1:486] "St Kilda" "Kangaroos" "Western Bulldogs" "Fremantle" ...
##  $ away_team      : chr [1:486] "Richmond" "Geelong Cats" "Melbourne" "West Coast Eagles" ...
##  $ home_score     : int [1:486] 23 26 22 43 39 25 21 38 41 17 ...
##  $ away_score     : int [1:486] 61 18 46 15 9 44 36 54 14 31 ...
##  $ venue_name     : chr [1:486] "Kinetic Stadium" "Arden Street Oval" "Mission Whitten Oval" "Fremantle Oval" ...
##  $ round_name     : chr [1:486] "Round 1" "Round 1" "Round 1" "Round 1" ...
##  $ round_year     : chr [1:486] "2022" "2022" "2022" "2022" ...
##  $ home_mins_front: int [1:486] 8 42 0 44 45 22 20 19 56 2 ...
##  $ away_mins_front: int [1:486] 49 16 53 12 4 45 36 45 0 49 ...
##  $ weather_celsius: num [1:486] 29 21 21 18 18 23 18 32 25 27 ...
##  $ weather_type   : chr [1:486] "THUNDERSTORMS" "RAIN" "RAIN" "MOSTLY_SUNNY" ...
##  $ margin         : int [1:486] -38 8 -24 28 30 -19 -15 -16 27 -14 ...
##  $ home_win       : num [1:486] 0 1 0 1 1 0 0 0 1 0 ...
##  $ match_number   : int [1:486] 1 2 3 4 5 6 7 8 9 10 ...
summary(aflw_clean)
##    match.date                    match_id          home_team        
##  Min.   :2022-01-07 08:15:00   Length:486         Length:486        
##  1st Qu.:2022-09-30 14:02:30   Class :character   Class :character  
##  Median :2023-10-21 17:10:00   Mode  :character   Mode  :character  
##  Mean   :2023-12-13 23:39:47                                        
##  3rd Qu.:2024-11-01 21:37:30                                        
##  Max.   :2025-11-01 08:15:00                                        
##   away_team           home_score       away_score      venue_name       
##  Length:486         Min.   :  1.00   Min.   :  1.00   Length:486        
##  Class :character   1st Qu.: 25.00   1st Qu.: 22.00   Class :character  
##  Mode  :character   Median : 35.50   Median : 34.00   Mode  :character  
##                     Mean   : 38.20   Mean   : 36.61                     
##                     3rd Qu.: 47.75   3rd Qu.: 46.00                     
##                     Max.   :108.00   Max.   :114.00                     
##   round_name         round_year        home_mins_front  away_mins_front
##  Length:486         Length:486         Min.   :  0.00   Min.   : 0.00  
##  Class :character   Class :character   1st Qu.:  5.25   1st Qu.: 3.00  
##  Mode  :character   Mode  :character   Median : 37.00   Median :24.00  
##                                        Mean   : 35.67   Mean   :31.62  
##                                        3rd Qu.: 60.00   3rd Qu.:58.00  
##                                        Max.   :118.00   Max.   :87.00  
##  weather_celsius weather_type           margin            home_win     
##  Min.   :12.00   Length:486         Min.   :-100.000   Min.   :0.0000  
##  1st Qu.:18.00   Class :character   1st Qu.: -18.000   1st Qu.:0.0000  
##  Median :19.00   Mode  :character   Median :   2.000   Median :1.0000  
##  Mean   :20.74                      Mean   :   1.588   Mean   :0.5185  
##  3rd Qu.:23.00                      3rd Qu.:  22.000   3rd Qu.:1.0000  
##  Max.   :37.00                      Max.   :  96.000   Max.   :1.0000  
##   match_number  
##  Min.   :  1.0  
##  1st Qu.:122.2  
##  Median :243.5  
##  Mean   :243.5  
##  3rd Qu.:364.8  
##  Max.   :486.0
head(aflw_clean)
## # A tibble: 6 × 16
##   match.date          match_id    home_team      away_team home_score away_score
##   <dttm>              <chr>       <chr>          <chr>          <int>      <int>
## 1 2022-01-07 08:15:00 20222640101 St Kilda       Richmond          23         61
## 2 2022-01-08 06:10:00 20222640102 Kangaroos      Geelong …         26         18
## 3 2022-01-08 08:00:00 20222640103 Western Bulld… Melbourne         22         46
## 4 2022-01-08 09:50:00 20222640104 Fremantle      West Coa…         43         15
## 5 2022-01-09 03:10:00 20222640107 Adelaide Crows Brisbane…         39          9
## 6 2022-01-09 05:10:00 20222640105 Carlton        Collingw…         25         44
## # ℹ 10 more variables: venue_name <chr>, round_name <chr>, round_year <chr>,
## #   home_mins_front <int>, away_mins_front <int>, weather_celsius <dbl>,
## #   weather_type <chr>, margin <int>, home_win <dbl>, match_number <int>
#Any missing values?
colSums(is.na(aflw_clean))
##      match.date        match_id       home_team       away_team      home_score 
##               0               0               0               0               0 
##      away_score      venue_name      round_name      round_year home_mins_front 
##               0               0               0               0               0 
## away_mins_front weather_celsius    weather_type          margin        home_win 
##               0               0               0               0               0 
##    match_number 
##               0
#Exploration plots
aflw_clean %>%
  group_by(home_team, round_year) %>%
  summarise(avg_score = mean(home_score)) %>%
  ggplot(aes(x = home_team, y = avg_score, fill = factor(round_year))) + 
  geom_col(position = "dodge") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

ggplot(aflw_clean, aes(x = factor(home_win), y = margin)) +
  geom_boxplot(fill = "lightgreen") +
  labs(x = "Home Win", y = "Margin", title = "Margin vs Match Outcome")

# Select numeric columns and calculate correlation matrix
num_cols <- c("home_score", "away_score", "home_mins_front", "away_mins_front", "weather_celsius")
cor_matrix <- cor(aflw_clean[, num_cols], use = "complete.obs")

# Plot correlation matrix
corrplot(cor_matrix, method = "color", type = "upper", 
         addCoef.col = "black", tl.col = "black", tl.srt = 45)

The correlation matrix is telling us that the variables home team time in the front and away team time in front are useful for refining an ELO model and can be used as extra predictors. However weather is a weakly correlated value and therefore not useful.

Split Data

Next we want to split the data up.To train and text the accuracy of the model we will use seasons 22 - 24 and then the 2024 finals. Once tested and modified we can use the 2022 - 2025 seasons to predict the 2025 Grand Final Winner

#Filter Data to only bee regular games not finals games
train_data <- aflw_clean %>%
  filter(round_year %in% 2022:2024) %>% 
  filter(grepl("Round", round_name))

test_data <- aflw_clean %>%
  filter(round_year %in% 2024) %>% 
  filter(!grepl("Round", round_name))

predict_data <- aflw_clean %>%
  filter(round_year %in% 2022:2025) %>% 
  filter(grepl("Round", round_name))

Create Elo Model

Next we can train the elo model

elo_model <- elo(train_data[c("match_number", "home_team", "away_team", "home_win")], 
                      init = 1500, 
                      kfac = 27, 
                      history = TRUE)

plot(elo_model)

Get finals elo ratings

We can now save these finals ratings in a dataframe

final_ratings <- as.data.frame(elo_model$ratings) %>%
  rename(team = Player) %>%
  arrange(desc(Rating))

# Convert your data frame to a named vector
team_ratings <- setNames(final_ratings$Rating, final_ratings$team)

print(final_ratings)
##                 team   Rating Games Win Draw Loss Lag
## 1     Brisbane Lions 1688.935    41  33    0    8   3
## 2     Adelaide Crows 1688.032    41  34    0    7   8
## 3          Kangaroos 1671.920    41  30    0   11   5
## 4          Melbourne 1649.351    41  32    0    9   7
## 5           Hawthorn 1555.182    31  16    0   15   2
## 6           Richmond 1548.481    41  23    0   18   2
## 7          Fremantle 1529.101    41  22    0   19   0
## 8       Geelong Cats 1517.474    41  20    0   21   8
## 9           Essendon 1513.939    31  16    0   15   4
## 10     Port Adelaide 1447.118    31  11    0   20   1
## 11          St Kilda 1437.911    41  15    0   26   3
## 12   Gold Coast SUNS 1434.777    41  17    0   24   5
## 13       Collingwood 1424.089    41  19    0   22   7
## 14  Western Bulldogs 1419.027    41  17    0   24   0
## 15           Carlton 1412.205    41  15    0   26   4
## 16      Sydney Swans 1395.919    31   9    0   22   6
## 17 West Coast Eagles 1337.610    41   9    0   32   6
## 18        GWS GIANTS 1328.929    41  11    0   30   1

Create win probability

Now we can calculate the probability of each team winning, with added predictors home advantage and time in front.

elo_prob <- function(home, away, ratings, home_adv = 0, front_factor = 0) {
  # Adjusted ratings including home advantage and front factor
  Rh <- ratings[home] + home_adv + front_factor
  Ra <- ratings[away]
  
  # Elo probability formula
  1 / (1 + 10^((Ra - Rh)/400))
}

Check Accuracy of Model

Then we check the accuracy of the model against the actual results to see if we need to change anything or make it better.

#Set scaling variables for elo prediction 
home_advantage <- 50      # home ground advantage 
front_scale <- 2          # weight for minutes in front
weather_scale <- 1        # weight for weather effect
margin_scale <- 1         # weight for margin effect

test_data <- test_data %>%
  mutate(
    #Home/Away ratings
    home_rating = team_ratings[home_team],
    away_rating = team_ratings[away_team],
    
    #Time-in-front factor
    front_factor = (home_mins_front - away_mins_front) * front_scale,
    
    #Weather factor
    weather_factor = case_when(
      weather_type %in% c("RAIN","THUNDERSTORMS") ~ -20 * weather_scale,
      weather_celsius < 10 ~ -10 * weather_scale,
      TRUE ~ 0
    ),
    
    #Margin factor
    margin_factor = margin * margin_scale,
    
    #Combine all factors into Elo probability
    home_win_prob = elo_prob(
      home_team,
      away_team,
      team_ratings,
      home_adv = home_advantage,
      front_factor = front_factor + weather_factor + margin_factor
    )
  )

#Predict
test_data <- test_data %>%
  mutate(
    pred_home_win = ifelse(home_win_prob > 0.5, 1, 0),
    correct = ifelse(pred_home_win == home_win, 1, 0)
  )


accuracy <- mean(test_data$correct, na.rm = TRUE)
brier <- mean((test_data$home_win_prob - test_data$home_win)^2, na.rm = TRUE)

cat("Elo Model Accuracy:", round(accuracy, 3), "\n",
    "Brier Score:", round(brier, 3), "\n")
## Elo Model Accuracy: 0.889 
##  Brier Score: 0.167

The model has an accuracy of 0.889, which is saying that our model is 89% accurate, predicting every 9 out of 10 finals games correctly in 2024. The Brier score also shows us how close our predicted probabilities are to the actual results, with 0 being close to perfect and 1 showing the worst. A score of 0.167 demonstrates it is very close to actual predictions and the probabilities are well-calibrated.

Top 8

As of Round 11, the current ladder is as follows; 1st - North Melbourne 2nd - Hawthorn 3rd - Melbourne 4th - Brisbane Lions 5th - Carlton 6th - St Kilda 7th - West Coast Eagles 8th - Adelaide

With round 12 playing this weekend, a few positions may be changed and some teams may be kicked out. Therefore for this sake, we will be using up until round 11 and assuming the final ladder position is as above. Once round 12 has finished, this should be updated to the correct ladder.

QF <- data.frame(
  home_team = c("Kangaroos", "Hawthorn", "Melbourne", "Brisbane Lions"),
  away_team = c("Carlton", "St Kilda", "Adelaide Crows", "West Coast Eagles"), 
    stringsAsFactors = FALSE
)

Create Elo Model with 2025 games

Now we know the model is fairly accurate we need to include the 2025 data to predict for the 2025 finals.

elo_model_final <- elo(predict_data[c("match_number", "home_team", "away_team", "home_win")],
                       init = 1500,
                       kfac = 27,
                       history = TRUE)

final_ratings <- as.data.frame(elo_model_final$ratings) %>%
  rename(team = Player) %>%
  arrange(desc(Rating))

team_ratings <- setNames(final_ratings$Rating, final_ratings$team)

print(final_ratings)
##                 team   Rating Games Win Draw Loss Lag
## 1          Kangaroos 1755.478    53  42    0   11   5
## 2     Brisbane Lions 1698.314    52  41    0   11  10
## 3          Melbourne 1651.582    53  41    0   12   0
## 4     Adelaide Crows 1635.179    52  40    0   12  14
## 5           Hawthorn 1600.290    43  25    0   18   5
## 6          Fremantle 1539.772    52  28    0   24  11
## 7           St Kilda 1497.843    52  22    0   30   8
## 8       Geelong Cats 1491.096    53  25    0   28   0
## 9      Port Adelaide 1480.369    43  17    0   26   3
## 10           Carlton 1479.642    53  23    0   30   4
## 11          Essendon 1463.691    43  20    0   23   1
## 12          Richmond 1452.923    53  25    0   28   2
## 13      Sydney Swans 1428.078    43  15    0   28   1
## 14  Western Bulldogs 1408.219    52  21    0   31   9
## 15 West Coast Eagles 1384.326    53  15    0   38   4
## 16       Collingwood 1382.234    52  22    0   30   6
## 17   Gold Coast SUNS 1364.403    53  19    0   34   2
## 18        GWS GIANTS 1286.560    53  13    0   40   3

Run Match Simulation

Now using these rating we can simulate a single game, and how two teams match up against each other and who will win.

simulate_match <- function(home, away, ratings, home_adv = 50, weather_factor = 0) {
  rating_home <- ratings[home] + home_adv + weather_factor
  rating_away <- ratings[away] + weather_factor
  
  p_home <- 1 / (1 + 10^((rating_away - rating_home)/400))
  
  winner <- ifelse(runif(1) < p_home, home, away)
  loser <- ifelse(winner == home, away, home)
  
  list(Winner = winner, Loser = loser)
}

Simulate Final Series

Using these match simulations we can now simulate the finals series to see who is predicted to win and who is predicted to loose. Therefore making it to the grand final.

simulate_finals <- function(ratings, home_adv = 50, weather_factor = 0) {
  results <- list()
  
  # Week 1: Quarter Finals
  results$QF1 <- simulate_match(QF$home_team[1], QF$away_team[1], ratings, home_adv, weather_factor)
  results$QF2 <- simulate_match(QF$home_team[2], QF$away_team[2], ratings, home_adv, weather_factor)
  results$QF3 <- simulate_match(QF$home_team[3], QF$away_team[3], ratings, home_adv, weather_factor)
  results$QF4 <- simulate_match(QF$home_team[4], QF$away_team[4], ratings, home_adv, weather_factor)
  
  # Week 2: Semi Finals (adjust to AFLW finals structure)
  results$SF1 <- simulate_match(results$QF1$Winner, results$QF2$Winner, ratings, home_adv, weather_factor)
  results$SF2 <- simulate_match(results$QF3$Winner, results$QF4$Winner, ratings, home_adv, weather_factor)
  
  # Week 3: Preliminary Final
  results$PF <- simulate_match(results$SF1$Winner, results$SF2$Winner, ratings, home_adv, weather_factor)
  
  # Week 4: Grand Final
  results$GF <- simulate_match(results$PF$Winner, results$SF2$Loser, ratings, home_adv, weather_factor)
  
  results
}

Get Final Positions

Now we can simulate the finals series 10,000 times. This can give us a probabilistic view of outcomes.

Once simulated, we can get the probability of each team being eliminated during each round of the finals. This will provide us with the probability of each time winning the grand final.

# Run multiple simulations
set.seed(123)
n_sims <- 10000
elo_all_sims <- replicate(n_sims, simulate_finals(team_ratings), simplify = FALSE)

# Summarize finishing probabilities
get_positions <- function(sim) {
  teams <- c(QF$home_team, QF$away_team)
  pos <- setNames(rep(NA, length(teams)), teams)
  
  # QF losers
  pos[c(sim$QF1$Loser, sim$QF2$Loser, sim$QF3$Loser, sim$QF4$Loser)] <- "Eliminated Week 1"
  
  # SF losers
  pos[c(sim$SF1$Loser, sim$SF2$Loser)] <- "Eliminated Week 2"
  
  # PF loser
  pos[sim$PF$Loser] <- "Lose Preliminary Final"
  
  # Grand Final
  pos[sim$GF$Loser] <- "Lose Grand Final"
  pos[sim$GF$Winner] <- "Premiership"
  
  pos
}

# Run 10k simulations
all_sims <- replicate(n_sims, simulate_finals(team_ratings), simplify = FALSE)

Find Grand Final Winner

We can put together the probabilities of each team winning. The team with the highest probability shows they are the most likely team to win the 2025 AFLW Grand final.

# Extract Grand Final winners
gf_winners <- sapply(all_sims, function(x) unname(x$GF$Winner))

# Make sure it's plain text
gf_winners <- as.character(gf_winners)

# Summarise win probabilities
gf_winner_probs <- as.data.frame(table(gf_winners), stringsAsFactors = FALSE) %>%
  rename(Team = gf_winners, Wins = Freq) %>%
  mutate(Probability = Wins / n_sims) %>%
  arrange(desc(Probability))

# Print clear text output for the most likely winner
cat("Predicted Grand Final Winner:",
    gf_winner_probs$Team[1],
    "(",
    round(gf_winner_probs$Probability[1] * 100, 1),
    "% chance)\n", 
    "Followed by", gf_winner_probs$Team[2],
    "(",
    round(gf_winner_probs$Probability[2] * 100, 1),"% chance)")
## Predicted Grand Final Winner: Kangaroos ( 32.9 % chance)
##  Followed by Brisbane Lions ( 32.6 % chance)

The model shows that the predicted 2025 AFLW Grand Final Winner will be North Melbourne with a 32.9% chance of winning.