Introduction

The purpose of this document is to create an ELO rating system for the AFL 2023 season. The data will be trained on the 2023 regular season and tested on the 2023 Finals games. Load the following packages and if you haven’t, install these packages.

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(elo)
library(fitzRoy)
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.1.1 ──
## ✔ broom        1.0.5     ✔ rsample      1.2.0
## ✔ dials        1.2.0     ✔ tune         1.1.2
## ✔ infer        1.0.5     ✔ workflows    1.1.3
## ✔ modeldata    1.2.0     ✔ workflowsets 1.0.1
## ✔ parsnip      1.1.1     ✔ yardstick    1.2.0
## ✔ recipes      1.0.8     
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter()   masks stats::filter()
## ✖ recipes::fixed()  masks stringr::fixed()
## ✖ dplyr::lag()      masks stats::lag()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step()   masks stats::step()
## • Learn how to get started at https://www.tidymodels.org/start/
library(gtools)
## 
## Attaching package: 'gtools'
## 
## The following object is masked from 'package:rsample':
## 
##     permutations
library(data.table)
## 
## Attaching package: 'data.table'
## 
## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## 
## The following object is masked from 'package:purrr':
## 
##     transpose
library(dplyr)
library(tidyr)
library(parsnip)

Fetching Data

The data is sourced from the Fitzroy package using fetch_results_afltables which will give us the results for each game of the 2023 season.

afl2023 <- fitzRoy::fetch_results_afltables(season = 2023)

View Dataset

head(afl2023, )
## # A tibble: 6 × 16
##    Game Date       Round Home.Team Home.Goals Home.Behinds Home.Points Away.Team
##   <dbl> <date>     <chr> <chr>          <int>        <int>       <int> <chr>    
## 1 16191 2023-03-16 R1    Richmond           8           10          58 Carlton  
## 2 16192 2023-03-17 R1    Geelong           16            7         103 Collingw…
## 3 16193 2023-03-18 R1    North Me…         12           15          87 West Coa…
## 4 16194 2023-03-18 R1    Port Ade…         18           18         126 Brisbane…
## 5 16195 2023-03-18 R1    Melbourne         17           13         115 Footscray
## 6 16196 2023-03-18 R1    Gold Coa…          9            7          61 Sydney   
## # ℹ 8 more variables: Away.Goals <int>, Away.Behinds <int>, Away.Points <int>,
## #   Venue <chr>, Margin <int>, Season <dbl>, Round.Type <chr>,
## #   Round.Number <int>

The data is split between home and away teams.

Creating Result Variable and Splitting Data

A new column will be created which will represent the outcome of the game for the home team. A win = 1, a draw = 0.5 and a loss = 0. As mentioned previously, the data will be split into training data which will include the regular season and testing data which covers the finals matches.

afl2023$Result <- ifelse(afl2023$Margin > 0, 1,
                          ifelse(afl2023$Margin < 0, 0, 0.5))

train_data <- afl2023[afl2023$Season < 2023 | (afl2023$Season == 2023 & afl2023$Round.Type == "Regular"), ]
test_data <- afl2023[afl2023$Season == 2023 & afl2023$Round.Type == "Finals", ]

Model Creation

Tuning Parameters

An initial elo rating of 1650 and k value of 30 will be assigned. A new dataset will be created which will only consist of the 2023 finals teams.

elo_model_train <- elo::elo.run(initial_elos = 1650,
                         formula = Result ~ Home.Team + Away.Team,
                         k = 30,
                         data = train_data)

final_elos_train <- final.elos(elo_model_train) %>% 
  as.data.frame() %>% 
  rownames_to_column()

final_elos_train
##            rowname        .
## 1         Adelaide 1492.972
## 2   Brisbane Lions 1615.341
## 3          Carlton 1565.372
## 4      Collingwood 1616.083
## 5         Essendon 1469.965
## 6        Footscray 1495.534
## 7        Fremantle 1471.746
## 8          Geelong 1476.643
## 9       Gold Coast 1445.522
## 10             GWS 1554.260
## 11        Hawthorn 1423.744
## 12       Melbourne 1594.223
## 13 North Melbourne 1320.600
## 14   Port Adelaide 1603.028
## 15        Richmond 1477.361
## 16        St Kilda 1511.492
## 17          Sydney 1534.524
## 18      West Coast 1331.590
final_elos_top8 <- 
  final_elos_train %>% 
  filter(rowname %in% c("Collingwood",
                        "Brisbane Lions",
                        "Port Adelaide",
                        "Melbourne",
                        "Carlton",
                        "St Kilda",
                        "GWS",
                        "Sydney")) %>% 
  mutate(rowname = case_when(
    rowname == 'Collingwood' ~ 'COL',
    rowname == 'Brisbane Lions' ~ 'BRI',
    rowname == 'Port Adelaide' ~ 'PAF',
    rowname == 'Melbourne' ~ 'MEL',
    rowname == 'Carlton' ~ 'CFC',
    rowname == 'St Kilda' ~ 'STK',
    rowname == 'GWS' ~ 'GWS',
    rowname == 'Sydney' ~ 'SYD'
  ))


final_elos_top8
##   rowname        .
## 1     BRI 1615.341
## 2     CFC 1565.372
## 3     COL 1616.083
## 4     GWS 1554.260
## 5     MEL 1594.223
## 6     PAF 1603.028
## 7     STK 1511.492
## 8     SYD 1534.524

Here are the following ELO ratings for the top 8 games that are in the finals campaign.

Creating Margin Model

Now we will be calculating the ELO and probability of each time winning their matches.

colnames(final_elos_top8) <- c("Team", "elo")

team_name_mapping <- data.frame(
  old_name = c("BRI", "CFC", "COL", "Giants", "MEL", "PAF", "STK", "SYD"),  # Replace with actual old team names
  new_name = c("Brisbane Lions", "Carlton", "Collingwood", "GWS", "Melbourne", "Port Adelaide", "St Kilda", "Sydney")  # Replace with corresponding new names
)

final_elos_top8 <- final_elos_top8 %>%
  left_join(team_name_mapping, by = c("Team" = "old_name")) %>%
  mutate(Team = ifelse(!is.na(new_name), new_name, Team)) %>%
  select(-new_name)

test_data <- test_data %>%
  left_join(final_elos_top8, by = c("Home.Team" = "Team"))

test_data <- test_data %>%
  left_join(final_elos_top8, by = c("Away.Team" = "Team"))

test_data$home_elo <- test_data$elo.x
test_data$away_elo <- test_data$elo.y

test_data$home_elo_prob <- elo.prob(test_data$home_elo, test_data$away_elo)

test_data$away_elo_prob <- 1 - test_data$home_elo_prob

view(test_data)

From this, we are able to see the ELO rating of each taem as well as the win percentage.