Load Packages

These are the packages required for this analysis. Note: they will need to be installed first if you have not already done so.

# install packages, remove hashtag to run install 
# install.packages('tidyverse')
# install.packages('fitzRoy')

# load packages
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.3     ✔ purrr   1.0.2
## ✔ tibble  3.2.1     ✔ dplyr   1.1.3
## ✔ tidyr   1.3.0     ✔ stringr 1.5.0
## ✔ readr   2.1.4     ✔ forcats 1.0.0
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'purrr' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## Warning: package 'stringr' was built under R version 4.2.3
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(fitzRoy)

Data Import

The data are taken from the fitzRoy package, which provides datasets for AFL competitions. In this analysis we use ‘fetch_player_stats_afltables’, restricting the request to the 2023 season through its season argument.

# Fetch player-level match statistics, restricted to the 2023 season
afl2023 <- fitzRoy::fetch_player_stats_afltables(season = 2023)
## ℹ Looking for data from 2023-01-01 to 2023-11-03
## 
ℹ fetching cached data from <github.com>

✔ fetching cached data from <github.com> ... done
## Finished getting afltables data

View Data

# Preview the first rows to check which columns are available
head(afl2023)
## # A tibble: 6 × 59
##   Season Round Date       Local.start.time Venue  Attendance Home.team  HQ1G
##    <dbl> <chr> <date>                <int> <chr>       <dbl> <chr>     <int>
## 1   2023 1     2023-03-16             1920 M.C.G.      88084 Richmond      1
## 2   2023 1     2023-03-16             1920 M.C.G.      88084 Richmond      1
## 3   2023 1     2023-03-16             1920 M.C.G.      88084 Richmond      1
## 4   2023 1     2023-03-16             1920 M.C.G.      88084 Richmond      1
## 5   2023 1     2023-03-16             1920 M.C.G.      88084 Richmond      1
## 6   2023 1     2023-03-16             1920 M.C.G.      88084 Richmond      1
## # ℹ 51 more variables: HQ1B <int>, HQ2G <int>, HQ2B <int>, HQ3G <int>,
## #   HQ3B <int>, HQ4G <int>, HQ4B <int>, Home.score <int>, Away.team <chr>,
## #   AQ1G <int>, AQ1B <int>, AQ2G <int>, AQ2B <int>, AQ3G <int>, AQ3B <int>,
## #   AQ4G <int>, AQ4B <int>, Away.score <int>, First.name <chr>, Surname <chr>,
## #   ID <dbl>, Jumper.No. <chr>, Playing.for <chr>, Kicks <dbl>, Marks <dbl>,
## #   Handballs <dbl>, Goals <dbl>, Behinds <dbl>, Hit.Outs <dbl>, Tackles <dbl>,
## #   Rebounds <dbl>, Inside.50s <dbl>, Clearances <dbl>, Clangers <dbl>, …

Create dataset giving team stats

The dataset contains individual player stats, but we want match averages for each team. This is done using the group_by function. However, since we want stats for both the home and away games of each team, a separate dataset is built for each and the two are then merged together. We also create variables for scoring shots and match outcome (i.e. score).

# Get dataset for home team stats, with relevant stats.
#
# Player rows are grouped by round, home team and the side each player was
# playing for, so the result has one row per (Round, Home.team, Playing.for).
# The Ave_* columns are means over the player rows in that group.
#
# Away.team, Home.score and Away.score describe the match rather than the
# player, so `first()` is used to collapse them to a single value. This keeps
# `summarise()` at exactly one row per group, which avoids the multi-row result
# deprecated in dplyr 1.1.0 and the `distinct()` clean-up it required.
# NOTE(review): assumes a team hosts at most one match per round, so that each
# group maps to a single match - confirm for finals rounds.
afl23_team_stats <- afl2023 %>%
  group_by(Round, Home.team, Playing.for) %>%
  summarise(
    Opponent = first(Away.team),
    Ave_Kicks = mean(Kicks),
    Ave_Marks = mean(Marks),
    Ave_Handballs = mean(Handballs),
    Ave_Goals = mean(Goals),
    Ave_Behinds = mean(Behinds),
    # Scoring shots = goals + behinds
    Ave_Scoring_shots = Ave_Goals + Ave_Behinds,
    Ave_Hit_Outs = mean(Hit.Outs),
    Ave_Tackles = mean(Tackles),
    Ave_Rebounds = mean(Rebounds),
    Ave_Inside_50s = mean(Inside.50s),
    Ave_Clearances = mean(Clearances),
    Ave_Clangers = mean(Clangers),
    Ave_Frees_For = mean(Frees.For),
    Ave_Frees_Against = mean(Frees.Against),
    Ave_Contested_Possessions = mean(Contested.Possessions),
    Ave_Uncontested_Possessions = mean(Uncontested.Possessions),
    Ave_Contested_Marks = mean(Contested.Marks),
    Ave_Marks_Inside_50 = mean(Marks.Inside.50),
    Ave_One_Percenters = mean(One.Percenters),
    Ave_Bounces = mean(Bounces),
    # Margin and Score are expressed from the home team's point of view
    Margin = first(Home.score) - first(Away.score),
    # 1 = home win, 0 = home loss, 0.5 = draw (NA margin stays NA)
    Score = case_when(
      Margin > 0 ~ 1,
      Margin < 0 ~ 0,
      Margin == 0 ~ 0.5
    ),
    .groups = "drop"
  )
# Get dataset for away team stats, with relevant stats.
#
# Player rows are grouped by round, away team and the side each player was
# playing for, so the result has one row per (Round, Away.team, Playing.for).
# The Ave_* columns are means over the player rows in that group.
#
# Home.team, Home.score and Away.score describe the match rather than the
# player, so `first()` is used to collapse them to a single value. This keeps
# `summarise()` at exactly one row per group, which avoids the multi-row result
# deprecated in dplyr 1.1.0 and the `distinct()` clean-up it required.
# NOTE(review): assumes a team plays away at most once per round, so that each
# group maps to a single match - confirm for finals rounds.
afl23_team_stats_away <- afl2023 %>%
  group_by(Round, Away.team, Playing.for) %>%
  summarise(
    Opponent = first(Home.team),
    Ave_Kicks = mean(Kicks),
    Ave_Marks = mean(Marks),
    Ave_Handballs = mean(Handballs),
    Ave_Goals = mean(Goals),
    Ave_Behinds = mean(Behinds),
    # Scoring shots = goals + behinds
    Ave_Scoring_shots = Ave_Goals + Ave_Behinds,
    Ave_Hit_Outs = mean(Hit.Outs),
    Ave_Tackles = mean(Tackles),
    Ave_Rebounds = mean(Rebounds),
    Ave_Inside_50s = mean(Inside.50s),
    Ave_Clearances = mean(Clearances),
    Ave_Clangers = mean(Clangers),
    Ave_Frees_For = mean(Frees.For),
    Ave_Frees_Against = mean(Frees.Against),
    Ave_Contested_Possessions = mean(Contested.Possessions),
    Ave_Uncontested_Possessions = mean(Uncontested.Possessions),
    Ave_Contested_Marks = mean(Contested.Marks),
    Ave_Marks_Inside_50 = mean(Marks.Inside.50),
    Ave_One_Percenters = mean(One.Percenters),
    Ave_Bounces = mean(Bounces),
    # Margin and Score are expressed from the away team's point of view
    Margin = first(Away.score) - first(Home.score),
    # 1 = away win, 0 = away loss, 0.5 = draw (NA margin stays NA)
    Score = case_when(
      Margin > 0 ~ 1,
      Margin < 0 ~ 0,
      Margin == 0 ~ 0.5
    ),
    .groups = "drop"
  )
# Home table: drop the rows whose players belong to the opponent, label the
# focus-team column `Team`, and discard the now-redundant `Playing.for`
afl23_team_stats <- afl23_team_stats %>%
  filter(Playing.for != Opponent) %>%
  rename(Team = Home.team) %>%
  select(-Playing.for)

# Away table: same clean-up, with the away side as the focus team
afl23_team_stats_away <- afl23_team_stats_away %>%
  filter(Playing.for != Opponent) %>%
  rename(Team = Away.team) %>%
  select(-Playing.for)

# Stack the home and away tables into one table of per-match team stats
afl23_team_stats <- rbind(afl23_team_stats, afl23_team_stats_away)

Now create a dataset of each team's overall stats over the season.

We use group_by again; however, this time we do not group by round. This provides each team's overall stats. We also create variables for goal kicking accuracy and total wins (a draw counts as half a win, because wins are the sum of the match scores).

# Season-level summary: one row per team.
#
# Every Ave_* column is averaged over all of the team's match rows (home and
# away). `across()` replaces the hand-copied `x = mean(x)` lines, which
# overwrote their own inputs part-way through the call. Ave_Scoring_shots is
# therefore the mean of the per-match values, which is algebraically the same
# as mean goals plus mean behinds.
#
# NOTE: Wins is the sum of Score, so a drawn match contributes 0.5.
afl23_overall_stats <- afl23_team_stats %>%
  group_by(Team) %>%
  summarise(
    across(starts_with("Ave_"), mean),
    Wins = sum(Score)
  ) %>%
  # Goal kicking accuracy: goals as a percentage of all scoring shots
  mutate(Goal_Kick_Accuracy = (Ave_Goals / Ave_Scoring_shots) * 100)

Plot the data

Here we plot each team's goal kicking accuracy, using ggplot and geom_point.

# One point per team showing its goal kicking accuracy
afl23_overall_stats %>%
  ggplot(aes(x = Team, y = Goal_Kick_Accuracy)) +
  geom_point()

Now make the graph look nicer by adding a title and axis labels, and by making the team names clear on the x-axis.

# Same scatter with a title, axis labels and angled team names
afl23_overall_stats %>%
  ggplot(aes(x = Team, y = Goal_Kick_Accuracy)) +
  geom_point() +
  labs(
    x = "Team",
    y = "Accuracy (%)",
    title = "AFL Teams Goal Kicking Accuracy"
  ) +
  # Angle the team names so they do not overlap along the x axis
  theme(axis.text.x = element_text(size = 10, angle = 45, hjust = 1))

Now assess how Accuracy relates to winning with another plot.

# Accuracy against wins, one coloured and labelled point per team
afl23_overall_stats %>%
  ggplot(aes(x = Goal_Kick_Accuracy, y = Wins, colour = Team)) +
  geom_point() +
  # Anchor each label at its top-right corner so it sits below-left of the point
  geom_text(aes(label = Team), vjust = 1, hjust = 1) +
  labs(
    x = "Accuracy (%)",
    y = "Wins",
    colour = "Team",
    title = "AFL Teams Goal Kicking Accuracy, Wins relation"
  )