library(dplyr)
library(data.table)
library(stringr)
library(purrr)
library(ggplot2)
library(readr)
library(RColorBrewer)
library(jsonlite)
library(cowplot)
library(knitr)
#football specific packages
library(understatr) #remotes::install_github('ewenme/understatr')
#used for understat data
library(ggsoccer) #used for football visualisations
# Custom pitch dimensions handed to annotate_pitch(dimensions = ...) when the
# shot maps are drawn. Shot coordinates are scaled onto the same 587 x 373
# grid before plotting, with the origin in the corner at (0, 0).
pitch_custom <- list(
  length                = 587,
  width                 = 373,
  penalty_box_length    = 101,
  penalty_box_width     = 211,
  six_yard_box_length   = 31,
  six_yard_box_width    = 111,
  penalty_spot_distance = 66,
  goal_width            = 45,
  origin_x              = 0,
  origin_y              = 0
)
# Download the raw understat data for the 2020 EPL season. Adapted from
# https://biscuitchaserfc.blogspot.com/2020/09/shot-data-for-top-5-european-leagues.html
PL_team_stats <- get_league_teams_stats(league_name = "EPL", year = 2020)

# One request per team; map_dfr() (purrr) row-binds the per-team player tables.
PL_player_data <- map_dfr(
  unique(PL_team_stats$team_name),
  get_team_players_stats,
  year = 2020
)

# IDs of the players whose shots are downloaded. unique() makes sure a
# player_id that occurs on more than one row of PL_player_data is requested
# only once, so the same shots are never row-bound twice.
PL_players <- unique(PL_player_data$player_id)

# N.B. this step is slow: shot data is downloaded one player at a time. To
# avoid repeating it, write the result to CSV once and read it back in later
# runs by uncommenting the lines below.
# possibly() turns a failed download into NULL, which map_dfr() skips;
# quiet = FALSE reports the error instead of hiding it.
PL_shot_data <- map_dfr(
  PL_players,
  possibly(get_player_shots, otherwise = NULL, quiet = FALSE)
)
# write_csv(PL_shot_data, "PL_shot_data.csv")
# PL_shot_data <- read.csv("D:\\R projects\\Football\\Arsenal assists\\PL_shot_data.csv")

# The team table is only needed to enumerate the teams above.
rm(PL_team_stats)
# Record which team took each shot: h_team when h_a is "h", a_team when h_a is
# "a". Any other h_a value leaves team_name as NA.
PL_shot_data <- mutate(
  PL_shot_data,
  team_name = case_when(
    h_a == "h" ~ h_team,
    h_a == "a" ~ a_team
  )
)
# Add each shooter's minutes played (`time`) to every shot row.
# The join key is spelled out rather than inferred from the shared column
# names, and exact duplicate (player_id, time) rows are dropped from the
# lookup first so they cannot multiply the shot rows.
# NOTE(review): a player_id that appears with differing `time` values would
# still match more than once -- confirm whether that can happen in this data.
PL_shot_data <- inner_join(
  PL_shot_data,
  distinct(PL_player_data[, c("player_id", "time")]),
  by = "player_id"
)
# Shots from the 2020 season taken by the selected players. The team_name
# condition keeps only shots taken for Arsenal, which drops any shots a listed
# player took for another club.
arsenal_shots <- filter(
  PL_shot_data,
  player %in% c(
    "Alexandre Lacazette", "Pierre-Emerick Aubameyang", "Eddie Nketiah",
    "Gabriel Martinelli", "Bukayo Saka", "Willian", "Nicolas Pepe",
    "Reiss Nelson", "Emile Smith-Rowe", "Granit Xhaka", "Dani Ceballos",
    "Joe Willock", "Mohamed Elneny", "Thomas Partey"
  ),
  year == 2020,
  team_name == "Arsenal"
)
# Total xA per assisting player: the summed xG of the shots they set up,
# counted over the shots kept in arsenal_shots.
# The comparison against "NA" removes literal "NA" strings and also real
# missing values, because filter() drops rows whose condition evaluates to NA.
arsenal_shots_xA <- arsenal_shots %>%
  filter(player_assisted != "NA") %>%
  group_by(player_assisted) %>%
  summarise(xA = sum(xG))

# Attach the per-assister total to every shot that player set up.
arsenal_shots <- left_join(
  arsenal_shots,
  arsenal_shots_xA,
  by = "player_assisted"
)
rm(arsenal_shots_xA)
# Minutes played by the assisting player. Build a name -> minutes lookup from
# the shooter columns of the full shot table, renamed so that it joins on
# player_assisted and does not clash with the shooter's own `time` column.
# (Despite its name, `player_id` here is this lookup table, not an ID vector.)
player_id <- PL_shot_data %>%
  select(player_assisted = player, time_assister = time) %>%
  distinct()
arsenal_shots <- left_join(arsenal_shots, player_id, by = "player_assisted")
# Hector Bellerin's name arrives with oddly encoded accented characters, so
# every player_assisted value containing the stand-alone fragment "ctor" is
# overwritten with a plain-ASCII spelling. The column is converted to
# character first so the replacement value can be assigned freely.
arsenal_shots <- arsenal_shots %>%
  mutate(
    player_assisted = as.character(player_assisted),
    player_assisted = replace(
      player_assisted,
      str_which(player_assisted, "\\bctor\\b"),
      "Hector Bellerin"
    )
  )
# Keep only shots set up by players who assisted at least 5 of the remaining
# shots. table() ignores missing values, so shots without an assisting player
# are dropped here as well.
assist_counts <- table(arsenal_shots$player_assisted)
arsenal_shots <- arsenal_shots[
  arsenal_shots$player_assisted %in% names(assist_counts[assist_counts >= 5]),
]
rm(assist_counts)

# Convert both name columns to factors; their levels drive the per-assister
# split and the colour scale further down.
arsenal_shots <- mutate(
  arsenal_shots,
  player_assisted = as.factor(player_assisted),
  player = factor(player)
)
## Scale the shot coordinates onto the custom pitch: X by 587 (the pitch
## length) and Y by 373 (the pitch width). Home ("h") and away ("a") shots are
## scaled identically; a row with any other h_a value is placed at 0.
## The vertical view itself comes from coord_flip() when plotting.
arsenal_shots <- mutate(
  arsenal_shots,
  x = if_else(h_a %in% c("a", "h"), X * 587, 0),
  y = if_else(h_a %in% c("a", "h"), Y * 373, 0)
)
# Build the text labels used in the plots, stored as factors:
#   player_time          - shooter name plus the shooter's minutes played
#   player_assisted_time - assister name, the assister's minutes played and
#                          their total xA rounded to 3 decimal places
arsenal_shots <- arsenal_shots %>%
  mutate(
    player_time = factor(
      paste0(player, ": ", time, " minutes played")
    ),
    player_assisted_time = factor(
      paste0(
        player_assisted, ": ", time_assister,
        " minutes played, total of ", round(xA, digits = 3), " xA"
      )
    )
  )
# Colour palette for the shooting players: 14 colours generated with iwanthue
# (https://medialab.github.io/iwanthue/examples/). Each colour is named after
# a shooter (the levels of arsenal_shots$player) so the manual colour scale
# gives a player the same colour in every plot. If there are fewer shooters
# than colours, the surplus colours are left with NA names.
graph_colours <- setNames(
  c(
    "#b183d3", "#89d751", "#7946cf", "#d5c357", "#cc46b1", "#6dd1a8",
    "#d33d60", "#557e38", "#4e337b", "#d65830", "#5e95d1", "#aa763e",
    "#79283a", "#d27992"
  ),
  levels(factor(levels(arsenal_shots$player)))
)
# Build one shot map per assisting player. arsenal_shots is split on
# player_assisted_time, so ggList is a list of ggplot objects with one element
# per assister label. Each map shows the shots that player set up, coloured by
# shooter and sized by xG.
ggList <- lapply(
  split(arsenal_shots, arsenal_shots$player_assisted_time),
  function(assister_shots) {
    ggplot(assister_shots, aes(x = x, y = y, colour = player)) +
      annotate_pitch(dimensions = pitch_custom) +
      # x and y are inherited from the ggplot() call; only size is added here
      geom_point(aes(size = xG)) +
      #scale_x_continuous(expand = c(0.01, 0)) +
      theme_pitch(aspect_ratio = 373 / 587) +
      # flip the axes for a vertical view and restrict it with the x/y limits
      coord_flip(xlim = c(280, 590), ylim = c(10, 365)) +
      theme(plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "pt")) +
      # hide the size legend; "none" replaces the deprecated size = FALSE
      guides(size = "none") +
      # shared named palette, so a shooter keeps the same colour in every map
      scale_color_manual(name = "Player Shooting", values = graph_colours)
  }
)
# Stack the per-assister shot maps in a single column, vertically aligned,
# using each assister label as the panel label.
cowplot::plot_grid(
  plotlist = ggList,
  ncol = 1,
  align = "v",
  labels = levels(arsenal_shots$player_assisted_time)
)
