library(dplyr)
library(data.table)
library(stringr)
library(purrr)
library(ggplot2)
library(readr)
library(RColorBrewer)
library(jsonlite)
library(cowplot)
library(knitr)



#football specific packages
library(understatr) #remotes::install_github('ewenme/understatr')
  #used for understat data
library(ggsoccer) #used for football visualisations
# Pitch geometry handed to ggsoccer's annotate_pitch() when the shot maps are
# drawn. The shot coordinates are scaled to the same length and width later in
# the script.
# NOTE(review): the unit of these numbers is not stated anywhere in the file.
pitch_custom <- list(
  length = 587, width = 373,
  penalty_box_length = 101, penalty_box_width = 211,
  six_yard_box_length = 31, six_yard_box_width = 111,
  penalty_spot_distance = 66,
  goal_width = 45,
  origin_x = 0, origin_y = 0
)
# Download EPL data for understat's `year = 2020`. The approach follows
# https://biscuitchaserfc.blogspot.com/2020/09/shot-data-for-top-5-european-leagues.html
PL_team_stats <- get_league_teams_stats(league_name = "EPL", year = 2020)
PL_player_data <- map_dfr(
  unique(PL_team_stats$team_name),
  get_team_players_stats,
  year = 2020
) # uses purrr package

# One ID per player. De-duplicated so that a player who appears in more than
# one squad list (presumably mid-season transfers - verify against the data)
# is not downloaded, and therefore counted, twice.
PL_players <- unique(PL_player_data$player_id)

# N.B. below takes ages to run - it downloads the shot data of every player -
# so the result can be saved as CSV and loaded again later by un-hashing the
# lines below. Players whose download fails are silently skipped.
PL_shot_data <- map_dfr(
  PL_players,
  possibly(get_player_shots, otherwise = NULL)
)

# write_csv(PL_shot_data, "PL_shot_data.csv")
#PL_shot_data <- read.csv("D:\\R projects\\Football\\Arsenal assists\\PL_shot_data.csv")

rm(PL_team_stats)

# Team the shooter was playing for: home side for "h" shots, away side for "a"
# shots (NA for any other h_a value).
PL_shot_data <- PL_shot_data %>%
  mutate(team_name = case_when(
    h_a == "h" ~ h_team,
    h_a == "a" ~ a_team
  ))

# Add minutes played for each player. The lookup is reduced to one row per
# player_id first so the join cannot multiply shot rows, and the key is spelled
# out instead of being guessed from the shared column names.
PL_shot_data <- inner_join(
  PL_shot_data,
  distinct(
    PL_player_data[, c("player_id", "time")],
    player_id,
    .keep_all = TRUE
  ),
  by = "player_id"
)
# Keep only the 2020 shots of the listed Arsenal players, and only those taken
# while playing for Arsenal (drops shots a listed player took for another team).
arsenal_shots <- PL_shot_data %>%
  filter(
    player %in% c(
      "Alexandre Lacazette", "Pierre-Emerick Aubameyang", "Eddie Nketiah",
      "Gabriel Martinelli", "Bukayo Saka", "Willian", "Nicolas Pepe",
      "Reiss Nelson", "Emile Smith-Rowe", "Granit Xhaka", "Dani Ceballos",
      "Joe Willock", "Mohamed Elneny", "Thomas Partey"
    ),
    year == 2020,
    team_name == "Arsenal"
  )

# Attach each assister's total xA: the summed xG of the shots they set up.
# Shots without a recorded assister are left out of the totals; the join then
# matches on the shared player_assisted column.
arsenal_shots <- arsenal_shots %>%
  left_join(
    arsenal_shots %>%
      filter(player_assisted != "NA") %>%
      group_by(player_assisted) %>%
      summarise(xA = sum(xG))
  )
  

# Minutes played by the assisting player. Despite its name, `player_id` is a
# name -> minutes lookup built from every shooter in PL_shot_data; its columns
# are renamed so the join lines up with player_assisted.
player_id <- PL_shot_data %>%
  distinct(player, time) %>%
  rename(player_assisted = player, time_assister = time)
arsenal_shots <- arsenal_shots %>%
  left_join(player_id)

# Hector Bellerin's name arrives with a mis-encoded accented character, so any
# assister name containing a stand-alone "ctor" is replaced by a plain ASCII
# spelling.
arsenal_shots <- arsenal_shots %>%
  mutate(
    player_assisted = as.character(player_assisted),
    player_assisted = replace(
      player_assisted,
      str_which(player_assisted, "\\bctor\\b"),
      "Hector Bellerin"
    )
  )

# Focus on assisted shots: keep only assisters who set up at least 5 of the
# shots in arsenal_shots. Shots with no assister are dropped here as well.
assist_counts <- table(arsenal_shots$player_assisted)
frequent_assisters <- names(assist_counts)[assist_counts >= 5]
arsenal_shots <- arsenal_shots[
  arsenal_shots$player_assisted %in% frequent_assisters,
]
rm(assist_counts, frequent_assisters)


# Store assister and shooter as factors; the levels are used further down for
# the plot panels and for the colour palette.
arsenal_shots <- arsenal_shots %>%
  mutate(
    player_assisted = as.factor(player_assisted),
    player = factor(player)
  )
## Scale the shot coordinates onto the custom pitch.
# X and Y are multiplied by the pitch length and width (presumably they are
# 0-1 fractions of the pitch - TODO confirm). Home and away shots are scaled
# the same way; rotating the view to vertical is done later by coord_flip().
# Rows whose h_a is neither "h" nor "a" fall back to 0, as before.
arsenal_shots <- arsenal_shots %>%
  mutate(
    x = if_else(h_a %in% c("h", "a"), X * pitch_custom$length, 0),
    y = if_else(h_a %in% c("h", "a"), Y * pitch_custom$width, 0)
  )

# Build the text labels used in the plots, stored as factors:
#  - player_time: shooter plus minutes played
#  - player_assisted_time: assister plus minutes played and total xA
arsenal_shots <- arsenal_shots %>%
  mutate(
    player_time = factor(
      paste0(player, ": ", time, " minutes played")
    ),
    player_assisted_time = factor(
      paste0(
        player_assisted, ": ", time_assister,
        " minutes played, total of ", round(xA, digits = 3), " xA"
      )
    )
  )


# Colour palette for the shooting players: 14 colours generated with iwanthue
# (https://medialab.github.io/iwanthue/examples/).
graph_colours <- c(
  "#b183d3", "#89d751", "#7946cf", "#d5c357", "#cc46b1", "#6dd1a8", "#d33d60",
  "#557e38", "#4e337b", "#d65830", "#5e95d1", "#aa763e", "#79283a", "#d27992"
)

# Fail with a clear message if there are more shooters than colours.
stopifnot(
  "graph_colours has fewer colours than there are shooting players" =
    nlevels(arsenal_shots$player) <= length(graph_colours)
)

# Keep exactly one colour per shooter and name it after that player, so the
# palette never carries NA names when fewer than 14 players remain.
graph_colours <- graph_colours[seq_len(nlevels(arsenal_shots$player))]
names(graph_colours) <- levels(arsenal_shots$player)



# Make one shot map per assister: a list of ggplots named after the levels of
# player_assisted_time. Each map shows the shots that assister set up, coloured
# by shooter and sized by xG, on the attacking part of the pitch.
ggList <- lapply(
  split(arsenal_shots, arsenal_shots$player_assisted_time),
  function(assister_shots) {
    ggplot(assister_shots, aes(x = x, y = y, colour = player)) +
      annotate_pitch(dimensions = pitch_custom) +
      geom_point(aes(size = xG)) +
      #scale_x_continuous(expand = c(0.01, 0)) +
      theme_pitch(aspect_ratio = pitch_custom$width / pitch_custom$length) +
      # flip to a vertical view and crop to the attacking end of the pitch
      coord_flip(xlim = c(280, 590), ylim = c(10, 365)) +
      theme(plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "pt")) +
      # hide the size legend; "none" replaces the deprecated FALSE
      guides(size = "none") +
      scale_color_manual(name = "Player Shooting", values = graph_colours)
  }
)


# Stack the per-assister shot maps in a single column, labelling each panel
# with its assister label (the names of ggList are the levels it was split by).
cowplot::plot_grid(
  plotlist = ggList,
  labels = names(ggList),
  align = "v",
  ncol = 1
)