# Project directory: used as the working directory and passed to
# load_data_for_one_week() further down.
directory <- "~/Desktop/2023 data"
setwd(directory)

# functions.R is expected to define load_packages() and
# load_data_for_one_week(), both called by this script.
source("~/Desktop/2023 data/functions.R")

# Packages handed to the project's load_packages() helper.
required_packages <- c(
  "data.table", "dplyr", "ggplot2", "lubridate",
  "ggforce", "hms", "plm", "tidyr"
)
load_packages(required_packages)

# Load weeks 1-8 and stack them into a single table `bigd`.
#
# load_data_for_one_week() comes from functions.R. Its return value is
# assumed to be a data frame with the same columns for every week (rbind()
# needs that) -- TODO confirm against the helper.
# Week numbers are kept as doubles in case the helper is type-sensitive.
week_numbers <- as.numeric(1:8)

weekly_data <- lapply(week_numbers, function(week_number) {
  load_data_for_one_week(directory,
                         weekNumber = week_number,
                         merge = TRUE)
})

# Equivalent to rbind(<week 1>, <week 2>, ..., <week 8>).
bigd <- do.call(rbind, weekly_data)

# Drop the per-week copies; only the combined table is used afterwards.
rm(weekly_data, week_numbers)


# ---- Per-player, per-play features ------------------------------------------
#
# smalld: rows with pff_blockType "UP", excluding pass results "IN" and "I".
# `!=` is used on purpose: filter() drops rows whose condition is NA, so rows
# with a missing passResult are removed as well.
smalld <- bigd %>%
  filter(pff_blockType == "UP",
         passResult != "IN",
         passResult != "I") %>%
  data.frame()

# df1: one row per (gameId, playId, nflId) with
#   dis_sum          sum of `dis`
#   ave_s, ave_a     mean of `s` and `a`
#   min_sec, max_sec first/last clock time of the rows, in seconds of the day
#   fw               sum of absolute row-to-row changes in `dir`
#
# The direction change is computed AFTER grouping so the first row of one
# player/play is never differenced against the last row of the previous one,
# and it is a true difference (dir - previous dir).
# dplyr::lag() is namespace-qualified so the result does not depend on which
# attached package's lag() wins (plm is attached after dplyr by
# load_packages() -- confirm whether it masks lag()).
# Assumes rows are already in time order within each player/play -- TODO
# confirm.
# NOTE(review): if `dir` is a compass angle in degrees, differences across
# the 0/360 wrap are overstated; confirm whether that needs handling.
df1 <- smalld %>%
  select(gameId, playId, nflId, s, dis, dir, a, time, passResult, week,
         quarter, yardlineNumber, defendersInBox, pff_beatenByDefender) %>%
  mutate(hms = as_hms(time),
         second = hour(hms) * 3600 + minute(hms) * 60 + second(hms)) %>%
  group_by(gameId, playId, nflId, passResult, week, quarter, yardlineNumber,
           defendersInBox, pff_beatenByDefender) %>%
  mutate(dir_change = dir - dplyr::lag(dir, n = 1)) %>%
  summarise(dis_sum = sum(dis),
            ave_s = mean(s),
            ave_a = mean(a),
            min_sec = min(second),
            max_sec = max(second),
            # The first row of each group has no predecessor (NA); skip it.
            fw = sum(abs(dir_change), na.rm = TRUE),
            .groups = "drop") %>%
  data.frame()

# Remaining NAs (e.g. from missing inputs to the means) are treated as 0.
df1[is.na(df1)] <- 0

# df2: modelling table. Adds the play duration and recodes passResult to a
# 0/1 outcome ("C" -> 1, "S"/"R" -> 0). Column order matches what the rest
# of the script expects: keys and features first, `fw` last.
df2 <- df1 %>%
  mutate(play_sec = max_sec - min_sec,
         passResult = recode(passResult, "C" = 1, "S" = 0, "R" = 0)) %>%
  select(gameId, playId, nflId, passResult, dis_sum, ave_s, ave_a, play_sec,
         week, quarter, yardlineNumber, defendersInBox, pff_beatenByDefender,
         fw) %>%
  data.frame()

# NOTE(review): no caret function appears to be called in the visible part
# of this script; confirm whether this attach is still needed.
library(caret)

# Any NA left in the modelling table is replaced by 0 before fitting.
df2[is.na(df2)] <- 0

# Fixed-effects ("within") linear model of the 0/1 passResult on the
# per-player play features.
# NOTE(review): the panel index is (playId, week). Whether playId alone
# identifies a play across games is not visible here -- confirm that this
# is the intended individual index.
model_norm <- plm(
  passResult ~ ave_a + ave_s + dis_sum + quarter + defendersInBox,
  data = df2,
  index = c("playId", "week"),
  model = "within"
)
summary(model_norm)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = passResult ~ ave_a + ave_s + dis_sum + quarter + 
##     defendersInBox, data = df2, model = "within", index = c("playId", 
##     "week"))
## 
## Unbalanced Panel: n = 216, T = 1-3, N = 231
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -0.29502  0.00000  0.00000  0.00000  0.29502 
## 
## Coefficients:
##                 Estimate Std. Error t-value Pr(>|t|)   
## ave_a          -0.041040   0.136811 -0.3000 0.770338   
## ave_s           0.737450   0.231701  3.1828 0.009774 **
## dis_sum        -0.154714   0.038384 -4.0307 0.002397 **
## quarter         1.398617   0.323273  4.3264 0.001498 **
## defendersInBox  0.243087   0.096013  2.5318 0.029778 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    1.6667
## Residual Sum of Squares: 0.39406
## R-Squared:      0.76356
## Adj. R-Squared: -4.438
## F-statistic: 6.45895 on 5 and 10 DF, p-value: 0.0062404
# Score every row of df2 as the linear combination of its predictors with
# the fitted slope coefficients of model_norm.
# The coefficients are read from the model object instead of being copied by
# hand from the printed summary, so the score stays in sync when the data or
# the model change and uses full precision rather than 8 printed digits.
# Only the slopes are used; no per-individual effect of the "within" model
# is added.
slopes <- coef(model_norm)
df2$new_y <- as.numeric(
  as.matrix(df2[, names(slopes), drop = FALSE]) %*% slopes
)

df2$rounded_y <- round(df2$new_y, digits = 2)

# Highest scores first; head() returns fewer rows (not NA-filled rows) when
# df2 has fewer than 15 rows.
new_order <- arrange(df2, desc(rounded_y))
df3 <- head(new_order, 15)

# Bar chart of rounded_y per playId (bars ordered by descending score),
# filled by the 0/1 passResult on a red -> dark green gradient.
#
# plays: data frame with columns playId, rounded_y and passResult.
# Returns the ggplot object; calling it at top level prints the chart.
plot_score_ranking <- function(plays) {
  ggplot(plays,
         aes(x = reorder(playId, -rounded_y),
             y = rounded_y,
             fill = passResult)) +
    geom_bar(stat = "identity") +
    geom_text(aes(label = paste0(rounded_y)),
              vjust = -.5, size = 2, angle = 45, hjust = -.04) +
    labs(x = "Players", y = "Ranking", title = "Pull Plays",
         fill = "Pass Results") +
    # Legend ticks must lie inside `limits`; breaks outside the limits are
    # dropped, so ticks at 0/5/10/15 would leave only the 0 tick.
    scale_fill_continuous(
      limits = c(0, 1),
      breaks = c(0, 1),
      labels = c("0", "1"),
      low = "red",
      high = "dark green") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 5),
          plot.title = element_text(hjust = 0.5))
}

# 15 highest-scoring rows (df3 is built just above).
plot_score_ranking(df3)

# 15 lowest-scoring rows.
new_order <- arrange(df2, rounded_y)
df4 <- head(new_order, 15)

plot_score_ranking(df4)