# Faith, Brian - MSDS 456 - Assignment #1 Win Probability Model

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.2     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.0.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggrepel)
library(ggimage)
library(nflfastR)
library(caTools)
library(ggpubr)
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:ggimage':
## 
##     theme_transparent
library(png)
library(Metrics)
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following objects are masked from 'package:Metrics':
## 
##     precision, recall
## The following object is masked from 'package:purrr':
## 
##     lift
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
library(knitr)
options(scipen = 9999)

# Pulling logos
billsfile <- download.file('https://upload.wikimedia.org/wikipedia/en/thumb/7/77/Buffalo_Bills_logo.svg/189px-Buffalo_Bills_logo.svg.png',
                           destfile = 'bills.png', mode = 'wb')
billslogo <- readPNG('bills.png')

chiefsfile <-download.file('https://upload.wikimedia.org/wikipedia/en/thumb/e/e1/Kansas_City_Chiefs_logo.svg/100px-Kansas_City_Chiefs_logo.svg.png',
                           destfile = "chiefslogo.png", mode = "wb")
chiefslogo <- readPNG('chiefslogo.png')

sffile <- download.file('https://upload.wikimedia.org/wikipedia/commons/thumb/3/3a/San_Francisco_49ers_logo.svg/100px-San_Francisco_49ers_logo.svg.png',
                        destfile = "sflogo.png", mode = "wb")
sflogo <- readPNG('sflogo.png')

packersfile <- download.file('https://upload.wikimedia.org/wikipedia/commons/thumb/5/50/Green_Bay_Packers_logo.svg/100px-Green_Bay_Packers_logo.svg.png',
                             destfile= "packerslogo.png", mode = "wb")
packerslogo <- readPNG('packerslogo.png')

ramsfile <- download.file('https://upload.wikimedia.org/wikipedia/en/thumb/8/8a/Los_Angeles_Rams_logo.svg/100px-Los_Angeles_Rams_logo.svg.png',
                          destfile = "ramslogo.png", mode = "wb")
ramslogo <- readPNG('ramslogo.png')

bucsfile <-download.file('https://upload.wikimedia.org/wikipedia/en/thumb/a/a2/Tampa_Bay_Buccaneers_logo.svg/100px-Tampa_Bay_Buccaneers_logo.svg.png',
                         destfile = "bucslogo.png", mode = "wb")
bucslogo <- readPNG('bucslogo.png')

bengalsfile <-download.file('https://upload.wikimedia.org/wikipedia/commons/thumb/8/81/Cincinnati_Bengals_logo.svg/100px-Cincinnati_Bengals_logo.svg.png',
                            destfile = "bengalslogo.png", mode = "wb")
bengalslogo <- readPNG('bengalslogo.png')

titansfile <-download.file ('https://upload.wikimedia.org/wikipedia/en/thumb/c/c1/Tennessee_Titans_logo.svg/100px-Tennessee_Titans_logo.svg.png',
                            destfile = "titanslogo.png", mode = "wb")
titanslogo <- readPNG('titanslogo.png')

# Load Play-by-Play and add winner and poswins columns 
pbp_final21 <- load_pbp(2017:2021)
pbp_final21 <- pbp_final21 %>% mutate (winner = ifelse(home_score > away_score, home_team, away_team))
pbp_final21 <- pbp_final21 %>% mutate (poswins = ifelse (winner == posteam, "Yes", "No"))

length(pbp_final21)
## [1] 374
# Convert Down and Poswins to factors 
pbp_final21$down <- as.factor(pbp_final21$down)
pbp_final21$poswins <- as.factor(pbp_final21$poswins)

# Filter needed columns 
pbp_finalfilter <- pbp_final21 %>%
  filter (qtr <= 4 & poswins != "NA") %>%
  select (game_id, home_team, away_team, posteam, winner, down, ydstogo, 
          game_seconds_remaining/60, yardline_100, score_differential, poswins,
          home_wp, away_wp, wp, qtr)
View(pbp_finalfilter)

# Separate and row bind divisional round play-by-play data for reference
pbp_div1 <- pbp_finalfilter[pbp_finalfilter$game_id == "2021_20_BUF_KC", 1:15]
pbp_div2 <- pbp_finalfilter[pbp_finalfilter$game_id == "2021_20_SF_GB", 1:15]
pbp_div3 <- pbp_finalfilter[pbp_finalfilter$game_id == "2021_20_LA_TB", 1:15]
pbp_div4 <- pbp_finalfilter[pbp_finalfilter$game_id == "2021_20_CIN_TEN", 1:15]

pbp_divisional <- rbind(pbp_div1, pbp_div2, pbp_div3, pbp_div4)
View(pbp_div1)

# Split data sets into train and test 
set.seed(315)
split = sample.split (pbp_finalfilter$poswins, SplitRatio = 0.8)

train_final <- pbp_finalfilter %>% filter (split == TRUE)
test_final <- pbp_finalfilter %>% filter (split == FALSE)

# Create model
model_final <- glm(poswins ~ down + ydstogo + game_seconds_remaining +
                     yardline_100 + score_differential, train_final, family = "binomial")
summary(model_final)
## 
## Call:
## glm(formula = poswins ~ down + ydstogo + game_seconds_remaining + 
##     yardline_100 + score_differential, family = "binomial", data = train_final)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -3.05734  -0.78419   0.07261   0.82360   2.94668  
## 
## Coefficients:
##                            Estimate   Std. Error z value             Pr(>|z|)
## (Intercept)             0.982026378  0.024523007  40.045 < 0.0000000000000002
## down2                  -0.078154228  0.015483960  -5.047        0.00000044779
## down3                  -0.197158061  0.018078145 -10.906 < 0.0000000000000002
## down4                  -0.402524219  0.022531385 -17.865 < 0.0000000000000002
## ydstogo                -0.009869989  0.001650537  -5.980        0.00000000223
## game_seconds_remaining -0.000013671  0.000005919  -2.310               0.0209
## yardline_100           -0.009277997  0.000267451 -34.690 < 0.0000000000000002
## score_differential      0.191423978  0.001043032 183.526 < 0.0000000000000002
##                           
## (Intercept)            ***
## down2                  ***
## down3                  ***
## down4                  ***
## ydstogo                ***
## game_seconds_remaining *  
## yardline_100           ***
## score_differential     ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 221310  on 159661  degrees of freedom
## Residual deviance: 152142  on 159654  degrees of freedom
##   (19112 observations deleted due to missingness)
## AIC: 152158
## 
## Number of Fisher Scoring iterations: 5
predict_final <- predict (model_final, train_final, type = "response")

train_final <- cbind (train_final, predict_final)
train_final <- mutate (train_final, predict_final_home = 
                      ifelse(posteam == home_team, predict_final, 1-predict_final))

#Divisional Round Games
# Bengals-Titans
cin_ten <-ggplot (filter(train_final, game_id == "2021_20_CIN_TEN", !is.na(down)),
                  aes(game_seconds_remaining/60, wp)) +
  annotation_raster(bengalslogo, ymin = .89, ymax = .99, xmin = -52, xmax = -60) +
  annotation_raster(titanslogo, ymin = .01, ymax = .11, xmin = -52, xmax = -60) +
  geom_line (aes(game_seconds_remaining/60, predict_final_home),size = 1.5, col = "#4b92db") +
  geom_line (aes(game_seconds_remaining/60, 1-predict_final_home), size = 1.5, col = "#fb4f14") +
  geom_line (aes(game_seconds_remaining/60, home_wp), size = 1.5, col = "gray50") +
  geom_line (aes(game_seconds_remaining/60, away_wp), size = 1.5, col = "black") +
  geom_vline(xintercept = 45, lty = "dotdash", col = "gray20", size = .75) +
  geom_vline(xintercept = 30, lty = "dotdash", col = "gray20", size = .75) +
  geom_vline(xintercept = 15, lty = "dotdash", col = "gray20", size = .75) +
  geom_hline(yintercept = .5, lty = "solid", col = "gray20", size = .5) +
  geom_hline(yintercept = 0, lty = "solid", col = "gray20", size = .75) +
  geom_hline(yintercept = 1, lty = "solid", col = "gray20", size = .75) +
  scale_x_reverse() + 
  theme_minimal () +
  ylab ("Win Probability") +
  xlab ("Time Remaining (mins)") +
  scale_y_continuous(labels = scales::percent) +
  ggtitle(label = "Faith Win Probability Model Compared to nflfastR",
          subtitle = "NFL Divisional Playoffs: Tennessee Titans vs Cincinnati Bengals") +
  labs (caption = "Data: nflfastR") +
  annotate ("text", x=5, y=.93, label = "Bengals nflfastR WP", col = "black", 
            fontface = 2) +
  annotate ("text", x=5, y=.98, label = "Bengals Faith WP", col = "#fb4f14", fontface = 2) +
  annotate ("text", x=5, y = .05, label = "Titans nflfastR WP", col = "gray50",
            fontface = 2)+
  annotate ("text", x=5, y = .1, label = "Titans Faith WP", col = "#4b92db", fontface = 2)
cin_ten

# Niners-Packers
sf_gb <- ggplot (filter(train_final, game_id == "2021_20_SF_GB", !is.na(down)),
                 aes(game_seconds_remaining/60, wp)) +
  annotation_raster(packerslogo, ymin = .89, ymax = .99, xmin = -52, xmax = -60) +
  annotation_raster(sflogo, ymin = .01, ymax = .11, xmin = -52, xmax = -60) +
  geom_line (aes(game_seconds_remaining/60, predict_final_home),size = 1.5, col = "darkgreen") +
  geom_line (aes(game_seconds_remaining/60, 1-predict_final_home), size = 1.5, col = "#b3995d") +
  geom_line (aes(game_seconds_remaining/60, home_wp), size = 1.5, col = "gray50") +
  geom_line (aes(game_seconds_remaining/60, away_wp), size = 1.5, col = "black") +
  geom_vline(xintercept = 45, lty = "dotdash", col = "gray20", size = .75) +
  geom_vline(xintercept = 30, lty = "dotdash", col = "gray20", size = .75) +
  geom_vline(xintercept = 15, lty = "dotdash", col = "gray20", size = .75) +
  geom_hline(yintercept = .5, lty = "solid", col = "gray20", size = .5) +
  geom_hline(yintercept = 0, lty = "solid", col = "gray20", size = .75) +
  geom_hline(yintercept = 1, lty = "solid", col = "gray20", size = .75) +
  scale_x_reverse() + 
  theme_minimal () +
  ylab ("Win Probability") +
  xlab ("Time Remaining (mins)") +
  scale_y_continuous(labels = scales::percent) +
  ggtitle(label = "Faith Win Probability Model Compared to nflfastR",
          subtitle = "NFL Divisional Playoffs: Green Bay Packers vs San Francisco 49ers") +
  labs (caption = "Data: nflfastR") +
  annotate ("text", x=5, y=.93, label = "Packers nflfastR WP", col = "gray50", 
            fontface = 2) +
  annotate ("text", x=5, y=.98, label = "Packers Faith WP", col = "darkgreen", fontface = 2) +
  annotate ("text", x=5, y = .05, label = "49ers nflfastR WP", col = "black",
            fontface = 2)+
  annotate ("text", x=5, y = .10, label = "49ers Faith WP", col = "#b3995d", fontface = 2)
sf_gb

# Rams-Bucs
la_tb <- ggplot (filter(train_final, game_id == "2021_20_LA_TB", !is.na(down)),
                 aes(game_seconds_remaining/60, wp)) +
  annotation_raster(ramslogo, ymin = .89, ymax = .99, xmin = -52, xmax = -60) +
  annotation_raster(bucslogo, ymin = .01, ymax = .11, xmin = -53, xmax = -60) +
  geom_line (aes(game_seconds_remaining/60, predict_final_home),size = 1.5, col = "#ff7900") +
  geom_line (aes(game_seconds_remaining/60, 1-predict_final_home), size = 1.5, col = "#003594") +
  geom_line (aes(game_seconds_remaining/60, home_wp), size = 1.5, col = "gray50") +
  geom_line (aes(game_seconds_remaining/60, away_wp), size = 1.5, col = "black") +
  geom_vline(xintercept = 45, lty = "dotdash", col = "gray20", size = .75) +
  geom_vline(xintercept = 30, lty = "dotdash", col = "gray20", size = .75) +
  geom_vline(xintercept = 15, lty = "dotdash", col = "gray20", size = .75) +
  geom_hline(yintercept = .5, lty = "solid", col = "gray20", size = .5) +
  geom_hline(yintercept = 0, lty = "solid", col = "gray20", size = .75) +
  geom_hline(yintercept = 1, lty = "solid", col = "gray20", size = .75) +
  scale_x_reverse() + 
  theme_minimal () +
  ylab ("Win Probability") +
  xlab ("Time Remaining (mins)") +
  scale_y_continuous(labels = scales::percent) +
  ggtitle(label = "Faith Win Probability Model Compared to nflfastR",
          subtitle = "NFL Divisional Playoffs: Tampa Bay Buccaneers vs Los Angeles Rams") +
  labs (caption = "Data: nflfastR") +
  annotate ("text", x=22, y=.80, label = "Rams nflfastR WP", col = "black", 
            fontface = 2) +
  annotate ("text", x=22, y=.85, label = "Rams Faith WP", col = "#003594", fontface = 2) +
  annotate ("text", x=22, y = .2, label = "Bucs nflfastR WP", col = "gray50",
            fontface = 2)+
  annotate ("text", x=22, y = .25, label = "Bucs Faith WP", col = "#ff7900", fontface = 2)
la_tb

# Bills-Chiefs (No OT)
buf_kc <- ggplot (filter(train_final, game_id == "2021_20_BUF_KC", !is.na(down)),
        aes(game_seconds_remaining/60, wp)) +
  annotation_raster(billslogo, ymin = .89, ymax = .99, xmin = -52, xmax = -60) +
  annotation_raster(chiefslogo, ymin = .01, ymax = .11, xmin = -52, xmax = -60) +
  geom_line (aes(game_seconds_remaining/60, predict_final_home),size = 1.5, col = "#e31837") +
  geom_line (aes(game_seconds_remaining/60, 1-predict_final_home), size = 1.5, col = "#00338d") +
  geom_line (aes(game_seconds_remaining/60, home_wp), size = 1.5, col = "gray50") +
  geom_line (aes(game_seconds_remaining/60, away_wp), size = 1.5, col = "black") +
  geom_vline(xintercept = 45, lty = "dotdash", col = "gray20", size = .75) +
  geom_vline(xintercept = 30, lty = "dotdash", col = "gray20", size = .75) +
  geom_vline(xintercept = 15, lty = "dotdash", col = "gray20", size = .75) +
  geom_hline(yintercept = .5, lty = "solid", col = "gray20", size = .5) +
  geom_hline(yintercept = 0, lty = "solid", col = "gray20", size = .75) +
  geom_hline(yintercept = 1, lty = "solid", col = "gray20", size = .75) +
  scale_x_reverse() + 
  theme_minimal () +
  ylab ("Win Probability") +
  xlab ("Time Remaining (mins)") +
  scale_y_continuous(labels = scales::percent) +
  ggtitle(label = "Faith Win Probability Model Compared to nflfastR",
          subtitle = "NFL Divisional Playoffs: Kansas City Chiefs vs Buffalo Bills") +
          labs (caption = "Data: nflfastR") +
  annotate ("text", x=5, y=.93, label = "Bills nflfastR WP", col = "black", 
            fontface = 2) +
  annotate ("text", x=5, y=.98, label = "Bills Faith WP", col = "#00338d", fontface = 2) +
  annotate ("text", x=5, y = .05, label = "Chiefs nflfastR WP", col = "gray50",
            fontface = 2)+
  annotate ("text", x=5, y = .10, label = "Chiefs Faith WP", col = "#e31837", fontface = 2)
buf_kc

# Arrange all four WP charts in 2x2 
ggarrange(cin_ten, sf_gb, la_tb, buf_kc, ncol = 2, nrow = 2)

# Bills - Chiefs Last Three Minutes
buf_kc_4th <- ggplot (filter(train_final, game_seconds_remaining < 180, game_id == "2021_20_BUF_KC", !is.na(down)),
                      aes(game_seconds_remaining/60, wp)) +
  annotation_raster(billslogo, ymin = .89, ymax = .99, xmin = -2.4, xmax = -2.8) +
  annotation_raster(chiefslogo, ymin = .01, ymax = .11, xmin = -2.4, xmax = -2.8) +
  geom_line (aes(game_seconds_remaining/60, predict_final_home),size = 1.5, col = "#e31837") +
  geom_line (aes(game_seconds_remaining/60, 1-predict_final_home), size = 1.5, col = "#00338d") +
  geom_line (aes(game_seconds_remaining/60, home_wp), size = 1.5, col = "gray50") +
  geom_line (aes(game_seconds_remaining/60, away_wp), size = 1.5, col = "black") +
  geom_vline(xintercept = 2, lty = "dotdash", col = "gray20", size = .75) +
  geom_vline(xintercept = 0, lty = "dotdash", col = "gray20", size = .75) +
  geom_hline(yintercept = .5, lty = "solid", col = "gray20", size = .5) +
  geom_hline(yintercept = 0, lty = "solid", col = "gray20", size = .75) +
  geom_hline(yintercept = 1, lty = "solid", col = "gray20", size = .75) +
  geom_point(aes(x = 159/60, y =  .58), size = 7, col = "green") +
  geom_point(aes(x = 114/60, y =  .6), size = 7, col = "green") +
  geom_point(aes(x = 62/60, y =  .64), size = 7, col = "green") +
  geom_point(aes(x = 13/60, y =  .6), size = 7, col = "green") +
  geom_point(aes(x = 3/60, y =  .5), size = 7, col = "green") +
  scale_x_reverse() + 
  theme_minimal () +
  ylab ("Win Probability") +
  xlab ("Time Remaining (mins)") +
  scale_y_continuous(labels = scales::percent) +
  ggtitle(label = "Faith Win Probability Model Compared to nflfastR",
          subtitle = "NFL Divisional Playoffs: Kansas City Chiefs vs Buffalo Bills: Last Three Minutes") +
  labs (caption = "Data: nflfastR") +
  annotate ("text", x = 155/60, y = .55, size = 3, label = "2:48, 4th & 4 Bills Conversion") +
  annotate ("text", x = 102/60, y = .57, size = 3, label = "1:54, 4th & 13 Bills TD") +
  annotate ("text", x = 55/60, y = .67, size = 3, label = "1:02, Chiefs 64-yd TD") +
  annotate ("text", x = 7/60, y = .64, size = 3, label = "0:13, Bills TD") +
  annotate ("text", x = -2.5/60, y = .54, size = 3, label = "0:03, Chiefs FG") +
  annotate ("text", x=.5, y=.95, label = "Bills nflfastR WP", col = "black", 
            fontface = 2) +
  annotate ("text", x=.5, y=.98, label = "Bills Faith WP", col = "#00338d", fontface = 2) +
  annotate ("text", x=.5, y = .02, label = "Chiefs nflfastR WP", col = "gray50",
            fontface = 2)+
  annotate ("text", x=.5, y = .05, label = "Chiefs Faith WP", col = "#e31837", fontface = 2)
buf_kc_4th

# RMSE comparing Predict to training Poswins
w_or_l <- ifelse(train_final$poswins == "Yes", 1, 0)
RMSE(predict_final, w_or_l, na.rm = TRUE)
## [1] 0.3988101