# Faith, Brian - MSDS 456 - Assignment #1 Win Probability Model
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.2 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.0.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggrepel)
library(ggimage)
library(nflfastR)
library(caTools)
library(ggpubr)
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:ggimage':
##
## theme_transparent
library(png)
library(Metrics)
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following objects are masked from 'package:Metrics':
##
## precision, recall
## The following object is masked from 'package:purrr':
##
## lift
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
library(knitr)
options(scipen = 9999)
# Pulling logos
billsfile <- download.file('https://upload.wikimedia.org/wikipedia/en/thumb/7/77/Buffalo_Bills_logo.svg/189px-Buffalo_Bills_logo.svg.png',
destfile = 'bills.png', mode = 'wb')
billslogo <- readPNG('bills.png')
chiefsfile <-download.file('https://upload.wikimedia.org/wikipedia/en/thumb/e/e1/Kansas_City_Chiefs_logo.svg/100px-Kansas_City_Chiefs_logo.svg.png',
destfile = "chiefslogo.png", mode = "wb")
chiefslogo <- readPNG('chiefslogo.png')
sffile <- download.file('https://upload.wikimedia.org/wikipedia/commons/thumb/3/3a/San_Francisco_49ers_logo.svg/100px-San_Francisco_49ers_logo.svg.png',
destfile = "sflogo.png", mode = "wb")
sflogo <- readPNG('sflogo.png')
packersfile <- download.file('https://upload.wikimedia.org/wikipedia/commons/thumb/5/50/Green_Bay_Packers_logo.svg/100px-Green_Bay_Packers_logo.svg.png',
destfile= "packerslogo.png", mode = "wb")
packerslogo <- readPNG('packerslogo.png')
ramsfile <- download.file('https://upload.wikimedia.org/wikipedia/en/thumb/8/8a/Los_Angeles_Rams_logo.svg/100px-Los_Angeles_Rams_logo.svg.png',
destfile = "ramslogo.png", mode = "wb")
ramslogo <- readPNG('ramslogo.png')
bucsfile <-download.file('https://upload.wikimedia.org/wikipedia/en/thumb/a/a2/Tampa_Bay_Buccaneers_logo.svg/100px-Tampa_Bay_Buccaneers_logo.svg.png',
destfile = "bucslogo.png", mode = "wb")
bucslogo <- readPNG('bucslogo.png')
bengalsfile <-download.file('https://upload.wikimedia.org/wikipedia/commons/thumb/8/81/Cincinnati_Bengals_logo.svg/100px-Cincinnati_Bengals_logo.svg.png',
destfile = "bengalslogo.png", mode = "wb")
bengalslogo <- readPNG('bengalslogo.png')
titansfile <-download.file ('https://upload.wikimedia.org/wikipedia/en/thumb/c/c1/Tennessee_Titans_logo.svg/100px-Tennessee_Titans_logo.svg.png',
destfile = "titanslogo.png", mode = "wb")
titanslogo <- readPNG('titanslogo.png')
# Load Play-by-Play and add winner and poswins columns
pbp_final21 <- load_pbp(2017:2021)
pbp_final21 <- pbp_final21 %>% mutate (winner = ifelse(home_score > away_score, home_team, away_team))
pbp_final21 <- pbp_final21 %>% mutate (poswins = ifelse (winner == posteam, "Yes", "No"))
length(pbp_final21)
## [1] 374
# Convert Down and Poswins to factors
pbp_final21$down <- as.factor(pbp_final21$down)
pbp_final21$poswins <- as.factor(pbp_final21$poswins)
# Filter needed columns
pbp_finalfilter <- pbp_final21 %>%
filter (qtr <= 4 & poswins != "NA") %>%
select (game_id, home_team, away_team, posteam, winner, down, ydstogo,
game_seconds_remaining/60, yardline_100, score_differential, poswins,
home_wp, away_wp, wp, qtr)
View(pbp_finalfilter)
# Separate and row bind divisional round play-by-play data for reference
pbp_div1 <- pbp_finalfilter[pbp_finalfilter$game_id == "2021_20_BUF_KC", 1:15]
pbp_div2 <- pbp_finalfilter[pbp_finalfilter$game_id == "2021_20_SF_GB", 1:15]
pbp_div3 <- pbp_finalfilter[pbp_finalfilter$game_id == "2021_20_LA_TB", 1:15]
pbp_div4 <- pbp_finalfilter[pbp_finalfilter$game_id == "2021_20_CIN_TEN", 1:15]
pbp_divisional <- rbind(pbp_div1, pbp_div2, pbp_div3, pbp_div4)
View(pbp_div1)
# Split data sets into train and test
set.seed(315)
split = sample.split (pbp_finalfilter$poswins, SplitRatio = 0.8)
train_final <- pbp_finalfilter %>% filter (split == TRUE)
test_final <- pbp_finalfilter %>% filter (split == FALSE)
# Create model
model_final <- glm(poswins ~ down + ydstogo + game_seconds_remaining +
yardline_100 + score_differential, train_final, family = "binomial")
summary(model_final)
##
## Call:
## glm(formula = poswins ~ down + ydstogo + game_seconds_remaining +
## yardline_100 + score_differential, family = "binomial", data = train_final)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.05734 -0.78419 0.07261 0.82360 2.94668
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.982026378 0.024523007 40.045 < 0.0000000000000002
## down2 -0.078154228 0.015483960 -5.047 0.00000044779
## down3 -0.197158061 0.018078145 -10.906 < 0.0000000000000002
## down4 -0.402524219 0.022531385 -17.865 < 0.0000000000000002
## ydstogo -0.009869989 0.001650537 -5.980 0.00000000223
## game_seconds_remaining -0.000013671 0.000005919 -2.310 0.0209
## yardline_100 -0.009277997 0.000267451 -34.690 < 0.0000000000000002
## score_differential 0.191423978 0.001043032 183.526 < 0.0000000000000002
##
## (Intercept) ***
## down2 ***
## down3 ***
## down4 ***
## ydstogo ***
## game_seconds_remaining *
## yardline_100 ***
## score_differential ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 221310 on 159661 degrees of freedom
## Residual deviance: 152142 on 159654 degrees of freedom
## (19112 observations deleted due to missingness)
## AIC: 152158
##
## Number of Fisher Scoring iterations: 5
predict_final <- predict (model_final, train_final, type = "response")
train_final <- cbind (train_final, predict_final)
train_final <- mutate (train_final, predict_final_home =
ifelse(posteam == home_team, predict_final, 1-predict_final))
#Divisional Round Games
# Bengals-Titans
cin_ten <-ggplot (filter(train_final, game_id == "2021_20_CIN_TEN", !is.na(down)),
aes(game_seconds_remaining/60, wp)) +
annotation_raster(bengalslogo, ymin = .89, ymax = .99, xmin = -52, xmax = -60) +
annotation_raster(titanslogo, ymin = .01, ymax = .11, xmin = -52, xmax = -60) +
geom_line (aes(game_seconds_remaining/60, predict_final_home),size = 1.5, col = "#4b92db") +
geom_line (aes(game_seconds_remaining/60, 1-predict_final_home), size = 1.5, col = "#fb4f14") +
geom_line (aes(game_seconds_remaining/60, home_wp), size = 1.5, col = "gray50") +
geom_line (aes(game_seconds_remaining/60, away_wp), size = 1.5, col = "black") +
geom_vline(xintercept = 45, lty = "dotdash", col = "gray20", size = .75) +
geom_vline(xintercept = 30, lty = "dotdash", col = "gray20", size = .75) +
geom_vline(xintercept = 15, lty = "dotdash", col = "gray20", size = .75) +
geom_hline(yintercept = .5, lty = "solid", col = "gray20", size = .5) +
geom_hline(yintercept = 0, lty = "solid", col = "gray20", size = .75) +
geom_hline(yintercept = 1, lty = "solid", col = "gray20", size = .75) +
scale_x_reverse() +
theme_minimal () +
ylab ("Win Probability") +
xlab ("Time Remaining (mins)") +
scale_y_continuous(labels = scales::percent) +
ggtitle(label = "Faith Win Probability Model Compared to nflfastR",
subtitle = "NFL Divisional Playoffs: Tennessee Titans vs Cincinnati Bengals") +
labs (caption = "Data: nflfastR") +
annotate ("text", x=5, y=.93, label = "Bengals nflfastR WP", col = "black",
fontface = 2) +
annotate ("text", x=5, y=.98, label = "Bengals Faith WP", col = "#fb4f14", fontface = 2) +
annotate ("text", x=5, y = .05, label = "Titans nflfastR WP", col = "gray50",
fontface = 2)+
annotate ("text", x=5, y = .1, label = "Titans Faith WP", col = "#4b92db", fontface = 2)
cin_ten

# Niners-Packers
sf_gb <- ggplot (filter(train_final, game_id == "2021_20_SF_GB", !is.na(down)),
aes(game_seconds_remaining/60, wp)) +
annotation_raster(packerslogo, ymin = .89, ymax = .99, xmin = -52, xmax = -60) +
annotation_raster(sflogo, ymin = .01, ymax = .11, xmin = -52, xmax = -60) +
geom_line (aes(game_seconds_remaining/60, predict_final_home),size = 1.5, col = "darkgreen") +
geom_line (aes(game_seconds_remaining/60, 1-predict_final_home), size = 1.5, col = "#b3995d") +
geom_line (aes(game_seconds_remaining/60, home_wp), size = 1.5, col = "gray50") +
geom_line (aes(game_seconds_remaining/60, away_wp), size = 1.5, col = "black") +
geom_vline(xintercept = 45, lty = "dotdash", col = "gray20", size = .75) +
geom_vline(xintercept = 30, lty = "dotdash", col = "gray20", size = .75) +
geom_vline(xintercept = 15, lty = "dotdash", col = "gray20", size = .75) +
geom_hline(yintercept = .5, lty = "solid", col = "gray20", size = .5) +
geom_hline(yintercept = 0, lty = "solid", col = "gray20", size = .75) +
geom_hline(yintercept = 1, lty = "solid", col = "gray20", size = .75) +
scale_x_reverse() +
theme_minimal () +
ylab ("Win Probability") +
xlab ("Time Remaining (mins)") +
scale_y_continuous(labels = scales::percent) +
ggtitle(label = "Faith Win Probability Model Compared to nflfastR",
subtitle = "NFL Divisional Playoffs: Green Bay Packers vs San Francisco 49ers") +
labs (caption = "Data: nflfastR") +
annotate ("text", x=5, y=.93, label = "Packers nflfastR WP", col = "gray50",
fontface = 2) +
annotate ("text", x=5, y=.98, label = "Packers Faith WP", col = "darkgreen", fontface = 2) +
annotate ("text", x=5, y = .05, label = "49ers nflfastR WP", col = "black",
fontface = 2)+
annotate ("text", x=5, y = .10, label = "49ers Faith WP", col = "#b3995d", fontface = 2)
sf_gb

# Rams-Bucs
la_tb <- ggplot (filter(train_final, game_id == "2021_20_LA_TB", !is.na(down)),
aes(game_seconds_remaining/60, wp)) +
annotation_raster(ramslogo, ymin = .89, ymax = .99, xmin = -52, xmax = -60) +
annotation_raster(bucslogo, ymin = .01, ymax = .11, xmin = -53, xmax = -60) +
geom_line (aes(game_seconds_remaining/60, predict_final_home),size = 1.5, col = "#ff7900") +
geom_line (aes(game_seconds_remaining/60, 1-predict_final_home), size = 1.5, col = "#003594") +
geom_line (aes(game_seconds_remaining/60, home_wp), size = 1.5, col = "gray50") +
geom_line (aes(game_seconds_remaining/60, away_wp), size = 1.5, col = "black") +
geom_vline(xintercept = 45, lty = "dotdash", col = "gray20", size = .75) +
geom_vline(xintercept = 30, lty = "dotdash", col = "gray20", size = .75) +
geom_vline(xintercept = 15, lty = "dotdash", col = "gray20", size = .75) +
geom_hline(yintercept = .5, lty = "solid", col = "gray20", size = .5) +
geom_hline(yintercept = 0, lty = "solid", col = "gray20", size = .75) +
geom_hline(yintercept = 1, lty = "solid", col = "gray20", size = .75) +
scale_x_reverse() +
theme_minimal () +
ylab ("Win Probability") +
xlab ("Time Remaining (mins)") +
scale_y_continuous(labels = scales::percent) +
ggtitle(label = "Faith Win Probability Model Compared to nflfastR",
subtitle = "NFL Divisional Playoffs: Tampa Bay Buccaneers vs Los Angeles Rams") +
labs (caption = "Data: nflfastR") +
annotate ("text", x=22, y=.80, label = "Rams nflfastR WP", col = "black",
fontface = 2) +
annotate ("text", x=22, y=.85, label = "Rams Faith WP", col = "#003594", fontface = 2) +
annotate ("text", x=22, y = .2, label = "Bucs nflfastR WP", col = "gray50",
fontface = 2)+
annotate ("text", x=22, y = .25, label = "Bucs Faith WP", col = "#ff7900", fontface = 2)
la_tb

# Bills-Chiefs (No OT)
buf_kc <- ggplot (filter(train_final, game_id == "2021_20_BUF_KC", !is.na(down)),
aes(game_seconds_remaining/60, wp)) +
annotation_raster(billslogo, ymin = .89, ymax = .99, xmin = -52, xmax = -60) +
annotation_raster(chiefslogo, ymin = .01, ymax = .11, xmin = -52, xmax = -60) +
geom_line (aes(game_seconds_remaining/60, predict_final_home),size = 1.5, col = "#e31837") +
geom_line (aes(game_seconds_remaining/60, 1-predict_final_home), size = 1.5, col = "#00338d") +
geom_line (aes(game_seconds_remaining/60, home_wp), size = 1.5, col = "gray50") +
geom_line (aes(game_seconds_remaining/60, away_wp), size = 1.5, col = "black") +
geom_vline(xintercept = 45, lty = "dotdash", col = "gray20", size = .75) +
geom_vline(xintercept = 30, lty = "dotdash", col = "gray20", size = .75) +
geom_vline(xintercept = 15, lty = "dotdash", col = "gray20", size = .75) +
geom_hline(yintercept = .5, lty = "solid", col = "gray20", size = .5) +
geom_hline(yintercept = 0, lty = "solid", col = "gray20", size = .75) +
geom_hline(yintercept = 1, lty = "solid", col = "gray20", size = .75) +
scale_x_reverse() +
theme_minimal () +
ylab ("Win Probability") +
xlab ("Time Remaining (mins)") +
scale_y_continuous(labels = scales::percent) +
ggtitle(label = "Faith Win Probability Model Compared to nflfastR",
subtitle = "NFL Divisional Playoffs: Kansas City Chiefs vs Buffalo Bills") +
labs (caption = "Data: nflfastR") +
annotate ("text", x=5, y=.93, label = "Bills nflfastR WP", col = "black",
fontface = 2) +
annotate ("text", x=5, y=.98, label = "Bills Faith WP", col = "#00338d", fontface = 2) +
annotate ("text", x=5, y = .05, label = "Chiefs nflfastR WP", col = "gray50",
fontface = 2)+
annotate ("text", x=5, y = .10, label = "Chiefs Faith WP", col = "#e31837", fontface = 2)
buf_kc

# Arrange all four WP charts in 2x2
ggarrange(cin_ten, sf_gb, la_tb, buf_kc, ncol = 2, nrow = 2)

# Bills - Chiefs Last Three Minutes
buf_kc_4th <- ggplot (filter(train_final, game_seconds_remaining < 180, game_id == "2021_20_BUF_KC", !is.na(down)),
aes(game_seconds_remaining/60, wp)) +
annotation_raster(billslogo, ymin = .89, ymax = .99, xmin = -2.4, xmax = -2.8) +
annotation_raster(chiefslogo, ymin = .01, ymax = .11, xmin = -2.4, xmax = -2.8) +
geom_line (aes(game_seconds_remaining/60, predict_final_home),size = 1.5, col = "#e31837") +
geom_line (aes(game_seconds_remaining/60, 1-predict_final_home), size = 1.5, col = "#00338d") +
geom_line (aes(game_seconds_remaining/60, home_wp), size = 1.5, col = "gray50") +
geom_line (aes(game_seconds_remaining/60, away_wp), size = 1.5, col = "black") +
geom_vline(xintercept = 2, lty = "dotdash", col = "gray20", size = .75) +
geom_vline(xintercept = 0, lty = "dotdash", col = "gray20", size = .75) +
geom_hline(yintercept = .5, lty = "solid", col = "gray20", size = .5) +
geom_hline(yintercept = 0, lty = "solid", col = "gray20", size = .75) +
geom_hline(yintercept = 1, lty = "solid", col = "gray20", size = .75) +
geom_point(aes(x = 159/60, y = .58), size = 7, col = "green") +
geom_point(aes(x = 114/60, y = .6), size = 7, col = "green") +
geom_point(aes(x = 62/60, y = .64), size = 7, col = "green") +
geom_point(aes(x = 13/60, y = .6), size = 7, col = "green") +
geom_point(aes(x = 3/60, y = .5), size = 7, col = "green") +
scale_x_reverse() +
theme_minimal () +
ylab ("Win Probability") +
xlab ("Time Remaining (mins)") +
scale_y_continuous(labels = scales::percent) +
ggtitle(label = "Faith Win Probability Model Compared to nflfastR",
subtitle = "NFL Divisional Playoffs: Kansas City Chiefs vs Buffalo Bills: Last Three Minutes") +
labs (caption = "Data: nflfastR") +
annotate ("text", x = 155/60, y = .55, size = 3, label = "2:48, 4th & 4 Bills Conversion") +
annotate ("text", x = 102/60, y = .57, size = 3, label = "1:54, 4th & 13 Bills TD") +
annotate ("text", x = 55/60, y = .67, size = 3, label = "1:02, Chiefs 64-yd TD") +
annotate ("text", x = 7/60, y = .64, size = 3, label = "0:13, Bills TD") +
annotate ("text", x = -2.5/60, y = .54, size = 3, label = "0:03, Chiefs FG") +
annotate ("text", x=.5, y=.95, label = "Bills nflfastR WP", col = "black",
fontface = 2) +
annotate ("text", x=.5, y=.98, label = "Bills Faith WP", col = "#00338d", fontface = 2) +
annotate ("text", x=.5, y = .02, label = "Chiefs nflfastR WP", col = "gray50",
fontface = 2)+
annotate ("text", x=.5, y = .05, label = "Chiefs Faith WP", col = "#e31837", fontface = 2)
buf_kc_4th

# RMSE comparing Predict to training Poswins
w_or_l <- ifelse(train_final$poswins == "Yes", 1, 0)
RMSE(predict_final, w_or_l, na.rm = TRUE)
## [1] 0.3988101