library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.4 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## Warning: package 'tibble' was built under R version 4.0.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.0.3
library(ggimage)
library(nflfastR)
## Warning: package 'nflfastR' was built under R version 4.0.3
options(scipen = 9999)
create a dataframe that only contains run plays and pass plays.
pbp_rp <- data %>%
filter(rush == 1 | pass == 1, !is.na(epa))
In the above, !is.na(epa) means to exclude plays with missing (na) EPA
EPA/play on offense and defense
This turned our two-lines-per-team dataframe into one, with the 0 column being pass == 0 (run plays) and the 1 column pass == 1.
pbp_rp %>%
group_by(posteam, season, pass) %>%
summarize(epa = mean(epa)) %>%
pivot_wider(names_from = pass, values_from = epa) %>%
head(4)
## `summarise()` regrouping output by 'posteam', 'season' (override with `.groups` argument)
## # A tibble: 4 x 4
## # Groups: posteam, season [4]
## posteam season `0` `1`
## <chr> <int> <dbl> <dbl>
## 1 ARI 2020 -0.00107 0.111
## 2 ATL 2020 -0.146 0.138
## 3 BAL 2020 0.0739 0.111
## 4 BUF 2020 -0.0819 0.290
#removing the first 4 wks of the season
#pbp_rp <- pbp_rp %>%
#filter(game_date > "2020-11-18")
Now let’s rename to something more sensible and save:
offense <- pbp_rp %>%
group_by(posteam, season, pass) %>%
summarize(epa = mean(epa)) %>%
pivot_wider(names_from = pass, values_from = epa) %>%
rename(off_pass_epa = `1`, off_rush_epa = `0`)
## `summarise()` regrouping output by 'posteam', 'season' (override with `.groups` argument)
offense %>%
arrange(-off_rush_epa) %>%
head(5)
## # A tibble: 5 x 4
## # Groups: posteam, season [5]
## posteam season off_rush_epa off_pass_epa
## <chr> <int> <dbl> <dbl>
## 1 BAL 2020 0.0739 0.111
## 2 TEN 2020 0.0617 0.291
## 3 NE 2020 0.0435 -0.0595
## 4 NO 2020 0.0328 0.137
## 5 GB 2020 0.0275 0.335
defense <- pbp_rp %>%
group_by(defteam, season, pass) %>%
summarize(epa = mean(epa)) %>%
pivot_wider(names_from = pass, values_from = epa) %>%
rename(def_pass_epa = `1`, def_rush_epa = `0`)
## `summarise()` regrouping output by 'defteam', 'season' (override with `.groups` argument)
defense %>%
arrange(def_rush_epa) %>%
head(5)
## # A tibble: 5 x 4
## # Groups: defteam, season [5]
## defteam season def_rush_epa def_pass_epa
## <chr> <int> <dbl> <dbl>
## 1 TB 2020 -0.213 0.0297
## 2 LA 2020 -0.181 -0.127
## 3 ATL 2020 -0.165 0.156
## 4 NO 2020 -0.156 0.0537
## 5 SF 2020 -0.149 0.0597
colnames(offense)[1] <- "team"
offense
## # A tibble: 32 x 4
## # Groups: team, season [32]
## team season off_rush_epa off_pass_epa
## <chr> <int> <dbl> <dbl>
## 1 ARI 2020 -0.00107 0.111
## 2 ATL 2020 -0.146 0.138
## 3 BAL 2020 0.0739 0.111
## 4 BUF 2020 -0.0819 0.290
## 5 CAR 2020 -0.0841 0.120
## 6 CHI 2020 -0.0702 0.0279
## 7 CIN 2020 -0.132 0.0411
## 8 CLE 2020 -0.0427 0.193
## 9 DAL 2020 -0.0716 0.00689
## 10 DEN 2020 -0.0941 -0.0707
## # ... with 22 more rows
colnames(defense)[1] <- "team"
defense
## # A tibble: 32 x 4
## # Groups: team, season [32]
## team season def_rush_epa def_pass_epa
## <chr> <int> <dbl> <dbl>
## 1 ARI 2020 -0.0606 0.0933
## 2 ATL 2020 -0.165 0.156
## 3 BAL 2020 -0.137 0.0626
## 4 BUF 2020 -0.0187 0.0891
## 5 CAR 2020 -0.0339 0.145
## 6 CHI 2020 -0.129 0.0734
## 7 CIN 2020 -0.0473 0.157
## 8 CLE 2020 -0.0233 0.142
## 9 DAL 2020 0.0156 0.124
## 10 DEN 2020 -0.0269 0.0856
## # ... with 22 more rows
#team colors & logos come with NFLFastr
head(teams_colors_logos)
## # A tibble: 6 x 11
## team_abbr team_name team_id team_nick team_color team_color2 team_color3
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 ARI Arizona ~ 3800 Cardinals #97233f #000000 #ffb612
## 2 ATL Atlanta ~ 0200 Falcons #a71930 #000000 #a5acaf
## 3 BAL Baltimor~ 0325 Ravens #241773 #000000 #9e7c0c
## 4 BUF Buffalo ~ 0610 Bills #00338d #c60c30 #0c2e82
## 5 CAR Carolina~ 0750 Panthers #0085ca #000000 #bfc0bf
## 6 CHI Chicago ~ 0810 Bears #0b162a #c83803 #0b162a
## # ... with 4 more variables: team_color4 <chr>, team_logo_wikipedia <chr>,
## # team_logo_espn <chr>, team_wordmark <glue>
#import schedules
schedule <- fast_scraper_schedules(2020, pp=FALSE)
schedule
## # A tibble: 256 x 17
## game_id season game_type week gameday weekday gametime away_team home_team
## <chr> <int> <chr> <int> <chr> <chr> <chr> <chr> <chr>
## 1 2020_0~ 2020 REG 1 2020-0~ Thursd~ 20:20 HOU KC
## 2 2020_0~ 2020 REG 1 2020-0~ Sunday 13:00 SEA ATL
## 3 2020_0~ 2020 REG 1 2020-0~ Sunday 13:00 CLE BAL
## 4 2020_0~ 2020 REG 1 2020-0~ Sunday 13:00 NYJ BUF
## 5 2020_0~ 2020 REG 1 2020-0~ Sunday 13:00 LV CAR
## 6 2020_0~ 2020 REG 1 2020-0~ Sunday 13:00 CHI DET
## 7 2020_0~ 2020 REG 1 2020-0~ Sunday 13:00 IND JAX
## 8 2020_0~ 2020 REG 1 2020-0~ Sunday 13:00 GB MIN
## 9 2020_0~ 2020 REG 1 2020-0~ Sunday 13:00 MIA NE
## 10 2020_0~ 2020 REG 1 2020-0~ Sunday 13:00 PHI WAS
## # ... with 246 more rows, and 8 more variables: away_score <int>,
## # home_score <int>, home_result <int>, stadium <chr>, location <chr>,
## # roof <chr>, surface <chr>, old_game_id <chr>
#use this code to change the week
schedulewk <- schedule %>%
filter(week == 17) %>%
select("game_id", "week", "weekday","gametime","away_team","home_team",
"old_game_id") %>%
filter(weekday == "Sunday")
schedulewk1 <- schedulewk
#change home & away to Team and Opponent
colnames(schedulewk1)[5] <- "Team"
colnames(schedulewk1)[6] <- "Opponent"
#change away & home to Opponent and Team
schedulewk2 <- schedulewk
colnames(schedulewk2)[5] <- "Opponent"
colnames(schedulewk2)[6] <- "Team"
#reorder the columns in the away schedule to match the home schedule
schedulewk2 <- schedulewk2[, c(1,2,3,4,6,5,7)]
#bind the rows to get Team & Opponent layout
schedulewk3 <- schedulewk2 %>%
bind_rows(schedulewk1)
#join team to offsense
teamoffense <- schedulewk3 %>%
left_join(offense, by = c("Team" = "team"))
#rename columns
colnames(teamoffense)[9] <- "Team_Rush_Epa"
colnames(teamoffense)[10] <- "Team_Pass_Epa"
#join opponent to defense
teamdefense <- teamoffense %>%
left_join(defense, by = c("Opponent" = "team"))
#rename columns
colnames(teamdefense)[12] <- "Opponent_Def_Rush_Epa"
colnames(teamdefense)[13] <- "Opponent_Def_Pass_Epa"
#remove the extra 2020 column
teams <- teamdefense %>%
select(-"season.y")
#need to import team logos/colors and join them to teamdefense df
logos <- teams_colors_logos
#join teams and logos - only need to join on team
teamslogo <- teams %>%
left_join(logos, by = c("Team" = "team_abbr"))
#filter teams logo to get 1 or 4pm games
teamslogo <- teamslogo %>%
filter(gametime <= '16:00')
#create text aesthetics for passing
bound.label <- 0.3
df.text <- data.frame(lab.text = c("Good Offense v Good Defense", "Smash Spot", "Bad Offense v Good Defense", "Bad Offense v Bad Defense"),
x = c(bound.label, bound.label, -0.033*bound.label, -0.033*bound.label),
y = c(-0.25*bound.label, bound.label, -0.25*bound.label, bound.label))
#epa passing vs def
teamslogo %>%
ggplot(aes(x = Team_Pass_Epa, y = Opponent_Def_Pass_Epa)) +
#add points for the QBs with the logos
geom_image(aes(image = team_logo_espn), asp = 16 / 9) +
#titles and caption
labs(x = "Team EPA Passing",
y = "Oppenent Def EPA Passing",
title = "Passing",
caption = "Data: @nflfastR") +
#uses the black and white ggplot theme
theme_bw() +
geom_hline(aes(yintercept = 0.1), lty = 2, col = "red", alpha = 0.5) +
geom_vline(aes(xintercept = 0.1), lty = 2, col = "red", alpha = 0.5) +
geom_text(data = df.text, aes(x, y, label = lab.text), colour = "blue") +
#center title with hjust = 0.5
theme(
plot.title = element_text(size = 14, hjust = 0.5, face = "bold")
)
#create text aesthetics for rushing
bound.labelr <- 0.05
df.textr <- data.frame(lab.text = c("Good Offense v Good Defense", "Smash Spot", "Bad Offense v Good Defense", "Bad Offense v Bad Defense"),
x = c(bound.labelr, bound.labelr, -2.25*bound.labelr, -2.25*bound.labelr),
y = c(-2.5*bound.labelr, 1.5*bound.labelr, -2.5*bound.labelr, 1.5*bound.labelr))
#epa rushing vs def
teamslogo %>%
ggplot(aes(x = Team_Rush_Epa, y = Opponent_Def_Rush_Epa)) +
#add points for the QBs with the logos
geom_image(aes(image = team_logo_espn), asp = 16 / 9) +
#titles and caption
labs(x = "Team EPA Rushing",
y = "Oppenent Def EPA Rushing",
title = "Rushing",
caption = "Data: @nflfastR") +
#uses the black and white ggplot theme
theme_bw() +
geom_hline(aes(yintercept = -0.05), lty = 2, col = "red", alpha = 0.5) +
geom_vline(aes(xintercept = -0.05), lty = 2, col = "red", alpha = 0.5) +
geom_text(data = df.textr, aes(x, y, label = lab.text), colour = "blue") +
#center title with hjust = 0.5
theme(
plot.title = element_text(size = 14, hjust = 0.5, face = "bold")
)