library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## Warning: package 'tibble' was built under R version 4.0.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.0.3
library(ggimage)
library(nflfastR)
## Warning: package 'nflfastR' was built under R version 4.0.3
options(scipen = 9999)

create a dataframe that only contains run plays and pass plays.

pbp_rp <- data %>%
  filter(rush == 1 | pass == 1, !is.na(epa))

In the above, !is.na(epa) means to exclude plays with missing (na) EPA


EPA/play on offense and defense

This turned our two-lines-per-team dataframe into one, with the 0 column being pass == 0 (run plays) and the 1 column pass == 1.

pbp_rp %>%
  group_by(posteam, season, pass) %>% 
  summarize(epa = mean(epa)) %>%
  pivot_wider(names_from = pass, values_from = epa) %>%
  head(4)
## `summarise()` regrouping output by 'posteam', 'season' (override with `.groups` argument)
## # A tibble: 4 x 4
## # Groups:   posteam, season [4]
##   posteam season      `0`   `1`
##   <chr>    <int>    <dbl> <dbl>
## 1 ARI       2020 -0.00107 0.111
## 2 ATL       2020 -0.146   0.138
## 3 BAL       2020  0.0739  0.111
## 4 BUF       2020 -0.0819  0.290
#removing the first 4 wks of the season
#pbp_rp <- pbp_rp %>%
 #filter(game_date > "2020-12-02")

Now let’s rename to something more sensible and save:

offense <- pbp_rp %>%
  group_by(posteam, season, pass) %>% 
  summarize(epa = mean(epa)) %>%
  pivot_wider(names_from = pass, values_from = epa) %>%
  rename(off_pass_epa = `1`, off_rush_epa = `0`)
## `summarise()` regrouping output by 'posteam', 'season' (override with `.groups` argument)
offense %>%
  arrange(-off_rush_epa) %>%
  head(5)
## # A tibble: 5 x 4
## # Groups:   posteam, season [5]
##   posteam season off_rush_epa off_pass_epa
##   <chr>    <int>        <dbl>        <dbl>
## 1 BAL       2020       0.0739       0.111 
## 2 TEN       2020       0.0617       0.291 
## 3 NE        2020       0.0435      -0.0595
## 4 NO        2020       0.0328       0.137 
## 5 GB        2020       0.0275       0.335
defense <- pbp_rp %>%
  group_by(defteam, season, pass) %>% 
  summarize(epa = mean(epa)) %>%
  pivot_wider(names_from = pass, values_from = epa) %>%
  rename(def_pass_epa = `1`, def_rush_epa = `0`)
## `summarise()` regrouping output by 'defteam', 'season' (override with `.groups` argument)
defense %>%
  arrange(def_rush_epa) %>%
  head(5)
## # A tibble: 5 x 4
## # Groups:   defteam, season [5]
##   defteam season def_rush_epa def_pass_epa
##   <chr>    <int>        <dbl>        <dbl>
## 1 TB        2020       -0.213       0.0297
## 2 LA        2020       -0.181      -0.127 
## 3 ATL       2020       -0.165       0.156 
## 4 NO        2020       -0.156       0.0537
## 5 SF        2020       -0.149       0.0597
colnames(offense)[1] <- "team"
offense
## # A tibble: 32 x 4
## # Groups:   team, season [32]
##    team  season off_rush_epa off_pass_epa
##    <chr>  <int>        <dbl>        <dbl>
##  1 ARI     2020     -0.00107      0.111  
##  2 ATL     2020     -0.146        0.138  
##  3 BAL     2020      0.0739       0.111  
##  4 BUF     2020     -0.0819       0.290  
##  5 CAR     2020     -0.0841       0.120  
##  6 CHI     2020     -0.0702       0.0279 
##  7 CIN     2020     -0.132        0.0411 
##  8 CLE     2020     -0.0427       0.193  
##  9 DAL     2020     -0.0716       0.00689
## 10 DEN     2020     -0.0941      -0.0707 
## # ... with 22 more rows
colnames(defense)[1] <- "team"
defense
## # A tibble: 32 x 4
## # Groups:   team, season [32]
##    team  season def_rush_epa def_pass_epa
##    <chr>  <int>        <dbl>        <dbl>
##  1 ARI     2020      -0.0606       0.0933
##  2 ATL     2020      -0.165        0.156 
##  3 BAL     2020      -0.137        0.0626
##  4 BUF     2020      -0.0187       0.0891
##  5 CAR     2020      -0.0339       0.145 
##  6 CHI     2020      -0.129        0.0734
##  7 CIN     2020      -0.0473       0.157 
##  8 CLE     2020      -0.0233       0.142 
##  9 DAL     2020       0.0156       0.124 
## 10 DEN     2020      -0.0269       0.0856
## # ... with 22 more rows
#team colors & logos come with NFLFastr
head(teams_colors_logos)
## # A tibble: 6 x 11
##   team_abbr team_name team_id team_nick team_color team_color2 team_color3
##   <chr>     <chr>     <chr>   <chr>     <chr>      <chr>       <chr>      
## 1 ARI       Arizona ~ 3800    Cardinals #97233f    #000000     #ffb612    
## 2 ATL       Atlanta ~ 0200    Falcons   #a71930    #000000     #a5acaf    
## 3 BAL       Baltimor~ 0325    Ravens    #241773    #000000     #9e7c0c    
## 4 BUF       Buffalo ~ 0610    Bills     #00338d    #c60c30     #0c2e82    
## 5 CAR       Carolina~ 0750    Panthers  #0085ca    #000000     #bfc0bf    
## 6 CHI       Chicago ~ 0810    Bears     #0b162a    #c83803     #0b162a    
## # ... with 4 more variables: team_color4 <chr>, team_logo_wikipedia <chr>,
## #   team_logo_espn <chr>, team_wordmark <glue>
#import schedules
schedule <- fast_scraper_schedules(2020, pp=FALSE)
schedule
## # A tibble: 256 x 17
##    game_id season game_type  week gameday weekday gametime away_team home_team
##    <chr>    <int> <chr>     <int> <chr>   <chr>   <chr>    <chr>     <chr>    
##  1 2020_0~   2020 REG           1 2020-0~ Thursd~ 20:20    HOU       KC       
##  2 2020_0~   2020 REG           1 2020-0~ Sunday  13:00    SEA       ATL      
##  3 2020_0~   2020 REG           1 2020-0~ Sunday  13:00    CLE       BAL      
##  4 2020_0~   2020 REG           1 2020-0~ Sunday  13:00    NYJ       BUF      
##  5 2020_0~   2020 REG           1 2020-0~ Sunday  13:00    LV        CAR      
##  6 2020_0~   2020 REG           1 2020-0~ Sunday  13:00    CHI       DET      
##  7 2020_0~   2020 REG           1 2020-0~ Sunday  13:00    IND       JAX      
##  8 2020_0~   2020 REG           1 2020-0~ Sunday  13:00    GB        MIN      
##  9 2020_0~   2020 REG           1 2020-0~ Sunday  13:00    MIA       NE       
## 10 2020_0~   2020 REG           1 2020-0~ Sunday  13:00    PHI       WAS      
## # ... with 246 more rows, and 8 more variables: away_score <int>,
## #   home_score <int>, home_result <int>, stadium <chr>, location <chr>,
## #   roof <chr>, surface <chr>, old_game_id <chr>
#use this code to change the week
schedulewk <- schedule %>%
  filter(week == 17) %>%
  select("game_id", "week", "weekday","gametime","away_team","home_team",
         "old_game_id") %>%
  filter(weekday == "Sunday")
schedulewk1 <- schedulewk 
#change home & away to Team and Opponent
colnames(schedulewk1)[5] <- "Team"
colnames(schedulewk1)[6] <- "Opponent"
#change away & home to Opponent and Team
schedulewk2 <- schedulewk
colnames(schedulewk2)[5] <- "Opponent"
colnames(schedulewk2)[6] <- "Team"
#reorder the columns in the away schedule to match the home schedule
schedulewk2 <- schedulewk2[, c(1,2,3,4,6,5,7)]
#bind the rows to get Team & Opponent layout
schedulewk3 <- schedulewk2 %>%
  bind_rows(schedulewk1)
#join team to offsense 
teamoffense <- schedulewk3 %>%
  left_join(offense, by = c("Team" = "team"))
#rename columns
colnames(teamoffense)[9] <- "Team_Rush_Epa"
colnames(teamoffense)[10] <- "Team_Pass_Epa"
#join opponent to defense
teamdefense <- teamoffense %>%
  left_join(defense, by = c("Opponent" = "team")) 
#rename columns
colnames(teamdefense)[12] <- "Opponent_Def_Rush_Epa"
colnames(teamdefense)[13] <- "Opponent_Def_Pass_Epa"
#remove the extra 2020 column 
teams <- teamdefense %>%
  select(-"season.y")
#need to import team logos/colors and join them to teamdefense df
logos <- teams_colors_logos
#join teams and logos - only need to join on team
teamslogo <- teams %>%
  left_join(logos, by = c("Team" = "team_abbr"))
#filter teams logo to get 1 or 4pm games
teamslogo <- teamslogo %>%
 filter(gametime >= '16:00' & gametime <= "20:10")
#create text aesthetics for passing
bound.label <- 0.3
df.text <- data.frame(lab.text = c("Good Offense v Good Defense", "Smash Spot", "Bad Offense v Good Defense", "Bad Offense v Bad Defense"), 
                      x = c(bound.label, bound.label, -0.033*bound.label, -0.033*bound.label), 
                      y = c(-0.25*bound.label, bound.label, -0.25*bound.label, bound.label))
#epa passing vs def
teamslogo %>%
  ggplot(aes(x = Team_Pass_Epa, y = Opponent_Def_Pass_Epa)) +
  #add points for the QBs with the logos
  geom_image(aes(image = team_logo_espn), asp = 16 / 9) +
  #titles and caption
  labs(x = "Team EPA Passing",
       y = "Oppenent Def EPA Passing",
       title = "Passing",
       caption = "Data: @nflfastR") +
  #uses the black and white ggplot theme
  theme_bw() +
  geom_hline(aes(yintercept = 0.1), lty = 2, col = "red", alpha = 0.5) + 
  geom_vline(aes(xintercept = 0.1), lty = 2, col = "red", alpha = 0.5) +
  geom_text(data = df.text, aes(x, y, label = lab.text), colour = "blue") +
  #center title with hjust = 0.5
  theme(
    plot.title = element_text(size = 14, hjust = 0.5, face = "bold")
  ) 

#create text aesthetics for rushing
bound.labelr <- 0.05
df.textr <- data.frame(lab.text = c("Good Offense v Good Defense", "Smash Spot", "Bad Offense v Good Defense", "Bad Offense v Bad Defense"), 
                      x = c(bound.labelr, bound.labelr, -2.25*bound.labelr, -2.25*bound.labelr), 
                      y = c(-2.5*bound.labelr, 1.5*bound.labelr, -2.5*bound.labelr, 1.5*bound.labelr))
#epa rushing vs def
teamslogo %>%
  ggplot(aes(x = Team_Rush_Epa, y = Opponent_Def_Rush_Epa)) +
  #add points for the QBs with the logos
  geom_image(aes(image = team_logo_espn), asp = 16 / 9) +
  #titles and caption
  labs(x = "Team EPA Rushing",
       y = "Oppenent Def EPA Rushing",
       title = "Rushing",
       caption = "Data: @nflfastR") +
  #uses the black and white ggplot theme
  theme_bw() +
  geom_hline(aes(yintercept = -0.05), lty = 2, col = "red", alpha = 0.5) + 
  geom_vline(aes(xintercept = -0.05), lty = 2, col = "red", alpha = 0.5) +
  geom_text(data = df.textr, aes(x, y, label = lab.text), colour = "blue") +
  #center title with hjust = 0.5
  theme(
    plot.title = element_text(size = 14, hjust = 0.5, face = "bold") 
  )