library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## Warning: package 'tibble' was built under R version 4.0.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggrepel)
library(ggimage)
library(nflfastR)
## Warning: package 'nflfastR' was built under R version 4.0.3
options(scipen = 9999)

create a dataframe that only contains run plays and pass plays.

pbp_rp <- data %>%
  filter(rush == 1 | pass == 1, !is.na(epa))

In the above, !is.na(epa) means to exclude plays with missing (na) EPA


EPA/play on offense and defense

This turned our two-lines-per-team dataframe into one, with the 0 column being pass == 0 (run plays) and the 1 column pass == 1.

pbp_rp %>%
  group_by(posteam, season, pass) %>% 
  summarize(epa = mean(epa)) %>%
  pivot_wider(names_from = pass, values_from = epa) %>%
  head(4)
## `summarise()` regrouping output by 'posteam', 'season' (override with `.groups` argument)
## # A tibble: 4 x 4
## # Groups:   posteam, season [4]
##   posteam season     `0`    `1`
##   <chr>    <int>   <dbl>  <dbl>
## 1 ARI       2020  0.0352 0.190 
## 2 ATL       2020 -0.143  0.182 
## 3 BAL       2020  0.0146 0.0718
## 4 BUF       2020 -0.0673 0.242
#removing the first 4 wks of the season
#pbp_rp <- pbp_rp %>%
  #filter(game_date > "2020-10-24")

Now let’s rename to something more sensible and save:

offense <- pbp_rp %>%
  group_by(posteam, season, pass) %>% 
  summarize(epa = mean(epa)) %>%
  pivot_wider(names_from = pass, values_from = epa) %>%
  rename(off_pass_epa = `1`, off_rush_epa = `0`)
## `summarise()` regrouping output by 'posteam', 'season' (override with `.groups` argument)
offense %>%
  arrange(-off_rush_epa) %>%
  head(5)
## # A tibble: 5 x 4
## # Groups:   posteam, season [5]
##   posteam season off_rush_epa off_pass_epa
##   <chr>    <int>        <dbl>        <dbl>
## 1 NE        2020      0.0658        0.0333
## 2 ARI       2020      0.0352        0.190 
## 3 TEN       2020      0.0193        0.285 
## 4 BAL       2020      0.0146        0.0718
## 5 LV        2020      0.00442       0.259
defense <- pbp_rp %>%
  group_by(defteam, season, pass) %>% 
  summarize(epa = mean(epa)) %>%
  pivot_wider(names_from = pass, values_from = epa) %>%
  rename(def_pass_epa = `1`, def_rush_epa = `0`)
## `summarise()` regrouping output by 'defteam', 'season' (override with `.groups` argument)
defense %>%
  arrange(def_rush_epa) %>%
  head(5)
## # A tibble: 5 x 4
## # Groups:   defteam, season [5]
##   defteam season def_rush_epa def_pass_epa
##   <chr>    <int>        <dbl>        <dbl>
## 1 TB        2020       -0.233      0.00739
## 2 SF        2020       -0.174      0.180  
## 3 NO        2020       -0.171      0.0796 
## 4 ATL       2020       -0.169      0.237  
## 5 BAL       2020       -0.156      0.0341
colnames(offense)[1] <- "team"
offense
## # A tibble: 32 x 4
## # Groups:   team, season [32]
##    team  season off_rush_epa off_pass_epa
##    <chr>  <int>        <dbl>        <dbl>
##  1 ARI     2020       0.0352       0.190 
##  2 ATL     2020      -0.143        0.182 
##  3 BAL     2020       0.0146       0.0718
##  4 BUF     2020      -0.0673       0.242 
##  5 CAR     2020      -0.0792       0.149 
##  6 CHI     2020      -0.173       -0.0499
##  7 CIN     2020      -0.0837       0.0606
##  8 CLE     2020      -0.0268       0.106 
##  9 DAL     2020      -0.0619      -0.0650
## 10 DEN     2020      -0.103       -0.0828
## # ... with 22 more rows
colnames(defense)[1] <- "team"
defense
## # A tibble: 32 x 4
## # Groups:   team, season [32]
##    team  season def_rush_epa def_pass_epa
##    <chr>  <int>        <dbl>        <dbl>
##  1 ARI     2020      -0.0734       0.116 
##  2 ATL     2020      -0.169        0.237 
##  3 BAL     2020      -0.156        0.0341
##  4 BUF     2020       0.0395       0.156 
##  5 CAR     2020      -0.0446       0.182 
##  6 CHI     2020      -0.152        0.0346
##  7 CIN     2020      -0.0139       0.178 
##  8 CLE     2020      -0.0208       0.109 
##  9 DAL     2020       0.0499       0.143 
## 10 DEN     2020      -0.102        0.0590
## # ... with 22 more rows
#team colors & logos come with NFLFastr
head(teams_colors_logos)
## # A tibble: 6 x 10
##   team_abbr team_name team_id team_nick team_color team_color2 team_color3
##   <chr>     <chr>     <chr>   <chr>     <chr>      <chr>       <chr>      
## 1 ARI       Arizona ~ 3800    Cardinals #97233f    #000000     #ffb612    
## 2 ATL       Atlanta ~ 0200    Falcons   #a71930    #000000     #a5acaf    
## 3 BAL       Baltimor~ 0325    Ravens    #241773    #000000     #9e7c0c    
## 4 BUF       Buffalo ~ 0610    Bills     #00338d    #c60c30     #0c2e82    
## 5 CAR       Carolina~ 0750    Panthers  #0085ca    #000000     #bfc0bf    
## 6 CHI       Chicago ~ 0810    Bears     #0b162a    #c83803     #0b162a    
## # ... with 3 more variables: team_color4 <chr>, team_logo_wikipedia <chr>,
## #   team_logo_espn <chr>
#import schedules
schedule <- fast_scraper_schedules(2020, pp=FALSE)
schedule
## # A tibble: 256 x 17
##    game_id season game_type  week gameday weekday gametime away_team home_team
##    <chr>    <int> <chr>     <int> <chr>   <chr>   <chr>    <chr>     <chr>    
##  1 2020_0~   2020 REG           1 2020-0~ Thursd~ 20:20    HOU       KC       
##  2 2020_0~   2020 REG           1 2020-0~ Sunday  13:00    SEA       ATL      
##  3 2020_0~   2020 REG           1 2020-0~ Sunday  13:00    CLE       BAL      
##  4 2020_0~   2020 REG           1 2020-0~ Sunday  13:00    NYJ       BUF      
##  5 2020_0~   2020 REG           1 2020-0~ Sunday  13:00    LV        CAR      
##  6 2020_0~   2020 REG           1 2020-0~ Sunday  13:00    CHI       DET      
##  7 2020_0~   2020 REG           1 2020-0~ Sunday  13:00    IND       JAX      
##  8 2020_0~   2020 REG           1 2020-0~ Sunday  13:00    GB        MIN      
##  9 2020_0~   2020 REG           1 2020-0~ Sunday  13:00    MIA       NE       
## 10 2020_0~   2020 REG           1 2020-0~ Sunday  13:00    PHI       WAS      
## # ... with 246 more rows, and 8 more variables: away_score <int>,
## #   home_score <int>, home_result <int>, stadium <chr>, location <chr>,
## #   roof <chr>, surface <chr>, old_game_id <chr>
#use this code to change the week
schedulewk <- schedule %>%
  filter(week == 12) %>%
  select("game_id", "week", "weekday","gametime","away_team","home_team",
         "old_game_id") %>%
  filter(weekday == "Sunday")
schedulewk1 <- schedulewk 
#change home & away to Team and Opponent
colnames(schedulewk1)[5] <- "Team"
colnames(schedulewk1)[6] <- "Opponent"
#change away & home to Opponent and Team
schedulewk2 <- schedulewk
colnames(schedulewk2)[5] <- "Opponent"
colnames(schedulewk2)[6] <- "Team"
#reorder the columns in the away schedule to match the home schedule
schedulewk2 <- schedulewk2[, c(1,2,3,4,6,5,7)]
#bind the rows to get Team & Opponent layout
schedulewk3 <- schedulewk2 %>%
  bind_rows(schedulewk1)
#join team to offsense 
teamoffense <- schedulewk3 %>%
  left_join(offense, by = c("Team" = "team"))
#rename columns
colnames(teamoffense)[9] <- "Team_Rush_Epa"
colnames(teamoffense)[10] <- "Team_Pass_Epa"
#join opponent to defense
teamdefense <- teamoffense %>%
  left_join(defense, by = c("Opponent" = "team")) 
#rename columns
colnames(teamdefense)[12] <- "Opponent_Def_Rush_Epa"
colnames(teamdefense)[13] <- "Opponent_Def_Pass_Epa"
#remove the extra 2020 column 
teams <- teamdefense %>%
  select(-"season.y")
#need to import team logos/colors and join them to teamdefense df
logos <- teams_colors_logos
#join teams and logos - only need to join on team
teamslogo <- teams %>%
  left_join(logos, by = c("Team" = "team_abbr"))
#filter teams logo to get 1 or 4pm games
#teamslogo <- teamslogo %>%
 # filter(gametime >= '16:00')
#create text aesthetics for passing
bound.label <- 0.3
df.text <- data.frame(lab.text = c("Good Offense v Good Defense", "Smash Spot", "Bad Offense v Good Defense", "Bad Offense v Bad Defense"), 
                      x = c(bound.label, bound.label, -0.033*bound.label, -0.033*bound.label), 
                      y = c(-0.25*bound.label, bound.label, -0.25*bound.label, bound.label))
#epa passing vs def
teamslogo %>%
  ggplot(aes(x = Team_Pass_Epa, y = Opponent_Def_Pass_Epa)) +
  #add points for the QBs with the logos
  geom_image(aes(image = team_logo_espn), asp = 16 / 9) +
  #titles and caption
  labs(x = "Team EPA Passing",
       y = "Oppenent Def EPA Passing",
       title = "Passing",
       caption = "Data: @nflfastR") +
  #uses the black and white ggplot theme
  theme_bw() +
  geom_hline(aes(yintercept = 0.1), lty = 2, col = "red", alpha = 0.5) + 
  geom_vline(aes(xintercept = 0.1), lty = 2, col = "red", alpha = 0.5) +
  geom_text(data = df.text, aes(x, y, label = lab.text), colour = "blue") +
  #center title with hjust = 0.5
  theme(
    plot.title = element_text(size = 14, hjust = 0.5, face = "bold")
  ) 

#create text aesthetics for rushing
bound.labelr <- 0.05
df.textr <- data.frame(lab.text = c("Good Offense v Good Defense", "Smash Spot", "Bad Offense v Good Defense", "Bad Offense v Bad Defense"), 
                      x = c(bound.labelr, bound.labelr, -2.25*bound.labelr, -2.25*bound.labelr), 
                      y = c(-2.5*bound.labelr, 1.5*bound.labelr, -2.5*bound.labelr, 1.5*bound.labelr))
#epa rushing vs def
teamslogo %>%
  ggplot(aes(x = Team_Rush_Epa, y = Opponent_Def_Rush_Epa)) +
  #add points for the QBs with the logos
  geom_image(aes(image = team_logo_espn), asp = 16 / 9) +
  #titles and caption
  labs(x = "Team EPA Rushing",
       y = "Oppenent Def EPA Rushing",
       title = "Rushing",
       caption = "Data: @nflfastR") +
  #uses the black and white ggplot theme
  theme_bw() +
  geom_hline(aes(yintercept = -0.05), lty = 2, col = "red", alpha = 0.5) + 
  geom_vline(aes(xintercept = -0.05), lty = 2, col = "red", alpha = 0.5) +
  geom_text(data = df.textr, aes(x, y, label = lab.text), colour = "blue") +
  #center title with hjust = 0.5
  theme(
    plot.title = element_text(size = 14, hjust = 0.5, face = "bold") 
  )