#Load in some new libraries!
library(tidyverse) # Data Cleaning, manipulation, summarization, plotting
library(ggtext)
library(gt)
library(DT) # beautiful interactive tables
library(ggthemes) # custom pre-built themes
library(ggforce) # better annotations
library(ggridges) # many distributions at once
library(ggrepel) # better labels
library(ggbeeswarm) # beeswarm plots
library(extrafont) # for extra fonts
library(bbplot)
library(dplyr)
library(na.tools)
library(ggimage)
library(knitr)
library(kableExtra)
library(teamcolors)
library(concaveman)

Basic setup of the document can be attributed to the great work of Ben Baldwin and the nflscrapr Community :)

Import and Format the Data

# Importing the data from github and saving it as the object php
# The year can be changed by editing the CSV

pbp <- read_csv(url("https://github.com/ryurko/nflscrapR-data/raw/master/play_by_play_data/regular_season/reg_pbp_2019.csv"))
## Cleaning the data
#piping the pbp object to select posteam, defteam and sort by descending play type - second pip to call just the head of the data
pbp %>% select(posteam, defteam, desc, play_type) %>% head
# The desc column includes a bunch of data that should be removed
# Punts, Kickoffs, field goals and dead ball penalities will be removed as they don't impact EPA 
pbp %>%
  mutate(
    stick_throw = case_when(
      air_yards < ydstogo ~ "Short of Sticks",
      air_yards == ydstogo ~ "At Stick",
      air_yards > ydstogo ~ "Past Stick",
      TRUE ~ NA_character_
    )
  ) %>%
  select(air_yards, ydstogo, stick_throw) %>%
  filter(!is.na(air_yards))
#Filtering out punts, kickoffs, field goals, dead ball penalties
pbp_rp <- pbp %>% 
    filter(!is_na(epa), play_type=="no_play" | play_type=="pass" | play_type=="run")
# View the "no play" plays
pbp_rp %>% filter(play_type=="no_play") %>% select(desc, rush_attempt, pass_attempt)  %>% head
# Some of these plays could be attempted rush or pass plays that should count when computing EPA. 
# The data currently shows 0 for each attempt which is wrong - pass, create "pass", which searches the "desc" variable for plays with "pass", "sacked", or "scramble", along with a variable for rush and for a successful play

pbp_rp <- pbp_rp %>%
    mutate(
    pass = if_else(str_detect(desc, "(pass)|(sacked)|(scramble)"), 1, 0),
    rush = if_else(str_detect(desc, "(left end)|(left tackle)|(left guard)|(up the middle)|(right guard)|(right tackle)|(right end)") & pass == 0, 1, 0),
    success = ifelse(epa>0, 1 , 0)
) %>%
# filter to only pass or rush plays
  filter(pass == 1 | rush == 1) %>%
  mutate(
    passer_player_name = ifelse(play_type == "no_play" & pass == 1,
      str_extract(desc, "(?<=\\s)[A-Z][a-z]*\\.\\s?[A-Z][A-z]+(\\s(I{2,3})|(IV))?(?=\\s((pass)|(sack)|(scramble)))"),
      passer_player_name
    ),
    receiver_player_name = ifelse(play_type == "no_play" & str_detect(desc, "pass"),
      str_extract(
        desc,
        "(?<=to\\s)[A-Z][a-z]*\\.\\s?[A-Z][A-z]+(\\s(I{2,3})|(IV))?"
      ),
      receiver_player_name
    ),
    rusher_player_name = ifelse(play_type == "no_play" & rush == 1,
      str_extract(desc, "(?<=\\s)[A-Z][a-z]*\\.\\s?[A-Z][A-z]+(\\s(I{2,3})|(IV))?(?=\\s((left end)|(left tackle)|(left guard)|      (up the middle)|(right guard)|(right tackle)|(right end)))"),
      rusher_player_name
    )
  ) %>%
  mutate(
    name = if_else(!is.na(passer_player_name), passer_player_name, rusher_player_name),
    rusher = rusher_player_name,
    receiver = receiver_player_name,
    play = 1
  )

#review the data to validate that the correct plays are pulling in
pbp_rp %>% filter(play_type=="no_play") %>% select(pass, rush, desc)  %>% head
# Creating a custom theme
theme_fivethirtyeight <- function(base_size = 12, base_family = "sans") {
  colors <- deframe(ggthemes::ggthemes_data[["fivethirtyeight"]])
  (theme_foundation(base_size = base_size, base_family = base_family)
  + theme(
      line = element_line(colour = "black"),
      rect = element_rect(
        fill = colors["Light Gray"],
        linetype = 0, colour = NA
      ),
      text = element_text(colour = colors["Dark Gray"]),
      axis.title = element_blank(),
      axis.text = element_text(),
      axis.ticks = element_blank(),
      axis.line = element_blank(),
      legend.background = element_rect(),
      legend.position = "bottom",
      legend.direction = "horizontal",
      legend.box = "vertical",
      panel.grid = element_line(colour = NULL),
      panel.grid.major =
        element_line(colour = colors["Medium Gray"]),
      panel.grid.minor = element_blank(),
      plot.title = element_text(hjust = 0, size = rel(1.5), face = "bold"),
      plot.margin = unit(c(1, 1, 1, 1), "lines"),
      strip.background = element_rect()
    ))
}
#Editing the custom theme
theme_538 <- function(base_size = 12, font = "TT Times New Roman") {

  # Text setting
  txt <- element_text(size = base_size + 2, colour = "black", face = "plain")
  bold_txt <- element_text(
    size = base_size + 2, colour = "black",
    family = "Montserrat", face = "bold"
  )
  large_txt <- element_text(size = base_size + 4, color = "black", face = "bold")


  theme_minimal(base_size = base_size, base_family = font) +
    theme(
      # Legend Settings
      legend.key = element_blank(),
      legend.background = element_blank(),
      legend.position = "bottom",
      legend.direction = "horizontal",
      legend.box = "vertical",

      # Backgrounds
      strip.background = element_blank(),
      strip.text = large_txt,
      plot.background = element_blank(),
      plot.margin = unit(c(1, 1, 1, 1), "lines"),

      # Axis & Titles
      text = txt,
      axis.text = txt,
      axis.ticks = element_blank(),
      axis.line = element_blank(),
      axis.title = bold_txt,
      plot.title = large_txt,

      # Panel
      panel.grid = element_line(colour = NULL),
      panel.grid.major = element_line(colour = "#D2D2D2"),
      panel.grid.minor = element_blank(),
      panel.background = element_blank(),
      panel.border = element_blank()
    )
}

EPA

Line Charts

# Prepare data
wr_duel <- pbp_rp %>%
  filter(receiver %in% c("M.Evans", "C.Godwin")) %>%
  group_by(game_date, receiver) %>%
  summarize(mean_epa = mean(epa, na.rm = TRUE))
# Clean it up a bit
wr_duel_plot <- ggplot(
  wr_duel,
  aes(x = game_date, y = mean_epa, color = receiver)
) +
  geom_line(size = 1) +
  theme_538() +
  geom_hline(yintercept = 0, size = 1, color = "black") +
  labs(
    x = "\nGame Date",
    y = "EPA (Average)",
    title = "Quick comparison of Godwin vs Evans across the 2019 season",
    caption = "Data: @nflscrapR"
  )
# Change the Colors
tb_colors <- teamcolors %>% 
  filter(name == "Tampa Bay Buccaneers") %>%
  select(name:secondary)

tb_primary <- pull(tb_colors, primary)
tb_secondary <- pull(tb_colors, secondary)
# Assign the Colors 
wr_duel_plot <- ggplot(wr_duel,
               aes(x = game_date, y = mean_epa, 
                   color = if_else(receiver == "M.Evans", tb_primary , tb_secondary))) +
  geom_line(size = 1) +
  theme_538() +
  geom_hline(yintercept = 0, size = 1, color = "black") +
  labs(x = "",
       y = "EPA (Average)",
       title = "Quick comparison of <span style='color:#d50a0a'>**Mike Evans**</span> vs <span style='color:#34302b'>**Chris Godwin**</span> across the 2019 season",
       caption = "Data: @nflscrapR") +
  scale_color_identity() +
  theme(plot.title = element_markdown())

wr_duel_plot

# Prepare data
cgod_do_it <- pbp_rp %>%
  filter(receiver == "C.Godwin") %>%
  arrange(desc(game_date)) %>%
  group_by(game_date) %>%
  summarize(
    total_yards = sum(yards_gained, na.rm = TRUE),
    total_airyards = sum(air_yards, na.rm = TRUE)
  ) %>%
  head(5) %>%
  mutate(
    game_num = row_number(),
    game_text = glue::glue("Game {game_num}")
  )

ggplot(cgod_do_it, aes(x = total_airyards, y = total_yards, color = game_num)) +
  # geom path follows the order of underlying data
  geom_path(size = 2) +
  geom_point(size = 5) +
  # creates a line for comparison
  geom_abline(intercept = 0, slope = 1, color = "grey", linetype = "dashed") +
  # adds labels to only game 1 and 5
  geom_text(
    data = filter(cgod_do_it, game_num %in% c(1, 5)),
    aes(label = game_text),
    hjust = 1, nudge_x = -5
  ) +
  # set scales for 0-axis
  scale_x_continuous(limits = c(0, 140)) +
  scale_y_continuous(limits = c(0, 140)) +
  # change color gradient to start at black and transition to yellow
  scale_color_gradient(low = tb_primary, high = tb_secondary) +
  theme_538() +
  labs(
    x = "\nTotal Air Yards",
    y = "Total Yards\n",
    title = "Chris Godwin Air Yards vs Total Yards",
    caption = "Data: @nflscrapR"
  ) +
  theme(legend.position = "none")

# Prepare data
mevans_do_it <- pbp_rp %>%
  filter(receiver == "M.Evans") %>%
  arrange(desc(game_date)) %>%
  group_by(game_date) %>%
  summarize(
    total_yards = sum(yards_gained, na.rm = TRUE),
    total_airyards = sum(air_yards, na.rm = TRUE)
  ) %>%
  head(5) %>%
  mutate(
    game_num = row_number(),
    game_text = glue::glue("Game {game_num}")
  )

ggplot(mevans_do_it, aes(x = total_airyards, y = total_yards, color = game_num)) +
  # geom path follows the order of underlying data
  geom_path(size = 2) +
  geom_point(size = 5) +
  # creates a line for comparison
  geom_abline(intercept = 0, slope = 1, color = "grey", linetype = "dashed") +
  # adds labels to only game 1 and 5
  geom_text(
    data = filter(mevans_do_it, game_num %in% c(1, 5)),
    aes(label = game_text),
    hjust = 1, nudge_x = -5
  ) +
  # set scales for 0-axis
  scale_x_continuous(limits = c(0, 140)) +
  scale_y_continuous(limits = c(0, 140)) +
  # change color gradient to start at black and transition to yellow
  scale_color_gradient(low = tb_primary, high = tb_secondary) +
  theme_538() +
  labs(
    x = "\nTotal Air Yards",
    y = "Total Yards\n",
    title = "Mike Evans Air Yards vs Total Yards",
    caption = "Data: @nflscrapR"
  ) +
  theme(legend.position = "none")

Team Level Stuff

#Plotting the data
ggplot(schotty, aes(x=reorder(posteam,-mean_pass), y=mean_pass)) +
        geom_text(aes(label=posteam))

ggsave('FILENAME.png', dpi=1000)

Dropback Success Rate vs EPA

#Take all plays w/pass=1, grouping by team, summarising number of dropbacks and calculating EPA/db and success rate (anytime epa>o)
chart_data <- pbp_rp %>%
    filter(pass==1) %>%
    group_by(posteam) %>%
    summarise(
    num_db = n(),
    epa_per_db = sum(epa) / num_db,
    success_rate = sum(epa > 0) / num_db
    )

#pulling in NFL logos Data - joining by teamcode/posteam
nfl_logos_df <- read_csv("https://raw.githubusercontent.com/statsbylopez/BlogPosts/master/nfl_teamlogos.csv")
chart <- chart_data %>% left_join(nfl_logos_df, by = c("posteam" = "team_code"))
#Home v Away EPA
team_epa <- pbp_rp %>%
  group_by(posteam) %>%
  summarise(
    total_home_epa = sum(total_home_epa),
    total_away_epa = sum(total_away_epa)
  )
epa_chart <- team_epa %>% left_join(nfl_logos_df, by = c("posteam" = "team_code"))
epa_chart %>%
ggplot(aes(x = total_home_epa, y = total_away_epa)) +
    geom_image(aes(image = url), size = 0.05) +
    labs(x = "Total Home EPA",
    y = "Total Away EPA",
    caption = "Data from nflscrapR",
    title = "Home v Away EPA by Team",
    subtitle = "2019") +
    theme_bw() +
    theme(axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    plot.title = element_text(size = 16),
    plot.subtitle = element_text(size = 14),
        plot.caption = element_text(size = 12))

ggsave('FILENAME.png', dpi=1000)
## Saving 7 x 5 in image
chart %>%
ggplot(aes(x = success_rate, y = epa_per_db)) +
    geom_image(aes(image = url), size = 0.05) +
    labs(x = "Success rate",
    y = "EPA per play",
    caption = "Data from nflscrapR",
    title = "Dropback success rate & EPA/play",
    subtitle = "2019") +
    theme_bw() +
    theme(axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    plot.title = element_text(size = 16),
    plot.subtitle = element_text(size = 14),
        plot.caption = element_text(size = 12))

ggsave('FILENAME.png', dpi=1000)

Improving Graphics with Team Data

#Bring in Team Colors
library(teamcolors)
filter(teamcolors, league == "nfl")
#Join teamcolors and Play BY Play datasets'
nfl_colors <- teamcolors %>%
  filter(league == "nfl") %>%
  mutate(
    team_abb = case_when(
      name == "Arizona Cardinals" ~ "ARI",
      name == "Atlanta Falcons" ~ "ATL",
      name == "Baltimore Ravens" ~ "BAL",
      name == "Buffalo Bills" ~ "BUF",
      name == "Carolina Panthers" ~ "CAR",
      name == "Chicago Bears" ~ "CHI",
      name == "Cincinnati Bengals" ~ "CIN",
      name == "Cleveland Browns" ~ "CLE",
      name == "Dallas Cowboys" ~ "DAL",
      name == "Denver Broncos" ~ "DEN",
      name == "Detroit Lions" ~ "DET",
      name == "Green Bay Packers" ~ "GB",
      name == "Houston Texans" ~ "HOU",
      name == "Indianapolis Colts" ~ "IND",
      name == "Jacksonville Jaguars" ~ "JAX",
      name == "Kansas City Chiefs" ~ "KC",
      name == "Los Angeles Rams" ~ "LA",
      name == "Los Angeles Chargers" ~ "LAC",
      name == "Miami Dolphins" ~ "MIA",
      name == "Minnesota Vikings" ~ "MIN",
      name == "New England Patriots" ~ "NE",
      name == "New Orleans Saints" ~ "NO",
      name == "New York Giants" ~ "NYG",
      name == "New York Jets" ~ "NYJ",
      name == "Oakland Raiders" ~ "OAK",
      name == "Philadelphia Eagles" ~ "PHI",
      name == "Pittsburgh Steelers" ~ "PIT",
      name == "Seattle Seahawks" ~ "SEA",
      name == "San Francisco 49ers" ~ "SF",
      name == "Tampa Bay Buccaneers" ~ "TB",
      name == "Tennessee Titans" ~ "TEN",
      name == "Washington Redskins" ~ "WAS",
      TRUE ~ NA_character_
    ),
    posteam = team_abb
  )


# left_join the data together
pbp_colors <- left_join(pbp, nfl_colors, by = c("posteam"))

pbp_colors %>%
  # Excludes non-plays, eg end of quarter
  filter(!is.na(posteam)) %>%
  select(posteam, team_abb, name, primary, secondary, logo) %>%
  # Distinct grabs only the distinct/unique cases of column
  distinct(posteam, .keep_all = TRUE)

Bar Charts

Who likes the committee approach when it comes to running backs? (*crickets)

Let’s creat a visualization of 2019 data for some of the Committee based teams

  • San Francisco 49ers
rb_quad <- pbp_rp %>%
  filter(
    posteam == "SF",
    receiver %in% c("M.Breida", "T.Coleman", "R.Mostert", "J.Wilson") |
      rusher %in% c("M.Breida", "T.Coleman", "R.Mostert", "J.Wilson"),
    play_type != "no_play"
  ) %>%
  mutate(
    # Assign a single player name for filtering regardless of play_type
    player = if_else(is.na(receiver), rusher, receiver),
    # Add nice labels to play_type
    play_type = factor(play_type, labels = c("Reception", "Rush"))
  ) %>%
  group_by(player, play_type) %>%
  summarize(
    n = n(),
    mean_yards = sum(yards_gained, na.rm = TRUE) / n,
    mean_success = sum(success, na.rm = TRUE) / n
  )

rb_quad_plot <- rb_quad %>%
  ggplot(aes(x = player, y = mean_yards)) +
  geom_col(aes(fill = play_type), position = "dodge")
#Breaking this out into Facets
rb_quad_plot <- rb_quad %>%
  ggplot(aes(x = player, y = mean_yards, fill = player, position = "dodge", group = play_type)) +
  geom_col() +
  facet_grid(~play_type)
# Change the Colors
sf_colors <- teamcolors %>% 
  filter(name == "San Francisco 49ers") %>%
  select(name:secondary)

sf_primary <- pull(sf_colors, primary)
sf_secondary <- pull(sf_colors, secondary)
rb_quad_plot  +
  geom_hline(yintercept = 0.03, color = "black", size = 2) +
  theme_538() +
  scale_fill_manual(values = c("grey", sf_primary, sf_secondary, "Black")) +
  labs(
    x = "",
    y = "Avg Yards per Play",
    title = "49ers Running Backs in 2019",
    subtitle = "McKinnon has not been active",
    caption = "Data: @nflscrapR"
  ) +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_line(color = "white", size = 1),
    panel.ontop = TRUE,
    legend.position = "none"
  )  +
  scale_y_continuous(
    breaks = seq(0, 15, 1)
  )

  • Washington Redskins
rb_trio <- pbp_rp %>%
  filter(
    posteam == "WAS",
    receiver %in% c("A.Peterson", "D.Guice", "C.Thompson") |
      rusher %in% c("A.Peterson", "D.Guice", "C.Thompson"),
    play_type != "no_play"
  ) %>%
  mutate(
    # Assign a single player name for filtering regardless of play_type
    player = if_else(is.na(receiver), rusher, receiver),
    # Add nice labels to play_type
    play_type = factor(play_type, labels = c("Reception", "Rush"))
  ) %>%
  group_by(player, play_type) %>%
  summarize(
    n = n(),
    mean_yards = sum(yards_gained, na.rm = TRUE) / n,
    mean_success = sum(success, na.rm = TRUE) / n
  )

rb_trio_plot <- rb_trio %>%
  ggplot(aes(x = player, y = mean_yards)) +
  geom_col(aes(fill = play_type), position = "dodge")
# Change the Colors
was_colors <- teamcolors %>% 
  filter(name == "Washington Redskins") %>%
  select(name:secondary)

was_primary <- pull(was_colors, primary)
was_secondary <- pull(was_colors, secondary)
#Breaking this out into Facets
rb_trio_plot <- rb_trio %>%
  ggplot(aes(x = player, y = mean_yards, fill = player, position = "dodge", group = play_type)) +
  geom_col() +
  facet_grid(~play_type)
rb_trio_plot +
  geom_hline(yintercept = 0.03, color = "black", size = 2) +
  theme_538() +
  scale_fill_manual(values = c(was_primary, was_secondary, "Black")) +
  labs(
    x = "",
    y = "Avg Yards per Play",
    title = "Redskins Running Backs in 2019",
    subtitle = "AP is a Generational Talent",
    caption = "Data: @nflscrapR"
  ) +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_line(color = "white", size = 1),
    panel.ontop = TRUE,
    legend.position = "none"
  )  +
  scale_y_continuous(
    breaks = seq(0, 15, 1)
  )

EPA per Dropback by Team

epa_play <- pbp_rp %>% 
  filter(pass == 1) %>% 
  group_by(posteam) %>% 
  summarize(
    n = n(),
    epa_per_db = sum(epa, na.rm = TRUE) / n,
    success_rate = sum(epa) / n
  )

epa_play %>%
  ggplot(aes(x = reorder(posteam, epa_per_db), y = epa_per_db)) +
  geom_point(aes(color = if_else(epa_per_db >= 0, "#2c7bb6", "#d7181c")),
    size = 3
  ) +
  geom_text(aes(
    label = posteam,
    color = if_else(epa_per_db >= 0, "#2c7bb6", "#d7181c"),
    hjust = if_else(epa_per_db > 0, -0.2, 1.2)
  )) +
  coord_flip() +
  scale_fill_identity(aesthetics = c("fill", "colour")) +
  theme_538() +
  theme(
    panel.grid.major.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  geom_hline(yintercept = 0) +
  scale_y_continuous(breaks = seq(-0.5, 0.5, 0.1)) +
  labs(
    x = "",
    y = "2019 EPA per Dropback",
    title = "The majority of teams had positive EPA/dropback",
    subtitle = "But there are some clear outliers",
    caption = "Data: @nflscrapR"
  )

ScatterPlots

#Data Cleanup
pbp_players <- pbp_rp %>%
  mutate(
    passer_player_name = ifelse(play_type == "no_play" & pass == 1,
      str_extract(desc, "(?<=\\s)[A-Z][a-z]*\\.\\s?[A-Z][A-z]+(\\s(I{2,3})|(IV))?(?=\\s((pass)|(sack)|(scramble)))"),
      passer_player_name
    ),
    receiver_player_name = ifelse(play_type == "no_play" & str_detect(desc, "pass"),
      str_extract(
        desc,
        "(?<=to\\s)[A-Z][a-z]*\\.\\s?[A-Z][A-z]+(\\s(I{2,3})|(IV))?"
      ),
      receiver_player_name
    ),
    rusher_player_name = ifelse(play_type == "no_play" & rush == 1,
      str_extract(desc, "(?<=\\s)[A-Z][a-z]*\\.\\s?[A-Z][A-z]+(\\s(I{2,3})|(IV))?(?=\\s((left end)|(left tackle)|(left guard)|      (up the middle)|(right guard)|(right tackle)|(right end)))"),
      rusher_player_name
    )
  )
#Generate a summarry dataframe
qbs <- pbp_players %>%
  mutate(
    name = ifelse(!is.na(passer_player_name), passer_player_name, rusher_player_name),
    rusher = rusher_player_name,
    receiver = receiver_player_name,
    play = 1
  ) %>%
  group_by(name, posteam) %>%
  summarize(
    n_dropbacks = sum(pass),
    n_rush = sum(rush),
    n_plays = sum(play),
    epa_per_play = sum(epa) / n_plays,
    success_per_play = sum(success) / n_plays
  ) %>%
  filter(n_dropbacks >= 20) %>% 
  ungroup() # always ungroup if you no longer need the grouping effect
qbs %>%
  ggplot(aes(x = success_per_play, y = epa_per_play)) +
  # Notice that color/size inside aes()
  geom_point(aes(color = if_else(posteam == "MIN", "#4f2683", "black"), size = n_plays / 60), alpha = 0.50) +
  # we need this to assign purple/black to the actual color
  scale_color_identity() +

  # add labels for all players
  geom_text_repel(aes(label = name, color = if_else(posteam == "MIN", "#4f2683", "black")),
    force = 1, point.padding = 0.1,
    segment.size = 0.2
  ) +
  labs(
    x = "Success rate",
    y = "EPA per play",
    caption = "Data from nflscrapR",
    title = "QB success rate and EPA/play",
    subtitle = "2019, min 20 pass attempts, includes all QB's rush and pass plays"
  ) +
  theme_bw() +
  theme(
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    plot.title = element_text(size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 14, hjust = 0.5),
    plot.caption = element_text(size = 12)
  ) +
  theme(legend.position = "none")

Let’s compare Kirk Cousins to Teddy Bridgewater and Case Keenum this year :(

qbs %>%
  ggplot(aes(x = success_per_play, y = epa_per_play)) +
  # Notice that color/size inside aes()
  geom_point(aes(
    color = if_else(posteam == "MIN", "#4f2683", "black"),
    size = n_plays / 60
  ),
  alpha = 0.50
  ) +
  # we need this to assign purple/black to the actual color
  scale_color_identity() +
  # add labels JUST for Bridgewater/Keenum/Cousins with ggforce
  geom_mark_hull(
    aes(
      filter = name %in% c("K.Cousins", "C.Keenum", "T.Bridgewater"),
      description = "This is the Bermuda Triangle of Mediocrity"
    ),
    color = "red", label.fontface = "bold", label.colour = "red", con.colour = "red"
  ) +
  labs(
    x = "Success rate",
    y = "EPA per play",
    caption = "Data from nflscrapR",
    title = "QB success rate and EPA/play",
    subtitle = "2019, min 20 pass attempts, includes all QB's rush and pass plays"
  ) +
  theme_bw() +
  theme(
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    plot.title = element_text(size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 14, hjust = 0.5),
    plot.caption = element_text(size = 12)
  ) +
  theme(legend.position = "none")

#get from tutorial
rosters <- read_csv("https://raw.githubusercontent.com/ryurko/nflscrapR-data/master/roster_data/regular_season/reg_roster_2019.csv") %>%
  filter(position %in% c("WR", "RB", "FB", "TE"), season == 2019) %>% 
  mutate(name = abbr_player_name, posteam = team) %>%
  select(season, name, posteam, position)
#Cleaning the data again
data_clean <- pbp_rp %>%
  filter(pass == 1 & sack == 0 & qb_scramble == 0) %>%
  select(
    name, pass, desc, posteam, epa, defteam, complete_pass, incomplete_pass,
    air_yards, receiver_player_name, down, success, complete_pass
  ) %>%
  left_join(rosters, by = c("receiver_player_name" = "name", "posteam")) %>%
  mutate(
    qb = ifelse(is.na(position), 0, 1), rec = receiver_player_name,
    drop = if_else(str_detect(desc, "(sacked)|(scramble)"), 1, 0)
  ) %>%
  filter(drop == 0)

problem_wrs <- c(
  "K.Benjamin", "A.Cooper", "G.Tate", "A.Robinson", "B.Marshall",
  "D.Hilliard", "D.Thompson", "De.Thomas", "E.St", "K.Benjamin", "K.Bibbs",
  "Ty.Williams", "W.Snead", "W.Snead IV", "T.Pryor", "E.St. Brown",
  "A.Robinson II", "J.Gordon", "D.Carter", "B.Ellington",
  "A.Holmes", "R.Matthews", "M.Valdes", "V.Bolden"
)

problem_rbs <- c(
  "A.Abdullah", "C.Hyde", "Dam.", "T.Montgomery", "A.Ekeler", "T.Yeldon",
  "Dam. Williams", "Dar.Williams", "R.Jones II", "C.Anderson"
)

# fix a bunch of problem players
pos <- data_clean %>%
  mutate(
    position = if_else(
      rec %in% problem_wrs, "WR", position
    ),
    position = if_else(
      rec %in% problem_rbs, "RB", position
    ),
    position = if_else(position == "FB", "RB", position)
  ) %>%
  filter(!is.na(position), down <= 2)

Air Yards Versus EPA

pos %>%
  ggplot(aes(x = air_yards, y = epa, color = if_else(complete_pass == 1, "blue", "red"))) +
  geom_point() +
  scale_color_identity()

Facet the data by Position (WR, TE, RB) and only show passes that travel between 1-25 yards * Look at the RB concentration at the 0-5 yard mark. + WR has a somewhat linear increase in EPA with an increase in Yards + RB/TE sem so decline dramatically once beyond the 0-10 yard range

pos %>%
  mutate(position = factor(position, levels = c("WR", "TE", "RB"))) %>%
  filter(between(air_yards, 1, 25)) %>%
  group_by(position, air_yards) %>% 
  mutate(n = n(),
         mean = mean(epa)) %>% 
  ungroup() %>% 
  ggplot(aes(x = air_yards, y = epa, fill = position)) +
  geom_point(aes(group = air_yards), shape = 21, alpha = 0.2, fill = "black") +
  geom_point(aes(size = n, x = air_yards, y = mean), shape = 21, stroke = 0.5, color = "white", alpha = 0.8) +
  geom_hline(yintercept = 0, size = 1, color = "black") +
  stat_smooth(color = "white", method = "loess", alpha = 0.5) +
  facet_grid(~position) +
  coord_cartesian(ylim = c(-1.5, 5)) +
  scale_y_continuous(breaks = seq(-1.5, 4.5, by = 0.5)) +
  ggthemes::theme_fivethirtyeight() +
  theme(
    legend.position = "none",
    strip.text = element_text(face = "bold")
  ) +
  scale_fill_manual(
    values = c("#00b159", "#003399", "#ff2b4f"),
    aesthetics = c("color", "fill")
  ) +
  labs(
    x = "Air Yards (Depth of Target)",
    y = "EPA\n",
    title = "WR and TE EPA generally increases by depth of target",
    subtitle = "However, RBs generally don't get targeted at these distances!\n\nPasses = 1st/2nd, Air Yards between 1 and 25",
    caption = "Data: @nflscrapR"
  )

pass_comp <- pos %>%
  mutate(position = factor(position, levels = c("WR", "TE", "RB"))) %>%
  filter(between(air_yards, 1, 25)) %>%
  group_by(position, air_yards) %>%
  summarize(
    n = n(),
    comp_rate = sum(complete_pass, na.rm = TRUE) / n,
    epa = mean(epa, na.rm = TRUE)
  )
pass_comp_plot <- pass_comp %>%
  ggplot(aes(x = air_yards, y = comp_rate, fill = position)) +
  geom_point(aes(size = n), shape = 21, stroke = 0.5) +
  geom_smooth(color = "white", method = "loess") +
  geom_hline(yintercept = 0, size = 1, color = "black") +
  geom_vline(xintercept = 20, size = 1, color = "black", linetype = "dashed", alpha = 0.5) +
  geom_hline(yintercept = 0.5, size = 1, color = "black", linetype = "dashed", alpha = 0.5) +
  facet_grid(~position) +
  ggthemes::theme_fivethirtyeight() +
  scale_fill_manual(
    values = c("#00b159", "#003399", "#ff2b4f"),
    aesthetics = c("color", "fill")
  ) +
  scale_y_continuous(labels = scales::percent) +
  labs(
    x = "Air Yards (Depth of Target)",
    y = "EPA\n",
    title = "Completion rate by Depth of Target on 1st/2nd Down",
    subtitle = "Completion rate generally drops below 50% for passes > 20 air yards",
    caption = "Graph: @thomas_mock | Data: @nflscrapR",
    size = "N of Passes"
  ) +
  theme(strip.text = element_text(face = "bold")) +
  guides(color = FALSE, fill = FALSE) +
  theme(
    legend.direction = "vertical",
    legend.position = c(0.1, 0.1),
    legend.background = element_blank(),
    legend.title = element_text(face = "bold")
  )

pass_comp_plot

Distributions

So far in 2019 it appears that Dallas and KC are best in the league in QB efficiency. Lets compare them

dal_color <- teamcolors %>%
  filter(name == "Dallas Cowboys") %>%
  pull(primary)

kc_color <- teamcolors %>%
  filter(name == "Kansas City Chiefs") %>%
  pull(primary)

pbp_rp %>%
  filter(play_type != "no_play", posteam %in% c("DAL", "KC")) %>%
  group_by(posteam, play_type) %>%
  summarize(n = n()) %>%
  mutate(freq = n / sum(n))
  • Histogram
loadfonts(device = "win")
pbp_rp %>%
  filter(play_type == "pass") %>%
  filter(posteam %in% c("DAL", "KC")) %>%
  ggplot(aes(x = air_yards, fill = posteam)) +
  geom_histogram(binwidth = 2, alpha = 0.9) +
  scale_fill_manual(values = c(dal_color, kc_color)) +
  geom_hline(yintercept = 0, size = 1) +
  theme_538() +
  theme(
    legend.title = element_blank(),
    legend.position = c(0.6, 0.9)
  ) +
  scale_x_continuous(breaks = seq(-10, 60, 10)) +
  labs(
    x = "\nAir Yards",
    y = "Count",
    title = "Dallas vs Kansas City",
    caption = "Data: @nflscrapR"
  )

  • Density Plot Scale out the y=axis as to show overlap. The key here is distribution and it is evident that KC has more just about everywhere.
pbp_rp %>%
  filter(play_type == "pass") %>%
  filter(posteam %in% c("DAL", "KC")) %>%
  ggplot(aes(x = air_yards, y = ..scaled.., fill = posteam)) +
  geom_density(alpha = 0.8) +
  scale_fill_manual(values = c(dal_color, kc_color)) +
  theme_538() +
  theme(
    legend.title = element_blank(),
    legend.position = c(0.6, 0.9)
  ) +
  scale_x_continuous(breaks = seq(-10, 60, 10))

  • Ridge Plot This is like a stacked Density and Histogram Plot
pbp_rp %>%
  filter(play_type == "pass") %>%
  filter(posteam %in% c("DAL", "KC")) %>%
  ggplot(aes(x = air_yards, y = posteam, fill = posteam)) +
  geom_density_ridges() +
  scale_fill_manual(values = c(dal_color, kc_color)) +
  theme_538() +
  theme(
    panel.grid.major.y = element_blank(),
    legend.position = "none"
  ) +
  scale_x_continuous(breaks = seq(-10, 60, 10)) +
  labs(
    x = "Air Yards",
    y = "",
    title = "DAL and KC pass to similar depths of the field",
    caption = "Data: @nflscrapR"
  )

  • Box Plots
  • Boxplots show central tendency & range of distribution
  • Adding geom_jitter() can help with showing the distribution here
pbp_rp %>%
  filter(play_type == "pass") %>%
  filter(posteam %in% c("DAL", "KC")) %>%
  ggplot(aes(x = posteam, y = air_yards, fill = posteam)) +
  geom_boxplot() +
  geom_jitter(width = 0.2, alpha = 0.2) +
  scale_fill_manual(values = c(dal_color, kc_color)) +
  theme_538() +
  theme(legend.position = "none")

pbp_rp %>%
  filter(play_type != "no_play", posteam %in% c("DAL", "KC")) %>% 
  ggplot(aes(x = play_type, y = epa , fill = posteam)) +
  geom_boxplot() +
  geom_jitter(width = 0.3, alpha = 0.1) +
  scale_fill_manual(values = c(dal_color, kc_color), aesthetics = c("fill", "color")) +
  theme_538() +
  theme(legend.position = "none") +
  facet_grid(~posteam)

  • Sina Plot
pbp_rp %>%
  filter(play_type != "no_play", posteam %in% c("DAL", "KC")) %>%
  ggplot(aes(x = play_type, y = epa, color = posteam)) +
  geom_sina(alpha = 0.5) +
  scale_fill_manual(values = c(dal_color, kc_color), aesthetics = c("fill", "color")) +
  theme_538() +
  theme(legend.position = "none") +
  facet_grid(~posteam)

  • Beeswarm
pbp_rp %>%
  filter(play_type != "no_play", posteam %in% c("DAL", "KC")) %>%
  ggplot(
    aes(x = play_type, y = epa, color = posteam)
  ) +
  geom_beeswarm(priority = "random", alpha = 0.5, size = 0.5) +
  scale_fill_manual(values = c(dal_color, kc_color), aesthetics = c("fill", "color")) +
  theme_538() +
  theme(legend.position = "none") +
  facet_grid(~posteam)

rush_v_pass <- pbp_rp %>% 
  filter(play_type != "no_play", penalty == 0) %>% 
  group_by(play_type, posteam) %>% 
  summarize(avg_yds = mean(yards_gained, na.rm = TRUE)) %>% 
  ungroup()

nfl_rvp <- pbp_rp %>% 
  filter(play_type != "no_play") %>% 
  group_by(play_type) %>% 
  summarize(avg_yds = mean(yards_gained, na.rm = TRUE)) %>% 
  ungroup() %>% 
  mutate(posteam = "NFL")

rush_v_pass <- bind_rows(rush_v_pass, nfl_rvp) %>% 
  mutate(play_type = factor(play_type,
                            levels = c("pass", "run"),
                            labels = c("Pass", "Rush")))

rush_v_pass %>% 
  ggplot(aes(x = fct_rev(fct_reorder2(posteam, desc(play_type), avg_yds)), y = avg_yds, color = play_type)) +
  geom_line(aes(group = posteam), color = "grey", size = 3) +
  geom_point(size = 5) +
  coord_flip()

rush_v_pass %>%
  ggplot(aes(x = fct_rev(fct_reorder2(posteam, desc(play_type), avg_yds)), y = avg_yds, color = play_type)) +
  geom_line(aes(group = posteam), color = "grey", size = 3) +
  geom_point(size = 5) +
  geom_text(
    data = filter(rush_v_pass, posteam == "KC" & play_type == "Pass"),
    aes(label = play_type),
    hjust = 0, nudge_y = 0.2, fontface = "bold", size = 6
  ) +
  geom_text(
    data = filter(rush_v_pass, posteam == "KC" & play_type == "Rush"),
    aes(label = play_type),
    hjust = 1, nudge_y = -0.2, fontface = "bold", size = 6
  ) +
  coord_flip() +
  scale_color_manual(values = c("#003399", "#ff2b4f")) +
  theme_538() +
  theme(
    panel.grid.major.y = element_blank(),
    legend.position = "none",
    axis.text.y = element_text(color = if_else(rush_v_pass$posteam == "NFL", "red", "black"))
  ) +
  labs(
    x = "",
    y = "\n Average Yards Gained",
    title = "Through Week 7",
    caption = "Data: @nflscrapR"
  ) +
  scale_y_continuous(
    limits = c(0, 15),
    breaks = seq(3, 8, 1)
  )

Slope Charts

game_num <- pbp_rp %>% 
  mutate(game_week = case_when(
    between(game_date, as.Date("2019-09-05"), as.Date("2019-09-11")) ~ 1,
    between(game_date, as.Date("2019-09-12"), as.Date("2019-09-18")) ~ 2,
    between(game_date, as.Date("2019-09-19"), as.Date("2019-09-25")) ~ 3,
    between(game_date, as.Date("2019-09-26"), as.Date("2019-10-02")) ~ 4,
    between(game_date, as.Date("2019-10-03"), as.Date("2019-10-09")) ~ 5,
    between(game_date, as.Date("2019-10-10"), as.Date("2019-10-16")) ~ 6,
    between(game_date, as.Date("2019-10-17"), as.Date("2019-10-23")) ~ 7,
    between(game_date, as.Date("2019-10-24"), as.Date("2019-10-30")) ~ 8,
    between(game_date, as.Date("2019-10-31"), as.Date("2019-11-06")) ~ 9,
    between(game_date, as.Date("2019-11-07"), as.Date("2019-11-13")) ~ 10,
    between(game_date, as.Date("2019-11-14"), as.Date("2019-11-20")) ~ 11,
    between(game_date, as.Date("2019-11-21"), as.Date("2019-11-27")) ~ 12,
    between(game_date, as.Date("2019-11-28"), as.Date("2019-12-04")) ~ 13,
    between(game_date, as.Date("2019-12-05"), as.Date("2019-12-11")) ~ 14,
    between(game_date, as.Date("2019-12-12"), as.Date("2019-12-18")) ~ 15,
    between(game_date, as.Date("2019-12-19"), as.Date("2019-12-25")) ~ 16,
    between(game_date, as.Date("2019-12-30"), as.Date("2020-01-01")) ~ 17,
    TRUE ~ 99
    )
  ) %>% 
  filter(game_week != 99)
wk_rvp <- game_num %>%
  filter(play_type != "no_play", game_half %in% c("Half1", "Half2")) %>%
  mutate(game_half = if_else(game_half == "Half1", "1st Half", "2nd Half")) %>%
  group_by(posteam, game_half, game_week, play_type) %>%
  count() %>% 
  ungroup()

bal_rvp <- wk_rvp %>% 
  filter(posteam == "BAL") %>% 
  mutate(game_num = if_else(game_week <=11, game_week, game_week - 1),
         play_type = if_else(play_type == "run", "Rush", "Pass"),
         game_text = glue::glue("Game {game_num}")
         )

bal_rvp
bal_rvp %>% 
  ggplot(aes(x = game_half, y = n, group = game_num)) +
  geom_point() +
  geom_line() +
  facet_grid(~play_type)

bal_runs <- bal_rvp %>% 
  filter(play_type == "Rush") %>% 
  spread(game_half, n) %>%
  mutate(balance = if_else(`1st Half` >= `2nd Half`, "Ran More in 1st", "Ran More in 2nd")) %>% 
  gather(key = "game_half", value = "n", `1st Half`:`2nd Half`) %>% 
  select(posteam, game_num,game_half, balance)

bal_runs 

#THIS NEEDS MORE WORK

bal_rvp %>%
  ggplot(
    aes(
      x = game_half, y = n, group = game_week, 
      color = if_else(game_num %in% c(1, 2, 3, 4), "#241773", "#003399")
    )
  ) +
  geom_point() +
  geom_vline(xintercept = c(1, 2), size = 2, color = "black", alpha = 0.5) +
  geom_line(size = 2) +
  geom_point(size = 5) +
  geom_text_repel(
    data = filter(
      bal_rvp, game_num %in% c(1, 2, 3),
      game_half == "2nd Half"
    ),
    aes(label = game_text),
    direction = "y", nudge_x = 0.1, segment.size = 0.1, hjust = 0,
    size = 5, fontface = "bold"
  ) +
  facet_grid(~play_type) +
  scale_color_identity() +
  theme_538() +
  theme(panel.grid.major.x = element_blank()) +
  labs(x = "", y = "N of Plays\n",
       title = "The Ravens throw more than you think! ",
       caption = "Data: @nflscrapR")

schotty %>%
  slice(1:5, 28:32) %>%
  gt()
posteam mean_pass plays
WAS 0.4444444 135
DEN 0.4594595 148
PHI 0.4765625 128
IND 0.4806202 129
SEA 0.4842767 159
CHI 0.6030534 131
LA 0.6038961 154
GB 0.6093750 128
BUF 0.6527778 144
KC 0.7400000 150
schotty_gt <- schotty %>%
  slice(1:5, 28:32) %>%
  arrange(desc(mean_pass)) %>%
  mutate(play_focus = if_else(mean_pass >= .50, "Pass Heavy", "Run Heavy")) %>%
  group_by(play_focus) %>%
  gt()

schotty_gt
posteam mean_pass plays
Pass Heavy
KC 0.7400000 150
BUF 0.6527778 144
GB 0.6093750 128
LA 0.6038961 154
CHI 0.6030534 131
Run Heavy
SEA 0.4842767 159
IND 0.4806202 129
PHI 0.4765625 128
DEN 0.4594595 148
WAS 0.4444444 135
schotty_gt %>%
  fmt_percent(columns = vars(mean_pass), decimals = 1) %>%
  tab_header(
    title = "Percentage of Passes by teams on 1st/2nd Down in 1st Half",
    subtitle = "Win Prob between 20 & 80, excludes final 2 minutes of the half"
  ) %>%
  cols_label(
    posteam = "Player",
    mean_pass = "Pass %",
    plays = "Plays"
  ) %>%
  cols_align(
    align = "center"
  )
Percentage of Passes by teams on 1st/2nd Down in 1st Half
Win Prob between 20 & 80, excludes final 2 minutes of the half
Player Pass % Plays
Pass Heavy
KC 74.0% 150
BUF 65.3% 144
GB 60.9% 128
LA 60.4% 154
CHI 60.3% 131
Run Heavy
SEA 48.4% 159
IND 48.1% 129
PHI 47.7% 128
DEN 45.9% 148
WAS 44.4% 135
# 2019 and pass plays
pass_2019 <- pbp_rp %>%
  filter(play_type == "pass", penalty == 0, sack == 0, qb_scramble == 0)

third_down_passes <- pass_2019 %>%
  filter(down == 3, ydstogo <= 10) %>%
  group_by(receiver_player_name) %>%
  mutate(converted = if_else(yards_gained > ydstogo, 1, 0)) %>%
  select(receiver_player_name, yards_gained, ydstogo, epa, converted) %>%
  summarise(
    mean_epa = mean(epa, na.rm = TRUE),
    mean_yardage = mean(yards_gained, na.rm = TRUE),
    mean_ydstogo = mean(ydstogo, na.rm = TRUE),
    n = n(),
    conv_rate = sum(converted) / n
  ) %>%
  ungroup() %>%
  arrange(desc(conv_rate))

rbs <- c(
  "A.Kamara", "J.White", "J.Conner", "C.McCaffrey", "S.Barkley", "E.Elliott",
  "J.Mixon", "T.Gurley", "D.Johnson", "D.Cook", "N.Chubb", "J.Jacobs", "M.Ingram",
  "C.Hyde", "C.Thompson", "L.Fournette", "M.Breida", "F.Gore", "K.Johnson", "C.Carson",
  "A.Ekeler", "D.Henry"
)

wrs <- c(
  "K.Allen", "L.Fitzgerald", "C.Kupp", "M.Sanu", "Z.Jones", "J.Edelman",
  "J.Wright", "W.Snead IV", "A.Miller", "T.Lockett", "T.Gabriel", "S.Shepard", "C.Beasley",
  "T.Boyd", "J.Landry", "E.Sanders", "R.Cobb", "K.Coutee", "C.Rodgers", "D.Westbrook",
  "D.Amendola", "A.Thielen", "M.Thomas", "N.Agholor", "J.Crowder"
)

tes <- c("T.Kelce", "Z.Ertz", "G. Kittle", "E.Engram", "J.Cook", "E.Ebron",
         "G.Olsen", "D.Waller", "M.Andrews")

top_players <- c(rbs, wrs, tes)
third_conv_table <- third_down_passes %>% 
  filter(n >= 4) %>% 
  mutate(position = case_when(
    receiver_player_name %in% rbs ~ "RB",
    receiver_player_name %in% wrs ~ "WR",
    receiver_player_name %in% tes ~ "TE",
    TRUE ~ NA_character_
  ),
  position = factor(position, levels = c("RB", "WR", "TE"))
  ) %>%
  filter(receiver_player_name %in% top_players) %>% 
  select(receiver_player_name, conv_rate, n,  everything(), -mean_epa) %>% 
  group_by(position) %>% 
  arrange(desc(conv_rate)) %>% 
  ungroup() %>% 
  gt::gt(groupname_col = "position") 

third_conv_table
receiver_player_name conv_rate n mean_yardage mean_ydstogo
RB
C.McCaffrey 0.7500000 8 10.500000 5.125000
J.Conner 0.7142857 7 10.285714 4.857143
A.Kamara 0.6000000 5 6.000000 4.600000
A.Ekeler 0.5000000 6 4.000000 4.666667
D.Cook 0.5000000 4 7.250000 5.750000
E.Elliott 0.5000000 4 7.000000 5.250000
J.White 0.5000000 24 8.583333 6.583333
D.Johnson 0.4090909 22 7.000000 6.318182
S.Barkley 0.4000000 5 6.400000 5.200000
K.Johnson 0.3333333 12 5.333333 6.333333
L.Fournette 0.2222222 9 4.222222 7.111111
C.Thompson 0.1666667 6 2.666667 6.166667
WR
T.Gabriel 0.7500000 4 9.250000 6.750000
M.Sanu 0.7142857 7 11.857143 8.142857
S.Shepard 0.6666667 9 10.000000 6.777778
K.Allen 0.5882353 17 7.647059 5.529412
M.Thomas 0.5789474 19 9.315789 5.789474
D.Amendola 0.5555556 9 8.111111 5.777778
R.Cobb 0.5555556 9 5.888889 5.444444
A.Thielen 0.5384615 13 7.846154 6.615385
C.Kupp 0.5384615 26 12.423077 5.961538
A.Miller 0.5000000 6 8.833333 3.666667
N.Agholor 0.5000000 14 6.285714 4.500000
T.Lockett 0.5000000 10 11.600000 6.300000
J.Crowder 0.4545455 11 7.636364 4.909091
C.Beasley 0.4285714 14 4.928571 6.500000
T.Boyd 0.4117647 17 5.941176 5.470588
J.Edelman 0.4000000 10 4.900000 5.600000
J.Wright 0.3750000 8 8.125000 6.000000
L.Fitzgerald 0.3750000 16 4.062500 5.437500
K.Coutee 0.2857143 7 8.142857 6.142857
D.Westbrook 0.2222222 9 4.333333 4.222222
J.Landry 0.2000000 10 5.000000 4.900000
E.Sanders 0.1666667 6 1.666667 5.166667
TE
Z.Ertz 0.5833333 12 7.833333 5.416667
T.Kelce 0.5625000 16 7.562500 5.562500
E.Ebron 0.5000000 10 6.700000 5.800000
J.Cook 0.4545455 11 8.636364 4.727273
G.Olsen 0.3750000 8 4.000000 5.500000
D.Waller 0.3333333 9 3.333333 5.666667
M.Andrews 0.3333333 15 6.133333 6.733333
E.Engram 0.2500000 12 3.250000 3.416667
third_conv_table %>% 
  tab_header(
    title = "3rd Down Conversion Rates (Slot WR vs RB vs TE)",
    subtitle = "Yds to go <= 10, N of Plays >= 4"
  ) %>% 
  fmt_percent(.,
              columns = vars(conv_rate),
              decimals = 1
  ) %>% 
  fmt_number(
    columns = vars(mean_yardage, mean_ydstogo),
    decimals = 1
  ) %>% 
  cols_label(
    receiver_player_name = "Player",
    mean_yardage = "Yds Gained",
    mean_ydstogo = "Yds to Go",
    n = "Plays",
    conv_rate = "Conversion Rate"
  ) %>% 
  cols_align(
    align = "center"
  ) %>% 
  tab_source_note(
    source_note = "Table: @thomas_mock | Data: @nflscrapR"
  ) %>% 
  tab_footnote(
    footnote = "Average Yards",
    locations = cells_column_labels(
      columns = vars(mean_yardage, mean_ydstogo)
    )
  )
3rd Down Conversion Rates (Slot WR vs RB vs TE)
Yds to go <= 10, N of Plays >= 4
Player Conversion Rate Plays Yds Gained1 Yds to Go1
RB
C.McCaffrey 75.0% 8 10.5 5.1
J.Conner 71.4% 7 10.3 4.9
A.Kamara 60.0% 5 6.0 4.6
A.Ekeler 50.0% 6 4.0 4.7
D.Cook 50.0% 4 7.2 5.8
E.Elliott 50.0% 4 7.0 5.2
J.White 50.0% 24 8.6 6.6
D.Johnson 40.9% 22 7.0 6.3
S.Barkley 40.0% 5 6.4 5.2
K.Johnson 33.3% 12 5.3 6.3
L.Fournette 22.2% 9 4.2 7.1
C.Thompson 16.7% 6 2.7 6.2
WR
T.Gabriel 75.0% 4 9.2 6.8
M.Sanu 71.4% 7 11.9 8.1
S.Shepard 66.7% 9 10.0 6.8
K.Allen 58.8% 17 7.6 5.5
M.Thomas 57.9% 19 9.3 5.8
D.Amendola 55.6% 9 8.1 5.8
R.Cobb 55.6% 9 5.9 5.4
A.Thielen 53.8% 13 7.8 6.6
C.Kupp 53.8% 26 12.4 6.0
A.Miller 50.0% 6 8.8 3.7
N.Agholor 50.0% 14 6.3 4.5
T.Lockett 50.0% 10 11.6 6.3
J.Crowder 45.5% 11 7.6 4.9
C.Beasley 42.9% 14 4.9 6.5
T.Boyd 41.2% 17 5.9 5.5
J.Edelman 40.0% 10 4.9 5.6
J.Wright 37.5% 8 8.1 6.0
L.Fitzgerald 37.5% 16 4.1 5.4
K.Coutee 28.6% 7 8.1 6.1
D.Westbrook 22.2% 9 4.3 4.2
J.Landry 20.0% 10 5.0 4.9
E.Sanders 16.7% 6 1.7 5.2
TE
Z.Ertz 58.3% 12 7.8 5.4
T.Kelce 56.2% 16 7.6 5.6
E.Ebron 50.0% 10 6.7 5.8
J.Cook 45.5% 11 8.6 4.7
G.Olsen 37.5% 8 4.0 5.5
D.Waller 33.3% 9 3.3 5.7
M.Andrews 33.3% 15 6.1 6.7
E.Engram 25.0% 12 3.2 3.4
Table: @thomas_mock | Data: @nflscrapR

1 Average Yards

2018 Fun stuff

url <- "https://fivethirtyeight.com/features/sorry-running-backs-even-your-receiving-value-can-be-easily-replaced/"

rb_receiving <- url %>% 
  xml2::read_html() %>% 
  rvest::html_table() %>% 
  purrr::chuck(1) %>% 
  purrr::set_names(nm = c("team", "attempts", "successful", "success_rate")) %>% 
  dplyr::as_tibble() %>% 
  filter(team != "team") %>% 
  mutate(success_rate = stringr::str_remove(success_rate, "%")) %>% 
  mutate_at(.vars = vars(attempts:success_rate), as.double)

rb_receiving  
rb_receiving %>%
  mutate(
    success_rate = if_else(team == "Kansas City",
      success_rate / 100,
      success_rate
    )
  ) %>%
  gt() %>%
  tab_spanner(
    label = "PASSES TO RBS",
    columns = vars(attempts, successful)
  ) %>%
  tab_options(
    table.border.top.color = "white",
    row.striping.include_table_body = FALSE,
    row.padding = px(4)
  ) %>%
  tab_source_note(
    source_note = "SOURCE: ESPN STATS & INFORMATION GROUP"
  ) %>%
  tab_style(
    style = list(
      cell_fill(color = "lightblue")
    ),
    locations = cells_data(
      columns = vars(success_rate)
    )
  ) %>%
  fmt_percent(
    columns = vars(success_rate),
    rows = 1,
    decimals = 1
  ) %>%
  cols_label(
    team = "TEAM",
    attempts = "ATTEMPTS",
    successful = "SUCCESSFUL",
    success_rate = "SUCCESS RATE"
  )
TEAM PASSES TO RBS SUCCESS RATE
ATTEMPTS SUCCESSFUL
Kansas City 99 62 62.6%
Carolina 134 73 54.5
San Francisco 107 58 54.2
L.A. Chargers 138 73 52.9
L.A. Rams 93 48 51.6
Pittsburgh 110 56 50.9
Oakland 132 67 50.8
New England 170 86 50.6
Chicago 131 66 50.4
New Orleans 142 71 50.0
Seattle 84 42 50.0
Cleveland 109 54 49.5
Miami 101 50 49.5
Jacksonville 133 62 46.6
Green Bay 97 45 46.4
Denver 128 58 45.3
Baltimore 91 41 45.1
Minnesota 98 44 44.9
Atlanta 87 39 44.8
N.Y. Jets 103 46 44.7
Philadelphia 101 45 44.6
Indianapolis 126 55 43.7
Cincinnati 108 47 43.5
Detroit 143 62 43.4
Tennessee 86 36 41.9
N.Y. Giants 149 60 40.3
Washington 108 43 39.8
Tampa Bay 88 35 39.8
Buffalo 93 35 37.6
Houston 67 25 37.3
Dallas 111 41 36.9
Arizona 109 38 34.9
SOURCE: ESPN STATS & INFORMATION GROUP
url <- "https://fivethirtyeight.com/features/are-we-sure-aaron-rodgers-is-still-an-elite-quarterback/"

rodgers <- url %>% 
  xml2::read_html() %>% 
  rvest::html_table() %>% 
  purrr::chuck(1) %>% 
  janitor::clean_names() %>% 
  dplyr::rename("rank" = x) %>% 
  dplyr::as_tibble() %>% 
  mutate(yards = stringr::str_remove(yards, ",")) %>% 
  mutate_at(.vars = vars(g:qbr), as.double)

rodgers 
rodgers_table <- rodgers %>%
  gt() %>%
  tab_options(
    table.border.top.color = "white",
    row.striping.include_table_body = FALSE,
    row.padding = px(4)
  ) %>%
  tab_source_note(
    source_note = "SOURCE: ESPN STATS & INFORMATION GROUP"
  ) %>%
  fmt_number(
    columns = vars(yards),
    decimals = 0
  ) %>%
  cols_label(
    rank = "",
    player = "PLAYER",
    g = "G",
    dropbacks = "DROPBACKS",
    yards = "YARDS",
    yards_per_dropback = "YARDS PER DROPBACK",
    qbr = "QBR"
  ) 

rodgers_table
PLAYER G DROPBACKS YARDS YARDS PER DROPBACK QBR
1 Deshaun Watson 22 211 1,851 8.8 86.2
2 Tom Brady 60 450 4,364 9.7 82.9
3 Drew Brees 62 392 3,137 8.0 82.1
4 Carson Palmer 38 223 2,214 9.9 82.1
5 Russell Wilson 64 531 4,186 7.9 80.8
6 Philip Rivers 64 342 3,254 9.5 80.7
7 Andrew Luck 38 286 2,559 8.9 80.6
8 Matt Ryan 64 562 5,285 9.4 80.1
9 Case Keenum 44 351 2,999 8.5 79.4
10 Kirk Cousins 64 431 3,892 9.0 78.9
11 Jameis Winston 54 377 2,998 8.0 77.6
12 Dak Prescott 48 323 2,350 7.3 77.1
13 Matthew Stafford 64 351 3,092 8.8 77.0
14 Blaine Gabbert 21 145 1,039 7.2 76.8
15 Carson Wentz 40 339 2,668 7.9 75.8
16 Nick Foles 20 153 1,243 8.1 75.5
17 Patrick Mahomes 17 153 1,401 9.2 74.8
18 Tyrod Taylor 45 297 2,301 7.7 74.6
19 Ryan Fitzpatrick 37 223 1,856 8.3 74.3
20 Andy Dalton 56 401 3,236 8.1 73.1
21 Sam Bradford 34 229 2,219 9.7 71.7
22 Brian Hoyer 20 173 1,395 8.1 69.6
23 Jay Cutler 34 200 1,516 7.6 69.4
24 Jared Goff 38 336 2,979 8.9 68.8
25 Blake Bortles 60 371 2,752 7.4 68.1
26 Ben Roethlisberger 56 225 1,898 8.4 65.2
27 Marcus Mariota 55 376 3,170 8.4 62.5
28 Derek Carr 62 329 2,224 6.8 61.3
29 Ryan Tannehill 40 263 2,053 7.8 60.0
30 Alex Smith 56 301 2,275 7.6 60.0
31 Eli Manning 63 433 3,261 7.5 59.9
32 Aaron Rodgers 55 330 2,000 6.1 59.8
33 Teddy Bridgewater 17 132 959 7.3 58.0
34 Cam Newton 60 432 2,965 6.9 57.2
35 Mitchell Trubisky 26 158 1,069 6.8 57.0
36 Colin Kaepernick 19 164 1,140 7.0 53.9
37 Trevor Siemian 24 177 1,180 6.7 52.1
38 Josh McCown 27 141 1,012 7.2 51.2
39 Jacoby Brissett 17 122 769 6.3 45.0
40 Joe Flacco 51 410 2,866 7.0 40.5
41 Brock Osweiler 30 200 1,356 6.8 37.9
SOURCE: ESPN STATS & INFORMATION GROUP
rodgers_table %>% 
  # This is the big player
  data_color(
    columns = vars(yards_per_dropback),
    colors = scales::col_numeric(
      palette = c("#F8F8F8","#30a2da"),
      domain = NULL
      )
    ) %>% 
  tab_style(
    style = cell_text(
      weight = "bold"
    ),
    locations = cells_data(
      rows = player == "Aaron Rodgers"
    )
  )
PLAYER G DROPBACKS YARDS YARDS PER DROPBACK QBR
1 Deshaun Watson 22 211 1,851 8.8 86.2
2 Tom Brady 60 450 4,364 9.7 82.9
3 Drew Brees 62 392 3,137 8.0 82.1
4 Carson Palmer 38 223 2,214 9.9 82.1
5 Russell Wilson 64 531 4,186 7.9 80.8
6 Philip Rivers 64 342 3,254 9.5 80.7
7 Andrew Luck 38 286 2,559 8.9 80.6
8 Matt Ryan 64 562 5,285 9.4 80.1
9 Case Keenum 44 351 2,999 8.5 79.4
10 Kirk Cousins 64 431 3,892 9.0 78.9
11 Jameis Winston 54 377 2,998 8.0 77.6
12 Dak Prescott 48 323 2,350 7.3 77.1
13 Matthew Stafford 64 351 3,092 8.8 77.0
14 Blaine Gabbert 21 145 1,039 7.2 76.8
15 Carson Wentz 40 339 2,668 7.9 75.8
16 Nick Foles 20 153 1,243 8.1 75.5
17 Patrick Mahomes 17 153 1,401 9.2 74.8
18 Tyrod Taylor 45 297 2,301 7.7 74.6
19 Ryan Fitzpatrick 37 223 1,856 8.3 74.3
20 Andy Dalton 56 401 3,236 8.1 73.1
21 Sam Bradford 34 229 2,219 9.7 71.7
22 Brian Hoyer 20 173 1,395 8.1 69.6
23 Jay Cutler 34 200 1,516 7.6 69.4
24 Jared Goff 38 336 2,979 8.9 68.8
25 Blake Bortles 60 371 2,752 7.4 68.1
26 Ben Roethlisberger 56 225 1,898 8.4 65.2
27 Marcus Mariota 55 376 3,170 8.4 62.5
28 Derek Carr 62 329 2,224 6.8 61.3
29 Ryan Tannehill 40 263 2,053 7.8 60.0
30 Alex Smith 56 301 2,275 7.6 60.0
31 Eli Manning 63 433 3,261 7.5 59.9
32 Aaron Rodgers 55 330 2,000 6.1 59.8
33 Teddy Bridgewater 17 132 959 7.3 58.0
34 Cam Newton 60 432 2,965 6.9 57.2
35 Mitchell Trubisky 26 158 1,069 6.8 57.0
36 Colin Kaepernick 19 164 1,140 7.0 53.9
37 Trevor Siemian 24 177 1,180 6.7 52.1
38 Josh McCown 27 141 1,012 7.2 51.2
39 Jacoby Brissett 17 122 769 6.3 45.0
40 Joe Flacco 51 410 2,866 7.0 40.5
41 Brock Osweiler 30 200 1,356 6.8 37.9
SOURCE: ESPN STATS & INFORMATION GROUP