#Load in some new libraries!
library(tidyverse) # Data Cleaning, manipulation, summarization, plotting
library(ggtext)
library(gt)
library(DT) # beautiful interactive tables
library(ggthemes) # custom pre-built themes
library(ggforce) # better annotations
library(ggridges) # many distributions at once
library(ggrepel) # better labels
library(ggbeeswarm) # beeswarm plots
library(extrafont) # for extra fonts
library(bbplot)
library(dplyr)
library(na.tools)
library(ggimage)
library(knitr)
library(kableExtra)
library(teamcolors)
library(concaveman)
Basic setup of the document can be attributed to the great work of Ben Baldwin and the nflscrapr Community :)
# Importing the data from github and saving it as the object php
# The year can be changed by editing the CSV
pbp <- read_csv(url("https://github.com/ryurko/nflscrapR-data/raw/master/play_by_play_data/regular_season/reg_pbp_2019.csv"))
## Cleaning the data
#piping the pbp object to select posteam, defteam and sort by descending play type - second pip to call just the head of the data
pbp %>% select(posteam, defteam, desc, play_type) %>% head
# The desc column includes a bunch of data that should be removed
# Punts, Kickoffs, field goals and dead ball penalities will be removed as they don't impact EPA
pbp %>%
mutate(
stick_throw = case_when(
air_yards < ydstogo ~ "Short of Sticks",
air_yards == ydstogo ~ "At Stick",
air_yards > ydstogo ~ "Past Stick",
TRUE ~ NA_character_
)
) %>%
select(air_yards, ydstogo, stick_throw) %>%
filter(!is.na(air_yards))
#Filtering out punts, kickoffs, field goals, dead ball penalties
pbp_rp <- pbp %>%
filter(!is_na(epa), play_type=="no_play" | play_type=="pass" | play_type=="run")
# View the "no play" plays
pbp_rp %>% filter(play_type=="no_play") %>% select(desc, rush_attempt, pass_attempt) %>% head
# Some of these plays could be attempted rush or pass plays that should count when computing EPA.
# The data currently shows 0 for each attempt which is wrong - pass, create "pass", which searches the "desc" variable for plays with "pass", "sacked", or "scramble", along with a variable for rush and for a successful play
pbp_rp <- pbp_rp %>%
mutate(
pass = if_else(str_detect(desc, "(pass)|(sacked)|(scramble)"), 1, 0),
rush = if_else(str_detect(desc, "(left end)|(left tackle)|(left guard)|(up the middle)|(right guard)|(right tackle)|(right end)") & pass == 0, 1, 0),
success = ifelse(epa>0, 1 , 0)
) %>%
# filter to only pass or rush plays
filter(pass == 1 | rush == 1) %>%
mutate(
passer_player_name = ifelse(play_type == "no_play" & pass == 1,
str_extract(desc, "(?<=\\s)[A-Z][a-z]*\\.\\s?[A-Z][A-z]+(\\s(I{2,3})|(IV))?(?=\\s((pass)|(sack)|(scramble)))"),
passer_player_name
),
receiver_player_name = ifelse(play_type == "no_play" & str_detect(desc, "pass"),
str_extract(
desc,
"(?<=to\\s)[A-Z][a-z]*\\.\\s?[A-Z][A-z]+(\\s(I{2,3})|(IV))?"
),
receiver_player_name
),
rusher_player_name = ifelse(play_type == "no_play" & rush == 1,
str_extract(desc, "(?<=\\s)[A-Z][a-z]*\\.\\s?[A-Z][A-z]+(\\s(I{2,3})|(IV))?(?=\\s((left end)|(left tackle)|(left guard)| (up the middle)|(right guard)|(right tackle)|(right end)))"),
rusher_player_name
)
) %>%
mutate(
name = if_else(!is.na(passer_player_name), passer_player_name, rusher_player_name),
rusher = rusher_player_name,
receiver = receiver_player_name,
play = 1
)
#review the data to validate that the correct plays are pulling in
pbp_rp %>% filter(play_type=="no_play") %>% select(pass, rush, desc) %>% head
# Creating a custom theme
theme_fivethirtyeight <- function(base_size = 12, base_family = "sans") {
colors <- deframe(ggthemes::ggthemes_data[["fivethirtyeight"]])
(theme_foundation(base_size = base_size, base_family = base_family)
+ theme(
line = element_line(colour = "black"),
rect = element_rect(
fill = colors["Light Gray"],
linetype = 0, colour = NA
),
text = element_text(colour = colors["Dark Gray"]),
axis.title = element_blank(),
axis.text = element_text(),
axis.ticks = element_blank(),
axis.line = element_blank(),
legend.background = element_rect(),
legend.position = "bottom",
legend.direction = "horizontal",
legend.box = "vertical",
panel.grid = element_line(colour = NULL),
panel.grid.major =
element_line(colour = colors["Medium Gray"]),
panel.grid.minor = element_blank(),
plot.title = element_text(hjust = 0, size = rel(1.5), face = "bold"),
plot.margin = unit(c(1, 1, 1, 1), "lines"),
strip.background = element_rect()
))
}
#Editing the custom theme
theme_538 <- function(base_size = 12, font = "TT Times New Roman") {
# Text setting
txt <- element_text(size = base_size + 2, colour = "black", face = "plain")
bold_txt <- element_text(
size = base_size + 2, colour = "black",
family = "Montserrat", face = "bold"
)
large_txt <- element_text(size = base_size + 4, color = "black", face = "bold")
theme_minimal(base_size = base_size, base_family = font) +
theme(
# Legend Settings
legend.key = element_blank(),
legend.background = element_blank(),
legend.position = "bottom",
legend.direction = "horizontal",
legend.box = "vertical",
# Backgrounds
strip.background = element_blank(),
strip.text = large_txt,
plot.background = element_blank(),
plot.margin = unit(c(1, 1, 1, 1), "lines"),
# Axis & Titles
text = txt,
axis.text = txt,
axis.ticks = element_blank(),
axis.line = element_blank(),
axis.title = bold_txt,
plot.title = large_txt,
# Panel
panel.grid = element_line(colour = NULL),
panel.grid.major = element_line(colour = "#D2D2D2"),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
panel.border = element_blank()
)
}
# Prepare data
wr_duel <- pbp_rp %>%
filter(receiver %in% c("M.Evans", "C.Godwin")) %>%
group_by(game_date, receiver) %>%
summarize(mean_epa = mean(epa, na.rm = TRUE))
# Clean it up a bit
wr_duel_plot <- ggplot(
wr_duel,
aes(x = game_date, y = mean_epa, color = receiver)
) +
geom_line(size = 1) +
theme_538() +
geom_hline(yintercept = 0, size = 1, color = "black") +
labs(
x = "\nGame Date",
y = "EPA (Average)",
title = "Quick comparison of Godwin vs Evans across the 2019 season",
caption = "Data: @nflscrapR"
)
# Change the Colors
tb_colors <- teamcolors %>%
filter(name == "Tampa Bay Buccaneers") %>%
select(name:secondary)
tb_primary <- pull(tb_colors, primary)
tb_secondary <- pull(tb_colors, secondary)
# Assign the Colors
wr_duel_plot <- ggplot(wr_duel,
aes(x = game_date, y = mean_epa,
color = if_else(receiver == "M.Evans", tb_primary , tb_secondary))) +
geom_line(size = 1) +
theme_538() +
geom_hline(yintercept = 0, size = 1, color = "black") +
labs(x = "",
y = "EPA (Average)",
title = "Quick comparison of <span style='color:#d50a0a'>**Mike Evans**</span> vs <span style='color:#34302b'>**Chris Godwin**</span> across the 2019 season",
caption = "Data: @nflscrapR") +
scale_color_identity() +
theme(plot.title = element_markdown())
wr_duel_plot
# Prepare data
cgod_do_it <- pbp_rp %>%
filter(receiver == "C.Godwin") %>%
arrange(desc(game_date)) %>%
group_by(game_date) %>%
summarize(
total_yards = sum(yards_gained, na.rm = TRUE),
total_airyards = sum(air_yards, na.rm = TRUE)
) %>%
head(5) %>%
mutate(
game_num = row_number(),
game_text = glue::glue("Game {game_num}")
)
ggplot(cgod_do_it, aes(x = total_airyards, y = total_yards, color = game_num)) +
# geom path follows the order of underlying data
geom_path(size = 2) +
geom_point(size = 5) +
# creates a line for comparison
geom_abline(intercept = 0, slope = 1, color = "grey", linetype = "dashed") +
# adds labels to only game 1 and 5
geom_text(
data = filter(cgod_do_it, game_num %in% c(1, 5)),
aes(label = game_text),
hjust = 1, nudge_x = -5
) +
# set scales for 0-axis
scale_x_continuous(limits = c(0, 140)) +
scale_y_continuous(limits = c(0, 140)) +
# change color gradient to start at black and transition to yellow
scale_color_gradient(low = tb_primary, high = tb_secondary) +
theme_538() +
labs(
x = "\nTotal Air Yards",
y = "Total Yards\n",
title = "Chris Godwin Air Yards vs Total Yards",
caption = "Data: @nflscrapR"
) +
theme(legend.position = "none")
# Prepare data
mevans_do_it <- pbp_rp %>%
filter(receiver == "M.Evans") %>%
arrange(desc(game_date)) %>%
group_by(game_date) %>%
summarize(
total_yards = sum(yards_gained, na.rm = TRUE),
total_airyards = sum(air_yards, na.rm = TRUE)
) %>%
head(5) %>%
mutate(
game_num = row_number(),
game_text = glue::glue("Game {game_num}")
)
ggplot(mevans_do_it, aes(x = total_airyards, y = total_yards, color = game_num)) +
# geom path follows the order of underlying data
geom_path(size = 2) +
geom_point(size = 5) +
# creates a line for comparison
geom_abline(intercept = 0, slope = 1, color = "grey", linetype = "dashed") +
# adds labels to only game 1 and 5
geom_text(
data = filter(mevans_do_it, game_num %in% c(1, 5)),
aes(label = game_text),
hjust = 1, nudge_x = -5
) +
# set scales for 0-axis
scale_x_continuous(limits = c(0, 140)) +
scale_y_continuous(limits = c(0, 140)) +
# change color gradient to start at black and transition to yellow
scale_color_gradient(low = tb_primary, high = tb_secondary) +
theme_538() +
labs(
x = "\nTotal Air Yards",
y = "Total Yards\n",
title = "Mike Evans Air Yards vs Total Yards",
caption = "Data: @nflscrapR"
) +
theme(legend.position = "none")
Win Probability (WP) - The probability that a team will win a game in progress, given a particular combination of circumstances including score, time remaining, field position, down, and to go distance.
WP is based on a model built on actual outcomes of NFL games from recent seasons that featured similar circumstances
Which teams have been the most pass-heavy in the first half on early downs with win probability between 20 and 80, excluding the final 2 minutes of the half when everyone is pass-happy
#Plotting the data
ggplot(schotty, aes(x=reorder(posteam,-mean_pass), y=mean_pass)) +
geom_text(aes(label=posteam))
ggsave('FILENAME.png', dpi=1000)
#Take all plays w/pass=1, grouping by team, summarising number of dropbacks and calculating EPA/db and success rate (anytime epa>o)
chart_data <- pbp_rp %>%
filter(pass==1) %>%
group_by(posteam) %>%
summarise(
num_db = n(),
epa_per_db = sum(epa) / num_db,
success_rate = sum(epa > 0) / num_db
)
#pulling in NFL logos Data - joining by teamcode/posteam
nfl_logos_df <- read_csv("https://raw.githubusercontent.com/statsbylopez/BlogPosts/master/nfl_teamlogos.csv")
chart <- chart_data %>% left_join(nfl_logos_df, by = c("posteam" = "team_code"))
#Home v Away EPA
team_epa <- pbp_rp %>%
group_by(posteam) %>%
summarise(
total_home_epa = sum(total_home_epa),
total_away_epa = sum(total_away_epa)
)
epa_chart <- team_epa %>% left_join(nfl_logos_df, by = c("posteam" = "team_code"))
epa_chart %>%
ggplot(aes(x = total_home_epa, y = total_away_epa)) +
geom_image(aes(image = url), size = 0.05) +
labs(x = "Total Home EPA",
y = "Total Away EPA",
caption = "Data from nflscrapR",
title = "Home v Away EPA by Team",
subtitle = "2019") +
theme_bw() +
theme(axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
plot.title = element_text(size = 16),
plot.subtitle = element_text(size = 14),
plot.caption = element_text(size = 12))
ggsave('FILENAME.png', dpi=1000)
## Saving 7 x 5 in image
chart %>%
ggplot(aes(x = success_rate, y = epa_per_db)) +
geom_image(aes(image = url), size = 0.05) +
labs(x = "Success rate",
y = "EPA per play",
caption = "Data from nflscrapR",
title = "Dropback success rate & EPA/play",
subtitle = "2019") +
theme_bw() +
theme(axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
plot.title = element_text(size = 16),
plot.subtitle = element_text(size = 14),
plot.caption = element_text(size = 12))
ggsave('FILENAME.png', dpi=1000)
#Bring in Team Colors
library(teamcolors)
filter(teamcolors, league == "nfl")
#Join teamcolors and Play BY Play datasets'
nfl_colors <- teamcolors %>%
filter(league == "nfl") %>%
mutate(
team_abb = case_when(
name == "Arizona Cardinals" ~ "ARI",
name == "Atlanta Falcons" ~ "ATL",
name == "Baltimore Ravens" ~ "BAL",
name == "Buffalo Bills" ~ "BUF",
name == "Carolina Panthers" ~ "CAR",
name == "Chicago Bears" ~ "CHI",
name == "Cincinnati Bengals" ~ "CIN",
name == "Cleveland Browns" ~ "CLE",
name == "Dallas Cowboys" ~ "DAL",
name == "Denver Broncos" ~ "DEN",
name == "Detroit Lions" ~ "DET",
name == "Green Bay Packers" ~ "GB",
name == "Houston Texans" ~ "HOU",
name == "Indianapolis Colts" ~ "IND",
name == "Jacksonville Jaguars" ~ "JAX",
name == "Kansas City Chiefs" ~ "KC",
name == "Los Angeles Rams" ~ "LA",
name == "Los Angeles Chargers" ~ "LAC",
name == "Miami Dolphins" ~ "MIA",
name == "Minnesota Vikings" ~ "MIN",
name == "New England Patriots" ~ "NE",
name == "New Orleans Saints" ~ "NO",
name == "New York Giants" ~ "NYG",
name == "New York Jets" ~ "NYJ",
name == "Oakland Raiders" ~ "OAK",
name == "Philadelphia Eagles" ~ "PHI",
name == "Pittsburgh Steelers" ~ "PIT",
name == "Seattle Seahawks" ~ "SEA",
name == "San Francisco 49ers" ~ "SF",
name == "Tampa Bay Buccaneers" ~ "TB",
name == "Tennessee Titans" ~ "TEN",
name == "Washington Redskins" ~ "WAS",
TRUE ~ NA_character_
),
posteam = team_abb
)
# left_join the data together
pbp_colors <- left_join(pbp, nfl_colors, by = c("posteam"))
pbp_colors %>%
# Excludes non-plays, eg end of quarter
filter(!is.na(posteam)) %>%
select(posteam, team_abb, name, primary, secondary, logo) %>%
# Distinct grabs only the distinct/unique cases of column
distinct(posteam, .keep_all = TRUE)
Who likes the committee approach when it comes to running backs? (*crickets)
Let’s creat a visualization of 2019 data for some of the Committee based teams
rb_quad <- pbp_rp %>%
filter(
posteam == "SF",
receiver %in% c("M.Breida", "T.Coleman", "R.Mostert", "J.Wilson") |
rusher %in% c("M.Breida", "T.Coleman", "R.Mostert", "J.Wilson"),
play_type != "no_play"
) %>%
mutate(
# Assign a single player name for filtering regardless of play_type
player = if_else(is.na(receiver), rusher, receiver),
# Add nice labels to play_type
play_type = factor(play_type, labels = c("Reception", "Rush"))
) %>%
group_by(player, play_type) %>%
summarize(
n = n(),
mean_yards = sum(yards_gained, na.rm = TRUE) / n,
mean_success = sum(success, na.rm = TRUE) / n
)
rb_quad_plot <- rb_quad %>%
ggplot(aes(x = player, y = mean_yards)) +
geom_col(aes(fill = play_type), position = "dodge")
#Breaking this out into Facets
rb_quad_plot <- rb_quad %>%
ggplot(aes(x = player, y = mean_yards, fill = player, position = "dodge", group = play_type)) +
geom_col() +
facet_grid(~play_type)
# Change the Colors
sf_colors <- teamcolors %>%
filter(name == "San Francisco 49ers") %>%
select(name:secondary)
sf_primary <- pull(sf_colors, primary)
sf_secondary <- pull(sf_colors, secondary)
rb_quad_plot +
geom_hline(yintercept = 0.03, color = "black", size = 2) +
theme_538() +
scale_fill_manual(values = c("grey", sf_primary, sf_secondary, "Black")) +
labs(
x = "",
y = "Avg Yards per Play",
title = "49ers Running Backs in 2019",
subtitle = "McKinnon has not been active",
caption = "Data: @nflscrapR"
) +
theme(
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(color = "white", size = 1),
panel.ontop = TRUE,
legend.position = "none"
) +
scale_y_continuous(
breaks = seq(0, 15, 1)
)
rb_trio <- pbp_rp %>%
filter(
posteam == "WAS",
receiver %in% c("A.Peterson", "D.Guice", "C.Thompson") |
rusher %in% c("A.Peterson", "D.Guice", "C.Thompson"),
play_type != "no_play"
) %>%
mutate(
# Assign a single player name for filtering regardless of play_type
player = if_else(is.na(receiver), rusher, receiver),
# Add nice labels to play_type
play_type = factor(play_type, labels = c("Reception", "Rush"))
) %>%
group_by(player, play_type) %>%
summarize(
n = n(),
mean_yards = sum(yards_gained, na.rm = TRUE) / n,
mean_success = sum(success, na.rm = TRUE) / n
)
rb_trio_plot <- rb_trio %>%
ggplot(aes(x = player, y = mean_yards)) +
geom_col(aes(fill = play_type), position = "dodge")
# Change the Colors
was_colors <- teamcolors %>%
filter(name == "Washington Redskins") %>%
select(name:secondary)
was_primary <- pull(was_colors, primary)
was_secondary <- pull(was_colors, secondary)
#Breaking this out into Facets
rb_trio_plot <- rb_trio %>%
ggplot(aes(x = player, y = mean_yards, fill = player, position = "dodge", group = play_type)) +
geom_col() +
facet_grid(~play_type)
rb_trio_plot +
geom_hline(yintercept = 0.03, color = "black", size = 2) +
theme_538() +
scale_fill_manual(values = c(was_primary, was_secondary, "Black")) +
labs(
x = "",
y = "Avg Yards per Play",
title = "Redskins Running Backs in 2019",
subtitle = "AP is a Generational Talent",
caption = "Data: @nflscrapR"
) +
theme(
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(color = "white", size = 1),
panel.ontop = TRUE,
legend.position = "none"
) +
scale_y_continuous(
breaks = seq(0, 15, 1)
)
epa_play <- pbp_rp %>%
filter(pass == 1) %>%
group_by(posteam) %>%
summarize(
n = n(),
epa_per_db = sum(epa, na.rm = TRUE) / n,
success_rate = sum(epa) / n
)
epa_play %>%
ggplot(aes(x = reorder(posteam, epa_per_db), y = epa_per_db)) +
geom_point(aes(color = if_else(epa_per_db >= 0, "#2c7bb6", "#d7181c")),
size = 3
) +
geom_text(aes(
label = posteam,
color = if_else(epa_per_db >= 0, "#2c7bb6", "#d7181c"),
hjust = if_else(epa_per_db > 0, -0.2, 1.2)
)) +
coord_flip() +
scale_fill_identity(aesthetics = c("fill", "colour")) +
theme_538() +
theme(
panel.grid.major.y = element_blank(),
axis.text.y = element_blank()
) +
geom_hline(yintercept = 0) +
scale_y_continuous(breaks = seq(-0.5, 0.5, 0.1)) +
labs(
x = "",
y = "2019 EPA per Dropback",
title = "The majority of teams had positive EPA/dropback",
subtitle = "But there are some clear outliers",
caption = "Data: @nflscrapR"
)
#Data Cleanup
pbp_players <- pbp_rp %>%
mutate(
passer_player_name = ifelse(play_type == "no_play" & pass == 1,
str_extract(desc, "(?<=\\s)[A-Z][a-z]*\\.\\s?[A-Z][A-z]+(\\s(I{2,3})|(IV))?(?=\\s((pass)|(sack)|(scramble)))"),
passer_player_name
),
receiver_player_name = ifelse(play_type == "no_play" & str_detect(desc, "pass"),
str_extract(
desc,
"(?<=to\\s)[A-Z][a-z]*\\.\\s?[A-Z][A-z]+(\\s(I{2,3})|(IV))?"
),
receiver_player_name
),
rusher_player_name = ifelse(play_type == "no_play" & rush == 1,
str_extract(desc, "(?<=\\s)[A-Z][a-z]*\\.\\s?[A-Z][A-z]+(\\s(I{2,3})|(IV))?(?=\\s((left end)|(left tackle)|(left guard)| (up the middle)|(right guard)|(right tackle)|(right end)))"),
rusher_player_name
)
)
#Generate a summarry dataframe
qbs <- pbp_players %>%
mutate(
name = ifelse(!is.na(passer_player_name), passer_player_name, rusher_player_name),
rusher = rusher_player_name,
receiver = receiver_player_name,
play = 1
) %>%
group_by(name, posteam) %>%
summarize(
n_dropbacks = sum(pass),
n_rush = sum(rush),
n_plays = sum(play),
epa_per_play = sum(epa) / n_plays,
success_per_play = sum(success) / n_plays
) %>%
filter(n_dropbacks >= 20) %>%
ungroup() # always ungroup if you no longer need the grouping effect
qbs %>%
ggplot(aes(x = success_per_play, y = epa_per_play)) +
# Notice that color/size inside aes()
geom_point(aes(color = if_else(posteam == "MIN", "#4f2683", "black"), size = n_plays / 60), alpha = 0.50) +
# we need this to assign purple/black to the actual color
scale_color_identity() +
# add labels for all players
geom_text_repel(aes(label = name, color = if_else(posteam == "MIN", "#4f2683", "black")),
force = 1, point.padding = 0.1,
segment.size = 0.2
) +
labs(
x = "Success rate",
y = "EPA per play",
caption = "Data from nflscrapR",
title = "QB success rate and EPA/play",
subtitle = "2019, min 20 pass attempts, includes all QB's rush and pass plays"
) +
theme_bw() +
theme(
axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
plot.title = element_text(size = 16, hjust = 0.5),
plot.subtitle = element_text(size = 14, hjust = 0.5),
plot.caption = element_text(size = 12)
) +
theme(legend.position = "none")
Let’s compare Kirk Cousins to Teddy Bridgewater and Case Keenum this year :(
qbs %>%
ggplot(aes(x = success_per_play, y = epa_per_play)) +
# Notice that color/size inside aes()
geom_point(aes(
color = if_else(posteam == "MIN", "#4f2683", "black"),
size = n_plays / 60
),
alpha = 0.50
) +
# we need this to assign purple/black to the actual color
scale_color_identity() +
# add labels JUST for Bridgewater/Keenum/Cousins with ggforce
geom_mark_hull(
aes(
filter = name %in% c("K.Cousins", "C.Keenum", "T.Bridgewater"),
description = "This is the Bermuda Triangle of Mediocrity"
),
color = "red", label.fontface = "bold", label.colour = "red", con.colour = "red"
) +
labs(
x = "Success rate",
y = "EPA per play",
caption = "Data from nflscrapR",
title = "QB success rate and EPA/play",
subtitle = "2019, min 20 pass attempts, includes all QB's rush and pass plays"
) +
theme_bw() +
theme(
axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
plot.title = element_text(size = 16, hjust = 0.5),
plot.subtitle = element_text(size = 14, hjust = 0.5),
plot.caption = element_text(size = 12)
) +
theme(legend.position = "none")
#get from tutorial
rosters <- read_csv("https://raw.githubusercontent.com/ryurko/nflscrapR-data/master/roster_data/regular_season/reg_roster_2019.csv") %>%
filter(position %in% c("WR", "RB", "FB", "TE"), season == 2019) %>%
mutate(name = abbr_player_name, posteam = team) %>%
select(season, name, posteam, position)
#Cleaning the data again
data_clean <- pbp_rp %>%
filter(pass == 1 & sack == 0 & qb_scramble == 0) %>%
select(
name, pass, desc, posteam, epa, defteam, complete_pass, incomplete_pass,
air_yards, receiver_player_name, down, success, complete_pass
) %>%
left_join(rosters, by = c("receiver_player_name" = "name", "posteam")) %>%
mutate(
qb = ifelse(is.na(position), 0, 1), rec = receiver_player_name,
drop = if_else(str_detect(desc, "(sacked)|(scramble)"), 1, 0)
) %>%
filter(drop == 0)
problem_wrs <- c(
"K.Benjamin", "A.Cooper", "G.Tate", "A.Robinson", "B.Marshall",
"D.Hilliard", "D.Thompson", "De.Thomas", "E.St", "K.Benjamin", "K.Bibbs",
"Ty.Williams", "W.Snead", "W.Snead IV", "T.Pryor", "E.St. Brown",
"A.Robinson II", "J.Gordon", "D.Carter", "B.Ellington",
"A.Holmes", "R.Matthews", "M.Valdes", "V.Bolden"
)
problem_rbs <- c(
"A.Abdullah", "C.Hyde", "Dam.", "T.Montgomery", "A.Ekeler", "T.Yeldon",
"Dam. Williams", "Dar.Williams", "R.Jones II", "C.Anderson"
)
# fix a bunch of problem players
pos <- data_clean %>%
mutate(
position = if_else(
rec %in% problem_wrs, "WR", position
),
position = if_else(
rec %in% problem_rbs, "RB", position
),
position = if_else(position == "FB", "RB", position)
) %>%
filter(!is.na(position), down <= 2)
pos %>%
ggplot(aes(x = air_yards, y = epa, color = if_else(complete_pass == 1, "blue", "red"))) +
geom_point() +
scale_color_identity()
Facet the data by Position (WR, TE, RB) and only show passes that travel between 1-25 yards * Look at the RB concentration at the 0-5 yard mark. + WR has a somewhat linear increase in EPA with an increase in Yards + RB/TE sem so decline dramatically once beyond the 0-10 yard range
pos %>%
mutate(position = factor(position, levels = c("WR", "TE", "RB"))) %>%
filter(between(air_yards, 1, 25)) %>%
group_by(position, air_yards) %>%
mutate(n = n(),
mean = mean(epa)) %>%
ungroup() %>%
ggplot(aes(x = air_yards, y = epa, fill = position)) +
geom_point(aes(group = air_yards), shape = 21, alpha = 0.2, fill = "black") +
geom_point(aes(size = n, x = air_yards, y = mean), shape = 21, stroke = 0.5, color = "white", alpha = 0.8) +
geom_hline(yintercept = 0, size = 1, color = "black") +
stat_smooth(color = "white", method = "loess", alpha = 0.5) +
facet_grid(~position) +
coord_cartesian(ylim = c(-1.5, 5)) +
scale_y_continuous(breaks = seq(-1.5, 4.5, by = 0.5)) +
ggthemes::theme_fivethirtyeight() +
theme(
legend.position = "none",
strip.text = element_text(face = "bold")
) +
scale_fill_manual(
values = c("#00b159", "#003399", "#ff2b4f"),
aesthetics = c("color", "fill")
) +
labs(
x = "Air Yards (Depth of Target)",
y = "EPA\n",
title = "WR and TE EPA generally increases by depth of target",
subtitle = "However, RBs generally don't get targeted at these distances!\n\nPasses = 1st/2nd, Air Yards between 1 and 25",
caption = "Data: @nflscrapR"
)
pass_comp <- pos %>%
mutate(position = factor(position, levels = c("WR", "TE", "RB"))) %>%
filter(between(air_yards, 1, 25)) %>%
group_by(position, air_yards) %>%
summarize(
n = n(),
comp_rate = sum(complete_pass, na.rm = TRUE) / n,
epa = mean(epa, na.rm = TRUE)
)
pass_comp_plot <- pass_comp %>%
ggplot(aes(x = air_yards, y = comp_rate, fill = position)) +
geom_point(aes(size = n), shape = 21, stroke = 0.5) +
geom_smooth(color = "white", method = "loess") +
geom_hline(yintercept = 0, size = 1, color = "black") +
geom_vline(xintercept = 20, size = 1, color = "black", linetype = "dashed", alpha = 0.5) +
geom_hline(yintercept = 0.5, size = 1, color = "black", linetype = "dashed", alpha = 0.5) +
facet_grid(~position) +
ggthemes::theme_fivethirtyeight() +
scale_fill_manual(
values = c("#00b159", "#003399", "#ff2b4f"),
aesthetics = c("color", "fill")
) +
scale_y_continuous(labels = scales::percent) +
labs(
x = "Air Yards (Depth of Target)",
y = "EPA\n",
title = "Completion rate by Depth of Target on 1st/2nd Down",
subtitle = "Completion rate generally drops below 50% for passes > 20 air yards",
caption = "Graph: @thomas_mock | Data: @nflscrapR",
size = "N of Passes"
) +
theme(strip.text = element_text(face = "bold")) +
guides(color = FALSE, fill = FALSE) +
theme(
legend.direction = "vertical",
legend.position = c(0.1, 0.1),
legend.background = element_blank(),
legend.title = element_text(face = "bold")
)
pass_comp_plot
So far in 2019 it appears that Dallas and KC are best in the league in QB efficiency. Lets compare them
dal_color <- teamcolors %>%
filter(name == "Dallas Cowboys") %>%
pull(primary)
kc_color <- teamcolors %>%
filter(name == "Kansas City Chiefs") %>%
pull(primary)
pbp_rp %>%
filter(play_type != "no_play", posteam %in% c("DAL", "KC")) %>%
group_by(posteam, play_type) %>%
summarize(n = n()) %>%
mutate(freq = n / sum(n))
loadfonts(device = "win")
pbp_rp %>%
filter(play_type == "pass") %>%
filter(posteam %in% c("DAL", "KC")) %>%
ggplot(aes(x = air_yards, fill = posteam)) +
geom_histogram(binwidth = 2, alpha = 0.9) +
scale_fill_manual(values = c(dal_color, kc_color)) +
geom_hline(yintercept = 0, size = 1) +
theme_538() +
theme(
legend.title = element_blank(),
legend.position = c(0.6, 0.9)
) +
scale_x_continuous(breaks = seq(-10, 60, 10)) +
labs(
x = "\nAir Yards",
y = "Count",
title = "Dallas vs Kansas City",
caption = "Data: @nflscrapR"
)
pbp_rp %>%
filter(play_type == "pass") %>%
filter(posteam %in% c("DAL", "KC")) %>%
ggplot(aes(x = air_yards, y = ..scaled.., fill = posteam)) +
geom_density(alpha = 0.8) +
scale_fill_manual(values = c(dal_color, kc_color)) +
theme_538() +
theme(
legend.title = element_blank(),
legend.position = c(0.6, 0.9)
) +
scale_x_continuous(breaks = seq(-10, 60, 10))
pbp_rp %>%
filter(play_type == "pass") %>%
filter(posteam %in% c("DAL", "KC")) %>%
ggplot(aes(x = air_yards, y = posteam, fill = posteam)) +
geom_density_ridges() +
scale_fill_manual(values = c(dal_color, kc_color)) +
theme_538() +
theme(
panel.grid.major.y = element_blank(),
legend.position = "none"
) +
scale_x_continuous(breaks = seq(-10, 60, 10)) +
labs(
x = "Air Yards",
y = "",
title = "DAL and KC pass to similar depths of the field",
caption = "Data: @nflscrapR"
)
pbp_rp %>%
filter(play_type == "pass") %>%
filter(posteam %in% c("DAL", "KC")) %>%
ggplot(aes(x = posteam, y = air_yards, fill = posteam)) +
geom_boxplot() +
geom_jitter(width = 0.2, alpha = 0.2) +
scale_fill_manual(values = c(dal_color, kc_color)) +
theme_538() +
theme(legend.position = "none")
pbp_rp %>%
filter(play_type != "no_play", posteam %in% c("DAL", "KC")) %>%
ggplot(aes(x = play_type, y = epa , fill = posteam)) +
geom_boxplot() +
geom_jitter(width = 0.3, alpha = 0.1) +
scale_fill_manual(values = c(dal_color, kc_color), aesthetics = c("fill", "color")) +
theme_538() +
theme(legend.position = "none") +
facet_grid(~posteam)
pbp_rp %>%
filter(play_type != "no_play", posteam %in% c("DAL", "KC")) %>%
ggplot(aes(x = play_type, y = epa, color = posteam)) +
geom_sina(alpha = 0.5) +
scale_fill_manual(values = c(dal_color, kc_color), aesthetics = c("fill", "color")) +
theme_538() +
theme(legend.position = "none") +
facet_grid(~posteam)
pbp_rp %>%
filter(play_type != "no_play", posteam %in% c("DAL", "KC")) %>%
ggplot(
aes(x = play_type, y = epa, color = posteam)
) +
geom_beeswarm(priority = "random", alpha = 0.5, size = 0.5) +
scale_fill_manual(values = c(dal_color, kc_color), aesthetics = c("fill", "color")) +
theme_538() +
theme(legend.position = "none") +
facet_grid(~posteam)
rush_v_pass <- pbp_rp %>%
filter(play_type != "no_play", penalty == 0) %>%
group_by(play_type, posteam) %>%
summarize(avg_yds = mean(yards_gained, na.rm = TRUE)) %>%
ungroup()
nfl_rvp <- pbp_rp %>%
filter(play_type != "no_play") %>%
group_by(play_type) %>%
summarize(avg_yds = mean(yards_gained, na.rm = TRUE)) %>%
ungroup() %>%
mutate(posteam = "NFL")
rush_v_pass <- bind_rows(rush_v_pass, nfl_rvp) %>%
mutate(play_type = factor(play_type,
levels = c("pass", "run"),
labels = c("Pass", "Rush")))
rush_v_pass %>%
ggplot(aes(x = fct_rev(fct_reorder2(posteam, desc(play_type), avg_yds)), y = avg_yds, color = play_type)) +
geom_line(aes(group = posteam), color = "grey", size = 3) +
geom_point(size = 5) +
coord_flip()
rush_v_pass %>%
ggplot(aes(x = fct_rev(fct_reorder2(posteam, desc(play_type), avg_yds)), y = avg_yds, color = play_type)) +
geom_line(aes(group = posteam), color = "grey", size = 3) +
geom_point(size = 5) +
geom_text(
data = filter(rush_v_pass, posteam == "KC" & play_type == "Pass"),
aes(label = play_type),
hjust = 0, nudge_y = 0.2, fontface = "bold", size = 6
) +
geom_text(
data = filter(rush_v_pass, posteam == "KC" & play_type == "Rush"),
aes(label = play_type),
hjust = 1, nudge_y = -0.2, fontface = "bold", size = 6
) +
coord_flip() +
scale_color_manual(values = c("#003399", "#ff2b4f")) +
theme_538() +
theme(
panel.grid.major.y = element_blank(),
legend.position = "none",
axis.text.y = element_text(color = if_else(rush_v_pass$posteam == "NFL", "red", "black"))
) +
labs(
x = "",
y = "\n Average Yards Gained",
title = "Through Week 7",
caption = "Data: @nflscrapR"
) +
scale_y_continuous(
limits = c(0, 15),
breaks = seq(3, 8, 1)
)
game_num <- pbp_rp %>%
mutate(game_week = case_when(
between(game_date, as.Date("2019-09-05"), as.Date("2019-09-11")) ~ 1,
between(game_date, as.Date("2019-09-12"), as.Date("2019-09-18")) ~ 2,
between(game_date, as.Date("2019-09-19"), as.Date("2019-09-25")) ~ 3,
between(game_date, as.Date("2019-09-26"), as.Date("2019-10-02")) ~ 4,
between(game_date, as.Date("2019-10-03"), as.Date("2019-10-09")) ~ 5,
between(game_date, as.Date("2019-10-10"), as.Date("2019-10-16")) ~ 6,
between(game_date, as.Date("2019-10-17"), as.Date("2019-10-23")) ~ 7,
between(game_date, as.Date("2019-10-24"), as.Date("2019-10-30")) ~ 8,
between(game_date, as.Date("2019-10-31"), as.Date("2019-11-06")) ~ 9,
between(game_date, as.Date("2019-11-07"), as.Date("2019-11-13")) ~ 10,
between(game_date, as.Date("2019-11-14"), as.Date("2019-11-20")) ~ 11,
between(game_date, as.Date("2019-11-21"), as.Date("2019-11-27")) ~ 12,
between(game_date, as.Date("2019-11-28"), as.Date("2019-12-04")) ~ 13,
between(game_date, as.Date("2019-12-05"), as.Date("2019-12-11")) ~ 14,
between(game_date, as.Date("2019-12-12"), as.Date("2019-12-18")) ~ 15,
between(game_date, as.Date("2019-12-19"), as.Date("2019-12-25")) ~ 16,
between(game_date, as.Date("2019-12-30"), as.Date("2020-01-01")) ~ 17,
TRUE ~ 99
)
) %>%
filter(game_week != 99)
wk_rvp <- game_num %>%
filter(play_type != "no_play", game_half %in% c("Half1", "Half2")) %>%
mutate(game_half = if_else(game_half == "Half1", "1st Half", "2nd Half")) %>%
group_by(posteam, game_half, game_week, play_type) %>%
count() %>%
ungroup()
bal_rvp <- wk_rvp %>%
filter(posteam == "BAL") %>%
mutate(game_num = if_else(game_week <=11, game_week, game_week - 1),
play_type = if_else(play_type == "run", "Rush", "Pass"),
game_text = glue::glue("Game {game_num}")
)
bal_rvp
bal_rvp %>%
ggplot(aes(x = game_half, y = n, group = game_num)) +
geom_point() +
geom_line() +
facet_grid(~play_type)
bal_runs <- bal_rvp %>%
filter(play_type == "Rush") %>%
spread(game_half, n) %>%
mutate(balance = if_else(`1st Half` >= `2nd Half`, "Ran More in 1st", "Ran More in 2nd")) %>%
gather(key = "game_half", value = "n", `1st Half`:`2nd Half`) %>%
select(posteam, game_num,game_half, balance)
bal_runs
#THIS NEEDS MORE WORK
bal_rvp %>%
ggplot(
aes(
x = game_half, y = n, group = game_week,
color = if_else(game_num %in% c(1, 2, 3, 4), "#241773", "#003399")
)
) +
geom_point() +
geom_vline(xintercept = c(1, 2), size = 2, color = "black", alpha = 0.5) +
geom_line(size = 2) +
geom_point(size = 5) +
geom_text_repel(
data = filter(
bal_rvp, game_num %in% c(1, 2, 3),
game_half == "2nd Half"
),
aes(label = game_text),
direction = "y", nudge_x = 0.1, segment.size = 0.1, hjust = 0,
size = 5, fontface = "bold"
) +
facet_grid(~play_type) +
scale_color_identity() +
theme_538() +
theme(panel.grid.major.x = element_blank()) +
labs(x = "", y = "N of Plays\n",
title = "The Ravens throw more than you think! ",
caption = "Data: @nflscrapR")
schotty %>%
slice(1:5, 28:32) %>%
gt()
posteam | mean_pass | plays |
---|---|---|
WAS | 0.4444444 | 135 |
DEN | 0.4594595 | 148 |
PHI | 0.4765625 | 128 |
IND | 0.4806202 | 129 |
SEA | 0.4842767 | 159 |
CHI | 0.6030534 | 131 |
LA | 0.6038961 | 154 |
GB | 0.6093750 | 128 |
BUF | 0.6527778 | 144 |
KC | 0.7400000 | 150 |
schotty_gt <- schotty %>%
slice(1:5, 28:32) %>%
arrange(desc(mean_pass)) %>%
mutate(play_focus = if_else(mean_pass >= .50, "Pass Heavy", "Run Heavy")) %>%
group_by(play_focus) %>%
gt()
schotty_gt
posteam | mean_pass | plays |
---|---|---|
Pass Heavy | ||
KC | 0.7400000 | 150 |
BUF | 0.6527778 | 144 |
GB | 0.6093750 | 128 |
LA | 0.6038961 | 154 |
CHI | 0.6030534 | 131 |
Run Heavy | ||
SEA | 0.4842767 | 159 |
IND | 0.4806202 | 129 |
PHI | 0.4765625 | 128 |
DEN | 0.4594595 | 148 |
WAS | 0.4444444 | 135 |
schotty_gt %>%
fmt_percent(columns = vars(mean_pass), decimals = 1) %>%
tab_header(
title = "Percentage of Passes by teams on 1st/2nd Down in 1st Half",
subtitle = "Win Prob between 20 & 80, excludes final 2 minutes of the half"
) %>%
cols_label(
posteam = "Player",
mean_pass = "Pass %",
plays = "Plays"
) %>%
cols_align(
align = "center"
)
Percentage of Passes by teams on 1st/2nd Down in 1st Half | ||
---|---|---|
Win Prob between 20 & 80, excludes final 2 minutes of the half | ||
Player | Pass % | Plays |
Pass Heavy | ||
KC | 74.0% | 150 |
BUF | 65.3% | 144 |
GB | 60.9% | 128 |
LA | 60.4% | 154 |
CHI | 60.3% | 131 |
Run Heavy | ||
SEA | 48.4% | 159 |
IND | 48.1% | 129 |
PHI | 47.7% | 128 |
DEN | 45.9% | 148 |
WAS | 44.4% | 135 |
# 2019 and pass plays
pass_2019 <- pbp_rp %>%
filter(play_type == "pass", penalty == 0, sack == 0, qb_scramble == 0)
third_down_passes <- pass_2019 %>%
filter(down == 3, ydstogo <= 10) %>%
group_by(receiver_player_name) %>%
mutate(converted = if_else(yards_gained > ydstogo, 1, 0)) %>%
select(receiver_player_name, yards_gained, ydstogo, epa, converted) %>%
summarise(
mean_epa = mean(epa, na.rm = TRUE),
mean_yardage = mean(yards_gained, na.rm = TRUE),
mean_ydstogo = mean(ydstogo, na.rm = TRUE),
n = n(),
conv_rate = sum(converted) / n
) %>%
ungroup() %>%
arrange(desc(conv_rate))
rbs <- c(
"A.Kamara", "J.White", "J.Conner", "C.McCaffrey", "S.Barkley", "E.Elliott",
"J.Mixon", "T.Gurley", "D.Johnson", "D.Cook", "N.Chubb", "J.Jacobs", "M.Ingram",
"C.Hyde", "C.Thompson", "L.Fournette", "M.Breida", "F.Gore", "K.Johnson", "C.Carson",
"A.Ekeler", "D.Henry"
)
wrs <- c(
"K.Allen", "L.Fitzgerald", "C.Kupp", "M.Sanu", "Z.Jones", "J.Edelman",
"J.Wright", "W.Snead IV", "A.Miller", "T.Lockett", "T.Gabriel", "S.Shepard", "C.Beasley",
"T.Boyd", "J.Landry", "E.Sanders", "R.Cobb", "K.Coutee", "C.Rodgers", "D.Westbrook",
"D.Amendola", "A.Thielen", "M.Thomas", "N.Agholor", "J.Crowder"
)
tes <- c("T.Kelce", "Z.Ertz", "G. Kittle", "E.Engram", "J.Cook", "E.Ebron",
"G.Olsen", "D.Waller", "M.Andrews")
top_players <- c(rbs, wrs, tes)
third_conv_table <- third_down_passes %>%
filter(n >= 4) %>%
mutate(position = case_when(
receiver_player_name %in% rbs ~ "RB",
receiver_player_name %in% wrs ~ "WR",
receiver_player_name %in% tes ~ "TE",
TRUE ~ NA_character_
),
position = factor(position, levels = c("RB", "WR", "TE"))
) %>%
filter(receiver_player_name %in% top_players) %>%
select(receiver_player_name, conv_rate, n, everything(), -mean_epa) %>%
group_by(position) %>%
arrange(desc(conv_rate)) %>%
ungroup() %>%
gt::gt(groupname_col = "position")
third_conv_table
receiver_player_name | conv_rate | n | mean_yardage | mean_ydstogo |
---|---|---|---|---|
RB | ||||
C.McCaffrey | 0.7500000 | 8 | 10.500000 | 5.125000 |
J.Conner | 0.7142857 | 7 | 10.285714 | 4.857143 |
A.Kamara | 0.6000000 | 5 | 6.000000 | 4.600000 |
A.Ekeler | 0.5000000 | 6 | 4.000000 | 4.666667 |
D.Cook | 0.5000000 | 4 | 7.250000 | 5.750000 |
E.Elliott | 0.5000000 | 4 | 7.000000 | 5.250000 |
J.White | 0.5000000 | 24 | 8.583333 | 6.583333 |
D.Johnson | 0.4090909 | 22 | 7.000000 | 6.318182 |
S.Barkley | 0.4000000 | 5 | 6.400000 | 5.200000 |
K.Johnson | 0.3333333 | 12 | 5.333333 | 6.333333 |
L.Fournette | 0.2222222 | 9 | 4.222222 | 7.111111 |
C.Thompson | 0.1666667 | 6 | 2.666667 | 6.166667 |
WR | ||||
T.Gabriel | 0.7500000 | 4 | 9.250000 | 6.750000 |
M.Sanu | 0.7142857 | 7 | 11.857143 | 8.142857 |
S.Shepard | 0.6666667 | 9 | 10.000000 | 6.777778 |
K.Allen | 0.5882353 | 17 | 7.647059 | 5.529412 |
M.Thomas | 0.5789474 | 19 | 9.315789 | 5.789474 |
D.Amendola | 0.5555556 | 9 | 8.111111 | 5.777778 |
R.Cobb | 0.5555556 | 9 | 5.888889 | 5.444444 |
A.Thielen | 0.5384615 | 13 | 7.846154 | 6.615385 |
C.Kupp | 0.5384615 | 26 | 12.423077 | 5.961538 |
A.Miller | 0.5000000 | 6 | 8.833333 | 3.666667 |
N.Agholor | 0.5000000 | 14 | 6.285714 | 4.500000 |
T.Lockett | 0.5000000 | 10 | 11.600000 | 6.300000 |
J.Crowder | 0.4545455 | 11 | 7.636364 | 4.909091 |
C.Beasley | 0.4285714 | 14 | 4.928571 | 6.500000 |
T.Boyd | 0.4117647 | 17 | 5.941176 | 5.470588 |
J.Edelman | 0.4000000 | 10 | 4.900000 | 5.600000 |
J.Wright | 0.3750000 | 8 | 8.125000 | 6.000000 |
L.Fitzgerald | 0.3750000 | 16 | 4.062500 | 5.437500 |
K.Coutee | 0.2857143 | 7 | 8.142857 | 6.142857 |
D.Westbrook | 0.2222222 | 9 | 4.333333 | 4.222222 |
J.Landry | 0.2000000 | 10 | 5.000000 | 4.900000 |
E.Sanders | 0.1666667 | 6 | 1.666667 | 5.166667 |
TE | ||||
Z.Ertz | 0.5833333 | 12 | 7.833333 | 5.416667 |
T.Kelce | 0.5625000 | 16 | 7.562500 | 5.562500 |
E.Ebron | 0.5000000 | 10 | 6.700000 | 5.800000 |
J.Cook | 0.4545455 | 11 | 8.636364 | 4.727273 |
G.Olsen | 0.3750000 | 8 | 4.000000 | 5.500000 |
D.Waller | 0.3333333 | 9 | 3.333333 | 5.666667 |
M.Andrews | 0.3333333 | 15 | 6.133333 | 6.733333 |
E.Engram | 0.2500000 | 12 | 3.250000 | 3.416667 |
third_conv_table %>%
tab_header(
title = "3rd Down Conversion Rates (Slot WR vs RB vs TE)",
subtitle = "Yds to go <= 10, N of Plays >= 4"
) %>%
fmt_percent(.,
columns = vars(conv_rate),
decimals = 1
) %>%
fmt_number(
columns = vars(mean_yardage, mean_ydstogo),
decimals = 1
) %>%
cols_label(
receiver_player_name = "Player",
mean_yardage = "Yds Gained",
mean_ydstogo = "Yds to Go",
n = "Plays",
conv_rate = "Conversion Rate"
) %>%
cols_align(
align = "center"
) %>%
tab_source_note(
source_note = "Table: @thomas_mock | Data: @nflscrapR"
) %>%
tab_footnote(
footnote = "Average Yards",
locations = cells_column_labels(
columns = vars(mean_yardage, mean_ydstogo)
)
)
3rd Down Conversion Rates (Slot WR vs RB vs TE) | ||||
---|---|---|---|---|
Yds to go <= 10, N of Plays >= 4 | ||||
Player | Conversion Rate | Plays | Yds Gained1 | Yds to Go1 |
RB | ||||
C.McCaffrey | 75.0% | 8 | 10.5 | 5.1 |
J.Conner | 71.4% | 7 | 10.3 | 4.9 |
A.Kamara | 60.0% | 5 | 6.0 | 4.6 |
A.Ekeler | 50.0% | 6 | 4.0 | 4.7 |
D.Cook | 50.0% | 4 | 7.2 | 5.8 |
E.Elliott | 50.0% | 4 | 7.0 | 5.2 |
J.White | 50.0% | 24 | 8.6 | 6.6 |
D.Johnson | 40.9% | 22 | 7.0 | 6.3 |
S.Barkley | 40.0% | 5 | 6.4 | 5.2 |
K.Johnson | 33.3% | 12 | 5.3 | 6.3 |
L.Fournette | 22.2% | 9 | 4.2 | 7.1 |
C.Thompson | 16.7% | 6 | 2.7 | 6.2 |
WR | ||||
T.Gabriel | 75.0% | 4 | 9.2 | 6.8 |
M.Sanu | 71.4% | 7 | 11.9 | 8.1 |
S.Shepard | 66.7% | 9 | 10.0 | 6.8 |
K.Allen | 58.8% | 17 | 7.6 | 5.5 |
M.Thomas | 57.9% | 19 | 9.3 | 5.8 |
D.Amendola | 55.6% | 9 | 8.1 | 5.8 |
R.Cobb | 55.6% | 9 | 5.9 | 5.4 |
A.Thielen | 53.8% | 13 | 7.8 | 6.6 |
C.Kupp | 53.8% | 26 | 12.4 | 6.0 |
A.Miller | 50.0% | 6 | 8.8 | 3.7 |
N.Agholor | 50.0% | 14 | 6.3 | 4.5 |
T.Lockett | 50.0% | 10 | 11.6 | 6.3 |
J.Crowder | 45.5% | 11 | 7.6 | 4.9 |
C.Beasley | 42.9% | 14 | 4.9 | 6.5 |
T.Boyd | 41.2% | 17 | 5.9 | 5.5 |
J.Edelman | 40.0% | 10 | 4.9 | 5.6 |
J.Wright | 37.5% | 8 | 8.1 | 6.0 |
L.Fitzgerald | 37.5% | 16 | 4.1 | 5.4 |
K.Coutee | 28.6% | 7 | 8.1 | 6.1 |
D.Westbrook | 22.2% | 9 | 4.3 | 4.2 |
J.Landry | 20.0% | 10 | 5.0 | 4.9 |
E.Sanders | 16.7% | 6 | 1.7 | 5.2 |
TE | ||||
Z.Ertz | 58.3% | 12 | 7.8 | 5.4 |
T.Kelce | 56.2% | 16 | 7.6 | 5.6 |
E.Ebron | 50.0% | 10 | 6.7 | 5.8 |
J.Cook | 45.5% | 11 | 8.6 | 4.7 |
G.Olsen | 37.5% | 8 | 4.0 | 5.5 |
D.Waller | 33.3% | 9 | 3.3 | 5.7 |
M.Andrews | 33.3% | 15 | 6.1 | 6.7 |
E.Engram | 25.0% | 12 | 3.2 | 3.4 |
Table: @thomas_mock | Data: @nflscrapR | ||||
1
Average Yards
|
url <- "https://fivethirtyeight.com/features/sorry-running-backs-even-your-receiving-value-can-be-easily-replaced/"
rb_receiving <- url %>%
xml2::read_html() %>%
rvest::html_table() %>%
purrr::chuck(1) %>%
purrr::set_names(nm = c("team", "attempts", "successful", "success_rate")) %>%
dplyr::as_tibble() %>%
filter(team != "team") %>%
mutate(success_rate = stringr::str_remove(success_rate, "%")) %>%
mutate_at(.vars = vars(attempts:success_rate), as.double)
rb_receiving
rb_receiving %>%
mutate(
success_rate = if_else(team == "Kansas City",
success_rate / 100,
success_rate
)
) %>%
gt() %>%
tab_spanner(
label = "PASSES TO RBS",
columns = vars(attempts, successful)
) %>%
tab_options(
table.border.top.color = "white",
row.striping.include_table_body = FALSE,
row.padding = px(4)
) %>%
tab_source_note(
source_note = "SOURCE: ESPN STATS & INFORMATION GROUP"
) %>%
tab_style(
style = list(
cell_fill(color = "lightblue")
),
locations = cells_data(
columns = vars(success_rate)
)
) %>%
fmt_percent(
columns = vars(success_rate),
rows = 1,
decimals = 1
) %>%
cols_label(
team = "TEAM",
attempts = "ATTEMPTS",
successful = "SUCCESSFUL",
success_rate = "SUCCESS RATE"
)
TEAM | PASSES TO RBS | SUCCESS RATE | |
---|---|---|---|
ATTEMPTS | SUCCESSFUL | ||
Kansas City | 99 | 62 | 62.6% |
Carolina | 134 | 73 | 54.5 |
San Francisco | 107 | 58 | 54.2 |
L.A. Chargers | 138 | 73 | 52.9 |
L.A. Rams | 93 | 48 | 51.6 |
Pittsburgh | 110 | 56 | 50.9 |
Oakland | 132 | 67 | 50.8 |
New England | 170 | 86 | 50.6 |
Chicago | 131 | 66 | 50.4 |
New Orleans | 142 | 71 | 50.0 |
Seattle | 84 | 42 | 50.0 |
Cleveland | 109 | 54 | 49.5 |
Miami | 101 | 50 | 49.5 |
Jacksonville | 133 | 62 | 46.6 |
Green Bay | 97 | 45 | 46.4 |
Denver | 128 | 58 | 45.3 |
Baltimore | 91 | 41 | 45.1 |
Minnesota | 98 | 44 | 44.9 |
Atlanta | 87 | 39 | 44.8 |
N.Y. Jets | 103 | 46 | 44.7 |
Philadelphia | 101 | 45 | 44.6 |
Indianapolis | 126 | 55 | 43.7 |
Cincinnati | 108 | 47 | 43.5 |
Detroit | 143 | 62 | 43.4 |
Tennessee | 86 | 36 | 41.9 |
N.Y. Giants | 149 | 60 | 40.3 |
Washington | 108 | 43 | 39.8 |
Tampa Bay | 88 | 35 | 39.8 |
Buffalo | 93 | 35 | 37.6 |
Houston | 67 | 25 | 37.3 |
Dallas | 111 | 41 | 36.9 |
Arizona | 109 | 38 | 34.9 |
SOURCE: ESPN STATS & INFORMATION GROUP |
url <- "https://fivethirtyeight.com/features/are-we-sure-aaron-rodgers-is-still-an-elite-quarterback/"
rodgers <- url %>%
xml2::read_html() %>%
rvest::html_table() %>%
purrr::chuck(1) %>%
janitor::clean_names() %>%
dplyr::rename("rank" = x) %>%
dplyr::as_tibble() %>%
mutate(yards = stringr::str_remove(yards, ",")) %>%
mutate_at(.vars = vars(g:qbr), as.double)
rodgers
rodgers_table <- rodgers %>%
gt() %>%
tab_options(
table.border.top.color = "white",
row.striping.include_table_body = FALSE,
row.padding = px(4)
) %>%
tab_source_note(
source_note = "SOURCE: ESPN STATS & INFORMATION GROUP"
) %>%
fmt_number(
columns = vars(yards),
decimals = 0
) %>%
cols_label(
rank = "",
player = "PLAYER",
g = "G",
dropbacks = "DROPBACKS",
yards = "YARDS",
yards_per_dropback = "YARDS PER DROPBACK",
qbr = "QBR"
)
rodgers_table
PLAYER | G | DROPBACKS | YARDS | YARDS PER DROPBACK | QBR | |
---|---|---|---|---|---|---|
1 | Deshaun Watson | 22 | 211 | 1,851 | 8.8 | 86.2 |
2 | Tom Brady | 60 | 450 | 4,364 | 9.7 | 82.9 |
3 | Drew Brees | 62 | 392 | 3,137 | 8.0 | 82.1 |
4 | Carson Palmer | 38 | 223 | 2,214 | 9.9 | 82.1 |
5 | Russell Wilson | 64 | 531 | 4,186 | 7.9 | 80.8 |
6 | Philip Rivers | 64 | 342 | 3,254 | 9.5 | 80.7 |
7 | Andrew Luck | 38 | 286 | 2,559 | 8.9 | 80.6 |
8 | Matt Ryan | 64 | 562 | 5,285 | 9.4 | 80.1 |
9 | Case Keenum | 44 | 351 | 2,999 | 8.5 | 79.4 |
10 | Kirk Cousins | 64 | 431 | 3,892 | 9.0 | 78.9 |
11 | Jameis Winston | 54 | 377 | 2,998 | 8.0 | 77.6 |
12 | Dak Prescott | 48 | 323 | 2,350 | 7.3 | 77.1 |
13 | Matthew Stafford | 64 | 351 | 3,092 | 8.8 | 77.0 |
14 | Blaine Gabbert | 21 | 145 | 1,039 | 7.2 | 76.8 |
15 | Carson Wentz | 40 | 339 | 2,668 | 7.9 | 75.8 |
16 | Nick Foles | 20 | 153 | 1,243 | 8.1 | 75.5 |
17 | Patrick Mahomes | 17 | 153 | 1,401 | 9.2 | 74.8 |
18 | Tyrod Taylor | 45 | 297 | 2,301 | 7.7 | 74.6 |
19 | Ryan Fitzpatrick | 37 | 223 | 1,856 | 8.3 | 74.3 |
20 | Andy Dalton | 56 | 401 | 3,236 | 8.1 | 73.1 |
21 | Sam Bradford | 34 | 229 | 2,219 | 9.7 | 71.7 |
22 | Brian Hoyer | 20 | 173 | 1,395 | 8.1 | 69.6 |
23 | Jay Cutler | 34 | 200 | 1,516 | 7.6 | 69.4 |
24 | Jared Goff | 38 | 336 | 2,979 | 8.9 | 68.8 |
25 | Blake Bortles | 60 | 371 | 2,752 | 7.4 | 68.1 |
26 | Ben Roethlisberger | 56 | 225 | 1,898 | 8.4 | 65.2 |
27 | Marcus Mariota | 55 | 376 | 3,170 | 8.4 | 62.5 |
28 | Derek Carr | 62 | 329 | 2,224 | 6.8 | 61.3 |
29 | Ryan Tannehill | 40 | 263 | 2,053 | 7.8 | 60.0 |
30 | Alex Smith | 56 | 301 | 2,275 | 7.6 | 60.0 |
31 | Eli Manning | 63 | 433 | 3,261 | 7.5 | 59.9 |
32 | Aaron Rodgers | 55 | 330 | 2,000 | 6.1 | 59.8 |
33 | Teddy Bridgewater | 17 | 132 | 959 | 7.3 | 58.0 |
34 | Cam Newton | 60 | 432 | 2,965 | 6.9 | 57.2 |
35 | Mitchell Trubisky | 26 | 158 | 1,069 | 6.8 | 57.0 |
36 | Colin Kaepernick | 19 | 164 | 1,140 | 7.0 | 53.9 |
37 | Trevor Siemian | 24 | 177 | 1,180 | 6.7 | 52.1 |
38 | Josh McCown | 27 | 141 | 1,012 | 7.2 | 51.2 |
39 | Jacoby Brissett | 17 | 122 | 769 | 6.3 | 45.0 |
40 | Joe Flacco | 51 | 410 | 2,866 | 7.0 | 40.5 |
41 | Brock Osweiler | 30 | 200 | 1,356 | 6.8 | 37.9 |
SOURCE: ESPN STATS & INFORMATION GROUP |
rodgers_table %>%
# This is the big player
data_color(
columns = vars(yards_per_dropback),
colors = scales::col_numeric(
palette = c("#F8F8F8","#30a2da"),
domain = NULL
)
) %>%
tab_style(
style = cell_text(
weight = "bold"
),
locations = cells_data(
rows = player == "Aaron Rodgers"
)
)
PLAYER | G | DROPBACKS | YARDS | YARDS PER DROPBACK | QBR | |
---|---|---|---|---|---|---|
1 | Deshaun Watson | 22 | 211 | 1,851 | 8.8 | 86.2 |
2 | Tom Brady | 60 | 450 | 4,364 | 9.7 | 82.9 |
3 | Drew Brees | 62 | 392 | 3,137 | 8.0 | 82.1 |
4 | Carson Palmer | 38 | 223 | 2,214 | 9.9 | 82.1 |
5 | Russell Wilson | 64 | 531 | 4,186 | 7.9 | 80.8 |
6 | Philip Rivers | 64 | 342 | 3,254 | 9.5 | 80.7 |
7 | Andrew Luck | 38 | 286 | 2,559 | 8.9 | 80.6 |
8 | Matt Ryan | 64 | 562 | 5,285 | 9.4 | 80.1 |
9 | Case Keenum | 44 | 351 | 2,999 | 8.5 | 79.4 |
10 | Kirk Cousins | 64 | 431 | 3,892 | 9.0 | 78.9 |
11 | Jameis Winston | 54 | 377 | 2,998 | 8.0 | 77.6 |
12 | Dak Prescott | 48 | 323 | 2,350 | 7.3 | 77.1 |
13 | Matthew Stafford | 64 | 351 | 3,092 | 8.8 | 77.0 |
14 | Blaine Gabbert | 21 | 145 | 1,039 | 7.2 | 76.8 |
15 | Carson Wentz | 40 | 339 | 2,668 | 7.9 | 75.8 |
16 | Nick Foles | 20 | 153 | 1,243 | 8.1 | 75.5 |
17 | Patrick Mahomes | 17 | 153 | 1,401 | 9.2 | 74.8 |
18 | Tyrod Taylor | 45 | 297 | 2,301 | 7.7 | 74.6 |
19 | Ryan Fitzpatrick | 37 | 223 | 1,856 | 8.3 | 74.3 |
20 | Andy Dalton | 56 | 401 | 3,236 | 8.1 | 73.1 |
21 | Sam Bradford | 34 | 229 | 2,219 | 9.7 | 71.7 |
22 | Brian Hoyer | 20 | 173 | 1,395 | 8.1 | 69.6 |
23 | Jay Cutler | 34 | 200 | 1,516 | 7.6 | 69.4 |
24 | Jared Goff | 38 | 336 | 2,979 | 8.9 | 68.8 |
25 | Blake Bortles | 60 | 371 | 2,752 | 7.4 | 68.1 |
26 | Ben Roethlisberger | 56 | 225 | 1,898 | 8.4 | 65.2 |
27 | Marcus Mariota | 55 | 376 | 3,170 | 8.4 | 62.5 |
28 | Derek Carr | 62 | 329 | 2,224 | 6.8 | 61.3 |
29 | Ryan Tannehill | 40 | 263 | 2,053 | 7.8 | 60.0 |
30 | Alex Smith | 56 | 301 | 2,275 | 7.6 | 60.0 |
31 | Eli Manning | 63 | 433 | 3,261 | 7.5 | 59.9 |
32 | Aaron Rodgers | 55 | 330 | 2,000 | 6.1 | 59.8 |
33 | Teddy Bridgewater | 17 | 132 | 959 | 7.3 | 58.0 |
34 | Cam Newton | 60 | 432 | 2,965 | 6.9 | 57.2 |
35 | Mitchell Trubisky | 26 | 158 | 1,069 | 6.8 | 57.0 |
36 | Colin Kaepernick | 19 | 164 | 1,140 | 7.0 | 53.9 |
37 | Trevor Siemian | 24 | 177 | 1,180 | 6.7 | 52.1 |
38 | Josh McCown | 27 | 141 | 1,012 | 7.2 | 51.2 |
39 | Jacoby Brissett | 17 | 122 | 769 | 6.3 | 45.0 |
40 | Joe Flacco | 51 | 410 | 2,866 | 7.0 | 40.5 |
41 | Brock Osweiler | 30 | 200 | 1,356 | 6.8 | 37.9 |
SOURCE: ESPN STATS & INFORMATION GROUP |