# ----------------------------------------------------------
# Step 1: Install required packages (if missing) and load them
# ----------------------------------------------------------
# Install each package only when it is not already available.
# requireNamespace() checks availability without attaching the package and
# without the warning require() emits on failure; attaching is left to the
# library() calls below, which stop with an error if a package is still
# missing after the install attempt.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
if (!requireNamespace("nflreadr", quietly = TRUE)) {
  install.packages("nflreadr")
}
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}
library(tidyverse)
library(nflreadr)
library(plotly)
# ----------------------------------------------------------
# Step 2: Set global options
# ----------------------------------------------------------
# Turn off scientific notation
options(scipen = 9999)
# ----------------------------------------------------------
# Step 3: Load NFL play-by-play data (2025 season)
# ----------------------------------------------------------
# To analyse a different season, change the year passed to load_pbp()
data <- load_pbp(seasons = 2025)
# ----------------------------------------------------------
# Step 4: Filter for run or pass plays with EPA
# ----------------------------------------------------------
# Keep only plays that have an EPA value and are flagged as a rush or a pass
pbp_rp <- data %>%
  filter(
    !is.na(epa),
    rush == 1 | pass == 1
  )
# ----------------------------------------------------------
# Step 5: Create trimmed data frame for analysis
# ----------------------------------------------------------
# Keep only the columns used by the pass-rate analysis below
mydata <- select(
  pbp_rp,
  posteam, pass, wp, qtr, down, half_seconds_remaining
)
# Print a compact overview of the trimmed data frame
mydata %>% glimpse()
# ----------------------------------------------------------
# Step 6: Filter for early-down, first-half,
# neutral-game-state plays
# ----------------------------------------------------------
# Research question:
# Which teams were the most pass-heavy in the first half on early downs
# with win probability between 20% and 80%, excluding the final
# 2 minutes of the half?
mydata_summary <- mydata %>%
  filter(
    # First half, first or second down
    qtr <= 2 & down <= 2,
    # Win probability strictly between 20% and 80%
    wp > 0.20 & wp < 0.80,
    # More than two minutes left in the half
    half_seconds_remaining > 120
  ) %>%
  group_by(posteam) %>%
  summarise(
    # pass is compared with 1 when filtering upstream, so its mean is read
    # here as the share of pass plays
    mean_pass = mean(pass),
    # Number of plays behind each team's rate
    plays = n(),
    .groups = "drop"
  ) %>%
  # Most pass-heavy teams first
  arrange(desc(mean_pass))
# ----------------------------------------------------------
# Step 7: Order teams for plotting
# ----------------------------------------------------------
# Convert the team code to a factor whose levels run from the highest to the
# lowest pass rate, so anything that respects factor order (such as a plot
# axis) follows that ranking rather than alphabetical order. Each team has
# exactly one row here, so fct_reorder()'s default median summary is simply
# that team's mean_pass.
mydata_summary <- mydata_summary %>%
  mutate(posteam = fct_reorder(posteam, mean_pass, .desc = TRUE))
# ----------------------------------------------------------
# Step 8: Create Plotly visualization
# ----------------------------------------------------------
# Plot each team's pass rate using the team abbreviation itself as the
# marker (text-only scatter). Teams are laid out along the x-axis by the
# posteam factor; the x tick labels are hidden because the markers already
# name the team.
graph <- plot_ly(
  data = mydata_summary,
  x = ~posteam,
  y = ~mean_pass,
  type = "scatter",
  mode = "text",
  text = ~posteam,
  textposition = "middle center",
  # customdata carries the play count so the hover label can display it
  customdata = ~plays,
  # paste0() keeps the template free of the separator spaces paste() would
  # insert after each <br>; <extra></extra> suppresses the trace-name box
  hovertemplate = paste0(
    "<b>%{text}</b><br>",
    "Pass rate: %{y:.1%}<br>",
    "Plays: %{customdata}<extra></extra>"
  )
) %>%
  layout(
    # The year must match the season loaded with load_pbp() above (2025)
    title = "Tendency to pass, by NFL team, 2025",
    xaxis = list(
      title = "Team",
      showticklabels = FALSE
    ),
    yaxis = list(
      title = "Percent pass plays",
      tickformat = ".0%"
    )
  )
# Display the chart
graph