# Sports data analysis with R

# https://www.nflfastr.com/articles/beginners_guide.html

# Install, if needed, and load necessary packages

if (!require("tidyverse")) install.packages("tidyverse")
if (!require("ggrepel")) install.packages("ggrepel")
if (!require("nflreadr")) install.packages("nflreadr")
if (!require("nflplotR")) install.packages("nflplotR")
library(tidyverse)
library(ggrepel)
library(nflreadr)
library(nflplotR)

# Strongly prefer fixed over scientific notation when printing numbers.

options(scipen = 9999)

# Load play-by-play data for the 2024 season. Change the year to
# analyze a different season.

data <- load_pbp(2024)

# Keep only the plays flagged as a rush or a pass that also have a
# non-missing expected points added (epa) value.

pbp_rp <- filter(
  data,
  (rush == 1 | pass == 1) & !is.na(epa)
)

# Create a "mydata" data frame containing only the variables
# needed for the analysis. Then preview the result with glimpse().
# Note: I'm customizing the example code, here.

# Narrow the rush/pass plays down to the six columns the analysis uses,
# then print a column-wise preview of the result.
mydata <- select(
  pbp_rp,
  posteam, pass, wp, qtr, down, half_seconds_remaining
)
glimpse(mydata)

# Further filter the mydata data frame for cases needed to examine
# "Which teams were the most pass-heavy in the first half on early 
# downs with win probability between 20 and 80, excluding the final
# 2 minutes of the half when everyone is pass-happy." Note that the
# code creates a "mean_pass" variable that averages each team's "pass"
# values over the plays that survive the filter (not every play of the
# season). "Pass" is coded 1 for a pass and zero for not a pass, so an
# average above 0.5 indicates more passes than runs. The code collapses
# the data by team, counts the qualifying plays in "plays", sorts by
# mean_pass in descending order, and saves the results in a
# "mydata_summary" data frame.

# Keep only the plays matching every condition below, then compute each
# team's pass share and play count and rank teams from the highest
# mean_pass to the lowest.
mydata_summary <- mydata %>%
  filter(
    qtr <= 2,                      # quarters 1 and 2
    down <= 2,                     # first and second down
    half_seconds_remaining > 120,  # more than 120 seconds left in the half
    wp > .20,                      # win probability strictly inside
    wp < .80                       # the (0.20, 0.80) window
  ) %>%
  group_by(posteam) %>%
  summarise(mean_pass = mean(pass), plays = n()) %>%
  arrange(desc(mean_pass))
glimpse(mydata_summary)

# Graphing the mean_pass values by team.

# Plot mean_pass for each team. Teams are ordered along the x-axis from
# the highest mean_pass to the lowest, and each point is drawn as the
# team's posteam label, so the x-axis tick labels are hidden as redundant.
graph <- ggplot(
  mydata_summary,
  aes(x = reorder(posteam, -mean_pass), y = mean_pass)
) +
  # size is a fixed setting, so it belongs outside aes(). Inside aes() it
  # is treated as a data mapping: the requested size is ignored and a
  # spurious "size" legend is added to the plot.
  geom_text(aes(label = posteam), size = 2) +
  theme(axis.text.x = element_blank())

# Print the plot.
graph