# ----------------------------------------------------------
# Step 1: Install required packages (if missing)
# ----------------------------------------------------------

# Install each required package only when it is not already installed.
# requireNamespace() just checks availability: unlike require(), it does not
# attach the package or emit a warning when the package is missing. Attaching
# is done explicitly by the library() calls that follow.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
if (!requireNamespace("nflreadr", quietly = TRUE)) {
  install.packages("nflreadr")
}
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}

library(tidyverse)
library(nflreadr)
library(plotly)
# ----------------------------------------------------------
# Step 2: Set global options
# ----------------------------------------------------------

# Penalise scientific notation heavily so numbers print in fixed notation
options(list(scipen = 9999))
# ----------------------------------------------------------
# Step 3: Load NFL play-by-play data (2025 season)
# ----------------------------------------------------------

# Change the season here to analyse a different year
data <- load_pbp(seasons = 2025)
# ----------------------------------------------------------
# Step 4: Filter for run or pass plays with EPA
# ----------------------------------------------------------

# Keep plays flagged as a rush or a pass that also have a non-missing EPA.
pbp_rp <- filter(
  data,
  !is.na(epa),
  rush == 1 | pass == 1
)
# ----------------------------------------------------------
# Step 5: Create trimmed data frame for analysis
# ----------------------------------------------------------

# Keep only the columns used by the filtering and summary steps that follow.
mydata <- select(
  pbp_rp,
  posteam, pass, wp, qtr, down, half_seconds_remaining
)

# Print a compact preview of the trimmed data frame's columns.
mydata %>% glimpse()
# ----------------------------------------------------------
# Step 6: Filter for early-down, first-half, 
# neutral-game-state plays
# ----------------------------------------------------------
# Research question:
# Which teams passed most often on early downs (1st and 2nd) in the first
# half, with win probability strictly between 20% and 80%, and outside the
# final 2 minutes of the half?

mydata_summary <- mydata %>%
  filter(
    qtr <= 2,                      # first half only
    half_seconds_remaining > 120,  # exclude the final 2 minutes of the half
    down <= 2,                     # early downs
    wp > 0.20 & wp < 0.80          # neutral game state
  ) %>%
  group_by(posteam) %>%
  summarise(
    mean_pass = mean(pass),  # mean of the pass flag, i.e. the pass rate
    plays = n(),             # number of qualifying plays per team
    .groups = "drop"
  ) %>%
  # Highest pass rate first
  arrange(-mean_pass)
# ----------------------------------------------------------
# Step 7: Order teams for plotting
# ----------------------------------------------------------

# Convert posteam to a factor whose levels run from the highest to the lowest
# mean_pass, so anything that respects factor order shows teams in that order.
mydata_summary$posteam <- with(
  mydata_summary,
  factor(posteam, levels = posteam[order(-mean_pass)])
)
# ----------------------------------------------------------
# Step 8: Create Plotly visualization
# ----------------------------------------------------------

# Plot each team's abbreviation at its pass rate. Teams are positioned along
# the x-axis by the factor-level order of `posteam`; the abbreviation itself
# is drawn as the point (mode = "text"), so the x tick labels are hidden.
graph <- plot_ly(
  data = mydata_summary,
  x = ~posteam,
  y = ~mean_pass,
  type = "scatter",
  mode = "text",
  text = ~posteam,
  textposition = "middle center",
  # customdata carries the play count so the hover template can display it.
  customdata = ~plays,
  # paste0() joins the pieces with no separator; paste() would insert a space
  # after each <br>, leaving a stray leading space on the hover lines.
  hovertemplate = paste0(
    "<b>%{text}</b><br>",
    "Pass rate: %{y:.1%}<br>",
    "Plays: %{customdata}<extra></extra>"
  )
) %>%
  layout(
    # Keep the year in sync with the season passed to load_pbp() (2025).
    title = "Tendency to pass, by NFL team, 2025",
    xaxis = list(
      title = "Team",
      showticklabels = FALSE
    ),
    yaxis = list(
      title = "Percent pass plays",
      tickformat = ".0%"
    )
  )

# Display the chart.
graph