Link to access this: https://rpubs.com/tbreedy/1345623

Test

# ---- Libraries ----
install.packages("nflreadr")
install.packages("tidyverse")
install.packages("lubridate")
install.packages("janitor")
install.packages("scales")
install.packages("gt")
install.packages("ggrepel")
install.packages("zoo")

library(nflreadr)    # data access

## Warning: package 'nflreadr' was built under R version 4.4.3

library(tidyverse)   # dplyr/ggplot/readr, etc.

## Warning: package 'tidyverse' was built under R version 4.4.3

## Warning: package 'lubridate' was built under R version 4.4.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(lubridate)   # dates/times
library(janitor)     # clean_names, tabyl

## Warning: package 'janitor' was built under R version 4.4.3

## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

library(scales)      # pretty labels

## Warning: package 'scales' was built under R version 4.4.3

## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor

library(gt)          # presentation tables (not saved here)

## Warning: package 'gt' was built under R version 4.4.3

library(ggrepel)     # labels for charts

## Warning: package 'ggrepel' was built under R version 4.4.3

library(zoo)         # rolling calcs

## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

Q1

1.1

# ---- Parameters ----
team   <- "IND"
season <- 2025

# ---- 1) Identify Colts' first regular-season game in 2025 ----
sched <- load_schedules(seasons = season) |> clean_names()

colts_g1 <- sched |>
  filter(game_type == "REG",
         season == !!season,
         home_team == !!team | away_team == !!team) |>
  arrange(gameday, week) |>
  slice(1)

colts_g1

## ── nflverse games and schedules ────────────────────────────────────────────────

## ℹ Data updated: 2025-09-21 13:33:11 EDT

## # A tibble: 1 × 46
##   game_id   season game_type  week gameday weekday gametime away_team away_score
##   <chr>      <int> <chr>     <int> <chr>   <chr>   <chr>    <chr>          <int>
## 1 2025_01_…   2025 REG           1 2025-0… Sunday  13:00    MIA                8
## # ℹ 37 more variables: home_team <chr>, home_score <int>, location <chr>,
## #   result <int>, total <int>, overtime <int>, old_game_id <chr>, gsis <int>,
## #   nfl_detail_id <chr>, pfr <chr>, pff <int>, espn <chr>, ftn <int>,
## #   away_rest <int>, home_rest <int>, away_moneyline <int>,
## #   home_moneyline <int>, spread_line <dbl>, away_spread_odds <int>,
## #   home_spread_odds <int>, total_line <dbl>, under_odds <int>,
## #   over_odds <int>, div_game <int>, roof <chr>, surface <chr>, temp <int>, …

Questions

load_schedule loads the scheduled data (really helpful the way nflreadr does this)
get the schedule, filter for regular season, then arrange the games in chronological order so if we select the first game of the data, we get the first game of the season.
you want to make sure the data is arranged first so the first value is the first game. If it was arranged, the first value could be any game in the season, so the first value you pull may not be the first game.

stopifnot(nrow(colts_g1) == 1)

game_id <- colts_g1$game_id
week    <- colts_g1$week
gameday <- colts_g1$gameday
home_tm <- colts_g1$home_team
away_tm <- colts_g1$away_team
opp     <- ifelse(home_tm == team, away_tm, home_tm)

message(glue::glue("Found Colts Week {week} ({gameday}): {away_tm} @ {home_tm} | game_id = {game_id}"))

## Found Colts Week 1 (2025-09-07): MIA @ IND | game_id = 2025_01_MIA_IND

1.2

# ---- 2) Pull core data for this game ----
pbp_g1 <- load_pbp(seasons = season) |>
  clean_names() |>
  filter(game_id == !!game_id)

rw_g1 <- load_rosters_weekly(seasons = season) |>
  clean_names() |>
  filter(week == !!week, team %in% c(team, opp))

officials_g1 <- load_officials(seasons = season) |>
  clean_names() |>
  filter(game_id == !!game_id)

players_master <- load_players() |> clean_names()

# ---- A) Structural sanity: dimensions & basic types ----
cat("\n--- STRUCTURE ---\n")

## 
## --- STRUCTURE ---

cat("PBP rows/cols:", nrow(pbp_g1), ncol(pbp_g1), "\n")

## PBP rows/cols: 152 372

cat("Weekly roster rows/cols:", nrow(rw_g1), ncol(rw_g1), "\n")

## Weekly roster rows/cols: 3047 36

cat("Officials rows/cols:", nrow(officials_g1), ncol(officials_g1), "\n\n")

## Officials rows/cols: 0 9

# ---- B) Key constraints & duplicates ----
# Expect play-level key uniqueness by (game_id, play_id)
dup_plays <- pbp_g1 |>
  count(game_id, play_id) |>
  filter(n > 1)

if (nrow(dup_plays) > 0) {
  warning("Duplicate plays detected in (game_id, play_id). Inspect `dup_plays`.")
} else {
  cat("Key check passed: (game_id, play_id) appear unique for this game.\n")
}

## Key check passed: (game_id, play_id) appear unique for this game.

# For weekly rosters, (team, gsis_id) often repeats by position slots; check basic duplication
dup_roster <- rw_g1 |>
  count(team, gsis_id, week) |>
  filter(n > 1)
if (nrow(dup_roster) > 0) {
  message("Note: roster has repeated (team, gsis_id, week) rows—can be normal when multiple rows exist per role/slot.")
}

# ---- C) Referential integrity (players present in master) ----
# Attempt to match weekly roster players to master by gsis_id (best available common key)
if ("gsis_id" %in% names(rw_g1) && "gsis_id" %in% names(players_master)) {
  roster_master_match <- rw_g1 |>
    mutate(in_master = gsis_id %in% players_master$gsis_id) |>
    summarize(match_rate = mean(in_master, na.rm = TRUE))
  cat("Player master match rate (weekly roster -> master by gsis_id):",
      round(roster_master_match$match_rate*100, 1), "%\n")
} else {
  message("gsis_id not present to check roster->player master referential integrity.")
}

## Player master match rate (weekly roster -> master by gsis_id): 93.3 %

# ---- D) Missingness (critical analytical fields) ----
na_rate <- function(x) mean(is.na(x))
na_summary <- pbp_g1 |>
  summarize(
    na_epa          = na_rate(epa),
    na_success      = na_rate(success),
    na_posteam      = na_rate(posteam),
    na_defteam      = na_rate(defteam),
    na_yardline_100 = na_rate(yardline_100),
    na_down         = na_rate(down),
    na_ydstogo      = na_rate(ydstogo),
    na_play_type    = na_rate(play_type)
  )
cat("\n--- MISSINGNESS (PBP) ---\n"); print(na_summary)

## 
## --- MISSINGNESS (PBP) ---

## # A tibble: 1 × 8
##   na_epa na_success na_posteam na_defteam na_yardline_100 na_down na_ydstogo
##    <dbl>      <dbl>      <dbl>      <dbl>           <dbl>   <dbl>      <dbl>
## 1 0.0132     0.0132     0.0658     0.0658          0.0789   0.171          0
## # ℹ 1 more variable: na_play_type <dbl>

# ---- E) Range & logical checks ----
# yardline_100 should be within [0, 100]
bad_yardline <- pbp_g1 |> filter(!is.na(yardline_100) & (yardline_100 < 0 | yardline_100 > 100))
if (nrow(bad_yardline) > 0) warning("Out-of-range yardline_100 values observed.")

# down should be 1–4 (ignore special plays like kickoffs which may have NA)
bad_down <- pbp_g1 |> filter(!is.na(down) & !(down %in% 1:4))
if (nrow(bad_down) > 0) warning("Unexpected down values outside 1–4.")

# ydstogo should be positive for standard plays
bad_ydstogo <- pbp_g1 |> filter(!is.na(ydstogo) & ydstogo <= 0)
if (nrow(bad_ydstogo) > 0) message("Non-positive ydstogo rows exist (could be goal-to-go anomalies or data glitches).")

## Non-positive ydstogo rows exist (could be goal-to-go anomalies or data glitches).

# game_id consistency
if (length(unique(pbp_g1$game_id)) != 1) warning("Multiple game_ids in pbp_g1; filter may be off.")

# posteam/defteam should be one of the two teams (allow NA on non-plays)
valid_teams <- c(team, opp)
weird_teams <- pbp_g1 |>
  filter(!is.na(posteam) & !(posteam %in% valid_teams)) |>
  distinct(posteam)
if (nrow(weird_teams) > 0) warning("Found posteam not matching Colts or opponent: check `weird_teams`.")

# ---- F) Categorical consistency (team abbreviations) ----
cat("\n--- TEAM ABBREVS IN PBP ---\n")

## 
## --- TEAM ABBREVS IN PBP ---

print(sort(unique(na.omit(pbp_g1$posteam))))

## [1] "IND" "MIA"

print(sort(unique(na.omit(pbp_g1$defteam))))

## [1] "IND" "MIA"

Questions

dup_plays are any plays that are duplicated in the data (each play should have one instance because it happened once).
you want to make sure that there aren’t any egregious errors in the data. It’s impossible for a value to be outside of the range 0-100, but if there is, then there is some sort of error. At a minimum, this could suggest that the yard is unknown (perhaps a -1 or NAN value), but it could also suggest that there is a pervasive issue that alters some subset of the yard data. That could be bad if say a quarter of the data was shifted 5 yards down or up, not too many of the issues would be flagged, so errors would go unnoticed.
goal-to-go anomalies or data glitches. It could potentially be a rounding error (4th and inches?), and maybe there is something strange happening like a kick off or other glitches like a penalty which messes with the value.

Q2

2.1

# ---- 3) Game overview: scoring & tempo ----

pbp_g1 <- pbp_g1 |>
  mutate(
    points_scored = case_when(
      touchdown == 1 ~ 6,                               # Touchdowns
      field_goal_result == "made" ~ 3,                  # Field goals
      safety == 1 ~ 2,                                  # Safeties
      extra_point_result == "good" ~ 1,                 # PAT kicks
      two_point_attempt == 1 & two_point_conv_result == "success" ~ 2, # 2PT conversions
      TRUE ~ 0
    )
  )

Questions

case_when is nice for interpretability when working with so many datatypes, especially when they are conditional and non-numeric. Formulas could be made such as, say, sum(touchdowns) * 6 to get touchdown points (if a touchdown is a 1, then it counts the number of touchdowns and multiplies by 6 points). However, field_goal_made would need to be converted from “made” to the value 1, then that needs to be put into a formula. It’s complicated, but case_when makes it easier.
The scores would be understated by 2x the number of successful two point conversions omitted. This would also affect every other statistic that may be based off of the success of this play (player ratings, win/loss if calculated dependent from this score calculation, etc).

2.2

score_by_q <- pbp_g1 |>
  filter(!is.na(qtr), qtr %in% 1:4) |>
  group_by(qtr) |>
  summarize(
    ind_points = sum(if_else(posteam == "IND", points_scored, 0), na.rm = TRUE),
    opp_points = sum(if_else(posteam == opp,   points_scored, 0), na.rm = TRUE),
    plays = n(),
    .groups = "drop"
  )
cat("\n--- SCORE BY QUARTER (derived from PBP points events) ---\n")

## 
## --- SCORE BY QUARTER (derived from PBP points events) ---

print(score_by_q)

## # A tibble: 4 × 4
##     qtr ind_points opp_points plays
##   <dbl>      <dbl>      <dbl> <int>
## 1     1          3          0    31
## 2     2         17          0    45
## 3     3          3          0    33
## 4     4         10          8    43

Questions

You need to group by quarter first or else you’d summarize on the entire data set. In other words, you wouldn’t get the data aggregated by quarter.
You could do this many ways. You could group by halves, you could create a new variable called halves and put 1 or 2 based on the quarter value, you could even group by quarter then add the 1-2 and 3-4 quarter values together to make the first and second half values.

2.3

tempo <- pbp_g1 |>
  filter(!is.na(posteam), !is.na(game_seconds_remaining)) |>
  arrange(game_seconds_remaining) |>
  group_by(posteam) |>
  mutate(sec_between = lag(game_seconds_remaining) - game_seconds_remaining) |>
  summarize(sec_per_play = median(sec_between, na.rm = TRUE), .groups = "drop") |>
  filter(!is.na(sec_per_play))
cat("\n--- TEMPO (median sec/play) ---\n"); print(tempo)

## 
## --- TEMPO (median sec/play) ---

## # A tibble: 2 × 2
##   posteam sec_per_play
##   <chr>          <dbl>
## 1 IND            -38.5
## 2 MIA            -34.5

# ---- 4) Offensive profile: play mix & efficiency ----
off_mix <- pbp_g1 |>
  filter(!is.na(posteam)) |>
  mutate(play_family = case_when(
    play_type %in% c("run") ~ "Run",
    play_type %in% c("pass") ~ "Pass",
    play_type %in% c("qb_kneel","qb_spike") ~ "Clock",
    play_type %in% c("no_play","timeout") ~ "Other",
    TRUE ~ "Other"
  )) |>
  group_by(posteam, play_family) |>
  summarize(
    plays          = n(),
    share          = n()/nrow(pbp_g1),
    epa_per_play   = mean(epa, na.rm = TRUE),
    success_rate   = mean(success, na.rm = TRUE),
    yards_per_play = mean(yards_gained, na.rm = TRUE),
    .groups = "drop"
  ) |>
  arrange(posteam, desc(plays))
cat("\n--- OFFENSIVE MIX & EFFICIENCY ---\n"); print(off_mix, n = 50)

## 
## --- OFFENSIVE MIX & EFFICIENCY ---

## # A tibble: 6 × 7
##   posteam play_family plays  share epa_per_play success_rate yards_per_play
##   <chr>   <chr>       <int>  <dbl>        <dbl>        <dbl>          <dbl>
## 1 IND     Run            40 0.263         0.138        0.5             3.9 
## 2 IND     Pass           30 0.197         0.433        0.567           8.73
## 3 IND     Other          15 0.0987        0.190        0.667           0   
## 4 MIA     Pass           35 0.230        -0.461        0.371           3.86
## 5 MIA     Run            12 0.0789        0.339        0.583           6.5 
## 6 MIA     Other          10 0.0658        0.352        0.778           0

Questions

There may be different play types that we are interested in, such as qb_kneel or qb_spike, so instead of putting an or statement when looking at both, we can simply filter by clock.
sum that value and ensure it adds up to 1

# EPA distribution (visual)
epa_dist <- pbp_g1 |>
  filter(!is.na(posteam), play_type %in% c("run","pass")) |>
  mutate(is_colts = if_else(posteam == "IND", "Colts", "Opponent"))
ggplot(epa_dist, aes(x = epa, fill = is_colts)) +
  geom_histogram(bins = 40, alpha = 0.6, position = "identity") +
  geom_vline(data = epa_dist |> group_by(is_colts) |> summarize(mu = mean(epa, na.rm = TRUE)),
             aes(xintercept = mu, color = is_colts), linewidth = 0.9) +
  scale_x_continuous(labels = number_format(accuracy = 0.1)) +
  labs(title = "EPA distribution (pass & run plays)",
       x = "EPA per play", y = "Count", fill = "", color = "") +
  theme_minimal()

2.4

# ---- 5) Situational analysis ----
situ <- pbp_g1 |>
  filter(!is.na(down), !is.na(ydstogo), play_type %in% c("pass","run")) |>
  mutate(
    ytg_bucket = case_when(
      ydstogo <= 2 ~ "1-2",
      ydstogo <= 5 ~ "3-5",
      ydstogo <= 10 ~ "6-10",
      TRUE ~ "11+"
    ),
    down_lab = paste0("Down ", down)
  ) |>
  group_by(posteam, down_lab, ytg_bucket) |>
  summarize(
    plays = n(),
    success_rate = mean(success, na.rm = TRUE),
    epa_per_play = mean(epa, na.rm = TRUE),
    .groups = "drop"
  )

ggplot(situ |> filter(posteam %in% c("IND", opp)),
       aes(x = ytg_bucket, y = success_rate, group = posteam, color = posteam)) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  facet_wrap(~down_lab) +
  scale_y_continuous(labels = percent) +
  labs(title = "Success rate by down & yards-to-go",
       x = "Yards to go (bucket)", y = "Success rate", color = "Offense") +
  theme_minimal()

Questions

it helps turn numeric values, especially in varying ranges, into categorical variables which can easily be displayed and aggregated.
It looks like the further away a team is on downs, the higher the success rate - but with two major exceptions: this doesn’t appear to apply for 3rd downs, and there is a nike swoosh for 2nd downs where the rate is high for 1-2 yards, drops significantly for 3-5, then graudually increases from distances past there.

2.5

# Field position zones
fieldpos <- pbp_g1 |>
  filter(!is.na(yardline_100), play_type %in% c("run","pass")) |>
  mutate(
    field_zone = case_when(
      yardline_100 >= 80 ~ "Own 20-0",
      yardline_100 >= 50 ~ "Own 49-21",
      yardline_100 >= 21 ~ "Opp 49-21",
      TRUE ~ "Red Zone (<=20)"
    )
  ) |>
  group_by(posteam, field_zone) |>
  summarize(
    plays = n(),
    epa_per_play = mean(epa, na.rm = TRUE),
    success_rate = mean(success, na.rm = TRUE),
    .groups = "drop"
  )
ggplot(fieldpos |> filter(posteam %in% c("IND", opp)),
       aes(x = field_zone, y = epa_per_play, fill = posteam)) +
  geom_col(position = position_dodge(width = 0.8)) +
  coord_flip() +
  labs(title = "EPA/play by field zone",
       x = "", y = "EPA per play", fill = "Offense") +
  theme_minimal()

# ---- 6) Drive lens ----
drive_sum <- pbp_g1 |>
  filter(!is.na(drive), !is.na(posteam)) |>
  group_by(posteam, drive) |>
  summarize(
    plays        = n(),
    yards        = sum(yards_gained, na.rm = TRUE),
    points       = sum(points_scored, na.rm = TRUE),   # <-- FIXED
    epa          = sum(epa, na.rm = TRUE),
    success_rate = mean(success, na.rm = TRUE),
    ended_score  = any(touchdown == 1 | field_goal_result == "made" | safety == 1, na.rm = TRUE),
    .groups = "drop"
  ) |>
  arrange(posteam, desc(points), desc(yards))

Questions

the play wasn’t a drive, maybe it was another classification of play like timeout, penalty, kick off, etc.
it means the drive was bad and the team is in a worse position than they were before the drive.

2.6

# ---- 7) Player involvement ----
colts_targets <- pbp_g1 |>
  filter(posteam == team, pass == 1) |>
  count(receiver_player_name, sort = TRUE, name = "targets")

colts_rushers <- pbp_g1 |>
  filter(posteam == team, rush == 1) |>
  count(rusher_player_name, sort = TRUE, name = "rush_att")

epa_by_receiver <- pbp_g1 |>
  filter(posteam == team, pass == 1, !is.na(receiver_player_name)) |>
  group_by(receiver_player_name) |>
  summarize(
    targets = n(),
    epa_sum = sum(epa, na.rm = TRUE),
    epa_mean = mean(epa, na.rm = TRUE),
    .groups = "drop"
  ) |> arrange(desc(epa_sum))

ggplot(epa_by_receiver, aes(x = targets, y = epa_sum, label = receiver_player_name)) +
  geom_point(size = 3) +
  geom_text_repel(min.segment.length = 0) +
  labs(title = "Colts receivers: total EPA vs targets",
       x = "Targets", y = "Total EPA") +
  theme_minimal()

epa_by_receiver

## # A tibble: 7 × 4
##   receiver_player_name targets epa_sum epa_mean
##   <chr>                  <int>   <dbl>    <dbl>
## 1 M.Pittman                  8   6.89     0.861
## 2 J.Taylor                   3   3.09     1.03 
## 3 M.Alie-Cox                 1   1.36     1.36 
## 4 T.Warren                   9   1.19     0.132
## 5 A.Pierce                   3   1.03     0.342
## 6 A.Mitchell                 2   0.730    0.365
## 7 J.Downs                    3   0.434    0.145

Questions

because you want to make sure the play is a pass - when it isn’t, it doesn’t matter.
M. Alie-Cox (1.36)

2.7

# ---- 8) Penalties & Special Teams ----
penalties <- pbp_g1 |>
  filter(!is.na(penalty)) |>
  mutate(off_def = if_else(penalty_team == posteam, "Offense", "Defense")) |>
  count(penalty_team, off_def, penalty_type, sort = TRUE)
cat("\n--- PENALTIES ---\n"); print(penalties, n = 30)

## 
## --- PENALTIES ---

## ── nflverse play by play data ──────────────────────────────────────────────────

## ℹ Data updated: 2025-09-21 05:19:36 EDT

## # A tibble: 6 × 4
##   penalty_team off_def penalty_type                        n
##   <chr>        <chr>   <chr>                           <int>
## 1 <NA>         <NA>    <NA>                              140
## 2 IND          Offense Offensive Holding                   3
## 3 IND          Defense Unnecessary Roughness               1
## 4 MIA          Defense Defensive Holding                   1
## 5 MIA          Defense Running Into the Kicker             1
## 6 MIA          Offense Offensive Too Many Men on Field     1

st_fg <- pbp_g1 |>
  filter(play_type == "field_goal") |>
  count(posteam, field_goal_result)
st_punt <- pbp_g1 |>
  filter(play_type == "punt") |>
  summarize(punts = n(), avg_punt_yards = mean(yards_gained, na.rm = TRUE))
cat("\n--- SPECIAL TEAMS ---\n"); print(list(field_goals = st_fg, punts = st_punt))

## 
## --- SPECIAL TEAMS ---

## $field_goals

## ── nflverse play by play data ──────────────────────────────────────────────────

## ℹ Data updated: 2025-09-21 05:19:36 EDT

## # A tibble: 1 × 3
##   posteam field_goal_result     n
##   <chr>   <chr>             <int>
## 1 IND     made                  4
## 
## $punts
## # A tibble: 1 × 2
##   punts avg_punt_yards
##   <int>          <dbl>
## 1     1              0

Questions

IND
If there’s a penalty such as a long 4th quarter, 4th down play being turned into an automatic 1st down, this goes from a team being unlikely to score a critical touchdown (and perhaps immediately turning into an unfavorable defensive switch), to a fighting hope to continue the run.

2.8

# ---- 9) Time-based trends ----
epa_by_q <- pbp_g1 %>%
  filter(qtr %in% 1:4, play_type %in% c("pass","run")) %>%
  group_by(posteam, qtr) %>%
  summarize(
    epa_per_play = mean(epa, na.rm = TRUE),
    success_rate = mean(success, na.rm = TRUE),
    .groups = "drop"
  )
ggplot(epa_by_q, aes(x = factor(qtr), y = epa_per_play, group = posteam, color = posteam)) +
  geom_line(linewidth = 1) + geom_point(size = 2) +
  labs(title = "EPA/play by quarter", x = "Quarter", y = "EPA/play", color = "Offense") +
  theme_minimal()

# Rolling pass rate (10-play window) for Colts
colts_seq <- pbp_g1 %>%
  filter(posteam == team, play_type %in% c("pass","run")) %>%
  mutate(play_index = row_number(),
         pass_flag  = if_else(play_type == "pass", 1, 0)) %>%
  arrange(play_index)
if (nrow(colts_seq) >= 10) {
  colts_seq_roll <- colts_seq %>%
    mutate(pass_rate_10 = zoo::rollapply(pass_flag, width = 10, FUN = mean,
                                         align = "right", fill = NA_real_))
  ggplot(colts_seq_roll, aes(x = play_index, y = pass_rate_10)) +
    geom_line(linewidth = 1) +
    scale_y_continuous(labels = percent) +
    labs(title = "Colts rolling pass rate (10-play window)",
         x = "Offensive play index", y = "Pass rate") +
    theme_minimal()
}

## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_line()`).

colts_seq_roll[c('game_half', 'pass_rate_10')] |>
  group_by(game_half) |>
  summarise(avg_pass_rate_10 = mean(pass_rate_10, na.rm = TRUE))

## # A tibble: 2 × 2
##   game_half avg_pass_rate_10
##   <chr>                <dbl>
## 1 Half1                0.535
## 2 Half2                0.274

Questions

it appears that the average pass rate decreased significantly by the 2nd half
This can change sharply based on what is happening during the game. Leads, turnovers, momentum, penalties, injuries, and strategies like clock burning can all cause the pass rate to change over the course of a game.

Test

Thomas Reedy

2025-09-21

Test

Q1

1.1

Questions

1.2

Questions

Q2

2.1

Questions

2.2

Questions

2.3

Questions

2.4

Questions

2.5

Questions

2.6

Questions

2.7

Questions

2.8

Questions