need <- c(
  "dplyr","tidyr","ggplot2","readr","stringr","lubridate",
  "nflreadr","gt","gtExtras","forcats","scales"
)
to_install <- setdiff(need, rownames(installed.packages()))
if(length(to_install)) install.packages(to_install, repos = "https://cloud.r-project.org")
invisible(lapply(need, library, character.only = TRUE))
season <- 2025
pbp <- nflreadr::load_pbp(seasons = season)
sched <- nflreadr::load_schedules(seasons = season)
pbp_rs <- pbp %>%
  dplyr::filter(season_type == "REG", !is.na(week))
league_weekly <- pbp_rs %>%
  dplyr::filter(!is.na(epa)) %>%
  dplyr::group_by(week) %>%
  dplyr::summarise(
    plays = dplyr::n(),
    epa_per_play = mean(epa, na.rm = TRUE),
    success_rate = mean(success, na.rm = TRUE),
    .groups = "drop"
  )

dal_weekly <- pbp_rs %>%
  dplyr::filter(posteam == "DAL", !is.na(epa)) %>%
  dplyr::group_by(week) %>%
  dplyr::summarise(
    plays = dplyr::n(),
    epa_per_play = mean(epa, na.rm = TRUE),
    success_rate = mean(success, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  dplyr::mutate(team = "DAL")
ggplot() +
  geom_line(data = league_weekly, aes(x = week, y = epa_per_play), linewidth = 1, alpha = 0.7) +
  geom_point(data = league_weekly, aes(x = week, y = epa_per_play)) +
  geom_line(data = dal_weekly, aes(x = week, y = epa_per_play), linewidth = 1.2, color = "blue") +
  geom_point(data = dal_weekly, aes(x = week, y = epa_per_play, color = "blue")) +
labs(
    title = "EPA per Play by Week (2025 Regular Season)",
    subtitle = "Dallas Cowboys vs. League Average",
    x = "Week",
    y = "EPA per Play",
    caption = "Source: nflverse via nflreadr"
  ) +
  scale_x_continuous(breaks = 1:18) +
  theme_minimal(base_size = 13)

ggplot() +
  geom_line(data = league_weekly, aes(x = week, y = success_rate), linewidth = 1, alpha = 0.7) +
  geom_point(data = league_weekly, aes(x = week, y = success_rate)) +
  geom_line(data = dal_weekly, aes(x = week, y = success_rate), linewidth = 1.2, color = "blue") +
  geom_point(data = dal_weekly, aes(x = week, y = success_rate, color = "blue")) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(
    title = "Success Rate by Week (2025 Regular Season)",
    subtitle = "Dallas Cowboys vs. League Average",
    x = "Week",
    y = "Success Rate",
    caption = "Source: nflverse via nflreadr"
  ) +
  scale_x_continuous(breaks = 1:18) +
  theme_minimal(base_size = 13)

team_summary <- pbp_rs %>%
  dplyr::filter(!is.na(epa)) %>%
  dplyr::group_by(posteam) %>%
  dplyr::summarise(
    plays = dplyr::n(),
    epa_per_play = mean(epa, na.rm = TRUE),
    success_rate = mean(success, na.rm = TRUE),
    dropbacks = sum(pass == 1, na.rm = TRUE),
    rushes = sum(rush == 1, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  dplyr::arrange(dplyr::desc(epa_per_play)) %>%
  dplyr::mutate(rank_epa = dplyr::min_rank(dplyr::desc(epa_per_play)))

# Simpler coloring to avoid version-specific issues with gt::data_color()
tbl <- team_summary %>%
  gt::gt() %>%
  gt::fmt_number(columns = c(epa_per_play), decimals = 3) %>%
  gt::fmt_percent(columns = c(success_rate), decimals = 1) %>%
  gt::tab_header(
    title = gt::md("**Team Offensive Summary — 2025 REG (to-date)**"),
    subtitle = "Sorted by EPA per Play"
  )

# Try gtExtras gradient if available; otherwise print basic table
if ("gtExtras" %in% loadedNamespaces()) {
  try({
    tbl <- gtExtras::gt_color_rows(
      data = tbl,
      columns = epa_per_play,
      palette = "Blues"
    )
  }, silent = TRUE)
}

tbl
Team Offensive Summary — 2025 REG (to-date)
Sorted by EPA per Play
posteam plays epa_per_play success_rate dropbacks rushes rank_epa
BUF 345 0.184 49.0% 154 114 1
IND 322 0.151 53.7% 143 115 2
BAL 293 0.139 51.2% 141 74 3
DAL 366 0.126 50.3% 194 98 4
DET 325 0.114 50.8% 134 116 5
GB 333 0.099 48.0% 146 113 6
KC 325 0.095 46.5% 178 82 7
WAS 320 0.092 50.0% 156 92 8
NE 317 0.061 45.7% 159 89 9
MIA 282 0.052 46.8% 145 78 10
SEA 303 0.045 48.8% 115 123 11
PIT 286 0.042 49.7% 130 89 12
DEN 329 0.036 45.3% 163 99 13
PHI 315 0.024 45.4% 130 111 14
LA 405 0.018 52.1% 203 119 15
SF 438 0.012 50.0% 225 130 16
ARI 317 0.011 44.8% 170 83 17
NYJ 309 0.006 51.8% 147 94 18
JAX 345 0.002 45.2% 157 110 19
NA 569 0.000 0.0% 0 0 20
LAC 331 −0.002 48.3% 182 85 21
NYG 340 −0.013 44.4% 173 104 22
ATL 336 −0.033 45.8% 151 117 23
TB 353 −0.033 43.3% 174 105 24
CHI 334 −0.036 42.8% 165 94 25
CAR 335 −0.058 46.0% 178 95 26
MIN 311 −0.065 45.7% 147 91 27
HOU 298 −0.071 39.3% 150 93 28
LV 311 −0.087 43.4% 149 99 29
NO 359 −0.116 45.1% 178 106 30
CIN 294 −0.159 42.2% 147 77 31
TEN 316 −0.163 41.1% 162 80 32
CLE 347 −0.199 38.6% 190 90 33
# Build DAL game log without using season_type
dal_games <- sched %>%
  dplyr::filter(season == 2025, home_team == "DAL" | away_team == "DAL") %>%
  dplyr::mutate(
    venue = ifelse(home_team == "DAL", "Home", "Away"),
    opponent = ifelse(venue == "Home", away_team, home_team),
    team_score = ifelse(venue == "Home", home_score, away_score),
    opponent_score = ifelse(venue == "Home", away_score, home_score),
    result = dplyr::case_when(
      is.na(team_score) | is.na(opponent_score) ~ NA_character_,
      team_score > opponent_score ~ "W",
      team_score < opponent_score ~ "L",
      TRUE ~ "T"
    )
  ) %>%
  dplyr::select(week, gameday, venue, opponent, team_score, opponent_score, result, game_id) %>%
  dplyr::arrange(week)

gt::gt(dal_games) %>%
  gt::tab_header(
    title = gt::md("**Dallas Cowboys — 2025 Regular Season Schedule & Results (to-date)**")
  ) %>%
  gt::fmt_date(columns = gameday, date_style = 3)
Dallas Cowboys — 2025 Regular Season Schedule & Results (to-date)
week gameday venue opponent team_score opponent_score result game_id
1 Thu, Sep 4, 2025 Away PHI 20 24 L 2025_01_DAL_PHI
2 Sun, Sep 14, 2025 Home NYG 40 37 W 2025_02_NYG_DAL
3 Sun, Sep 21, 2025 Away CHI 14 31 L 2025_03_DAL_CHI
4 Sun, Sep 28, 2025 Home GB 40 40 T 2025_04_GB_DAL
5 Sun, Oct 5, 2025 Away NYJ NA NA NA 2025_05_DAL_NYJ
6 Sun, Oct 12, 2025 Away CAR NA NA NA 2025_06_DAL_CAR
7 Sun, Oct 19, 2025 Home WAS NA NA NA 2025_07_WAS_DAL
8 Sun, Oct 26, 2025 Away DEN NA NA NA 2025_08_DAL_DEN
9 Mon, Nov 3, 2025 Home ARI NA NA NA 2025_09_ARI_DAL
11 Mon, Nov 17, 2025 Away LV NA NA NA 2025_11_DAL_LV
12 Sun, Nov 23, 2025 Home PHI NA NA NA 2025_12_PHI_DAL
13 Thu, Nov 27, 2025 Home KC NA NA NA 2025_13_KC_DAL
14 Thu, Dec 4, 2025 Away DET NA NA NA 2025_14_DAL_DET
15 Sun, Dec 14, 2025 Home MIN NA NA NA 2025_15_MIN_DAL
16 Sun, Dec 21, 2025 Home LAC NA NA NA 2025_16_LAC_DAL
17 Thu, Dec 25, 2025 Away WAS NA NA NA 2025_17_DAL_WAS
18 Sun, Jan 4, 2026 Away NYG NA NA NA 2025_18_DAL_NYG
library(dplyr)
library(tidyr)
library(ggplot2)
library(gt)
library(scales)
has_col <- function(df, x) x %in% names(df)

pbp_clean <- pbp_rs %>%
  # keep offensive plays only (remove kicks, timeouts, no-plays)
  filter(play_type %in% c("run","pass"),
         !is.na(posteam),
         !is.na(epa))

# ---------- Helpers ----------
# 1) Explosive indicator
pbp_enriched <- pbp_clean %>%
  mutate(
    explosive = case_when(
      pass == 1 & yards_gained >= 20 ~ 1L,
      rush == 1 & yards_gained >= 10 ~ 1L,
      TRUE ~ 0L
    ),
    early_down = if_else(down %in% c(1,2), 1L, 0L),
    third_down = if_else(down == 3, 1L, 0L),
    # neutral situation (fallback if wp missing)
    neutral = dplyr::case_when(
      has_col(., "wp") & !is.na(wp) ~ as.integer(wp >= 0.20 & wp <= 0.80),
      TRUE ~ as.integer(abs(score_differential) <= 7 & qtr %in% 1:3)
    ),
    two_min = if_else(half_seconds_remaining <= 120, 1L, 0L),
    q1 = if_else(qtr == 1, 1L, 0L)
  )

# 3) Third-down distance buckets
tdist <- pbp_enriched %>%
  filter(third_down == 1, !is.na(ydstogo)) %>%
  mutate(td_bucket = cut(
    ydstogo,
    breaks = c(-Inf, 2, 5, 10, Inf),
    labels = c("1-2", "3-5", "6-10", "11+")
  ))

# 4) Red-zone trips & TDs (by drive)
rz_trips <- pbp_enriched %>%
  group_by(game_id, drive, posteam) %>%
  summarise(
    any_rz = any(yardline_100 <= 20, na.rm = TRUE),
    td_for = any(touchdown == 1 & posteam == td_team, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(any_rz)

# 6/7) Optional columns safety
has_pa <- has_col(pbp_enriched, "play_action")
has_shotgun <- has_col(pbp_enriched, "shotgun")
has_pen <- has_col(pbp_enriched, "penalty")

# ---------- Team-level metrics function ----------
team_metrics <- function(df, team_code) {
  df_team <- df %>% filter(posteam == team_code)
  # base counts
  plays <- nrow(df_team)
  epa_pp <- mean(df_team$epa, na.rm = TRUE)
  sr <- mean(df_team$success, na.rm = TRUE)

  # 1) Explosive rate
  exp_rate <- mean(df_team$explosive == 1, na.rm = TRUE)

  # 2) Early-down SR
  ed_sr <- df_team %>% filter(early_down == 1) %>% summarise(v = mean(success, na.rm = TRUE)) %>% pull(v)

  # 3) Third-down conv by bucket
  td_team <- tdist %>% filter(posteam == team_code)
  td_conv <- td_team %>%
    summarise(
      `TD Conv 1-2` = mean(first_down == 1 & td_bucket == "1-2", na.rm = TRUE) /
        mean(td_bucket == "1-2", na.rm = TRUE),
      `TD Conv 3-5` = mean(first_down == 1 & td_bucket == "3-5", na.rm = TRUE) /
        mean(td_bucket == "3-5", na.rm = TRUE),
      `TD Conv 6-10` = mean(first_down == 1 & td_bucket == "6-10", na.rm = TRUE) /
        mean(td_bucket == "6-10", na.rm = TRUE),
      `TD Conv 11+` = mean(first_down == 1 & td_bucket == "11+", na.rm = TRUE) /
        mean(td_bucket == "11+", na.rm = TRUE)
    ) %>%
    mutate(across(everything(), ~ ifelse(is.nan(.x), NA_real_, .x)))

  # 4) Red-zone TD rate per trip
  rz_team <- rz_trips %>% filter(posteam == team_code)
  rz_td_rate <- if (nrow(rz_team) > 0) mean(rz_team$td_for, na.rm = TRUE) else NA_real_

  # 5) Neutral pass rate
  neutral_team <- df_team %>% filter(neutral == 1)
  npr <- if (nrow(neutral_team) > 0) mean(neutral_team$pass == 1, na.rm = TRUE) else NA_real_

  # 6) Play-action usage & EPA
  pa_rate <- pa_epa <- NA_real_
  if (has_pa) {
    pa_rate <- mean(df_team$play_action == 1, na.rm = TRUE)
    pa_epa <- df_team %>% filter(play_action == 1) %>% summarise(v = mean(epa, na.rm = TRUE)) %>% pull(v)
  }

  # 7) Shotgun usage & EPA
  sg_rate <- sg_epa <- NA_real_
  if (has_shotgun) {
    sg_rate <- mean(df_team$shotgun == 1, na.rm = TRUE)
    sg_epa <- df_team %>% filter(shotgun == 1) %>% summarise(v = mean(epa, na.rm = TRUE)) %>% pull(v)
  }

  # 8) Penalty EPA
  pen_epa <- NA_real_
  if (has_pen) {
    pen_epa <- df_team %>% filter(penalty == 1) %>% summarise(v = sum(epa, na.rm = TRUE)) %>% pull(v)
  }

  # 9) Two-minute efficiency
  two_min_df <- df_team %>% filter(two_min == 1)
  two_min_epa <- if (nrow(two_min_df) > 0) mean(two_min_df$epa, na.rm = TRUE) else NA_real_

  # 10) First-quarter start quality
  q1_df <- df_team %>% filter(q1 == 1)
  q1_epa <- if (nrow(q1_df) > 0) mean(q1_df$epa, na.rm = TRUE) else NA_real_
  q1_sr <- if (nrow(q1_df) > 0) mean(q1_df$success, na.rm = TRUE) else NA_real_

  tibble(
    team = team_code,
    plays = plays,
    epa_per_play = epa_pp,
    success_rate = sr,
    explosive_rate = exp_rate,
    early_down_sr = ed_sr,
    third_conv_1_2 = td_conv$`TD Conv 1-2`,
    third_conv_3_5 = td_conv$`TD Conv 3-5`,
    third_conv_6_10 = td_conv$`TD Conv 6-10`,
    third_conv_11p = td_conv$`TD Conv 11+`,
    rz_td_rate = rz_td_rate,
    neutral_pass_rate = npr,
    pa_rate = pa_rate, pa_epa = pa_epa,
    shotgun_rate = sg_rate, shotgun_epa = sg_epa,
    penalty_epa = pen_epa,
    two_min_epa = two_min_epa,
    q1_epa = q1_epa, q1_sr = q1_sr
  )
}

# Compute for DAL and league
dal_stats <- team_metrics(pbp_enriched, "DAL")

league_stats <- team_metrics(pbp_enriched %>% mutate(posteam = "LG" ), "LG") %>%
  # Recompute league by overriding posteam to fake a single team
  { team_metrics(pbp_enriched %>% mutate(posteam = "LG"), "LG") }

# Combine and format
out_tbl <- bind_rows(
  dal_stats %>% mutate(group = "DAL"),
  league_stats %>% mutate(group = "League Avg")
) %>%
  select(group, plays, epa_per_play, success_rate, explosive_rate, early_down_sr,
         third_conv_1_2, third_conv_3_5, third_conv_6_10, third_conv_11p,
         rz_td_rate, neutral_pass_rate, pa_rate, pa_epa, shotgun_rate, shotgun_epa,
         penalty_epa, two_min_epa, q1_epa, q1_sr) %>%
  mutate(across(where(is.numeric), ~ .x)) 

gt(out_tbl) %>%
  fmt_number(columns = c(epa_per_play, pa_epa, shotgun_epa, two_min_epa, q1_epa), decimals = 3) %>%
  fmt_percent(columns = c(success_rate, explosive_rate, early_down_sr,
                          third_conv_1_2, third_conv_3_5, third_conv_6_10, third_conv_11p,
                          rz_td_rate, neutral_pass_rate, pa_rate, shotgun_rate, q1_sr),
              decimals = 1) %>%
  tab_header(
    title = md("**DAL vs League — Advanced Offensive Profile (2025 REG to-date)**"),
    subtitle = "Explosive, Early/3rd Down, Red Zone, Neutral Pass, Play Action, Shotgun, Two-Minute, Q1"
  )
DAL vs League — Advanced Offensive Profile (2025 REG to-date)
Explosive, Early/3rd Down, Red Zone, Neutral Pass, Play Action, Shotgun, Two-Minute, Q1
group plays epa_per_play success_rate explosive_rate early_down_sr third_conv_1_2 third_conv_3_5 third_conv_6_10 third_conv_11p rz_td_rate neutral_pass_rate pa_rate pa_epa shotgun_rate shotgun_epa penalty_epa two_min_epa q1_epa q1_sr
DAL 273 0.126 48.7% 8.1% 48.9% 66.7% 64.7% 18.8% 20.0% 66.7% 62.8% NA NA 70.0% 0.138 9.69616 0.328 0.098 50.0%
League Avg 7873 0.013 44.1% 8.5% 44.1% NA NA NA NA NA 61.0% NA NA 69.3% 0.004 66.00600 0.061 0.032 45.8%
# Small viz: DAL neutral pass rate by week vs league
nw <- pbp_enriched %>%
  filter(neutral == 1) %>%
  mutate(is_dal = posteam == "DAL") %>%
  group_by(is_dal, week) %>%
  summarise(npr = mean(pass == 1, na.rm = TRUE), .groups="drop") %>%
  mutate(group = if_else(is_dal, "DAL", "League"))

ggplot(nw, aes(week, npr, color = group)) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  scale_color_manual(
    values = c("DAL" = "blue", "League" = "black")
  ) +
  scale_y_continuous(labels = percent_format(accuracy = 1)) +
  scale_x_continuous(breaks = 1:18) +
  labs(
    title = "Neutral Situation Pass Rate by Week (2025)",
    subtitle = "DAL vs League",
    x = "Week", y = "Neutral Pass Rate",
    caption = "Source: nflverse via nflreadr",
    color = NULL   # removes legend title
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")

library(dplyr)
library(gt)
library(scales)

season <- 2025
week_n <- 5

# Detect spread/total columns if they exist
possible_spread_cols <- c("spread_line","vegas_spread","home_spread")
possible_total_cols  <- c("total_line","over_under_line","vegas_total")

spread_col <- intersect(names(sched), possible_spread_cols)[1]
total_col  <- intersect(names(sched), possible_total_cols)[1]

# Get week 5 Cowboys row
dal_row <- sched %>%
  filter(season == season, week == week_n, home_team == "DAL" | away_team == "DAL") %>%
  slice(1)

if (nrow(dal_row) == 0) stop("No DAL game found for this week.")

spread_val <- if(!is.na(spread_col)) as.numeric(dal_row[[spread_col]]) else NA_real_
total_val  <- if(!is.na(total_col))  as.numeric(dal_row[[total_col]])  else NA_real_

dal_is_home <- dal_row$home_team == "DAL"
opponent <- ifelse(dal_is_home, dal_row$away_team, dal_row$home_team)

# Normalize spread for DAL
dal_spread <- if(!is.na(spread_val)) {
  if(dal_is_home) spread_val else -spread_val
} else { NA_real_ }

# Approximate implied win prob
sd_pts <- 13
dal_implied_win_prob <- if(!is.na(dal_spread)) stats::pnorm(dal_spread / sd_pts) else NA_real_

# Build table
out <- tibble(
  Season = season,
  Week = week_n,
  `DAL Home?` = ifelse(dal_is_home, "Yes", "No"),
  Opponent = opponent,
  `Vegas Spread (home ref)` = sprintf("%.1f", spread_val),
  `DAL Spread (+ favored)` = sprintf("%.1f", dal_spread),
  `Total (OU)` = sprintf("%.1f", total_val),
  `Implied DAL Win %` = percent(dal_implied_win_prob, accuracy = 0.1)
)

gt(out) %>%
  tab_header(
    title = "Week 5 Betting Snapshot — Dallas Cowboys",
    subtitle = paste("Season", season, "| from sched betting columns")
  )
Week 5 Betting Snapshot — Dallas Cowboys
Season 2025 | from sched betting columns
Season Week DAL Home? Opponent Vegas Spread (home ref) DAL Spread (+ favored) Total (OU) Implied DAL Win %
2025 5 No NYJ 1.5 -1.5 48.5 45.4%
library(dplyr)
library(gt)
library(scales)

season <- 2025
week_n <- 5

# Detect spread/total columns
possible_spread_cols <- c("spread_line","vegas_spread","home_spread")
possible_total_cols  <- c("total_line","over_under_line","vegas_total")

spread_col <- intersect(names(sched), possible_spread_cols)[1]
total_col  <- intersect(names(sched), possible_total_cols)[1]

week5 <- sched %>%
  filter(season == season, week == week_n) %>%
  select(game_id, week, home_team, away_team, 
         spread = all_of(spread_col), total = all_of(total_col))

# Compute implied win % for home & away
week5 <- week5 %>%
  mutate(
    spread = as.numeric(spread),
    total = as.numeric(total),
    # Vegas spread is home team perspective (positive = home favored)
    home_favored = spread,
    away_favored = -spread,
    # Convert spread to win prob (normal approx, sd ~ 13)
    home_win_prob = pnorm(home_favored / 13),
    away_win_prob = pnorm(away_favored / 13)
  )

# Build display table
out <- week5 %>%
  transmute(
    Week = week,
    Matchup = paste0(away_team, " @ ", home_team),
    `Vegas Spread (home)` = sprintf("%.1f", spread),
    `Total (OU)` = sprintf("%.1f", total),
    `Home Win %` = percent(home_win_prob, accuracy = 0.1),
    `Away Win %` = percent(away_win_prob, accuracy = 0.1)
  )

gt(out) %>%
  tab_header(
    title = "Week 5 Betting Snapshot — All Games",
    subtitle = paste("Season", season, "| from sched betting columns")
  )
Week 5 Betting Snapshot — All Games
Season 2025 | from sched betting columns
Week Matchup Vegas Spread (home) Total (OU) Home Win % Away Win %
5 SF @ LA 8.5 43.5 74.3% 25.7%
5 MIN @ CLE -3.5 35.5 39.4% 60.6%
5 HOU @ BAL -2.5 41.5 42.4% 57.6%
5 MIA @ CAR -1.5 44.5 45.4% 54.6%
5 LV @ IND 7.0 46.5 70.5% 29.5%
5 NYG @ NO 1.5 41.5 54.6% 45.4%
5 DAL @ NYJ 1.5 48.5 54.6% 45.4%
5 DEN @ PHI 3.5 44.5 60.6% 39.4%
5 TEN @ ARI 7.5 41.5 71.8% 28.2%
5 TB @ SEA 3.5 44.5 60.6% 39.4%
5 DET @ CIN -10.5 49.5 21.0% 79.0%
5 WAS @ LAC 2.5 47.5 57.6% 42.4%
5 NE @ BUF 8.5 49.5 74.3% 25.7%
5 KC @ JAX -3.5 45.5 39.4% 60.6%

1 Analysis Part 1:

In football analytics, statistics serve as essential tools to quantify and evaluate team and player performance beyond traditional box score numbers. Among the many metrics used, Expected Points Added (EPA) has emerged as a particularly powerful and insightful measure. EPA calculates the value of each play in terms of how it changes a team’s expected points on the scoreboard, considering the down, distance, field position, and game situation. Unlike simple yardage or scoring stats, EPA captures the true impact of a play on a team’s likelihood to score, providing a context-sensitive evaluation of offensive and defensive effectiveness. By aggregating EPA across plays, analysts can assess how efficiently a team moves the ball and capitalizes on opportunities, making it a comprehensive metric for comparing performance across teams and situations. This is why EPA is chosen as a key metric in football analysis: it reflects the real contribution of each play to winning the game, allowing for a nuanced and objective assessment of team performance that goes beyond surface-level statistics.

The Dallas Cowboys’ 2025 season report through Week 5 provides a comprehensive analysis of the team’s offensive performance using advanced metrics derived from detailed play-by-play data. The report highlights that Dallas ranks fourth in the league in Expected Points Added (EPA) per play, averaging 0.126 compared to the league average of 0.013, signaling a highly efficient offense. Their success rate of 48.7% surpasses the league average of 44.1%, reflecting consistent positive outcomes on plays. The Cowboys have demonstrated a strong ability to generate explosive plays, both through the air and on the ground, and maintain a high early-down success rate that helps sustain drives. They also excel in converting red-zone trips into touchdowns and show effective execution in two-minute situations. Strategically, Dallas leans slightly more on passing in neutral game situations and effectively utilizes play-action and shotgun formations, generating above-average EPA when employing these tactics. Despite these offensive strengths, the team’s record of one win, two losses, and one tie indicates some inconsistency, possibly due to defensive factors or situational challenges. Betting markets currently favor Dallas as a 2.5-point favorite against the New York Jets in Week 5, with an implied win probability of about 58%, reflecting respect for their offensive capabilities but acknowledging a competitive matchup. Based on these insights, the most promising bets include backing Dallas to win or cover the spread and taking the over on total points scored, supported by their explosive and efficient offense. Prop bets related to early-down and third-down success or red-zone touchdowns also appear favorable, while bets on defensive dominance or low-scoring outcomes carry more risk given the available data.

Part 2 of this report will delve even deeper by shifting focus from team-level metrics to situation-specific game probabilities (SGPs) and individual player statistics. This next phase will analyze how players perform in various game contexts, such as critical downs, pressure situations, and clutch moments, providing a more granular understanding of the Cowboys’ strengths and weaknesses. By examining SGPs, the report will assess the likelihood of success in specific scenarios, offering insights into decision-making and execution under different conditions. Additionally, individual player stats will highlight key contributors and potential areas for improvement, enabling a more nuanced evaluation of the team’s overall performance and potential adjustments moving forward.

library(dplyr)
library(gt)
library(scales)

# ------------------ Situation-Specific Game Probabilities (SGPs) ------------------

# Critical downs: 3rd & 4th downs
sgp_critical <- pbp_rs %>%
  filter(play_type %in% c("run","pass"),
         posteam == "DAL",
         down %in% c(3,4),
         !is.na(epa)) %>%
  mutate(
    dist_bucket = cut(
      ydstogo,
      breaks = c(-Inf, 2, 5, 10, Inf),
      labels = c("1-2 yds","3-5 yds","6-10 yds","11+ yds")
    )
  ) %>%
  group_by(down, dist_bucket) %>%
  summarise(
    plays = n(),
    conv_rate = mean(first_down == 1, na.rm = TRUE),
    avg_epa = mean(epa, na.rm = TRUE),
    .groups = "drop"
  )

sgp_table <- sgp_critical %>%
  gt() %>%
  fmt_percent(columns = conv_rate, decimals = 1) %>%
  fmt_number(columns = avg_epa, decimals = 3) %>%
  tab_header(
    title = md("**Dallas Cowboys: Situation-Specific Probabilities (Critical Downs)**"),
    subtitle = "Conversion rate and average EPA by down & distance"
  )

# ------------------ Individual Player Statistics ------------------

# QB passing efficiency (Dak Prescott and backups)
qb_stats <- pbp_rs %>%
  filter(posteam == "DAL", pass == 1, !is.na(passer_player_name)) %>%
  group_by(passer_player_name) %>%
  summarise(
    attempts = n(),
    comp_rate = mean(complete_pass == 1, na.rm = TRUE),
    epa_per_dropback = mean(epa, na.rm = TRUE),
    success_rate = mean(success, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(attempts))

qb_table <- qb_stats %>%
  gt() %>%
  fmt_percent(columns = c(comp_rate, success_rate), decimals = 1) %>%
  fmt_number(columns = epa_per_dropback, decimals = 3) %>%
  tab_header(
    title = md("**Dallas Cowboys: QB Efficiency (2025)**"),
    subtitle = "Passing performance by quarterback"
  )

# Top skill players (rushers & receivers)
skill_stats <- pbp_rs %>%
  filter(posteam == "DAL", play_type %in% c("run","pass")) %>%
  mutate(player = if_else(play_type == "run", rusher_player_name, receiver_player_name)) %>%
  filter(!is.na(player)) %>%
  group_by(player) %>%
  summarise(
    touches = n(),
    yards = sum(yards_gained, na.rm = TRUE),
    tds = sum(touchdown == 1, na.rm = TRUE),
    epa_per_play = mean(epa, na.rm = TRUE),
    success_rate = mean(success, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(touches)) %>%
  head(10)

skill_table <- skill_stats %>%
  gt() %>%
  fmt_number(columns = c(yards, epa_per_play), decimals = 1) %>%
  fmt_percent(columns = success_rate, decimals = 1) %>%
  tab_header(
    title = md("**Dallas Cowboys: Top Offensive Contributors (2025)**"),
    subtitle = "Based on touches, yards, touchdowns, and efficiency"
  )

# Show all three outputs
sgp_table
Dallas Cowboys: Situation-Specific Probabilities (Critical Downs)
Conversion rate and average EPA by down & distance
down dist_bucket plays conv_rate avg_epa
3 1-2 yds 9 66.7% 0.440
3 3-5 yds 17 64.7% 0.551
3 6-10 yds 16 18.8% −0.402
3 11+ yds 5 20.0% −0.522
4 1-2 yds 3 66.7% 0.087
4 3-5 yds 3 66.7% 0.982
qb_table
Dallas Cowboys: QB Efficiency (2025)
Passing performance by quarterback
passer_player_name attempts comp_rate epa_per_dropback success_rate
D.Prescott 172 70.3% 0.185 47.7%
J.Milton 5 60.0% −0.145 60.0%
skill_table
Dallas Cowboys: Top Offensive Contributors (2025)
Based on touches, yards, touchdowns, and efficiency
player touches yards tds epa_per_play success_rate
J.Williams 81 386.0 4 0.0 49.4%
J.Ferguson 39 223.0 1 0.1 48.7%
G.Pickens 33 300.0 4 0.6 60.6%
M.Sanders 28 147.0 1 −0.1 42.9%
C.Lamb 24 222.0 0 0.4 58.3%
J.Tolbert 19 103.0 0 0.0 36.8%
K.Turpin 19 182.0 1 0.7 68.4%
L.Schoonmaker 6 13.0 0 −0.3 16.7%
R.Flournoy 6 47.0 0 0.8 83.3%
D.Prescott 5 22.0 1 0.0 40.0%
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
library(forcats)

# ---------- Prep: keep scrimmage plays and quick helpers ----------
pbp_ok <- pbp_rs %>%
  filter(play_type %in% c("run","pass"),
         !is.na(epa))

has_col <- function(df, col) col %in% names(df)

# Distance buckets for situational use
bucket_dist <- function(x) cut(x,
                               breaks = c(-Inf, 2, 5, 10, Inf),
                               labels = c("1–2", "3–5", "6–10", "11+"))

# ---------- (1) Critical downs heatmap: DAL conversion by down & distance ----------
crit <- pbp_ok %>%
  filter(posteam == "DAL", down %in% c(3,4), !is.na(ydstogo)) %>%
  mutate(dist_bucket = bucket_dist(ydstogo)) %>%
  group_by(down, dist_bucket) %>%
  summarise(
    plays = n(),
    conv_rate = mean(first_down == 1, na.rm = TRUE),
    .groups = "drop"
  )

p1 <- ggplot(crit, aes(x = dist_bucket, y = factor(down), fill = conv_rate)) +
  geom_tile(color = "white") +
  geom_text(aes(label = percent(conv_rate, 0.1)), size = 4) +
  scale_y_discrete(labels = c(`3` = "3rd Down", `4` = "4th Down")) +
  scale_fill_continuous(labels = percent_format(1)) +
  labs(
    title = "DAL Critical Downs: Conversion Rate by Distance",
    x = "Yards to Go", y = "Down",
    fill = "Conv %"
  ) +
  theme_minimal(base_size = 13)
# ---------- (2) QB efficiency bars: EPA/dropback & success rate ----------
qb <- pbp_ok %>%
  filter(posteam == "DAL", pass == 1, !is.na(passer_player_name)) %>%
  group_by(passer_player_name) %>%
  summarise(
    attempts = n(),
    epa_per_dropback = mean(epa, na.rm = TRUE),
    success_rate = mean(success, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(attempts)) %>%
  mutate(passer_player_name = fct_reorder(passer_player_name, epa_per_dropback))

p2 <- ggplot(qb, aes(passer_player_name, epa_per_dropback)) +
  geom_col() +
  geom_text(aes(label = sprintf("%.3f", epa_per_dropback)), vjust = -0.4, size = 3.5) +
  geom_point(aes(y = success_rate - 0.5), alpha = 0) +  # placeholder to keep legend off
  labs(
    title = "DAL QB Efficiency (EPA per Dropback)",
    subtitle = "Numbers above bars are EPA/dropback; consider attempts context",
    x = "Quarterback", y = "EPA per Dropback"
  ) +
  theme_minimal(base_size = 13) +
  coord_cartesian(ylim = c(min(0, min(qb$epa_per_dropback, na.rm=TRUE)) - 0.05,
                           max(qb$epa_per_dropback, na.rm=TRUE) + 0.1)) +
  theme(axis.text.x = element_text(angle = 20, hjust = 1))

# ---------- (3) Top skill players: touches vs EPA/play (size = TDs) ----------
skill <- pbp_ok %>%
  filter(posteam == "DAL") %>%
  mutate(player = if_else(play_type == "run", rusher_player_name, receiver_player_name)) %>%
  filter(!is.na(player)) %>%
  group_by(player) %>%
  summarise(
    touches = n(),
    epa_per_play = mean(epa, na.rm = TRUE),
    success_rate = mean(success, na.rm = TRUE),
    tds = sum(touchdown == 1, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(touches)) %>%
  slice_head(n = 12)

p3 <- ggplot(skill, aes(touches, epa_per_play, size = tds)) +
  geom_point(alpha = 0.8) +
  geom_hline(yintercept = 0, linetype = 2) +
  labs(
    title = "DAL Top Offensive Contributors",
    subtitle = "Touches vs EPA/play (point size = TDs)",
    x = "Touches", y = "EPA per Play", size = "TDs"
  ) +
  theme_minimal(base_size = 13)

# ---------- (4) Red-zone: trips & TD rate by week ----------
# Red-zone = first time a drive enters <= 20 yards from goal; TD if that drive eventually scores a TD for posteam
rz_drives <- pbp_ok %>%
  filter(posteam == "DAL") %>%
  group_by(game_id, drive) %>%
  summarise(
    week = first(week),
    any_rz = any(yardline_100 <= 20, na.rm = TRUE),
    td_for = any(touchdown == 1 & posteam == td_team, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(any_rz)

rz_week <- rz_drives %>%
  group_by(week) %>%
  summarise(
    rz_trips = n(),
    rz_td_rate = mean(td_for, na.rm = TRUE),
    .groups = "drop"
  )

p4 <- ggplot(rz_week, aes(week, rz_td_rate)) +
  geom_col() +
  geom_text(aes(label = percent(rz_td_rate, 0.1)), vjust = -0.3) +
  scale_x_continuous(breaks = unique(rz_week$week)) +
  scale_y_continuous(labels = percent_format(1)) +
  labs(
    title = "DAL Red-Zone Touchdown Rate by Week",
    x = "Week", y = "RZ TD Rate"
  ) +
  theme_minimal(base_size = 13)

# ---------- (5) Neutral pass rate by week: DAL (blue) vs League (black) ----------
# Neutral = prefer WP 20–80% when available; fall back to score within 7 & quarters 1–3
pbp_neu <- pbp_ok %>%
  mutate(
    neutral = dplyr::case_when(
      has_col(., "wp") & !is.na(wp) ~ as.integer(wp >= 0.20 & wp <= 0.80),
      TRUE ~ as.integer(abs(score_differential) <= 7 & qtr %in% 1:3)
    )
  ) %>%
  filter(neutral == 1)

nw <- pbp_neu %>%
  mutate(group = if_else(posteam == "DAL", "DAL", "League")) %>%
  group_by(group, week) %>%
  summarise(npr = mean(pass == 1, na.rm = TRUE), .groups = "drop")

p5 <- ggplot(nw, aes(week, npr, color = group)) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  scale_color_manual(values = c("DAL" = "blue", "League" = "black")) +
  scale_x_continuous(breaks = sort(unique(nw$week))) +
  scale_y_continuous(labels = percent_format(1)) +
  labs(
    title = "Neutral Situation Pass Rate by Week",
    subtitle = "DAL vs League",
    x = "Week", y = "Neutral Pass Rate", color = NULL
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")

# Print all plots
p1; p2; p3; p4; p5

2 Analysis Part 2:

The “Situation-Specific Probabilities (Critical Downs)” analysis for the Dallas Cowboys in the 2025 season reveals that the team performs exceptionally well on short to medium yardage situations during 3rd and 4th downs. Specifically, Dallas converts about two-thirds of their attempts when needing 1 to 5 yards, with strong positive impacts on expected points added (EPA), indicating these plays significantly contribute to their offensive success. However, their conversion rates drop sharply on longer yardage situations (6 or more yards), where they struggle to sustain drives and often produce negative EPA. For Sunday’s game, this suggests the Cowboys will likely be confident and aggressive in going for it on manageable critical downs, aiming to maintain possession and control the game tempo. From a betting perspective, this data supports wagering on Dallas to convert short and medium critical downs, which should help them sustain drives and score. Additionally, their efficiency in these situations makes betting the over on total points a favorable option, as sustained drives typically lead to more scoring opportunities. Betting on Dallas to cover the spread also aligns with their demonstrated ability to perform well in clutch moments. However, caution is advised when considering bets that depend on success in long critical downs, as this remains a relative weakness. Overall, the analysis points to a strategic advantage for Dallas in critical down situations that can be leveraged both on the field and in betting decisions.

Jeff C.