pla <- full_join(player_plays, plays, by = c("gameId", "playId"))
snap <- tibble(
linesetmotion = pla$motionSinceLineset,
ballsnapmotion = pla$inMotionAtBallSnap,
shiftSinceLineset = pla$shiftSinceLineset,
passResult = pla$passResult,
hadRushAttempt = pla$hadRushAttempt,
hadDropback = pla$hadDropback,
game = pla$gameId,
nfl = pla$nflId,
play = pla$playId,
desc = pla$playDescription,
formation = pla$offenseFormation,
yards = pla$yardsGained,
clock = pla$playClockAtSnap,
yardsToGo = pla$yardsToGo,
pressure = pla$unblockedPressure,
manZone = pla$pff_manZone,
coverage = pla$pff_passCoverage,
run = pla$pff_runConceptPrimary
)
snap_unique <- snap %>%
distinct(desc, .keep_all = TRUE) %>%
mutate(play_type = ifelse(is.na(passResult), "Rush", "Pass")) %>%
filter(!is.na(formation) & formation != "NA")
play_summary <- snap_unique %>%
group_by(yardsToGo, play_type, formation) %>%
summarise(play_count = n(), .groups = 'drop') %>%
group_by(yardsToGo, formation) %>%
mutate(percentage = play_count / sum(play_count) * 100)
ggplot(play_summary, aes(x = yardsToGo, y = percentage, fill = play_type)) +
geom_bar(stat = "identity", position = "stack") +
facet_wrap(~ formation, scales = "free_y") +
labs(title = "Play Type Percentage by Yards to Go and Formation",
x = "Yards to Go", y = "Percentage of Plays (%)") +
scale_fill_manual(values = c("Rush" = "blue", "Pass" = "orange")) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5),
legend.title = element_blank())
coverage_data <- snap_unique %>%
drop_na(formation, coverage) %>%
group_by(formation, coverage) %>%
summarise(count = n(), .groups = 'drop') %>%
group_by(formation) %>%
mutate(percentage = (count / sum(count)) * 100)
ggplot(coverage_data, aes(x = formation, y = percentage, fill = coverage)) +
geom_bar(stat = "identity", position = "fill") +
labs(title = "Percentage Distribution of Coverage Types by Formation",
x = "Formation", y = "Percentage of Coverage") +
scale_y_continuous(labels = scales::percent_format(scale = 1)) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
library(ggplot2)
library(dplyr)
library(forcats)
# Prepare the data
plot_data <- snap_unique %>%
filter(!is.na(formation) & !is.na(coverage) & !is.na(yards)) %>%
group_by(formation, coverage) %>%
summarise(
avg_yards = mean(yards, na.rm = TRUE),
n = n(),
.groups = 'drop'
) %>%
filter(n > 10) # Filter out combinations with few occurrences
# Create the plot
ggplot(plot_data, aes(x = fct_reorder(formation, avg_yards), y = coverage, fill = avg_yards)) +
geom_tile() +
geom_text(aes(label = sprintf("%.1f", avg_yards)), color = "black", size = 3) +
scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0) +
labs(
title = "Average Yards Gained by Offensive Formation and Defensive Coverage",
x = "Offensive Formation",
y = "Defensive Coverage",
fill = "Avg Yards"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
legend.position = "right"
)
motion_summary <- snap_unique %>%
group_by(formation) %>%
summarise(
pass_attempts = sum(passResult != "NA", na.rm = TRUE),
rush_attempts = sum(is.na(passResult)),
total_plays = n(),
average_yards = mean(yards, na.rm = TRUE)
) %>%
mutate(
pass_prob = pass_attempts / total_plays,
rush_prob = rush_attempts / total_plays
)
print(motion_summary)
## # A tibble: 7 × 7
## formation pass_attempts rush_attempts total_plays average_yards pass_prob
## <chr> <int> <int> <int> <dbl> <dbl>
## 1 EMPTY 1295 47 1342 5.78 0.965
## 2 I_FORM 299 736 1035 5.00 0.289
## 3 JUMBO 21 104 125 2.33 0.168
## 4 PISTOL 226 415 641 5.04 0.353
## 5 SHOTGUN 6551 2238 8789 5.74 0.745
## 6 SINGLEBACK 1306 2609 3915 5.35 0.334
## 7 WILDCAT 13 74 87 4.64 0.149
## # ℹ 1 more variable: rush_prob <dbl>
###The use of pre-snap motion is more common in pass plays than in rush plays. ### These findings suggest that pre-snap motion can be a valuable tool for offenses, particularly in the passing game. Defenses should be especially alert to the possibility of a pass play when they observe pre-snap motion.
formation_summary <- snap_unique %>%
group_by(formation) %>%
summarise(
avg_yards = mean(yards, na.rm = TRUE),
play_count = n(),
pass_percentage = mean(play_type == "Pass") * 100,
.groups = 'drop'
) %>%
arrange(desc(avg_yards))
ggplot(formation_summary, aes(x = reorder(formation, avg_yards), y = avg_yards)) +
geom_bar(stat = "identity", fill = "skyblue") +
geom_text(aes(label = sprintf("%.2f", avg_yards), y = avg_yards + 0.1), vjust = 0) +
geom_text(aes(label = sprintf("%.0f%%", pass_percentage), y = 0.2), vjust = 0, color = "red") +
labs(title = "Average Yards Gained by Formation",
subtitle = "Red percentage indicates pass play frequency",
x = "Formation", y = "Average Yards Gained") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 45, hjust = 1))
print(formation_summary)
## # A tibble: 7 × 4
## formation avg_yards play_count pass_percentage
## <chr> <dbl> <int> <dbl>
## 1 EMPTY 5.78 1342 96.5
## 2 SHOTGUN 5.74 8789 74.5
## 3 SINGLEBACK 5.35 3915 33.4
## 4 PISTOL 5.04 641 35.3
## 5 I_FORM 5.00 1035 28.9
## 6 WILDCAT 4.64 87 14.9
## 7 JUMBO 2.33 125 16.8