Data Visualization
The analysis uses visualizations to highlight key trends and
findings, including:
- Heatmaps to illustrate player movements in the red
zone.
- Bar charts to compare offensive formations and
scoring efficiency.
- Scatterplots and boxplots to
examine relationships between variables like down, formation, and
expected points.
- Depth-based visualizations to analyze red zone
strategies by field position.
setwd("C:/Users/jason/Desktop/F24 Classes/IS470")
source('https://raw.githubusercontent.com/ptallon/SportsAnalytics_Fall2024/main/SharedCode.R')
# From Prof's Assignment 3 code...
load_packages(c("httr", "ggplot2", "ggalt", "ggforce", "hms", "gganimate", "lubridate", "data.table", "dplyr",
"nflfastR", "gifski", "png", "ggimage", "randomForest", "DescTools", "progress", "tidyr", "caret", "scales", "viridis", "kableExtra"))
# 1. Merge Datasets for Comprehensive Analysis
# Load additional datasets to include context and player information.
games <- fread("Data/NFLBDB2025/games.csv")
player_play <- fread("Data/NFLBDB2025/player_play.csv")
plays <- fread("Data/NFLBDB2025/plays.csv")
players <- fread("Data/NFLBDB2025/players.csv")
tracking <- fread("Data/NFLBDB2025/tracking_week_1.csv") # Replace with other weeks as needed
df <- left_join(tracking, games, by = c("gameId"))
df <- left_join(df, plays, by = c("gameId", "playId"))
df <- left_join(df, players, by = c("nflId"))
df <- left_join(df, player_play, by = c("gameId", "playId", "nflId"))
# 2. Filter Red Zone Plays
# We focus only on plays happening within the opponent's 20-yard line,
# which is considered the red zone, using the absoluteYardlineNumber column.
red_zone_plays <- df %>%
filter(absoluteYardlineNumber <= 20)
# 3. Pre-Snap Efficiency Analysis
# Analyze offensive formations in the red zone to see which are most effective.
# We group by offenseFormation and calculate average yards gained, expected points,
# and the touchdown rate for each formation type.
formation_analysis <- df %>%
mutate(offenseFormation = replace_na(offenseFormation, "Other")) %>%
group_by(offenseFormation) %>%
summarize(
avg_yards_gained = mean(yardsToGo),
avg_expected_points = mean(expectedPointsAdded),
touchdown_rate = mean(ifelse(event == "touchdown", 1, 0))
)
# Make the table prettier with kableExtra
table_data <- table(df$offenseFormation, useNA = "ifany")
kable(table_data, caption = "Offense Formation Distribution") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Offense Formation Distribution
|
Var1
|
Freq
|
|
EMPTY
|
563086
|
|
I_FORM
|
491648
|
|
JUMBO
|
30176
|
|
PISTOL
|
298632
|
|
SHOTGUN
|
4002828
|
|
SINGLEBACK
|
1674653
|
|
WILDCAT
|
8418
|
|
NA
|
35259
|
# Examine pre-snap movements like player motions or shifts,
# and assess their impact on the average yards gained and expected points.
pre_snap_analysis <- df %>%
filter(inMotionAtBallSnap == TRUE | motionSinceLineset == TRUE) %>%
summarize(
avg_yards_gained = mean(yardsToGo, na.rm = TRUE),
avg_expected_points = mean(expectedPointsAdded, na.rm = TRUE)
)
# Print the results of the pre-snap analysis with improved formatting
kable(pre_snap_analysis, caption = "Pre-Snap Analysis of Movements and Expected Points") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Pre-Snap Analysis of Movements and Expected Points
|
avg_yards_gained
|
avg_expected_points
|
|
8.327148
|
-0.0077426
|
kable(formation_analysis, caption = "Formation Analysis in Red Zone") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Formation Analysis in Red Zone
|
offenseFormation
|
avg_yards_gained
|
avg_expected_points
|
touchdown_rate
|
|
EMPTY
|
8.261171
|
-0.0993501
|
NA
|
|
I_FORM
|
8.044910
|
-0.1050960
|
NA
|
|
JUMBO
|
3.629573
|
-0.1293538
|
NA
|
|
Other
|
10.172211
|
-0.1030225
|
NA
|
|
PISTOL
|
8.902110
|
0.0512716
|
NA
|
|
SHOTGUN
|
8.429796
|
0.0025405
|
NA
|
|
SINGLEBACK
|
8.437379
|
-0.0192747
|
NA
|
|
WILDCAT
|
3.989071
|
-2.1371462
|
NA
|
# 4. Post-Snap Analysis Using Tracking Data
# Assess player speed, acceleration, and orientation during plays to evaluate their effectiveness.
tracking_analysis <- df %>%
group_by(nflId) %>%
summarize(
avg_speed = mean(s, na.rm = TRUE),
avg_acceleration = mean(a, na.rm = TRUE),
avg_orientation = mean(o, na.rm = TRUE)
)
# Analyze key play events like ball_snap, pass_forward, and touchdown,
# and calculate the average yards gained and play success rate for these events.
event_analysis <- df %>%
filter(event %in% c("ball_snap", "pass_forward", "touchdown")) %>%
group_by(event) %>%
summarize(
avg_yards_gained = mean(yardsToGo, na.rm = TRUE),
play_success_rate = mean(ifelse(event == "touchdown", 1, 0), na.rm = TRUE)
)
# 5. Develop Efficiency Metrics
# Create new metrics to evaluate red zone play success and scoring efficiency.
# Calculate play success based on gaining the necessary yards or scoring a touchdown.
# Scoring efficiency is calculated as expected points added divided by yards to go.
efficiency_metrics <- df %>%
mutate(
play_success = ifelse(yardsGained >= yardsToGo | event == "touchdown", 1, 0),
scoring_efficiency = expectedPointsAdded / yardsToGo
) %>%
summarize(
play_success_rate = mean(play_success, na.rm = TRUE),
avg_scoring_efficiency = mean(scoring_efficiency, na.rm = TRUE)
)
# Print the calculated efficiency metrics with improved formatting
kable(event_analysis, caption = "Event Analysis for Key Play Outcomes") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Event Analysis for Key Play Outcomes
|
event
|
avg_yards_gained
|
play_success_rate
|
|
ball_snap
|
8.355236
|
0
|
|
pass_forward
|
8.558442
|
0
|
|
touchdown
|
5.291667
|
1
|
Visualization 1 : The heatmap visualizes player movements in
the red zone, revealing concentrated activity between the 15-25-yard
markers along the central axis of the field. The data also highlights a
hotspot near the 20-yard line, indicating intensified play activity as
teams approach scoring opportunities. These patterns suggest heavily
utilized areas that could be targeted for strategic offensive or
defensive adjustments, helping teams refine their red zone
approaches.
# 6. Visualize Key Insights
# Create a heatmap of player movements to show density in the red zone.
# Filter the data to include only red zone (x between 10 and 30)
red_zone_data <- tracking %>%
filter(x >= 10 & x <= 30)
# Create the heatmap with density
g <- ggplot(red_zone_data, aes(x = x, y = y)) +
geom_bin2d(bins = 30, aes(fill = ..count..)) + # Bin data and fill based on count
scale_fill_viridis(option = "turbo") + # Use the Inferno color scale
labs(title = "Heatmap of Player Movements in the Red Zone",
x = "Field Position X (10 to 30 yards)",
y = "Field Position Y") +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5), # Center the title
axis.text.x = element_text(angle = 45, hjust = 1), # Angle x-axis labels for better visibility
axis.text.y = element_text(angle = 0, hjust = 1) # Keep y-axis labels readable
)
# Print the plot
print(g)

Visualization 3: Formation usage varies across different red
zone depths, with Shotgun dominating at both 10-15 and 15-20-yard
markers. Single back is the second most common, particularly at 15-20
yards, while specialized formations like Jumbo and Pistol serve specific
strategic roles. This analysis highlights how teams adjust their
formation preferences based on field position, helping optimize their
play-calling strategies in the red zone. Understanding formation
tendencies at different red zone depths provides valuable insights for
defenses, enabling them to anticipate offensive strategies and adjust
their alignments accordingly. This knowledge can help defenses better
position themselves to disrupt plays and reduce scoring
opportunities.
# Reworking formation preference summary to be based on unique plays and retaining necessary columns
formation_preference <- red_zone_plays %>%
distinct(gameId, playId, offenseFormation, absoluteYardlineNumber, yardsToGo, event, expectedPointsAdded) %>% # Retain necessary columns
mutate(red_zone_depth = case_when(
absoluteYardlineNumber <= 5 ~ "Goal Line",
absoluteYardlineNumber <= 10 ~ "5-10 Yards",
absoluteYardlineNumber <= 15 ~ "10-15 Yards",
TRUE ~ "15-20 Yards"
)) %>%
group_by(offenseFormation, red_zone_depth) %>%
summarize(
count = n(),
avg_yards_gained = mean(yardsToGo, na.rm = TRUE),
touchdown_rate = mean(ifelse(event == "touchdown", 1, 0)),
avg_expected_points = mean(expectedPointsAdded, na.rm = TRUE)
) %>%
arrange(red_zone_depth, desc(count))
# Print the formation preference summary with kableExtra for better styling
kable(formation_preference, caption = "Formation Preference in Red Zone by Depth") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Formation Preference in Red Zone by Depth
|
offenseFormation
|
red_zone_depth
|
count
|
avg_yards_gained
|
touchdown_rate
|
avg_expected_points
|
|
SHOTGUN
|
10-15 Yards
|
213
|
4.352113
|
NA
|
-0.0873441
|
|
SINGLEBACK
|
10-15 Yards
|
99
|
5.191919
|
NA
|
-0.2274308
|
|
I_FORM
|
10-15 Yards
|
55
|
3.400000
|
NA
|
0.2396531
|
|
JUMBO
|
10-15 Yards
|
25
|
1.000000
|
NA
|
0.5331242
|
|
EMPTY
|
10-15 Yards
|
7
|
4.000000
|
NA
|
1.3909096
|
|
SHOTGUN
|
15-20 Yards
|
209
|
7.488038
|
NA
|
0.1717825
|
|
SINGLEBACK
|
15-20 Yards
|
60
|
7.733333
|
NA
|
-0.3738211
|
|
I_FORM
|
15-20 Yards
|
32
|
8.687500
|
NA
|
-0.3113056
|
|
EMPTY
|
15-20 Yards
|
12
|
5.500000
|
NA
|
-0.5233404
|
|
PISTOL
|
15-20 Yards
|
8
|
9.000000
|
NA
|
0.3271988
|
# Visualize Formation Preferences by Red Zone Depth
formation_depth_plot <- ggplot(formation_preference, aes(x = red_zone_depth, y = count, fill = offenseFormation)) +
geom_bar(stat = "identity", position = "dodge") +
theme_minimal() +
labs(
title = "Formation Preferences by Red Zone Depth (Based on Plays)",
x = "Red Zone Depth",
y = "Number of Plays",
fill = "Formation"
) +
scale_fill_viridis_d(option = "plasma", direction = -1) + # Better color palette
theme(
plot.title = element_text(size = 20, face = "bold", hjust = 0.5),
axis.title = element_text(size = 17),
axis.text = element_text(size = 14),
legend.title = element_text(size = 14),
legend.text = element_text(size = 12)
)
# Print the plot
print(formation_depth_plot)

Visualization 4: This comparison evaluates the performance of
offensive formations against their expected outcomes in the red zone.
The Jumbo formation consistently exceeded expectations, demonstrating
exceptional reliability and effectiveness. Meanwhile, formations like
Single back underperformed, indicating inefficiencies. These findings
allow teams to focus on formations that consistently deliver results and
refine underperforming strategies to improve overall efficiency.
# Expected vs Actual Points Analysis
expected_vs_actual <- red_zone_plays %>%
mutate(
actual_points = ifelse(event == "touchdown", 7, ifelse(event == "field_goal", 3, 0))
) %>%
summarize(
avg_expected_points = mean(expectedPointsAdded, na.rm = TRUE),
avg_actual_points = mean(actual_points, na.rm = TRUE)
)
# Print the expected vs actual points table with kableExtra
kable(expected_vs_actual, caption = "Expected vs Actual Points in Red Zone") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Expected vs Actual Points in Red Zone
|
avg_expected_points
|
avg_actual_points
|
|
-0.0720081
|
0.1674641
|
# Motion and Formation Impact on Points
motion_impact <- red_zone_plays %>%
filter(inMotionAtBallSnap == TRUE | motionSinceLineset == TRUE) %>%
summarize(
avg_expected_points_motion = mean(expectedPointsAdded, na.rm = TRUE),
avg_actual_points_motion = mean(ifelse(event == "touchdown", 7, ifelse(event == "field_goal", 3, 0)), na.rm = TRUE)
)
formation_impact <- red_zone_plays %>%
group_by(offenseFormation) %>%
summarize(
avg_expected_points = mean(expectedPointsAdded, na.rm = TRUE),
avg_actual_points = mean(ifelse(event == "touchdown", 7, ifelse(event == "field_goal", 3, 0)), na.rm = TRUE)
)
# Print motion and formation impact tables with kableExtra
kable(motion_impact, caption = "Impact of Motion on Expected and Actual Points") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Impact of Motion on Expected and Actual Points
|
avg_expected_points_motion
|
avg_actual_points_motion
|
|
-0.1384531
|
0.1761006
|
kable(formation_impact, caption = "Impact of Formation on Expected and Actual Points") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Impact of Formation on Expected and Actual Points
|
offenseFormation
|
avg_expected_points
|
avg_actual_points
|
|
EMPTY
|
-0.0064161
|
0.0000000
|
|
I_FORM
|
0.0071192
|
0.2763158
|
|
JUMBO
|
0.5139676
|
0.6363636
|
|
PISTOL
|
0.3271988
|
0.0000000
|
|
SHOTGUN
|
-0.0327270
|
0.1338798
|
|
SINGLEBACK
|
-0.3181078
|
0.1500000
|
# Visualize Comparison of Expected vs Actual Points by Offensive Formation
g2 <- ggplot(formation_impact, aes(x = reorder(offenseFormation, avg_actual_points))) +
geom_bar(aes(y = avg_actual_points, fill = "Actual Points"), stat = "identity", position = "dodge", alpha = 0.8) +
geom_bar(aes(y = avg_expected_points, fill = "Expected Points"), stat = "identity", position = "dodge", alpha = 0.8) +
geom_text(aes(y = avg_actual_points, label = round(avg_actual_points, 2)), vjust = -0.5, color = "black", size = 4.5, fontface = "bold") +
geom_text(aes(y = avg_expected_points, label = round(avg_expected_points, 2)), vjust = 1.5, color = "black", size = 4.5, fontface = "bold") +
theme_minimal() +
labs(
title = "Comparison of Expected vs. Actual Points by Offensive Formation",
subtitle = "Bars sorted by average actual points",
x = "Offensive Formation",
y = "Average Points",
fill = "Points Type"
) +
scale_fill_manual(values = c("Expected Points" = "green", "Actual Points" = "coral")) +
theme(
plot.title = element_text(size = 20, face = "bold", hjust = 0.5),
plot.subtitle = element_text(size = 14, hjust = 0.5),
axis.title = element_text(size = 18),
axis.text.x = element_text(size = 14, angle = 45, hjust = 1),
axis.text.y = element_text(size = 14),
legend.title = element_text(size = 14),
legend.text = element_text(size = 12)
)
print(g2)
Visualization 5: The chart examines how offensive efficiency,
measured by EPA, changes by down within the red zone. Most plays yielded
minimal change to scoring chances, but third down stood out with the
widest variability, representing a high-risk, high-reward situation.
Understanding these trends helps coaches identify critical moments where
strategic play-calling can maximize scoring opportunities and mitigate
potential losses.
# Analyze key predictors for expected points
expected_points_analysis <- red_zone_plays %>%
select(expectedPointsAdded, yardsToGo, down, absoluteYardlineNumber, offenseFormation) %>%
drop_na() %>%
summarise(
correlation_yardsToGo = cor(expectedPointsAdded, yardsToGo, use = "complete.obs"),
correlation_down = cor(expectedPointsAdded, down, use = "complete.obs"),
correlation_yardline = cor(expectedPointsAdded, absoluteYardlineNumber, use = "complete.obs")
)
kable(expected_points_analysis, caption = "Correlation of Key Predictors with Expected Points") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Correlation of Key Predictors with Expected Points
|
correlation_yardsToGo
|
correlation_down
|
correlation_yardline
|
|
-0.1337717
|
0.0076663
|
-0.1176501
|
# Ensure that the required columns are selected and drop rows with NA values
expected_points_analysis <- red_zone_plays %>%
select(expectedPointsAdded, yardsToGo, down, absoluteYardlineNumber, offenseFormation) %>%
drop_na()
# Scatter plots to visualize relationships with expectedPointsAdded
expected_points_analysis <- red_zone_plays %>%
select(expectedPointsAdded, yardsToGo, down, absoluteYardlineNumber, offenseFormation) %>%
drop_na()
ggplot(expected_points_analysis, aes(x = down, y = expectedPointsAdded)) +
geom_point(color = "red") +
labs(title = "Down vs Expected Points Added",
x = "Down",
y = "Expected Points Added") +
theme_minimal() +
theme(
plot.title = element_text(size = 20, face = "bold", hjust = 0.5), # Center title
axis.title = element_text(size = 18), # Increase label size
axis.text = element_text(size = 16) # Increase tick label size
)
