We decided to focus our project on the overall success of a play when motion is used. We first, had to define what a “successful” play would be. We did this by generating a metric to determine if a play was successful based upon the down and the number of yards gained. Another goal of ours was to see how the conditions (temperature, wind, surface type) of each game effected the outcomes of each play with motion. To show this, we created anova tables to see any correlations for when motion occurs compared to the success of the play.
For our visualizations, we utilized different chart types to look at how teams compared to one another in their effectiveness with motion. We also looked into the success of motion per play for NFL teams overall.
suppressMessages(library(data.table))
suppressMessages(library(dplyr))
suppressMessages(library(lubridate))
suppressMessages(library(httr))
suppressMessages(library(DescTools))
suppressMessages(library(scales))
suppressMessages(library(ggplot2))
suppressMessages(library(nflfastR))
suppressMessages(library(tidyr))
fileURL <- "https://www.dropbox.com/scl/fi/spy2limdm8kqleswa1pqo/nfl-big-data-bowl-2025.zip?rlkey=e4cz61xvmzlv3itdli1oee3dw&raw=1"
setwd("/Users/jonahgoodman/Desktop/NFLBDB2025/Data")
mypath <- paste0(getwd(),"/Data/NFLBDB2025")
source('https://raw.githubusercontent.com/ptallon/SportsAnalytics_Fall2024/main/SharedCode.R')
################################ Conditions Data
allGames <- nflreadr::load_schedules()
BDBGames <- allGames %>%
select(season, week, temp, wind, roof, home_team, away_team, surface) %>%
filter(season == 2022, week %in% c(1,2,3,4,5,6,7,8,9))
temp <- c(72,72,80,68,75,72,72,90,74,74,72,72,72,78,72,75,
79,81,81,72,81,72,79,79,72,68,72,80,72,72,66,74,
60,68,65,68,88,72,67,70,77,75,72,72,72,89,73,66,
58,64,69,57,72,72,72,65,57,55,57,64,62,72,75,70,
70,62,53,60,81,72,59,72,61,84,62,65,72,72,54,72,
47,78,64,48,63,84,72,62,61,72,77,64,59,72,
88,65,72,73,72,76,73,62,67,72,72,67,77,57,
79,61,72,65,72,72,72,60,60,72,72,72,53,48,63,
72,79,58,69,72,80,73,72,74,76,86,52,72
)
wind <- c(0,0,9,9,4,0,0,10,3,5,0,0,0,6,0,4,
8,6,11,0,6,0,10,3,0,13,0,6,0,6,10,3,
17,6,11,13,6,0,8,8,9,9,0,0,6,4,6,7,
5,8,4,12,0,0,0,11,12,12,5,3,8,0,5,6,
3,12,16,11,5,0,10,0,10,3,7,6,0,0,3,4,
14,4,9,10,8,12,0,2,2,0,3,13,1,0,
2,8,3,8,0,2,10,7,7,0,0,7,8,3,
2,13,0,6,0,0,0,2,1,0,0,0,9,6,6,
0,3,17,9,0,8,13,7,9,3,4,7,0
)
Precipitation <- c(F,F,F,T,F,F,F,F,T,F,F,F,F,F,F,F,
F,F,F,F,F,F,F,F,F,T,F,F,F,F,F,F,
F,F,F,F,F,F,F,F,F,F,F,F,F,F,F,F,
F,F,F,T,F,F,F,F,T,T,F,F,F,F,F,F,
F,F,F,F,F,F,F,F,F,F,F,F,F,F,F,F,
F,F,F,F,F,F,F,F,F,F,F,F,F,F,
F,F,F,F,F,F,F,F,F,F,F,F,F,F,
F,F,F,F,F,F,F,F,F,F,F,F,F,F,F,
F,F,F,F,F,F,F,F,F,F,F,F,F
)
BDBGames$temp <- temp
BDBGames$wind <- wind
BDBGames$precip <- Precipitation
library(nflfastR)
games_2022 <- fast_scraper_schedules(2022) %>% select(old_game_id, roof, surface, temp, wind)
games <- fread("Data/NFLBDB2025/games.csv")
games_2022$old_game_id <- as.integer(games_2022$old_game_id)
new_games_df <- games %>% select(gameId) %>%
left_join(games_2022, by = c("gameId" = "old_game_id")) %>%
data.frame()
list.files(path = "Data/NFLBDB2025")
## [1] "games.csv" "player_play.csv" "players.csv"
## [4] "plays.csv" "tracking_week_1.csv" "tracking_week_9.csv"
####Week 1
##########Creating the play success analytics
df <- load_data_for_one_week(directory = mypath, 1, TRUE)
BDBdfChat <- df %>%
select(down, yardsGained, inMotionAtBallSnap, yardsToGo, motionSinceLineset, shiftSinceLineset, homeTeamAbbr,visitorTeamAbbr,gameId) %>%
mutate(
# Define success based on the down
isSuccessful = case_when(
down == 1 ~ yardsGained >= 0.5 * yardsToGo,
down == 2 ~ yardsGained >= 2/3 * yardsToGo,
down %in% c(3, 4) ~ yardsGained >= yardsToGo,
TRUE ~ FALSE
)
) %>%
group_by(down,inMotionAtBallSnap,homeTeamAbbr,visitorTeamAbbr,gameId, motionSinceLineset, shiftSinceLineset) %>%
summarise(
successRate = mean(isSuccessful),
avgYards = mean(yardsGained),
.groups = "keep"
)
###################################
# Identify the last frame for each playId
last_frame_df <- df %>%
group_by(playId) %>%
summarise(last_frame = max(frameId), .groups = 'drop')
####REDUCING THE TRACKING DATA
df_reduced <- df %>%
left_join(last_frame_df, by = "playId") %>%
filter(frameId == last_frame) %>%
select(playId, frameId, yardsGained, inMotionAtBallSnap, motionSinceLineset, shiftSinceLineset, homeTeamAbbr,visitorTeamAbbr, gameId, down)
###FUTHER REDUCE TO GET RID OF 23 OBSERVATIONS FOR 1 PLAY
df_reduced_playlevel <- df_reduced %>%
group_by(playId) %>%
summarise(
totalYards = mean(yardsGained),
inMotionAtBallSnap = first(inMotionAtBallSnap),
motionSinceLineset = first(motionSinceLineset),
shiftSinceLineset = first(shiftSinceLineset),
homeTeamAbbr = first(homeTeamAbbr),
visitorTeamAbbr = first(visitorTeamAbbr),
gameId = first(gameId),
down = first(down),
.groups = 'drop'
)
BDBGames_week1 <- BDBGames %>%
filter(week == 1)
df_combined1 <- df_reduced_playlevel %>%
left_join(BDBGames_week1 %>%
select(home_team, away_team, temp, wind, roof, surface),
by = c("homeTeamAbbr" = "home_team", "visitorTeamAbbr" = "away_team"))
##LAST
df_combined1 <- BDBdfChat %>%
left_join(BDBGames, by = c("homeTeamAbbr" = "home_team", "visitorTeamAbbr" = "away_team"))
#################Week 9
df <- load_data_for_one_week(directory = mypath, 9, TRUE)
BDBdfChat <- df %>%
select(down, yardsGained, inMotionAtBallSnap, yardsToGo, motionSinceLineset, shiftSinceLineset, homeTeamAbbr,visitorTeamAbbr,gameId) %>%
mutate(
# Define success based on the down
isSuccessful = case_when(
down == 1 ~ yardsGained >= 0.5 * yardsToGo,
down == 2 ~ yardsGained >= 2/3 * yardsToGo,
down %in% c(3, 4) ~ yardsGained >= yardsToGo,
TRUE ~ FALSE
)
) %>%
group_by(down,inMotionAtBallSnap,homeTeamAbbr,visitorTeamAbbr,gameId, motionSinceLineset, shiftSinceLineset) %>%
summarise(
successRate = mean(isSuccessful),
avgYards = mean(yardsGained),
.groups = "keep"
)
###################################
# Identify the last frame for each playId
last_frame_df <- df %>%
group_by(playId) %>%
summarise(last_frame = max(frameId), .groups = 'drop')
####REDUCING THE TRACKING DATA
df_reduced <- df %>%
left_join(last_frame_df, by = "playId") %>%
filter(frameId == last_frame) %>%
select(playId, frameId, yardsGained, inMotionAtBallSnap, motionSinceLineset, shiftSinceLineset, homeTeamAbbr,visitorTeamAbbr, gameId, down)
###FUTHER REDUCE TO GET RID OF 23 OBSERVATIONS FOR 1 PLAY
df_reduced_playlevel <- df_reduced %>%
group_by(playId) %>%
summarise(
totalYards = mean(yardsGained),
inMotionAtBallSnap = first(inMotionAtBallSnap),
motionSinceLineset = first(motionSinceLineset),
shiftSinceLineset = first(shiftSinceLineset),
homeTeamAbbr = first(homeTeamAbbr),
visitorTeamAbbr = first(visitorTeamAbbr),
gameId = first(gameId),
down = first(down),
.groups = 'drop'
)
BDBGames_week9 <- BDBGames %>%
filter(week == 9)
df_combined9 <- df_reduced_playlevel %>%
left_join(BDBGames_week9 %>%
select(home_team, away_team, temp, wind, roof, surface),
by = c("homeTeamAbbr" = "home_team", "visitorTeamAbbr" = "away_team"))
##LAST
df_combined9 <- BDBdfChat %>%
left_join(BDBGames, by = c("homeTeamAbbr" = "home_team", "visitorTeamAbbr" = "away_team"))
#############################################################
##Week 1:
df_combined1$inMotionAtBallSnap <- as.factor(df_combined1$inMotionAtBallSnap)
df_combined1$motionSinceLineset <- as.factor(df_combined1$motionSinceLineset)
df_combined1$shiftSinceLineset <- as.factor(df_combined1$shiftSinceLineset)
df_combined1$surface <- as.factor(df_combined1$surface)
df_combined1$temp <- as.numeric(df_combined1$temp)
df_combined1$wind <- as.numeric(df_combined1$wind)
anova_model <- aov(successRate ~ shiftSinceLineset + motionSinceLineset + inMotionAtBallSnap + temp + wind + surface, data = df_combined1)
summary(anova_model)
## Df Sum Sq Mean Sq F value Pr(>F)
## shiftSinceLineset 1 0.142 0.1423 2.011 0.1577
## motionSinceLineset 1 0.064 0.0635 0.897 0.3446
## inMotionAtBallSnap 1 0.066 0.0664 0.938 0.3338
## temp 1 0.082 0.0820 1.158 0.2831
## wind 1 0.341 0.3413 4.822 0.0292 *
## surface 4 0.388 0.0970 1.371 0.2452
## Residuals 206 14.580 0.0708
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 99 observations deleted due to missingness
# Week one box plot
df_combined1 <- df_combined1 %>%
pivot_longer(cols = c(homeTeamAbbr, visitorTeamAbbr),
names_to = "teamType",
values_to = "teamAbbr") %>%
mutate(teamType = ifelse(teamType == "homeTeamAbbr", "Home", "Away"))
ggplot(df_combined1, aes(x = interaction(teamAbbr, week), y = successRate, fill = inMotionAtBallSnap)) +
geom_boxplot() +
labs(title = "Success Rates by Home and Away Team for Each Matchup",
x = "Matchup (Home vs Away) and Week",
y = "Success Rate") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
facet_wrap(~week) + # Separate by week
theme(axis.text.x = element_text(angle = 90, hjust = 1))
##Week 9
df_combined9$inMotionAtBallSnap <- as.factor(df_combined9$inMotionAtBallSnap)
df_combined9$motionSinceLineset <- as.factor(df_combined9$motionSinceLineset)
df_combined9$shiftSinceLineset <- as.factor(df_combined9$shiftSinceLineset)
df_combined9$surface <- as.factor(df_combined9$surface)
df_combined9$temp <- as.numeric(df_combined9$temp)
df_combined9$wind <- as.numeric(df_combined9$wind)
anova_model <- aov(successRate ~ shiftSinceLineset + motionSinceLineset + inMotionAtBallSnap + temp + wind + surface, data = df_combined9)
summary(anova_model)
## Df Sum Sq Mean Sq F value Pr(>F)
## shiftSinceLineset 1 0.097 0.09731 1.173 0.2806
## motionSinceLineset 1 0.007 0.00709 0.085 0.7705
## inMotionAtBallSnap 1 0.111 0.11137 1.343 0.2485
## temp 1 0.114 0.11387 1.373 0.2433
## wind 1 0.266 0.26563 3.202 0.0756 .
## surface 3 0.499 0.16647 2.007 0.1157
## Residuals 144 11.945 0.08295
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 260 observations deleted due to missingness
## Week 9 Box plot
df_combined9 <- df_combined9 %>%
pivot_longer(cols = c(homeTeamAbbr, visitorTeamAbbr),
names_to = "teamType",
values_to = "teamAbbr") %>%
mutate(teamType = ifelse(teamType == "homeTeamAbbr", "Home", "Away"))
ggplot(df_combined9, aes(x = interaction(teamAbbr, week), y = successRate, fill = inMotionAtBallSnap)) +
geom_boxplot() +
labs(title = "Success Rates by Home and Away Team for Each Matchup",
x = "Matchup (Home vs Away) and Week",
y = "Success Rate") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
facet_wrap(~week) + # Separate by week
theme(axis.text.x = element_text(angle = 90, hjust = 1))
#Week 1
###Scatter plot
ggplot(df_combined1, aes(x = avgYards, y = successRate)) +
geom_point(aes(color = inMotionAtBallSnap), alpha = 0.6) +
geom_smooth(method = "lm", se = FALSE, color = "black") +
labs(title = "Yards Gained vs Success Rate",
x = "Yards Gained", y = "Success Rate") +
scale_color_manual(values = c("red", "blue")) +
theme_minimal()
#### Bar chart week 1
summary_data <- df_combined1 %>%
group_by(down, inMotionAtBallSnap) %>%
summarise(successRate = mean(successRate), .groups = "drop")
ggplot(summary_data, aes(x = as.factor(down), y = successRate, fill = as.factor(inMotionAtBallSnap))) +
geom_bar(stat = "identity", position = "stack", color = "black") +
geom_text(aes(label = round(successRate, 2)),
position = position_stack(vjust = 0.5),
color = "white", size = 4) +
labs(
title = "Success Rate by Down Type and Motion at Ball Snap",
x = "Down",
y = "Average Success Rate",
fill = "Motion at Ball Snap"
) +
scale_fill_manual(values = c("blue", "orange")) +
scale_x_discrete(labels = c("1" = "1st Down", "2" = "2nd Down", "3" = "3rd Down", "4" = "4th Down")) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "top",
plot.title = element_text(hjust = 0.5)
)
#Week 9
###Scatter plot
ggplot(df_combined9, aes(x = avgYards, y = successRate)) +
geom_point(aes(color = inMotionAtBallSnap), alpha = 0.6) +
geom_smooth(method = "lm", se = FALSE, color = "black") +
labs(title = "Yards Gained vs Success Rate",
x = "Yards Gained", y = "Success Rate") +
scale_color_manual(values = c("red", "blue")) +
theme_minimal()
#### Bar chart week 9
summary_data <- df_combined9 %>%
group_by(down, inMotionAtBallSnap) %>%
summarise(successRate = mean(successRate), .groups = "drop")
ggplot(summary_data, aes(x = as.factor(down), y = successRate, fill = as.factor(inMotionAtBallSnap))) +
geom_bar(stat = "identity", position = "stack", color = "black") +
geom_text(aes(label = round(successRate, 2)),
position = position_stack(vjust = 0.5),
color = "white", size = 4) +
labs(
title = "Success Rate by Down Type and Motion at Ball Snap",
x = "Down",
y = "Average Success Rate",
fill = "Motion at Ball Snap"
) +
scale_fill_manual(values = c("blue", "orange")) +
scale_x_discrete(labels = c("1" = "1st Down", "2" = "2nd Down", "3" = "3rd Down", "4" = "4th Down")) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "top",
plot.title = element_text(hjust = 0.5)
)
We used tracking data from week 1 and week 9 to highlight the changes teams made during the season with motion and how it effected each teams success rate. Teams like the Chiefs had a much higher success rate than others, while the Giants and Titans tended to be less successful than the average team. Chart types that we used included, scatter plots, box plots, and bar graphs to highlight these differences. For week 1, we found that using motion on later downs had a higher success rate compared to earlier downs. For week 9, we found that teams had adjusted their defensive schemes, and the success rate was similar across all downs. Looking at the anova tables, we found that the variable shiftSinceLineset had the greatest statistically significance effect on the success rate of a play, with a p-value of 0.0509. Overall, we were very pleased with our findings on success using motion, as well as how conditions like wind and others played a part in the degree of success. We enjoyed working on this project and were very happy with the findings we have seen.