setwd("~/IS470")
library(httr)
library(dplyr)
library(data.table)
library(ggplot2)
source("https://raw.githubusercontent.com/ptallon/SportsAnalytics_Fall2024/main/SharedCode.R")
directory <- paste0(getwd(), "/NFLBDB2025")
In this assignment, we will be doing a statistical analysis of jet motion plays using tracking data from the 2025 NFL Big Data Bowl Competition, comparing jet motion plays to normal motion plays and non-motion plays.
# Inputting all dataframes
weekOne <- load_data_for_one_week(directory, 1, TRUE)
weekOne <- select(weekOne, week, gameId,playId,nflId,displayName,frameId,frameType,time,jerseyNumber,club,playDirection,
x,y,s,a,dis,o,dir,homeTeamAbbr,visitorTeamAbbr,playDescription,down,yardsToGo,
possessionTeam,defensiveTeam,yardlineSide,yardlineNumber, expectedPoints, offenseFormation, receiverAlignment,yardsGained,
expectedPointsAdded,pff_passCoverage, pff_manZone, position, inMotionAtBallSnap, shiftSinceLineset, motionSinceLineset, team)
weekTwo <- load_data_for_one_week(directory, 2, TRUE)
weekTwo <- select(weekTwo, week, gameId,playId,nflId,displayName,frameId,frameType,time,jerseyNumber,club,playDirection,
x,y,s,a,dis,o,dir,homeTeamAbbr,visitorTeamAbbr,playDescription,down,yardsToGo,
possessionTeam,defensiveTeam,yardlineSide,yardlineNumber, expectedPoints, offenseFormation, receiverAlignment,yardsGained,
expectedPointsAdded,pff_passCoverage, pff_manZone, position, inMotionAtBallSnap, shiftSinceLineset, motionSinceLineset, team)
weekThree <- load_data_for_one_week(directory, 3, TRUE)
weekThree <- select(weekThree, week, gameId,playId,nflId,displayName,frameId,frameType,time,jerseyNumber,club,playDirection,
x,y,s,a,dis,o,dir,homeTeamAbbr,visitorTeamAbbr,playDescription,down,yardsToGo,
possessionTeam,defensiveTeam,yardlineSide,yardlineNumber, expectedPoints, offenseFormation, receiverAlignment,yardsGained,
expectedPointsAdded,pff_passCoverage, pff_manZone, position, inMotionAtBallSnap, shiftSinceLineset, motionSinceLineset, team)
weekFour <- load_data_for_one_week(directory, 4, TRUE)
weekFour <- select(weekFour, week, gameId,playId,nflId,displayName,frameId,frameType,time,jerseyNumber,club,playDirection,
x,y,s,a,dis,o,dir,homeTeamAbbr,visitorTeamAbbr,playDescription,down,yardsToGo,
possessionTeam,defensiveTeam,yardlineSide,yardlineNumber, expectedPoints, offenseFormation, receiverAlignment,yardsGained,
expectedPointsAdded,pff_passCoverage, pff_manZone, position, inMotionAtBallSnap, shiftSinceLineset, motionSinceLineset, team)
weekFive <- load_data_for_one_week(directory, 5, TRUE)
weekFive <- select(weekFive, week, gameId,playId,nflId,displayName,frameId,frameType,time,jerseyNumber,club,playDirection,
x,y,s,a,dis,o,dir,homeTeamAbbr,visitorTeamAbbr,playDescription,down,yardsToGo,
possessionTeam,defensiveTeam,yardlineSide,yardlineNumber, expectedPoints, offenseFormation, receiverAlignment,yardsGained,
expectedPointsAdded,pff_passCoverage, pff_manZone, position, inMotionAtBallSnap, shiftSinceLineset, motionSinceLineset, team)
weekSix <- load_data_for_one_week(directory, 6, TRUE)
weekSix <- select(weekSix, week, gameId,playId,nflId,displayName,frameId,frameType,time,jerseyNumber,club,playDirection,
x,y,s,a,dis,o,dir,homeTeamAbbr,visitorTeamAbbr,playDescription,down,yardsToGo,
possessionTeam,defensiveTeam,yardlineSide,yardlineNumber, expectedPoints, offenseFormation, receiverAlignment,yardsGained,
expectedPointsAdded,pff_passCoverage, pff_manZone, position, inMotionAtBallSnap, shiftSinceLineset, motionSinceLineset, team)
weekSeven <- load_data_for_one_week(directory, 7, TRUE)
weekSeven <- select(weekSeven, week, gameId,playId,nflId,displayName,frameId,frameType,time,jerseyNumber,club,playDirection,
x,y,s,a,dis,o,dir,homeTeamAbbr,visitorTeamAbbr,playDescription,down,yardsToGo,
possessionTeam,defensiveTeam,yardlineSide,yardlineNumber, expectedPoints, offenseFormation, receiverAlignment,yardsGained,
expectedPointsAdded,pff_passCoverage, pff_manZone, position, inMotionAtBallSnap, shiftSinceLineset, motionSinceLineset, team)
weekEight <- load_data_for_one_week(directory, 8, TRUE)
weekEight <- select(weekEight, week, gameId,playId,nflId,displayName,frameId,frameType,time,jerseyNumber,club,playDirection,
x,y,s,a,dis,o,dir,homeTeamAbbr,visitorTeamAbbr,playDescription,down,yardsToGo,
possessionTeam,defensiveTeam,yardlineSide,yardlineNumber, expectedPoints, offenseFormation, receiverAlignment,yardsGained,
expectedPointsAdded,pff_passCoverage, pff_manZone, position, inMotionAtBallSnap, shiftSinceLineset, motionSinceLineset, team)
weekNine <- load_data_for_one_week(directory, 9, TRUE)
weekNine <- select(weekNine, week, gameId,playId,nflId,displayName,frameId,frameType,time,jerseyNumber,club,playDirection,
x,y,s,a,dis,o,dir,homeTeamAbbr,visitorTeamAbbr,playDescription,down,yardsToGo,
possessionTeam,defensiveTeam,yardlineSide,yardlineNumber, expectedPoints, offenseFormation, receiverAlignment,yardsGained,
expectedPointsAdded,pff_passCoverage, pff_manZone, position, inMotionAtBallSnap, shiftSinceLineset, motionSinceLineset, team)
# Final DF
workingDF <- rbind(weekOne,weekTwo,weekThree,weekFour,weekFive,weekSix, weekSeven, weekEight, weekNine)
#write.csv(workingDF, file = "2025BDBDATA.csv", row.names = FALSE)
rm(weekOne, weekTwo,weekThree,weekFour,weekFive,weekSix,weekSeven,weekEight,weekNine)
player_play <- fread('NFLBDB2025/player_play.csv')
motionStats <- motion_stats_single_week(workingDF, player_play)
## [1] "2022091112 112" "2022101604 273" "2022091101 1826" "2022091111 520"
## [5] "2022092500 3391" "2022101604 295" "2022110602 1272" "2022091104 2877"
## [9] "2022091108 3063" "2022091111 336" "2022091500 1237" "2022091805 3355"
## [13] "2022092500 3160" "2022100206 1307" "2022100905 2736" "2022110601 2616"
## [17] "2022090800 2712" "2022090800 3283" "2022091101 85" "2022091101 438"
## [21] "2022091101 2365" "2022091101 2629" "2022091101 2951" "2022091101 3080"
## [25] "2022091101 3862" "2022091101 3961" "2022091101 4242" "2022091102 1700"
## [29] "2022091102 2065" "2022091102 2373" "2022091102 2596" "2022091102 3022"
## [33] "2022091102 3148" "2022091102 3739" "2022091103 58" "2022091103 499"
## [37] "2022091103 520" "2022091103 1105" "2022091103 1150" "2022091103 1228"
## [41] "2022091103 1493" "2022091103 2008" "2022091103 2082" "2022091103 2719"
## [45] "2022091103 3105" "2022091103 3126" "2022091103 3613" "2022091103 3680"
## [49] "2022091104 288" "2022091104 1094" "2022091104 3016" "2022091104 3502"
## [53] "2022091104 4046" "2022091105 336" "2022091105 1283" "2022091107 391"
## [57] "2022091108 296" "2022091108 947" "2022091108 1354" "2022091108 2846"
## [61] "2022091108 3396" "2022091109 3938" "2022091109 4051" "2022091111 3457"
## [65] "2022091112 62" "2022091112 736" "2022091112 760" "2022091800 724"
## [69] "2022091800 1108" "2022091800 3179" "2022091801 1204" "2022091802 1010"
## [73] "2022091803 2064" "2022091805 79" "2022091805 100" "2022091805 616"
## [77] "2022091805 2229" "2022091805 3313" "2022091806 1491" "2022091806 3222"
## [81] "2022091808 544" "2022091808 1643" "2022091808 2882" "2022091810 99"
## [85] "2022091811 379" "2022092200 214" "2022092200 419" "2022092200 746"
## [89] "2022092200 1348" "2022092200 1477" "2022092200 3620" "2022092500 87"
## [93] "2022092500 808" "2022092500 2790" "2022092500 3631" "2022092501 3485"
## [97] "2022092502 182" "2022092502 485" "2022092502 664" "2022092502 955"
## [101] "2022092504 896" "2022092509 741" "2022092509 1687" "2022092509 2936"
## [105] "2022092513 603" "2022092513 1237" "2022092513 1991" "2022092513 2991"
## [109] "2022100203 1236" "2022100205 2065" "2022100207 56" "2022100207 165"
## [113] "2022100207 877" "2022100207 1668" "2022100207 2288" "2022100208 407"
## [117] "2022100209 89" "2022100209 3000" "2022100210 488" "2022100210 994"
## [121] "2022100210 3214" "2022100300 2665" "2022100300 3150" "2022100903 1227"
## [125] "2022100905 3026" "2022100907 2511" "2022100908 63" "2022100908 435"
## [129] "2022100908 1156" "2022100908 2661" "2022100910 1082" "2022100910 1439"
## [133] "2022100910 1692" "2022100910 2153" "2022100910 2834" "2022101600 849"
## [137] "2022101600 2300" "2022101600 2957" "2022101603 641" "2022101603 1418"
## [141] "2022101603 2901" "2022101603 3772" "2022101603 3793" "2022101604 494"
## [145] "2022101604 3502" "2022101606 1288" "2022101607 241" "2022101607 360"
## [149] "2022101607 1252" "2022101607 1368" "2022101607 4052" "2022101608 100"
## [153] "2022101608 196" "2022101608 578" "2022101608 878" "2022101608 1871"
## [157] "2022101608 2322" "2022102300 265" "2022102300 2215" "2022102300 2342"
## [161] "2022102301 389" "2022102301 643" "2022102301 664" "2022102303 421"
## [165] "2022102303 1742" "2022102303 3283" "2022102304 923" "2022102304 1038"
## [169] "2022102305 264" "2022102305 1861" "2022102305 2349" "2022102310 874"
## [173] "2022102310 1776" "2022102311 976" "2022102311 1018" "2022102311 1063"
## [177] "2022102311 2835" "2022102311 3096" "2022102311 3570" "2022102700 2870"
## [181] "2022102700 3581" "2022102700 4205" "2022103002 1399" "2022103003 56"
## [185] "2022103003 1934" "2022103005 3801" "2022103008 848" "2022103008 3482"
## [189] "2022103009 626" "2022103009 2741" "2022103010 879" "2022103010 1012"
## [193] "2022103010 1905" "2022103010 2386" "2022103010 2410" "2022103010 2553"
## [197] "2022103011 2344" "2022103012 1183" "2022103012 3301" "2022110601 104"
## [201] "2022110601 2357" "2022110601 2738" "2022110604 3314" "2022110607 248"
## [205] "2022110607 3475" "2022110607 3596"
For the sake of our analysis, we must define jet motion. Our working definition will be met when a receiver is in motion at the snap of the ball, has traveled over 10 yards before the snap, and has an average speed of 1 yard per second.
# Beginning of A4
workingDF <- workingDF %>%
group_by(playId) %>%
mutate(hasMotion = inMotionAtBallSnap | motionSinceLineset | shiftSinceLineset) %>%
ungroup()
workingDF <- left_join(workingDF, motionStats, by=c("gameId", "playId"), relationship = "many-to-many")
workingDF <- workingDF %>%
mutate(jetMotion = inMotionAtBallSnap & sum_abs_dy > 10 & speed_yrds_second > 1)
jetMotionDF <- workingDF %>%
filter(jetMotion == TRUE)
motionDF <- workingDF %>%
filter(hasMotion == TRUE & jetMotion == FALSE)
noMotionDF <- workingDF %>%
filter(hasMotion == FALSE)
#colnames(workingDF)
# General Motion
jetMotionVsMotion <- t.test(jetMotionDF$expectedPointsAdded, motionDF$expectedPointsAdded, alternative = "greater")
print(jetMotionVsMotion)
##
## Welch Two Sample t-test
##
## data: jetMotionDF$expectedPointsAdded and motionDF$expectedPointsAdded
## t = 19.861, df = 36101, p-value < 2.2e-16
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 0.1171671 Inf
## sample estimates:
## mean of x mean of y
## 0.22156674 0.09381934
meanJetMotionEPA <- mean(jetMotionDF$expectedPointsAdded)
meanMotionEPA <- mean(motionDF$expectedPointsAdded)
# Combine data into a list
jetMotionVsMotionboxplot_data <- list(
"Jet Motion" = jetMotionDF$expectedPointsAdded,
"Motion" = motionDF$expectedPointsAdded
)
# Create the boxplot
jetMotionvsMotionBP <- boxplot(jetMotionVsMotionboxplot_data,
main = "Jet Motion vs Motion",
xlab = "Motion Type",
ylab = "Expected Points Added",
col = c("skyblue", "lightgreen"),
ylim = c(-3,3)
)
motionVsNoMotion <- t.test(motionDF$expectedPointsAdded, noMotionDF$expectedPointsAdded, alternative = "greater")
print(motionVsNoMotion)
##
## Welch Two Sample t-test
##
## data: motionDF$expectedPointsAdded and noMotionDF$expectedPointsAdded
## t = 2.4923, df = 816413, p-value = 0.006345
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 0.001568306 Inf
## sample estimates:
## mean of x mean of y
## 0.09381934 0.08920716
meanNoMotionEPA <- mean(noMotionDF$expectedPointsAdded)
# Combine data into a list
motionboxplot_data <- list(
"Motion" = motionDF$expectedPointsAdded,
"No Motion" = noMotionDF$expectedPointsAdded
)
# Create the boxplot
motionBoxplot <- boxplot(motionboxplot_data,
main = "Comparison of Expected Points Added: Motion vs No Motion",
xlab = "Type",
ylab = "Expected Points Added",
col = c("skyblue", "lightgreen"),
ylim = c(-3, 3)) # Adjust y-axis range as needed
alignmentANOVA <- aov(expectedPointsAdded ~ receiverAlignment, data = jetMotionDF)
summary(alignmentANOVA)
## Df Sum Sq Mean Sq F value Pr(>F)
## receiverAlignment 6 453 75.55 65.39 <2e-16 ***
## Residuals 30673 35440 1.16
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
alignment_means <- tapply(jetMotionDF$expectedPointsAdded, jetMotionDF$receiverAlignment, mean)
# Print the mean values for each receiverAlignment category
print(alignment_means)
## 1x1 2x0 2x1 2x2 3x1 3x2 4x1
## 0.6756986 0.5651000 0.5326591 0.1854076 0.2496157 0.1647839 -0.8945360
# Create a boxplot for expectedPointsAdded by receiverAlignment
alignmentPlot <- boxplot(expectedPointsAdded ~ receiverAlignment,
data = jetMotionDF,
main = "Expected Points Added by Receiver Alignment",
xlab = "Receiver Alignment",
ylab = "Expected Points Added",
col = c("lightblue", "lightgreen", "lightpink", "turquoise", "purple", "yellow", "brown"),
ylim = c(-3, 3)) # Adjust y-axis limits as necessary
downsANOVA <- aov(expectedPointsAdded ~ down, data = jetMotionDF)
summary(downsANOVA)
## Df Sum Sq Mean Sq F value Pr(>F)
## down 1 234 233.79 201.1 <2e-16 ***
## Residuals 30678 35659 1.16
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
downsBP <- boxplot(expectedPointsAdded ~ down,
data = jetMotionDF,
main = "Expected Points Added by Down",
xlab = "Down",
ylab = "Expected Points Added",
col = c("lightblue", "lightgreen", "lightpink", "turquoise"), # Colors for up to 4 downs
ylim = c(-5, 5))
# Calculate mean of expectedPointsAdded for each down category
down_means <- tapply(jetMotionDF$expectedPointsAdded, jetMotionDF$down, mean)
# Print the mean values
print(down_means)
## 1 2 3
## 0.1035905 0.3249087 0.2661528
coverageANOVA <- aov(expectedPointsAdded ~ pff_manZone, data = jetMotionDF)
means_by_coverage <- tapply(jetMotionDF$expectedPointsAdded, jetMotionDF$pff_manZone, mean)
# Print the result
print(means_by_coverage)
## Man Other Zone
## 0.4192390 0.5553587 0.1191999
summary(coverageANOVA)
## Df Sum Sq Mean Sq F value Pr(>F)
## pff_manZone 2 795 397.3 347.3 <2e-16 ***
## Residuals 30677 35098 1.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coverageboxplot <- boxplot(expectedPointsAdded ~ pff_manZone,
data = jetMotionDF,
main = "Expected Points Added by Man/Zone Coverage",
xlab = "Man/Zone Coverage",
ylab = "Expected Points Added",
col = c("lightblue", "lightgreen", "turquoise"), # Colors for each category
ylim = c(-5, 5))
safetyDF <- jetMotionDF %>%
mutate(safety_alignment = case_when(
pff_passCoverage == "Cover-3" ~ "1-High",
pff_passCoverage == "Quarters" ~ "2-High",
pff_passCoverage == "Cover-2" ~ "2-High",
pff_passCoverage == "Cover-1" ~ "1-High",
pff_passCoverage == "Cover-3 Seam" ~ "1-High",
pff_passCoverage == "2-Man" ~ "2-High",
pff_passCoverage == "Goal Line" ~ "Other",
pff_passCoverage == "Cover 6-Left" ~ "2-High",
pff_passCoverage == "Cover-0" ~ "Other",
pff_passCoverage == "Red Zone" ~ "Other",
pff_passCoverage == "Cover-6 Right" ~ "2-High"
))
oneHighDF <- safetyDF %>% filter(safety_alignment == "1-High")
oneHighMean <- mean(oneHighDF$expectedPointsAdded)
twoHighDF <- safetyDF %>% filter(safety_alignment == "2-High")
twoHighMean <- mean(twoHighDF$expectedPointsAdded)
t.test(oneHighDF$expectedPointsAdded, twoHighDF$expectedPointsAdded, alternative = "greater")
##
## Welch Two Sample t-test
##
## data: oneHighDF$expectedPointsAdded and twoHighDF$expectedPointsAdded
## t = 18.652, df = 23727, p-value < 2.2e-16
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 0.216045 Inf
## sample estimates:
## mean of x mean of y
## 0.29434179 0.05740062
# Combine the two dataframes to make plotting easier
combinedSafetyDF <- bind_rows(oneHighDF, twoHighDF, .id = "alignment_group")
# Create the boxplot
safetyboxplot <- boxplot(expectedPointsAdded ~ alignment_group,
data = combinedSafetyDF,
main = "Expected Points Added for 1-High vs 2-High Safety Alignments",
xlab = "Safety Alignment",
ylab = "Expected Points Added",
col = c("lightblue", "lightgreen"), # Color for the two groups
ylim = c(-3, 3)) # Adjust y-axis limits as necessary
It’s clear that utilizing jet motion is a very effective strategy for offenses, especially out of “imbalanced” receiver alignments where the defense has to adjust to the strength of the formation changing. On the defensive side, zone coverage is effective at stopping jet motion, as it allows defenders to avoid tunnel vision on their pass-coverage assignments in man. Furthermore, 2-high safety shells tend to be much more effective at stopping jet motion.