For Assingment 3, I modified the distance metric used for calculating the traffic meter scale for a punt returned for a touchdown. Initially, I standardized the play direction using code from kaggle then created a new boolean variable: isBlocking. isBlocking checks the player to see if they are on the same team and if they are within .5 yards in either the x or y direction from an opposing player. With the addition of isBlocking, I then calculate the distance from the returner to all opposing players that are not blocking and are also ahead of the returner in the x direction or behind them within 7 yards. In other words, an opponent’s position is calculated into the traffic meter scale if they are not blocked and within tackling range of the ball carrier.
#Buffalo (home) vs Miami Dolphins (away)
suppressWarnings(suppressMessages(library(data.table)))
suppressWarnings(suppressMessages(library(dplyr)))
suppressWarnings(suppressMessages(library(ggplot2)))
suppressWarnings(suppressMessages(library(gganimate)))
suppressWarnings(suppressMessages(library(gifski)))
#read in the necessary files for tracking and plays
player_tracking_df <- fread("tracking2020.csv")
## Avoidable 6.407 seconds. This file is very unusual: it ends abruptly without a final newline, and also its size is a multiple of 4096 bytes. Please properly end the last row with a newline using for example 'echo >> file' to avoid this time to copy.
plays_df <- fread("plays.csv")
#Standardize Play Direction
player_tracking_df <- player_tracking_df %>%
mutate(x = ifelse(playDirection == "left", 120-x, x),
y = ifelse(playDirection == "left", 160/3 - y, y))
The PlayerStats function accepts five parameters: player tracking data, play data, game ID, play ID, and the player’s jersey number. The function returns a list containing 4 items; three descriptive results of the play, the game, and the player; and the combined dataframe created from the parameter dataframes.
#Player stats function
player_stats <- function(pt_df,p_df,game_ID,play_ID, jersey) {
df <- pt_df %>%
filter(gameId %in% game_ID & playId %in% play_ID) %>%
left_join(p_df, by = c("gameId" = "gameId", "playId" = "playId")) %>%
select(gameId, playId, time, x, y, dis, nflId, jerseyNumber, team,
frameId, playDescription,s,playDirection) %>%
data.frame()
df19 <- df %>%
select(gameId, playId, time, jerseyNumber, x, y, frameId, dis, s) %>%
filter(jerseyNumber == jersey) %>%
data.frame()
df2 <- left_join(df, df19, by = c("gameId" = "gameId",
"playId" = "playId",
"time" = "time",
"frameId" = "frameId")) %>%
select(gameId, playId, time, x.x, y.x, nflId, jerseyNumber.x,
frameId, x.y, y.y,team, playDescription,playDirection, dis.y, s.y) %>%
# compute the distance from player 19 to opposing players that are not being blocked
#If Opponent is Ahead or behind within 7 yards, calculate their distance
mutate(isBlocking = ifelse(team == "home" & (abs(x.y - x.x) <= .5 & abs(y.x-y.y) <= .5),FALSE,NA),
dist = ifelse((isBlocking == FALSE & team == "away" & x.y > x.x )|(isBlocking == FALSE & team == "away" & x.x - x.y <= -7),
sqrt((x.x - x.y)^2 + (y.x - y.y)^2),
NA )) %>%
# clean up or rename variables
select(-c("x.y","y.y")) %>%
rename(x = x.x,
y = y.x,
s = s.y,
jerseyNumber = jerseyNumber.x) %>%
# find the average distance from player 19 to opposing players within tackling range
#113.33 = dist on field from lower left corner to upper right corner
#100 yards long and 53.3333 wide or 120 feet wide
group_by(gameId, playId, frameId) %>%
mutate(mean_dist_to_19 = mean(dist, na.rm = TRUE),
alpha = 1 - (mean_dist_to_19/113.33) ) %>%
ungroup() %>%
# compute the total distance traveled by player 19
arrange(jerseyNumber, time, frameId) %>%
group_by(gameId, playId, jerseyNumber) %>%
mutate(dist_running_total = cumsum(dis.y)) %>%
# create a width for a moving bar
mutate(width = round( 20*(1-(mean_dist_to_19/113.33)) ,0) ) %>%
# create each play level stats for the receiver
group_by(gameId, playId) %>%
mutate(play_stat = mean(unique(mean_dist_to_19))) %>%
ungroup() %>%
# create each game level stats for the receiver
group_by(gameId) %>%
mutate(game_stat = mean(unique(play_stat))) %>%
ungroup() %>%
# create each player stat over all plays and all games
mutate(player_stat = mean(unique(game_stat))) %>%
# create a new data frame
data.frame()
#print out play, game, and player stats
result1 <- df2 %>% select(gameId, playId, play_stat, playDirection) %>% distinct %>% data.frame()
result2 <- df2 %>% select(gameId, game_stat) %>% distinct %>% data.frame()
result3 <- df2 %>% select(player_stat) %>% distinct %>% data.frame()
return(list(result1,result2,result3,df2))
}
The Summary dataframe contains punts returned for touchdowns and then ranks the returner’s based on player stats.
players_df <- fread("players.csv")
jersey_df <- player_tracking_df %>%
select(nflId,jerseyNumber)%>%
filter(!is.na(jerseyNumber))%>%
distinct() %>%
data.frame()
punt_kickoff_df <- plays_df %>%
filter(specialTeamsPlayType %in% c("Punt","Kickoff"),
tolower(playDescription) %like% "touchdown",
!tolower(playDescription) %like% "nullified",
!tolower(playDescription) %like% "recovered",
!is.na(returnerId))%>%
mutate(returnerId = as.integer(returnerId))%>%
filter(gameId %in% player_tracking_df$gameId)%>%
left_join(jersey_df, by = c("returnerId" = "nflId"))%>%
left_join(players_df, by = c("returnerId"= "nflId"))%>%
data_frame()
summary_df <- punt_kickoff_df%>%
select(displayName, jerseyNumber, gameId, playId)%>%
group_by(displayName,jerseyNumber)%>%
summarise(games = as.vector(paste(unique(gameId),collapse = ",")),
plays = as.vector(paste(playId, collapse = ",")),
.groups = 'keep') %>%
data.frame()
# Run 15 Times for top 15 Players based on player_stats
for(i in 1:nrow(summary_df)){
out <- player_stats(player_tracking_df,plays_df,
summary_df$games[i],summary_df$plays[i],summary_df$jerseyNumber[i])
if(i==1){
result <- data.frame(matrix(ncol = 5, nrow = 0))
colnames(result) <- c("displayName","jerseyNumber","gameId","playId","play_stat")
}
result <- rbind(result, data.frame("displayName" = summary_df$displayName[i],
"jerseyNumber"= summary_df$displayName[i],
out[[1]][1],
out[[1]][2],
out[[1]][3])
)
}
result <- result %>%
mutate(rank = rank(play_stat, ties.method = "random"))%>%
arrange(rank)
game_ID <- c("2021010300")
play_ID <- c("1586", "395", "1232", "3930")
out <- player_stats(player_tracking_df,plays_df,game_ID, play_ID,19)
my_df <- out[[4]]
my_df$playId <- as.factor(my_df$playId)
result
## displayName jerseyNumber gameId playId play_stat rank
## 1 A.J. Brown A.J. Brown 2020112903 4350 NaN 1
## 2 Brandon Wilson Brandon Wilson 2020112902 281 NaN 2
## 3 Byron Pringle Byron Pringle 2020102509 1496 NaN 3
## 4 CeeDee Lamb CeeDee Lamb 2020122013 4543 NaN 4
## 5 Cordarrelle Patterson Cordarrelle Patterson 2020111600 1853 NaN 5
## 6 Devin Duvernay Devin Duvernay 2020092800 1132 NaN 6
## 7 Diontae Spencer Diontae Spencer 2020121300 288 NaN 7
## 8 Gunner Olszewski Gunner Olszewski 2020120611 870 NaN 8
## 9 Isaiah McKenzie Isaiah McKenzie 2021010300 1586 NaN 9
## 10 Isaiah Rodgers Isaiah Rodgers 2020101110 2229 NaN 10
## 11 Jakeem Grant Jakeem Grant 2020110106 1473 NaN 11
## 12 Jalen Reagor Jalen Reagor 2020120610 3092 NaN 12
## 13 Jamal Agnew Jamal Agnew 2020122600 2547 NaN 13
## 14 Keelan Cole Keelan Cole 2020111503 1246 NaN 14
## 15 Mecole Hardman Mecole Hardman 2020121305 2196 NaN 15
This code runs the animation provided by Professor Tallon, incorporating my updated traffic meter scale.
# THE CODE BELOW IS DESIGNED FOR gameId = 2021010300 and playId = 1586
df3 <- my_df %>% filter(gameId == 2021010300 & playId == 1586) %>% data.frame()
# create a title for the plot using the play description
plot_title <- paste0(df3$playDescription[1],
"\n The halo around the receiver grows darker based on the proximity of players from the opposing team.")
#loading command to make NFL field in ggplot (credit to Marschall Furman)
source('https://raw.githubusercontent.com/mlfurman3/gg_field/main/gg_field.R')
g <- ggplot(data=df3) +
# sets up colors, shapes, and circle sizes
scale_size_manual(values = c(6, 4, 6), guide="none") +
scale_shape_manual(values = c(21, 16, 21), guide="none") +
scale_fill_manual(values = c("dodgerblue1", "#663300", "firebrick1"), guide="none") +
scale_colour_manual(values = c("#000000", "#663300", "#000000"), guide="none") +
# plot the field (full size)
gg_field(yardmin = -5, yardmax = 125) +
# add labels and background to the plot
labs(title = plot_title) +
theme(panel.background = element_rect(fill = 'forestgreen',
color = 'forestgreen'),
panel.grid = element_blank()) +
# add points for players on both teams and the football
geom_point(aes(x = x,
y = y,
shape = team,
fill = team,
group = nflId,
size = team,
colour = team)) +
# add a larger circle around the receiver and shade it
geom_point( data = df3%>% filter(jerseyNumber==19),
aes(x=x,y=y,alpha=alpha),
shape=21,
size=10,
stroke=1,
fill="black",
colour="black") +
# include jersey numbers in the center of all players' circles
geom_text( aes(x = x,
y = y,
group = nflId,
label=ifelse(is.na(jerseyNumber),"",jerseyNumber)),
colour="white", vjust=0.36, size=3.5) +
# add informational text at the top of the plot
geom_text(aes(x=-3, y=56, label="Traffic Meter Scale:"),
colour="white",vjust=0,hjust=0) +
geom_text(aes(x=38, y=56, label="Value:"),
colour="white",vjust=0,hjust=0) +
geom_text(aes(x=53, y=56, label="Distance traveled by receiver (yards):"),
colour="white",vjust=0,hjust=0) +
geom_text(aes(x=92.5, y=56, label="Speed (yards / sec):"),
colour="white",vjust=0,hjust=0) +
# draw an empty rectangle at the top of the plot, fill it in
geom_rect(aes(xmin = 15, xmax = 35, ymin = 56, ymax = 57),
color="black", fill="white") +
geom_rect(data=df3 %>% filter(jerseyNumber==19),
aes(xmin = 15, xmax = 15 + width,
ymin = 56, ymax = 57, alpha=alpha),
color="black", fill="black") +
# report out key metrics (mean dist to receiver)
geom_text(data = df3%>% filter(jerseyNumber==19),
aes(x = 44.5, y = 56,
#label=format(round(100-mean_dist_to_19,2),nsmall=2),
label = format( round( 100*(1-(mean_dist_to_19/113.33)) ,2), nsmall=2),
vjust=0, hjust=0),
color="white") +
# report out key metrics (total yards ran by receiver)
geom_text(data = df3%>% filter(jerseyNumber==19),
aes(x = 85.5, y = 56, label=format(dist_running_total, nsmall=2),vjust=0,hjust=0),
color="white") +
# report out key metrics (speed of receiver)
geom_text(data = df3%>% filter(jerseyNumber==19),
aes(x = 111.9, y = 56, label=format(s, nsmall=2),vjust=0,hjust=0),
color="white") +
# sketch the plot for each frameId
transition_time(frameId) +
# remove all color legends from the side of the plot
guides(alpha = "none")
# number of frames to animate
frames_to_display <- length(unique(df3$frameId))
# build the animation, specify fps, frame size
animate(g, fps=10, nframe = frames_to_display,
width = 800, height = 380,
renderer = gifski_renderer())
# if you want to save the plot as a gif file
anim_save("out.gif")
In conclusion, I really enjoyed this project as it allowed me to think critically when determining what influences a traffic rating. Although my metric is not perfect, I learned how to make use of multivariate analysis in a new manner with dplyr in R. Potential changes / modifications could be: incorporating wind speed, returner speed relative to that of the opponents, and the exponential increase of the chance of success for every additional blocker near the returner. Alternativley to my current model, it could be recreated in a more complex manner incorporating player average speeds, wind direction, and matchup history.