Introduction

For Assingment 3, I modified the distance metric used for calculating the traffic meter scale for a punt returned for a touchdown. Initially, I standardized the play direction using code from kaggle then created a new boolean variable: isBlocking. isBlocking checks the player to see if they are on the same team and if they are within .5 yards in either the x or y direction from an opposing player. With the addition of isBlocking, I then calculate the distance from the returner to all opposing players that are not blocking and are also ahead of the returner in the x direction or behind them within 7 yards. In other words, an opponent’s position is calculated into the traffic meter scale if they are not blocked and within tackling range of the ball carrier.

#Buffalo (home) vs Miami Dolphins (away)
suppressWarnings(suppressMessages(library(data.table)))
suppressWarnings(suppressMessages(library(dplyr)))
suppressWarnings(suppressMessages(library(ggplot2)))
suppressWarnings(suppressMessages(library(gganimate)))
suppressWarnings(suppressMessages(library(gifski)))

#read in the necessary files for tracking and plays
player_tracking_df <- fread("tracking2020.csv")
## Avoidable 6.407 seconds. This file is very unusual: it ends abruptly without a final newline, and also its size is a multiple of 4096 bytes. Please properly end the last row with a newline using for example 'echo >> file' to avoid this  time to copy.
plays_df <- fread("plays.csv")

#Standardize Play Direction
player_tracking_df <- player_tracking_df %>%
  mutate(x = ifelse(playDirection == "left", 120-x, x),
         y = ifelse(playDirection == "left", 160/3 - y, y))

The PlayerStats Function

The PlayerStats function accepts five parameters: player tracking data, play data, game ID, play ID, and the player’s jersey number. The function returns a list containing 4 items; three descriptive results of the play, the game, and the player; and the combined dataframe created from the parameter dataframes.

#Player stats function
player_stats <- function(pt_df,p_df,game_ID,play_ID, jersey) {

  df <- pt_df %>%
    filter(gameId %in% game_ID & playId %in% play_ID) %>%
    left_join(p_df, by = c("gameId" = "gameId", "playId" = "playId")) %>%
    select(gameId, playId, time, x, y, dis, nflId, jerseyNumber, team, 
           frameId, playDescription,s,playDirection) %>%
    data.frame()
  
  df19 <- df %>%
    select(gameId, playId, time, jerseyNumber, x, y, frameId, dis, s) %>%
    filter(jerseyNumber == jersey) %>%
    data.frame()
  
  df2 <- left_join(df, df19, by = c("gameId" = "gameId",
                                    "playId" = "playId",
                                    "time" = "time", 
                                    "frameId" = "frameId")) %>%
    select(gameId, playId, time, x.x, y.x, nflId, jerseyNumber.x, 
           frameId, x.y, y.y,team, playDescription,playDirection, dis.y, s.y) %>%
  
    # compute the distance from player 19 to opposing players that are not being blocked 
    #If Opponent is Ahead or behind within 7 yards, calculate their distance 
    mutate(isBlocking = ifelse(team == "home" & (abs(x.y - x.x) <= .5 & abs(y.x-y.y) <= .5),FALSE,NA),
           dist = ifelse((isBlocking == FALSE & team == "away" & x.y > x.x )|(isBlocking == FALSE & team ==    "away" & x.x - x.y <= -7),
                         sqrt((x.x - x.y)^2 + (y.x - y.y)^2),
                         NA )) %>%
  
  # clean up or rename variables
  select(-c("x.y","y.y")) %>%
  rename(x = x.x, 
         y = y.x, 
         s = s.y,
         jerseyNumber = jerseyNumber.x) %>%
  
  # find the average distance from player 19 to opposing players within tackling range 
  #113.33 = dist on field from lower left corner to upper right corner
  #100 yards long and 53.3333 wide or 120 feet wide
  group_by(gameId, playId, frameId) %>%  
  mutate(mean_dist_to_19 = mean(dist, na.rm = TRUE),
         alpha = 1 - (mean_dist_to_19/113.33) ) %>%
  ungroup() %>%         
  
  # compute the total distance traveled by player 19
  arrange(jerseyNumber, time, frameId) %>%
  group_by(gameId, playId, jerseyNumber) %>% 
  mutate(dist_running_total = cumsum(dis.y)) %>%
  
  # create a width for a moving bar 
  mutate(width = round(  20*(1-(mean_dist_to_19/113.33))  ,0) ) %>%
  
  # create each play level stats for the receiver
  group_by(gameId, playId) %>%
  mutate(play_stat = mean(unique(mean_dist_to_19))) %>%
  ungroup() %>%
  
  # create each game level stats for the receiver
  group_by(gameId) %>%
  mutate(game_stat = mean(unique(play_stat))) %>%
  ungroup() %>%
  
  # create each player stat over all plays and all games
  mutate(player_stat = mean(unique(game_stat))) %>%
  
  # create a new data frame
  data.frame() 
  
  #print out play, game, and player stats
  result1 <- df2 %>% select(gameId, playId, play_stat, playDirection) %>% distinct %>% data.frame()
  result2 <- df2 %>% select(gameId, game_stat) %>% distinct %>% data.frame()
  result3 <- df2 %>% select(player_stat) %>% distinct %>% data.frame()
  
  return(list(result1,result2,result3,df2))
}

Summary dataframe

The Summary dataframe contains punts returned for touchdowns and then ranks the returner’s based on player stats.

players_df <- fread("players.csv")
jersey_df <- player_tracking_df %>%
  select(nflId,jerseyNumber)%>%
  filter(!is.na(jerseyNumber))%>%
  distinct() %>%
  data.frame()

punt_kickoff_df <- plays_df %>%
  filter(specialTeamsPlayType %in% c("Punt","Kickoff"),
         tolower(playDescription) %like% "touchdown", 
         !tolower(playDescription) %like% "nullified",
         !tolower(playDescription) %like% "recovered",
         !is.na(returnerId))%>%
  mutate(returnerId = as.integer(returnerId))%>%
  filter(gameId %in% player_tracking_df$gameId)%>%
  left_join(jersey_df, by = c("returnerId" = "nflId"))%>%
  left_join(players_df, by = c("returnerId"= "nflId"))%>%
  data_frame()

summary_df <- punt_kickoff_df%>%
  select(displayName, jerseyNumber, gameId, playId)%>%
  group_by(displayName,jerseyNumber)%>%
  summarise(games = as.vector(paste(unique(gameId),collapse = ",")),
            plays = as.vector(paste(playId, collapse = ",")),
            .groups = 'keep') %>%
  data.frame()
# Run 15 Times for top 15 Players based on player_stats
for(i in 1:nrow(summary_df)){
  out <- player_stats(player_tracking_df,plays_df,
                      summary_df$games[i],summary_df$plays[i],summary_df$jerseyNumber[i])
  
  if(i==1){
    result <- data.frame(matrix(ncol = 5, nrow = 0))
    colnames(result) <- c("displayName","jerseyNumber","gameId","playId","play_stat")
  }
  result <- rbind(result, data.frame("displayName" = summary_df$displayName[i],
                                     "jerseyNumber"= summary_df$displayName[i],
                                     out[[1]][1],
                                     out[[1]][2],
                                     out[[1]][3])
  )
}

result <- result %>%
  mutate(rank = rank(play_stat, ties.method = "random"))%>%
  arrange(rank)
game_ID <- c("2021010300")
play_ID <- c("1586", "395", "1232", "3930")
out <- player_stats(player_tracking_df,plays_df,game_ID, play_ID,19)
my_df <- out[[4]]
my_df$playId <- as.factor(my_df$playId)
result
##              displayName          jerseyNumber     gameId playId play_stat rank
## 1             A.J. Brown            A.J. Brown 2020112903   4350       NaN    1
## 2         Brandon Wilson        Brandon Wilson 2020112902    281       NaN    2
## 3          Byron Pringle         Byron Pringle 2020102509   1496       NaN    3
## 4            CeeDee Lamb           CeeDee Lamb 2020122013   4543       NaN    4
## 5  Cordarrelle Patterson Cordarrelle Patterson 2020111600   1853       NaN    5
## 6         Devin Duvernay        Devin Duvernay 2020092800   1132       NaN    6
## 7        Diontae Spencer       Diontae Spencer 2020121300    288       NaN    7
## 8       Gunner Olszewski      Gunner Olszewski 2020120611    870       NaN    8
## 9        Isaiah McKenzie       Isaiah McKenzie 2021010300   1586       NaN    9
## 10        Isaiah Rodgers        Isaiah Rodgers 2020101110   2229       NaN   10
## 11          Jakeem Grant          Jakeem Grant 2020110106   1473       NaN   11
## 12          Jalen Reagor          Jalen Reagor 2020120610   3092       NaN   12
## 13           Jamal Agnew           Jamal Agnew 2020122600   2547       NaN   13
## 14           Keelan Cole           Keelan Cole 2020111503   1246       NaN   14
## 15        Mecole Hardman        Mecole Hardman 2020121305   2196       NaN   15

Animation

This code runs the animation provided by Professor Tallon, incorporating my updated traffic meter scale.

# THE CODE BELOW IS DESIGNED FOR gameId = 2021010300 and playId = 1586
df3 <- my_df %>% filter(gameId == 2021010300 & playId == 1586) %>% data.frame()

# create a title for the plot using the play description
plot_title <- paste0(df3$playDescription[1], 
                     "\n The halo around the receiver grows darker based on the proximity of players from the opposing team.")

#loading command to make NFL field in ggplot (credit to Marschall Furman)
source('https://raw.githubusercontent.com/mlfurman3/gg_field/main/gg_field.R')

g <- ggplot(data=df3) +
  # sets up colors, shapes, and circle sizes
  scale_size_manual(values = c(6, 4, 6), guide="none") + 
  scale_shape_manual(values = c(21, 16, 21), guide="none") +
  scale_fill_manual(values = c("dodgerblue1", "#663300", "firebrick1"), guide="none") + 
  scale_colour_manual(values = c("#000000", "#663300", "#000000"), guide="none") + 
  
  # plot the field (full size)  
  gg_field(yardmin = -5, yardmax = 125) +
  
  # add labels and background to the plot
  labs(title = plot_title) + 
  theme(panel.background = element_rect(fill = 'forestgreen',
                                        color = 'forestgreen'),
        panel.grid = element_blank()) +  
  
  # add points for players on both teams and the football
  geom_point(aes(x = x, 
                 y = y, 
                 shape = team,
                 fill = team, 
                 group = nflId, 
                 size = team, 
                 colour = team)) +
  
  # add a larger circle around the receiver and shade it
  geom_point( data = df3%>% filter(jerseyNumber==19), 
              aes(x=x,y=y,alpha=alpha), 
              shape=21, 
              size=10,
              stroke=1,
              fill="black",
              colour="black") +
  
  # include jersey numbers in the center of all players' circles
  geom_text( aes(x = x, 
                 y = y,
                 group = nflId,
                 label=ifelse(is.na(jerseyNumber),"",jerseyNumber)), 
             colour="white", vjust=0.36, size=3.5) +
  
  # add informational text at the top of the plot
  geom_text(aes(x=-3, y=56, label="Traffic Meter Scale:"),
            colour="white",vjust=0,hjust=0) +
  geom_text(aes(x=38, y=56, label="Value:"),
            colour="white",vjust=0,hjust=0) +
  geom_text(aes(x=53, y=56, label="Distance traveled by receiver (yards):"),
            colour="white",vjust=0,hjust=0) +
  geom_text(aes(x=92.5, y=56, label="Speed (yards / sec):"),
            colour="white",vjust=0,hjust=0) +
  
  # draw an empty rectangle at the top of the plot, fill it in
  geom_rect(aes(xmin = 15, xmax = 35, ymin = 56, ymax = 57),
            color="black", fill="white") +
  geom_rect(data=df3 %>% filter(jerseyNumber==19), 
            aes(xmin = 15, xmax = 15 + width,
                ymin = 56, ymax = 57, alpha=alpha),
            color="black", fill="black") +
  
  # report out key metrics (mean dist to receiver)
  geom_text(data = df3%>% filter(jerseyNumber==19), 
            aes(x = 44.5, y = 56, 
                #label=format(round(100-mean_dist_to_19,2),nsmall=2),
                label = format( round( 100*(1-(mean_dist_to_19/113.33)) ,2), nsmall=2),
                vjust=0, hjust=0), 
            color="white") +
  
  # report out key metrics (total yards ran by receiver)
  geom_text(data = df3%>% filter(jerseyNumber==19), 
            aes(x = 85.5, y = 56, label=format(dist_running_total, nsmall=2),vjust=0,hjust=0), 
            color="white") +
  
  # report out key metrics (speed of receiver)
  geom_text(data = df3%>% filter(jerseyNumber==19), 
            aes(x = 111.9, y = 56, label=format(s, nsmall=2),vjust=0,hjust=0), 
            color="white") +
  
  # sketch the plot for each frameId
  transition_time(frameId) +
  
  # remove all color legends from the side of the plot
  guides(alpha = "none") 

# number of frames to animate
frames_to_display <- length(unique(df3$frameId))

# build the animation, specify fps, frame size 
animate(g, fps=10, nframe = frames_to_display, 
        width = 800, height = 380, 
        renderer = gifski_renderer())

# if you want to save the plot as a gif file
anim_save("out.gif")

Conclusion

In conclusion, I really enjoyed this project as it allowed me to think critically when determining what influences a traffic rating. Although my metric is not perfect, I learned how to make use of multivariate analysis in a new manner with dplyr in R. Potential changes / modifications could be: incorporating wind speed, returner speed relative to that of the opponents, and the exponential increase of the chance of success for every additional blocker near the returner. Alternativley to my current model, it could be recreated in a more complex manner incorporating player average speeds, wind direction, and matchup history.