Introduction

To determine the traffic volume that Isaiah McKenzie went through during the specified punt play where he returned it for a touchdown, we created a metric that calculates the players within a 10 yard radius of McKenzie to describe at each frame how much traffic he was evading.

Description of Project

For this project, we decided to display the metric two different ways. We chose to display the metric numerically based on how many defensive players entered a 10 yard area around Isaiah. We created a subset of the data for every frame id and collected defensive players around McKenzie at that frame.Using the amount of players inside the 10 yard radius for each frame, it was divided by the amount of defensive players on the field, which would be 11. The quotient of these will provide us with our metric. We changed the animation function given by Paul Tallon to create a color scale for defensive players where they begin the play at light blue when none of them are near McKenzie. As the play goes on and defensive players get closer to tackling McKenzie, their circles begin to get darker almost being black when McKenzie is surrounded by defenders. Isaiah McKenzie’s teammates will always stay red because they are not a threat to tackle him.

Data Visualization

For our data visualization, we animated the play while displaying our metric “Traffic Volume” at the bottom of the screen with the value of traffic Isaiah McKenzie is facing in each frame in the play. We also changed the color of defenders as the play goes on from a light blue to a dark color as defenders begin to enclose on McKenzie.

setwd("/Users/jose/Desktop/Data Viz")
 
 
# ---------- supplied functions start here -----------------------------------------------------------------
 
 
# ---------------------------------------------------------------------------
# --------------------------- Load Libraries --------------------------------
# ---------------------------------------------------------------------------
# supply a vector of desired packages. Packages will be installed if not
# already installed and available to RStudio
 
 
load_packages <- function(packages = c()) {
  if (length(packages) == 0) {
    print('You did not specify any packages/libraries to load')
  }
  else {
    for (i in packages){
      if(! i %in% installed.packages()){
        install.packages(i, dependencies = TRUE)
      }
      suppressMessages(suppressWarnings(library(i, character.only=T)))
    }
  }
}
 
 
 
 
# -----------------------------------------------------------------------------------------------
# ------------ Checks for a valid playId and gameId; optionally check for valid frameId ---------
# -----------------------------------------------------------------------------------------------
check_playId_gameId <- function(game_Id,
                                play_Id,
                                frame_Id = c(),
                                data) {
  if (!game_Id %in% unique(data$gameId)) {
    return("the game_Id you have used is not in the data frame.")
  }
  if (!play_Id %in% unique(data$playId)) {
    return("the play_Id you have used is not in the data frame.")
  }
  if(!missing(frame_Id)) {
    if (!frame_Id %in% unique(data$frameId)) {
      return("the frame_Id you have used is not in the data frame.")
    }
  } 
  return("ok")
}
# provide a merged data frame, a game Id, a play Id, and a player.
visualize_play <- function(df,
                           df_team1,
                           df_team2,
                           playerNumber = NA)
{
 
  source('https://raw.githubusercontent.com/mlfurman3/gg_field/main/gg_field.R')
 
  
  g <- ggplot(data = df, aes(x = x, y = y)) +
   
    # customize colors, shapes, and sizes of players and the football
    scale_size_manual(values = c(6, 4, 6), guide = "none") +
    scale_shape_manual(values = c(21, 16, 21), guide = "none") +
    scale_fill_gradient(low = "lightblue", high = "black", limits = c(0, 60), guide = "none") +
    scale_colour_manual(values = c("black", "#663300", "black"), guide = "none") +
   
    gg_field(yardmin = -5, yardmax = 125) +
   
    # add home team logo to the center point
    geom_image(data = df_team1, aes(x = 59, y = 53.33/2, image = team_logo_wikipedia), size = 0.15) +
   
    # add end zone team names
    annotate("text", x = 4, y = 27, label = df_team2$team_name, angle = 90, size = 5, fontface = 2, color = "white") +
    annotate("text", x = 115, y = 27, label = df_team1$team_name, angle = 270, size = 5, fontface = 2, color = "white") +
   
    # add informational text at the top of the field
    annotate("text", x = -3, y = 56, label = "Traffic Meter Scale:", colour = "white", vjust = 0, hjust = 0 ) +
    annotate("text", x = 38, y = 56, label = "Value:"              , colour = "white", vjust = 0, hjust = 0 ) +
    annotate("text", x = 52, y = 56, label = "Distance traveled by receiver (yards):", colour = "white", vjust = 0, hjust = 0 ) +
    annotate("text", x = 92.5, y = 56, label = "Speed (yards / sec):", colour = "white", vjust = 0, hjust = 0 ) +
   
    # -------New Information from the new metric----------
    annotate("text", x = 38, y = -2, label = "Traffic Volume:", colour = "white", vjust = 0, hjust = 0 ) +
   
    
    # add halo around receiver
    geom_point(data = df %>% filter(jerseyNumber == playerNumber),
               aes(x = x, y = y, alpha = alpha),
               shape = 21,
               size = 10,
               stroke = 1, # width of the circle border
               fill = "black",
               colour = "black") +
   
    # --------The following goem points are to differentiate between away and home teams. Only the away team changes color when they approach the Qb--------
    geom_point(data = df %>% filter(team == "away"),
               aes(x = x, y = y, shape = team, colour = team, size = team, fill = metric_of_players_in_frame)) +
    geom_point(data = df %>% filter(team != "away"),
               aes(x = x, y = y, shape = team, colour = team, size = team), fill = "firebrick1") +
   
    # insert jersey number for each player
    geom_text( data = df %>% filter(team != "football"),
               aes(x = x, y = y,
                   label = jerseyNumber), colour = "white", size = 3.5, vjust = 0.36 ) +
   
    # --------report out NEW KEY METRIC-----------
    geom_text( data = df %>% filter(jerseyNumber == playerNumber),
               aes(x = 60, y = -2, label = round(metric_of_players_in_frame, digits = 2),
                   vjust = 0, hjust = 0), color = "white" ) +
   
    # report out key metrics (total yards ran by receiver)
    geom_text( data = df %>% filter(jerseyNumber == playerNumber),
               aes(x = 85.5, y = 56, label = format(dist_running_total, nsmall = 2),
                   vjust = 0, hjust = 0), color = "white" ) +
   
    # report out key metrics (mean dist to player)
    geom_text( data = df %>% filter(jerseyNumber == playerNumber),
               aes(x = 44.5, y = 56, label = format(round(100 * (1 - (mean_dist_to_player/113.33)),2), nsmall = 2),
                   vjust = 0, hjust = 0), color = "white" ) +
   
    # report out key metrics (speed of receiver)
    geom_text( data = df %>% filter(jerseyNumber == playerNumber),
               aes(x = 111.9, y = 56, label = s, vjust = 0, hjust = 0), color = "white" ) +
   
    # add some labels to report the play description
    labs(title = df$playDescription) +
   
    # set the theme to dark green to color the areas beyond the end zones
    theme(panel.background = element_rect(fill = "forestgreen",
                                          color = "forestgreen"), panel.grid = element_blank()) +
    annotate("rect", xmin = 15, xmax = 35, ymin = 56, ymax = 57, color = "black", fill = "white") +
   
    geom_rect(data = df %>% filter(jerseyNumber == playerNumber),
              aes(xmin = 15, xmax = 15 + (alpha*20), ymin = 56, ymax = 57,
                  alpha = alpha), color = "black", fill = "black" ) +
    guides(alpha = "none") +
    transition_time(frameId)
 
  
  g
 
  
}
# ----------  main code starts here ----------------------------------------------------------------------
 
load_packages(c("ggplot2", "ggalt", "ggforce", "hms", "gganimate", "lubridate", "data.table", "dplyr", "nflfastR", "gifski", "png", "ggimage"))
 
df_tracking <- fread("Data/NFLBDB2022/NFL2022/tracking2020.csv")
df_plays    <- fread("Data/NFLBDB2022/NFL2022/plays.csv")
df_games    <- fread("Data/NFLBDB2022/NFL2022/games.csv")
 
 
# you are creating a metric for a player in a play (playId) in a game (gameId)
my_gameId <- 2021010300
my_playId <- 1586
my_playerNumber <- 19
df <- df_tracking %>%
  filter(gameId == my_gameId & playId == my_playId ) %>%
  left_join(df_plays, by = c("playId" = "playId", "gameId" = "gameId")) %>%
  select(x, y, displayName, jerseyNumber, team, gameId, playId, frameId, time,
         nflId, dis, playDescription, s) %>%
  data.frame()
 
team1 <- as.character(  df_games[df_games$gameId == my_gameId, "homeTeamAbbr"]  )
team2 <- as.character(  df_games[df_games$gameId == my_gameId, "visitorTeamAbbr"]  )
 
 
df_team1 <- teams_colors_logos %>%
  filter(team_abbr == team1)
 
 
df_team2 <- teams_colors_logos %>%
  filter(team_abbr == team2)
 
 
df_player1 <- df %>%
  filter(jerseyNumber == my_playerNumber) %>%
  select(gameId, playId, frameId, x, y, dis, team) %>%
  data.frame()
 
# ------------This is the original DF created in class -----------
df <- df %>%
  left_join(df_player1, by = c( "playId" = "playId", "gameId" = "gameId", "frameId" = "frameId")) %>%
  mutate(dist = ifelse( team.x != team.y & team.x != "football",
                       sqrt( (x.x - x.y)^2 + (y.x - y.y)^2),
                       NA)) %>%
  group_by(gameId, playId, frameId) %>%
  mutate(mean_dist_to_player = mean(dist, na.rm = TRUE ),
         alpha = 1 - (mean_dist_to_player/113.33)  ) %>%
  ungroup() %>%
  arrange(nflId, frameId) %>%
  group_by(nflId) %>%
  mutate( dist_running_total = cumsum(dis.x)) %>%
  rename( "x" = "x.x",
          "y" = "y.x",
          "team" = "team.x",
          "dis" = "dis.x") %>%
  mutate(inside = ifelse(dist < 10 & team != "home", 1, 0)) %>%
  data.frame()
#--------This is a subset of the data where we counted defensive players in the danger zone---------
# From the count of inside, we provided a proportion using a quotient to get a percentage of players in the zone------
df1 <- df %>%
  filter(inside == 1) %>%
  select(dist, inside, team, frameId) %>%
  group_by(frameId) %>%
  summarize(metric_of_players_in_frame = ((n() / 11) * 100)) %>%
  data.frame()
#-------- DF2 is the joined version of DF, this creates a column of the metric for each frame of the play -----------
df2 <- df %>%
  left_join(df1, by = c("frameId"= "frameId")) %>%
  data.frame()
 
# Replace NA with 0 using ifelse
df2$metric_of_players_in_frame <- ifelse(is.na(df2$metric_of_players_in_frame), 0, df2$metric_of_players_in_frame)
# run the line below if you want to visualize the play
visualize_play(df2, df_team1, df_team2, playerNumber = 19)