setwd("C:/Users/17169/OneDrive - Loyola University Maryland/IS470SportsAnalytics/Data")


# ---------- supplied functions start here -----------------------------------------------------------------


# ---------------------------------------------------------------------------
# --------------------------- Load Libraries --------------------------------
# ---------------------------------------------------------------------------
# supply a vector of desired packages. Packages will be installed if not
# already installed and available to RStudio


load_packages <- function(packages = c()) {
  if (length(packages) == 0) {
    print('You did not specify any packages/libraries to load')
  }
  else {
    for (i in packages){
      if(! i %in% installed.packages()){
        install.packages(i, dependencies = TRUE)
      }
      suppressMessages(suppressWarnings(library(i, character.only=T)))
    }
  }
}




# -----------------------------------------------------------------------------------------------
# ------------ Checks for a valid playId and gameId; optionally check for valid frameId ---------
# -----------------------------------------------------------------------------------------------
check_playId_gameId <- function(game_Id, 
                                play_Id, 
                                frame_Id = c(), 
                                data) {
  if (!game_Id %in% unique(data$gameId)) {
    return("the game_Id you have used is not in the data frame.")
  }
  if (!play_Id %in% unique(data$playId)) {
    return("the play_Id you have used is not in the data frame.")
  }
  if(!missing(frame_Id)) {
    if (!frame_Id %in% unique(data$frameId)) {
      return("the frame_Id you have used is not in the data frame.")
    } 
  }  
  return("ok")
}




# ---------------------------------------------------------------------------
# ------------------ Visualize an Individual Play (with player) -------------
# ---------------------------------------------------------------------------
# provide a merged data frame, a game Id, a play Id, and a player.
visualize_play <- function(df,
                           df_team1,
                           df_team2, 
                           playerNumber = NA) 
{
  
  source('https://raw.githubusercontent.com/mlfurman3/gg_field/main/gg_field.R')
  
  
  g <- ggplot(data = df, aes(x = x, y = y)) +
    
    # customize colors, shapes, and sizes of players and the football
    scale_size_manual(values = c(6, 4, 6), guide = "none") +
    scale_shape_manual(values = c(21, 16, 21), guide = "none") +
    scale_fill_manual(values = c("dodgerblue1", "#663300", "firebrick1"), guide = "none") +
    scale_colour_manual(values = c("black", "#663300", "black"), guide = "none") +
    
    gg_field(yardmin = -5, yardmax = 125) +
    
    # add home team logo to the center point
    geom_image(data = df_team1, aes(x = 59, y = 53.33/2, image = team_logo_wikipedia), size = 0.15) +
    
    # add end zone team names
    annotate("text", x = 4, y = 27, label = df_team2$team_name, angle = 90, size = 5, fontface = 2, color = "white") +
    annotate("text", x = 115, y = 27, label = df_team1$team_name, angle = 270, size = 5, fontface = 2, color = "white") +
    
    # add informational text at the top of the field
    annotate("text", x = -3, y = 56, label = "Traffic Meter Scale:", colour = "white", vjust = 0, hjust = 0 ) +
    annotate("text", x = 38, y = 56, label = "Value:"              , colour = "white", vjust = 0, hjust = 0 ) +
    annotate("text", x = 52, y = 56, label = "Distance traveled by receiver (yards):", colour = "white", vjust = 0, hjust = 0 ) +
    annotate("text", x = 92.5, y = 56, label = "Speed (yards / sec):", colour = "white", vjust = 0, hjust = 0 ) +
    
    # add convex hulls for both teams
    geom_encircle( data = df %>% filter(team != "football"), 
                   aes(fill = team,
                       group = team,
                       color = team),
                   s_shape = 1,
                   expand = 0,
                   alpha = 0.5,
                   show.legend = TRUE) +
    
    # add halo around receiver
    geom_point(data = df %>% filter(jerseyNumber == playerNumber),
               aes(x = x, y = y, alpha = alpha), 
               shape = 21,
               size = 10,
               stroke = 1, # width of the circle border
               fill = "black",
               colour = "black") +
    
    # add points to plot for all players and the football
    geom_point(data = df, aes(x = x, y = y, shape = team, colour = team, size = team, fill = team) ) +
    
    # insert jersey number for each player
    geom_text( data = df %>% filter(team != "football"),
               aes(x = x, y = y, 
                   label = jerseyNumber), colour = "white", size = 3.5, vjust = 0.36 ) +
    
    # report out key metrics (total yards ran by receiver)
    geom_text( data = df %>% filter(jerseyNumber == playerNumber),
               aes(x = 85.5, y = 56, label = format(dist_running_total, nsmall = 2),
                   vjust = 0, hjust = 0), color = "white" ) +
    
    # report out key metrics (mean dist to player)
    geom_text( data = df %>% filter(jerseyNumber == playerNumber),
               aes(x = 44.5, y = 56, label = format(round(100 * (1 - (mean_dist_to_player/113.33)),2), nsmall = 2),
                   vjust = 0, hjust = 0), color = "white" ) +
    
    # report out key metrics (speed of receiver)
    geom_text( data = df %>% filter(jerseyNumber == playerNumber),
               aes(x = 111.9, y = 56, label = s, vjust = 0, hjust = 0), color = "white" ) +
    
    # add some labels to report the play description
    labs(title = df$playDescription) +
    
    # set the theme to dark green to color the areas beyond the end zones
    theme(panel.background = element_rect(fill = "forestgreen", 
                                          color = "forestgreen"), panel.grid = element_blank()) +
    annotate("rect", xmin = 15, xmax = 35, ymin = 56, ymax = 57, color = "black", fill = "white") +
    
    geom_rect(data = df %>% filter(jerseyNumber == playerNumber),
              aes(xmin = 15, xmax = 15 + (alpha*20), ymin = 56, ymax = 57, 
                  alpha = alpha), color = "black", fill = "black" ) +
    guides(alpha = "none") +
    transition_time(frameId)
  
  
  g
  
  
}

load_packages(c("ggplot2", "ggalt", "ggforce", "hms", "gganimate", "data.table", "dplyr", "nflfastR", "gifski", "png", "ggimage"))

df_tracking <- fread("NFLBDB2022/NFL2022/tracking2020.csv")
df_plays    <- fread("NFLBDB2022/NFL2022/plays.csv")
df_games    <- fread("NFLBDB2022/NFL2022/games.csv")
df_players  <- fread("NFLBDB2022/NFL2022/players.csv")



# you are creating a metric for a player in a play (playId) in a game (gameId)
my_gameId <- 2021010300
my_playId <- 1586 
my_playerNumber <- 19


df <- df_tracking %>%
  filter(gameId == my_gameId & playId == my_playId ) %>%
  left_join(df_plays, by = c("playId" = "playId", "gameId" = "gameId")) %>%
  select(x, y, displayName, jerseyNumber, team, gameId, playId, frameId, time, 
         nflId, dis, playDescription, s) %>%
  data.frame()


team1 <- as.character(  df_games[df_games$gameId == my_gameId, "homeTeamAbbr"]  )
team2 <- as.character(  df_games[df_games$gameId == my_gameId, "visitorTeamAbbr"]  )


df_team1 <- teams_colors_logos %>%
  filter(team_abbr == team1)


df_team2 <- teams_colors_logos %>%
  filter(team_abbr == team2)


df_player1 <- df %>%
  filter(jerseyNumber == my_playerNumber) %>%
  select(gameId, playId, frameId, x, y, dis, team) %>%
  data.frame()


df <- df %>%
  left_join(df_player1, by = c( "playId" = "playId", "gameId" = "gameId", "frameId" = "frameId")) %>%
  
  # ----------------------------- here is the code that I created in class -----------------------------
# compute the distance to each player on the opposing team (not to your teammates and not to the ball)


  mutate(dist = ifelse( team.x != team.y & team.x != "football", 
                      sqrt( (x.x - x.y)^2 + (y.x - y.y)^2), 
                      NA)) %>%
  
  group_by(gameId, playId, frameId) %>%
  
  # compute the average of all distances
  mutate(mean_dist_to_player = mean(dist, na.rm = TRUE ),
         alpha = 1 - (mean_dist_to_player/113.33)  ) %>%
  ungroup()

Description of Project

For this project we looked to enhance the original varible metric, ‘mean_dist_to_player’, by starting with only looking to players that were closest to the return player. We first did this by creating 2 variables, closest2_distance, which was a variable that we used to check the distance of the second closest player, and closest_player which categorized the 2 closest players to the returner as 1, and all other defenders as 0. Following this we created a final variable average_to_closest_2_players which would find the average between the closest 2 players to the return man. To compare our results with the original metric, we used the cor.test function to messure the correlation

df2 <- df%>%
  arrange(frameId, dist)%>%
  group_by(frameId, team.x == "away")%>%
  mutate(closest2_distance = nth(dist, 2 , order_by = dist), 
         closest_players = ifelse(dist <= closest2_distance, 1, 0) )%>%
  
  ungroup()%>%
  group_by(frameId,team.x == "away", closest_players)%>%
  mutate(average_to_closest_2_players = mean(dist))%>%
  ungroup()%>%
  mutate(average_to_closest_2_players = ifelse(closest_players == 0, NA, average_to_closest_2_players))%>%
  data.frame()



correlation_test = cor.test(df$mean_dist_to_player, df2$average_to_closest_2_players, na.rm = T)
correlation_test
## 
##  Pearson's product-moment correlation
## 
## data:  df$mean_dist_to_player and df2$average_to_closest_2_players
## t = 0.051073, df = 460, p-value = 0.9593
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.08886694  0.09358988
## sample estimates:
##         cor 
## 0.002381285
library(ggplot2)
theme_set(theme_classic())

df_filtered <- df2 %>%
  filter(!is.na(average_to_closest_2_players))
# Comparison Line Chart
ggplot(df_filtered, aes(x = frameId)) +
  geom_line(aes(y = mean_dist_to_player, color = "Mean Distance to Player"), size = 2) +
  geom_line(aes(y = average_to_closest_2_players, color = "Average Distance of the Closest 2 Defenders"), size = 2) +
  scale_color_manual(values = c("Mean Distance to Player" = "blue", "Average Distance of the Closest 2 Defenders" = "green")) +
  labs(title = "Comparison of Mean Distance to Player and Average to Closest 2 Players",
       x = "Frame ID",
       y = "Distance (In Yards",
       color = "Legend") +
  theme_minimal()+
  theme(plot.title = element_text(hjust = .5))


Conclusion

We created a duel line graph to examine the two variables side by side, analyzing the areas of similarity and the areas of difference. The graph compares the average_to_closest_2_players with mean_dist_to_player, to highlight how the proximity to the nearest players is very similar to the average distance across all players. We also ran a t-test which put out a strong p-value of 0.9593, indicating that the relationship between these metrics was statistically significant. This data shows the variable could be meaningful for analyzing player positioning during a return.