setwd("C:/Users/17169/OneDrive - Loyola University Maryland/IS470SportsAnalytics/Data")
# ---------- supplied functions start here -----------------------------------------------------------------
# ---------------------------------------------------------------------------
# --------------------------- Load Libraries --------------------------------
# ---------------------------------------------------------------------------
# supply a vector of desired packages. Packages will be installed if not
# already installed and available to RStudio
load_packages <- function(packages = c()) {
if (length(packages) == 0) {
print('You did not specify any packages/libraries to load')
}
else {
for (i in packages){
if(! i %in% installed.packages()){
install.packages(i, dependencies = TRUE)
}
suppressMessages(suppressWarnings(library(i, character.only=T)))
}
}
}
# -----------------------------------------------------------------------------------------------
# ------------ Checks for a valid playId and gameId; optionally check for valid frameId ---------
# -----------------------------------------------------------------------------------------------
check_playId_gameId <- function(game_Id,
play_Id,
frame_Id = c(),
data) {
if (!game_Id %in% unique(data$gameId)) {
return("the game_Id you have used is not in the data frame.")
}
if (!play_Id %in% unique(data$playId)) {
return("the play_Id you have used is not in the data frame.")
}
if(!missing(frame_Id)) {
if (!frame_Id %in% unique(data$frameId)) {
return("the frame_Id you have used is not in the data frame.")
}
}
return("ok")
}
# ---------------------------------------------------------------------------
# ------------------ Visualize an Individual Play (with player) -------------
# ---------------------------------------------------------------------------
# provide a merged data frame, a game Id, a play Id, and a player.
visualize_play <- function(df,
df_team1,
df_team2,
playerNumber = NA)
{
source('https://raw.githubusercontent.com/mlfurman3/gg_field/main/gg_field.R')
g <- ggplot(data = df, aes(x = x, y = y)) +
# customize colors, shapes, and sizes of players and the football
scale_size_manual(values = c(6, 4, 6), guide = "none") +
scale_shape_manual(values = c(21, 16, 21), guide = "none") +
scale_fill_manual(values = c("dodgerblue1", "#663300", "firebrick1"), guide = "none") +
scale_colour_manual(values = c("black", "#663300", "black"), guide = "none") +
gg_field(yardmin = -5, yardmax = 125) +
# add home team logo to the center point
geom_image(data = df_team1, aes(x = 59, y = 53.33/2, image = team_logo_wikipedia), size = 0.15) +
# add end zone team names
annotate("text", x = 4, y = 27, label = df_team2$team_name, angle = 90, size = 5, fontface = 2, color = "white") +
annotate("text", x = 115, y = 27, label = df_team1$team_name, angle = 270, size = 5, fontface = 2, color = "white") +
# add informational text at the top of the field
annotate("text", x = -3, y = 56, label = "Traffic Meter Scale:", colour = "white", vjust = 0, hjust = 0 ) +
annotate("text", x = 38, y = 56, label = "Value:" , colour = "white", vjust = 0, hjust = 0 ) +
annotate("text", x = 52, y = 56, label = "Distance traveled by receiver (yards):", colour = "white", vjust = 0, hjust = 0 ) +
annotate("text", x = 92.5, y = 56, label = "Speed (yards / sec):", colour = "white", vjust = 0, hjust = 0 ) +
# add convex hulls for both teams
geom_encircle( data = df %>% filter(team != "football"),
aes(fill = team,
group = team,
color = team),
s_shape = 1,
expand = 0,
alpha = 0.5,
show.legend = TRUE) +
# add halo around receiver
geom_point(data = df %>% filter(jerseyNumber == playerNumber),
aes(x = x, y = y, alpha = alpha),
shape = 21,
size = 10,
stroke = 1, # width of the circle border
fill = "black",
colour = "black") +
# add points to plot for all players and the football
geom_point(data = df, aes(x = x, y = y, shape = team, colour = team, size = team, fill = team) ) +
# insert jersey number for each player
geom_text( data = df %>% filter(team != "football"),
aes(x = x, y = y,
label = jerseyNumber), colour = "white", size = 3.5, vjust = 0.36 ) +
# report out key metrics (total yards ran by receiver)
geom_text( data = df %>% filter(jerseyNumber == playerNumber),
aes(x = 85.5, y = 56, label = format(dist_running_total, nsmall = 2),
vjust = 0, hjust = 0), color = "white" ) +
# report out key metrics (mean dist to player)
geom_text( data = df %>% filter(jerseyNumber == playerNumber),
aes(x = 44.5, y = 56, label = format(round(100 * (1 - (mean_dist_to_player/113.33)),2), nsmall = 2),
vjust = 0, hjust = 0), color = "white" ) +
# report out key metrics (speed of receiver)
geom_text( data = df %>% filter(jerseyNumber == playerNumber),
aes(x = 111.9, y = 56, label = s, vjust = 0, hjust = 0), color = "white" ) +
# add some labels to report the play description
labs(title = df$playDescription) +
# set the theme to dark green to color the areas beyond the end zones
theme(panel.background = element_rect(fill = "forestgreen",
color = "forestgreen"), panel.grid = element_blank()) +
annotate("rect", xmin = 15, xmax = 35, ymin = 56, ymax = 57, color = "black", fill = "white") +
geom_rect(data = df %>% filter(jerseyNumber == playerNumber),
aes(xmin = 15, xmax = 15 + (alpha*20), ymin = 56, ymax = 57,
alpha = alpha), color = "black", fill = "black" ) +
guides(alpha = "none") +
transition_time(frameId)
g
}
load_packages(c("ggplot2", "ggalt", "ggforce", "hms", "gganimate", "data.table", "dplyr", "nflfastR", "gifski", "png", "ggimage"))
df_tracking <- fread("NFLBDB2022/NFL2022/tracking2020.csv")
df_plays <- fread("NFLBDB2022/NFL2022/plays.csv")
df_games <- fread("NFLBDB2022/NFL2022/games.csv")
df_players <- fread("NFLBDB2022/NFL2022/players.csv")
# you are creating a metric for a player in a play (playId) in a game (gameId)
my_gameId <- 2021010300
my_playId <- 1586
my_playerNumber <- 19
df <- df_tracking %>%
filter(gameId == my_gameId & playId == my_playId ) %>%
left_join(df_plays, by = c("playId" = "playId", "gameId" = "gameId")) %>%
select(x, y, displayName, jerseyNumber, team, gameId, playId, frameId, time,
nflId, dis, playDescription, s) %>%
data.frame()
team1 <- as.character( df_games[df_games$gameId == my_gameId, "homeTeamAbbr"] )
team2 <- as.character( df_games[df_games$gameId == my_gameId, "visitorTeamAbbr"] )
df_team1 <- teams_colors_logos %>%
filter(team_abbr == team1)
df_team2 <- teams_colors_logos %>%
filter(team_abbr == team2)
df_player1 <- df %>%
filter(jerseyNumber == my_playerNumber) %>%
select(gameId, playId, frameId, x, y, dis, team) %>%
data.frame()
df <- df %>%
left_join(df_player1, by = c( "playId" = "playId", "gameId" = "gameId", "frameId" = "frameId")) %>%
# ----------------------------- here is the code that I created in class -----------------------------
# compute the distance to each player on the opposing team (not to your teammates and not to the ball)
mutate(dist = ifelse( team.x != team.y & team.x != "football",
sqrt( (x.x - x.y)^2 + (y.x - y.y)^2),
NA)) %>%
group_by(gameId, playId, frameId) %>%
# compute the average of all distances
mutate(mean_dist_to_player = mean(dist, na.rm = TRUE ),
alpha = 1 - (mean_dist_to_player/113.33) ) %>%
ungroup()
For this project we looked to enhance the original varible metric, ‘mean_dist_to_player’, by starting with only looking to players that were closest to the return player. We first did this by creating 2 variables, closest2_distance, which was a variable that we used to check the distance of the second closest player, and closest_player which categorized the 2 closest players to the returner as 1, and all other defenders as 0. Following this we created a final variable average_to_closest_2_players which would find the average between the closest 2 players to the return man. To compare our results with the original metric, we used the cor.test function to messure the correlation
df2 <- df%>%
arrange(frameId, dist)%>%
group_by(frameId, team.x == "away")%>%
mutate(closest2_distance = nth(dist, 2 , order_by = dist),
closest_players = ifelse(dist <= closest2_distance, 1, 0) )%>%
ungroup()%>%
group_by(frameId,team.x == "away", closest_players)%>%
mutate(average_to_closest_2_players = mean(dist))%>%
ungroup()%>%
mutate(average_to_closest_2_players = ifelse(closest_players == 0, NA, average_to_closest_2_players))%>%
data.frame()
correlation_test = cor.test(df$mean_dist_to_player, df2$average_to_closest_2_players, na.rm = T)
correlation_test
##
## Pearson's product-moment correlation
##
## data: df$mean_dist_to_player and df2$average_to_closest_2_players
## t = 0.051073, df = 460, p-value = 0.9593
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.08886694 0.09358988
## sample estimates:
## cor
## 0.002381285
library(ggplot2)
theme_set(theme_classic())
df_filtered <- df2 %>%
filter(!is.na(average_to_closest_2_players))
# Comparison Line Chart
ggplot(df_filtered, aes(x = frameId)) +
geom_line(aes(y = mean_dist_to_player, color = "Mean Distance to Player"), size = 2) +
geom_line(aes(y = average_to_closest_2_players, color = "Average Distance of the Closest 2 Defenders"), size = 2) +
scale_color_manual(values = c("Mean Distance to Player" = "blue", "Average Distance of the Closest 2 Defenders" = "green")) +
labs(title = "Comparison of Mean Distance to Player and Average to Closest 2 Players",
x = "Frame ID",
y = "Distance (In Yards",
color = "Legend") +
theme_minimal()+
theme(plot.title = element_text(hjust = .5))
We created a duel line graph to examine the two variables side by side, analyzing the areas of similarity and the areas of difference. The graph compares the average_to_closest_2_players with mean_dist_to_player, to highlight how the proximity to the nearest players is very similar to the average distance across all players. We also ran a t-test which put out a strong p-value of 0.9593, indicating that the relationship between these metrics was statistically significant. This data shows the variable could be meaningful for analyzing player positioning during a return.