Introduction

This is a draft of the code to create Rating a Punt Return.

I started out by filtering distances of team members on the offense (in increment of 5 and then settled for distances >20 uards ) from player 19 and selecting the direction of the play to “right”.

setwd("C:/Users/rande/Rprogram/IS470SportsAnalytics")
suppressWarnings(suppressMessages(library(data.table)))
suppressWarnings(suppressMessages(library(dplyr)))
suppressWarnings(suppressMessages(library(ggplot2)))
suppressWarnings(suppressMessages(library(gganimate)))
suppressWarnings(suppressMessages(library(gifski)))
library(httr)
library(dplyr)
library(RColorBrewer)
library(ggplot2)
library(data.table)
library(tidyverse)
fileURL <- "https://www.dropbox.com/s/1omgytiw40crxos/nfl-big-data-bowl-2020.zip?raw=1"
zip_file <- "NFL2020.zip"

if (file.exists(zip_file)) {
  file.remove(zip_file)
}
## [1] TRUE
GET (url = fileURL, write_disk(zip_file))
## Response [https://uc0d2b8f42cba09e33d070f944cd.dl.dropboxusercontent.com/cd/0/inline2/BwXspXKKFzHWvOEMi-0ppFf9rP6ou24ULmxbb5-ApbdIjt8TO3ADLkYo_gYXSVnHlGpqI-YeaMJ6ffSkVZh2m1jkmJTHRVmZh_jldlIkwprecKi9uxeZCy1S8B25c6otn--doOoyeHbcng19CzzHDL9MH0JV-c1db6S2a_nTUh0rB8_0IhxoNN_rUdxpOmlcgKqVHr9vEQYJ6baei3CaZew4vp3o9XVDYCYrk7VJNXL1pqavpxE9BY09E3MnKAwI3DbXcoA1t82R0w6_evr8IaRtyT7-n_6T3s-pIYAhba3LJYhTDI9JNn7Md_dXswZ13w8jmR4zoHu9P6eVmJ1PJukeoaVZ6lO1tkhVEFMD1JhqhUJJ4IKOegpu_mlCXnwB-3Z1r8TLEyndyoofdX5DuU3TrfZNcypwuTmBcSirwt6tzQ/file]
##   Date: 2022-11-08 05:41
##   Status: 200
##   Content-Type: application/zip
##   Size: 821 MB
## <ON DISK>  C:\Users\rande\Documents\NFL2020.zip
unzip(zip_file)

player_tracking_df <- fread("tracking2020.csv")
plays_df <- fread("plays.csv")

Create function

player_stats<- function(pt_df,p_df, game_ID, play_ID, jersey){

df <- pt_df %>%
  filter(gameId %in% game_ID & playId %in% play_ID) %>%
  left_join(p_df, by = c("gameId" = "gameId", "playId" = "playId")) %>%
  select(gameId, playId, time, x, y, dis, nflId, jerseyNumber, team, 
         frameId, playDescription,s, playDirection, specialTeamsPlayType) %>%
  data.frame()


df19 <- df %>%
  select(gameId, playId, time, jerseyNumber, x, y, frameId, dis, s) %>%
  filter(jerseyNumber == jersey) %>%  ### you can also add the team name if there are two players with the same jersey number
  data.frame()


df2 <- left_join(df, df19, by = c("gameId" = "gameId",
                                  "playId" = "playId",
                                  "time" = "time", 
                                  "frameId" = "frameId")) %>%
  select(gameId, playId, time, x.x, y.x, nflId, jerseyNumber.x, 
         frameId, x.y, y.y,team, playDescription, dis.y, s.y, playDirection) %>%
  # compute the distance from player 19 to all opposing players, otherwise NA 
  mutate(dist = ifelse(team == "away", sqrt((x.x - x.y)^2 + (y.x - y.y)^2), NA )) %>%
  filter(dist >20 & playDirection %in% c("right"))%>%
  
  
   # clean up or rename variables
  select(-c("x.y","y.y")) %>%
  rename(x = x.x, 
         y = y.x, 
         s = s.y,
         jerseyNumber = jerseyNumber.x) %>%
  
  # find the average distance from player 19 to opposing players
  #113.33 = dist on field from lower left corner to upper right corner
  #100 yards long and 53.3333 wide or 120 feet wide
  group_by(gameId, playId, frameId) %>%  
  mutate(mean_dist_to_19 = mean(dist, na.rm = TRUE),
         alpha = 1 - (mean_dist_to_19/113.33) ) %>%
  ungroup() %>%         
  
  # compute the total distance traveled by player 19
  arrange(jerseyNumber, time, frameId) %>%
  group_by(gameId, playId, jerseyNumber) %>% 
  mutate(dist_running_total = cumsum(dis.y)) %>%
  
  # create a width for a moving bar 
  mutate(width = round(  20*(1-(mean_dist_to_19/113.33))  ,0) ) %>%
  
  # create each play level stats for the receiver
  group_by(gameId, playId) %>%
  mutate(play_stat = mean(unique(mean_dist_to_19))) %>%
  ungroup() %>%
  
  # create each game level stats for the receiver
  group_by(gameId) %>%
  mutate(game_stat = mean(unique(play_stat))) %>%
  ungroup() %>%
  
  # create each player stat over all plays and all games
  mutate(player_stat = mean(unique(game_stat))) %>%
  
  # create a new data frame
  data.frame() 

  #print out play, game, and player stats
  result1 <-df2 %>% select(gameId, playId, play_stat,  playDirection) %>% distinct %>% data.frame()
  result2 <-df2 %>% select(gameId, game_stat) %>% distinct %>% data.frame()
  result3<-df2 %>% select(player_stat) %>% distinct %>% data.frame()
  return(list(result1,result2, result3, df2))
}
out<-player_stats(player_tracking_df,
                  plays_df, 
                  c("2021010300"),
                  c("1586", "395", "1232", "3930"),
                  19 )
my_df<-out[[4]]
my_df$playId <- as.factor(my_df$playId)

ggplot(my_df%>% filter(team=="away"), aes(x=dist, group=playId, color=playId, fill=playId))+
  geom_histogram(binwidth = 2, aes(y=..density..), color="black", fill="white")+
  geom_density(alpha=0.6)+  
  facet_wrap(~playId, scales="free")

players_df <-fread("players.csv")
  
jersey_df<-  player_tracking_df%>%
  select(nflId, jerseyNumber)%>%
  filter(!is.na(jerseyNumber))%>%
  distinct()%>%
  data.frame()

View(jersey_df)
View(plays_df)
View(players_df)
punt_kickoff_df<- plays_df%>%
  filter(specialTeamsPlayType %in% c("Punt", "Kickoff"),
         tolower(playDescription) %like% "touchdown",
         !tolower(playDescription) %like% "nullified", 
         !tolower(playDescription) %like% "recovered",
         !is.na(returnerId))%>%
  mutate(returnerId = as.integer(returnerId))%>%
  filter(gameId %in% player_tracking_df$gameId)%>%
  left_join(jersey_df, by = c("returnerId"= "nflId"))%>%
  left_join(players_df, by = c("returnerId"="nflId"))%>%
  data.frame()
summary_df <- punt_kickoff_df %>%
  select(displayName, jerseyNumber, gameId, playId)%>%
  group_by(displayName, jerseyNumber)%>%
  summarise(games=as.vector(paste(unique(gameId), collapse=",")),
            plays=as.vector(paste(playId, collapse="," )),
            .groups="keep" 
            )%>%
  data.frame()