This is a draft of the code to create Rating a Punt Return.
I started out by filtering distances of team members on the offense (in increment of 5 and then settled for distances >20 uards ) from player 19 and selecting the direction of the play to “right”.
setwd("C:/Users/rande/Rprogram/IS470SportsAnalytics")
suppressWarnings(suppressMessages(library(data.table)))
suppressWarnings(suppressMessages(library(dplyr)))
suppressWarnings(suppressMessages(library(ggplot2)))
suppressWarnings(suppressMessages(library(gganimate)))
suppressWarnings(suppressMessages(library(gifski)))
library(httr)
library(dplyr)
library(RColorBrewer)
library(ggplot2)
library(data.table)
library(tidyverse)
fileURL <- "https://www.dropbox.com/s/1omgytiw40crxos/nfl-big-data-bowl-2020.zip?raw=1"
zip_file <- "NFL2020.zip"
if (file.exists(zip_file)) {
file.remove(zip_file)
}
## [1] TRUE
GET (url = fileURL, write_disk(zip_file))
## Response [https://uc0d2b8f42cba09e33d070f944cd.dl.dropboxusercontent.com/cd/0/inline2/BwXspXKKFzHWvOEMi-0ppFf9rP6ou24ULmxbb5-ApbdIjt8TO3ADLkYo_gYXSVnHlGpqI-YeaMJ6ffSkVZh2m1jkmJTHRVmZh_jldlIkwprecKi9uxeZCy1S8B25c6otn--doOoyeHbcng19CzzHDL9MH0JV-c1db6S2a_nTUh0rB8_0IhxoNN_rUdxpOmlcgKqVHr9vEQYJ6baei3CaZew4vp3o9XVDYCYrk7VJNXL1pqavpxE9BY09E3MnKAwI3DbXcoA1t82R0w6_evr8IaRtyT7-n_6T3s-pIYAhba3LJYhTDI9JNn7Md_dXswZ13w8jmR4zoHu9P6eVmJ1PJukeoaVZ6lO1tkhVEFMD1JhqhUJJ4IKOegpu_mlCXnwB-3Z1r8TLEyndyoofdX5DuU3TrfZNcypwuTmBcSirwt6tzQ/file]
## Date: 2022-11-08 05:41
## Status: 200
## Content-Type: application/zip
## Size: 821 MB
## <ON DISK> C:\Users\rande\Documents\NFL2020.zip
unzip(zip_file)
player_tracking_df <- fread("tracking2020.csv")
plays_df <- fread("plays.csv")
player_stats<- function(pt_df,p_df, game_ID, play_ID, jersey){
df <- pt_df %>%
filter(gameId %in% game_ID & playId %in% play_ID) %>%
left_join(p_df, by = c("gameId" = "gameId", "playId" = "playId")) %>%
select(gameId, playId, time, x, y, dis, nflId, jerseyNumber, team,
frameId, playDescription,s, playDirection, specialTeamsPlayType) %>%
data.frame()
df19 <- df %>%
select(gameId, playId, time, jerseyNumber, x, y, frameId, dis, s) %>%
filter(jerseyNumber == jersey) %>% ### you can also add the team name if there are two players with the same jersey number
data.frame()
df2 <- left_join(df, df19, by = c("gameId" = "gameId",
"playId" = "playId",
"time" = "time",
"frameId" = "frameId")) %>%
select(gameId, playId, time, x.x, y.x, nflId, jerseyNumber.x,
frameId, x.y, y.y,team, playDescription, dis.y, s.y, playDirection) %>%
# compute the distance from player 19 to all opposing players, otherwise NA
mutate(dist = ifelse(team == "away", sqrt((x.x - x.y)^2 + (y.x - y.y)^2), NA )) %>%
filter(dist >20 & playDirection %in% c("right"))%>%
# clean up or rename variables
select(-c("x.y","y.y")) %>%
rename(x = x.x,
y = y.x,
s = s.y,
jerseyNumber = jerseyNumber.x) %>%
# find the average distance from player 19 to opposing players
#113.33 = dist on field from lower left corner to upper right corner
#100 yards long and 53.3333 wide or 120 feet wide
group_by(gameId, playId, frameId) %>%
mutate(mean_dist_to_19 = mean(dist, na.rm = TRUE),
alpha = 1 - (mean_dist_to_19/113.33) ) %>%
ungroup() %>%
# compute the total distance traveled by player 19
arrange(jerseyNumber, time, frameId) %>%
group_by(gameId, playId, jerseyNumber) %>%
mutate(dist_running_total = cumsum(dis.y)) %>%
# create a width for a moving bar
mutate(width = round( 20*(1-(mean_dist_to_19/113.33)) ,0) ) %>%
# create each play level stats for the receiver
group_by(gameId, playId) %>%
mutate(play_stat = mean(unique(mean_dist_to_19))) %>%
ungroup() %>%
# create each game level stats for the receiver
group_by(gameId) %>%
mutate(game_stat = mean(unique(play_stat))) %>%
ungroup() %>%
# create each player stat over all plays and all games
mutate(player_stat = mean(unique(game_stat))) %>%
# create a new data frame
data.frame()
#print out play, game, and player stats
result1 <-df2 %>% select(gameId, playId, play_stat, playDirection) %>% distinct %>% data.frame()
result2 <-df2 %>% select(gameId, game_stat) %>% distinct %>% data.frame()
result3<-df2 %>% select(player_stat) %>% distinct %>% data.frame()
return(list(result1,result2, result3, df2))
}
out<-player_stats(player_tracking_df,
plays_df,
c("2021010300"),
c("1586", "395", "1232", "3930"),
19 )
my_df<-out[[4]]
my_df$playId <- as.factor(my_df$playId)
ggplot(my_df%>% filter(team=="away"), aes(x=dist, group=playId, color=playId, fill=playId))+
geom_histogram(binwidth = 2, aes(y=..density..), color="black", fill="white")+
geom_density(alpha=0.6)+
facet_wrap(~playId, scales="free")
players_df <-fread("players.csv")
jersey_df<- player_tracking_df%>%
select(nflId, jerseyNumber)%>%
filter(!is.na(jerseyNumber))%>%
distinct()%>%
data.frame()
View(jersey_df)
View(plays_df)
View(players_df)
punt_kickoff_df<- plays_df%>%
filter(specialTeamsPlayType %in% c("Punt", "Kickoff"),
tolower(playDescription) %like% "touchdown",
!tolower(playDescription) %like% "nullified",
!tolower(playDescription) %like% "recovered",
!is.na(returnerId))%>%
mutate(returnerId = as.integer(returnerId))%>%
filter(gameId %in% player_tracking_df$gameId)%>%
left_join(jersey_df, by = c("returnerId"= "nflId"))%>%
left_join(players_df, by = c("returnerId"="nflId"))%>%
data.frame()
summary_df <- punt_kickoff_df %>%
select(displayName, jerseyNumber, gameId, playId)%>%
group_by(displayName, jerseyNumber)%>%
summarise(games=as.vector(paste(unique(gameId), collapse=",")),
plays=as.vector(paste(playId, collapse="," )),
.groups="keep"
)%>%
data.frame()