Introduction

For assignment #2, I merged the four CSV files: plays, players, games, and PFF scouting data. I kept only the punts by subsetting the plays to those whose special-teams play type is “Punt”, and I selected specific columns, such as kickLength, to investigate how consistent each punter's kick length is.

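I dropped rows with missing values in the selected columns and set any NA left in the summary table (for example, a standard deviation that cannot be computed) to 0. I grouped the data by season and player.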

I calculated the mean and the standard deviation of the punt kick length for each player in each season. The best punter was defined as the player with the smallest standard deviation around the mean kick length: a small standard deviation means that the player was fairly consistent in the length of his punts.

# Working directory for the script: the read.csv() calls below use relative
# file names, so they are resolved against this directory.
# NOTE(review): this is an absolute, machine-specific path and setwd() changes
# the working directory for the whole session; the script will not run
# unchanged on another machine.
my_path <-"C:/Users/rande/Rprogram/IS470SportsAnalytics/"
setwd(my_path)

library(httr)
library(dplyr)
library(RColorBrewer)
library(ggplot2)
library(data.table)
library(tidyverse)
library(tidytext)

# ---- Load and join the source tables ----

# Plays: keep punts only, since the analysis is restricted to punt kick length.
dfplays <- read.csv("plays.csv") %>%
  filter(specialTeamsPlayType %in% c("Punt"))
dfplayers <- read.csv("players.csv")
dfgames <- read.csv("games.csv")
dfscouting <- read.csv("PFFScoutingData.csv")

# Left-join outward from the players table: each player row is matched to the
# punts where that player is the kicker (players without a punt keep NA in the
# play columns), then game-level and scouting columns are attached.
# Join keys are spelled out with `by =` for the last two merges: supplying only
# `by.x` leaves `by.y` at its default of *every* shared column name, which
# errors as soon as the two tables share any column besides the intended key.
df_all <- merge(
  dfplayers, dfplays,
  by.x = "nflId", by.y = "kickerId",
  all.x = TRUE
)
df_all <- merge(df_all, dfgames, by = "gameId", all.x = TRUE)
df_all <- merge(df_all, dfscouting, by = c("gameId", "playId"), all.x = TRUE)

# Columns carried into the analysis.
cols_to_use <- c(
  "season", "displayName", "kickLength", "specialTeamsPlayType", "hangTime"
)

df_f <- subset(df_all, select = cols_to_use)

### part 1

# Analysis table: the selected columns, with every row that has a missing
# value in any of them removed (this also drops players with no punts).
df_f <- na.omit(subset(df_all, select = cols_to_use))

# One row per (season, player): mean and standard deviation of punt length,
# both rounded to 2 decimals.
# - `.groups` (plural) is the summarise() argument; the misspelled `.group`
#   is treated as a new column and leaves the default grouping in place.
# - displayName is kept intact. Stripping a fixed-width suffix is only needed
#   after tidytext::reorder_within(); applied to plain names it cuts off the
#   last 7 characters of each player's name.
dft1 <- df_f %>%
  group_by(season, displayName) %>%
  summarize(
    kl_m = round(mean(kickLength), 2),
    kl_sd = round(sd(kickLength), 2),
    .groups = "drop"
  ) %>%
  as.data.frame()

# sd() is NA when a player has a single punt in a season; those are shown as 0.
# NOTE(review): a 0 here means "undefined", not "perfectly consistent".
dft1$kl_sd[is.na(dft1$kl_sd)] <- 0

# Bar chart of mean punt length per player, one panel per season.
# Bar height = mean kick length, fill and text label = standard deviation.
# reorder_within() + scale_x_reordered() sort the bars by descending mean
# *inside each season panel*; a plain reorder() sorts on a single value per
# player across all seasons, so individual panels come out unsorted.
ggplot(
  dft1,
  aes(
    x = reorder_within(displayName, -kl_m, season),
    y = kl_m,
    fill = kl_sd
  )
) +
  geom_col() +
  geom_text(aes(label = kl_sd), vjust = -0.5, size = 3) +
  scale_x_reordered() +
  # 10% head-room above the tallest bar so the SD labels are not clipped.
  scale_y_continuous(limits = c(0, max(dft1$kl_m) * 1.1)) +
  # Values outside `limits` (e.g. an SD of 0) are drawn with the NA colour.
  scale_fill_continuous(
    limits = c(1, 15),
    breaks = seq(0, 15, 1),
    labels = as.character(seq(0, 15, 1)),
    low = "red",
    high = "dark green"
  ) +
  # One column of panels; the number of rows follows the number of seasons
  # instead of being fixed at 3.
  facet_wrap(~season, ncol = 1, scales = "free") +
  labs(
    x = "Player Name",
    y = "mean KickLength of punt",
    title = "Mean kicklength of Punt by Player"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = .5, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )

# Bump chart ----
# Players are ranked within each season by the standard deviation of their
# punt length, to evaluate how consistent each punter's kick length is.
# Rank 1 = smallest SD = most consistent punter of that season.
dft1 <- df_f %>%
  group_by(season, displayName) %>%
  summarize(
    kl_m = round(mean(kickLength), 2),
    kl_sd = round(sd(kickLength), 2),
    # Keep the result grouped by season so rank() below is computed per
    # season; with "keep" every (season, player) group has one row and every
    # rank would be 1.
    .groups = "drop_last"
  ) %>%
  # sd() is NA for a single punt; such players cannot be ranked and an NA
  # rank would also break max(rank) when the plot's y breaks are built.
  filter(!is.na(kl_sd)) %>%
  # Ascending SD so that rank 1 is the lowest SD, matching the chart title.
  # ties.method = "min" keeps ranks integer so they line up with axis breaks.
  mutate(rank = rank(kl_sd, ties.method = "min")) %>%
  ungroup() %>%
  as.data.frame()

# Bump chart: one line per player showing its rank in each season, with the
# player name printed next to the first and the last season.

# Interpolated Set2 palette. At least 53 colours as before, but grown when
# there are more players, because scale_color_manual() fails if it is given
# fewer colours than there are levels.
cols <- colorRampPalette(brewer.pal(8, "Set2"))
myPal <- cols(max(53, length(unique(dft1$displayName))))

ggplot(dft1, aes(x = season, y = rank, group = displayName)) +
  geom_line(aes(color = displayName), size = 2) +
  geom_point(shape = 21, size = 4, fill = "blue") +
  # Rank 1 at the top. na.rm / ceiling() keep seq() valid when some ranks are
  # missing or fractional (tied average ranks).
  scale_y_reverse(
    breaks = seq(ceiling(max(dft1$rank, na.rm = TRUE)), 1, -1)
  ) +
  # Player names to the left of the first season ...
  geom_text(
    data = dft1 %>% filter(season == min(season)),
    aes(x = season - .05, y = rank, label = displayName),
    size = 3,
    hjust = 1
  ) +
  # ... and to the right of the last season.
  geom_text(
    data = dft1 %>% filter(season == max(season)),
    aes(x = season + 0.05, y = rank, label = displayName),
    size = 3,
    hjust = 0
  ) +
  # One tick per season, labelled with the plain year.
  scale_x_continuous(
    breaks = min(dft1$season):max(dft1$season),
    labels = as.character(min(dft1$season):max(dft1$season))
  ) +
  scale_color_manual(values = myPal) +
  labs(
    title = "Bump chart for Players by Standard deviation around mean kicklength- ranking by lowest SD",
    x = "Season",
    y = "Rank",
    colour = "Players"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

Conclusion:

The best punter in each season, i.e. the one with the most consistent punt length as shown by the smallest standard deviation around the mean kick length, is:

Johnny Hekker in 2018, Jake Bailey in 2019, and Corey Bojorquez in 2020.