# Course folder on the author's Windows machine; not referenced anywhere in
# the visible part of this script.
my_path <- "C:/Users/pptallon/Dropbox/G/Teaching/Loyola College/IS470 Sports Analytics/"
# Every relative file name read below (e.g. "plays.csv") is resolved against
# this working directory.
setwd("~/Desktop/Data")
library(httr)
library(data.table)
library(dplyr)
library(tidyverse)
library(ggpubr)
library(tidytext)
#----MERGING-----
# Read the play-by-play table and keep only kicking plays (extra points and
# field goals) as a plain data.frame.
df_plays <- fread("plays.csv") %>%
  filter(specialTeamsPlayType %in% c("Extra Point", "Field Goal")) %>%
  data.frame()

# Lookup tables that get attached to the kicking plays.
df_games <- fread("games.csv")
df_players <- fread("players.csv")
df_PFFScouting <- fread("PFFScoutingData.csv")

# Left joins (all.x = TRUE), so every kicking play is kept even when a lookup
# table has no matching row: kicker details by kickerId/nflId, game details by
# gameId, scouting data by gameId + playId.
df1 <- df_plays %>%
  merge(df_players, by.x = "kickerId", by.y = "nflId", all.x = TRUE) %>%
  merge(df_games, by = "gameId", all.x = TRUE) %>%
  merge(df_PFFScouting, by = c("gameId", "playId"), all.x = TRUE)
In the code above, I first merged all of the data together to create df1. My plan for finding the best kicker is to use two variables: the total number of good field goals and the total kick length of good field goals. With these two variables, I believe I can find the best kicker for each NFL season.
#------CLEANING FIELD GOALS DATA------
# Made / missed field goal counts per kicker and season. Blocked attempts are
# removed before counting, so `fail` is every remaining attempt that was not
# recorded as "Kick Attempt Good".
fgoalgood <- df1 %>%
  select(season, displayName, specialTeamsResult, specialTeamsPlayType) %>%
  filter(
    specialTeamsPlayType == "Field Goal",
    specialTeamsResult != "Blocked Kick Attempt"
  ) %>%
  mutate(score = ifelse(specialTeamsResult == "Kick Attempt Good", 1, 0)) %>%
  group_by(season, displayName) %>%
  summarise(
    succes = sum(score),
    fail = n() - succes,
    .groups = "drop"
  ) %>%
  data.frame()

#----------CLEANING KICK LENGTH DATA----------
# Summed kick length of the attempts counted in `fail` above (not blocked, not
# good). na.rm = TRUE keeps one attempt with a missing kickLength from turning
# the whole kicker-season total into NA.
df_kicklenb <- df1 %>%
  select(
    season, displayName, specialTeamsPlayType, specialTeamsResult, kickLength
  ) %>%
  filter(
    specialTeamsPlayType == "Field Goal",
    specialTeamsResult != "Blocked Kick Attempt",
    specialTeamsResult != "Kick Attempt Good"
  ) %>%
  group_by(season, displayName) %>%
  summarise(totb = sum(kickLength, na.rm = TRUE), .groups = "drop") %>%
  data.frame()

# Summed kick length of made field goals. The filter matches
# "Kick Attempt Good" directly, so `totg` covers exactly the attempts counted
# in `succes`; filtering by exclusion would also let any other result label
# through.
df_kickleng <- df1 %>%
  select(
    season, displayName, specialTeamsPlayType, specialTeamsResult, kickLength
  ) %>%
  filter(
    specialTeamsPlayType == "Field Goal",
    specialTeamsResult == "Kick Attempt Good"
  ) %>%
  group_by(season, displayName) %>%
  summarise(totg = sum(kickLength, na.rm = TRUE), .groups = "drop") %>%
  data.frame()

# Full outer join: a kicker-season that appears on only one side is kept, with
# NA in the other total.
df2 <- merge(
  x = df_kicklenb,
  y = df_kickleng,
  by = c("season", "displayName"),
  all = TRUE
)
I started my cleaning by focusing on field goals. I only want the successful and unsuccessful field goals. The reason I am not tracking blocked kick attempts is that a block is not the kicker's fault; it is the offensive line's fault. The data frame I created to track this data is fgoalgood. The next column I tracked was kick length. I wanted to track the total kick length of made field goals and the total kick length of missed field goals. After I found this data, I merged the two data sets df_kicklenb and df_kickleng into df2.
#--------FINDING THE TOT----------
# Zero-fill missing made/missed counts, then add total attempts per
# kicker-season. The sum is row-wise, so no grouping is needed.
fgoalgood <- fgoalgood %>%
  mutate(
    succes = replace_na(succes, 0),
    fail = replace_na(fail, 0)
  )
fgtot <- fgoalgood %>%
  select(season, displayName, succes, fail) %>%
  mutate(fgtot = succes + fail) %>%
  data.frame()

# Same treatment for the kick length totals: zero-fill, then add the combined
# length.
df2 <- df2 %>%
  mutate(
    totb = replace_na(totb, 0),
    totg = replace_na(totg, 0)
  )
kltot <- df2 %>%
  select(season, displayName, totb, totg) %>%
  mutate(kltot = totg + totb) %>%
  data.frame()

# Full outer join of the count table and the kick length table.
df3 <- merge(
  x = fgtot,
  y = kltot,
  by = c("season", "displayName"),
  all = TRUE
)
Here I added up the successful and unsuccessful field goals, and I added up the good and bad field goal kick lengths. I then merged the two data sets fgtot and kltot into df3 so that all the data I collected is in one data set.
# 2019 rows of the made/missed table; rows containing NA are dropped.
fgoalgoodchart <- fgoalgood %>%
  filter(season == 2019) %>%
  select(season, displayName, succes, fail) %>%
  data.frame() %>%
  na.omit()

# Bar chart of made field goals per kicker, tallest bar first. Bars are
# labelled with their value and coloured on a red-to-green gradient over 0-40.
ggplot(
  fgoalgoodchart,
  aes(x = reorder(displayName, -succes), y = succes, fill = succes)
) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = paste0(succes)), vjust = -0.5, size = 2) +
  scale_fill_continuous(
    low = "red",
    high = "dark green",
    limits = c(0, 40),
    breaks = seq(0, 40, 10),
    labels = paste0(seq(0, 40, 10), "")
  ) +
  labs(
    x = "Players",
    y = "Field Goals Good",
    title = "Field Goals",
    fill = "Number Of Field Goals"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 5),
    plot.title = element_text(hjust = 0.5)
  )
This chart shows how many good field goals each player made in the 2019 season. This is the first variable I will use to determine the best kicker.
# 2019 rows of the combined table, keeping only the kick length totals; rows
# containing NA are dropped.
kicklnchart <- df3 %>%
  filter(season == 2019) %>%
  select(season, displayName, totb, totg) %>%
  data.frame() %>%
  na.omit()

# Bar chart of `totg` per kicker, tallest bar first. Value labels are tilted
# 45 degrees, and the fill gradient runs red to green over 0-1300 with a
# legend break every 100.
ggplot(
  kicklnchart,
  aes(x = reorder(displayName, -totg), y = totg, fill = totg)
) +
  geom_bar(stat = "identity") +
  geom_text(
    aes(label = paste0(totg)),
    vjust = -0.5,
    size = 2,
    angle = 45,
    hjust = -0.04
  ) +
  scale_fill_continuous(
    low = "red",
    high = "dark green",
    limits = c(0, 1300),
    breaks = seq(0, 1300, 100),
    labels = paste0(seq(0, 1300, 100), "")
  ) +
  labs(
    x = "Players",
    y = "Kick Length",
    title = "Good Feild Goal Kick Lengths",
    fill = "Good Kick Length"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 5),
    plot.title = element_text(hjust = 0.5)
  )
This chart shows the total kick length of the good field goals made in the 2019 season. This is the second variable I will use to find the best kicker.
# Ranking score for every kicker-season:
# (totg / succes) * (succes / fgtot), rounded to a whole number.
# Missing scores are set to 0; rows that still contain NA in any other column
# are then dropped.
per <- df3 %>%
  select(season, displayName, succes, fgtot, totg) %>%
  mutate(percent = round((totg / succes) * (succes / fgtot))) %>%
  data.frame() %>%
  mutate(percent = replace_na(percent, 0)) %>%
  na.omit()
To find the best kicker, I took into consideration how often a player makes a field goal and the kick length of his successful field goals. I first divide the total successful field goal kick length by the number of successful field goals, which gives the average length of a made kick. I then multiply that by the success rate (successful field goals divided by total field goal attempts). With this equation, a player with more successful kick length per attempt gets a higher ranking.
# 2019 ranking scores, computed with the same formula as `per`. Missing scores
# are not zero-filled here, so those rows are removed by na.omit().
perchart <- df3 %>%
  filter(season == 2019) %>%
  select(season, displayName, succes, fgtot, totg) %>%
  mutate(percent = round((totg / succes) * (succes / fgtot))) %>%
  data.frame() %>%
  na.omit()

# Bar chart of the 2019 ranking score per kicker, highest first, with value
# labels and a red-to-green fill gradient over 0-50.
ggplot(
  perchart,
  aes(x = reorder(displayName, -percent), y = percent, fill = percent)
) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = paste0(percent)), vjust = -0.5, size = 2) +
  scale_fill_continuous(
    low = "red",
    high = "dark green",
    limits = c(0, 50),
    breaks = seq(0, 50, 10),
    labels = paste0(seq(0, 50, 10), "")
  ) +
  labs(
    x = "Players",
    y = "Ranking",
    title = "Best Kicker For 2019",
    fill = "Ranking Score"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 5),
    plot.title = element_text(hjust = 0.5)
  )
In this graph, using the equation, we can see that Cody Parkey is the best kicker for the 2019 season because he has the highest ranking. Notice the change in ranking once we take into account the total good field goal kick length.
#-----------PLOTTING----------
# Ranking score for every kicker, one panel per season.
# reorder_within() orders the bars inside each season's panel by that season's
# score; a plain reorder() would order names by their average score across all
# seasons. scale_x_reordered() then strips the suffix reorder_within() appends
# to the axis labels. scales = "free" gives each panel its own x axis, which
# the per-panel ordering relies on.
ggplot(
  per,
  aes(
    x = reorder_within(displayName, -percent, season),
    y = percent,
    fill = percent
  )
) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = paste0(percent, "")), vjust = .06, size = 2) +
  labs(
    x = "players",
    y = "Ranking",
    title = "Best Kicker",
    fill = "Ranking Score"
  ) +
  # Leave 10% headroom above the tallest bar.
  scale_y_continuous(limits = c(0, max(per$percent) * 1.1)) +
  scale_x_reordered() +
  # Limits start at 0 so the 0 break lies inside the scale, matching the
  # 2019 ranking chart.
  scale_fill_continuous(
    limits = c(0, 50),
    labels = paste0(seq(0, 50, 10)),
    breaks = seq(0, 50, 10),
    low = "red",
    high = "dark green"
  ) +
  facet_wrap(~season, ncol = 1, scales = "free") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 5),
    plot.title = element_text(hjust = 0.5)
  )
This graph shows the best kicker for each season: Giorgio Tavecchio in 2018, Cody Parkey in 2019, and Josh Lambo in 2020. As explained before, this takes into account each player's total good field goals and the kick length of each of those field goals, using the equation above to produce the ranking score.