#here is my code
library(data.table)
library(dplyr)
library(ggplot2)
# Read the four CSV inputs used by this analysis.
plays_path <- "Data/NFLBDB2022/plays.csv"
first_df <- fread(plays_path)
second_df <- fread("Data/NFLBDB2022/players.csv")
third_df <- fread("Data/NFLBDB2022/games.csv")
fourth_df <- fread("Data/NFLBDB2022/PFFScoutingData.csv")

# Left-join everything onto the plays table, so every play row is kept:
#   players  matched on kickerId == nflId
#   games    matched on gameId
#   scouting matched on gameId + playId
df <- first_df %>%
  left_join(second_df, by = c("kickerId" = "nflId")) %>%
  left_join(third_df, by = "gameId") %>%
  left_join(fourth_df, by = c("gameId", "playId"))
To determine the best kicker, I will identify which kicker has the best field goal percentage from 40 yards plus. The average made field goal in the NFL is about 39.9 yards, so my program looks for kickers who excel at making field goals longer than the average made field goal in the NFL.
I have determined the best 20 NFL kickers from 40 yards plus. To begin with, I created a data frame of field goal attempts from 40 yards plus. From this data frame, I was able to calculate the field goal percentage, made field goals, missed field goals, and the average field goal distance for each kicker. Next, I made a vertical bar chart to display the results.
I created a vertical bar chart of the 20 best NFL kickers based on their field goal percentage. The x-axis shows each kicker's name, ordered by the number of made field goals from 40 yards plus. The y-axis shows the field goal percentage.
# Change the working directory; presumably this is what lets the relative
# "Data/NFLBDB2022/..." paths passed to fread() below resolve.
# NOTE(review): this is a hard-coded, user-specific network path, so the
# script will fail on any machine without it — confirm before reuse.
setwd("//apporto.com/dfs/LOYOLA/Users/ssilguero_loyola/Desktop/IS470")
library(ggplot2)
library(data.table)
library(dplyr)
library(scales)
library(tidytext)
library(RColorBrewer)
library(kableExtra)
# Load the plays, players, games and PFF scouting CSV files.
first_df <- fread("Data/NFLBDB2022/plays.csv")
second_df <- fread("Data/NFLBDB2022/players.csv")
third_df <- fread("Data/NFLBDB2022/games.csv")
fourth_df <- fread("Data/NFLBDB2022/PFFScoutingData.csv")

# Build one wide table keyed on plays. Each left join keeps every play row
# and attaches, in order: the kicker's player record (kickerId == nflId),
# the game record (gameId) and the scouting record (gameId + playId).
df <- left_join(
  left_join(
    left_join(first_df, second_df, by = c("kickerId" = "nflId")),
    third_df,
    by = "gameId"
  ),
  fourth_df,
  by = c("gameId", "playId")
)
# Build a per-kicker summary of field goal attempts from 40 yards or more.
#
# Input:  `df`, the joined plays/players/games/scouting table.
# Output: `fieldGoal_df`, a plain data.frame with one row per kicker
#         (displayName) holding attempts, makes, misses, mean kick length
#         and success percentage. Rows are sorted by makes (ties broken by
#         longer mean distance, then by more misses) and only the first 10
#         rows are kept.
#
# NOTE(review): the table caption and chart title that consume this data
# frame say "Top 20", but only 10 rows are kept here — confirm which is
# intended before changing either.
fieldGoal_df <- df %>%
  filter(specialTeamsPlayType == "Field Goal", kickLength >= 40) %>%
  group_by(displayName, specialTeamsPlayType) %>%
  summarise(
    fieldGoalAttempts = n(),
    # A kick counts as made only when the result is exactly this label;
    # summing the logical vector counts the TRUE values.
    madeFieldGoals = sum(specialTeamsResult == "Kick Attempt Good"),
    missedFieldGoals = fieldGoalAttempts - madeFieldGoals,
    averageFieldGoalDistance = mean(kickLength),
    successPercent = round(100 * (madeFieldGoals / fieldGoalAttempts), 2),
    # Drop grouping right away; nothing downstream relies on it.
    .groups = "drop"
  ) %>%
  arrange(
    desc(madeFieldGoals),
    desc(averageFieldGoalDistance),
    desc(missedFieldGoals)
  ) %>%
  head(10) %>%
  as.data.frame()

# Print the summary table.
fieldGoal_df
## displayName specialTeamsPlayType fieldGoalAttempts madeFieldGoals
## 1 Brandon McManus Field Goal 46 37
## 2 Justin Tucker Field Goal 41 37
## 3 Dustin Hopkins Field Goal 44 35
## 4 Jason Sanders Field Goal 44 33
## 5 Wil Lutz Field Goal 40 33
## 6 Jason Myers Field Goal 38 32
## 7 Greg Zuerlein Field Goal 47 31
## 8 Ka'imi Fairbairn Field Goal 44 31
## 9 Randy Bullock Field Goal 38 29
## 10 Mason Crosby Field Goal 35 28
## missedFieldGoals averageFieldGoalDistance successPercent
## 1 9 48.78261 80.43
## 2 4 48.17073 90.24
## 3 9 47.00000 79.55
## 4 11 47.61364 75.00
## 5 7 45.97500 82.50
## 6 6 47.34211 84.21
## 7 16 48.55319 65.96
## 8 13 47.36364 70.45
## 9 9 47.60526 76.32
## 10 7 46.65714 80.00
# Render the kicker summary (at most its first 20 rows) as a captioned
# table styled with striped rows and hover highlighting.
fieldGoal_df %>%
  head(20) %>%
  knitr::kable(caption = "Top 20 NFL Kickers from 40 yards plus") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
displayName | specialTeamsPlayType | fieldGoalAttempts | madeFieldGoals | missedFieldGoals | averageFieldGoalDistance | successPercent |
---|---|---|---|---|---|---|
Brandon McManus | Field Goal | 46 | 37 | 9 | 48.78261 | 80.43 |
Justin Tucker | Field Goal | 41 | 37 | 4 | 48.17073 | 90.24 |
Dustin Hopkins | Field Goal | 44 | 35 | 9 | 47.00000 | 79.55 |
Jason Sanders | Field Goal | 44 | 33 | 11 | 47.61364 | 75.00 |
Wil Lutz | Field Goal | 40 | 33 | 7 | 45.97500 | 82.50 |
Jason Myers | Field Goal | 38 | 32 | 6 | 47.34211 | 84.21 |
Greg Zuerlein | Field Goal | 47 | 31 | 16 | 48.55319 | 65.96 |
Ka’imi Fairbairn | Field Goal | 44 | 31 | 13 | 47.36364 | 70.45 |
Randy Bullock | Field Goal | 38 | 29 | 9 | 47.60526 | 76.32 |
Mason Crosby | Field Goal | 35 | 28 | 7 | 46.65714 | 80.00 |
# Vertical bar chart of field goal success percentage per kicker.
# Bars are ordered along the x-axis by made field goals; bar height and
# fill colour both encode successPercent, and each bar is labelled with
# its percentage rounded to a whole number.
ggplot(
  fieldGoal_df,
  aes(
    x = reorder(displayName, madeFieldGoals),
    y = successPercent,
    fill = successPercent / 100
  )
) +
  # geom_col() takes bar heights directly from the y aesthetic.
  geom_col() +
  geom_text(
    aes(label = label_percent(accuracy = 1L)(successPercent / 100)),
    vjust = -0.5
  ) +
  # Fill legend runs over the full 0-100% range in 20% steps.
  scale_fill_continuous(
    low = "lightgray",
    high = "dodgerblue",
    limits = c(0, 1),
    breaks = seq(0, 1, 0.2),
    labels = paste0(100 * seq(0, 1, 0.2), "%")
  ) +
  labs(
    title = "Top 20 NFL Kickers from 40 Yards Plus",
    x = "Kickers",
    y = "Field Goal Percentage",
    fill = "Field Goal Percentage"
  ) +
  theme(
    plot.title = element_text(hjust = 0.50, face = "bold", size = 15),
    axis.title = element_text(size = 10, face = "bold"),
    axis.text.x = element_text(size = 5),
    axis.text.y = element_text(size = 10)
  )
Overall, there are many ways to display data using R. Preparing the data was an essential step before moving on to the data visualizations. Learning how to use R to create data visualizations was difficult, but it was rewarding in the end to understand.
#knitr::include_graphics("c:/Users/pptallon/Dropbox/G/Personal/Tallon005.jpg")