#here is my code
library(data.table)
library(dplyr)
library(ggplot2)

# Read the four source tables from the project's data folder.
first_df  <- fread("Data/NFLBDB2022/plays.csv")
second_df <- fread("Data/NFLBDB2022/players.csv")
third_df  <- fread("Data/NFLBDB2022/games.csv")
fourth_df <- fread("Data/NFLBDB2022/PFFScoutingData.csv")

# Build one play-level table: start from the plays and attach, in order,
# the kicker's player record, the game record, and the scouting record.
df <- first_df %>%
  left_join(second_df, by = c("kickerId" = "nflId")) %>%
  left_join(third_df, by = "gameId") %>%
  left_join(fourth_df, by = c("gameId", "playId"))

Introduction

To determine the best kicker, I will identify which kicker has the best field goal percentage on attempts of 40 yards or more. The average made field goal in the NFL is about 39.9 yards, so my program identifies the kickers who excel at making field goals longer than that average.

Description of Project

I have determined the best 20 NFL kickers from 40 yards plus. To begin with, I created a data frame of field goal attempts from 40 yards plus. From this, I was able to calculate the field goal percentage, made field goals, missed field goals, and the average field goal distance for each kicker. Next, I made a vertical bar chart to display the results.

Data Visualization

I created a vertical bar chart of the 20 best NFL kickers based on their field goal percentage. The x-axis shows each kicker's name, ordered by the number of field goals made from 40 yards plus. The y-axis shows the field goal percentage.

# Switch the working directory to the course project folder; the fread()
# calls below use paths relative to it ("Data/NFLBDB2022/...").
# NOTE(review): this is an absolute, user-specific network path, so it only
# resolves where that share is reachable -- confirm before running elsewhere.
setwd("//apporto.com/dfs/LOYOLA/Users/ssilguero_loyola/Desktop/IS470")

library(ggplot2)
library(data.table)
library(dplyr)
library(scales)
library(tidytext)
library(RColorBrewer)
library(kableExtra)

# Read the four source tables from the project's data folder.
first_df  <- fread("Data/NFLBDB2022/plays.csv")
second_df <- fread("Data/NFLBDB2022/players.csv")
third_df  <- fread("Data/NFLBDB2022/games.csv")
fourth_df <- fread("Data/NFLBDB2022/PFFScoutingData.csv")

# Build one play-level table: start from the plays and attach, in order,
# the kicker's player record, the game record, and the scouting record.
df <- first_df %>%
  left_join(second_df, by = c("kickerId" = "nflId")) %>%
  left_join(third_df, by = "gameId") %>%
  left_join(fourth_df, by = c("gameId", "playId"))


# Per-kicker summary of field goal attempts of 40 yards or longer.
#
# Steps:
#   1. Keep only field goal plays with kickLength >= 40.
#   2. Flag each attempt as made when the result is "Kick Attempt Good".
#   3. Summarise attempts, makes, misses, mean distance and success
#      percentage (rounded to 2 decimals) per kicker.
#   4. Sort by made field goals (descending), breaking ties by average
#      distance and then missed field goals (both descending), and keep
#      the first 10 rows as a plain data.frame.
#
# NOTE(review): the report text and the table/chart titles say "Top 20" and
# "best field goal percentage", but this keeps 10 rows ranked by made field
# goals -- confirm which is intended before changing the cutoff or sort key.
fieldGoal_df <- df %>%
  select(displayName, kickLength, specialTeamsPlayType, specialTeamsResult) %>%
  filter(specialTeamsPlayType == "Field Goal", kickLength >= 40) %>%
  # The comparison is already logical, so no ifelse() wrapper is needed.
  mutate(successFG = specialTeamsResult == "Kick Attempt Good") %>%
  group_by(displayName, specialTeamsPlayType) %>%
  summarise(
    fieldGoalAttempts = n(),
    madeFieldGoals = sum(successFG),
    missedFieldGoals = fieldGoalAttempts - madeFieldGoals,
    averageFieldGoalDistance = mean(kickLength),
    successPercent = round(100 * (madeFieldGoals / fieldGoalAttempts), 2),
    # Drop the grouping so the sort and head() below act on the whole table.
    .groups = "drop"
  ) %>%
  arrange(
    desc(madeFieldGoals),
    desc(averageFieldGoalDistance),
    desc(missedFieldGoals)
  ) %>%
  head(10) %>%
  data.frame()
fieldGoal_df
##         displayName specialTeamsPlayType fieldGoalAttempts madeFieldGoals
## 1   Brandon McManus           Field Goal                46             37
## 2     Justin Tucker           Field Goal                41             37
## 3    Dustin Hopkins           Field Goal                44             35
## 4     Jason Sanders           Field Goal                44             33
## 5          Wil Lutz           Field Goal                40             33
## 6       Jason Myers           Field Goal                38             32
## 7     Greg Zuerlein           Field Goal                47             31
## 8  Ka'imi Fairbairn           Field Goal                44             31
## 9     Randy Bullock           Field Goal                38             29
## 10     Mason Crosby           Field Goal                35             28
##    missedFieldGoals averageFieldGoalDistance successPercent
## 1                 9                 48.78261          80.43
## 2                 4                 48.17073          90.24
## 3                 9                 47.00000          79.55
## 4                11                 47.61364          75.00
## 5                 7                 45.97500          82.50
## 6                 6                 47.34211          84.21
## 7                16                 48.55319          65.96
## 8                13                 47.36364          70.45
## 9                 9                 47.60526          76.32
## 10                7                 46.65714          80.00
  # Render (up to) the first 20 rows of the summary as a styled HTML table.
  fieldGoal_df %>%
    head(20) %>%
    knitr::kable(caption = "Top 20 NFL Kickers from 40 yards plus") %>%
    kable_styling(bootstrap_options = c("striped", "hover"))
Top 20 NFL Kickers from 40 yards plus
displayName specialTeamsPlayType fieldGoalAttempts madeFieldGoals missedFieldGoals averageFieldGoalDistance successPercent
Brandon McManus Field Goal 46 37 9 48.78261 80.43
Justin Tucker Field Goal 41 37 4 48.17073 90.24
Dustin Hopkins Field Goal 44 35 9 47.00000 79.55
Jason Sanders Field Goal 44 33 11 47.61364 75.00
Wil Lutz Field Goal 40 33 7 45.97500 82.50
Jason Myers Field Goal 38 32 6 47.34211 84.21
Greg Zuerlein Field Goal 47 31 16 48.55319 65.96
Ka’imi Fairbairn Field Goal 44 31 13 47.36364 70.45
Randy Bullock Field Goal 38 29 9 47.60526 76.32
Mason Crosby Field Goal 35 28 7 46.65714 80.00

Visualization 1: A bar chart that shows the field goal percentage of the top NFL kickers on attempts from 40 yards plus.

# Vertical bar chart: one bar per kicker, ordered along the x-axis by made
# field goals; bar height and fill shade both encode the success percentage.
ggplot(
  data = fieldGoal_df,
  mapping = aes(
    x = reorder(displayName, madeFieldGoals),
    y = successPercent,
    fill = successPercent / 100
  )
) +
  # Heights come straight from successPercent (no counting).
  geom_col() +
  # Whole-number percentage label just above each bar.
  geom_text(
    aes(label = label_percent(accuracy = 1L)(successPercent / 100)),
    vjust = -0.5
  ) +
  # Fill legend runs 0%-100% in 20% steps, light gray to blue.
  scale_fill_continuous(
    breaks = seq(0, 1, 0.2),
    limits = c(0, 1),
    labels = paste0(100 * seq(0, 1, 0.2), "%"),
    low = "lightgray",
    high = "dodgerblue"
  ) +
  labs(
    title = "Top 20 NFL Kickers from 40 Yards Plus",
    x = "Kickers",
    y = "Field Goal Percentage",
    fill = "Field Goal Percentage"
  ) +
  theme(
    plot.title = element_text(hjust = 0.50, face = "bold", size = 15),
    axis.text.x = element_text(size = 5),
    axis.text.y = element_text(size = 10),
    axis.title = element_text(size = 10, face = "bold")
  )


Conclusion

Overall, there are many ways to display data using R. Preparing the data was essential before getting to the data visualizations. Learning how to use R to create data visualizations was difficult, but rewarding in the end.

Note

#knitr::include_graphics("c:/Users/pptallon/Dropbox/G/Personal/Tallon005.jpg")