About the assignment

In this assignment:

I first approached the data by filtering the plays down to only the Field Goal and Extra Point attempts for each kicker in the league. My next step was to select the specific columns that I believed held significant information for deciding whether a kicker is good or not.

Then, I took into consideration the total number of kicks for each kicker and separated them into two groups: kicks that were successful and kicks that were not. With that information, I was able to find the success percentage and failure percentage for each kicker, given the total number of kicks they attempted.

I classified a kicker as valid for analysis only if the kicker attempted at least 20 kicks of each type and therefore performs both types of kicks, Field Goals and Extra Points. However, the main criterion I chose for identifying the best kicker in the NFL is the ability to complete Field Goals with a success percentage of at least 91%.

# Folder that holds the CSV inputs. The session's working directory is
# switched to it so that relative file names passed to later reads resolve
# against this folder.
# NOTE(review): the path has no separator after the drive letter
# ("U:R Studio ..."), which makes it relative to the current directory on
# drive U: -- confirm that "U:/R Studio - IS470/" was not intended.
my_path <- "U:R Studio - IS470/"
setwd(dir = my_path)

library(data.table)
library(dplyr)


# Load the play-level table from the working directory.
my_df1 <- fread(file = "plays.csv")

# Frequency of each special-teams result, as a quick look at the outcome
# categories present in the data.
table(my_df1[["specialTeamsResult"]])
## 
##     Blocked Kick Attempt             Blocked Punt                   Downed 
##                       61                       39                      834 
##               Fair Catch        Kick Attempt Good     Kick Attempt No Good 
##                     1645                     5470                      585 
##    Kickoff Team Recovery                   Muffed Non-Special Teams Result 
##                       16                      214                      101 
##            Out of Bounds                   Return                Touchback 
##                      651                     5207                     5156
# Keep only the kicking play types and convert the result to a plain
# data.frame. Punt rows are retained at this stage.
my_df1 <- data.frame(
  filter(
    my_df1,
    specialTeamsPlayType %in% c("Extra Point", "Field Goal", "Punt")
  )
)

# Lookup tables read from the working directory.
my_df2 <- fread(file = "players.csv")
my_df3 <- fread(file = "games.csv")
my_df4 <- fread(file = "PFFScoutingData.csv")

# Left-join the lookups onto the filtered plays. `all.x = TRUE` keeps every
# play even when a lookup has no matching row:
#   - players: play `kickerId` matched against player `nflId`
#   - games:   matched on `gameId`
#   - PFF scouting data: matched on `gameId` + `playId`
my_df <- my_df1 %>%
  merge(my_df2, by.x = "kickerId", by.y = "nflId", all.x = TRUE) %>%
  merge(my_df3, by = "gameId", all.x = TRUE) %>%
  merge(my_df4, by = c("gameId", "playId"), all.x = TRUE)

# Rank kickers by Field Goal accuracy.
#
# Steps:
#   1. Keep Extra Point and Field Goal plays that have a kicker name
#      (rows with a missing `displayName` are dropped; presumably these
#      are plays whose kicker found no match in the players merge).
#   2. Per kicker and kick type, count attempts, makes and misses, and
#      express makes/misses as percentages on a 0-100 scale (2 decimals).
#   3. Keep kicker/type rows with at least `min_attempts` attempts, then
#      keep only kickers that still have both kick types.
#   4. Keep the Field Goal rows whose success percentage reaches
#      `min_fg_success_pc`, best first, at most 10 rows.
#
# Result: a plain data.frame with one row per qualifying kicker.

# Minimum attempts required per kicker and kick type.
min_attempts <- 20
# `succeeded_pc` is stored on a 0-100 scale, so a 91% cut-off is 91.
# (Comparing against 0.91 would let practically every kicker through.)
min_fg_success_pc <- 91

df <- my_df %>%
  filter(specialTeamsPlayType %in% c("Extra Point", "Field Goal"),
         !is.na(displayName)) %>%
  group_by(displayName, specialTeamsPlayType) %>%
  dplyr::summarise(
    attempts = n(),
    # Only "Kick Attempt Good" counts as a make; every other result,
    # including a missing one, counts as a miss (`%in%` never yields NA).
    succeeded = sum(specialTeamsResult %in% "Kick Attempt Good"),
    failed = attempts - succeeded,
    succeeded_pc = round(100 * (succeeded / attempts), 2),
    failed_pc = round(100 * (failed / attempts), 2),
    .groups = "drop"
  ) %>%
  filter(attempts >= min_attempts) %>%
  # Number of kick types (1 or 2) that survived the attempts filter.
  group_by(displayName) %>%
  mutate(type_of_different_kicks = n()) %>%
  ungroup() %>%
  # The ranking criterion is Field Goal accuracy, so Extra Point rows are
  # only used above to qualify a kicker and are not ranked themselves.
  filter(type_of_different_kicks >= 2,
         specialTeamsPlayType == "Field Goal",
         succeeded_pc >= min_fg_success_pc) %>%
  arrange(desc(succeeded_pc)) %>%
  head(10) %>%
  data.frame()
df
##        displayName specialTeamsPlayType attempts succeeded failed succeeded_pc
## 1       Josh Lambo           Field Goal       57        54      3        94.74
## 2      Graham Gano           Field Goal       45        42      3        93.33
## 3    Justin Tucker           Field Goal       94        87      7        92.55
## 4      Jason Myers           Field Goal       80        73      7        91.25
## 5     Younghoe Koo           Field Goal       57        52      5        91.23
## 6  Harrison Butker           Field Goal       85        77      8        90.59
## 7        Nick Folk           Field Goal       42        38      4        90.48
## 8         Wil Lutz           Field Goal       86        77      9        89.53
## 9     Mason Crosby           Field Goal       71        63      8        88.73
## 10   Chris Boswell           Field Goal       63        55      8        87.30
##    failed_pc type_of_different_kicks
## 1       5.26                       2
## 2       6.67                       2
## 3       7.45                       2
## 4       8.75                       2
## 5       8.77                       2
## 6       9.41                       2
## 7       9.52                       2
## 8      10.47                       2
## 9      11.27                       2
## 10     12.70                       2

My Plot

This is my plot:

library(ggplot2)

# Bar chart of the ranked kickers in `df`:
#   - x: kicker name, ordered by descending success percentage
#   - y: number of successful kicks
#   - fill: success percentage on a red-to-green gradient
# Each bar is labelled with its success percentage just above the bar.
fill_breaks <- seq(80, 100, 5)

ggplot(
  df,
  aes(
    x = reorder(displayName, -succeeded_pc),
    y = succeeded,
    fill = succeeded_pc
  )
) +
  geom_col(width = 0.5) +
  geom_text(aes(label = paste0(succeeded_pc, "%")), vjust = -0.5) +
  scale_fill_continuous(
    low = "red",
    high = "green",
    limits = c(87, 95),
    breaks = fill_breaks,
    labels = paste0(fill_breaks, "%")
  ) +
  labs(
    title = "Best Kicker in the NFL",
    x = "Player Name",
    y = "Successful Number of Kicks",
    fill = "Success Percentage"
  ) +
  # Centre the plot title.
  theme(plot.title = element_text(hjust = 0.5))

Conclusion:

The best kicker in the NFL is Josh Lambo, who has the highest Field Goal success percentage at 94.74%.