QUESTION:

How did kickers play inside vs outside?

While working with this data set, I decided to look at how kickers' field goal percentages across the league were affected by whether the game was played outside or in a dome. To get the data required for this analysis, I first had to merge the four data sets that I was pulling information from into one. Once I had all the data in one data frame, I created another, smaller data frame with the rows that I would be using to conduct this analysis. Then, since the data did not say whether each stadium was a dome or open, I had to hard-code this information for each team in the NFL, adding a column to the data frame called “Dome”. I then filtered the data even more, focusing strictly on columns that had information on kicks, more specifically field goals. I then filtered the data again, this time into two data sets, one containing kick info for domes and the other for open stadiums. While doing this, I created three new columns in each data frame, fgMade, fgMissed, and fgP, to show the field goal percentages.

library(data.table)   
library(lubridate)
library(dplyr)
library(ggplot2)

# Point the session at the project folder so the bare CSV names resolve.
my_path <- "~/Desktop/IS470"
setwd(my_path)

# Read the four raw tables.
games <- fread("games.csv")
plays <- fread("plays.csv")
players <- fread("players.csv")
PFF <- fread("PFFScoutingData.csv")

# Build one wide table keyed on plays. Every merge keeps all rows of the
# left-hand table (all.x = TRUE): first the player row matching kickerId,
# then the game row, then the scouting row for the same game and play.
my_df <- plays %>%
  merge(players, by.x = "kickerId", by.y = "nflId", all.x = TRUE) %>%
  merge(games, by = "gameId", all.x = TRUE) %>%
  merge(PFF, by = c("gameId", "playId"), all.x = TRUE)

# Keep only the three kicking play types and convert to a plain data.frame.
df_filter <- data.frame(
  filter(
    my_df,
    specialTeamsPlayType %in% c("Field Goal", "Extra Point", "Punt")
  )
)

# Tag every play with the stadium type of the home team in a new `Dome`
# column: 'Open' for open-air venues, 'Closed' for domed / roofed venues.
#
# Teams are matched with %in% rather than ==, so a missing homeTeamAbbr
# simply stays unclassified; an NA inside a logical row index would make
# data-frame subassignment fail.

# Teams who play in open stadiums
open_teams <- c(
  "PHI", "NYG", "WAS",         # NFC EAST
  "GB", "CHI",                 # NFC NORTH
  "TB", "CAR",                 # NFC SOUTH
  "SF", "SEA",                 # NFC WEST
  "KC", "DEN", "OAK",          # AFC WEST (Oakland Coliseum is open-air)
  "TEN", "JAX",                # AFC SOUTH
  "BAL", "CLE", "CIN", "PIT",  # AFC NORTH
  "BUF", "NYJ", "MIA", "NE"    # AFC EAST
)

# Teams who play in dome stadiums
# NOTE(review): LA and LAC are kept as 'Closed' as in the original coding,
# but both teams played in open-air venues before moving to a roofed
# stadium in 2020 -- consider classifying them by season. TODO confirm.
closed_teams <- c(
  "ATL", "NO",                 # NFC SOUTH
  "DET", "MIN",                # NFC NORTH
  "LV", "LAC",                 # AFC WEST (LV = Raiders after leaving Oakland)
  "IND", "HOU",                # AFC SOUTH
  "LA", "ARI",                 # NFC WEST
  "DAL"                        # NFC EAST
)

df_filter$Dome <- rep(NA_character_, nrow(df_filter))
df_filter$Dome[df_filter$homeTeamAbbr %in% open_teams] <- "Open"
df_filter$Dome[df_filter$homeTeamAbbr %in% closed_teams] <- "Closed"

# Surface any home team that is in the data but in neither list, instead of
# letting its plays silently drop out of both the open and dome groups.
unclassified <- setdiff(
  unique(df_filter$homeTeamAbbr),
  c(open_teams, closed_teams, NA)
)
if (length(unclassified) > 0) {
  warning(
    "No stadium type recorded for home team(s): ",
    paste(unclassified, collapse = ", "),
    call. = FALSE
  )
}

# Kick-level table: the identifying columns, the play type and result, the
# teams, the stadium type, plus the game date parsed from month/day/year
# text and the calendar year and month derived from it.
# specialTeamsPlayType is carried along so field goals can be separated
# from extra points below.
KickStat <- df_filter %>%
  select(gameId, playId, kickerId, playDescription, kickLength,
         specialTeamsResult, displayName, gameDate, homeTeamAbbr,
         visitorTeamAbbr, Dome, specialTeamsPlayType) %>%
  mutate(
    gameDate = mdy(gameDate),
    gameYear = year(gameDate),
    gameMonth = month(gameDate)
  )

# Split the kicks by stadium type.
Open <- KickStat %>%
  filter(Dome == "Open")

Dome <- KickStat %>%
  filter(Dome == "Closed")

# Field-goal attempts only. Extra points carry the same 'Kick Attempt ...'
# result labels, so the play type has to be checked as well; otherwise extra
# points are counted as field goals and inflate the percentage.
# NOTE(review): plays with any other specialTeamsResult (for example a
# blocked kick, if the data labels those separately) are not counted as
# attempts -- confirm that is intended.
kick_results <- c("Kick Attempt Good", "Kick Attempt No Good")

fgOpen <- Open %>%
  filter(specialTeamsPlayType == "Field Goal",
         specialTeamsResult %in% kick_results)

fgDome <- Dome %>%
  filter(specialTeamsPlayType == "Field Goal",
         specialTeamsResult %in% kick_results)


# Open stadiums: one row per (gameMonth, gameYear) with the number of kicks,
# how many were good, how many were no good, and the share that were good.
OpenFG <- data.frame(
  fgOpen %>%
    group_by(gameMonth, gameYear) %>%
    summarise(
      totalKicks = n(),
      fgMade = sum(specialTeamsResult == "Kick Attempt Good", na.rm = TRUE),
      fgMissed = sum(specialTeamsResult == "Kick Attempt No Good",
                     na.rm = TRUE),
      fgP = fgMade / totalKicks,
      .groups = "keep"
    )
)
# Discard the first summary row, then turn the share into 2-decimal text.
# NOTE(review): presumably row 1 is a stray month outside the regular
# seasons being charted -- confirm before relying on a positional drop.
OpenFG <- OpenFG[-1, ]
OpenFG$fgP <- format(round(OpenFG$fgP, digits = 2))

# Dome stadiums: same summary.
DomeFG <- data.frame(
  fgDome %>%
    group_by(gameMonth, gameYear) %>%
    summarise(
      totalKicks = n(),
      fgMade = sum(specialTeamsResult == "Kick Attempt Good", na.rm = TRUE),
      fgMissed = sum(specialTeamsResult == "Kick Attempt No Good",
                     na.rm = TRUE),
      fgP = fgMade / totalKicks,
      .groups = "keep"
    )
)
# Here the share is formatted first and the first row is discarded after.
DomeFG$fgP <- format(round(DomeFG$fgP, digits = 2))
DomeFG <- DomeFG[-1, ]
# Draws a trellis chart of kicks per month, one panel per gameYear, with
# each bar labelled by its field-goal percentage.
#
# fg_summary  data frame with gameMonth, gameYear, totalKicks and fgP, where
#             fgP is the made/attempted proportion (numeric or numeric text)
# bar_fill    fill colour for the bars
# chart_title plot title
#
# fgP is a proportion (e.g. "0.90"), so it is scaled by 100 before the "%"
# sign is appended; pasting it directly would label a 90% month as "0.90%".
plot_fg_by_month <- function(fg_summary, bar_fill, chart_title) {
  ggplot(fg_summary, aes(x = gameMonth, y = totalKicks)) +
    geom_bar(stat = "identity", fill = bar_fill) +
    geom_text(
      aes(label = paste0(round(as.numeric(fgP) * 100), "%")),
      vjust = -0.3,
      size = 3
    ) +
    labs(x = "Month", y = "Number of Kicks", title = chart_title) +
    facet_wrap(~gameYear, ncol = 1, nrow = 3, scales = "free")
}

#Trellis Chart for FG % 2018-2020 in Open Stadiums
plot_fg_by_month(OpenFG, "dark green",
                 "Field Goal % by Month in Open Stadiums")

#Trellis Chart for FG % 2018-2020 in Dome Stadiums
plot_fg_by_month(DomeFG, "dark red",
                 "Field Goal % by Month in Dome Stadiums")

Conclusion & Results

After conducting this analysis and looking at my results, I was able to see the effect that both weather and stadium type had on kickers. As we know, the NFL season runs from the fall into the winter, and towards the end of the season weather starts to become a more important factor. After conducting my analysis, I was able to determine that kickers are more accurate and reliable within domes in the winter months. This was revealed when looking at the averages of kickers' field goal percentages for each month of the season:

September

Indoor FG %: 90.6%
Outdoor FG %: 90.3%

October

Indoor FG %: 90.6%
Outdoor FG %: 90%

November

Indoor FG %: 89.3%
Outdoor FG %: 89.33%

December

Indoor FG %: 92%
Outdoor FG %: 89.33%

These results revealed that kickers have a higher, if ever so slightly higher, percentage of kicks made when playing indoors at nearly all points of the season (November is essentially a tie), but most importantly in the month of December. Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.