Topic: Is Home Field Advantage Still Important in the NFL?
Research Questions:
1. How does the average win percentage of home teams compare to away
teams in post-COVID NFL seasons (2021–2024)?
2. On average, do home teams score more points or win by larger
margins than away teams?
3. Has the home-field advantage in wins or point differential
increased, decreased, or stayed consistent over time?
4. Which teams have the strongest and weakest home-field advantages
based on win rate or average margin at home vs. away?
Load NFL Scores Dataset (Kaggle)
# Read the raw NFL scores CSV (expected in the working directory) into a
# base data.frame, spelling out read.csv()'s default header handling.
nfl_scores <- read.csv(file = "NFL_scores_data.csv", header = TRUE)
# Print each column's name and type before any cleaning.
str(object = nfl_scores)
## 'data.frame': 14358 obs. of 17 variables:
## $ schedule_date : chr "9/2/66" "9/3/66" "9/4/66" "9/9/66" ...
## $ schedule_season : int 1966 1966 1966 1966 1966 1966 1966 1966 1966 1966 ...
## $ schedule_week : chr "1" "1" "1" "2" ...
## $ schedule_playoff : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ team_home : chr "Miami Dolphins" "Houston Oilers" "San Diego Chargers" "Miami Dolphins" ...
## $ score_home : int 14 45 27 14 24 31 24 14 20 14 ...
## $ score_away : int 23 7 7 19 3 0 0 19 42 3 ...
## $ team_away : chr "Oakland Raiders" "Denver Broncos" "Buffalo Bills" "New York Jets" ...
## $ team_favorite_id : chr "" "" "" "" ...
## $ spread_favorite : num NA NA NA NA NA NA NA NA NA NA ...
## $ over_under_line : num NA NA NA NA NA NA NA NA NA NA ...
## $ stadium : chr "Orange Bowl" "Rice Stadium" "Balboa Stadium" "Orange Bowl" ...
## $ stadium_neutral : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ weather_temperature: int 83 81 70 82 64 77 69 71 63 67 ...
## $ weather_wind_mph : int 6 7 7 11 8 6 9 7 11 7 ...
## $ weather_humidity : int 71 70 82 78 62 82 81 57 73 73 ...
## $ weather_detail : chr "" "" "" "" ...
Dataset Cleaning & Manipulation
Select Relevant Columns
# Keep only the six columns the analysis uses: season, week, both team
# names and both scores.
nfl_selected <- nfl_scores %>%
  select(
    schedule_season, schedule_week,
    team_home, score_home,
    team_away, score_away
  )
# Preview the first six rows of the trimmed table.
head(nfl_selected, n = 6)
## schedule_season schedule_week team_home score_home team_away
## 1 1966 1 Miami Dolphins 14 Oakland Raiders
## 2 1966 1 Houston Oilers 45 Denver Broncos
## 3 1966 1 San Diego Chargers 27 Buffalo Bills
## 4 1966 2 Miami Dolphins 14 New York Jets
## 5 1966 1 Green Bay Packers 24 Baltimore Colts
## 6 1966 2 Houston Oilers 31 Oakland Raiders
## score_away
## 1 23
## 2 7
## 3 7
## 4 19
## 5 3
## 6 0
Rename Columns
# Rename by column name (new = old) instead of by position, so the labels
# stay correct even if the column order of nfl_selected changes, and a
# missing source column raises an error instead of being silently mislabeled.
nfl_selected <- rename(
  nfl_selected,
  season = schedule_season,
  week = schedule_week,
  home_team = team_home,
  home_points = score_home,
  away_team = team_away,
  away_points = score_away
)
Add Margin Column
# Point differential from the home team's side: positive when the home
# team outscored the visitors, negative when it was outscored.
nfl_margin <- mutate(nfl_selected, margin = home_points - away_points)
head(nfl_margin, n = 6)
## season week home_team home_points away_team away_points margin
## 1 1966 1 Miami Dolphins 14 Oakland Raiders 23 -9
## 2 1966 1 Houston Oilers 45 Denver Broncos 7 38
## 3 1966 1 San Diego Chargers 27 Buffalo Bills 7 20
## 4 1966 2 Miami Dolphins 14 New York Jets 19 -5
## 5 1966 1 Green Bay Packers 24 Baltimore Colts 3 21
## 6 1966 2 Houston Oilers 31 Oakland Raiders 0 31
Check for Missing Values
# Total count of NA cells across every column of nfl_margin.
nfl_margin %>%
  is.na() %>%
  sum()
## [1] 537
# Per-column NA counts. colSums() is the vectorized base-R equivalent of
# apply(<matrix>, 2, sum) and prints the same named counts.
colSums(is.na(nfl_margin))
## season week home_team home_points away_team away_points
## 0 0 0 179 0 179
## margin
## 179
# Pull out every row that has an NA in at least one column; base
# subsetting keeps the original row numbers visible in the preview.
nfl_with_na <- nfl_margin[rowSums(is.na(nfl_margin)) > 0, ]
head(nfl_with_na, n = 6)
## season week home_team home_points away_team
## 14180 2025 7 Cincinnati Bengals NA Pittsburgh Steelers
## 14181 2025 7 Jacksonville Jaguars NA Los Angeles Rams
## 14182 2025 7 Chicago Bears NA New Orleans Saints
## 14183 2025 7 Cleveland Browns NA Miami Dolphins
## 14184 2025 7 Kansas City Chiefs NA Las Vegas Raiders
## 14185 2025 7 Minnesota Vikings NA Philadelphia Eagles
## away_points margin
## 14180 NA NA
## 14181 NA NA
## 14182 NA NA
## 14183 NA NA
## 14184 NA NA
## 14185 NA NA
Remove 2025 Season Rows
# Drop every game from the 2025 season (the previewed rows with missing
# scores are 2025 games), then confirm that no NA cells remain.
nfl_filtered <- nfl_margin %>%
  filter(season != 2025)
nfl_filtered %>%
  is.na() %>%
  sum()
## [1] 0
Add Home Win Indicator Column (100 = home win, 0 = loss or tie; its mean is the home win percentage)
# Per-game home result on a 0-100 scale, so that averaging the column gives
# a home win percentage: 100 when the home team won by at least one point,
# 0 otherwise (a tied game, margin 0, is scored 0 as well).
nfl_winper <- mutate(
  nfl_filtered,
  home_win_pct = if_else(margin >= 1, 100, 0)
)
head(nfl_winper, n = 6)
## season week home_team home_points away_team away_points margin
## 1 1966 1 Miami Dolphins 14 Oakland Raiders 23 -9
## 2 1966 1 Houston Oilers 45 Denver Broncos 7 38
## 3 1966 1 San Diego Chargers 27 Buffalo Bills 7 20
## 4 1966 2 Miami Dolphins 14 New York Jets 19 -5
## 5 1966 1 Green Bay Packers 24 Baltimore Colts 3 21
## 6 1966 2 Houston Oilers 31 Oakland Raiders 0 31
## home_win_pct
## 1 0
## 2 100
## 3 100
## 4 0
## 5 100
## 6 100
Filter Dataset for Post-COVID NFL Seasons (2021–2024) & Exclude Super Bowl Games
(neutral field)
# Restrict to the 2021-2024 seasons and drop rows whose week is labeled
# "Superbowl". Comma-separated conditions in filter() are combined with AND.
# NOTE(review): any other neutral-site games are still kept here, since
# stadium_neutral was not carried into this table - confirm that is intended.
nfl_modern <- nfl_winper %>%
  filter(
    season >= 2021,
    season <= 2024,
    week != "Superbowl"
  )
nrow(nfl_modern)
## [1] 1135
Rename Washington Football Team to Washington Commanders
# Fold the "Washington Football Team" label into "Washington Commanders" in
# both team columns so the franchise is grouped under a single name.
nfl_modern <- nfl_modern %>%
  mutate(
    across(
      c(home_team, away_team),
      function(team) {
        if_else(
          team == "Washington Football Team",
          "Washington Commanders",
          team
        )
      }
    )
  )
Initial Dataset Stats
# Min, quartiles, median, mean and max for the scoring, margin and
# home-result columns of the filtered data.
nfl_modern %>%
  select(home_points, away_points, margin, home_win_pct) %>%
  summary()
## home_points away_points margin home_win_pct
## Min. : 0.00 Min. : 0.00 Min. :-40.000 Min. : 0.0
## 1st Qu.:17.00 1st Qu.:14.00 1st Qu.: -6.000 1st Qu.: 0.0
## Median :23.00 Median :21.00 Median : 2.000 Median :100.0
## Mean :23.61 Mean :21.31 Mean : 2.301 Mean : 54.8
## 3rd Qu.:30.00 3rd Qu.:28.00 3rd Qu.: 10.000 3rd Qu.:100.0
## Max. :70.00 Max. :51.00 Max. : 50.000 Max. :100.0
Team-Specific Home Win Percentage
# Average the 0/100 home-result column per home team (each team's home win
# percentage), then rank the teams from highest to lowest.
team_avg_win <- group_by(nfl_modern, home_team) %>%
  summarise(mean_home_win_pct = mean(home_win_pct, na.rm = TRUE)) %>%
  arrange(desc(mean_home_win_pct))
# First six rows of the ranking.
head(team_avg_win, n = 6)
## # A tibble: 6 × 2
## home_team mean_home_win_pct
## <chr> <dbl>
## 1 Kansas City Chiefs 81.0
## 2 Buffalo Bills 80.5
## 3 Philadelphia Eagles 74.4
## 4 Miami Dolphins 70.6
## 5 Green Bay Packers 68.6
## 6 San Francisco 49ers 65.8
# Last six rows of team_avg_win.
tail(team_avg_win, n = 6)
## # A tibble: 6 × 2
## home_team mean_home_win_pct
## <chr> <dbl>
## 1 Chicago Bears 41.2
## 2 New York Giants 38.2
## 3 New York Jets 38.2
## 4 Carolina Panthers 35.3
## 5 Arizona Cardinals 35.3
## 6 New England Patriots 32.4