Question 1

# Load the `cars` dataset and report the median of its speed column.
data("cars")
median(cars[["speed"]])
## [1] 15

Question 2

library(jsonlite)
# Request BTC/USD daily history (fsym=BTC, tsym=USD, limit=100) and keep the
# nested Data -> Data element, which holds the `close` column used below.
btc_data <- fromJSON(
  "https://min-api.cryptocompare.com/data/v2/histoday?fsym=BTC&tsym=USD&limit=100"
)[["Data"]][["Data"]]

# Highest closing value among the returned rows, ignoring missing entries.
max_close <- max(btc_data[["close"]], na.rm = TRUE)
print(max_close)
## [1] 124723

Question 3

Topic: Is Home Field Advantage Still Important in the NFL?

Research Questions:

1. How does the average win percentage of home teams compare to that of away teams in post-COVID NFL seasons (2021–2024)?

2. On average, do home teams score more points or win by larger margins than away teams?

3. Has the home-field advantage in wins or point differential increased, decreased, or stayed consistent over time?

4. Which teams have the strongest and weakest home-field advantages based on win rate or average margin at home vs. away?

Load NFL Scores Dataset (Kaggle)

# Read the NFL scores CSV (relative path, so it must sit in the working
# directory) and inspect the column types.
nfl_scores <- read.csv(file = "NFL_scores_data.csv", header = TRUE)
str(object = nfl_scores)
## 'data.frame':    14358 obs. of  17 variables:
##  $ schedule_date      : chr  "9/2/66" "9/3/66" "9/4/66" "9/9/66" ...
##  $ schedule_season    : int  1966 1966 1966 1966 1966 1966 1966 1966 1966 1966 ...
##  $ schedule_week      : chr  "1" "1" "1" "2" ...
##  $ schedule_playoff   : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ team_home          : chr  "Miami Dolphins" "Houston Oilers" "San Diego Chargers" "Miami Dolphins" ...
##  $ score_home         : int  14 45 27 14 24 31 24 14 20 14 ...
##  $ score_away         : int  23 7 7 19 3 0 0 19 42 3 ...
##  $ team_away          : chr  "Oakland Raiders" "Denver Broncos" "Buffalo Bills" "New York Jets" ...
##  $ team_favorite_id   : chr  "" "" "" "" ...
##  $ spread_favorite    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ over_under_line    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ stadium            : chr  "Orange Bowl" "Rice Stadium" "Balboa Stadium" "Orange Bowl" ...
##  $ stadium_neutral    : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ weather_temperature: int  83 81 70 82 64 77 69 71 63 67 ...
##  $ weather_wind_mph   : int  6 7 7 11 8 6 9 7 11 7 ...
##  $ weather_humidity   : int  71 70 82 78 62 82 81 57 73 73 ...
##  $ weather_detail     : chr  "" "" "" "" ...

Dataset Cleaning & Manipulation

Select Relevant Columns

# Keep only the season/week identifiers plus each side's team name and score.
nfl_selected <- nfl_scores %>%
  select(
    schedule_season, schedule_week,
    team_home, score_home,
    team_away, score_away
  )

head(nfl_selected, n = 6)
##   schedule_season schedule_week          team_home score_home       team_away
## 1            1966             1     Miami Dolphins         14 Oakland Raiders
## 2            1966             1     Houston Oilers         45  Denver Broncos
## 3            1966             1 San Diego Chargers         27   Buffalo Bills
## 4            1966             2     Miami Dolphins         14   New York Jets
## 5            1966             1  Green Bay Packers         24 Baltimore Colts
## 6            1966             2     Houston Oilers         31 Oakland Raiders
##   score_away
## 1         23
## 2          7
## 3          7
## 4         19
## 5          3
## 6          0

Rename Columns

# Rename through an explicit new = old mapping rather than by position, so
# each label stays attached to the column it describes even if the column
# order of `nfl_selected` changes.
nfl_selected <- rename(
  nfl_selected,
  season = schedule_season,
  week = schedule_week,
  home_team = team_home,
  home_points = score_home,
  away_team = team_away,
  away_points = score_away
)

Add Margin Column

# Point differential from the home side's perspective: positive values mean
# the home team outscored the away team.
nfl_margin <- mutate(nfl_selected, margin = home_points - away_points)

head(nfl_margin, n = 6)
##   season week          home_team home_points       away_team away_points margin
## 1   1966    1     Miami Dolphins          14 Oakland Raiders          23     -9
## 2   1966    1     Houston Oilers          45  Denver Broncos           7     38
## 3   1966    1 San Diego Chargers          27   Buffalo Bills           7     20
## 4   1966    2     Miami Dolphins          14   New York Jets          19     -5
## 5   1966    1  Green Bay Packers          24 Baltimore Colts           3     21
## 6   1966    2     Houston Oilers          31 Oakland Raiders           0     31

Check for Missing Values

# Total number of missing cells across the whole table.
nfl_margin %>%
  is.na() %>%
  sum()
## [1] 537
# Missing-value count per column. colSums() is the dedicated column-wise sum,
# so no generic apply() over the logical matrix is needed.
colSums(is.na(nfl_margin))
##      season        week   home_team home_points   away_team away_points 
##           0           0           0         179           0         179 
##      margin 
##         179
# Keep only the rows holding at least one missing value, then preview them.
nfl_with_na <- nfl_margin[rowSums(is.na(nfl_margin)) > 0, ]
head(nfl_with_na, n = 6)
##       season week            home_team home_points           away_team
## 14180   2025    7   Cincinnati Bengals          NA Pittsburgh Steelers
## 14181   2025    7 Jacksonville Jaguars          NA    Los Angeles Rams
## 14182   2025    7        Chicago Bears          NA  New Orleans Saints
## 14183   2025    7     Cleveland Browns          NA      Miami Dolphins
## 14184   2025    7   Kansas City Chiefs          NA   Las Vegas Raiders
## 14185   2025    7    Minnesota Vikings          NA Philadelphia Eagles
##       away_points margin
## 14180          NA     NA
## 14181          NA     NA
## 14182          NA     NA
## 14183          NA     NA
## 14184          NA     NA
## 14185          NA     NA

Remove 2025 Season Rows

# Drop every row from the 2025 season, then recount missing cells in what
# remains.
nfl_filtered <- nfl_margin %>%
  filter(season != 2025)
nfl_filtered %>%
  is.na() %>%
  sum()
## [1] 0

Add Home Win Indicator Column (100 = home win, 0 otherwise; its mean is the home win percentage)

# Per-game indicator: 100 when the home team won by at least one point,
# otherwise 0 (so a game with margin 0 is recorded as 0). Averaging this
# column over a set of games yields a home win percentage.
nfl_winper <- mutate(
  nfl_filtered,
  home_win_pct = if_else(condition = margin >= 1, true = 100, false = 0)
)

head(nfl_winper, n = 6)
##   season week          home_team home_points       away_team away_points margin
## 1   1966    1     Miami Dolphins          14 Oakland Raiders          23     -9
## 2   1966    1     Houston Oilers          45  Denver Broncos           7     38
## 3   1966    1 San Diego Chargers          27   Buffalo Bills           7     20
## 4   1966    2     Miami Dolphins          14   New York Jets          19     -5
## 5   1966    1  Green Bay Packers          24 Baltimore Colts           3     21
## 6   1966    2     Houston Oilers          31 Oakland Raiders           0     31
##   home_win_pct
## 1            0
## 2          100
## 3          100
## 4            0
## 5          100
## 6          100

Filter Dataset for Post-COVID NFL Seasons (2021–2024) & Exclude Super Bowl Games (Neutral Field)

# Keep seasons 2021 through 2024 (inclusive) and drop rows whose week is
# labelled "Superbowl"; conditions listed separately in filter() are ANDed.
nfl_modern <- nfl_winper %>%
  filter(
    between(season, 2021, 2024),
    week != "Superbowl"
  )
nrow(nfl_modern)
## [1] 1135

Rename Washington Football Team to Washington Commanders

# Relabel "Washington Football Team" as "Washington Commanders" in both team
# columns so the two names are grouped as a single team.
nfl_modern <- nfl_modern %>%
  mutate(
    home_team = replace(
      home_team,
      home_team == "Washington Football Team",
      "Washington Commanders"
    ),
    away_team = replace(
      away_team,
      away_team == "Washington Football Team",
      "Washington Commanders"
    )
  )

Initial Dataset Stats

# Five-number summary plus mean for the scoring, margin, and win columns.
nfl_modern %>%
  select(home_points, away_points, margin, home_win_pct) %>%
  summary()
##   home_points     away_points        margin         home_win_pct  
##  Min.   : 0.00   Min.   : 0.00   Min.   :-40.000   Min.   :  0.0  
##  1st Qu.:17.00   1st Qu.:14.00   1st Qu.: -6.000   1st Qu.:  0.0  
##  Median :23.00   Median :21.00   Median :  2.000   Median :100.0  
##  Mean   :23.61   Mean   :21.31   Mean   :  2.301   Mean   : 54.8  
##  3rd Qu.:30.00   3rd Qu.:28.00   3rd Qu.: 10.000   3rd Qu.:100.0  
##  Max.   :70.00   Max.   :51.00   Max.   : 50.000   Max.   :100.0

Team Specific

# Average the per-game 0/100 home-win indicator for each home team, then rank
# teams from the highest to the lowest home win percentage.
team_avg_win <- nfl_modern %>%
  group_by(home_team) %>%
  summarise(
    mean_home_win_pct = mean(home_win_pct, na.rm = TRUE)
  ) %>%
  arrange(-mean_home_win_pct)

# Teams with the highest home win percentages.
head(team_avg_win, n = 6)
## # A tibble: 6 × 2
##   home_team           mean_home_win_pct
##   <chr>                           <dbl>
## 1 Kansas City Chiefs               81.0
## 2 Buffalo Bills                    80.5
## 3 Philadelphia Eagles              74.4
## 4 Miami Dolphins                   70.6
## 5 Green Bay Packers                68.6
## 6 San Francisco 49ers              65.8
# Teams with the lowest home win percentages (last rows of the ranked table).
tail(team_avg_win, n = 6)
## # A tibble: 6 × 2
##   home_team            mean_home_win_pct
##   <chr>                            <dbl>
## 1 Chicago Bears                     41.2
## 2 New York Giants                   38.2
## 3 New York Jets                     38.2
## 4 Carolina Panthers                 35.3
## 5 Arizona Cardinals                 35.3
## 6 New England Patriots              32.4