Import data

# Read the NFL attendance workbook (Excel file).
# `na = c("", "NA")` declares both blank cells and the literal text "NA" as
# missing, so `weekly_attendance` is not pulled to character by "NA" strings
# and as.numeric() has nothing to coerce with a warning.
# NOTE(review): assumes missing weeks are stored as the text "NA" -- confirm
# against the workbook. as.numeric() is kept as a safeguard in case the column
# is still read as text.
data <- read_excel("../00_data/nfl_attendance.xlsx", na = c("", "NA")) %>%
  mutate(weekly_attendance = as.numeric(weekly_attendance))
data
## # A tibble: 10,846 × 8
##    team    team_name  year  total   home   away  week weekly_attendance
##    <chr>   <chr>     <dbl>  <dbl>  <dbl>  <dbl> <dbl>             <dbl>
##  1 Arizona Cardinals  2000 893926 387475 506451     1             77434
##  2 Arizona Cardinals  2000 893926 387475 506451     2             66009
##  3 Arizona Cardinals  2000 893926 387475 506451     3                NA
##  4 Arizona Cardinals  2000 893926 387475 506451     4             71801
##  5 Arizona Cardinals  2000 893926 387475 506451     5             66985
##  6 Arizona Cardinals  2000 893926 387475 506451     6             44296
##  7 Arizona Cardinals  2000 893926 387475 506451     7             38293
##  8 Arizona Cardinals  2000 893926 387475 506451     8             62981
##  9 Arizona Cardinals  2000 893926 387475 506451     9             35286
## 10 Arizona Cardinals  2000 893926 387475 506451    10             52244
## # ℹ 10,836 more rows

State one question

Which NFL team had the highest weekly attendance?

Plot data

# Plot weekly attendance by team: a box plot followed by a ridgeline plot.
library(ggplot2)
library(ggridges)

# Box plot of weekly attendance, teams ordered by fct_reorder()'s default
# summary (the median). Rows with a missing weekly_attendance are dropped
# first so the ordering is computed on observed values only and neither
# fct_reorder() nor geom_boxplot() has to discard NAs with a warning.
# NOTE(review): this plot groups by `team` while the ridgeline below groups by
# `team_name`; if several franchises share one `team` value they are pooled
# here -- confirm that is intended.
data %>%
  filter(!is.na(weekly_attendance)) %>%
  ggplot(aes(weekly_attendance, fct_reorder(team, weekly_attendance))) +
  geom_boxplot()

# Ridgeline (density) plot of weekly attendance per team name. The fill only
# distinguishes ridges, so the legend is hidden.
data %>%
  filter(!is.na(weekly_attendance)) %>%
  ggplot(aes(x = weekly_attendance, y = team_name, fill = team_name)) +
  geom_density_ridges() +
  theme_ridges() +
  # labs() needs a named argument; an unnamed string does not set the title.
  labs(title = "NFL Attendance by Team") +
  theme(legend.position = "none")

Interpret

Based on median weekly attendance, New York had the highest attendance of any team. However, New York, San Francisco, and Arizona each had a single week in which attendance was higher than in any week recorded by the other teams.