# Load the NFL attendance workbook from the Excel file, then coerce the
# weekly_attendance column to numeric.
data <- mutate(
  read_excel("../00_data/nfl_attendance.xlsx"),
  weekly_attendance = as.numeric(weekly_attendance)
)
# Print the resulting table for inspection.
data
## # A tibble: 10,846 × 8
## team team_name year total home away week weekly_attendance
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Arizona Cardinals 2000 893926 387475 506451 1 77434
## 2 Arizona Cardinals 2000 893926 387475 506451 2 66009
## 3 Arizona Cardinals 2000 893926 387475 506451 3 NA
## 4 Arizona Cardinals 2000 893926 387475 506451 4 71801
## 5 Arizona Cardinals 2000 893926 387475 506451 5 66985
## 6 Arizona Cardinals 2000 893926 387475 506451 6 44296
## 7 Arizona Cardinals 2000 893926 387475 506451 7 38293
## 8 Arizona Cardinals 2000 893926 387475 506451 8 62981
## 9 Arizona Cardinals 2000 893926 387475 506451 9 35286
## 10 Arizona Cardinals 2000 893926 387475 506451 10 52244
## # ℹ 10,836 more rows
Which NFL team had the highest weekly attendance?
# Compare weekly attendance across teams with a boxplot and a ridgeline plot
library(ggplot2)
library(ggridges)

# Drop rows with no recorded weekly attendance up front, so the per-team
# median used for ordering is never NA and neither geom has to discard
# rows with a warning.
attendance <- data %>%
  dplyr::filter(!is.na(weekly_attendance))

# Boxplot: one box per `team`, ordered by median weekly attendance
# (median is fct_reorder()'s default summary function).
attendance %>%
  ggplot(aes(weekly_attendance, fct_reorder(team, weekly_attendance))) +
  geom_boxplot()

# Ridgeline plot: one density ridge per `team_name`.
ggplot(
  attendance,
  aes(x = weekly_attendance, y = team_name, fill = team_name)
) +
  geom_density_ridges() +
  theme_ridges() +
  # The title has to be passed by name; an unnamed string is not used as a
  # plot label.
  labs(title = "NFL Attendance by Team") +
  # The fill colour repeats what the y axis already shows, so hide the legend.
  theme(legend.position = "none")
By median weekly attendance, New York ranked highest among all teams. However, New York, San Francisco, and Arizona each had one week in which attendance was higher than in any week for the other teams.