# Read the attendance workbook; the path is relative to this document.
# The import messages echoed below show four unnamed columns being renamed
# to `...11` through `...14`.
data <- readxl::read_excel(path = "../00_data/Data.xlsx")
## New names:
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
# Print the imported tibble.
data
## # A tibble: 10,846 × 14
## team `Team City` Population team_name year total home away week
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 1
## 2 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 2
## 3 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 3
## 4 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 4
## 5 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 5
## 6 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 6
## 7 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 7
## 8 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 8
## 9 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 9
## 10 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 10
## # ℹ 10,836 more rows
## # ℹ 5 more variables: weekly_attendance <chr>, ...11 <lgl>, ...12 <chr>,
## # ...13 <lgl>, ...14 <dbl>
# Number of rows recorded for each team name.
count(data, team_name)
## # A tibble: 32 × 2
## team_name n
## <chr> <int>
## 1 49ers 340
## 2 Bears 340
## 3 Bengals 340
## 4 Bills 340
## 5 Broncos 340
## 6 Browns 340
## 7 Buccaneers 340
## 8 Cardinals 340
## 9 Chargers 340
## 10 Chiefs 340
## # ℹ 22 more rows
# Teams that should come first, in the order their factor levels should appear.
team_name_levels <- c(
  "Cardinals", "Packers", "Patriots", "Dolphins", "Jets", "Giants", "Texans",
  "Vikings", "Cowboys", "Eagles", "Bears", "Lions", "Saints", "Panthers"
)

# Sample character vector holding the same names; it is turned into a factor
# on its own further down.
x1 <- team_name_levels

# Reorder the team_name levels without losing data.
# factor(team_name, levels = team_name_levels) would turn every team that is
# missing from team_name_levels into NA. fct_relevel() instead moves the listed
# teams to the front and keeps all remaining teams as levels after them.
team_name_rev <- data %>%
  mutate(team_name = fct_relevel(team_name, team_name_levels))

team_name_rev
## # A tibble: 10,846 × 14
## team `Team City` Population team_name year total home away week
## <chr> <chr> <dbl> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 1
## 2 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 2
## 3 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 3
## 4 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 4
## 5 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 5
## 6 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 6
## 7 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 7
## 8 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 8
## 9 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 9
## 10 Arizona Phoenix 1608139 Cardinals 2000 893926 387475 506451 10
## # ℹ 10,836 more rows
## # ℹ 5 more variables: weekly_attendance <chr>, ...11 <lgl>, ...12 <chr>,
## # ...13 <lgl>, ...14 <dbl>
# Convert the sample names to a factor that uses the custom level order.
y1 <- x1 %>% factor(levels = team_name_levels)

y1
## [1] Cardinals Packers Patriots Dolphins Jets Giants Texans
## [8] Vikings Cowboys Eagles Bears Lions Saints Panthers
## 14 Levels: Cardinals Packers Patriots Dolphins Jets Giants Texans ... Panthers
Make two bar charts here: one before ordering and another after.
# Average of the `home` column for each team_name, ignoring missing values.
home_attendance_per_team_name <- summarise(
  group_by(data, team_name),
  avg_home_attendance = mean(home, na.rm = TRUE)
)

home_attendance_per_team_name
## # A tibble: 32 × 2
## team_name avg_home_attendance
## <chr> <dbl>
## 1 49ers 550459.
## 2 Bears 497954.
## 3 Bengals 476364.
## 4 Bills 539234.
## 5 Broncos 607991
## 6 Browns 555725.
## 7 Buccaneers 489756.
## 8 Cardinals 452536.
## 9 Chargers 466237.
## 10 Chiefs 598235.
## # ℹ 22 more rows
# Bar chart before ordering: team_name is a character column, so the bars
# follow ggplot2's default ordering of the names.
ggplot(home_attendance_per_team_name, aes(avg_home_attendance, team_name)) +
  geom_col() +
  labs(x = "Average home attendance", y = "Team")

# Bar chart after ordering: fct_reorder() sorts the team levels by their
# average home attendance, so the bars run from lowest to highest.
ggplot(
  home_attendance_per_team_name,
  aes(avg_home_attendance, fct_reorder(team_name, avg_home_attendance))
) +
  geom_col() +
  labs(x = "Average home attendance", y = "Team")
Show examples of three functions:
# fct_recode(): rename levels by hand, written as "new name" = "old name".
# Each city below is given a lower-case label; two of them are also reworded
# ("St. Louis" -> "saint louis", "Washington DC" -> "dc").
# The recoded column is then counted per level.
data %>%
  mutate(
    `Team City` = fct_recode(
      `Team City`,
      "phoenix"       = "Phoenix",
      "atlanta"       = "Atlanta",
      "baltimore"     = "Baltimore",
      "buffalo"       = "Buffalo",
      "charlotte"     = "Charlotte",
      "chicago"       = "Chicago",
      "cincinnati"    = "Cincinnati",
      "cleveland"     = "Cleveland",
      "dallas"        = "Dallas",
      "denver"        = "Denver",
      "detroit"       = "Detroit",
      "green bay"     = "Green Bay",
      "houston"       = "Houston",
      "indianapolis"  = "Indianapolis",
      "jacksonville"  = "Jacksonville",
      "kansas city"   = "Kansas City",
      "miami"         = "Miami",
      "minneapolis"   = "Minneapolis",
      "boston"        = "Boston",
      "new orleans"   = "New Orleans",
      "new york"      = "New York",
      "oakland"       = "Oakland",
      "philadelphia"  = "Philadelphia",
      "pittsburgh"    = "Pittsburgh",
      "san diego"     = "San Diego",
      "san francisco" = "San Francisco",
      "seattle"       = "Seattle",
      "saint louis"   = "St. Louis",
      "tampa"         = "Tampa",
      "nashville"     = "Nashville",
      "dc"            = "Washington DC"
    )
  ) %>%
  count(`Team City`)
## # A tibble: 32 × 2
## `Team City` n
## <fct> <int>
## 1 atlanta 340
## 2 baltimore 340
## 3 boston 340
## 4 buffalo 340
## 5 charlotte 340
## 6 chicago 340
## 7 cincinnati 340
## 8 cleveland 340
## 9 dallas 340
## 10 denver 340
## # ℹ 22 more rows
# fct_collapse(): merge several levels into one.
# Miami, Atlanta, Denver and Houston are combined into a single "Other" level;
# Arizona and Baltimore are mapped to themselves. Rows are then counted per
# resulting level.
count(
  mutate(
    data,
    team = fct_collapse(
      team,
      Arizona = "Arizona",
      Baltimore = "Baltimore",
      Other = c("Miami", "Atlanta", "Denver", "Houston")
    )
  ),
  team
)
## # A tibble: 29 × 2
## team n
## <fct> <int>
## 1 Arizona 340
## 2 Other 1326
## 3 Baltimore 340
## 4 Buffalo 340
## 5 Carolina 340
## 6 Chicago 340
## 7 Cincinnati 340
## 8 Cleveland 340
## 9 Dallas 340
## 10 Detroit 340
## # ℹ 19 more rows
# fct_lump_prop(): lump infrequent levels by their share of rows.
# Teams that do not reach the 3% threshold are candidates for the "Other"
# level. fct_lump_prop() is the specific variant that the generic fct_lump()
# dispatches to when only `prop` is supplied, so the result is unchanged.
data %>%
  mutate(team = fct_lump_prop(team, prop = 0.03)) %>%
  count(team)
## # A tibble: 29 × 2
## team n
## <fct> <int>
## 1 Arizona 340
## 2 Atlanta 340
## 3 Baltimore 340
## 4 Buffalo 340
## 5 Carolina 340
## 6 Chicago 340
## 7 Cincinnati 340
## 8 Cleveland 340
## 9 Dallas 340
## 10 Denver 340
## # ℹ 19 more rows
# Same lumping with a stricter 5% threshold: every team below that share of
# rows is merged into "Other". fct_lump_prop() is the specific variant that
# the generic fct_lump() dispatches to when only `prop` is supplied.
data %>%
  mutate(team = fct_lump_prop(team, prop = 0.05)) %>%
  count(team)
## # A tibble: 2 × 2
## team n
## <fct> <int>
## 1 New York 680
## 2 Other 10166
No need to do anything here.