# Attach the tidyverse so the pipe, the dplyr verbs, ggplot2 and the forcats
# helpers used in the rest of this script are available when it is run on
# its own.
library(tidyverse)

# Read the TidyTuesday (2024-01-09) NHL roster data straight from GitHub.
# The URL is assembled in two pieces only to keep lines under 80 characters.
nhl_rosters_url <- paste0(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/",
  "master/data/2024/2024-01-09/nhl_rosters.csv"
)
nhl_rosters <- readr::read_csv(nhl_rosters_url)
## Rows: 54883 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): team_code, position_type, headshot, first_name, last_name, positi...
## dbl (7): season, player_id, sweater_number, height_in_inches, weight_in_po...
## date (1): birth_date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Number of roster rows recorded for each team code.
count(nhl_rosters, team_code)
## # A tibble: 58 × 2
## team_code n
## <chr> <int>
## 1 AFM 222
## 2 ANA 1093
## 3 ARI 359
## 4 ATL 410
## 5 BOS 2988
## 6 BRK 26
## 7 BUF 1800
## 8 CAR 852
## 9 CBJ 853
## 10 CGS 184
## # ℹ 48 more rows
# Team codes to use as factor levels, in this order.
teams_code_levels <- c("BOS", "VAN", "CGY")

# Copy of the rosters with `team_code` stored as a factor. Any code that is
# not listed in `teams_code_levels` becomes NA.
nhl_rosters_rev <- mutate(
  nhl_rosters,
  team_code = factor(team_code, levels = teams_code_levels)
)
Make two bar charts here: one before ordering and another after.
# Mean player height per team code, skipping missing heights.
# The averaged column keeps the name `height_in_centimeters`.
rosters_by_team <- group_by(nhl_rosters, team_code)
nhl_rosters_summary <- summarise(
  rosters_by_team,
  height_in_centimeters = mean(height_in_centimeters, na.rm = TRUE),
  .groups = "drop"
)
nhl_rosters_summary
## # A tibble: 58 × 2
## team_code height_in_centimeters
## <chr> <dbl>
## 1 AFM 183.
## 2 ANA 186.
## 3 ARI 186.
## 4 ATL 185.
## 5 BOS 182.
## 6 BRK 179.
## 7 BUF 185.
## 8 CAR 186.
## 9 CBJ 186.
## 10 CGS 181.
## # ℹ 48 more rows
# One point per team showing its average height, with team codes left in
# their default axis order (no reordering applied).
ggplot(
  data = nhl_rosters_summary,
  mapping = aes(x = team_code, y = height_in_centimeters)
) +
  geom_point() +
  labs(
    title = "Before Ordering",
    x = "Team Code",
    y = "Average Height (cm)"
  ) +
  # Rotate the team codes so the labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Same point plot, but with team codes turned into a factor whose levels are
# sorted by average height, so the x axis runs from shortest to tallest.
nhl_rosters_summary %>%
  mutate(team_code = fct_reorder(team_code, height_in_centimeters)) %>%
  ggplot(aes(x = team_code, y = height_in_centimeters)) +
  geom_point() +
  labs(
    title = "After Ordering",
    x = "Team Code",
    y = "Average Height (cm)"
  ) +
  # Rotate the team codes so the labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Show examples of three forcats functions: `fct_recode()`, `fct_collapse()`, and the `fct_lump()` family.
# fct_recode(): relabel three team codes with full team names and leave every
# other code as it is. The new = "old" pairs live in a named vector and are
# spliced into the call with `!!!`.
team_names <- c(
  "Boston Bruins" = "BOS",
  "Vancouver Canucks" = "VAN",
  "Calgary Flames" = "CGY"
)

nhl_rosters %>%
  mutate(team_code = fct_recode(team_code, !!!team_names)) %>%
  count(team_code)
## # A tibble: 58 × 2
## team_code n
## <fct> <int>
## 1 AFM 222
## 2 ANA 1093
## 3 ARI 359
## 4 ATL 410
## 5 Boston Bruins 2988
## 6 BRK 26
## 7 BUF 1800
## 8 CAR 852
## 9 CBJ 853
## 10 CGS 184
## # ℹ 48 more rows
# fct_collapse(): merge several team codes into one level per group and
# leave the remaining codes untouched. Each list element maps a new level
# name to the codes it absorbs; the list is spliced in with `!!!`.
team_groups <- list(
  East = c("BOS", "NYR", "PHI"),
  West = c("VAN", "CGY", "EDM"),
  Central = c("CHI", "STL", "NSH")
)

nhl_rosters %>%
  mutate(team_code = fct_collapse(team_code, !!!team_groups)) %>%
  count(team_code)
## # A tibble: 52 × 2
## team_code n
## <fct> <int>
## 1 AFM 222
## 2 ANA 1093
## 3 ARI 359
## 4 ATL 410
## 5 East 7855
## 6 BRK 26
## 7 BUF 1800
## 8 CAR 852
## 9 CBJ 853
## 10 CGS 184
## # ℹ 42 more rows
# Keep the five most frequent team codes and pool every other code into a
# single "Other" level. fct_lump_n() is used instead of the catch-all
# fct_lump(), which picks its behaviour from whichever arguments are supplied;
# the forcats documentation recommends the specific fct_lump_*() variants.
nhl_rosters %>%
  mutate(team_code = fct_lump_n(team_code, n = 5)) %>%
  count(team_code)
## # A tibble: 6 × 2
## team_code n
## <fct> <int>
## 1 BOS 2988
## 2 DET 2883
## 3 MTL 3009
## 4 NYR 2943
## 5 TOR 2944
## 6 Other 40116
No need to do anything here.