# Load the TidyTuesday (2024-01-09) NHL roster data directly from GitHub.
nhl_rosters <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2024/2024-01-09/nhl_rosters.csv"
)
## Rows: 54883 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): team_code, position_type, headshot, first_name, last_name, positi...
## dbl (7): season, player_id, sweater_number, height_in_inches, weight_in_po...
## date (1): birth_date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Fix the RNG seed so the same 500 players are drawn on every run.
set.seed(121)

# Keep only the columns used below, then draw a random sample of 500 rows.
nhl_filtered <- nhl_rosters %>%
  select(
    team_code,
    last_name,
    position_type,
    weight_in_pounds,
    height_in_centimeters,
    birth_country
  ) %>%
  sample_n(size = 500)

# Print the sampled tibble.
nhl_filtered
## # A tibble: 500 × 6
## team_code last_name position_type weight_in_pounds height_in_centimeters
## <chr> <chr> <chr> <dbl> <dbl>
## 1 EDM Chychrun defensemen 215 193
## 2 MTL Hodge goalies 150 168
## 3 DAL Harvey forwards 210 183
## 4 TBL Leach defensemen 220 196
## 5 MIN Clutterbuck forwards 212 183
## 6 DET Giacomin goalies 180 180
## 7 PHI Simmonds forwards 184 188
## 8 DAL Nieuwendyk forwards 209 188
## 9 EDM Semenko forwards 216 188
## 10 MNS Rombough forwards 215 191
## # ℹ 490 more rows
## # ℹ 1 more variable: birth_country <chr>
NHL player weight by birth country: unordered, then ordered by average weight
# Average weight (ignoring missing values) and number of sampled players
# for each birth country.
nhl_weights <- nhl_filtered %>%
  group_by(birth_country) %>%
  summarise(
    weight_in_pounds = mean(weight_in_pounds, na.rm = TRUE),
    n = n()
  )
# Bar chart of average weight per birth country, with countries in the
# default order of the character column.
ggplot(nhl_weights, aes(x = birth_country, y = weight_in_pounds)) +
  geom_col() +
  labs(
    title = "Avg Weight by Birth Country",
    x = "Birth Country",
    y = "Avg Weight (lbs)"
  ) +
  # Rotate the country codes so neighbouring labels do not collide.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Same bar chart, but fct_reorder() sorts the countries by their average
# weight so the bars rise from left to right.
ggplot(
  nhl_weights,
  aes(
    x = fct_reorder(birth_country, weight_in_pounds),
    y = weight_in_pounds
  )
) +
  geom_col() +
  labs(
    title = "Avg Weight by Birth Country",
    x = "Birth Country",
    y = "Avg Weight (lbs)"
  ) +
  # Rotate the country codes so neighbouring labels do not collide.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
NHL player height by birth country (unordered)
# Average height (ignoring missing values) and number of sampled players
# for each birth country.
nhl_heights <- nhl_filtered %>%
  group_by(birth_country) %>%
  summarise(
    height_in_centimeters = mean(height_in_centimeters, na.rm = TRUE),
    n = n()
  )
# Bar chart of average height per birth country, with countries in the
# default order of the character column. Axis labels, title and rotated
# tick text mirror the weight plots so the figures read consistently
# instead of showing raw column names.
ggplot(nhl_heights, aes(x = birth_country, y = height_in_centimeters)) +
  geom_col() +
  labs(
    x = "Birth Country",
    y = "Avg Height (cm)",
    title = "Avg Height by Birth Country"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Number of sampled players per birth country (geom_bar() counts the rows).
ggplot(nhl_filtered, aes(x = birth_country)) +
  geom_bar()
fct_recode: rename individual factor levels
# List every birth country present in the sample.
distinct(nhl_filtered, birth_country)
## # A tibble: 14 × 1
## birth_country
## <chr>
## 1 CAN
## 2 USA
## 3 RUS
## 4 GBR
## 5 NLD
## 6 UKR
## 7 SVK
## 8 FIN
## 9 CHE
## 10 CZE
## 11 SWE
## 12 DEU
## 13 FRA
## 14 LVA
# fct_recode(): relabel the "CAN" level as "cold place", then show only the
# Canadian rows next to the original value to confirm the rename.
nhl_filtered %>%
  mutate(
    birth_country_rev = fct_recode(birth_country, "cold place" = "CAN")
  ) %>%
  select(birth_country, birth_country_rev) %>%
  filter(birth_country == "CAN")
## # A tibble: 322 × 2
## birth_country birth_country_rev
## <chr> <fct>
## 1 CAN cold place
## 2 CAN cold place
## 3 CAN cold place
## 4 CAN cold place
## 5 CAN cold place
## 6 CAN cold place
## 7 CAN cold place
## 8 CAN cold place
## 9 CAN cold place
## 10 CAN cold place
## # ℹ 312 more rows
# fct_collapse(): merge the "FIN" and "SWE" levels into a single level, then
# show only the rows from those two countries to confirm the merge.
nhl_filtered %>%
  mutate(
    birth_country_col = fct_collapse(
      birth_country,
      "Scandinavian country" = c("FIN", "SWE")
    )
  ) %>%
  select(birth_country, birth_country_col) %>%
  filter(birth_country %in% c("FIN", "SWE"))
## # A tibble: 37 × 2
## birth_country birth_country_col
## <chr> <fct>
## 1 FIN Scandinavian country
## 2 SWE Scandinavian country
## 3 FIN Scandinavian country
## 4 SWE Scandinavian country
## 5 FIN Scandinavian country
## 6 SWE Scandinavian country
## 7 SWE Scandinavian country
## 8 FIN Scandinavian country
## 9 FIN Scandinavian country
## 10 FIN Scandinavian country
## # ℹ 27 more rows
# Number of sampled players per birth country.
count(nhl_filtered, birth_country)
## # A tibble: 14 × 2
## birth_country n
## <chr> <int>
## 1 CAN 322
## 2 CHE 1
## 3 CZE 13
## 4 DEU 4
## 5 FIN 13
## 6 FRA 1
## 7 GBR 5
## 8 LVA 1
## 9 NLD 1
## 10 RUS 21
## 11 SVK 8
## 12 SWE 24
## 13 UKR 2
## 14 USA 84
# fct_lump_lowfreq(): lump the least frequent levels into "Other" while
# keeping "Other" the smallest level. This is what fct_lump() does when
# neither `n` nor `prop` is given; the explicit variant is the one forcats
# recommends for new code. Because CAN alone outnumbers all other countries
# combined in this sample, every other country ends up in "Other".
nhl_filtered %>%
  mutate(country_lump = fct_lump_lowfreq(birth_country)) %>%
  distinct(country_lump)
## # A tibble: 2 × 1
## country_lump
## <fct>
## 1 CAN
## 2 Other