# Monthly Canadian birth counts (TidyTuesday 2024-01-09)
canada_births_1991_2022 <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2024/2024-01-09/canada_births_1991_2022.csv"
)
## Rows: 384 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): year, month, births
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# One row per NHL player with birth date and birthplace (TidyTuesday 2024-01-09)
nhl_player_births <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2024/2024-01-09/nhl_player_births.csv"
)
## Rows: 8474 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): first_name, last_name, birth_city, birth_country, birth_state_prov...
## dbl (3): player_id, birth_year, birth_month
## date (1): birth_date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# NHL roster entries by team and season (TidyTuesday 2024-01-09)
nhl_rosters <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2024/2024-01-09/nhl_rosters.csv"
)
## Rows: 54883 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): team_code, position_type, headshot, first_name, last_name, positi...
## dbl (7): season, player_id, sweater_number, height_in_inches, weight_in_po...
## date (1): birth_date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Lookup table from team code to full team name (TidyTuesday 2024-01-09)
nhl_teams <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2024/2024-01-09/nhl_teams.csv"
)
## Rows: 59 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): team_code, full_name
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Tally Canadian-born players by birth province or territory
count(
  filter(nhl_player_births, birth_country == "CAN"),
  birth_state_province
)
## # A tibble: 12 × 2
## birth_state_province n
## <chr> <int>
## 1 Alberta 645
## 2 British Columbia 408
## 3 Manitoba 408
## 4 New Brunswick 53
## 5 Newfoundland and Labrador 31
## 6 Northwest Territories 4
## 7 Nova Scotia 79
## 8 Ontario 2407
## 9 Prince Edward Island 35
## 10 Quebec 865
## 11 Saskatchewan 530
## 12 Yukon Territory 3
# Province names in the order the factor levels should take.
prov_levels <- c(
  "Ontario", "Quebec", "Alberta", "British Columbia",
  "Saskatchewan", "Manitoba", "Nova Scotia", "New Brunswick",
  "Newfoundland and Labrador", "Prince Edward Island"
)

# Canadian-born players with birth_state_province stored as a factor whose
# levels follow prov_levels.
data_rev <- nhl_player_births %>%
  filter(birth_country == "CAN") %>%
  mutate(
    # NOTE(review): factor() turns any value not listed in `levels` into NA,
    # so "Northwest Territories" and "Yukon Territory" end up NA here --
    # confirm this is intended.
    birth_state_province = factor(birth_state_province, levels = prov_levels)
  )
Make two bar charts here: one before ordering the provinces and another after.
# Summarize data: one row per birth province/territory with the number of
# Canadian-born players stored in `players`
data_summary <- nhl_player_births %>%
  filter(birth_country == "CAN") %>%
  count(birth_state_province, name = "players")

data_summary
## # A tibble: 12 × 2
## birth_state_province players
## <chr> <int>
## 1 Alberta 645
## 2 British Columbia 408
## 3 Manitoba 408
## 4 New Brunswick 53
## 5 Newfoundland and Labrador 31
## 6 Northwest Territories 4
## 7 Nova Scotia 79
## 8 Ontario 2407
## 9 Prince Edward Island 35
## 10 Quebec 865
## 11 Saskatchewan 530
## 12 Yukon Territory 3
# First plot: provinces on the y-axis in their default order
data_summary %>%
  ggplot(aes(x = players, y = birth_state_province)) +
  geom_point()

# Second plot: same data, provinces reordered by their player count
data_summary %>%
  ggplot(aes(x = players, y = fct_reorder(birth_state_province, players))) +
  geom_point()
Show examples of three forcats functions for recoding, collapsing, and lumping factor levels:
# fct_recode(): rename individual levels, written as "new name" = "old name".
# Levels that are not mentioned (the territories) keep their original names.
nhl_player_births %>%
  filter(birth_country == "CAN") %>%
  mutate(
    birth_state_province = fct_recode(
      birth_state_province,
      "AB" = "Alberta",
      "BC" = "British Columbia",
      "MB" = "Manitoba",
      "NB" = "New Brunswick",
      "NL" = "Newfoundland and Labrador",
      "NS" = "Nova Scotia",
      "ON" = "Ontario",
      "PEI" = "Prince Edward Island",
      "QC" = "Quebec",
      "SK" = "Saskatchewan"
    )
  ) %>%
  count(birth_state_province)
## # A tibble: 12 × 2
## birth_state_province n
## <fct> <int>
## 1 AB 645
## 2 BC 408
## 3 MB 408
## 4 NB 53
## 5 NL 31
## 6 Northwest Territories 4
## 7 NS 79
## 8 ON 2407
## 9 PEI 35
## 10 QC 865
## 11 SK 530
## 12 Yukon Territory 3
# fct_collapse(): merge several levels into named groups. Levels that are not
# assigned to a group (the territories) are left as they are.
nhl_player_births %>%
  filter(birth_country == "CAN") %>%
  count(
    birth_state_province = fct_collapse(
      birth_state_province,
      West = c("British Columbia", "Alberta", "Saskatchewan", "Manitoba"),
      East = c(
        "Ontario", "Quebec", "New Brunswick", "Nova Scotia",
        "Prince Edward Island", "Newfoundland and Labrador"
      )
    )
  )
## # A tibble: 4 × 2
## birth_state_province n
## <fct> <int>
## 1 West 1991
## 2 East 3470
## 3 Northwest Territories 4
## 4 Yukon Territory 3
# fct_lump_n(): keep the 3 most frequent provinces and lump every other
# level into "Other". This is the explicit variant of fct_lump(x, n = 3);
# the forcats documentation recommends the explicit fct_lump_*() functions
# in new code.
nhl_player_births %>%
  filter(birth_country == "CAN") %>%
  mutate(birth_state_province = fct_lump_n(birth_state_province, n = 3)) %>%
  count(birth_state_province)
## # A tibble: 4 × 2
## birth_state_province n
## <fct> <int>
## 1 Alberta 645
## 2 Ontario 2407
## 3 Quebec 865
## 4 Other 1551