Import your data

# Read the TidyTuesday (2024-01-09) NHL rosters CSV directly from GitHub.
nhl_rosters <- readr::read_csv(
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-01-09/nhl_rosters.csv"
)
## Rows: 54883 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (10): team_code, position_type, headshot, first_name, last_name, positi...
## dbl   (7): season, player_id, sweater_number, height_in_inches, weight_in_po...
## date  (1): birth_date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Chapter 15

Create a factor

 # Tally roster rows per team code to see which values the column holds.
 count(nhl_rosters, team_code)
## # A tibble: 58 × 2
##    team_code     n
##    <chr>     <int>
##  1 AFM         222
##  2 ANA        1093
##  3 ARI         359
##  4 ATL         410
##  5 BOS        2988
##  6 BRK          26
##  7 BUF        1800
##  8 CAR         852
##  9 CBJ         853
## 10 CGS         184
## # ℹ 48 more rows
# Levels to keep when turning team_code into a factor.
teams_code_levels <- c("BOS", "VAN", "CGY")

# Convert team_code to a factor restricted to the levels above.
# Per base R's factor(), codes that are not listed in `levels` become NA.
nhl_rosters_rev <- mutate(
    nhl_rosters,
    team_code = factor(team_code, levels = teams_code_levels)
)

Modify factor order

Make two bar charts here: one before ordering and another after.

# Mean player height (cm) for each team code; missing heights are ignored.
nhl_rosters_summary <- summarise(
    group_by(nhl_rosters, team_code),
    height_in_centimeters = mean(height_in_centimeters, na.rm = TRUE)
)

# Print the one-row-per-team summary.
nhl_rosters_summary
## # A tibble: 58 × 2
##    team_code height_in_centimeters
##    <chr>                     <dbl>
##  1 AFM                        183.
##  2 ANA                        186.
##  3 ARI                        186.
##  4 ATL                        185.
##  5 BOS                        182.
##  6 BRK                        179.
##  7 BUF                        185.
##  8 CAR                        186.
##  9 CBJ                        186.
## 10 CGS                        181.
## # ℹ 48 more rows
# Baseline chart: average height per team with team codes left unordered.
nhl_rosters_summary %>%
    ggplot(aes(x = team_code, y = height_in_centimeters)) +
    geom_point() +
    # Rotate the team labels so all codes stay legible on the x axis.
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    labs(
        title = "Before Ordering",
        x = "Team Code",
        y = "Average Height (cm)"
    )

# Same chart, but with team codes reordered by their average height.
nhl_rosters_summary %>%
    mutate(team_code = fct_reorder(team_code, height_in_centimeters)) %>%
    ggplot(aes(x = team_code, y = height_in_centimeters)) +
    geom_point() +
    # Rotate the team labels so all codes stay legible on the x axis.
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    labs(
        title = "After Ordering",
        x = "Team Code",
        y = "Average Height (cm)"
    )

Modify factor levels

Show examples of three functions:

  • fct_recode
# Relabel three team codes with full club names (new name = old code);
# every other code keeps its original label.
team_full_names <- c(
    "Boston Bruins" = "BOS",
    "Vancouver Canucks" = "VAN",
    "Calgary Flames" = "CGY"
)

nhl_rosters %>%
    mutate(team_code = fct_recode(team_code, !!!team_full_names)) %>%
    count(team_code)
## # A tibble: 58 × 2
##    team_code         n
##    <fct>         <int>
##  1 AFM             222
##  2 ANA            1093
##  3 ARI             359
##  4 ATL             410
##  5 Boston Bruins  2988
##  6 BRK              26
##  7 BUF            1800
##  8 CAR             852
##  9 CBJ             853
## 10 CGS             184
## # ℹ 48 more rows
  • fct_collapse
# Merge selected team codes into three broader groups and count rows per
# resulting level; codes not listed below keep their own level.
nhl_rosters %>%
    count(
        team_code = fct_collapse(
            team_code,
            East = c("BOS", "NYR", "PHI"),
            West = c("VAN", "CGY", "EDM"),
            Central = c("CHI", "STL", "NSH")
        )
    )
## # A tibble: 52 × 2
##    team_code     n
##    <fct>     <int>
##  1 AFM         222
##  2 ANA        1093
##  3 ARI         359
##  4 ATL         410
##  5 East       7855
##  6 BRK          26
##  7 BUF        1800
##  8 CAR         852
##  9 CBJ         853
## 10 CGS         184
## # ℹ 42 more rows
  • fct_lump
# Keep the five most frequent team codes, lump the rest into "Other",
# then count rows per resulting level.
nhl_rosters %>%
    count(team_code = fct_lump(team_code, n = 5))
## # A tibble: 6 × 2
##   team_code     n
##   <fct>     <int>
## 1 BOS        2988
## 2 DET        2883
## 3 MTL        3009
## 4 NYR        2943
## 5 TOR        2944
## 6 Other     40116

Chapter 16

No need to do anything here.