Import your data

# Load the workbook into `data`. readxl reports (below) that the unnamed first
# column was renamed to `...1`.
# NOTE(review): the fct_lump() counts near the end of this document list an
# `NA` row among the three kept levels, which suggests missing genres are
# stored as the literal text "NA" in the sheet. If so, pass
# na = c("", "NA") here -- confirm against the workbook first.
data <- read_excel(path = "../00_data/my.Data.xlsx")
## New names:
## • `` -> `...1`
# Column names are held in variables so that later chunks can refer to the
# columns indirectly (e.g. via .data[[...]] and all_of()).
categorical_col <- "genre"
numeric_col <- "differential"

# Echo the selected columns.
cat("Categorical column:", categorical_col, "\n", sep = " ")
## Categorical column: genre
cat("Numeric column:", numeric_col, "\n", sep = " ")
## Numeric column: differential

Chapter 15

Create a factor

# Convert the categorical column to a factor in place. The glue-style name on
# the left of `:=` writes the result back to the column named by
# categorical_col.
data <- mutate(
  data,
  "{categorical_col}" := as_factor(.data[[categorical_col]])
)

Modify factor order

Make two bar charts here: one before ordering and another after.

Unordered factor levels

# One row per category level holding the mean of the numeric column;
# missing values are ignored when averaging.
avg_by_category <- summarise(
  group_by(data, across(all_of(categorical_col))),
  avg_value = mean(.data[[numeric_col]], na.rm = TRUE)
)

# Plot the per-category averages with the factor in its current level order.
# theme_minimal() matches the ordered plot further down, so the two charts
# differ only in how the levels are ordered.
avg_by_category %>%
  ggplot(aes(x = avg_value, y = .data[[categorical_col]])) +
  geom_point() +
  labs(
    title = "Unordered Factor Levels",
    x = paste("Average", numeric_col),
    y = categorical_col
  ) +
  theme_minimal()

Ordered factor levels

# Add a copy of the category factor whose levels are sorted by avg_value,
# so the next plot lists categories from lowest to highest average.
avg_by_category <- mutate(
  avg_by_category,
  ordered_category = fct_reorder(.data[[categorical_col]], avg_value)
)

# Same chart as above, but drawn against the reordered factor.
ggplot(avg_by_category, aes(x = avg_value, y = ordered_category)) +
  geom_point() +
  labs(
    title = "Ordered Factor Levels",
    x = paste("Average", numeric_col),
    y = categorical_col
  ) +
  theme_minimal()

Modify factor levels

Show examples of three functions:

  • fct_recode
# fct_recode() renames individual levels (new name = old name); levels that
# are not mentioned keep their current label.
data <- mutate(
  data,
  genre_recode = fct_recode(
    genre,
    "Rock" = "Rock n' Roll/Rhythm & Blues",
    "Jazz" = "Big Band/Jazz"
  )
)

# Show only the rows whose genre was renamed, old label next to new label.
data %>%
  filter(genre %in% c("Rock n' Roll/Rhythm & Blues", "Big Band/Jazz")) %>%
  select(genre, genre_recode)
## # A tibble: 24 × 2
##    genre                       genre_recode
##    <fct>                       <fct>       
##  1 Big Band/Jazz               Jazz        
##  2 Rock n' Roll/Rhythm & Blues Rock        
##  3 Rock n' Roll/Rhythm & Blues Rock        
##  4 Big Band/Jazz               Jazz        
##  5 Rock n' Roll/Rhythm & Blues Rock        
##  6 Big Band/Jazz               Jazz        
##  7 Big Band/Jazz               Jazz        
##  8 Big Band/Jazz               Jazz        
##  9 Big Band/Jazz               Jazz        
## 10 Big Band/Jazz               Jazz        
## # ℹ 14 more rows
  • fct_collapse
# fct_collapse() merges several levels into one. Here two genres become a
# single "Popular Genres" level; every other level is left as it is.
# Only levels that actually occur in `genre` are listed: naming absent ones
# ("Folk", "Hip-Hop") makes fct_collapse() warn about unknown levels and has
# no effect on the result.
# To bucket all remaining genres instead, add other_level = "Other".
data <- data %>%
  mutate(
    genre_collapse = fct_collapse(
      genre,
      "Popular Genres" = c("Rock n' Roll/Rhythm & Blues", "Big Band/Jazz")
    )
  )

# Show the collapsed rows next to their original genre.
data %>%
  select(genre, genre_collapse) %>%
  filter(genre %in% c("Rock n' Roll/Rhythm & Blues", "Big Band/Jazz"))
## # A tibble: 24 × 2
##    genre                       genre_collapse
##    <fct>                       <fct>         
##  1 Big Band/Jazz               Popular Genres
##  2 Rock n' Roll/Rhythm & Blues Popular Genres
##  3 Rock n' Roll/Rhythm & Blues Popular Genres
##  4 Big Band/Jazz               Popular Genres
##  5 Rock n' Roll/Rhythm & Blues Popular Genres
##  6 Big Band/Jazz               Popular Genres
##  7 Big Band/Jazz               Popular Genres
##  8 Big Band/Jazz               Popular Genres
##  9 Big Band/Jazz               Popular Genres
## 10 Big Band/Jazz               Popular Genres
## # ℹ 14 more rows
  • fct_lump
# Keep the three most frequent genre levels and lump the rest into "Other".
data <- mutate(data, genre_lump = fct_lump(genre, n = 3))

# Tally each lumped level, most frequent first.
count(data, genre_lump, sort = TRUE)
## # A tibble: 4 × 2
##   genre_lump                            n
##   <fct>                             <int>
## 1 Other                               368
## 2 NA                                  164
## 3 Punk/Post-Punk/New Wave/Power Pop    84
## 4 Soul/Gospel/R&B                      75

Chapter 16

No need to do anything here.