# Read the workbook into `data`. The echoed message below shows that
# read_excel() found one column without a header and named it `...1`.
data <- read_excel(path = "../00_data/my.Data.xlsx")
## New names:
## • `` -> `...1`

# Names of the columns the rest of the script works with, kept as strings so
# the plotting code below can refer to them indirectly.
categorical_col <- "genre"
numeric_col <- "differential"

# Echo the selection so it is visible in the rendered output.
cat("Categorical column:", categorical_col, "\n")
## Categorical column: genre
cat("Numeric column:", numeric_col, "\n")
## Numeric column: differential

# Convert the chosen categorical column to a factor in place.
data <- mutate(data, across(all_of(categorical_col), as_factor))
Make two bar charts here: one before ordering the factor levels and another after.
Unordered factor levels
# One row per category with the mean of the numeric column; missing values
# are ignored when averaging.
avg_by_category <- summarise(
  group_by(data, across(all_of(categorical_col))),
  avg_value = mean(.data[[numeric_col]], na.rm = TRUE)
)

# Plot the averages with the categories in their existing factor-level order.
ggplot(
  data = avg_by_category,
  mapping = aes(x = avg_value, y = .data[[categorical_col]])
) +
  geom_point() +
  labs(
    title = "Unordered Factor Levels",
    x = paste("Average", numeric_col),
    y = categorical_col
  )
Ordered factor levels
# Add a copy of the category column whose levels are sorted by `avg_value`
# (fct_reorder() sorts ascending by default).
avg_by_category <- mutate(
  avg_by_category,
  ordered_category = fct_reorder(.data[[categorical_col]], avg_value)
)

# Same plot as before, but drawn against the reordered factor.
ggplot(
  data = avg_by_category,
  mapping = aes(x = avg_value, y = ordered_category)
) +
  geom_point() +
  labs(
    title = "Ordered Factor Levels",
    x = paste("Average", numeric_col),
    y = categorical_col
  ) +
  theme_minimal()
Show examples of three functions: fct_recode(), fct_collapse(), and fct_lump().
# fct_recode(): rename individual levels. Two long genre labels get short
# names in a new column; all other levels are left as they are.
data <- mutate(
  data,
  genre_recode = fct_recode(
    genre,
    "Rock" = "Rock n' Roll/Rhythm & Blues",
    "Jazz" = "Big Band/Jazz"
  )
)
# Show the original and recoded labels side by side for the two renamed genres.
data %>%
  filter(genre %in% c("Rock n' Roll/Rhythm & Blues", "Big Band/Jazz")) %>%
  select(genre, genre_recode)
## # A tibble: 24 × 2
## genre genre_recode
## <fct> <fct>
## 1 Big Band/Jazz Jazz
## 2 Rock n' Roll/Rhythm & Blues Rock
## 3 Rock n' Roll/Rhythm & Blues Rock
## 4 Big Band/Jazz Jazz
## 5 Rock n' Roll/Rhythm & Blues Rock
## 6 Big Band/Jazz Jazz
## 7 Big Band/Jazz Jazz
## 8 Big Band/Jazz Jazz
## 9 Big Band/Jazz Jazz
## 10 Big Band/Jazz Jazz
## # ℹ 14 more rows
# fct_collapse(): merge several existing levels into broader groups.
# The two named genres become "Popular Genres"; every remaining level is
# gathered into "Other" via `other_level`.
# Listing "Folk" and "Hip-Hop" by hand did not work: neither is a level of
# `genre`, so fct_collapse() warned about unknown levels and no "Other"
# group was ever created.
data <- data %>%
  mutate(
    genre_collapse = fct_collapse(
      genre,
      "Popular Genres" = c("Rock n' Roll/Rhythm & Blues", "Big Band/Jazz"),
      other_level = "Other"
    )
  )
# Show the original and collapsed labels side by side for the listed genres.
data %>%
  filter(genre %in% c("Rock n' Roll/Rhythm & Blues", "Big Band/Jazz", "Folk", "Hip-Hop")) %>%
  select(genre, genre_collapse)
## # A tibble: 24 × 2
## genre genre_collapse
## <fct> <fct>
## 1 Big Band/Jazz Popular Genres
## 2 Rock n' Roll/Rhythm & Blues Popular Genres
## 3 Rock n' Roll/Rhythm & Blues Popular Genres
## 4 Big Band/Jazz Popular Genres
## 5 Rock n' Roll/Rhythm & Blues Popular Genres
## 6 Big Band/Jazz Popular Genres
## 7 Big Band/Jazz Popular Genres
## 8 Big Band/Jazz Popular Genres
## 9 Big Band/Jazz Popular Genres
## 10 Big Band/Jazz Popular Genres
## # ℹ 14 more rows
# fct_lump(): keep the most frequent genres (n = 3 requested) and fold the
# rest into a single lumped level.
data <- mutate(data, genre_lump = fct_lump(genre, n = 3))

# Tally the lumped factor, largest group first.
count(data, genre_lump, sort = TRUE)
## # A tibble: 4 × 2
## genre_lump n
## <fct> <int>
## 1 Other 368
## 2 NA 164
## 3 Punk/Post-Punk/New Wave/Power Pop 84
## 4 Soul/Gospel/R&B 75
No need to do anything here.