# Read the TidyTuesday (2023-10-17) album songs CSV straight from GitHub.
taylor_album_songs <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-10-17/taylor_album_songs.csv"
)
## Rows: 194 Columns: 29
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): album_name, track_name, artist, featuring, key_name, mode_name, k...
## dbl (14): track_number, danceability, energy, key, loudness, mode, speechin...
## lgl (4): ep, bonus_track, explicit, lyrics
## date (4): album_release, promotional_release, single_release, track_release
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Tally the number of rows for each distinct album name.
count(taylor_album_songs, album_name)
## # A tibble: 10 × 2
## album_name n
## <chr> <int>
## 1 1989 16
## 2 Fearless (Taylor's Version) 26
## 3 Lover 18
## 4 Midnights 23
## 5 Red (Taylor's Version) 30
## 6 Speak Now 17
## 7 Taylor Swift 15
## 8 evermore 17
## 9 folklore 17
## 10 reputation 15
# Album names spelled exactly as they appear in the `album_name` column
# (compare with the `count(album_name)` output above). The closing parenthesis
# in the two "(Taylor's Version)" names matters: without it the strings do not
# match the data.
album_name_levels <- c(
  "1989", "Fearless (Taylor's Version)", "Lover", "Midnights",
  "Red (Taylor's Version)", "Speak Now", "Taylor Swift", "evermore",
  "folklore", "reputation"
)
# The example vector holds the same ten names, so reuse the single definition
# instead of typing the list twice.
x1 <- album_name_levels
# Build a factor whose level order is fixed explicitly by `album_name_levels`.
y1 <- factor(x1, levels = album_name_levels)
y1
## [1] 1989 Fearless (Taylor's Version)
## [3] Lover Midnights
## [5] Red (Taylor's Version) Speak Now
## [7] Taylor Swift evermore
## [9] folklore reputation
## 10 Levels: 1989 Fearless (Taylor's Version) Lover ... reputation
# Sorting a factor orders by level position, not alphabetically; here the
# values are already in level order, so the result matches `y1`.
sort(y1)
## [1] 1989 Fearless (Taylor's Version)
## [3] Lover Midnights
## [5] Red (Taylor's Version) Speak Now
## [7] Taylor Swift evermore
## [9] folklore reputation
## 10 Levels: 1989 Fearless (Taylor's Version) Lover ... reputation
Make two bar charts here: one before ordering and another after.
# Mean track duration (column `duration_ms`) per album, ignoring missing
# durations. One output row per `album_name`.
duration_ms_by_album_name <- summarise(
  group_by(taylor_album_songs, album_name),
  avg_duration_ms = mean(duration_ms, na.rm = TRUE)
)
duration_ms_by_album_name
## # A tibble: 10 × 2
## album_name avg_duration_ms
## <chr> <dbl>
## 1 1989 229438.
## 2 Fearless (Taylor's Version) 245866.
## 3 Lover 206188.
## 4 Midnights 208464.
## 5 Red (Taylor's Version) 261328
## 6 Speak Now 280583.
## 7 Taylor Swift 213972.
## 8 evermore 243817.
## 9 folklore 236965.
## 10 reputation 223020.
# Plot 1: albums on the y axis in ggplot2's default order for a character
# column (no explicit reordering).
ggplot(
  duration_ms_by_album_name,
  aes(x = avg_duration_ms, y = album_name)
) +
  geom_point()

# Plot 2: same data, but the albums are reordered by their mean duration via
# fct_reorder(); the y-axis title is dropped and the x axis is relabelled.
ggplot(
  duration_ms_by_album_name,
  aes(
    x = avg_duration_ms,
    y = fct_reorder(.f = album_name, .x = avg_duration_ms)
  )
) +
  geom_point() +
  labs(y = NULL, x = "Mean Duration Ms")
Show examples of three functions:
# fct_recode(): rename a single level ("Red (Taylor's Version)" -> "Red"),
# leaving every other album name untouched, then count rows per album.
taylor_album_songs %>%
  mutate(
    album_name = fct_recode(album_name, Red = "Red (Taylor's Version)")
  ) %>%
  count(album_name)
## # A tibble: 10 × 2
## album_name n
## <fct> <int>
## 1 1989 16
## 2 evermore 17
## 3 Fearless (Taylor's Version) 26
## 4 folklore 17
## 5 Lover 18
## 6 Midnights 23
## 7 Red 30
## 8 reputation 15
## 9 Speak Now 17
## 10 Taylor Swift 15
# fct_collapse(): merge the ten album names into two manually defined groups,
# then count rows per group.
taylor_album_songs %>%
  mutate(
    album_name = fct_collapse(
      album_name,
      before2020 = c(
        "1989", "Lover", "reputation", "Speak Now", "Taylor Swift"
      ),
      after2020 = c(
        "Midnights", "evermore", "folklore",
        "Red (Taylor's Version)", "Fearless (Taylor's Version)"
      )
    )
  ) %>%
  count(album_name)
## # A tibble: 2 × 2
## album_name n
## <fct> <int>
## 1 before2020 81
## 2 after2020 113
# fct_lump_n(): keep the most frequent albums and lump the rest into "Other",
# then list the groups from most to least frequent.
# fct_lump_n() is the explicit replacement for the legacy catch-all
# fct_lump(); with `n = 5` the two behave the same.
# Note: levels tied at the cutoff are all retained, which is why the result
# below keeps seven albums (three are tied at 17 songs) rather than five.
taylor_album_songs %>%
  mutate(album_name = fct_lump_n(album_name, n = 5)) %>%
  count(album_name, sort = TRUE)
## # A tibble: 8 × 2
## album_name n
## <fct> <int>
## 1 Other 46
## 2 Red (Taylor's Version) 30
## 3 Fearless (Taylor's Version) 26
## 4 Midnights 23
## 5 Lover 18
## 6 evermore 17
## 7 folklore 17
## 8 Speak Now 17
No need to do anything here.