Load libraries
library(spotifyr)
library(magrittr)
library(geniusr)
library(dplyr)
library(tidyverse)
library(tidytext)
library(textdata)
library(stringr)
Access token
# Spotify API credentials. Fill these in locally and never commit real values.
# The literals must use straight ASCII quotes: typographic quotes are not
# valid R string delimiters and stop the script at parse time.
id <- ""
secret <- ""

# spotifyr reads the credentials from these two environment variables.
Sys.setenv(SPOTIFY_CLIENT_ID = id)
Sys.setenv(SPOTIFY_CLIENT_SECRET = secret)
access_token <- get_spotify_access_token()
Use each year's playlist ID to download that year's playlist tracks
# Playlist IDs paired with the year each playlist stands for.
year_id <- data.frame(
  id = c(
    "37i9dQZF1DXe2bobNYDtW8",
    "37i9dQZF1DWVRSukIED0e9",
    "37i9dQZF1DX7Jl5KP2eZaS",
    "5GhQiRkGuqzpWZSE7OU4Se",
    "37i9dQZF1DX18jTM2l2fJY"
  ),
  year = 2018:2022
)

# Download the selected track fields for every playlist, tag each result with
# its year, and stack the pieces once at the end. Iterating over
# seq_len(nrow(year_id)) keeps the loop in step with the table above, and a
# single bind_rows() avoids re-copying the growing data frame on every pass.
# NOTE(review): "track.album.release.date" yields no column in the printed
# result; the Spotify field is presumably "track.album.release_date" - confirm
# before relying on the release date.
top_songs <- lapply(seq_len(nrow(year_id)), function(j) {
  playlist_year <- year_id[j, 2]
  get_playlist_tracks(
    playlist_id = year_id[j, 1],
    fields = c("track.artists", "track.duration_ms", "track.explicit", "track.id", "track.name", "track.popularity", "track.album.name", "track.album.release.date")
  ) %>%
    mutate(year = playlist_year)
}) %>%
  bind_rows()
Subset to take top 50 songs by year
# Number the tracks within each year in their current order (rown), then keep
# only the first 50 of every year. The rown column stays in the result.
top_songs <- top_songs %>%
  group_by(year) %>%
  mutate(rown = row_number()) %>%
  ungroup() %>%
  filter(rown <= 50)
top_songs
## # A tibble: 250 × 9
## track.artists track.dur…¹ track…² track…³ track…⁴ track…⁵ track…⁶ year rown
## <list> <int> <lgl> <chr> <chr> <int> <chr> <int> <int>
## 1 <df [1 × 6]> 198973 TRUE 6DCZcS… God's … 84 Scorpi… 2018 1
## 2 <df [2 × 6]> 218146 TRUE 0e7ipj… rockst… 84 beerbo… 2018 2
## 3 <df [2 × 6]> 200185 FALSE 0u2P5u… lovely… 89 lovely… 2018 3
## 4 <df [1 × 6]> 228373 FALSE 09mEdo… Call O… 89 My Dea… 2018 4
## 5 <df [2 × 6]> 214846 FALSE 7ef4Dl… One Ki… 87 One Ki… 2018 5
## 6 <df [1 × 6]> 239835 TRUE 285pBl… Lucid … 85 Goodby… 2018 6
## 7 <df [3 × 6]> 184732 FALSE 09ISts… The Mi… 79 The Mi… 2018 7
## 8 <df [2 × 6]> 202620 TRUE 08bNPG… FRIENDS 79 FRIENDS 2018 8
## 9 <df [3 × 6]> 253390 TRUE 58q2HK… I Like… 78 Invasi… 2018 9
## 10 <df [1 × 6]> 184153 FALSE 6IPwKM… we fel… 85 we fel… 2018 10
## # … with 240 more rows, and abbreviated variable names ¹track.duration_ms,
## # ²track.explicit, ³track.id, ⁴track.name, ⁵track.popularity,
## # ⁶track.album.name
Unpack artist names
# track.artists is a list column: each element is a data frame with one row
# per credited artist. Keep only the first listed artist of every track.
# Taking name[1] explicitly replaces the earlier implicit truncation (a
# multi-artist vector assigned into a single cell, with a warning each time),
# and vapply() sizes the result from the data instead of a hard-coded 250.
artists <- top_songs$track.artists
artists2 <- vapply(
  artists,
  function(artist_df) artist_df$name[1],
  character(1)
)

# cbind() appends the names as a column called `artists2`.
top_songs <- cbind(top_songs, artists2, stringsAsFactors = FALSE)

# Compact title / first-artist / year table used for the manual clean-up below.
songinfo <- top_songs %>%
  select(track.name, artists2, year) %>%
  rename(track.artists = artists2)
songinfo[1:50, ]
## track.name track.artists year
## 1 God's Plan Drake 2018
## 2 rockstar (feat. 21 Savage) Post Malone 2018
## 3 lovely (with Khalid) Billie Eilish 2018
## 4 Call Out My Name The Weeknd 2018
## 5 One Kiss (with Dua Lipa) Calvin Harris 2018
## 6 Lucid Dreams Juice WRLD 2018
## 7 The Middle Zedd 2018
## 8 FRIENDS Marshmello 2018
## 9 I Like It Cardi B 2018
## 10 we fell in love in october girl in red 2018
## 11 no tears left to cry Ariana Grande 2018
## 12 All The Stars (with SZA) Kendrick Lamar 2018
## 13 SAD! XXXTENTACION 2018
## 14 Ric Flair Drip (with Metro Boomin) Offset 2018
## 15 Yes Indeed Lil Baby 2018
## 16 Happier Marshmello 2018
## 17 SICKO MODE Travis Scott 2018
## 18 LOVE. FEAT. ZACARI. Kendrick Lamar 2018
## 19 Moonlight XXXTENTACION 2018
## 20 2002 Anne-Marie 2018
## 21 Natural Imagine Dragons 2018
## 22 Better Now Post Malone 2018
## 23 Silence Marshmello 2018
## 24 I Like Me Better Lauv 2018
## 25 Let You Down NF 2018
## 26 Wolves Selena Gomez 2018
## 27 Be Alright Dean Lewis 2018
## 28 changes XXXTENTACION 2018
## 29 IDGAF Dua Lipa 2018
## 30 God is a woman Ariana Grande 2018
## 31 Delicate Taylor Swift 2018
## 32 Nonstop Drake 2018
## 33 In My Mind Dynoro 2018
## 34 Him & I (with Halsey) G-Eazy 2018
## 35 Mo Bamba Sheck Wes 2018
## 36 Stir Fry Migos 2018
## 37 Te Boté - Remix Nio Garcia 2018
## 38 Psycho (feat. Ty Dolla $ign) Post Malone 2018
## 39 1, 2, 3 (feat. Jason Derulo & De La Ghetto) Sofía Reyes 2018
## 40 Shotgun George Ezra 2018
## 41 Nice For What Drake 2018
## 42 Let Me Go (with Alesso, Florida Georgia Line & watt) Hailee Steinfeld 2018
## 43 Walk It Talk It Migos 2018
## 44 Taki Taki (with Selena Gomez, Ozuna & Cardi B) DJ Snake 2018
## 45 Déjala Que Vuelva (feat. Manuel Turizo) Piso 21 2018
## 46 In My Feelings Drake 2018
## 47 Mine Bazzi 2018
## 48 Body Loud Luxury 2018
## 49 It's Not Living (If It's Not With You) The 1975 2018
## 50 In My Blood Shawn Mendes 2018
Remove parenthesized text (e.g. "(feat. ...)") from song titles and split the songs into one data frame per year
# Drop every "(...)" group from the titles, together with any whitespace
# directly in front of it.
songinfo$track.name <- gsub(
  "\\s*\\([^\\)]+\\)",
  "",
  as.character(songinfo$track.name)
)

# One data frame per playlist year; original row names are kept.
songinfo2018 <- subset(songinfo, year == 2018)
songinfo2019 <- subset(songinfo, year == 2019)
songinfo2020 <- subset(songinfo, year == 2020)
songinfo2021 <- subset(songinfo, year == 2021)
songinfo2022 <- subset(songinfo, year == 2022)
2018
# Manual clean-up of 2018 titles and artists (simplified, ASCII-only spellings).
# Rows are located by their current title instead of by position. The playlist
# order is not stable, and in the 2018 listing printed in this document the
# old positional indices no longer lined up with the intended tracks (row 19
# was "Moonlight", row 13 "SAD!", row 17 "SICKO MODE"), so positional
# assignment overwrote the wrong songs.
# old_name is the title after the parenthesized text has been stripped;
# NA in new_name / new_artist means "leave that field unchanged".
fixes_2018 <- data.frame(
  old_name = c(
    "LOVE. FEAT. ZACARI.", "Yes Indeed", "Lucid Dreams", "Happier",
    "Ric Flair Drip", "2002", "In My Mind", "Him & I",
    "Te Bot\u00e9 - Remix", "1, 2, 3", "Let Me Go",
    "D\u00e9jala Que Vuelva"
  ),
  new_name = c(
    "LOVE.", NA, NA, "happier",
    NA, NA, NA, "Him and I",
    "Te bote", "1 2 3", NA,
    "Dejala Que Vuelva"
  ),
  new_artist = c(
    NA, "Lil Baby and Drake", "Juice wrld", NA,
    "Offset and Metro Boomin", "Anne marie", "Dynoro and gigi dagostino",
    "G Eazy and Halsey",
    NA, "Sofia Reyes", "Hailee Steinfeld and Alesso",
    NA
  ),
  stringsAsFactors = FALSE
)

for (k in seq_len(nrow(fixes_2018))) {
  hit <- which(songinfo2018$track.name == fixes_2018$old_name[k])
  if (length(hit) == 0) {
    # Surface a missing track instead of silently editing some other row.
    warning(
      "2018 fix not applied, track not found: ", fixes_2018$old_name[k],
      call. = FALSE
    )
    next
  }
  if (!is.na(fixes_2018$new_artist[k])) {
    songinfo2018[hit, "track.artists"] <- fixes_2018$new_artist[k]
  }
  if (!is.na(fixes_2018$new_name[k])) {
    songinfo2018[hit, "track.name"] <- fixes_2018$new_name[k]
  }
}
2019
# Manual clean-up of 2019 titles and artists, addressed by row position within
# songinfo2019.
# NOTE(review): the positions assume a fixed playlist order - confirm they
# still point at the intended tracks whenever the playlists are re-downloaded.
songinfo2019[c(3, 5, 44), "track.name"] <-
  c("Senorita", "Sunflower", "10000 hours")
songinfo2019[c(3, 44), "track.artists"] <-
  c("Shawn Mendes and Camila Cabello", "Dan shay")
2020
# Manual clean-up of 2020 titles and artists, addressed by row position within
# songinfo2020.
# NOTE(review): the positions assume a fixed playlist order - confirm they
# still point at the intended tracks whenever the playlists are re-downloaded.
songinfo2020[c(21, 26, 38, 39, 42), "track.name"] <- c(
  "Savage love laxed siren beat",
  "Senorita",
  "Sunflower",
  "Hawai",
  "ritmo bad boys for life"
)
songinfo2020[c(21, 26, 42, 47), "track.artists"] <- c(
  "Jawsh 685 and Jason Derulo",
  "Shawn Mendes and Camila Cabello",
  "The black eyed peas and j balvin",
  "Ariana Grande and Justin Bieber"
)
2021
# Manual clean-up of 2021 titles and artists, addressed by row position within
# songinfo2021.
# NOTE(review): the positions assume a fixed playlist order - confirm they
# still point at the intended tracks whenever the playlists are re-downloaded.
songinfo2021[c(2, 10, 12, 25, 26, 46, 48, 50), "track.name"] <- c(
  "Montero Call Me by Your Name",
  "Beggin",
  "Dakiti",
  "Friday dopamine re edit",
  "telepatia",
  "Que Mas Pues",
  "34 35",
  "Pareja Del Ano"
)
songinfo2021[c(10, 17, 21, 25, 33, 34, 50), "track.artists"] <- c(
  "Maneskin",
  "Silk Sonic",
  "Tiesto",
  "Riton and Nightcrawlers",
  "Myke Towers and Juhn",
  "Maneskin",
  "Sebastian yatra and Myke Towers"
)
2022
# Manual clean-up of 2022 titles and artists, addressed by row position within
# songinfo2022.
# NOTE(review): the positions assume a fixed playlist order - confirm they
# still point at the intended tracks whenever the playlists are re-downloaded.
songinfo2022[c(5, 7, 8, 10, 36, 39), "track.name"] <- c(
  "Titi Me Pregunto",
  "Enemy",
  "quevedo bzrp music sessions vol 52",
  "Running up that hill a deal with god",
  "I Aint Worried",
  "Una Noche en Medellin"
)
songinfo2022[c(8, 19, 22, 29, 42, 44), "track.artists"] <- c(
  "Bizarrap and quevedo",
  "Elley Duhe",
  "Rauw alejandro and chencho corleone",
  "Lost frequencies and calum scott",
  "Bad Bunny & Rauw Alejandro",
  "Tiesto"
)
# Download the audio features of every playlist listed in year_id, tag each
# result with its year, and stack the pieces once at the end. Iterating over
# seq_len(nrow(year_id)) keeps the loop in step with the playlist table, and a
# single bind_rows() avoids re-copying the growing data frame on every pass.
songs_ana <- lapply(seq_len(nrow(year_id)), function(j) {
  playlist_year <- year_id[j, 2]
  get_playlist_audio_features("spotify", year_id[j, 1]) %>%
    mutate(year = playlist_year)
}) %>%
  bind_rows()

# Keep only the tracks that are also in top_songs (matched on track id and
# year), then reduce to the audio features plus identifying columns.
# track.name.y is the title as it comes from top_songs: both tables carry a
# track.name column, so the join suffixes them .x / .y.
songs_ana <- songs_ana %>%
  inner_join(top_songs, by = c("track.id", "year")) %>%
  select(
    danceability, energy, key, loudness, speechiness, acousticness,
    instrumentalness, liveness, valence, tempo, year, track.name.y, track.id
  )
songs_ana
## # A tibble: 250 × 13
## danceabi…¹ energy key loudn…² speec…³ acous…⁴ instr…⁵ liven…⁶ valence tempo
## <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.754 0.449 7 -9.21 0.109 0.0332 8.29e-5 0.552 0.357 77.2
## 2 0.585 0.52 5 -6.14 0.0712 0.124 7.01e-5 0.131 0.129 160.
## 3 0.351 0.296 4 -10.1 0.0333 0.934 0 0.095 0.12 115.
## 4 0.461 0.593 1 -4.95 0.0356 0.17 0 0.307 0.175 134.
## 5 0.791 0.862 9 -3.24 0.11 0.037 2.19e-5 0.0814 0.592 124.
## 6 0.511 0.566 6 -7.23 0.2 0.349 0 0.34 0.218 83.9
## 7 0.753 0.657 7 -3.06 0.0449 0.171 0 0.112 0.437 107.
## 8 0.626 0.88 9 -2.38 0.0504 0.205 0 0.128 0.534 95.1
## 9 0.816 0.726 5 -4.00 0.129 0.099 0 0.372 0.65 136.
## 10 0.566 0.366 7 -12.8 0.028 0.113 1.81e-1 0.155 0.237 130.
## # … with 240 more rows, 3 more variables: year <int>, track.name.y <chr>,
## # track.id <chr>, and abbreviated variable names ¹danceability, ²loudness,
## # ³speechiness, ⁴acousticness, ⁵instrumentalness, ⁶liveness