Load libraries

library(spotifyr)
library(magrittr)
library(geniusr)
library(dplyr)
library(tidyverse)
library(tidytext)
library(textdata)
library(stringr)

Access token

# Spotify API credentials. Paste the client ID and secret between the quotes,
# or leave them empty to keep whatever SPOTIFY_CLIENT_ID and
# SPOTIFY_CLIENT_SECRET values are already set in the environment.
# Plain ASCII double quotes are required: typographic quotes do not parse in R.
id <- ""

secret <- ""

# Export only non-empty values so that blank placeholders do not overwrite
# credentials that are already configured.
if (nzchar(id)) {
  Sys.setenv(SPOTIFY_CLIENT_ID = id)
}

if (nzchar(secret)) {
  Sys.setenv(SPOTIFY_CLIENT_SECRET = secret)
}

# Called without arguments, so it relies on the environment variables above.
access_token <- get_spotify_access_token()

Scraping top 100 songs of the year playlists

Uses playlist ID to identify playlists

# Lookup table: one playlist ID per year.
year_id <- data.frame(
  id = c(
    "37i9dQZF1DXe2bobNYDtW8",
    "37i9dQZF1DWVRSukIED0e9",
    "37i9dQZF1DX7Jl5KP2eZaS",
    "5GhQiRkGuqzpWZSE7OU4Se",
    "37i9dQZF1DX18jTM2l2fJY"
  ),
  year = 2018:2022
)

# Fetch the tracks of every playlist listed in year_id and tag each row with
# the playlist's year. Iterating over seq_len(nrow(year_id)) keeps the loop in
# step with the lookup table, and the per-playlist results are combined once
# with bind_rows() instead of growing a data frame with rbind() on every pass.
top_songs <- bind_rows(
  lapply(seq_len(nrow(year_id)), function(j) {
    playlist_year <- year_id$year[j]
    get_playlist_tracks(
      playlist_id = year_id$id[j],
      fields = c(
        "track.artists",
        "track.duration_ms",
        "track.explicit",
        "track.id",
        "track.name",
        "track.popularity",
        "track.album.name",
        "track.album.release.date"
      )
    ) %>%
      mutate(year = playlist_year)
  })
)

Subset to take top 50 songs by year

# Number the tracks within each year in their current row order (rown), then
# keep only the rows numbered 1 to 50 for every year.
top_songs <- top_songs %>%
  group_by(year) %>%
  mutate(rown = row_number()) %>%
  ungroup() %>%
  filter(rown <= 50)
top_songs
## # A tibble: 250 × 9
##    track.artists track.dur…¹ track…² track…³ track…⁴ track…⁵ track…⁶  year  rown
##    <list>              <int> <lgl>   <chr>   <chr>     <int> <chr>   <int> <int>
##  1 <df [1 × 6]>       198973 TRUE    6DCZcS… God's …      84 Scorpi…  2018     1
##  2 <df [2 × 6]>       218146 TRUE    0e7ipj… rockst…      84 beerbo…  2018     2
##  3 <df [2 × 6]>       200185 FALSE   0u2P5u… lovely…      89 lovely…  2018     3
##  4 <df [1 × 6]>       228373 FALSE   09mEdo… Call O…      89 My Dea…  2018     4
##  5 <df [2 × 6]>       214846 FALSE   7ef4Dl… One Ki…      87 One Ki…  2018     5
##  6 <df [1 × 6]>       239835 TRUE    285pBl… Lucid …      85 Goodby…  2018     6
##  7 <df [3 × 6]>       184732 FALSE   09ISts… The Mi…      79 The Mi…  2018     7
##  8 <df [2 × 6]>       202620 TRUE    08bNPG… FRIENDS      79 FRIENDS  2018     8
##  9 <df [3 × 6]>       253390 TRUE    58q2HK… I Like…      78 Invasi…  2018     9
## 10 <df [1 × 6]>       184153 FALSE   6IPwKM… we fel…      85 we fel…  2018    10
## # … with 240 more rows, and abbreviated variable names ¹​track.duration_ms,
## #   ²​track.explicit, ³​track.id, ⁴​track.name, ⁵​track.popularity,
## #   ⁶​track.album.name

Unpack artist names

# Each element of track.artists is a data frame with one row per credited
# artist. Keep the first listed name for every track.
artists <- top_songs$track.artists
# vapply() sizes the result from the data (no hard-coded row count) and takes
# the first name explicitly instead of relying on a truncating assignment.
# A track with no artist name yields NA.
artists2 <- vapply(
  artists,
  function(artist_df) as.character(artist_df$name)[1],
  character(1)
)
# cbind() is kept so the result stays a plain data.frame; the column is named
# explicitly so the select()/rename() below can refer to it.
top_songs <- cbind(top_songs, artists2 = artists2)
songinfo <- select(top_songs, c(track.name, artists2, year)) %>%
  rename(track.artists = artists2)
# Preview the first 50 rows (fewer if the table is shorter).
head(songinfo, 50)
##                                              track.name    track.artists year
## 1                                            God's Plan            Drake 2018
## 2                            rockstar (feat. 21 Savage)      Post Malone 2018
## 3                                  lovely (with Khalid)    Billie Eilish 2018
## 4                                      Call Out My Name       The Weeknd 2018
## 5                              One Kiss (with Dua Lipa)    Calvin Harris 2018
## 6                                          Lucid Dreams       Juice WRLD 2018
## 7                                            The Middle             Zedd 2018
## 8                                               FRIENDS       Marshmello 2018
## 9                                             I Like It          Cardi B 2018
## 10                           we fell in love in october      girl in red 2018
## 11                                 no tears left to cry    Ariana Grande 2018
## 12                             All The Stars (with SZA)   Kendrick Lamar 2018
## 13                                                 SAD!     XXXTENTACION 2018
## 14                   Ric Flair Drip (with Metro Boomin)           Offset 2018
## 15                                           Yes Indeed         Lil Baby 2018
## 16                                              Happier       Marshmello 2018
## 17                                           SICKO MODE     Travis Scott 2018
## 18                                  LOVE. FEAT. ZACARI.   Kendrick Lamar 2018
## 19                                            Moonlight     XXXTENTACION 2018
## 20                                                 2002       Anne-Marie 2018
## 21                                              Natural  Imagine Dragons 2018
## 22                                           Better Now      Post Malone 2018
## 23                                              Silence       Marshmello 2018
## 24                                     I Like Me Better             Lauv 2018
## 25                                         Let You Down               NF 2018
## 26                                               Wolves     Selena Gomez 2018
## 27                                           Be Alright       Dean Lewis 2018
## 28                                              changes     XXXTENTACION 2018
## 29                                                IDGAF         Dua Lipa 2018
## 30                                       God is a woman    Ariana Grande 2018
## 31                                             Delicate     Taylor Swift 2018
## 32                                              Nonstop            Drake 2018
## 33                                           In My Mind           Dynoro 2018
## 34                                Him & I (with Halsey)           G-Eazy 2018
## 35                                             Mo Bamba        Sheck Wes 2018
## 36                                             Stir Fry            Migos 2018
## 37                                      Te Boté - Remix       Nio Garcia 2018
## 38                         Psycho (feat. Ty Dolla $ign)      Post Malone 2018
## 39          1, 2, 3 (feat. Jason Derulo & De La Ghetto)      Sofía Reyes 2018
## 40                                              Shotgun      George Ezra 2018
## 41                                        Nice For What            Drake 2018
## 42 Let Me Go (with Alesso, Florida Georgia Line & watt) Hailee Steinfeld 2018
## 43                                      Walk It Talk It            Migos 2018
## 44       Taki Taki (with Selena Gomez, Ozuna & Cardi B)         DJ Snake 2018
## 45              Déjala Que Vuelva (feat. Manuel Turizo)          Piso 21 2018
## 46                                       In My Feelings            Drake 2018
## 47                                                 Mine            Bazzi 2018
## 48                                                 Body      Loud Luxury 2018
## 49               It's Not Living (If It's Not With You)         The 1975 2018
## 50                                          In My Blood     Shawn Mendes 2018

Edit song and artist names to match Genius website

Remove parenthesised text (such as "(feat. ...)" or "(with ...)") from song titles, then split the data into one data frame per year

# Strip every parenthesised segment, together with the whitespace in front of
# it, from the track titles.
songinfo$track.name <- gsub(
  "\\s*\\([^\\)]+\\)",
  "",
  as.character(songinfo$track.name)
)

# One data frame per year; the original row names are preserved.
songinfo2018 <- subset(songinfo, year == 2018)
songinfo2019 <- subset(songinfo, year == 2019)
songinfo2020 <- subset(songinfo, year == 2020)
songinfo2021 <- subset(songinfo, year == 2021)
songinfo2022 <- subset(songinfo, year == 2022)

2018

# Genius-friendly title/artist overrides for 2018.
# Rows are located by their current (parentheses-stripped) Spotify title rather
# than by row number: in the table printed earlier in this document, the
# hard-coded positions hit the wrong songs (row 19 is "Moonlight", not
# "LOVE. FEAT. ZACARI."; row 13 is "SAD!", not "Happier").
# NA in genius_name / genius_artist means "leave that column unchanged".
songinfo2018 <- local({
  fixes <- data.frame(
    spotify_name = c(
      "LOVE. FEAT. ZACARI.", "Yes Indeed", "Lucid Dreams", "Happier",
      "Ric Flair Drip", "2002", "In My Mind", "Him & I",
      "Te Boté - Remix", "1, 2, 3", "Let Me Go", "Déjala Que Vuelva"
    ),
    genius_name = c(
      "LOVE.", NA, NA, "happier",
      NA, NA, NA, "Him and I",
      "Te bote", "1 2 3", NA, "Dejala Que Vuelva"
    ),
    genius_artist = c(
      NA, "Lil Baby and Drake", "Juice wrld", NA,
      "Offset and Metro Boomin", "Anne marie", "Dynoro and gigi dagostino",
      "G Eazy and Halsey",
      NA, "Sofia Reyes", "Hailee Steinfeld and Alesso", NA
    )
  )

  # Look every title up before changing anything, so that renaming one row
  # cannot affect how another row is found.
  rows <- match(fixes$spotify_name, songinfo2018$track.name)
  found <- !is.na(rows)
  if (!all(found)) {
    warning(
      "2018 corrections skipped, title not found: ",
      paste(fixes$spotify_name[!found], collapse = "; "),
      call. = FALSE
    )
  }

  corrected <- songinfo2018
  set_name <- found & !is.na(fixes$genius_name)
  corrected$track.name[rows[set_name]] <- fixes$genius_name[set_name]
  set_artist <- found & !is.na(fixes$genius_artist)
  corrected$track.artists[rows[set_artist]] <- fixes$genius_artist[set_artist]
  corrected
})

2019

# Genius-friendly overrides for 2019, addressed by row position.
# NOTE(review): the positions are hard-coded, so they silently target other
# songs if the playlist order changes; verify them against the current data.
songinfo2019[c(3, 5, 44), "track.name"] <- c(
  "Senorita",    # row 3
  "Sunflower",   # row 5
  "10000 hours"  # row 44
)
songinfo2019[c(3, 44), "track.artists"] <- c(
  "Shawn Mendes and Camila Cabello",  # row 3
  "Dan shay"                          # row 44
)

2020

# Genius-friendly overrides for 2020, addressed by row position.
# NOTE(review): the positions are hard-coded, so they silently target other
# songs if the playlist order changes; verify them against the current data.
songinfo2020[c(21, 26, 38, 39, 42), "track.name"] <- c(
  "Savage love laxed siren beat",  # row 21
  "Senorita",                      # row 26
  "Sunflower",                     # row 38
  "Hawai",                         # row 39
  "ritmo bad boys for life"        # row 42
)
songinfo2020[c(21, 26, 42, 47), "track.artists"] <- c(
  "Jawsh 685 and Jason Derulo",        # row 21
  "Shawn Mendes and Camila Cabello",   # row 26
  "The black eyed peas and j balvin",  # row 42
  "Ariana Grande and Justin Bieber"    # row 47
)

2021

# Genius-friendly overrides for 2021, addressed by row position.
# NOTE(review): the positions are hard-coded, so they silently target other
# songs if the playlist order changes; verify them against the current data.
songinfo2021[c(2, 10, 12, 25, 26, 46, 48, 50), "track.name"] <- c(
  "Montero Call Me by Your Name",  # row 2
  "Beggin",                        # row 10
  "Dakiti",                        # row 12
  "Friday dopamine re edit",       # row 25
  "telepatia",                     # row 26
  "Que Mas Pues",                  # row 46
  "34 35",                         # row 48
  "Pareja Del Ano"                 # row 50
)
songinfo2021[c(10, 17, 21, 25, 33, 34, 50), "track.artists"] <- c(
  "Maneskin",                        # row 10
  "Silk Sonic",                      # row 17
  "Tiesto",                          # row 21
  "Riton and Nightcrawlers",         # row 25
  "Myke Towers and Juhn",            # row 33
  "Maneskin",                        # row 34
  "Sebastian yatra and Myke Towers"  # row 50
)

2022

# Genius-friendly overrides for 2022, addressed by row position.
# NOTE(review): the positions are hard-coded, so they silently target other
# songs if the playlist order changes; verify them against the current data.
songinfo2022[c(5, 7, 8, 10, 36, 39), "track.name"] <- c(
  "Titi Me Pregunto",                      # row 5
  "Enemy",                                 # row 7
  "quevedo bzrp music sessions vol 52",    # row 8
  "Running up that hill a deal with god",  # row 10
  "I Aint Worried",                        # row 36
  "Una Noche en Medellin"                  # row 39
)
# NOTE(review): row 42 joins the artists with "&" while the other multi-artist
# overrides use "and"; confirm which form the Genius lookup expects.
songinfo2022[c(8, 19, 22, 29, 42, 44), "track.artists"] <- c(
  "Bizarrap and quevedo",                 # row 8
  "Elley Duhe",                           # row 19
  "Rauw alejandro and chencho corleone",  # row 22
  "Lost frequencies and calum scott",     # row 29
  "Bad Bunny & Rauw Alejandro",           # row 42
  "Tiesto"                                # row 44
)

Scraping audio features information

# Fetch the audio features of every playlist listed in year_id and tag each
# row with the playlist's year. The iteration follows the rows of year_id
# (no hard-coded count) and the per-playlist results are combined once with
# bind_rows() instead of growing a data frame with rbind() on every pass.
songs_ana <- bind_rows(
  lapply(seq_len(nrow(year_id)), function(j) {
    playlist_year <- year_id$year[j]
    get_playlist_audio_features("spotify", year_id$id[j]) %>%
      mutate(year = playlist_year)
  })
)

# Keep only the tracks that are present in top_songs for the same year.
songs_ana <- inner_join(songs_ana, top_songs, by = c("track.id", "year"))

# track.name exists in both inputs, so the join suffixes it; track.name.y is
# the copy that comes from top_songs.
songs_ana <- songs_ana %>%
  select(
    danceability, energy, key, loudness, speechiness, acousticness,
    instrumentalness, liveness, valence, tempo, year, track.name.y, track.id
  )
songs_ana
## # A tibble: 250 × 13
##    danceabi…¹ energy   key loudn…² speec…³ acous…⁴ instr…⁵ liven…⁶ valence tempo
##         <dbl>  <dbl> <int>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>
##  1      0.754  0.449     7   -9.21  0.109   0.0332 8.29e-5  0.552    0.357  77.2
##  2      0.585  0.52      5   -6.14  0.0712  0.124  7.01e-5  0.131    0.129 160. 
##  3      0.351  0.296     4  -10.1   0.0333  0.934  0        0.095    0.12  115. 
##  4      0.461  0.593     1   -4.95  0.0356  0.17   0        0.307    0.175 134. 
##  5      0.791  0.862     9   -3.24  0.11    0.037  2.19e-5  0.0814   0.592 124. 
##  6      0.511  0.566     6   -7.23  0.2     0.349  0        0.34     0.218  83.9
##  7      0.753  0.657     7   -3.06  0.0449  0.171  0        0.112    0.437 107. 
##  8      0.626  0.88      9   -2.38  0.0504  0.205  0        0.128    0.534  95.1
##  9      0.816  0.726     5   -4.00  0.129   0.099  0        0.372    0.65  136. 
## 10      0.566  0.366     7  -12.8   0.028   0.113  1.81e-1  0.155    0.237 130. 
## # … with 240 more rows, 3 more variables: year <int>, track.name.y <chr>,
## #   track.id <chr>, and abbreviated variable names ¹​danceability, ²​loudness,
## #   ³​speechiness, ⁴​acousticness, ⁵​instrumentalness, ⁶​liveness