For this project, I chose a rap album I know well that has a lot of words: “Please Excuse Me for Being Antisocial” by Roddy Ricch (referred to below as “Sorry for Being Antisocial”). (Sorry in advance for some of the word results; rap albums are a bit vulgar.)
library(geniusr) # This package gets lyrics
library(tidyverse)
── Attaching packages ────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.1.0 ✔ purrr 0.2.5
✔ tibble 2.0.0 ✔ dplyr 0.7.8
✔ tidyr 0.8.2 ✔ stringr 1.3.1
✔ readr 1.3.1 ✔ forcats 0.3.0
── Conflicts ───────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
library(tidytext)
library(wordcloud2)
- First, I retrieved the album's information and lyrics, and then unnested the lyrics into individual words.
# Obtain the Genius API token via geniusr (presumably read from the
# environment or prompted for -- confirm against the geniusr docs).
genius_token()
# Search Genius for "the box"; presumably used to find the song ID that is
# passed to get_song_meta() afterwards.
search_song("the box")
NA
# Fetch the metadata for song ID 5068155 (presumably the song found by the
# "the box" search -- verify).
get_song_meta(5068155)
# Scrape the track list for ID 512452 (presumably the album ID reported in the
# song metadata -- verify) and keep it for the lyric scraping step.
antisocial_tracks <- scrape_tracklist(512452)
argument is not an atomic vector; coercing
# Show the scraped track list.
antisocial_tracks

# Scrape the lyrics behind every track's lyrics URL and row-bind the
# per-song results into a single data frame, then show it.
antisocial_lyrics <- antisocial_tracks$song_lyrics_url %>%
  map_df(scrape_lyrics_url)

antisocial_lyrics
NA
# Tokenise the `line` column into a `word` column (one token per row) and keep
# only the song name alongside each word.
antisocial_words <- select(
  unnest_tokens(antisocial_lyrics, word, line),
  song_name, word
)

antisocial_words
- Next, I cleaned the lyrics by removing stopwords, and then created a table and word cloud with the word counts.
# Remove the words listed by get_stopwords(), then tabulate the remaining
# words, most frequent first.
antisocial_words %>%
  anti_join(get_stopwords()) %>%
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  count(word, sort = TRUE)
Joining, by = "word"
# Same stop-word-free counts as the table, restricted to the 200 highest
# counts (top_n() ranks by the last column, `n`) and drawn as a word cloud.
antisocial_words %>%
  anti_join(get_stopwords()) %>%
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  count(word, sort = TRUE) %>%
  top_n(200) %>%
  wordcloud2(size = .5)
Joining, by = "word"
Selecting by n
- Next, I did a sentiment analysis using both the Bing and NRC lexicons, and created graphs of the words that contribute most to each sentiment.
# Load the Bing sentiment lexicon through tidytext and show it.
bing <- get_sentiments("bing")
bing

# Keep only lyric words present in the lexicon, count each word/sentiment
# pair, and retain the 10 highest counts per sentiment (top_n() ranks by the
# last column, `n`, and may keep extra rows on ties).
antisocial_words %>%
  inner_join(bing) %>%
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  top_n(10) %>%
  ungroup() %>%
  # Order the words by count so the bars are sorted within the plot.
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  facet_wrap(~sentiment, scales = "free") +
  scale_fill_viridis_d() +
  labs(
    x = NULL,
    y = "Roddy Ricch Sorry for Being Antisocial album: Words that contribute the most to each sentiment"
  ) +
  theme_minimal()
Joining, by = "word"
Selecting by n

# Load the NRC lexicon explicitly. The notebook never defined `nrc`, so this
# chunk only ran when the object happened to exist in the interactive session.
# NOTE(review): recent tidytext versions fetch NRC through the textdata
# package and ask for licence confirmation on first use -- confirm on your setup.
nrc <- get_sentiments("nrc")

# Keep only lyric words present in the lexicon, count each word/sentiment
# pair, and retain the 5 highest counts per sentiment (top_n() ranks by the
# last column, `n`).
antisocial_words %>%
  inner_join(nrc) %>%
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  top_n(5) %>%
  ungroup() %>%
  # Order the words by count so the bars are sorted within the plot.
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(sentiment), scales = "free") +
  labs(
    y = "Roddy Ricch Sorry for Being Antisocial album: Words that contribute the most to each sentiment",
    x = NULL
  ) +
  scale_fill_viridis_d() +
  coord_flip() +
  theme_minimal()
Joining, by = "word"
Selecting by n

- Next I created bigrams of the lyrics, removed the stopwords, and created a table and word cloud of the most common bigrams.
# Preview: tokenise each lyric line into two-word n-grams and show only the
# resulting `bigram` column.
select(
  unnest_tokens(antisocial_lyrics, bigram, line, token = "ngrams", n = 2),
  bigram
)
NA
# Tokenise each lyric line into two-word n-grams and store just the `bigram`
# column. Assigned with a leading `<-` so the target name is visible up front
# instead of trailing the pipeline as `-> antisocial_bigrams`.
antisocial_bigrams <- antisocial_lyrics %>%
  unnest_tokens(bigram, line, token = "ngrams", n = 2) %>%
  select(bigram)
# Split each bigram into its two words, discard any pair in which either word
# appears in tidytext's `stop_words` list, and glue the survivors back together.
antisocial_bigrams %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word
  ) %>%
  unite(bigram, word1, word2, sep = " ")
NA
# Stop-word-free bigrams, counted and drawn as a word cloud. Bigrams that
# occur only once are dropped (n > 1).
antisocial_bigrams %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% stop_words$word) %>%
  filter(!word2 %in% stop_words$word) %>%
  unite(bigram, word1, word2, sep = " ") %>%
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  count(bigram, sort = TRUE) %>%
  filter(n > 1) %>%
  wordcloud2(size = .5)
- Lastly, I used the bigram method to find the most common words that came after the words he/she.
# Pronouns whose following word is tabulated.
first_word <- c("he", "she")

# Count every bigram, split it into its two words, keep the pairs that start
# with "he" or "she", and plot the 5 highest-count followers per pronoun.
antisocial_bigrams %>%
  count(bigram, sort = TRUE) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(word1 %in% first_word) %>%
  # Name the re-aggregated count column explicitly. The previous
  # `rename(total = nn)` relied on count() auto-naming the column `nn`, which
  # depends on the dplyr version; `name =` requires dplyr >= 0.8.0.
  count(word1, word2, wt = n, sort = TRUE, name = "total") %>%
  # Fix the factor levels in reverse row order so that, after coord_flip(),
  # the bars follow the sorted counts.
  mutate(word2 = factor(word2, levels = rev(unique(word2)))) %>%
  group_by(word1) %>%
  # top_n() ranks by the last column, `total`.
  top_n(5) %>%
  ungroup() %>%
  ggplot(aes(word2, total, fill = word1)) +
  scale_fill_viridis_d() +
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = NULL, title = "Word following:") +
  facet_wrap(~word1, scales = "free") +
  coord_flip() +
  theme_minimal()
Selecting by total

LS0tCnRpdGxlOiAiU29ycnkgZm9yIEJlaW5nIEFudGlzb2NpYWwiCm91dHB1dDoKICBodG1sX25vdGVib29rOiBkZWZhdWx0CiAgcGRmX2RvY3VtZW50OiBkZWZhdWx0Ci0tLQoKRm9yIHRoaXMgcHJvamVjdCwgSSBjaG9zZSBhIHJhcCBhbGJ1bSBJIGtub3cgd2l0aCBhIGxvdCBvZiB3b3Jkcy4gSSBjaG9zZSAiU29ycnkgZm9yIEJlaW5nIEFudGlzb2NpYWwiIGJ5IFJvZGR5IFJpY2NoLiAoU29ycnkgaW4gYWR2YW5jZSBmb3Igc29tZSBvZiB0aGUgd29yZCByZXN1bHRzLCByYXAgYWxidW1zIGFyZSBhIGJpdCB2dWxnYXIpLiAKCgoKCmBgYHtyfQpsaWJyYXJ5KGdlbml1c3IpICAgICAgICAgICAgICAgICAgICAgICAgICMgVGhpcyBwYWNrYWdlIGdldHMgbHlyaWNzCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KHRpZHl0ZXh0KQpsaWJyYXJ5KHdvcmRjbG91ZDIpCmBgYAoKCjEuIEZpcnN0IEkgd2VudCB0aHJvdWdoIHRoZSBwcm9jZXNzIHRvIGdldCB0aGUgaW5mb3JtYXRpb24gZnJvbSB0aGUgYWxidW0gIGFuZCBnZXQgdGhlIGx5cmljcywgdGhlbiBJIHdlbnQgb24gdG8gdW5uZXN0IHRoZW0uICAKYGBge3J9Cmdlbml1c190b2tlbigpCmBgYAoKYGBge3J9CnNlYXJjaF9zb25nKCJ0aGUgYm94IikKCmBgYApgYGB7cn0KZ2V0X3NvbmdfbWV0YSg1MDY4MTU1KQpgYGAKCmBgYHtyfQphbnRpc29jaWFsX3RyYWNrcyA8LSBzY3JhcGVfdHJhY2tsaXN0KDUxMjQ1MikKYW50aXNvY2lhbF90cmFja3MKYGBgCgpgYGB7cn0KYW50aXNvY2lhbF9seXJpY3MgPC0gbWFwX2RmKGFudGlzb2NpYWxfdHJhY2tzJHNvbmdfbHlyaWNzX3VybCwgc2NyYXBlX2x5cmljc191cmwpCmFudGlzb2NpYWxfbHlyaWNzCgpgYGAKCmBgYHtyfQphbnRpc29jaWFsX3dvcmRzIDwtIGFudGlzb2NpYWxfbHlyaWNzICU+JQogIHVubmVzdF90b2tlbnMod29yZCwgbGluZSkgJT4lIAogIHNlbGVjdChzb25nX25hbWUsIHdvcmQpCgphbnRpc29jaWFsX3dvcmRzCmBgYAoKMi4gTmV4dCwgSSBjbGVhbmVkIHRoZSBseXJpY3MgYnkgcmVtb3Zpbmcgc3RvcHdvcmRzLCBhbmQgdGhlbiBjcmVhdGVkIGEgdGFibGUgYW5kIHdvcmQgY2xvdWQgd2l0aCB0aGUgd29yZCBjb3VudHMuICAKCmBgYHtyfQphbnRpc29jaWFsX3dvcmRzICU+JSAKICBhbnRpX2pvaW4oZ2V0X3N0b3B3b3JkcygpKSAlPiUgCiAgY291bnQod29yZCwgc29ydCA9IFQpCmBgYAoKYGBge3J9CmFudGlzb2NpYWxfd29yZHMgJT4lIAogIGFudGlfam9pbihnZXRfc3RvcHdvcmRzKCkpICU+JSAKICBjb3VudCh3b3JkLCBzb3J0ID0gVCkgJT4lCiAgdG9wX24oMjAwKSAlPiUKICB3b3JkY2xvdWQyKHNpemUgPSAuNSkKYGBgCgoKMy4gTmV4dCBJIGRpZCBhIHNlbnRpbWVudCBhbmFseXNlcyB1c2luZyBib3RoIGJpbmcgYW5kIG5yYywgYW5kIGNyZWF0ZWQgZ3JhcGhzIG9mIHRoZSB3b3JkcyB0aGF0IGNvbnRyaWJ1dGUgbW9zdCB0byBlYWNoIHNlbnRpbWVudC4gIAoKYGBge3J9CmJpbmcgPC0gZ2V0X3NlbnRpbWVudHMo
ImJpbmciKQpiaW5nCmBgYAoKCmBgYHtyfQoKYW50aXNvY2lhbF93b3JkcyAlPiUgCiAgaW5uZXJfam9pbihiaW5nKSAlPiUgCiAgY291bnQod29yZCwgc2VudGltZW50LCBzb3J0ID0gVFJVRSkgJT4lCiAgZ3JvdXBfYnkoc2VudGltZW50KSAlPiUKICB0b3BfbigxMCkgJT4lCiAgdW5ncm91cCgpICU+JQogIG11dGF0ZSh3b3JkID0gcmVvcmRlcih3b3JkLCBuKSkgJT4lCiAgZ2dwbG90KGFlcyh3b3JkLCBuLCBmaWxsID0gc2VudGltZW50KSkgKwogIGdlb21fY29sKHNob3cubGVnZW5kID0gRkFMU0UpICsKICBmYWNldF93cmFwKHZhcnMoc2VudGltZW50KSwgc2NhbGVzID0gImZyZWUiKSArCiAgbGFicyh5ID0gIlJvZGR5IFJpY2NoIFNvcnJ5IGZvciBCZWluZyBBbnRpc29jaWFsIGFsYnVtOiBXb3JkcyB0aGF0IGNvbnRyaWJ1dGUgdGhlIG1vc3QgdG8gZWFjaCBzZW50aW1lbnQiLAogICAgICAgeCA9IE5VTEwpICsKICBzY2FsZV9maWxsX3ZpcmlkaXNfZCgpICsKICBjb29yZF9mbGlwKCkgKwogIHRoZW1lX21pbmltYWwoKQpgYGAKCmBgYHtyfQoKYW50aXNvY2lhbF93b3JkcyAlPiUgCiAgaW5uZXJfam9pbihucmMpICU+JSAKICBjb3VudCh3b3JkLCBzZW50aW1lbnQsIHNvcnQgPSBUUlVFKSAlPiUKICBncm91cF9ieShzZW50aW1lbnQpICU+JQogIHRvcF9uKDUpICU+JQogIHVuZ3JvdXAoKSAlPiUKICBtdXRhdGUod29yZCA9IHJlb3JkZXIod29yZCwgbikpICU+JQogIGdncGxvdChhZXMod29yZCwgbiwgZmlsbCA9IHNlbnRpbWVudCkpICsKICBnZW9tX2NvbChzaG93LmxlZ2VuZCA9IEZBTFNFKSArCiAgZmFjZXRfd3JhcCh2YXJzKHNlbnRpbWVudCksIHNjYWxlcyA9ICJmcmVlIikgKwogIGxhYnMoeSA9ICJSb2RkeSBSaWNjaCBTb3JyeSBmb3IgQmVpbmcgQW50aXNvY2lhbCBhbGJ1bTogV29yZHMgdGhhdCBjb250cmlidXRlIHRoZSBtb3N0IHRvIGVhY2ggc2VudGltZW50IiwKICAgICAgIHggPSBOVUxMKSArCiAgc2NhbGVfZmlsbF92aXJpZGlzX2QoKSArCiAgY29vcmRfZmxpcCgpICsKICB0aGVtZV9taW5pbWFsKCkKYGBgCgoKNC4gTmV4dCBJIGNyZWF0ZWQgYmlncmFtcyBvZiB0aGUgbHlyaWNzLCByZW1vdmVkIHRoZSBzdG9wd29yZHMsIGFuZCBjcmVhdGVkIGEgdGFibGUgYW5kIHdvcmQgY2xvdWQgb2YgdGhlIG1vc3QgY29tbW9uIGJpZ3JhbXMuCgpgYGB7cn0KYW50aXNvY2lhbF9seXJpY3MgJT4lCiAgdW5uZXN0X3Rva2VucyhiaWdyYW0sIGxpbmUsIHRva2VuID0gIm5ncmFtcyIsIG4gPSAyKSAlPiUgCiAgc2VsZWN0KGJpZ3JhbSkKCmBgYAoKYGBge3J9CmFudGlzb2NpYWxfbHlyaWNzICU+JQogIHVubmVzdF90b2tlbnMoYmlncmFtLCBsaW5lLCB0b2tlbiA9ICJuZ3JhbXMiLCBuID0gMikgJT4lIAogIHNlbGVjdChiaWdyYW0pIC0+IGFudGlzb2NpYWxfYmlncmFtcwoKYGBgCgpgYGB7cn0KYW50aXNvY2lhbF9iaWdyYW1zICU+JSAKICBzZXBhcmF0ZShiaWdyYW0sIGMoIndvcmQxIiwgIndvcmQyIiksIHNlcCA9ICIgIikg
JT4lIAogIGZpbHRlcighd29yZDEgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JQogIGZpbHRlcighd29yZDIgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JSAKICB1bml0ZShiaWdyYW0sIHdvcmQxLCB3b3JkMiwgc2VwID0gIiAiKQoKYGBgCmBgYHtyfQoKYW50aXNvY2lhbF9iaWdyYW1zICU+JSAKICBzZXBhcmF0ZShiaWdyYW0sIGMoIndvcmQxIiwgIndvcmQyIiksIHNlcCA9ICIgIikgJT4lIAogIGZpbHRlcighd29yZDEgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JQogIGZpbHRlcighd29yZDIgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JSAKICB1bml0ZShiaWdyYW0sIHdvcmQxLCB3b3JkMiwgc2VwID0gIiAiKSAlPiUKICBjb3VudChiaWdyYW0sIHNvcnQgPSBUKSAlPiUKICBmaWx0ZXIobiA+IDEpICU+JSAKICB3b3JkY2xvdWQyKHNpemUgPSAuNSkKYGBgCgo1LiBMYXN0bHksIEkgdXNlZCB0aGUgYmlncmFtIG1ldGhvZCB0byBmaW5kIHRoZSBtb3N0IGNvbW1vbiB3b3JkcyB0aGF0IGNhbWUgYWZ0ZXIgdGhlIHdvcmRzIGhlL3NoZS4KCmBgYHtyfQoKCmZpcnN0X3dvcmQgPC0gYygiaGUiLCAic2hlIikgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCgphbnRpc29jaWFsX2JpZ3JhbXMgJT4lIAogIGNvdW50KGJpZ3JhbSwgc29ydCA9IFQpICU+JSAKICBzZXBhcmF0ZShiaWdyYW0sIGMoIndvcmQxIiwgIndvcmQyIiksIHNlcCA9ICIgIikgJT4lICAgICAgIAogIGZpbHRlcih3b3JkMSAlaW4lIGZpcnN0X3dvcmQpICU+JSAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgY291bnQod29yZDEsIHdvcmQyLCB3dCA9IG4sIHNvcnQgPSBUUlVFKSAlPiUgCiAgcmVuYW1lKHRvdGFsID0gbm4pICU+JQogIG11dGF0ZSh3b3JkMiA9IGZhY3Rvcih3b3JkMiwgbGV2ZWxzID0gcmV2KHVuaXF1ZSh3b3JkMikpKSkgJT4lICAgIAogIGdyb3VwX2J5KHdvcmQxKSAlPiUgCiAgdG9wX24oNSkgJT4lIAogIGdncGxvdChhZXMod29yZDIsIHRvdGFsLCBmaWxsID0gd29yZDEpKSArICAgICAgICAgICAgICAgICAgICAgICAgIAogIHNjYWxlX2ZpbGxfdmlyaWRpc19kKCkgKyAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogIGdlb21fY29sKHNob3cubGVnZW5kID0gRkFMU0UpICsKICBsYWJzKHggPSBOVUxMLCB5ID0gTlVMTCwgdGl0bGUgPSAiV29yZCBmb2xsb3dpbmc6IikgKwogIGZhY2V0X3dyYXAofndvcmQxLCBzY2FsZXMgPSAiZnJlZSIpICsKICBjb29yZF9mbGlwKCkgKwogIHRoZW1lX21pbmltYWwoKQoKYGBgCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCg==