For this project, I chose a rap album I know well that has a lot of words: “Sorry for Being Antisocial” by Roddy Ricch. (Sorry in advance for some of the word results; rap albums can be a bit vulgar.)

library(geniusr)                         # This package gets lyrics
library(tidyverse)
── Attaching packages ────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.1.0     ✔ purrr   0.2.5
✔ tibble  2.0.0     ✔ dplyr   0.7.8
✔ tidyr   0.8.2     ✔ stringr 1.3.1
✔ readr   1.3.1     ✔ forcats 0.3.0
── Conflicts ───────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(tidytext)
library(wordcloud2)
  1. First I went through the process to get the information from the album and get the lyrics, then I went on to unnest them.
# Retrieve the Genius API access token that the geniusr calls below rely on.
# NOTE(review): as a bare top-level call the return value is auto-printed,
# which may expose the token in the rendered notebook — confirm and consider
# wrapping it in invisible().
genius_token()
# Search Genius for "the box" to look up a song from the album.
search_song("the box")
NA
# Look up the metadata for song id 5068155.
# NOTE(review): presumably this id was taken from the search_song() result and
# used to find the album id below — confirm.
get_song_meta(5068155)
# Scrape the track list for album id 512452 and keep it for the lyrics download.
antisocial_tracks <- scrape_tracklist(512452)
argument is not an atomic vector; coercing
# Print the scraped track list for inspection.
antisocial_tracks
# Call scrape_lyrics_url() on every track's lyrics URL and row-bind the
# per-song results into a single data frame (later chunks use its `line` and
# `song_name` columns).
antisocial_lyrics <- map_df(antisocial_tracks$song_lyrics_url, scrape_lyrics_url)
antisocial_lyrics
NA
# Break every lyric line into one-word-per-row tokens and keep only the song
# title alongside each word.
antisocial_words <- antisocial_lyrics %>%
  unnest_tokens(output = word, input = line) %>%
  select(song_name, word)

# Show the tokenised result.
antisocial_words
  2. Next, I cleaned the lyrics by removing stopwords, and then created a table and word cloud with the word counts.
# Word frequencies for the album after removing stop words.
# The join key is spelled out so the join does not depend on whichever columns
# the two tables happen to share, and `TRUE` is used instead of `T` because
# `T` is an ordinary variable that can be reassigned.
antisocial_words %>%
  anti_join(get_stopwords(), by = "word") %>%
  count(word, sort = TRUE)
Joining, by = "word"
# Word cloud of the 200 most frequent non-stop-words.
# `top_n()` is given its weighting column explicitly; without it, it silently
# ranks by whatever the last column of the table is.
antisocial_words %>%
  anti_join(get_stopwords(), by = "word") %>%
  count(word, sort = TRUE) %>%
  top_n(200, n) %>%
  wordcloud2(size = .5)
Joining, by = "word"
Selecting by n
  3. Next I did a sentiment analysis using both bing and nrc, and created graphs of the words that contribute most to each sentiment.
# Load the "bing" sentiment lexicon and print it for inspection; it is joined
# against the album's words in the plot that follows.
bing <- get_sentiments("bing")
bing

# Bing sentiment: the ten words contributing most to each sentiment.
# Words are matched to the lexicon on `word`, counted per (word, sentiment),
# and the top ten per sentiment (ties included) are drawn as horizontal bars,
# one facet per sentiment.
antisocial_words %>%
  inner_join(bing, by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  top_n(10, n) %>%  # rank explicitly by the count column
  ungroup() %>%
  mutate(word = reorder(word, n)) %>%  # order bars by frequency
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(sentiment), scales = "free") +
  labs(y = "Roddy Ricch Sorry for Being Antisocial album: Words that contribute the most to each sentiment",
       x = NULL) +
  scale_fill_viridis_d() +
  coord_flip() +
  theme_minimal()
Joining, by = "word"
Selecting by n


# NRC sentiment: the five words contributing most to each sentiment.
# The lexicon is loaded here with get_sentiments("nrc"); the notebook never
# defined an `nrc` object, so the chunk only ran when one happened to exist in
# the interactive session and failed in a clean one.
antisocial_words %>%
  inner_join(get_sentiments("nrc"), by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  top_n(5, n) %>%  # rank explicitly by the count column
  ungroup() %>%
  mutate(word = reorder(word, n)) %>%  # order bars by frequency
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(sentiment), scales = "free") +
  labs(y = "Roddy Ricch Sorry for Being Antisocial album: Words that contribute the most to each sentiment",
       x = NULL) +
  scale_fill_viridis_d() +
  coord_flip() +
  theme_minimal()
Joining, by = "word"
Selecting by n

  4. Next I created bigrams of the lyrics, removed the stopwords, and created a table and word cloud of the most common bigrams.
# Preview: split each lyric line into overlapping two-word sequences and show
# only the resulting bigram column.
antisocial_lyrics %>%
  unnest_tokens(output = bigram, input = line, token = "ngrams", n = 2) %>%
  select(bigram)
NA
# Store the album's bigrams (one per row) for the analyses that follow.
antisocial_bigrams <- antisocial_lyrics %>%
  unnest_tokens(output = bigram, input = line, token = "ngrams", n = 2) %>%
  select(bigram)
# Preview: keep only bigrams in which neither word is a stop word.
# Each bigram is split into its two words, both are checked against
# `stop_words`, and the survivors are glued back together.
antisocial_bigrams %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word
  ) %>%
  unite(bigram, word1, word2, sep = " ")
NA

# Word cloud of stop-word-free bigrams that occur more than once.
# `TRUE` replaces `T`, which is an ordinary variable that can be reassigned.
antisocial_bigrams %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% stop_words$word) %>%
  filter(!word2 %in% stop_words$word) %>%
  unite(bigram, word1, word2, sep = " ") %>%
  count(bigram, sort = TRUE) %>%
  filter(n > 1) %>%  # drop one-off bigrams
  wordcloud2(size = .5)
  5. Lastly, I used the bigram method to find the most common words that came after the words he/she.


# Words that most often follow "he" and "she".
first_word <- c("he", "she")

antisocial_bigrams %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(word1 %in% first_word) %>%
  # Count each (word1, word2) pair once. The earlier version counted bigrams,
  # re-counted with `wt = n`, and renamed the auto-generated `nn` column, which
  # relied on dplyr's fallback naming for a clashing count column. A single
  # count gives the same totals under the stable name `n`.
  count(word1, word2, sort = TRUE) %>%
  rename(total = n) %>%
  # Fix the bar order to the frequency ranking (most frequent on top after
  # coord_flip()).
  mutate(word2 = factor(word2, levels = rev(unique(word2)))) %>%
  group_by(word1) %>%
  top_n(5, total) %>%  # rank explicitly by the pair total
  ungroup() %>%
  ggplot(aes(word2, total, fill = word1)) +
  scale_fill_viridis_d() +
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = NULL, title = "Word following:") +
  facet_wrap(~word1, scales = "free") +
  coord_flip() +
  theme_minimal()
Selecting by total

LS0tCnRpdGxlOiAiU29ycnkgZm9yIEJlaW5nIEFudGlzb2NpYWwiCm91dHB1dDoKICBodG1sX25vdGVib29rOiBkZWZhdWx0CiAgcGRmX2RvY3VtZW50OiBkZWZhdWx0Ci0tLQoKRm9yIHRoaXMgcHJvamVjdCwgSSBjaG9zZSBhIHJhcCBhbGJ1bSBJIGtub3cgd2l0aCBhIGxvdCBvZiB3b3Jkcy4gSSBjaG9zZSAiU29ycnkgZm9yIEJlaW5nIEFudGlzb2NpYWwiIGJ5IFJvZGR5IFJpY2NoLiAoU29ycnkgaW4gYWR2YW5jZSBmb3Igc29tZSBvZiB0aGUgd29yZCByZXN1bHRzLCByYXAgYWxidW1zIGFyZSBhIGJpdCB2dWxnYXIpLiAKCgoKCmBgYHtyfQpsaWJyYXJ5KGdlbml1c3IpICAgICAgICAgICAgICAgICAgICAgICAgICMgVGhpcyBwYWNrYWdlIGdldHMgbHlyaWNzCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KHRpZHl0ZXh0KQpsaWJyYXJ5KHdvcmRjbG91ZDIpCmBgYAoKCjEuIEZpcnN0IEkgd2VudCB0aHJvdWdoIHRoZSBwcm9jZXNzIHRvIGdldCB0aGUgaW5mb3JtYXRpb24gZnJvbSB0aGUgYWxidW0gIGFuZCBnZXQgdGhlIGx5cmljcywgdGhlbiBJIHdlbnQgb24gdG8gdW5uZXN0IHRoZW0uICAKYGBge3J9Cmdlbml1c190b2tlbigpCmBgYAoKYGBge3J9CnNlYXJjaF9zb25nKCJ0aGUgYm94IikKCmBgYApgYGB7cn0KZ2V0X3NvbmdfbWV0YSg1MDY4MTU1KQpgYGAKCmBgYHtyfQphbnRpc29jaWFsX3RyYWNrcyA8LSBzY3JhcGVfdHJhY2tsaXN0KDUxMjQ1MikKYW50aXNvY2lhbF90cmFja3MKYGBgCgpgYGB7cn0KYW50aXNvY2lhbF9seXJpY3MgPC0gbWFwX2RmKGFudGlzb2NpYWxfdHJhY2tzJHNvbmdfbHlyaWNzX3VybCwgc2NyYXBlX2x5cmljc191cmwpCmFudGlzb2NpYWxfbHlyaWNzCgpgYGAKCmBgYHtyfQphbnRpc29jaWFsX3dvcmRzIDwtIGFudGlzb2NpYWxfbHlyaWNzICU+JQogIHVubmVzdF90b2tlbnMod29yZCwgbGluZSkgJT4lIAogIHNlbGVjdChzb25nX25hbWUsIHdvcmQpCgphbnRpc29jaWFsX3dvcmRzCmBgYAoKMi4gTmV4dCwgSSBjbGVhbmVkIHRoZSBseXJpY3MgYnkgcmVtb3Zpbmcgc3RvcHdvcmRzLCBhbmQgdGhlbiBjcmVhdGVkIGEgdGFibGUgYW5kIHdvcmQgY2xvdWQgd2l0aCB0aGUgd29yZCBjb3VudHMuICAKCmBgYHtyfQphbnRpc29jaWFsX3dvcmRzICU+JSAKICBhbnRpX2pvaW4oZ2V0X3N0b3B3b3JkcygpKSAlPiUgCiAgY291bnQod29yZCwgc29ydCA9IFQpCmBgYAoKYGBge3J9CmFudGlzb2NpYWxfd29yZHMgJT4lIAogIGFudGlfam9pbihnZXRfc3RvcHdvcmRzKCkpICU+JSAKICBjb3VudCh3b3JkLCBzb3J0ID0gVCkgJT4lCiAgdG9wX24oMjAwKSAlPiUKICB3b3JkY2xvdWQyKHNpemUgPSAuNSkKYGBgCgoKMy4gTmV4dCBJIGRpZCBhIHNlbnRpbWVudCBhbmFseXNlcyB1c2luZyBib3RoIGJpbmcgYW5kIG5yYywgYW5kIGNyZWF0ZWQgZ3JhcGhzIG9mIHRoZSB3b3JkcyB0aGF0IGNvbnRyaWJ1dGUgbW9zdCB0byBlYWNoIHNlbnRpbWVudC4gIAoKYGBge3J9CmJpbmcgPC0gZ2V0X3NlbnRpbWVudHMo
ImJpbmciKQpiaW5nCmBgYAoKCmBgYHtyfQoKYW50aXNvY2lhbF93b3JkcyAlPiUgCiAgaW5uZXJfam9pbihiaW5nKSAlPiUgCiAgY291bnQod29yZCwgc2VudGltZW50LCBzb3J0ID0gVFJVRSkgJT4lCiAgZ3JvdXBfYnkoc2VudGltZW50KSAlPiUKICB0b3BfbigxMCkgJT4lCiAgdW5ncm91cCgpICU+JQogIG11dGF0ZSh3b3JkID0gcmVvcmRlcih3b3JkLCBuKSkgJT4lCiAgZ2dwbG90KGFlcyh3b3JkLCBuLCBmaWxsID0gc2VudGltZW50KSkgKwogIGdlb21fY29sKHNob3cubGVnZW5kID0gRkFMU0UpICsKICBmYWNldF93cmFwKHZhcnMoc2VudGltZW50KSwgc2NhbGVzID0gImZyZWUiKSArCiAgbGFicyh5ID0gIlJvZGR5IFJpY2NoIFNvcnJ5IGZvciBCZWluZyBBbnRpc29jaWFsIGFsYnVtOiBXb3JkcyB0aGF0IGNvbnRyaWJ1dGUgdGhlIG1vc3QgdG8gZWFjaCBzZW50aW1lbnQiLAogICAgICAgeCA9IE5VTEwpICsKICBzY2FsZV9maWxsX3ZpcmlkaXNfZCgpICsKICBjb29yZF9mbGlwKCkgKwogIHRoZW1lX21pbmltYWwoKQpgYGAKCmBgYHtyfQoKYW50aXNvY2lhbF93b3JkcyAlPiUgCiAgaW5uZXJfam9pbihucmMpICU+JSAKICBjb3VudCh3b3JkLCBzZW50aW1lbnQsIHNvcnQgPSBUUlVFKSAlPiUKICBncm91cF9ieShzZW50aW1lbnQpICU+JQogIHRvcF9uKDUpICU+JQogIHVuZ3JvdXAoKSAlPiUKICBtdXRhdGUod29yZCA9IHJlb3JkZXIod29yZCwgbikpICU+JQogIGdncGxvdChhZXMod29yZCwgbiwgZmlsbCA9IHNlbnRpbWVudCkpICsKICBnZW9tX2NvbChzaG93LmxlZ2VuZCA9IEZBTFNFKSArCiAgZmFjZXRfd3JhcCh2YXJzKHNlbnRpbWVudCksIHNjYWxlcyA9ICJmcmVlIikgKwogIGxhYnMoeSA9ICJSb2RkeSBSaWNjaCBTb3JyeSBmb3IgQmVpbmcgQW50aXNvY2lhbCBhbGJ1bTogV29yZHMgdGhhdCBjb250cmlidXRlIHRoZSBtb3N0IHRvIGVhY2ggc2VudGltZW50IiwKICAgICAgIHggPSBOVUxMKSArCiAgc2NhbGVfZmlsbF92aXJpZGlzX2QoKSArCiAgY29vcmRfZmxpcCgpICsKICB0aGVtZV9taW5pbWFsKCkKYGBgCgoKNC4gTmV4dCBJIGNyZWF0ZWQgYmlncmFtcyBvZiB0aGUgbHlyaWNzLCByZW1vdmVkIHRoZSBzdG9wd29yZHMsIGFuZCBjcmVhdGVkIGEgdGFibGUgYW5kIHdvcmQgY2xvdWQgb2YgdGhlIG1vc3QgY29tbW9uIGJpZ3JhbXMuCgpgYGB7cn0KYW50aXNvY2lhbF9seXJpY3MgJT4lCiAgdW5uZXN0X3Rva2VucyhiaWdyYW0sIGxpbmUsIHRva2VuID0gIm5ncmFtcyIsIG4gPSAyKSAlPiUgCiAgc2VsZWN0KGJpZ3JhbSkKCmBgYAoKYGBge3J9CmFudGlzb2NpYWxfbHlyaWNzICU+JQogIHVubmVzdF90b2tlbnMoYmlncmFtLCBsaW5lLCB0b2tlbiA9ICJuZ3JhbXMiLCBuID0gMikgJT4lIAogIHNlbGVjdChiaWdyYW0pIC0+IGFudGlzb2NpYWxfYmlncmFtcwoKYGBgCgpgYGB7cn0KYW50aXNvY2lhbF9iaWdyYW1zICU+JSAKICBzZXBhcmF0ZShiaWdyYW0sIGMoIndvcmQxIiwgIndvcmQyIiksIHNlcCA9ICIgIikg
JT4lIAogIGZpbHRlcighd29yZDEgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JQogIGZpbHRlcighd29yZDIgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JSAKICB1bml0ZShiaWdyYW0sIHdvcmQxLCB3b3JkMiwgc2VwID0gIiAiKQoKYGBgCmBgYHtyfQoKYW50aXNvY2lhbF9iaWdyYW1zICU+JSAKICBzZXBhcmF0ZShiaWdyYW0sIGMoIndvcmQxIiwgIndvcmQyIiksIHNlcCA9ICIgIikgJT4lIAogIGZpbHRlcighd29yZDEgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JQogIGZpbHRlcighd29yZDIgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JSAKICB1bml0ZShiaWdyYW0sIHdvcmQxLCB3b3JkMiwgc2VwID0gIiAiKSAlPiUKICBjb3VudChiaWdyYW0sIHNvcnQgPSBUKSAlPiUKICBmaWx0ZXIobiA+IDEpICU+JSAKICB3b3JkY2xvdWQyKHNpemUgPSAuNSkKYGBgCgo1LiBMYXN0bHksIEkgdXNlZCB0aGUgYmlncmFtIG1ldGhvZCB0byBmaW5kIHRoZSBtb3N0IGNvbW1vbiB3b3JkcyB0aGF0IGNhbWUgYWZ0ZXIgdGhlIHdvcmRzIGhlL3NoZS4KCmBgYHtyfQoKCmZpcnN0X3dvcmQgPC0gYygiaGUiLCAic2hlIikgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCgphbnRpc29jaWFsX2JpZ3JhbXMgJT4lIAogIGNvdW50KGJpZ3JhbSwgc29ydCA9IFQpICU+JSAKICBzZXBhcmF0ZShiaWdyYW0sIGMoIndvcmQxIiwgIndvcmQyIiksIHNlcCA9ICIgIikgJT4lICAgICAgIAogIGZpbHRlcih3b3JkMSAlaW4lIGZpcnN0X3dvcmQpICU+JSAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgY291bnQod29yZDEsIHdvcmQyLCB3dCA9IG4sIHNvcnQgPSBUUlVFKSAlPiUgCiAgcmVuYW1lKHRvdGFsID0gbm4pICU+JQogIG11dGF0ZSh3b3JkMiA9IGZhY3Rvcih3b3JkMiwgbGV2ZWxzID0gcmV2KHVuaXF1ZSh3b3JkMikpKSkgJT4lICAgIAogIGdyb3VwX2J5KHdvcmQxKSAlPiUgCiAgdG9wX24oNSkgJT4lIAogIGdncGxvdChhZXMod29yZDIsIHRvdGFsLCBmaWxsID0gd29yZDEpKSArICAgICAgICAgICAgICAgICAgICAgICAgIAogIHNjYWxlX2ZpbGxfdmlyaWRpc19kKCkgKyAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAogIGdlb21fY29sKHNob3cubGVnZW5kID0gRkFMU0UpICsKICBsYWJzKHggPSBOVUxMLCB5ID0gTlVMTCwgdGl0bGUgPSAiV29yZCBmb2xsb3dpbmc6IikgKwogIGZhY2V0X3dyYXAofndvcmQxLCBzY2FsZXMgPSAiZnJlZSIpICsKICBjb29yZF9mbGlwKCkgKwogIHRoZW1lX21pbmltYWwoKQoKYGBgCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCg==