library(geniusr) # This package gets lyrics
library(tidyverse)
library(tidytext)
library(wordcloud2)
genius_token()
[1] "a4tjUC6-o2Q2woojDmmId0UtiQNH6kLYA-S6yNW2SHNIcyxLZWkxvMzXp5-3yJxY"
Assigment:
Pick an album of your choice - I recommend one with lots of lyrics. Rap albums are great for this.
- Find the album and get the lyrics, and unnest them.
- Clean the lyrics by removing stopwords, and then create a table and word cloud with the words counts.
- Do sentiment analyses using bing and nrc, and create graphs of the words that contribute most to each sentiment.
- Create bigrams of the lyrics, remove the stopwords, and create a table and word cloud of the most common bigrams.
- Use the bigram method to find the most common words that come after words of your choice, like i/you or he/she.
Step 1) Find the album and get the lyrics, and unnest them.
search_song("mansion")
get_song_meta(725820)
mansion_tracks <- scrape_tracklist(119880)
argument is not an atomic vector; coercing
mansion_tracks
mansion_lyrics <- map_df(mansion_tracks$song_lyrics_url, scrape_lyrics_url)
mansion_lyrics
NA
mansion_words <- mansion_lyrics %>%
unnest_tokens(word, line) %>%
select(song_name, word)
mansion_words
Step 2) Clean the lyrics by removing stopwords, and then create a table and word cloud with the words counts.
mansion_words %>%
anti_join(get_stopwords()) %>%
count(word, sort = T)
Joining, by = "word"
mansion_words %>%
anti_join(get_stopwords()) %>%
count(word, sort = T) %>%
top_n(200) %>%
wordcloud2(size = .5)
Joining, by = "word"
Selecting by n
Step 3. Do sentiment analyses using bing and nrc, and create graphs of the words that contribute most to each sentiment.
bing <- get_sentiments("bing")
mansion_words %>%
inner_join(bing) %>%
count(word, sentiment, sort = TRUE)
Joining, by = "word"
mansion_words %>%
inner_join(bing) %>%
count(word, sentiment, sort = TRUE) %>%
group_by(sentiment) %>%
top_n(10) %>%
ungroup() %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(word, n, fill = sentiment)) +
geom_col(show.legend = FALSE) +
facet_wrap(vars(sentiment), scales = "free") +
labs(y = "NF mansion album: Words that contribute the most to each sentiment",
x = NULL) +
scale_fill_viridis_d() +
coord_flip() +
theme_minimal()
Joining, by = "word"
Selecting by n

nrc <- get_sentiments("nrc")
mansion_words %>%
inner_join(nrc) %>%
count(word, sentiment, sort = TRUE) %>%
group_by(sentiment) %>%
top_n(3) %>%
ungroup() %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(word, n, fill = sentiment)) +
geom_col(show.legend = FALSE) +
facet_wrap(vars(sentiment), scales = "free") +
labs(y = "NF's mansion album: Words that contribute the most to each sentiment",
x = NULL) +
scale_fill_viridis_d() +
coord_flip() +
theme_minimal()
Joining, by = "word"
Selecting by n

Step 4. 4. Create bigrams of the lyrics, remove the stopwords, and create a table and word cloud of the most common bigrams.
mansion_lyrics %>%
unnest_tokens(bigram, line, token = "ngrams", n = 2) %>%
select(bigram) -> mansion_bigrams
mansion_bigrams %>%
separate(bigram, c("word1", "word2"), sep = " ") %>%
filter(!word1 %in% stop_words$word) %>%
filter(!word2 %in% stop_words$word) %>%
unite(bigram, word1, word2, sep = " ") %>%
count(bigram, sort = T)
NA
mansion_bigrams %>%
separate(bigram, c("word1", "word2"), sep = " ") %>%
filter(!word1 %in% stop_words$word) %>%
filter(!word2 %in% stop_words$word) %>%
unite(bigram, word1, word2, sep = " ") %>%
count(bigram, sort = T) %>%
filter(n > 1) %>%
wordcloud2(size = 1)
Step 5. Use the bigram method to find the most common words that come after words of your choice, like i/you or he/she.
first_word <- c("i", "you") # these need to be lowercase
mansion_bigrams %>%
count(bigram, sort = T) %>%
separate(bigram, c("word1", "word2"), sep = " ") %>% # separate the two words
filter(word1 %in% first_word) %>% # find first words from our list
count(word1, word2, wt = n, sort = TRUE) %>%
rename(total = nn) %>%
mutate(word2 = factor(word2, levels = rev(unique(word2)))) %>% # put the words in order
group_by(word1) %>%
top_n(3) %>%
ggplot(aes(word2, total, fill = word1)) + #
scale_fill_viridis_d() + # set the color palette
geom_col(show.legend = FALSE) +
labs(x = NULL, y = NULL, title = "Word following:") +
facet_wrap(~word1, scales = "free") +
coord_flip() +
theme_minimal()
Selecting by total

LS0tCnRpdGxlOiAiaW5kZXBlbmRlbnQgbHlyaWMgYW5hbHlzaXMiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCmBgYHtyfQpsaWJyYXJ5KGdlbml1c3IpICAgICAgICAgICAgICAgICAgICAgICAgICMgVGhpcyBwYWNrYWdlIGdldHMgbHlyaWNzCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KHRpZHl0ZXh0KQpsaWJyYXJ5KHdvcmRjbG91ZDIpCmBgYAoKYGBge3J9CgpnZW5pdXNfdG9rZW4oKQpgYGAKQXNzaWdtZW50OgoKUGljayBhbiBhbGJ1bSBvZiB5b3VyIGNob2ljZSAtIEkgcmVjb21tZW5kIG9uZSB3aXRoIGxvdHMgb2YgbHlyaWNzLiBSYXAgYWxidW1zIGFyZSBncmVhdCBmb3IgdGhpcy4KCjEuIEZpbmQgdGhlIGFsYnVtIGFuZCBnZXQgdGhlIGx5cmljcywgYW5kIHVubmVzdCB0aGVtLiAgCjIuIENsZWFuIHRoZSBseXJpY3MgYnkgcmVtb3Zpbmcgc3RvcHdvcmRzLCBhbmQgdGhlbiBjcmVhdGUgYSB0YWJsZSBhbmQgd29yZCBjbG91ZCB3aXRoIHRoZSB3b3JkcyBjb3VudHMuICAKMy4gRG8gc2VudGltZW50IGFuYWx5c2VzIHVzaW5nIGJpbmcgYW5kIG5yYywgYW5kIGNyZWF0ZSBncmFwaHMgb2YgdGhlIHdvcmRzIHRoYXQgY29udHJpYnV0ZSBtb3N0IHRvIGVhY2ggc2VudGltZW50LiAgCjQuIENyZWF0ZSBiaWdyYW1zIG9mIHRoZSBseXJpY3MsIHJlbW92ZSB0aGUgc3RvcHdvcmRzLCBhbmQgY3JlYXRlIGEgdGFibGUgYW5kIHdvcmQgY2xvdWQgb2YgdGhlIG1vc3QgY29tbW9uIGJpZ3JhbXMuICAKNS4gVXNlIHRoZSBiaWdyYW0gbWV0aG9kIHRvIGZpbmQgdGhlIG1vc3QgY29tbW9uIHdvcmRzIHRoYXQgY29tZSBhZnRlciB3b3JkcyBvZiB5b3VyIGNob2ljZSwgbGlrZSBpL3lvdSBvciBoZS9zaGUuCgpTdGVwIDEpIEZpbmQgdGhlIGFsYnVtIGFuZCBnZXQgdGhlIGx5cmljcywgYW5kIHVubmVzdCB0aGVtLgoKYGBge3J9CnNlYXJjaF9zb25nKCJtYW5zaW9uIikKYGBgCmBgYHtyfQpnZXRfc29uZ19tZXRhKDcyNTgyMCkKYGBgCgpgYGB7cn0KbWFuc2lvbl90cmFja3MgPC0gc2NyYXBlX3RyYWNrbGlzdCgxMTk4ODApCm1hbnNpb25fdHJhY2tzCmBgYAoKYGBge3J9Cm1hbnNpb25fbHlyaWNzIDwtIG1hcF9kZihtYW5zaW9uX3RyYWNrcyRzb25nX2x5cmljc191cmwsIHNjcmFwZV9seXJpY3NfdXJsKQptYW5zaW9uX2x5cmljcwoKYGBgCgpgYGB7cn0KbWFuc2lvbl93b3JkcyA8LSBtYW5zaW9uX2x5cmljcyAlPiUKICB1bm5lc3RfdG9rZW5zKHdvcmQsIGxpbmUpICU+JSAKICBzZWxlY3Qoc29uZ19uYW1lLCB3b3JkKQoKbWFuc2lvbl93b3JkcwpgYGAKClN0ZXAgMikgQ2xlYW4gdGhlIGx5cmljcyBieSByZW1vdmluZyBzdG9wd29yZHMsIGFuZCB0aGVuIGNyZWF0ZSBhIHRhYmxlIGFuZCB3b3JkIGNsb3VkIHdpdGggdGhlIHdvcmRzIGNvdW50cy4KCmBgYHtyfQptYW5zaW9uX3dvcmRzICU+JSAKICBhbnRpX2pvaW4oZ2V0X3N0b3B3b3JkcygpKSAlPiUgCiAgY291bnQod29yZCwgc29ydCA9IFQpCmBgYAoKYGBge3J9Cm1hbnNpb25fd29yZHMgJT4lIAogIGFudGlfam9pbihnZXRfc3RvcHdvcmRzKCkpICU+JSAKICBjb3VudCh3b3JkLCBzb3J0ID0gVCkgJT4lIAogIHRvcF9uKDIwMCkgJT4lIAogIHdvcmRjbG91ZDIoc2l6ZSA9IC41KQpgYGAKClN0ZXAgMy4gRG8gc2VudGltZW50IGFuYWx5c2VzIHVzaW5nIGJpbmcgYW5kIG5yYywgYW5kIGNyZWF0ZSBncmFwaHMgb2YgdGhlIHdvcmRzIHRoYXQgY29udHJpYnV0ZSBtb3N0IHRvIGVhY2ggc2VudGltZW50LiAgCgpgYGB7cn0KYmluZyA8LSBnZXRfc2VudGltZW50cygiYmluZyIpCgptYW5zaW9uX3dvcmRzICU+JSAKICBpbm5lcl9qb2luKGJpbmcpICU+JSAKICBjb3VudCh3b3JkLCBzZW50aW1lbnQsIHNvcnQgPSBUUlVFKQoKYGBgCgpgYGB7cn0KbWFuc2lvbl93b3JkcyAlPiUgCiAgaW5uZXJfam9pbihiaW5nKSAlPiUgCiAgY291bnQod29yZCwgc2VudGltZW50LCBzb3J0ID0gVFJVRSkgJT4lIAogIGdyb3VwX2J5KHNlbnRpbWVudCkgJT4lCiAgdG9wX24oMTApICU+JQogIHVuZ3JvdXAoKSAlPiUKICBtdXRhdGUod29yZCA9IHJlb3JkZXIod29yZCwgbikpICU+JQogIGdncGxvdChhZXMod29yZCwgbiwgZmlsbCA9IHNlbnRpbWVudCkpICsKICBnZW9tX2NvbChzaG93LmxlZ2VuZCA9IEZBTFNFKSArCiAgZmFjZXRfd3JhcCh2YXJzKHNlbnRpbWVudCksIHNjYWxlcyA9ICJmcmVlIikgKwogIGxhYnMoeSA9ICJORiBtYW5zaW9uIGFsYnVtOiBXb3JkcyB0aGF0IGNvbnRyaWJ1dGUgdGhlIG1vc3QgdG8gZWFjaCBzZW50aW1lbnQiLAogICAgICAgeCA9IE5VTEwpICsKICBzY2FsZV9maWxsX3ZpcmlkaXNfZCgpICsKICBjb29yZF9mbGlwKCkgKwogIHRoZW1lX21pbmltYWwoKQpgYGAKCmBgYHtyfQpucmMgPC0gZ2V0X3NlbnRpbWVudHMoIm5yYyIpCm1hbnNpb25fd29yZHMgJT4lIAogIGlubmVyX2pvaW4obnJjKSAlPiUgCiAgY291bnQod29yZCwgc2VudGltZW50LCBzb3J0ID0gVFJVRSkgJT4lIAogIGdyb3VwX2J5KHNlbnRpbWVudCkgJT4lCiAgdG9wX24oMykgJT4lCiAgdW5ncm91cCgpICU+JQogIG11dGF0ZSh3b3JkID0gcmVvcmRlcih3b3JkLCBuKSkgJT4lCiAgZ2dwbG90KGFlcyh3b3JkLCBuLCBmaWxsID0gc2VudGltZW50KSkgKwogIGdlb21fY29sKHNob3cubGVnZW5kID0gRkFMU0UpICsKICBmYWNldF93cmFwKHZhcnMoc2VudGltZW50KSwgc2NhbGVzID0gImZyZWUiKSArCiAgbGFicyh5ID0gIk5GJ3MgbWFuc2lvbiBhbGJ1bTogV29yZHMgdGhhdCBjb250cmlidXRlIHRoZSBtb3N0IHRvIGVhY2ggc2VudGltZW50IiwKICAgICAgIHggPSBOVUxMKSArCiAgc2NhbGVfZmlsbF92aXJpZGlzX2QoKSArCiAgY29vcmRfZmxpcCgpICsKICB0aGVtZV9taW5pbWFsKCkKYGBgCgpTdGVwIDQuIDQuIENyZWF0ZSBiaWdyYW1zIG9mIHRoZSBseXJpY3MsIHJlbW92ZSB0aGUgc3RvcHdvcmRzLCBhbmQgY3JlYXRlIGEgdGFibGUgYW5kIHdvcmQgY2xvdWQgb2YgdGhlIG1vc3QgY29tbW9uIGJpZ3JhbXMuCgpgYGB7cn0KbWFuc2lvbl9seXJpY3MgJT4lCiAgdW5uZXN0X3Rva2VucyhiaWdyYW0sIGxpbmUsIHRva2VuID0gIm5ncmFtcyIsIG4gPSAyKSAlPiUgCiAgc2VsZWN0KGJpZ3JhbSkgLT4gbWFuc2lvbl9iaWdyYW1zCm1hbnNpb25fYmlncmFtcyAlPiUgCiAgc2VwYXJhdGUoYmlncmFtLCBjKCJ3b3JkMSIsICJ3b3JkMiIpLCBzZXAgPSAiICIpICU+JSAKICBmaWx0ZXIoIXdvcmQxICVpbiUgc3RvcF93b3JkcyR3b3JkKSAlPiUKICBmaWx0ZXIoIXdvcmQyICVpbiUgc3RvcF93b3JkcyR3b3JkKSAlPiUgCiAgdW5pdGUoYmlncmFtLCB3b3JkMSwgd29yZDIsIHNlcCA9ICIgIikgJT4lIAogIGNvdW50KGJpZ3JhbSwgc29ydCA9IFQpCgpgYGAKYGBge3J9Cm1hbnNpb25fYmlncmFtcyAlPiUgCiAgc2VwYXJhdGUoYmlncmFtLCBjKCJ3b3JkMSIsICJ3b3JkMiIpLCBzZXAgPSAiICIpICU+JSAKICBmaWx0ZXIoIXdvcmQxICVpbiUgc3RvcF93b3JkcyR3b3JkKSAlPiUKICBmaWx0ZXIoIXdvcmQyICVpbiUgc3RvcF93b3JkcyR3b3JkKSAlPiUgCiAgdW5pdGUoYmlncmFtLCB3b3JkMSwgd29yZDIsIHNlcCA9ICIgIikgJT4lIAogIGNvdW50KGJpZ3JhbSwgc29ydCA9IFQpICU+JSAKICBmaWx0ZXIobiA+IDEpICU+JSAKICB3b3JkY2xvdWQyKHNpemUgPSAxKQpgYGAKClN0ZXAgNS4gVXNlIHRoZSBiaWdyYW0gbWV0aG9kIHRvIGZpbmQgdGhlIG1vc3QgY29tbW9uIHdvcmRzIHRoYXQgY29tZSBhZnRlciB3b3JkcyBvZiB5b3VyIGNob2ljZSwgbGlrZSBpL3lvdSBvciBoZS9zaGUuCgpgYGB7cn0KZmlyc3Rfd29yZCA8LSBjKCJpIiwgInlvdSIpICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgdGhlc2UgbmVlZCB0byBiZSBsb3dlcmNhc2UKCm1hbnNpb25fYmlncmFtcyAlPiUgCiAgY291bnQoYmlncmFtLCBzb3J0ID0gVCkgJT4lIAogIHNlcGFyYXRlKGJpZ3JhbSwgYygid29yZDEiLCAid29yZDIiKSwgc2VwID0gIiAiKSAlPiUgICAgICAgIyBzZXBhcmF0ZSB0aGUgdHdvIHdvcmRzCiAgZmlsdGVyKHdvcmQxICVpbiUgZmlyc3Rfd29yZCkgJT4lICAgICAgICAgICAgICAgICAgICAgICAgICAjIGZpbmQgZmlyc3Qgd29yZHMgZnJvbSBvdXIgbGlzdAogIGNvdW50KHdvcmQxLCB3b3JkMiwgd3QgPSBuLCBzb3J0ID0gVFJVRSkgJT4lIAogIHJlbmFtZSh0b3RhbCA9IG5uKSAlPiUgCgoKICBtdXRhdGUod29yZDIgPSBmYWN0b3Iod29yZDIsIGxldmVscyA9IHJldih1bmlxdWUod29yZDIpKSkpICU+JSAgICAgIyBwdXQgdGhlIHdvcmRzIGluIG9yZGVyCiAgZ3JvdXBfYnkod29yZDEpICU+JSAKICB0b3BfbigzKSAlPiUgCiAgZ2dwbG90KGFlcyh3b3JkMiwgdG90YWwsIGZpbGwgPSB3b3JkMSkpICsgICAgICAgICAgICAgICAgICAgICAgICAgICMKICBzY2FsZV9maWxsX3ZpcmlkaXNfZCgpICsgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIyBzZXQgdGhlIGNvbG9yIHBhbGV0dGUKICBnZW9tX2NvbChzaG93LmxlZ2VuZCA9IEZBTFNFKSArCiAgbGFicyh4ID0gTlVMTCwgeSA9IE5VTEwsIHRpdGxlID0gIldvcmQgZm9sbG93aW5nOiIpICsKICBmYWNldF93cmFwKH53b3JkMSwgc2NhbGVzID0gImZyZWUiKSArCiAgY29vcmRfZmxpcCgpICsKICB0aGVtZV9taW5pbWFsKCkKCmBgYAoKCgoK