library(geniusr) # This package gets lyrics
library(tidyverse)
library(tidytext)
library(wordcloud2)
genius_token()
search_song("Another Brick in the Wall")
This is a search for “Another Brick in the Wall” by Pink Floyd.
get_song_meta(116425)
This gathers information about the song.
TheWall_tracks <- scrape_tracklist(14831)
argument is not an atomic vector; coercing
TheWall_tracks
This obtains and displays the album information and tracklist.
TheWall_lyrics <- map_df(TheWall_tracks$song_lyrics_url, scrape_lyrics_url)
TheWall_lyrics
NA
This obtains the lyrics for the album.
TheWall_words <- TheWall_lyrics %>%
unnest_tokens(word, line) %>%
select(song_name, word)
TheWall_words
This breaks all the lyrics down to one word per line in order to make it easier to analyze.
TheWall_words %>%
anti_join(get_stopwords()) %>%
count(word, sort = T)
Joining, by = "word"
This table shows how common each word is by showing the word count, while removing stopwords.
TheWall_words %>%
anti_join(get_stopwords()) %>%
count(word, sort = T) %>%
top_n(200) %>%
wordcloud2(size = .5)
Joining, by = "word"
Selecting by n
NA
This is a word cloud that shows the common word with the word counts.
bing <- get_sentiments("bing")
bing
This is what is called a sentiment analysis. It analyizes each word and relates it to emotion. This obtains sentiments.
TheWall_words %>%
inner_join(bing) %>%
count(word, sentiment, sort = TRUE)
Joining, by = "word"
This is the sentiment analysis of the song words “The Wall” by Pink Floyd.
TheWall_words %>%
inner_join(bing) %>%
count(word, sentiment, sort = TRUE) %>%
group_by(sentiment) %>%
top_n(10) %>%
ungroup() %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(word, n, fill = sentiment)) +
geom_col(show.legend = FALSE) +
facet_wrap(vars(sentiment), scales = "free") +
labs(y = "Pink Floyd's the Wall album: Words that contribute the most to each sentiment",
x = NULL) +
scale_fill_viridis_d() +
coord_flip() +
theme_minimal()
Joining, by = "word"
Selecting by n

This is a graph showing the sentiment data.
TheWall_words %>%
inner_join(bing) %>%
count(word, sentiment, sort = TRUE) %>%
filter(sentiment == "positive") %>%
select(word, n) %>%
wordcloud2()
Joining, by = "word"
TheWall_words %>%
inner_join(bing) %>%
count(word, sentiment, sort = TRUE) %>%
filter(sentiment == "negative") %>%
select(word, n) %>%
wordcloud2()
Joining, by = "word"
NA
This is a word cloud showing the sentiment data.
nrc <- get_sentiments("nrc")
nrc
This is another sentiment analysis using the standards set out by the National Research Council. This obtains the sentiments.
nrc %>%
distinct(sentiment)
This lists all the different sentiments.
TheWall_words %>%
inner_join(nrc) %>%
count(word, sentiment, sort = TRUE) %>%
group_by(sentiment) %>%
top_n(3) %>%
ungroup() %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(word, n, fill = sentiment)) +
geom_col(show.legend = FALSE) +
facet_wrap(vars(sentiment), scales = "free") +
labs(y = "Pink Floyd's The Wall: Words that contribute the most to each sentiment",
x = NULL) +
scale_fill_viridis_d() +
coord_flip() +
theme_minimal()
Joining, by = "word"
Selecting by n

These are mini graphs that shows the words that contribute to each sentiment.
TheWall_words %>%
inner_join(nrc) %>%
count(word, sentiment, sort = TRUE)
Joining, by = "word"
This lists the words, sentiments and the count of the amount of times the word was used.
TheWall_lyrics %>%
unnest_tokens(bigram, line, token = "ngrams", n = 2) %>%
select(bigram) -> TheWall_bigrams
This creates a bigram which are word pairs.
TheWall_bigrams %>%
count(bigram, sort = T)
This is a table showing the word pairs and the amount of times used.
TheWall_bigrams %>%
separate(bigram, c("word1", "word2"), sep = " ") %>%
filter(!word1 %in% stop_words$word) %>%
filter(!word2 %in% stop_words$word) %>%
unite(bigram, word1, word2, sep = " ") %>%
count(bigram, sort = T)
This is a table that shows the bigrams with the amount of times used while removing the stopwords.
TheWall_bigrams %>%
separate(bigram, c("word1", "word2"), sep = " ") %>%
filter(!word1 %in% stop_words$word) %>%
filter(!word2 %in% stop_words$word) %>%
unite(bigram, word1, word2, sep = " ") %>%
count(bigram, sort = T) %>%
filter(n > 1) %>%
wordcloud2(size = .5)
This is a word cloud of the bigrams and the count of how often the were used.
first_word <- c("i", "you") # these need to be lowercase
TheWall_bigrams %>%
count(bigram, sort = T) %>%
separate(bigram, c("word1", "word2"), sep = " ") %>% # separate the two words
filter(word1 %in% first_word) %>% # find first words from our list
count(word1, word2, wt = n, sort = TRUE) %>%
rename(total = nn)
NA
Using the bigrams we can see words that follow a given word. This is a table that represents that.
first_word <- c("i", "you") # these need to be lowercase
TheWall_bigrams %>%
count(bigram, sort = T) %>%
separate(bigram, c("word1", "word2"), sep = " ") %>% # separate the two words
filter(word1 %in% first_word) %>% # find first words from our list
count(word1, word2, wt = n, sort = TRUE) %>%
rename(total = nn) %>%
mutate(word2 = factor(word2, levels = rev(unique(word2)))) %>% # put the words in order
group_by(word1) %>%
top_n(5) %>%
ggplot(aes(word2, total, fill = word1)) + #
scale_fill_viridis_d() + # set the color palette
geom_col(show.legend = FALSE) +
labs(x = NULL, y = NULL, title = "Word following:") +
facet_wrap(~word1, scales = "free") +
coord_flip() +
theme_minimal()
Selecting by total

These are graphs that represent the bigram analysis in the previous table.
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmxpYnJhcnkoZ2VuaXVzcikgICAgICAgICAgICAgICAgICAgICAgICAgIyBUaGlzIHBhY2thZ2UgZ2V0cyBseXJpY3MKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkodGlkeXRleHQpCmxpYnJhcnkod29yZGNsb3VkMikKYGBgCgpgYGB7cn0KZ2VuaXVzX3Rva2VuKCkKYGBgCgpgYGB7cn0Kc2VhcmNoX3NvbmcoIkFub3RoZXIgQnJpY2sgaW4gdGhlIFdhbGwiKQpgYGAKVGhpcyBpcyBhIHNlYXJjaCBmb3IgIkFub3RoZXIgQnJpY2sgaW4gdGhlIFdhbGwiIGJ5IFBpbmsgRmxveWQuCmBgYHtyfQpnZXRfc29uZ19tZXRhKDExNjQyNSkKYGBgClRoaXMgZ2F0aGVycyBpbmZvcm1hdGlvbiBhYm91dCB0aGUgc29uZy4KYGBge3J9ClRoZVdhbGxfdHJhY2tzIDwtIHNjcmFwZV90cmFja2xpc3QoMTQ4MzEpClRoZVdhbGxfdHJhY2tzCmBgYApUaGlzIG9idGFpbnMgYW5kIGRpc3BsYXlzIHRoZSBhbGJ1bSBpbmZvcm1hdGlvbiBhbmQgdHJhY2tsaXN0LgpgYGB7cn0KVGhlV2FsbF9seXJpY3MgPC0gbWFwX2RmKFRoZVdhbGxfdHJhY2tzJHNvbmdfbHlyaWNzX3VybCwgc2NyYXBlX2x5cmljc191cmwpClRoZVdhbGxfbHlyaWNzCgpgYGAKVGhpcyBvYnRhaW5zIHRoZSBseXJpY3MgZm9yIHRoZSBhbGJ1bS4KYGBge3J9ClRoZVdhbGxfd29yZHMgPC0gVGhlV2FsbF9seXJpY3MgJT4lCiAgdW5uZXN0X3Rva2Vucyh3b3JkLCBsaW5lKSAlPiUgCiAgc2VsZWN0KHNvbmdfbmFtZSwgd29yZCkKClRoZVdhbGxfd29yZHMKYGBgClRoaXMgYnJlYWtzIGFsbCB0aGUgbHlyaWNzIGRvd24gdG8gb25lIHdvcmQgcGVyIGxpbmUgaW4gb3JkZXIgdG8gbWFrZSBpdCBlYXNpZXIgdG8gYW5hbHl6ZS4KYGBge3J9ClRoZVdhbGxfd29yZHMgJT4lIAogIGFudGlfam9pbihnZXRfc3RvcHdvcmRzKCkpICU+JSAKICBjb3VudCh3b3JkLCBzb3J0ID0gVCkKYGBgClRoaXMgdGFibGUgc2hvd3MgaG93IGNvbW1vbiBlYWNoIHdvcmQgaXMgYnkgc2hvd2luZyB0aGUgd29yZCBjb3VudCwgd2hpbGUgcmVtb3Zpbmcgc3RvcHdvcmRzLgpgYGB7cn0KVGhlV2FsbF93b3JkcyAlPiUgCiAgYW50aV9qb2luKGdldF9zdG9wd29yZHMoKSkgJT4lIAogIGNvdW50KHdvcmQsIHNvcnQgPSBUKSAlPiUKICB0b3BfbigyMDApICU+JQogIHdvcmRjbG91ZDIoc2l6ZSA9IC41KQogICAgICAgICAgCmBgYApUaGlzIGlzIGEgd29yZCBjbG91ZCB0aGF0IHNob3dzIHRoZSBjb21tb24gd29yZCB3aXRoIHRoZSB3b3JkIGNvdW50cy4KYGBge3J9CmJpbmcgPC0gZ2V0X3NlbnRpbWVudHMoImJpbmciKQpiaW5nCmBgYApUaGlzIGlzIHdoYXQgaXMgY2FsbGVkIGEgc2VudGltZW50IGFuYWx5c2lzLiBJdCBhbmFseWl6ZXMgZWFjaCB3b3JkIGFuZCByZWxhdGVzIGl0IHRvIGVtb3Rpb24uIFRoaXMgb2J0YWlucyBzZW50aW1lbnRzLgpgYGB7cn0KVGhlV2FsbF93b3JkcyAlPiUgCiAgaW5uZXJfam9pbihiaW5nKSAlPiUgCiAgY291bnQod29yZCwgc2VudGltZW50LCBzb3J0ID0gVFJVRSkKCmBgYApUaGlzIGlzIHRoZSBzZW50aW1lbnQgYW5hbHlzaXMgb2YgdGhlIHNvbmcgd29yZHMgIlRoZSBXYWxsIiBieSBQaW5rIEZsb3lkLgpgYGB7cn0KVGhlV2FsbF93b3JkcyAlPiUgCiAgaW5uZXJfam9pbihiaW5nKSAlPiUgCiAgY291bnQod29yZCwgc2VudGltZW50LCBzb3J0ID0gVFJVRSkgJT4lCiAgZ3JvdXBfYnkoc2VudGltZW50KSAlPiUKICB0b3BfbigxMCkgJT4lCiAgdW5ncm91cCgpICU+JQogIG11dGF0ZSh3b3JkID0gcmVvcmRlcih3b3JkLCBuKSkgJT4lCiAgZ2dwbG90KGFlcyh3b3JkLCBuLCBmaWxsID0gc2VudGltZW50KSkgKwogIGdlb21fY29sKHNob3cubGVnZW5kID0gRkFMU0UpICsKICBmYWNldF93cmFwKHZhcnMoc2VudGltZW50KSwgc2NhbGVzID0gImZyZWUiKSArCiAgbGFicyh5ID0gIlBpbmsgRmxveWQncyB0aGUgV2FsbCBhbGJ1bTogV29yZHMgdGhhdCBjb250cmlidXRlIHRoZSBtb3N0IHRvIGVhY2ggc2VudGltZW50IiwKICAgICAgIHggPSBOVUxMKSArCiAgc2NhbGVfZmlsbF92aXJpZGlzX2QoKSArCiAgY29vcmRfZmxpcCgpICsKICB0aGVtZV9taW5pbWFsKCkKYGBgClRoaXMgaXMgYSBncmFwaCBzaG93aW5nIHRoZSBzZW50aW1lbnQgZGF0YS4KYGBge3J9ClRoZVdhbGxfd29yZHMgJT4lIAogIGlubmVyX2pvaW4oYmluZykgJT4lIAogIGNvdW50KHdvcmQsIHNlbnRpbWVudCwgc29ydCA9IFRSVUUpICU+JSAKICBmaWx0ZXIoc2VudGltZW50ID09ICJwb3NpdGl2ZSIpICU+JQogIHNlbGVjdCh3b3JkLCBuKSAlPiUgCiAgd29yZGNsb3VkMigpCgoKVGhlV2FsbF93b3JkcyAlPiUgCiAgaW5uZXJfam9pbihiaW5nKSAlPiUgCiAgY291bnQod29yZCwgc2VudGltZW50LCBzb3J0ID0gVFJVRSkgJT4lIAogIGZpbHRlcihzZW50aW1lbnQgPT0gIm5lZ2F0aXZlIikgJT4lCiAgc2VsZWN0KHdvcmQsIG4pICU+JSAKICB3b3JkY2xvdWQyKCkKICAKYGBgClRoaXMgaXMgYSB3b3JkIGNsb3VkIHNob3dpbmcgdGhlIHNlbnRpbWVudCBkYXRhLgpgYGB7cn0KbnJjIDwtIGdldF9zZW50aW1lbnRzKCJucmMiKQpucmMKYGBgClRoaXMgaXMgYW5vdGhlciBzZW50aW1lbnQgYW5hbHlzaXMgdXNpbmcgdGhlIHN0YW5kYXJkcyBzZXQgb3V0IGJ5IHRoZSBOYXRpb25hbCBSZXNlYXJjaCBDb3VuY2lsLiBUaGlzIG9idGFpbnMgdGhlIHNlbnRpbWVudHMuCmBgYHtyfQpucmMgJT4lCiAgZGlzdGluY3Qoc2VudGltZW50KQpgYGAKVGhpcyBsaXN0cyBhbGwgdGhlIGRpZmZlcmVudCBzZW50aW1lbnRzLgpgYGB7cn0KVGhlV2FsbF93b3JkcyAlPiUgCiAgaW5uZXJfam9pbihucmMpICU+JSAKICBjb3VudCh3b3JkLCBzZW50aW1lbnQsIHNvcnQgPSBUUlVFKSAlPiUKICBncm91cF9ieShzZW50aW1lbnQpICU+JQogIHRvcF9uKDMpICU+JQogIHVuZ3JvdXAoKSAlPiUKICBtdXRhdGUod29yZCA9IHJlb3JkZXIod29yZCwgbikpICU+JQogIGdncGxvdChhZXMod29yZCwgbiwgZmlsbCA9IHNlbnRpbWVudCkpICsKICBnZW9tX2NvbChzaG93LmxlZ2VuZCA9IEZBTFNFKSArCiAgZmFjZXRfd3JhcCh2YXJzKHNlbnRpbWVudCksIHNjYWxlcyA9ICJmcmVlIikgKwogIGxhYnMoeSA9ICJQaW5rIEZsb3lkJ3MgVGhlIFdhbGw6IFdvcmRzIHRoYXQgY29udHJpYnV0ZSB0aGUgbW9zdCB0byBlYWNoIHNlbnRpbWVudCIsCiAgICAgICB4ID0gTlVMTCkgKwogIHNjYWxlX2ZpbGxfdmlyaWRpc19kKCkgKwogIGNvb3JkX2ZsaXAoKSArCiAgdGhlbWVfbWluaW1hbCgpCmBgYApUaGVzZSBhcmUgbWluaSBncmFwaHMgdGhhdCBzaG93cyB0aGUgd29yZHMgdGhhdCBjb250cmlidXRlIHRvIGVhY2ggc2VudGltZW50LgpgYGB7cn0KVGhlV2FsbF93b3JkcyAlPiUgCiAgaW5uZXJfam9pbihucmMpICU+JSAKICBjb3VudCh3b3JkLCBzZW50aW1lbnQsIHNvcnQgPSBUUlVFKQoKYGBgClRoaXMgbGlzdHMgdGhlIHdvcmRzLCBzZW50aW1lbnRzIGFuZCB0aGUgIGNvdW50IG9mIHRoZSBhbW91bnQgb2YgdGltZXMgdGhlIHdvcmQgd2FzIHVzZWQuCgpgYGB7cn0KVGhlV2FsbF9seXJpY3MgJT4lCiAgdW5uZXN0X3Rva2VucyhiaWdyYW0sIGxpbmUsIHRva2VuID0gIm5ncmFtcyIsIG4gPSAyKSAlPiUgCiAgc2VsZWN0KGJpZ3JhbSkgLT4gVGhlV2FsbF9iaWdyYW1zCgpgYGAKVGhpcyBjcmVhdGVzIGEgYmlncmFtIHdoaWNoIGFyZSB3b3JkIHBhaXJzLgpgYGB7cn0KVGhlV2FsbF9iaWdyYW1zICU+JQogIGNvdW50KGJpZ3JhbSwgc29ydCA9IFQpCmBgYApUaGlzIGlzIGEgdGFibGUgc2hvd2luZyB0aGUgd29yZCBwYWlycyBhbmQgdGhlIGFtb3VudCBvZiB0aW1lcyB1c2VkLgpgYGB7cn0KVGhlV2FsbF9iaWdyYW1zICU+JSAKICBzZXBhcmF0ZShiaWdyYW0sIGMoIndvcmQxIiwgIndvcmQyIiksIHNlcCA9ICIgIikgJT4lIAogIGZpbHRlcighd29yZDEgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JQogIGZpbHRlcighd29yZDIgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JSAKICB1bml0ZShiaWdyYW0sIHdvcmQxLCB3b3JkMiwgc2VwID0gIiAiKSAlPiUKICBjb3VudChiaWdyYW0sIHNvcnQgPSBUKQpgYGAKVGhpcyBpcyBhIHRhYmxlIHRoYXQgc2hvd3MgdGhlIGJpZ3JhbXMgd2l0aCB0aGUgYW1vdW50IG9mIHRpbWVzIHVzZWQgd2hpbGUgcmVtb3ZpbmcgdGhlIHN0b3B3b3Jkcy4KYGBge3J9ClRoZVdhbGxfYmlncmFtcyAlPiUgCiAgc2VwYXJhdGUoYmlncmFtLCBjKCJ3b3JkMSIsICJ3b3JkMiIpLCBzZXAgPSAiICIpICU+JSAKICBmaWx0ZXIoIXdvcmQxICVpbiUgc3RvcF93b3JkcyR3b3JkKSAlPiUKICBmaWx0ZXIoIXdvcmQyICVpbiUgc3RvcF93b3JkcyR3b3JkKSAlPiUgCiAgdW5pdGUoYmlncmFtLCB3b3JkMSwgd29yZDIsIHNlcCA9ICIgIikgJT4lCiAgY291bnQoYmlncmFtLCBzb3J0ID0gVCkgJT4lCiAgZmlsdGVyKG4gPiAxKSAlPiUgCiAgd29yZGNsb3VkMihzaXplID0gLjUpCmBgYApUaGlzIGlzIGEgd29yZCBjbG91ZCBvZiB0aGUgYmlncmFtcyBhbmQgdGhlIGNvdW50IG9mIGhvdyBvZnRlbiB0aGUgd2VyZSB1c2VkLgpgYGB7cn0KZmlyc3Rfd29yZCA8LSBjKCJpIiwgInlvdSIpICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgdGhlc2UgbmVlZCB0byBiZSBsb3dlcmNhc2UKClRoZVdhbGxfYmlncmFtcyAlPiUgCiAgY291bnQoYmlncmFtLCBzb3J0ID0gVCkgJT4lIAogIHNlcGFyYXRlKGJpZ3JhbSwgYygid29yZDEiLCAid29yZDIiKSwgc2VwID0gIiAiKSAlPiUgICAgICAgIyBzZXBhcmF0ZSB0aGUgdHdvIHdvcmRzCiAgZmlsdGVyKHdvcmQxICVpbiUgZmlyc3Rfd29yZCkgJT4lICAgICAgICAgICAgICAgICAgICAgICAgICAjIGZpbmQgZmlyc3Qgd29yZHMgZnJvbSBvdXIgbGlzdAogIGNvdW50KHdvcmQxLCB3b3JkMiwgd3QgPSBuLCBzb3J0ID0gVFJVRSkgJT4lIAogIHJlbmFtZSh0b3RhbCA9IG5uKQoKYGBgClVzaW5nIHRoZSBiaWdyYW1zIHdlIGNhbiBzZWUgd29yZHMgdGhhdCBmb2xsb3cgYSBnaXZlbiB3b3JkLiBUaGlzIGlzIGEgdGFibGUgdGhhdCByZXByZXNlbnRzIHRoYXQuCmBgYHtyfQpmaXJzdF93b3JkIDwtIGMoImkiLCAieW91IikgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIyB0aGVzZSBuZWVkIHRvIGJlIGxvd2VyY2FzZQoKVGhlV2FsbF9iaWdyYW1zICU+JSAKICBjb3VudChiaWdyYW0sIHNvcnQgPSBUKSAlPiUgCiAgc2VwYXJhdGUoYmlncmFtLCBjKCJ3b3JkMSIsICJ3b3JkMiIpLCBzZXAgPSAiICIpICU+JSAgICAgICAjIHNlcGFyYXRlIHRoZSB0d28gd29yZHMKICBmaWx0ZXIod29yZDEgJWluJSBmaXJzdF93b3JkKSAlPiUgICAgICAgICAgICAgICAgICAgICAgICAgICMgZmluZCBmaXJzdCB3b3JkcyBmcm9tIG91ciBsaXN0CiAgY291bnQod29yZDEsIHdvcmQyLCB3dCA9IG4sIHNvcnQgPSBUUlVFKSAlPiUgCiAgcmVuYW1lKHRvdGFsID0gbm4pICU+JQogIG11dGF0ZSh3b3JkMiA9IGZhY3Rvcih3b3JkMiwgbGV2ZWxzID0gcmV2KHVuaXF1ZSh3b3JkMikpKSkgJT4lICAgICAjIHB1dCB0aGUgd29yZHMgaW4gb3JkZXIKICBncm91cF9ieSh3b3JkMSkgJT4lIAogIHRvcF9uKDUpICU+JSAKICBnZ3Bsb3QoYWVzKHdvcmQyLCB0b3RhbCwgZmlsbCA9IHdvcmQxKSkgKyAgICAgICAgICAgICAgICAgICAgICAgICAgIwogIHNjYWxlX2ZpbGxfdmlyaWRpc19kKCkgKyAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIHNldCB0aGUgY29sb3IgcGFsZXR0ZQogIGdlb21fY29sKHNob3cubGVnZW5kID0gRkFMU0UpICsKICBsYWJzKHggPSBOVUxMLCB5ID0gTlVMTCwgdGl0bGUgPSAiV29yZCBmb2xsb3dpbmc6IikgKwogIGZhY2V0X3dyYXAofndvcmQxLCBzY2FsZXMgPSAiZnJlZSIpICsKICBjb29yZF9mbGlwKCkgKwogIHRoZW1lX21pbmltYWwoKQoKYGBgClRoZXNlIGFyZSBncmFwaHMgdGhhdCByZXByZXNlbnQgdGhlIGJpZ3JhbSBhbmFseXNpcyBpbiB0aGUgcHJldmlvdXMgdGFibGUuCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgo=