library(geniusr)                         # This package gets lyrics
library(tidyverse)
library(tidytext)
library(wordcloud2)
genius_token()
search_song("Another Brick in the Wall")

This is a search for “Another Brick in the Wall” by Pink Floyd.

get_song_meta(116425)

This gathers information about the song.

TheWall_tracks <- scrape_tracklist(14831)
argument is not an atomic vector; coercing
TheWall_tracks

This obtains and displays the album information and tracklist.

TheWall_lyrics <- map_df(TheWall_tracks$song_lyrics_url, scrape_lyrics_url)
TheWall_lyrics
NA

This obtains the lyrics for the album.

TheWall_words <- TheWall_lyrics %>%
  unnest_tokens(word, line) %>% 
  select(song_name, word)

TheWall_words

This breaks all the lyrics down to one word per line in order to make it easier to analyze.

TheWall_words %>% 
  anti_join(get_stopwords()) %>% 
  count(word, sort = T)
Joining, by = "word"

This table shows how common each word is by showing the word count, while removing stopwords.

TheWall_words %>% 
  anti_join(get_stopwords()) %>% 
  count(word, sort = T) %>%
  top_n(200) %>%
  wordcloud2(size = .5)
Joining, by = "word"
Selecting by n

NA

This is a word cloud that shows the common word with the word counts.

bing <- get_sentiments("bing")
bing

This is what is called a sentiment analysis. It analyizes each word and relates it to emotion. This obtains sentiments.

TheWall_words %>% 
  inner_join(bing) %>% 
  count(word, sentiment, sort = TRUE)
Joining, by = "word"

This is the sentiment analysis of the song words “The Wall” by Pink Floyd.

TheWall_words %>% 
  inner_join(bing) %>% 
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  top_n(10) %>%
  ungroup() %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(sentiment), scales = "free") +
  labs(y = "Pink Floyd's the Wall album: Words that contribute the most to each sentiment",
       x = NULL) +
  scale_fill_viridis_d() +
  coord_flip() +
  theme_minimal()
Joining, by = "word"
Selecting by n

This is a graph showing the sentiment data.

TheWall_words %>% 
  inner_join(bing) %>% 
  count(word, sentiment, sort = TRUE) %>% 
  filter(sentiment == "positive") %>%
  select(word, n) %>% 
  wordcloud2()
Joining, by = "word"

TheWall_words %>% 
  inner_join(bing) %>% 
  count(word, sentiment, sort = TRUE) %>% 
  filter(sentiment == "negative") %>%
  select(word, n) %>% 
  wordcloud2()
Joining, by = "word"

NA

This is a word cloud showing the sentiment data.

nrc <- get_sentiments("nrc")
nrc

This is another sentiment analysis using the standards set out by the National Research Council. This obtains the sentiments.

nrc %>%
  distinct(sentiment)

This lists all the different sentiments.

TheWall_words %>% 
  inner_join(nrc) %>% 
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  top_n(3) %>%
  ungroup() %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(sentiment), scales = "free") +
  labs(y = "Pink Floyd's The Wall: Words that contribute the most to each sentiment",
       x = NULL) +
  scale_fill_viridis_d() +
  coord_flip() +
  theme_minimal()
Joining, by = "word"
Selecting by n

These are mini graphs that shows the words that contribute to each sentiment.

TheWall_words %>% 
  inner_join(nrc) %>% 
  count(word, sentiment, sort = TRUE)
Joining, by = "word"

This lists the words, sentiments and the count of the amount of times the word was used.

TheWall_lyrics %>%
  unnest_tokens(bigram, line, token = "ngrams", n = 2) %>% 
  select(bigram) -> TheWall_bigrams

This creates a bigram which are word pairs.

TheWall_bigrams %>%
  count(bigram, sort = T)

This is a table showing the word pairs and the amount of times used.

TheWall_bigrams %>% 
  separate(bigram, c("word1", "word2"), sep = " ") %>% 
  filter(!word1 %in% stop_words$word) %>%
  filter(!word2 %in% stop_words$word) %>% 
  unite(bigram, word1, word2, sep = " ") %>%
  count(bigram, sort = T)

This is a table that shows the bigrams with the amount of times used while removing the stopwords.

TheWall_bigrams %>% 
  separate(bigram, c("word1", "word2"), sep = " ") %>% 
  filter(!word1 %in% stop_words$word) %>%
  filter(!word2 %in% stop_words$word) %>% 
  unite(bigram, word1, word2, sep = " ") %>%
  count(bigram, sort = T) %>%
  filter(n > 1) %>% 
  wordcloud2(size = .5)

This is a word cloud of the bigrams and the count of how often the were used.

first_word <- c("i", "you")                                  # these need to be lowercase

TheWall_bigrams %>% 
  count(bigram, sort = T) %>% 
  separate(bigram, c("word1", "word2"), sep = " ") %>%       # separate the two words
  filter(word1 %in% first_word) %>%                          # find first words from our list
  count(word1, word2, wt = n, sort = TRUE) %>% 
  rename(total = nn)
NA

Using the bigrams we can see words that follow a given word. This is a table that represents that.

first_word <- c("i", "you")                                  # these need to be lowercase

TheWall_bigrams %>% 
  count(bigram, sort = T) %>% 
  separate(bigram, c("word1", "word2"), sep = " ") %>%       # separate the two words
  filter(word1 %in% first_word) %>%                          # find first words from our list
  count(word1, word2, wt = n, sort = TRUE) %>% 
  rename(total = nn) %>%
  mutate(word2 = factor(word2, levels = rev(unique(word2)))) %>%     # put the words in order
  group_by(word1) %>% 
  top_n(5) %>% 
  ggplot(aes(word2, total, fill = word1)) +                          #
  scale_fill_viridis_d() +                                           # set the color palette
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = NULL, title = "Word following:") +
  facet_wrap(~word1, scales = "free") +
  coord_flip() +
  theme_minimal()
Selecting by total

These are graphs that represent the bigram analysis in the previous table.

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmxpYnJhcnkoZ2VuaXVzcikgICAgICAgICAgICAgICAgICAgICAgICAgIyBUaGlzIHBhY2thZ2UgZ2V0cyBseXJpY3MKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkodGlkeXRleHQpCmxpYnJhcnkod29yZGNsb3VkMikKYGBgCgpgYGB7cn0KZ2VuaXVzX3Rva2VuKCkKYGBgCgpgYGB7cn0Kc2VhcmNoX3NvbmcoIkFub3RoZXIgQnJpY2sgaW4gdGhlIFdhbGwiKQpgYGAKVGhpcyBpcyBhIHNlYXJjaCBmb3IgIkFub3RoZXIgQnJpY2sgaW4gdGhlIFdhbGwiIGJ5IFBpbmsgRmxveWQuCmBgYHtyfQpnZXRfc29uZ19tZXRhKDExNjQyNSkKYGBgClRoaXMgZ2F0aGVycyBpbmZvcm1hdGlvbiBhYm91dCB0aGUgc29uZy4KYGBge3J9ClRoZVdhbGxfdHJhY2tzIDwtIHNjcmFwZV90cmFja2xpc3QoMTQ4MzEpClRoZVdhbGxfdHJhY2tzCmBgYApUaGlzIG9idGFpbnMgYW5kIGRpc3BsYXlzIHRoZSBhbGJ1bSBpbmZvcm1hdGlvbiBhbmQgdHJhY2tsaXN0LgpgYGB7cn0KVGhlV2FsbF9seXJpY3MgPC0gbWFwX2RmKFRoZVdhbGxfdHJhY2tzJHNvbmdfbHlyaWNzX3VybCwgc2NyYXBlX2x5cmljc191cmwpClRoZVdhbGxfbHlyaWNzCgpgYGAKVGhpcyBvYnRhaW5zIHRoZSBseXJpY3MgZm9yIHRoZSBhbGJ1bS4KYGBge3J9ClRoZVdhbGxfd29yZHMgPC0gVGhlV2FsbF9seXJpY3MgJT4lCiAgdW5uZXN0X3Rva2Vucyh3b3JkLCBsaW5lKSAlPiUgCiAgc2VsZWN0KHNvbmdfbmFtZSwgd29yZCkKClRoZVdhbGxfd29yZHMKYGBgClRoaXMgYnJlYWtzIGFsbCB0aGUgbHlyaWNzIGRvd24gdG8gb25lIHdvcmQgcGVyIGxpbmUgaW4gb3JkZXIgdG8gbWFrZSBpdCBlYXNpZXIgdG8gYW5hbHl6ZS4KYGBge3J9ClRoZVdhbGxfd29yZHMgJT4lIAogIGFudGlfam9pbihnZXRfc3RvcHdvcmRzKCkpICU+JSAKICBjb3VudCh3b3JkLCBzb3J0ID0gVCkKYGBgClRoaXMgdGFibGUgc2hvd3MgaG93IGNvbW1vbiBlYWNoIHdvcmQgaXMgYnkgc2hvd2luZyB0aGUgd29yZCBjb3VudCwgd2hpbGUgcmVtb3Zpbmcgc3RvcHdvcmRzLgpgYGB7cn0KVGhlV2FsbF93b3JkcyAlPiUgCiAgYW50aV9qb2luKGdldF9zdG9wd29yZHMoKSkgJT4lIAogIGNvdW50KHdvcmQsIHNvcnQgPSBUKSAlPiUKICB0b3BfbigyMDApICU+JQogIHdvcmRjbG91ZDIoc2l6ZSA9IC41KQogICAgICAgICAgCmBgYApUaGlzIGlzIGEgd29yZCBjbG91ZCB0aGF0IHNob3dzIHRoZSBjb21tb24gd29yZCB3aXRoIHRoZSB3b3JkIGNvdW50cy4KYGBge3J9CmJpbmcgPC0gZ2V0X3NlbnRpbWVudHMoImJpbmciKQpiaW5nCmBgYApUaGlzIGlzIHdoYXQgaXMgY2FsbGVkIGEgc2VudGltZW50IGFuYWx5c2lzLiBJdCBhbmFseWl6ZXMgZWFjaCB3b3JkIGFuZCByZWxhdGVzIGl0IHRvIGVtb3Rpb24uIFRoaXMgb2J0YWlucyBzZW50aW1lbnRzLgpgYGB7cn0KVGhlV2FsbF93b3JkcyAlPiUgCiAgaW5uZXJfam9pbihiaW5nKSAlPiUgCiAgY291bnQod29yZCwgc2VudGltZW50LCBzb3J0ID0gVFJVRSkKCmBgYApUaGlzIGlzIHRoZSBzZW50aW1lbnQgYW5hbHlzaXMgb2YgdGhlIHNvbmcgd29yZHMgIlRoZSBXYWxsIiBieSBQaW5rIEZsb3lkLgpgYGB7cn0KVGhlV2FsbF93b3JkcyAlPiUgCiAgaW5uZXJfam9pbihiaW5nKSAlPiUgCiAgY291bnQod29yZCwgc2VudGltZW50LCBzb3J0ID0gVFJVRSkgJT4lCiAgZ3JvdXBfYnkoc2VudGltZW50KSAlPiUKICB0b3BfbigxMCkgJT4lCiAgdW5ncm91cCgpICU+JQogIG11dGF0ZSh3b3JkID0gcmVvcmRlcih3b3JkLCBuKSkgJT4lCiAgZ2dwbG90KGFlcyh3b3JkLCBuLCBmaWxsID0gc2VudGltZW50KSkgKwogIGdlb21fY29sKHNob3cubGVnZW5kID0gRkFMU0UpICsKICBmYWNldF93cmFwKHZhcnMoc2VudGltZW50KSwgc2NhbGVzID0gImZyZWUiKSArCiAgbGFicyh5ID0gIlBpbmsgRmxveWQncyB0aGUgV2FsbCBhbGJ1bTogV29yZHMgdGhhdCBjb250cmlidXRlIHRoZSBtb3N0IHRvIGVhY2ggc2VudGltZW50IiwKICAgICAgIHggPSBOVUxMKSArCiAgc2NhbGVfZmlsbF92aXJpZGlzX2QoKSArCiAgY29vcmRfZmxpcCgpICsKICB0aGVtZV9taW5pbWFsKCkKYGBgClRoaXMgaXMgYSBncmFwaCBzaG93aW5nIHRoZSBzZW50aW1lbnQgZGF0YS4KYGBge3J9ClRoZVdhbGxfd29yZHMgJT4lIAogIGlubmVyX2pvaW4oYmluZykgJT4lIAogIGNvdW50KHdvcmQsIHNlbnRpbWVudCwgc29ydCA9IFRSVUUpICU+JSAKICBmaWx0ZXIoc2VudGltZW50ID09ICJwb3NpdGl2ZSIpICU+JQogIHNlbGVjdCh3b3JkLCBuKSAlPiUgCiAgd29yZGNsb3VkMigpCgoKVGhlV2FsbF93b3JkcyAlPiUgCiAgaW5uZXJfam9pbihiaW5nKSAlPiUgCiAgY291bnQod29yZCwgc2VudGltZW50LCBzb3J0ID0gVFJVRSkgJT4lIAogIGZpbHRlcihzZW50aW1lbnQgPT0gIm5lZ2F0aXZlIikgJT4lCiAgc2VsZWN0KHdvcmQsIG4pICU+JSAKICB3b3JkY2xvdWQyKCkKICAKYGBgClRoaXMgaXMgYSB3b3JkIGNsb3VkIHNob3dpbmcgdGhlIHNlbnRpbWVudCBkYXRhLgpgYGB7cn0KbnJjIDwtIGdldF9zZW50aW1lbnRzKCJucmMiKQpucmMKYGBgClRoaXMgaXMgYW5vdGhlciBzZW50aW1lbnQgYW5hbHlzaXMgdXNpbmcgdGhlIHN0YW5kYXJkcyBzZXQgb3V0IGJ5IHRoZSBOYXRpb25hbCBSZXNlYXJjaCBDb3VuY2lsLiBUaGlzIG9idGFpbnMgdGhlIHNlbnRpbWVudHMuCmBgYHtyfQpucmMgJT4lCiAgZGlzdGluY3Qoc2VudGltZW50KQpgYGAKVGhpcyBsaXN0cyBhbGwgdGhlIGRpZmZlcmVudCBzZW50aW1lbnRzLgpgYGB7cn0KVGhlV2FsbF93b3JkcyAlPiUgCiAgaW5uZXJfam9pbihucmMpICU+JSAKICBjb3VudCh3b3JkLCBzZW50aW1lbnQsIHNvcnQgPSBUUlVFKSAlPiUKICBncm91cF9ieShzZW50aW1lbnQpICU+JQogIHRvcF9uKDMpICU+JQogIHVuZ3JvdXAoKSAlPiUKICBtdXRhdGUod29yZCA9IHJlb3JkZXIod29yZCwgbikpICU+JQogIGdncGxvdChhZXMod29yZCwgbiwgZmlsbCA9IHNlbnRpbWVudCkpICsKICBnZW9tX2NvbChzaG93LmxlZ2VuZCA9IEZBTFNFKSArCiAgZmFjZXRfd3JhcCh2YXJzKHNlbnRpbWVudCksIHNjYWxlcyA9ICJmcmVlIikgKwogIGxhYnMoeSA9ICJQaW5rIEZsb3lkJ3MgVGhlIFdhbGw6IFdvcmRzIHRoYXQgY29udHJpYnV0ZSB0aGUgbW9zdCB0byBlYWNoIHNlbnRpbWVudCIsCiAgICAgICB4ID0gTlVMTCkgKwogIHNjYWxlX2ZpbGxfdmlyaWRpc19kKCkgKwogIGNvb3JkX2ZsaXAoKSArCiAgdGhlbWVfbWluaW1hbCgpCmBgYApUaGVzZSBhcmUgbWluaSBncmFwaHMgdGhhdCBzaG93cyB0aGUgd29yZHMgdGhhdCBjb250cmlidXRlIHRvIGVhY2ggc2VudGltZW50LgpgYGB7cn0KVGhlV2FsbF93b3JkcyAlPiUgCiAgaW5uZXJfam9pbihucmMpICU+JSAKICBjb3VudCh3b3JkLCBzZW50aW1lbnQsIHNvcnQgPSBUUlVFKQoKYGBgClRoaXMgbGlzdHMgdGhlIHdvcmRzLCBzZW50aW1lbnRzIGFuZCB0aGUgIGNvdW50IG9mIHRoZSBhbW91bnQgb2YgdGltZXMgdGhlIHdvcmQgd2FzIHVzZWQuCgpgYGB7cn0KVGhlV2FsbF9seXJpY3MgJT4lCiAgdW5uZXN0X3Rva2VucyhiaWdyYW0sIGxpbmUsIHRva2VuID0gIm5ncmFtcyIsIG4gPSAyKSAlPiUgCiAgc2VsZWN0KGJpZ3JhbSkgLT4gVGhlV2FsbF9iaWdyYW1zCgpgYGAKVGhpcyBjcmVhdGVzIGEgYmlncmFtIHdoaWNoIGFyZSB3b3JkIHBhaXJzLgpgYGB7cn0KVGhlV2FsbF9iaWdyYW1zICU+JQogIGNvdW50KGJpZ3JhbSwgc29ydCA9IFQpCmBgYApUaGlzIGlzIGEgdGFibGUgc2hvd2luZyB0aGUgd29yZCBwYWlycyBhbmQgdGhlIGFtb3VudCBvZiB0aW1lcyB1c2VkLgpgYGB7cn0KVGhlV2FsbF9iaWdyYW1zICU+JSAKICBzZXBhcmF0ZShiaWdyYW0sIGMoIndvcmQxIiwgIndvcmQyIiksIHNlcCA9ICIgIikgJT4lIAogIGZpbHRlcighd29yZDEgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JQogIGZpbHRlcighd29yZDIgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JSAKICB1bml0ZShiaWdyYW0sIHdvcmQxLCB3b3JkMiwgc2VwID0gIiAiKSAlPiUKICBjb3VudChiaWdyYW0sIHNvcnQgPSBUKQpgYGAKVGhpcyBpcyBhIHRhYmxlIHRoYXQgc2hvd3MgdGhlIGJpZ3JhbXMgd2l0aCB0aGUgYW1vdW50IG9mIHRpbWVzIHVzZWQgd2hpbGUgcmVtb3ZpbmcgdGhlIHN0b3B3b3Jkcy4KYGBge3J9ClRoZVdhbGxfYmlncmFtcyAlPiUgCiAgc2VwYXJhdGUoYmlncmFtLCBjKCJ3b3JkMSIsICJ3b3JkMiIpLCBzZXAgPSAiICIpICU+JSAKICBmaWx0ZXIoIXdvcmQxICVpbiUgc3RvcF93b3JkcyR3b3JkKSAlPiUKICBmaWx0ZXIoIXdvcmQyICVpbiUgc3RvcF93b3JkcyR3b3JkKSAlPiUgCiAgdW5pdGUoYmlncmFtLCB3b3JkMSwgd29yZDIsIHNlcCA9ICIgIikgJT4lCiAgY291bnQoYmlncmFtLCBzb3J0ID0gVCkgJT4lCiAgZmlsdGVyKG4gPiAxKSAlPiUgCiAgd29yZGNsb3VkMihzaXplID0gLjUpCmBgYApUaGlzIGlzIGEgd29yZCBjbG91ZCBvZiB0aGUgYmlncmFtcyBhbmQgdGhlIGNvdW50IG9mIGhvdyBvZnRlbiB0aGUgd2VyZSB1c2VkLgpgYGB7cn0KZmlyc3Rfd29yZCA8LSBjKCJpIiwgInlvdSIpICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgdGhlc2UgbmVlZCB0byBiZSBsb3dlcmNhc2UKClRoZVdhbGxfYmlncmFtcyAlPiUgCiAgY291bnQoYmlncmFtLCBzb3J0ID0gVCkgJT4lIAogIHNlcGFyYXRlKGJpZ3JhbSwgYygid29yZDEiLCAid29yZDIiKSwgc2VwID0gIiAiKSAlPiUgICAgICAgIyBzZXBhcmF0ZSB0aGUgdHdvIHdvcmRzCiAgZmlsdGVyKHdvcmQxICVpbiUgZmlyc3Rfd29yZCkgJT4lICAgICAgICAgICAgICAgICAgICAgICAgICAjIGZpbmQgZmlyc3Qgd29yZHMgZnJvbSBvdXIgbGlzdAogIGNvdW50KHdvcmQxLCB3b3JkMiwgd3QgPSBuLCBzb3J0ID0gVFJVRSkgJT4lIAogIHJlbmFtZSh0b3RhbCA9IG5uKQoKYGBgClVzaW5nIHRoZSBiaWdyYW1zIHdlIGNhbiBzZWUgd29yZHMgdGhhdCBmb2xsb3cgYSBnaXZlbiB3b3JkLiBUaGlzIGlzIGEgdGFibGUgdGhhdCByZXByZXNlbnRzIHRoYXQuCmBgYHtyfQpmaXJzdF93b3JkIDwtIGMoImkiLCAieW91IikgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIyB0aGVzZSBuZWVkIHRvIGJlIGxvd2VyY2FzZQoKVGhlV2FsbF9iaWdyYW1zICU+JSAKICBjb3VudChiaWdyYW0sIHNvcnQgPSBUKSAlPiUgCiAgc2VwYXJhdGUoYmlncmFtLCBjKCJ3b3JkMSIsICJ3b3JkMiIpLCBzZXAgPSAiICIpICU+JSAgICAgICAjIHNlcGFyYXRlIHRoZSB0d28gd29yZHMKICBmaWx0ZXIod29yZDEgJWluJSBmaXJzdF93b3JkKSAlPiUgICAgICAgICAgICAgICAgICAgICAgICAgICMgZmluZCBmaXJzdCB3b3JkcyBmcm9tIG91ciBsaXN0CiAgY291bnQod29yZDEsIHdvcmQyLCB3dCA9IG4sIHNvcnQgPSBUUlVFKSAlPiUgCiAgcmVuYW1lKHRvdGFsID0gbm4pICU+JQogIG11dGF0ZSh3b3JkMiA9IGZhY3Rvcih3b3JkMiwgbGV2ZWxzID0gcmV2KHVuaXF1ZSh3b3JkMikpKSkgJT4lICAgICAjIHB1dCB0aGUgd29yZHMgaW4gb3JkZXIKICBncm91cF9ieSh3b3JkMSkgJT4lIAogIHRvcF9uKDUpICU+JSAKICBnZ3Bsb3QoYWVzKHdvcmQyLCB0b3RhbCwgZmlsbCA9IHdvcmQxKSkgKyAgICAgICAgICAgICAgICAgICAgICAgICAgIwogIHNjYWxlX2ZpbGxfdmlyaWRpc19kKCkgKyAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIHNldCB0aGUgY29sb3IgcGFsZXR0ZQogIGdlb21fY29sKHNob3cubGVnZW5kID0gRkFMU0UpICsKICBsYWJzKHggPSBOVUxMLCB5ID0gTlVMTCwgdGl0bGUgPSAiV29yZCBmb2xsb3dpbmc6IikgKwogIGZhY2V0X3dyYXAofndvcmQxLCBzY2FsZXMgPSAiZnJlZSIpICsKICBjb29yZF9mbGlwKCkgKwogIHRoZW1lX21pbmltYWwoKQoKYGBgClRoZXNlIGFyZSBncmFwaHMgdGhhdCByZXByZXNlbnQgdGhlIGJpZ3JhbSBhbmFseXNpcyBpbiB0aGUgcHJldmlvdXMgdGFibGUuCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgo=