library(genius)                         # This package gets lyrics
library(tidyverse)
library(tidytext)
library(wordcloud2)
  1. Find the album and get the lyrics, and unnest them.
fearless <- genius_album(artist = "taylor swift", album = "fearless")
Joining, by = c("album_name", "track_n", "track_url")
fearless_words <- fearless %>%
  unnest_tokens(word, lyric) %>% 
  select(track_title, word)

fearless_words

Here is the unnested words from the album “fearless” by taylor swift.

  1. Clean the lyrics by removing stopwords, and then create a table and word cloud with the words counts.
fearless_words %>% 
  anti_join(stop_words) %>% 
  count(word, sort = T)
Joining, by = "word"

table of non-stop words.

fearless_words %>% 
  anti_join(stop_words) %>% 
  count(word, sort = T)%>%
  top_n(100)%>%
  wordcloud2(size = .5)
Joining, by = "word"
Selecting by n

word cloud without stop words but with word count.

  1. Do the sentiment analysis as above, and create graphs of the words that contribute most to each sentiment.
bing <- get_sentiments("bing")
bing

This is the sentient analysis.

fearless_words %>% 
  inner_join(bing) %>% 
  count(word, sentiment, sort = TRUE) %>% 
  filter(sentiment == "positive") %>%
  select(word, n) %>% 
  wordcloud2()
Joining, by = "word"
fearless_words %>% 
  inner_join(bing) %>% 
  count(word, sentiment, sort = TRUE) %>% 
  filter(sentiment == "negative") %>%
  select(word, n) %>% 
  wordcloud2()
Joining, by = "word"
NA

These are the graphs showing the words that contribute most to each sentiment.

  1. Create bigrams of the lyrics, remove the stopwords, and create a table and word cloud of the most common bigrams.
fearless %>%
  unnest_tokens(bigram, lyric, token = "ngrams", n = 2) %>% 
  select(bigram) -> fearless_bigrams
fearless_bigrams %>% 
  separate(bigram, c("word1", "word2"), sep = " ") %>% 
  filter(!word1 %in% stop_words$word) %>%
  filter(!word2 %in% stop_words$word) %>% 
  unite(bigram, word1, word2, sep = " ")
NA

Here is a bigram of the lyrics with the stop words removed.

fearless_bigrams %>% 
  separate(bigram, c("word1", "word2"), sep = " ") %>% 
  filter(!word1 %in% stop_words$word) %>%
  filter(!word2 %in% stop_words$word) %>% 
  unite(bigram, word1, word2, sep = " ")%>%
  count(bigram, sort= T)

Here is a table showing the most common bigrams when the stop words are removed.

fearless_bigrams %>% 
  separate(bigram, c("word1", "word2"), sep = " ") %>% 
  filter(!word1 %in% stop_words$word) %>%
  filter(!word2 %in% stop_words$word) %>% 
  unite(bigram, word1, word2, sep = " ")%>%
  count(bigram, sort= T)%>%

  filter(n > 1) %>% 
  wordcloud2(size = .5)

This is a word cloud of the most common bigrams.

  1. Use the bigram method to find the most common words that come after words of your choice, like i/you or he/she.
first_word <- c("i", "you")                                  # these need to be lowercase

fearless_bigrams %>% 
  count(bigram, sort = T) %>% 
  separate(bigram, c("word1", "word2"), sep = " ") %>%       # separate the two words
  filter(word1 %in% first_word) %>%                          # find first words from our list
  count(word1, word2, wt = n, sort = TRUE)
NA

This is a table of the most common words that come after the words “i” and “you”.

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmxpYnJhcnkoZ2VuaXVzKSAgICAgICAgICAgICAgICAgICAgICAgICAjIFRoaXMgcGFja2FnZSBnZXRzIGx5cmljcwpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeSh0aWR5dGV4dCkKbGlicmFyeSh3b3JkY2xvdWQyKQpgYGAKCgoxLiBGaW5kIHRoZSBhbGJ1bSBhbmQgZ2V0IHRoZSBseXJpY3MsIGFuZCB1bm5lc3QgdGhlbS4KYGBge3J9CmZlYXJsZXNzIDwtIGdlbml1c19hbGJ1bShhcnRpc3QgPSAidGF5bG9yIHN3aWZ0IiwgYWxidW0gPSAiZmVhcmxlc3MiKQpgYGAKCmBgYHtyfQpmZWFybGVzc193b3JkcyA8LSBmZWFybGVzcyAlPiUKICB1bm5lc3RfdG9rZW5zKHdvcmQsIGx5cmljKSAlPiUgCiAgc2VsZWN0KHRyYWNrX3RpdGxlLCB3b3JkKQoKZmVhcmxlc3Nfd29yZHMKYGBgCkhlcmUgaXMgdGhlIHVubmVzdGVkIHdvcmRzIGZyb20gdGhlIGFsYnVtICJmZWFybGVzcyIgYnkgdGF5bG9yIHN3aWZ0LiAKCgoyLiBDbGVhbiB0aGUgbHlyaWNzIGJ5IHJlbW92aW5nIHN0b3B3b3JkcywgYW5kIHRoZW4gY3JlYXRlIGEgdGFibGUgYW5kIHdvcmQgY2xvdWQgd2l0aCB0aGUgd29yZHMgY291bnRzLiAgCgpgYGB7cn0KZmVhcmxlc3Nfd29yZHMgJT4lIAogIGFudGlfam9pbihzdG9wX3dvcmRzKSAlPiUgCiAgY291bnQod29yZCwgc29ydCA9IFQpCmBgYAp0YWJsZSBvZiBub24tc3RvcCB3b3Jkcy4gCgpgYGB7cn0KZmVhcmxlc3Nfd29yZHMgJT4lIAogIGFudGlfam9pbihzdG9wX3dvcmRzKSAlPiUgCiAgY291bnQod29yZCwgc29ydCA9IFQpJT4lCiAgdG9wX24oMTAwKSU+JQogIHdvcmRjbG91ZDIoc2l6ZSA9IC41KQpgYGAKd29yZCBjbG91ZCB3aXRob3V0IHN0b3Agd29yZHMgYnV0IHdpdGggd29yZCBjb3VudC4gCgoKMy4gRG8gdGhlIHNlbnRpbWVudCBhbmFseXNpcyBhcyBhYm92ZSwgYW5kIGNyZWF0ZSBncmFwaHMgb2YgdGhlIHdvcmRzIHRoYXQgY29udHJpYnV0ZSBtb3N0IHRvIGVhY2ggc2VudGltZW50LiAKCmBgYHtyfQpiaW5nIDwtIGdldF9zZW50aW1lbnRzKCJiaW5nIikKYmluZwpgYGAKVGhpcyBpcyB0aGUgc2VudGllbnQgYW5hbHlzaXMuCgpgYGB7cn0KZmVhcmxlc3Nfd29yZHMgJT4lIAogIGlubmVyX2pvaW4oYmluZykgJT4lIAogIGNvdW50KHdvcmQsIHNlbnRpbWVudCwgc29ydCA9IFRSVUUpICU+JSAKICBmaWx0ZXIoc2VudGltZW50ID09ICJwb3NpdGl2ZSIpICU+JQogIHNlbGVjdCh3b3JkLCBuKSAlPiUgCiAgd29yZGNsb3VkMigpCgoKZmVhcmxlc3Nfd29yZHMgJT4lIAogIGlubmVyX2pvaW4oYmluZykgJT4lIAogIGNvdW50KHdvcmQsIHNlbnRpbWVudCwgc29ydCA9IFRSVUUpICU+JSAKICBmaWx0ZXIoc2VudGltZW50ID09ICJuZWdhdGl2ZSIpICU+JQogIHNlbGVjdCh3b3JkLCBuKSAlPiUgCiAgd29yZGNsb3VkMigpCiAgCmBgYApUaGVzZSBhcmUgdGhlIGdyYXBocyBzaG93aW5nIHRoZSB3b3JkcyB0aGF0IGNvbnRyaWJ1dGUgbW9zdCB0byBlYWNoIHNlbnRpbWVudC4gCgoKNC4gQ3JlYXRlIGJpZ3JhbXMgb2YgdGhlIGx5cmljcywgcmVtb3ZlIHRoZSBzdG9wd29yZHMsIGFuZCBjcmVhdGUgYSB0YWJsZSBhbmQgd29yZCBjbG91ZCBvZiB0aGUgbW9zdCBjb21tb24gYmlncmFtcy4gIAoKYGBge3J9CmZlYXJsZXNzICU+JQogIHVubmVzdF90b2tlbnMoYmlncmFtLCBseXJpYywgdG9rZW4gPSAibmdyYW1zIiwgbiA9IDIpICU+JSAKICBzZWxlY3QoYmlncmFtKSAtPiBmZWFybGVzc19iaWdyYW1zCmBgYAoKCmBgYHtyfQpmZWFybGVzc19iaWdyYW1zICU+JSAKICBzZXBhcmF0ZShiaWdyYW0sIGMoIndvcmQxIiwgIndvcmQyIiksIHNlcCA9ICIgIikgJT4lIAogIGZpbHRlcighd29yZDEgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JQogIGZpbHRlcighd29yZDIgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JSAKICB1bml0ZShiaWdyYW0sIHdvcmQxLCB3b3JkMiwgc2VwID0gIiAiKQoKYGBgCkhlcmUgaXMgYSBiaWdyYW0gb2YgdGhlIGx5cmljcyB3aXRoIHRoZSBzdG9wIHdvcmRzIHJlbW92ZWQuIAoKYGBge3J9CmZlYXJsZXNzX2JpZ3JhbXMgJT4lIAogIHNlcGFyYXRlKGJpZ3JhbSwgYygid29yZDEiLCAid29yZDIiKSwgc2VwID0gIiAiKSAlPiUgCiAgZmlsdGVyKCF3b3JkMSAlaW4lIHN0b3Bfd29yZHMkd29yZCkgJT4lCiAgZmlsdGVyKCF3b3JkMiAlaW4lIHN0b3Bfd29yZHMkd29yZCkgJT4lIAogIHVuaXRlKGJpZ3JhbSwgd29yZDEsIHdvcmQyLCBzZXAgPSAiICIpJT4lCiAgY291bnQoYmlncmFtLCBzb3J0PSBUKQpgYGAKSGVyZSBpcyBhIHRhYmxlIHNob3dpbmcgdGhlIG1vc3QgY29tbW9uIGJpZ3JhbXMgd2hlbiB0aGUgc3RvcCB3b3JkcyBhcmUgcmVtb3ZlZC4gCgoKYGBge3J9CmZlYXJsZXNzX2JpZ3JhbXMgJT4lIAogIHNlcGFyYXRlKGJpZ3JhbSwgYygid29yZDEiLCAid29yZDIiKSwgc2VwID0gIiAiKSAlPiUgCiAgZmlsdGVyKCF3b3JkMSAlaW4lIHN0b3Bfd29yZHMkd29yZCkgJT4lCiAgZmlsdGVyKCF3b3JkMiAlaW4lIHN0b3Bfd29yZHMkd29yZCkgJT4lIAogIHVuaXRlKGJpZ3JhbSwgd29yZDEsIHdvcmQyLCBzZXAgPSAiICIpJT4lCiAgY291bnQoYmlncmFtLCBzb3J0PSBUKSU+JQoKICBmaWx0ZXIobiA+IDEpICU+JSAKICB3b3JkY2xvdWQyKHNpemUgPSAuNSkKYGBgClRoaXMgaXMgYSB3b3JkIGNsb3VkIG9mIHRoZSBtb3N0IGNvbW1vbiBiaWdyYW1zLiAKCgoKNS4gVXNlIHRoZSBiaWdyYW0gbWV0aG9kIHRvIGZpbmQgdGhlIG1vc3QgY29tbW9uIHdvcmRzIHRoYXQgY29tZSBhZnRlciB3b3JkcyBvZiB5b3VyIGNob2ljZSwgbGlrZSBpL3lvdSBvciBoZS9zaGUuCgpgYGB7cn0KZmlyc3Rfd29yZCA8LSBjKCJpIiwgInlvdSIpICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgdGhlc2UgbmVlZCB0byBiZSBsb3dlcmNhc2UKCmZlYXJsZXNzX2JpZ3JhbXMgJT4lIAogIGNvdW50KGJpZ3JhbSwgc29ydCA9IFQpICU+JSAKICBzZXBhcmF0ZShiaWdyYW0sIGMoIndvcmQxIiwgIndvcmQyIiksIHNlcCA9ICIgIikgJT4lICAgICAgICMgc2VwYXJhdGUgdGhlIHR3byB3b3JkcwogIGZpbHRlcih3b3JkMSAlaW4lIGZpcnN0X3dvcmQpICU+JSAgICAgICAgICAgICAgICAgICAgICAgICAgIyBmaW5kIGZpcnN0IHdvcmRzIGZyb20gb3VyIGxpc3QKICBjb3VudCh3b3JkMSwgd29yZDIsIHd0ID0gbiwgc29ydCA9IFRSVUUpCgpgYGAKClRoaXMgaXMgYSB0YWJsZSBvZiB0aGUgbW9zdCBjb21tb24gd29yZHMgdGhhdCBjb21lIGFmdGVyIHRoZSB3b3JkcyAiaSIgYW5kICJ5b3UiLiAKCgoKCgoKCgoKCgoKCgoKCgoKCgo=