1. The code below gets the lyrics from Hozier’s first album, “Hozier”.
# Fetch the album's lyrics. Later code reads the `lyric` and `track_title`
# columns of the returned table.
hozier <- genius_album(
  artist = "hozier",
  album = "hozier"
)

This code unnests the lyrics so that each word sits on its own row.

# Split every lyric line into one word per row, then keep only the track
# title and the word itself.
hoz_words <- select(
  unnest_tokens(hozier, output = word, input = lyric),
  track_title,
  word
)
# Display the tokenised table.
hoz_words
  2. This code removes the stop words, or unimportant words, from the lyrics.
# Drop stop words and count how often each remaining word occurs, most
# frequent first. The join key is spelled out so the result does not depend
# on which column names the two tables happen to share.
hoz_words %>%
  anti_join(stop_words, by = "word") %>%
  count(word, sort = TRUE)
Joining, by = "word"

This code creates a word cloud of the top 100 words in the album.

# Word cloud of the 100 most frequent non-stop words.
# `top_n()` keeps ties, so more than 100 rows may be drawn.
hoz_words %>%
  anti_join(stop_words, by = "word") %>%
  count(word, sort = TRUE) %>%
  # Rank explicitly by the count column instead of relying on "last column".
  top_n(100, n) %>%
  wordcloud2(size = .5)
Joining, by = "word"
Selecting by n
  3. This code runs a sentiment analysis on the words in the album and creates two graphs showing the top ten negative and positive words.
# Sentiment lexicon that labels each word with a `sentiment` value.
bing <- get_sentiments("bing")

# Keep only the lyric words found in the lexicon, count them per sentiment,
# and plot the ten most frequent words of each sentiment side by side.
hoz_words %>%
  inner_join(bing, by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  # Rank explicitly by the count column; ties are kept.
  top_n(10, n) %>%
  ungroup() %>%
  # Order the bars by frequency rather than alphabetically.
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(sentiment), scales = "free") +
  labs(y = "Hozier's Album, Hozier: Words that Contribute to the Most Sentiment", x = NULL) +
  scale_fill_viridis_d() +
  coord_flip() +
  theme_minimal()
Joining, by = "word"
Selecting by n

  4. This code creates bigrams from the Hozier album, removes the unimportant words, and creates a table of the most common bigrams.
# Split every lyric line into overlapping two-word sequences (bigrams) and
# keep only the bigram column.
hoz_bigrams <- hozier %>%
  unnest_tokens(bigram, lyric, token = "ngrams", n = 2) %>%
  select(bigram)

# Table of the most common bigrams in which neither word is a stop word.
hoz_bigrams %>%
  # Lines too short to form a bigram yield NA; drop them so they are not
  # re-united and counted as the literal bigram "NA NA".
  filter(!is.na(bigram)) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% stop_words$word) %>%
  filter(!word2 %in% stop_words$word) %>%
  unite(bigram, word1, word2, sep = " ") %>%
  count(bigram, sort = TRUE)

This code creates a word cloud of the most common bigrams.

# Word cloud of the bigrams that occur more than once and contain no stop
# words.
# This code works in RStudio, but the word cloud does not show up in the
# notebook preview or on RPubs.
# NOTE(review): possibly the wordcloud2 limitation where only the first word
# cloud in an HTML document renders -- confirm.
hoz_bigrams %>%
  # Lines too short to form a bigram yield NA; drop them so "NA NA" is never
  # drawn in the cloud.
  filter(!is.na(bigram)) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% stop_words$word) %>%
  filter(!word2 %in% stop_words$word) %>%
  unite(bigram, word1, word2, sep = " ") %>%
  count(bigram, sort = TRUE) %>%
  filter(n > 1) %>%
  wordcloud2(size = .5)
  5. This code uses bigrams to find the most common words following the words “I” and “you” and display them with graphs.
# Words whose most common followers are plotted (tokens are lower-case).
first_word <- c("i", "you")

# For each of the words above, plot the five words that most often follow it.
hoz_bigrams %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(word1 %in% first_word) %>%
  # One count per (word1, word2) pair is enough; counting the bigrams first
  # and re-summing afterwards gives the same totals.
  count(word1, word2, sort = TRUE) %>%
  # Fix the bar order to the overall frequency order of the following word.
  # A word that follows both "i" and "you" takes its position from its
  # higher count, so bars are not strictly sorted within each facet.
  mutate(word2 = factor(word2, levels = rev(unique(word2)))) %>%
  group_by(word1) %>%
  # Rank explicitly by the count column; ties are kept.
  top_n(5, n) %>%
  ungroup() %>%
  ggplot(aes(word2, n, fill = word1)) +
  scale_fill_viridis_d() +
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = NULL, title = "Word Following 'I/You' in Hozier Songs") +
  facet_wrap(~word1, scales = "free") +
  coord_flip() +
  theme_minimal()
Selecting by n

LS0tDQp0aXRsZTogIkx5cmljIEFuYWx5c2lzIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KMS4gVGhlIGNvZGUgYmVsb3cgZ2V0cyB0aGUgbHlyaWNzIGZyb20gSG96aWVyJ3MgZmlyc3QgYWxidW0sICJIb3ppZXIiLiANCg0KYGBge3J9DQpob3ppZXIgPC0gZ2VuaXVzX2FsYnVtKGFydGlzdCA9ICJob3ppZXIiLCBhbGJ1bSA9ICJob3ppZXIiKQ0KYGBgDQoNClRoaXMgdW5uZXN0cyB0aGUgbHlyaWNzIGFuZCBzZXBhcmF0ZXMgZWFjaCB3b3JkIGJ5IHJvdy4gDQoNCmBgYHtyfQ0KaG96X3dvcmRzIDwtIGhvemllciAlPiUNCiAgdW5uZXN0X3Rva2Vucyh3b3JkLCBseXJpYykgJT4lDQogIHNlbGVjdCh0cmFja190aXRsZSwgd29yZCkNCmhvel93b3Jkcw0KYGBgDQoNCjIuIFRoaXMgY29kZSByZW1vdmVzIHRoZSBzdG9wIHdvcmRzLCBvciB1bmltcG9ydGFudCB3b3JkcywgZnJvbSB0aGUgbHlyaWNzLiANCg0KYGBge3J9DQpob3pfd29yZHMgJT4lDQogIGFudGlfam9pbihzdG9wX3dvcmRzKSAlPiUNCiAgY291bnQod29yZCwgc29ydCA9IFQpIA0KYGBgDQoNClRoaXMgY29kZSBjcmVhdGVzIGEgd29yZCBjbG91ZCBvZiB0aGUgdG9wIDEwMCB3b3JkcyBpbiB0aGUgYWxidW0uIA0KDQpgYGB7cn0NCmhvel93b3JkcyAlPiUNCiAgYW50aV9qb2luKHN0b3Bfd29yZHMpICU+JQ0KICBjb3VudCh3b3JkLCBzb3J0ID0gVCkgJT4lDQogIHRvcF9uKDEwMCkgJT4lDQogIHdvcmRjbG91ZDIoc2l6ZSA9IC41KQ0KYGBgDQoNCjMuIFRoaXMgY29kZSBydW5zIGEgc2VudGltZW50IGFuYWx5c2lzIG9uIHRoZSB3b3JkcyBpbiB0aGUgYWxidW0gYW5kIGNyZWF0ZXMgdHdvIGdyYXBocyBzaG93aW5nIHRoZSB0b3AgdGVuIG5lZ2F0aXZlIGFuZCBwb3NpdGl2ZSB3b3Jkcy4gDQoNCmBgYHtyfQ0KYmluZyA8LSBnZXRfc2VudGltZW50cygiYmluZyIpDQoNCmhvel93b3JkcyAlPiUgDQogIGlubmVyX2pvaW4oYmluZykgJT4lIA0KICBjb3VudCh3b3JkLCBzZW50aW1lbnQsIHNvcnQgPSBUUlVFKSAlPiUNCiAgZ3JvdXBfYnkoc2VudGltZW50KSAlPiUNCiAgdG9wX24oMTApICU+JQ0KICB1bmdyb3VwKCkgJT4lDQogIG11dGF0ZSh3b3JkID0gcmVvcmRlcih3b3JkLCBuKSkgJT4lDQogIGdncGxvdChhZXMod29yZCwgbiwgZmlsbCA9IHNlbnRpbWVudCkpICsNCiAgZ2VvbV9jb2woc2hvdy5sZWdlbmQgPSBGQUxTRSkgKw0KICBmYWNldF93cmFwKHZhcnMoc2VudGltZW50KSwgc2NhbGVzID0gImZyZWUiKSArDQogIGxhYnMoeSA9ICJIb3ppZXIncyBBbGJ1bSwgSG96aWVyOiBXb3JkcyB0aGF0IENvbnRyaWJ1dGUgdG8gdGhlIE1vc3QgU2VudGltZW50IiwgeCA9IE5VTEwpICsNCiAgc2NhbGVfZmlsbF92aXJpZGlzX2QoKSArDQogIGNvb3JkX2ZsaXAoKSArDQogIHRoZW1lX21pbmltYWwgKCkNCmBgYA0KDQo0LiBUaGlzIGNvZGUgY3JlYXRlcyBiaWdyYW1zIGZyb20gdGhlIEhvemllciBhbGJ1bSwgcmVtb3ZlcyB0aGUgdW5pbXByb3RhbnQgd29yZHMsIGFu
ZCBjcmVhdGVzIGEgdGFibGUgb2YgdGhlIG1vc3QgY29tbW9uIGJpZ3JhbXMuIA0KDQpgYGB7cn0NCmhvemllciAlPiUNCiAgdW5uZXN0X3Rva2VucyhiaWdyYW0sIGx5cmljLCB0b2tlbiA9ICJuZ3JhbXMiLCBuID0gMikgJT4lDQogIHNlbGVjdChiaWdyYW0pIC0+IGhvel9iaWdyYW1zDQoNCmhvel9iaWdyYW1zICU+JQ0KICBzZXBhcmF0ZShiaWdyYW0sIGMoIndvcmQxIiwgIndvcmQyIiksIHNlcCA9ICIgIikgJT4lDQogIGZpbHRlcighd29yZDEgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JQ0KICBmaWx0ZXIoIXdvcmQyICVpbiUgc3RvcF93b3JkcyR3b3JkKSAlPiUgDQogIHVuaXRlKGJpZ3JhbSwgd29yZDEsIHdvcmQyLCBzZXAgPSAiICIpICU+JQ0KICBjb3VudChiaWdyYW0sIHNvcnQgPSBUKQ0KYGBgDQoNClRoaXMgY29kZSBjcmVhdGVzIGEgd29yZCBjbG91ZCBvZiB0aGUgbW9zdCBjb21tb24gYmlncmFtcy4gDQoNCmBgYHtyfQ0KaG96X2JpZ3JhbXMgJT4lIA0KICBzZXBhcmF0ZShiaWdyYW0sIGMoIndvcmQxIiwgIndvcmQyIiksIHNlcCA9ICIgIikgJT4lIA0KICBmaWx0ZXIoIXdvcmQxICVpbiUgc3RvcF93b3JkcyR3b3JkKSAlPiUNCiAgZmlsdGVyKCF3b3JkMiAlaW4lIHN0b3Bfd29yZHMkd29yZCkgJT4lIA0KICB1bml0ZShiaWdyYW0sIHdvcmQxLCB3b3JkMiwgc2VwID0gIiAiKSAlPiUNCiAgY291bnQoYmlncmFtLCBzb3J0ID0gVCkgJT4lDQogIGZpbHRlcihuID4gMSkgJT4lIA0KICB3b3JkY2xvdWQyKHNpemUgPSAuNSkNCg0KI1RoaXMgY29kZSB3b3JrcyBpbiBSU3R1ZGlvIGJ1dCBmb3Igc29tZSByZWFzb24gaXQgZG9lc24ndCBzaG93IHVwIGluIHByZXZpZXcgb3IgUlB1YnMuIA0KDQpgYGANCg0KDQoNCjUuIFRoaXMgY29kZSB1c2VzIGJpZ3JhbXMgdG8gZmluZCB0aGUgbW9zdCBjb21tb24gd29yZHMgZm9sbG93aW5nIHRoZSB3b3JkcyAiSSIgYW5kICJ5b3UiIGFuZCBkaXNwbGF5IHRoZW0gd2l0aCBncmFwaHMuIA0KDQpgYGB7cn0NCmZpcnN0X3dvcmQgPC0gYygiaSIsICJ5b3UiKQ0KDQpob3pfYmlncmFtcyAlPiUNCiAgY291bnQoYmlncmFtLCBzb3J0ID0gVCkgJT4lDQogIHNlcGFyYXRlKGJpZ3JhbSwgYygid29yZDEiLCAid29yZDIiKSwgc2VwID0gIiAiKSAlPiUNCiAgZmlsdGVyKHdvcmQxICVpbiUgZmlyc3Rfd29yZCkgJT4lDQogIGNvdW50KHdvcmQxLCB3b3JkMiwgd3QgPSBuLCBzb3J0ID0gVFJVRSkgJT4lDQogIG11dGF0ZSh3b3JkMiA9IGZhY3Rvcih3b3JkMiwgbGV2ZWxzID0gcmV2KHVuaXF1ZSh3b3JkMikpKSkgJT4lDQogIGdyb3VwX2J5KHdvcmQxKSAlPiUNCiAgdG9wX24oNSkgJT4lDQogIGdncGxvdChhZXMod29yZDIsIG4sIGZpbGwgPSB3b3JkMSkpICsNCiAgc2NhbGVfZmlsbF92aXJpZGlzX2QoKSArDQogIGdlb21fY29sKHNob3cubGVnZW5kID0gRkFMU0UpICsNCiAgbGFicyh4ID0gTlVMTCwgeSA9IE5VTEwsIHRpdGxlID0gIldvcmQgRm9sbG93aW5nICdJL1lvdScgaW4gSG96aWVyIFNv
bmdzIikgKw0KICBmYWNldF93cmFwKH53b3JkMSwgc2NhbGVzID0gImZyZWUiKSArDQogIGNvb3JkX2ZsaXAoKSArDQogIHRoZW1lX21pbmltYWwoKQ0KYGBgDQoNCg==