- The code below gets the lyrics from Hozier’s first album, “Hozier”.
# Retrieve the album data with genius_album(); later chunks read its
# `lyric` and `track_title` columns.
hozier <- genius_album(
  artist = "hozier",
  album = "hozier"
)
This unnests the lyrics into individual words, so that each row holds a single word.
# Split the `lyric` column into one word per row, then keep only the track
# title and the word itself.
hoz_words <- select(
  unnest_tokens(hozier, word, lyric),
  track_title,
  word
)
# Show the tokenised table.
hoz_words
- This code removes the stop words, or unimportant words, from the lyrics and counts how often each remaining word appears.
# Drop every row whose `word` appears in `stop_words`, then tally the
# remaining words, most frequent first.
hoz_words %>%
  # Name the join key explicitly so the result does not depend on whichever
  # columns the two tables happen to share.
  anti_join(stop_words, by = "word") %>%
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  count(word, sort = TRUE)
Joining, by = "word"
This code creates a word cloud of the top 100 words in the album.
# Word cloud of the 100 most frequent non-stop words (ties at the cut-off
# are kept by top_n()).
hoz_words %>%
  # Explicit join key instead of relying on the shared column names.
  anti_join(stop_words, by = "word") %>%
  count(word, sort = TRUE) %>%
  # Rank by the count column `n` explicitly rather than by "the last column".
  top_n(100, wt = n) %>%
  wordcloud2(size = 0.5)
Joining, by = "word"
Selecting by n
- This code runs a sentiment analysis on the words in the album and creates two graphs showing the top ten negative and positive words.
# Load the "bing" sentiment lexicon.
bing <- get_sentiments("bing")

# Keep only words present in the lexicon, count them per sentiment, and plot
# the ten most frequent words of each sentiment as horizontal bars.
hoz_words %>%
  # Explicit join key instead of relying on the shared column names.
  inner_join(bing, by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  # Rank by the count column `n` explicitly rather than by "the last column".
  top_n(10, wt = n) %>%
  ungroup() %>%
  # Order the factor levels by count so the bars appear sorted.
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(sentiment), scales = "free") +
  labs(
    y = "Hozier's Album, Hozier: Words that Contribute to the Most Sentiment",
    x = NULL
  ) +
  scale_fill_viridis_d() +
  coord_flip() +
  theme_minimal()
Joining, by = "word"
Selecting by n

- This code creates bigrams from the Hozier album, removes the unimportant words, and creates a table of the most common bigrams.
# Tokenise the lyrics into two-word sequences (bigrams), one per row.
hoz_bigrams <- hozier %>%
  unnest_tokens(bigram, lyric, token = "ngrams", n = 2) %>%
  select(bigram) %>%
  # Drop missing bigrams so they cannot later be counted as the literal
  # bigram "NA NA" after separate()/unite().
  filter(!is.na(bigram))

# Table of the most common bigrams in which neither word is a stop word.
hoz_bigrams %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word
  ) %>%
  unite(bigram, word1, word2, sep = " ") %>%
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  count(bigram, sort = TRUE)
This code creates a word cloud of the most common bigrams.
# Word cloud of the bigrams that occur more than once and contain no stop
# word in either position.
hoz_bigrams %>%
  # Guard against missing bigrams, which would otherwise survive the
  # stop-word filters and be reunited as the literal string "NA NA".
  filter(!is.na(bigram)) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word
  ) %>%
  unite(bigram, word1, word2, sep = " ") %>%
  count(bigram, sort = TRUE) %>%
  filter(n > 1) %>%
  wordcloud2(size = 0.5)
# This code works in RStudio, but for some reason the word cloud doesn't show
# up in the notebook preview or on RPubs.
# NOTE(review): this is the second wordcloud2() call in the notebook; possibly
# the known wordcloud2 limitation where only the first word cloud in a
# rendered HTML document is drawn - confirm.
- This code uses bigrams to find the most common words following the words “I” and “you” and display them with graphs.
# Words whose followers we want to inspect (tokens are lower-cased, hence "i").
first_word <- c("i", "you")

# For each of the words above, plot the five words that most often follow it.
hoz_bigrams %>%
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  count(bigram, sort = TRUE) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(word1 %in% first_word) %>%
  count(word1, word2, wt = n, sort = TRUE) %>%
  # Fix the level order (reverse of first appearance in the sorted table) so
  # the most frequent followers end up at the top after coord_flip().
  mutate(word2 = factor(word2, levels = rev(unique(word2)))) %>%
  group_by(word1) %>%
  # Rank by the count column `n` explicitly rather than by "the last column".
  top_n(5, wt = n) %>%
  # Grouping is only needed for the per-word ranking; drop it before plotting.
  ungroup() %>%
  ggplot(aes(word2, n, fill = word1)) +
  scale_fill_viridis_d() +
  geom_col(show.legend = FALSE) +
  labs(
    x = NULL,
    y = NULL,
    title = "Word Following 'I/You' in Hozier Songs"
  ) +
  facet_wrap(~word1, scales = "free") +
  coord_flip() +
  theme_minimal()
Selecting by n

LS0tDQp0aXRsZTogIkx5cmljIEFuYWx5c2lzIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KMS4gVGhlIGNvZGUgYmVsb3cgZ2V0cyB0aGUgbHlyaWNzIGZyb20gSG96aWVyJ3MgZmlyc3QgYWxidW0sICJIb3ppZXIiLiANCg0KYGBge3J9DQpob3ppZXIgPC0gZ2VuaXVzX2FsYnVtKGFydGlzdCA9ICJob3ppZXIiLCBhbGJ1bSA9ICJob3ppZXIiKQ0KYGBgDQoNClRoaXMgdW5uZXN0cyB0aGUgbHlyaWNzIGFuZCBzZXBhcmF0ZXMgZWFjaCB3b3JkIGJ5IHJvdy4gDQoNCmBgYHtyfQ0KaG96X3dvcmRzIDwtIGhvemllciAlPiUNCiAgdW5uZXN0X3Rva2Vucyh3b3JkLCBseXJpYykgJT4lDQogIHNlbGVjdCh0cmFja190aXRsZSwgd29yZCkNCmhvel93b3Jkcw0KYGBgDQoNCjIuIFRoaXMgY29kZSByZW1vdmVzIHRoZSBzdG9wIHdvcmRzLCBvciB1bmltcG9ydGFudCB3b3JkcywgZnJvbSB0aGUgbHlyaWNzLiANCg0KYGBge3J9DQpob3pfd29yZHMgJT4lDQogIGFudGlfam9pbihzdG9wX3dvcmRzKSAlPiUNCiAgY291bnQod29yZCwgc29ydCA9IFQpIA0KYGBgDQoNClRoaXMgY29kZSBjcmVhdGVzIGEgd29yZCBjbG91ZCBvZiB0aGUgdG9wIDEwMCB3b3JkcyBpbiB0aGUgYWxidW0uIA0KDQpgYGB7cn0NCmhvel93b3JkcyAlPiUNCiAgYW50aV9qb2luKHN0b3Bfd29yZHMpICU+JQ0KICBjb3VudCh3b3JkLCBzb3J0ID0gVCkgJT4lDQogIHRvcF9uKDEwMCkgJT4lDQogIHdvcmRjbG91ZDIoc2l6ZSA9IC41KQ0KYGBgDQoNCjMuIFRoaXMgY29kZSBydW5zIGEgc2VudGltZW50IGFuYWx5c2lzIG9uIHRoZSB3b3JkcyBpbiB0aGUgYWxidW0gYW5kIGNyZWF0ZXMgdHdvIGdyYXBocyBzaG93aW5nIHRoZSB0b3AgdGVuIG5lZ2F0aXZlIGFuZCBwb3NpdGl2ZSB3b3Jkcy4gDQoNCmBgYHtyfQ0KYmluZyA8LSBnZXRfc2VudGltZW50cygiYmluZyIpDQoNCmhvel93b3JkcyAlPiUgDQogIGlubmVyX2pvaW4oYmluZykgJT4lIA0KICBjb3VudCh3b3JkLCBzZW50aW1lbnQsIHNvcnQgPSBUUlVFKSAlPiUNCiAgZ3JvdXBfYnkoc2VudGltZW50KSAlPiUNCiAgdG9wX24oMTApICU+JQ0KICB1bmdyb3VwKCkgJT4lDQogIG11dGF0ZSh3b3JkID0gcmVvcmRlcih3b3JkLCBuKSkgJT4lDQogIGdncGxvdChhZXMod29yZCwgbiwgZmlsbCA9IHNlbnRpbWVudCkpICsNCiAgZ2VvbV9jb2woc2hvdy5sZWdlbmQgPSBGQUxTRSkgKw0KICBmYWNldF93cmFwKHZhcnMoc2VudGltZW50KSwgc2NhbGVzID0gImZyZWUiKSArDQogIGxhYnMoeSA9ICJIb3ppZXIncyBBbGJ1bSwgSG96aWVyOiBXb3JkcyB0aGF0IENvbnRyaWJ1dGUgdG8gdGhlIE1vc3QgU2VudGltZW50IiwgeCA9IE5VTEwpICsNCiAgc2NhbGVfZmlsbF92aXJpZGlzX2QoKSArDQogIGNvb3JkX2ZsaXAoKSArDQogIHRoZW1lX21pbmltYWwgKCkNCmBgYA0KDQo0LiBUaGlzIGNvZGUgY3JlYXRlcyBiaWdyYW1zIGZyb20gdGhlIEhvemllciBhbGJ1bSwgcmVtb3ZlcyB0aGUgdW5pbXByb3RhbnQgd29yZHMsIGFu
ZCBjcmVhdGVzIGEgdGFibGUgb2YgdGhlIG1vc3QgY29tbW9uIGJpZ3JhbXMuIA0KDQpgYGB7cn0NCmhvemllciAlPiUNCiAgdW5uZXN0X3Rva2VucyhiaWdyYW0sIGx5cmljLCB0b2tlbiA9ICJuZ3JhbXMiLCBuID0gMikgJT4lDQogIHNlbGVjdChiaWdyYW0pIC0+IGhvel9iaWdyYW1zDQoNCmhvel9iaWdyYW1zICU+JQ0KICBzZXBhcmF0ZShiaWdyYW0sIGMoIndvcmQxIiwgIndvcmQyIiksIHNlcCA9ICIgIikgJT4lDQogIGZpbHRlcighd29yZDEgJWluJSBzdG9wX3dvcmRzJHdvcmQpICU+JQ0KICBmaWx0ZXIoIXdvcmQyICVpbiUgc3RvcF93b3JkcyR3b3JkKSAlPiUgDQogIHVuaXRlKGJpZ3JhbSwgd29yZDEsIHdvcmQyLCBzZXAgPSAiICIpICU+JQ0KICBjb3VudChiaWdyYW0sIHNvcnQgPSBUKQ0KYGBgDQoNClRoaXMgY29kZSBjcmVhdGVzIGEgd29yZCBjbG91ZCBvZiB0aGUgbW9zdCBjb21tb24gYmlncmFtcy4gDQoNCmBgYHtyfQ0KaG96X2JpZ3JhbXMgJT4lIA0KICBzZXBhcmF0ZShiaWdyYW0sIGMoIndvcmQxIiwgIndvcmQyIiksIHNlcCA9ICIgIikgJT4lIA0KICBmaWx0ZXIoIXdvcmQxICVpbiUgc3RvcF93b3JkcyR3b3JkKSAlPiUNCiAgZmlsdGVyKCF3b3JkMiAlaW4lIHN0b3Bfd29yZHMkd29yZCkgJT4lIA0KICB1bml0ZShiaWdyYW0sIHdvcmQxLCB3b3JkMiwgc2VwID0gIiAiKSAlPiUNCiAgY291bnQoYmlncmFtLCBzb3J0ID0gVCkgJT4lDQogIGZpbHRlcihuID4gMSkgJT4lIA0KICB3b3JkY2xvdWQyKHNpemUgPSAuNSkNCg0KI1RoaXMgY29kZSB3b3JrcyBpbiBSU3R1ZGlvIGJ1dCBmb3Igc29tZSByZWFzb24gaXQgZG9lc24ndCBzaG93IHVwIGluIHByZXZpZXcgb3IgUlB1YnMuIA0KDQpgYGANCg0KDQoNCjUuIFRoaXMgY29kZSB1c2VzIGJpZ3JhbXMgdG8gZmluZCB0aGUgbW9zdCBjb21tb24gd29yZHMgZm9sbG93aW5nIHRoZSB3b3JkcyAiSSIgYW5kICJ5b3UiIGFuZCBkaXNwbGF5IHRoZW0gd2l0aCBncmFwaHMuIA0KDQpgYGB7cn0NCmZpcnN0X3dvcmQgPC0gYygiaSIsICJ5b3UiKQ0KDQpob3pfYmlncmFtcyAlPiUNCiAgY291bnQoYmlncmFtLCBzb3J0ID0gVCkgJT4lDQogIHNlcGFyYXRlKGJpZ3JhbSwgYygid29yZDEiLCAid29yZDIiKSwgc2VwID0gIiAiKSAlPiUNCiAgZmlsdGVyKHdvcmQxICVpbiUgZmlyc3Rfd29yZCkgJT4lDQogIGNvdW50KHdvcmQxLCB3b3JkMiwgd3QgPSBuLCBzb3J0ID0gVFJVRSkgJT4lDQogIG11dGF0ZSh3b3JkMiA9IGZhY3Rvcih3b3JkMiwgbGV2ZWxzID0gcmV2KHVuaXF1ZSh3b3JkMikpKSkgJT4lDQogIGdyb3VwX2J5KHdvcmQxKSAlPiUNCiAgdG9wX24oNSkgJT4lDQogIGdncGxvdChhZXMod29yZDIsIG4sIGZpbGwgPSB3b3JkMSkpICsNCiAgc2NhbGVfZmlsbF92aXJpZGlzX2QoKSArDQogIGdlb21fY29sKHNob3cubGVnZW5kID0gRkFMU0UpICsNCiAgbGFicyh4ID0gTlVMTCwgeSA9IE5VTEwsIHRpdGxlID0gIldvcmQgRm9sbG93aW5nICdJL1lvdScgaW4gSG96aWVyIFNv
bmdzIikgKw0KICBmYWNldF93cmFwKH53b3JkMSwgc2NhbGVzID0gImZyZWUiKSArDQogIGNvb3JkX2ZsaXAoKSArDQogIHRoZW1lX21pbmltYWwoKQ0KYGBgDQoNCg==