Load the necessary packages
library(geniusr) # This package gets lyrics
library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.2.5
## ✔ tibble 2.0.0 ✔ dplyr 0.7.8
## ✔ tidyr 0.8.2 ✔ stringr 1.3.1
## ✔ readr 1.3.1 ✔ forcats 0.3.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(tidytext)
library(wordcloud2)
Find song information
# Search Genius for songs matching the text "thrift shop" and print the hits.
# NOTE(review): presumably the song ID used in the next step is read off this
# result — confirm.
search_song("thrift shop")
Retrieve song information
# Fetch and print the metadata for a single song.
# NOTE(review): 86538 is presumably the Genius song ID for "Thrift Shop" taken
# from the search results above — confirm.
get_song_meta(86538)
Access the album tracks
# Scrape the track list of one album; its song_lyrics_url column is used later
# to download the lyrics.
# NOTE(review): 22968 is presumably the Genius album ID for "The Heist" —
# confirm.
mack_tracks <- scrape_tracklist(22968)
## Warning in stri_match_all_regex(string, pattern, omit_no_match = TRUE,
## opts_regex = opts(pattern)): argument is not an atomic vector; coercing
# Print the track list for inspection.
mack_tracks
Access album song lyrics
# Download the lyrics behind every track URL and row-bind the per-song results
# into a single data frame, then print it.
mack_lyrics <- mack_tracks$song_lyrics_url %>%
  map_df(scrape_lyrics_url)
mack_lyrics
Unnest song lyrics from the album
# Split every lyric line into single-word tokens (one row per word) and keep
# only the word column. The assignment is written with a leading `<-` so the
# name being created is visible at the start of the statement.
mack_words <- mack_lyrics %>%
  unnest_tokens(word, line) %>%
  select(word)
Remove stop words and count the remaining words
# Drop stop words, then tabulate how often each remaining word occurs,
# most frequent first. The join key is spelled out so the result does not
# depend on whichever column names the two tables happen to share.
mack_words %>%
  anti_join(get_stopwords(), by = "word") %>%
  count(word, sort = TRUE)
Create a word cloud of album lyrics
# Word cloud of the 150 most frequent non-stop-words on the album.
mack_words %>%
  anti_join(get_stopwords(), by = "word") %>%
  count(word, sort = TRUE) %>%
  # Rank explicitly by the count column instead of relying on top_n() picking
  # the last column.
  top_n(150, n) %>%
  wordcloud2(size = 0.7)
Load the sentiment lexicons
# Load the two sentiment lexicons that the analyses below join against.
bing <- get_sentiments(lexicon = "bing")
nrc <- get_sentiments(lexicon = "nrc")
Display “bing” sentiments
# Keep only the words that appear in the bing lexicon and count each
# word/sentiment pair, most frequent first.
mack_words %>%
  inner_join(bing, by = "word") %>%
  count(word, sentiment, sort = TRUE)
Create a graph displaying positive and negative sentiments in “bing.”
# Bar chart of the ten most frequent words for each bing sentiment
# (ties can add extra bars), one facet per sentiment.
mack_words %>%
  inner_join(bing, by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  # Rank explicitly by the count column rather than by "the last column".
  top_n(10, n) %>%
  ungroup() %>%
  # Order the bars by frequency instead of alphabetically.
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(sentiment), scales = "free") +
  # NOTE(review): the nrc plot further down spells this label
  # "Macklemore and Ryan Lewis's ..." — consider unifying the two.
  labs(
    y = "Macklemore's and Ryan Lewis's The Heist Album: Words that contribute the most to each sentiment",
    x = NULL
  ) +
  scale_fill_viridis_d() +
  # Flip the axes so the words are listed down the side.
  coord_flip() +
  theme_minimal()
Create a graph displaying positive and negative sentiments in “nrc.”
# Bar chart of the three most frequent words for each nrc sentiment
# (ties can add extra bars), one facet per sentiment.
mack_words %>%
  # A word can carry several nrc sentiments, so this join can return more rows
  # than mack_words has; that fan-out is what gets counted per sentiment.
  inner_join(nrc, by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  # Rank explicitly by the count column rather than by "the last column".
  top_n(3, n) %>%
  ungroup() %>%
  # Order the bars by frequency instead of alphabetically.
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(sentiment), scales = "free") +
  labs(
    y = "Macklemore and Ryan Lewis's The Heist Album: Words that contribute the most to each sentiment",
    x = NULL
  ) +
  scale_fill_viridis_d() +
  # Flip the axes so the words are listed down the side.
  coord_flip() +
  theme_minimal()
Unnest bigrams
# Split every lyric line into overlapping two-word sequences (bigrams), one
# row per bigram, and keep only the bigram column. The assignment is written
# with a leading `<-` so the name being created is visible up front.
mack_bigrams <- mack_lyrics %>%
  unnest_tokens(bigram, line, token = "ngrams", n = 2) %>%
  select(bigram)
Remove stop words and create a table of bigram counts
# Count the bigrams in which neither word is a stop word, most frequent first.
mack_bigrams %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  # Depending on the tidytext version, a line with fewer than two words can
  # yield an NA bigram; drop those rows so unite() does not turn them into the
  # literal string "NA NA".
  filter(!is.na(word1), !is.na(word2)) %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word
  ) %>%
  unite(bigram, word1, word2, sep = " ") %>%
  count(bigram, sort = TRUE)
Create a word cloud of the most frequent bigrams
# Word cloud of the most frequent stop-word-free bigrams that occur more than
# once.
mack_bigrams %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  # Depending on the tidytext version, a line with fewer than two words can
  # yield an NA bigram; drop those rows so unite() does not turn them into the
  # literal string "NA NA".
  filter(!is.na(word1), !is.na(word2)) %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word
  ) %>%
  unite(bigram, word1, word2, sep = " ") %>%
  count(bigram, sort = TRUE) %>%
  # Rank explicitly by the count column rather than by "the last column".
  top_n(100, n) %>%
  filter(n > 1) %>%
  wordcloud2(size = 0.5)
Create a bar graph of the words that most often follow “i” and “you”
# Bar chart of the words that most often follow each word in `first_word`,
# one facet per leading word.
first_word <- c("i", "you") # these need to be lowercase
mack_bigrams %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>% # split the two words
  filter(word1 %in% first_word) %>% # keep bigrams that start with a listed word
  # Count each pair once and rename the result explicitly. The earlier
  # count-then-recount relied on the second count() naming its column `nn`,
  # which newer dplyr releases no longer do, so rename(total = nn) would fail.
  count(word1, word2, sort = TRUE) %>%
  rename(total = n) %>%
  # Fix the bar order: most frequent word ends up on top after coord_flip().
  mutate(word2 = factor(word2, levels = rev(unique(word2)))) %>%
  group_by(word1) %>%
  top_n(10, total) %>% # ties can add extra bars
  ungroup() %>%
  ggplot(aes(word2, total, fill = word1)) +
  scale_fill_viridis_d() + # set the color palette
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = NULL, title = "Word following:") +
  facet_wrap(~word1, scales = "free") +
  coord_flip() +
  theme_minimal()