Load the necessary packages

library(geniusr)                         # This package gets lyrics
library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0     ✔ purrr   0.2.5
## ✔ tibble  2.0.0     ✔ dplyr   0.7.8
## ✔ tidyr   0.8.2     ✔ stringr 1.3.1
## ✔ readr   1.3.1     ✔ forcats 0.3.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(tidytext)
library(wordcloud2)

Find song information

# Look up songs matching the phrase "thrift shop".
# (search_song() presumably comes from geniusr, loaded above -- confirm.)
search_song("thrift shop")

Retrieve song information

# Fetch the metadata for song 86538.
# NOTE(review): 86538 is presumably the ID of the "thrift shop" search hit --
# confirm against the search results.
get_song_meta(86538)

Access the album tracks

# Scrape the track list for album 22968; its song_lyrics_url column feeds the
# lyrics scrape in the next step.
# NOTE(review): 22968 is presumably the ID of "The Heist" -- confirm.
mack_tracks <- scrape_tracklist(22968)
## Warning in stri_match_all_regex(string, pattern, omit_no_match = TRUE,
## opts_regex = opts(pattern)): argument is not an atomic vector; coercing
# Print the track list for inspection.
mack_tracks

Access album song lyrics

# Scrape the lyrics behind every track URL and row-bind the per-song results
# into a single data frame, then print it.
mack_lyrics <- mack_tracks$song_lyrics_url %>%
  map_df(scrape_lyrics_url)
mack_lyrics

Unnest song lyrics from the album

# Split every lyric line into single-word tokens (one word per row) and keep
# only the word column. The result is assigned with a leading `<-` so the
# binding is visible at the start of the pipeline rather than at its end.
mack_words <- mack_lyrics %>%
  unnest_tokens(word, line) %>%
  select(word)

Clean lyrics

# Remove the stop words returned by get_stopwords() and tally what is left,
# most frequent word first. TRUE is spelled out because `T` is an ordinary
# variable that can be reassigned.
mack_words %>%
  anti_join(get_stopwords()) %>%
  count(word, sort = TRUE)
## Joining, by = "word"

Create a word cloud of album lyrics

# Word cloud of the most frequent non-stop-words on the album.
mack_words %>%
  anti_join(get_stopwords()) %>%
  count(word, sort = TRUE) %>%   # TRUE, not the reassignable shorthand T
  top_n(150) %>%                 # no weight given: ranks by the last column, n
  wordcloud2(size = .7)
## Joining, by = "word"
## Selecting by n

Create a sentiment analysis

# Sentiment lexicons that the word lists are joined against.
bing <- get_sentiments("bing")
nrc <- get_sentiments("nrc")

Display “bing” sentiments

# Keep only the words present in the bing lexicon and count each
# word/sentiment pair, most frequent first.
mack_words %>%
  inner_join(bing) %>%
  count(word, sentiment, sort = TRUE)   # TRUE, not the reassignable shorthand T
## Joining, by = "word"

Create a graph displaying positive and negative sentiments in “bing.”

# Horizontal bar chart of the ten most frequent words per bing sentiment,
# one facet per sentiment.
mack_words %>%
  inner_join(bing) %>%
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  top_n(10) %>%                          # ranks by the last column, n; ties kept
  ungroup() %>%
  mutate(word = reorder(word, n)) %>%    # order the bars by frequency
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(sentiment), scales = "free") +
  # Label wording matches the nrc chart ("Macklemore and Ryan Lewis's").
  labs(y = "Macklemore and Ryan Lewis's The Heist Album: Words that contribute the most to each sentiment",
       x = NULL) +
  scale_fill_viridis_d() +
  coord_flip() +
  theme_minimal()
## Joining, by = "word"
## Selecting by n

Create a graph displaying the top words for each sentiment category in “nrc.”

# Top three words (ties kept) for every sentiment category in the nrc lexicon,
# drawn as one horizontal bar panel per category.
mack_words %>%
  inner_join(nrc) %>%
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  top_n(3) %>%
  ungroup() %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(mapping = aes(x = word, y = n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  scale_fill_viridis_d() +
  facet_wrap(~sentiment, scales = "free") +
  coord_flip() +
  labs(
    x = NULL,
    y = "Macklemore and Ryan Lewis's The Heist Album: Words that contribute the most to each sentiment"
  ) +
  theme_minimal()
## Joining, by = "word"
## Selecting by n

Unnest bigrams

# Tokenise every lyric line into two-word sequences (bigrams) and keep only
# the bigram column. Assigned with a leading `<-` so the binding is visible at
# the start of the pipeline.
mack_bigrams <- mack_lyrics %>%
  unnest_tokens(bigram, line, token = "ngrams", n = 2) %>%
  select(bigram)

Remove stop words and create a table of bigram counts

# Count bigrams in which neither word is a stop word.
mack_bigrams %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%   # split into two words
  filter(!word1 %in% stop_words$word) %>%
  filter(!word2 %in% stop_words$word) %>%
  unite(bigram, word1, word2, sep = " ") %>%             # glue them back together
  count(bigram, sort = TRUE)   # TRUE, not the reassignable shorthand T

Create a word cloud

# Word cloud of the most frequent bigrams in which neither word is a stop word.
mack_bigrams %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% stop_words$word) %>%
  filter(!word2 %in% stop_words$word) %>%
  unite(bigram, word1, word2, sep = " ") %>%
  count(bigram, sort = TRUE) %>%   # TRUE, not the reassignable shorthand T
  top_n(100) %>%                   # ranks by the last column, n
  filter(n > 1) %>%                # drop bigrams that occur only once
  wordcloud2(size = .5)
## Selecting by n

Create a bar graph depicting the most common words that follow “I” and “you”

first_word <- c("i", "you")                                  # these need to be lowercase

# Bar chart of the words that most often follow each entry of first_word.
# Pairs are counted once and the count column is renamed from `n`. The earlier
# count-twice-then-rename-`nn` form relied on how older dplyr releases named a
# second count column, which is not stable across versions.
mack_bigrams %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%       # separate the two words
  filter(word1 %in% first_word) %>%                          # find first words from our list
  count(word1, word2, sort = TRUE) %>%                       # occurrences of each pair
  rename(total = n) %>%
  mutate(word2 = factor(word2, levels = rev(unique(word2)))) %>%     # put the words in order
  group_by(word1) %>%
  top_n(10) %>%                                                      # ranks by the last column, total
  ungroup() %>%
  ggplot(aes(word2, total, fill = word1)) +                          # one bar per following word
  scale_fill_viridis_d() +                                           # set the color palette
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = NULL, title = "Word following:") +
  facet_wrap(~word1, scales = "free") +
  coord_flip() +
  theme_minimal()
## Selecting by total