library(geniusr)                         # This package gets lyrics
library(tidyverse)
library(tidytext)
library(wordcloud2)
# Make the Genius API access token available for the calls below.
# NOTE(review): if genius_token() returns the token visibly, calling it at top
# level will print the secret into the rendered output — confirm, and wrap in
# invisible() if so.
genius_token()
# Search for songs whose title matches this string.
search_song("Another Brick in the Wall")

This is a search for “Another Brick in the Wall” by Pink Floyd.

# Fetch metadata for a single song. 116425 is presumably the song ID picked
# from the search results — verify against the search output.
get_song_meta(116425)

This gathers information about the song.

# Scrape the tracklist of one album. 14831 is presumably the album ID for
# The Wall — verify. The result's `song_lyrics_url` column is used later to
# fetch the lyrics of each track.
TheWall_tracks <- scrape_tracklist(14831)
argument is not an atomic vector; coercing
# Print the scraped tracklist for inspection.
TheWall_tracks

This obtains and displays the album information and tracklist.

# Scrape the lyrics behind every track URL and row-bind the per-song results
# into a single data frame.
TheWall_lyrics <- TheWall_tracks$song_lyrics_url %>%
  map_df(scrape_lyrics_url)
TheWall_lyrics
NA

This obtains the lyrics for the album.

# Tokenise the `line` column into one word per row, then keep only the song
# name and the word itself.
TheWall_words <- select(
  unnest_tokens(TheWall_lyrics, word, line),
  song_name, word
)

TheWall_words

This breaks all the lyrics down to one word per line in order to make it easier to analyze.

# Word frequencies across the album, most frequent first, with stop words
# removed. The join key is left implicit: the two tables share only the
# `word` column, which is what anti_join() matches on.
TheWall_words %>%
  anti_join(get_stopwords()) %>%
  count(word, sort = TRUE)  # TRUE, not T: T is an ordinary reassignable variable
Joining, by = "word"

This table shows how common each word is by showing the word count, while removing stopwords.

# Word cloud of the most frequent non-stop-words on the album.
TheWall_words %>%
  anti_join(get_stopwords()) %>%
  count(word, sort = TRUE) %>%  # TRUE, not T: T is an ordinary reassignable variable
  top_n(200) %>%                # ranks by the last column, `n`
  wordcloud2(size = 0.5)
Joining, by = "word"
Selecting by n

NA

This is a word cloud of the most common words, with each word sized by its count.

# Load the "bing" sentiment lexicon and show it.
bing <- get_sentiments(lexicon = "bing")
bing

This is what is called a sentiment analysis. It analyzes each word and relates it to an emotion. This step obtains the sentiment lexicon.

# Keep only the album words that appear in the bing lexicon, then tally each
# word/sentiment pair, most frequent first.
count(
  inner_join(TheWall_words, bing),
  word, sentiment,
  sort = TRUE
)
Joining, by = "word"

This is the sentiment analysis of the words in the songs on “The Wall” by Pink Floyd.

# Horizontal bar chart, one facet per bing sentiment, of the ten highest-count
# words in each sentiment.
count(inner_join(TheWall_words, bing), word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  top_n(10) %>%
  ungroup() %>%
  # Order the factor levels by count so the bars plot in ranked order.
  mutate(word = reorder(word, n)) %>%
  ggplot(mapping = aes(x = word, y = n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  scale_fill_viridis_d() +
  facet_wrap(~sentiment, scales = "free") +
  coord_flip() +
  labs(
    x = NULL,
    y = "Pink Floyd's the Wall album: Words that contribute the most to each sentiment"
  ) +
  theme_minimal()
Joining, by = "word"
Selecting by n

This is a graph showing the sentiment data.

# Word cloud of the album words that bing tags as positive, sized by count.
# Filtering before counting yields the same word/n table as counting first.
TheWall_words %>%
  inner_join(bing) %>%
  filter(sentiment == "positive") %>%
  count(word, sort = TRUE) %>%
  wordcloud2()
Joining, by = "word"

# Word cloud of the album words that bing tags as negative, sized by count.
# Filtering before counting yields the same word/n table as counting first.
TheWall_words %>%
  inner_join(bing) %>%
  filter(sentiment == "negative") %>%
  count(word, sort = TRUE) %>%
  wordcloud2()
Joining, by = "word"

NA

This is a word cloud showing the sentiment data.

# Load the "nrc" sentiment lexicon and show it.
nrc <- get_sentiments(lexicon = "nrc")
nrc

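This is another sentiment analysis, this time using the NRC lexicon (developed at the National Research Council Canada), which tags words with specific emotions as well as positive or negative sentiment. This step obtains the lexicon.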

# List each sentiment category that occurs in the nrc lexicon once.
distinct(nrc, sentiment)

This lists all the different sentiments.

# Horizontal bar chart, one facet per nrc sentiment, of the three highest-count
# words in each sentiment.
count(inner_join(TheWall_words, nrc), word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  top_n(3) %>%
  ungroup() %>%
  # Order the factor levels by count so the bars plot in ranked order.
  mutate(word = reorder(word, n)) %>%
  ggplot(mapping = aes(x = word, y = n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  scale_fill_viridis_d() +
  facet_wrap(~sentiment, scales = "free") +
  coord_flip() +
  labs(
    x = NULL,
    y = "Pink Floyd's The Wall: Words that contribute the most to each sentiment"
  ) +
  theme_minimal()
Joining, by = "word"
Selecting by n

These are small graphs that show the words that contribute most to each sentiment.

# Keep only the album words that appear in the nrc lexicon, then tally each
# word/sentiment pair, most frequent first.
count(
  inner_join(TheWall_words, nrc),
  word, sentiment,
  sort = TRUE
)
Joining, by = "word"

This lists the words, their sentiments, and the number of times each word was used.

# Split every lyric line into bigrams (two-word n-grams) and keep only the
# bigram column. Assigned with a leading `<-` rather than a trailing `->` so
# the name being defined is visible at the start of the statement.
TheWall_bigrams <- TheWall_lyrics %>%
  unnest_tokens(bigram, line, token = "ngrams", n = 2) %>%
  select(bigram)

This creates bigrams, which are pairs of adjacent words.

# Frequency table of every bigram, most common first.
TheWall_bigrams %>%
  count(bigram, sort = TRUE)  # TRUE, not T: T is an ordinary reassignable variable

This is a table showing the word pairs and the amount of times used.

# Frequency table of bigrams in which neither word is a stop word.
TheWall_bigrams %>%
  # Lines with fewer than two words produce an NA bigram. Without this filter
  # the NA passes both `%in%` checks and unite() pastes it back together as the
  # literal string "NA NA".
  filter(!is.na(bigram)) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% stop_words$word) %>%
  filter(!word2 %in% stop_words$word) %>%
  unite(bigram, word1, word2, sep = " ") %>%
  count(bigram, sort = TRUE)  # TRUE, not T: T is an ordinary reassignable variable

This is a table that shows the bigrams with the amount of times used while removing the stopwords.

# Word cloud of stop-word-free bigrams that occur more than once.
TheWall_bigrams %>%
  # Lines with fewer than two words produce an NA bigram. Without this filter
  # the NA passes both `%in%` checks and unite() pastes it back together as the
  # literal string "NA NA", which would then appear in the cloud.
  filter(!is.na(bigram)) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% stop_words$word) %>%
  filter(!word2 %in% stop_words$word) %>%
  unite(bigram, word1, word2, sep = " ") %>%
  count(bigram, sort = TRUE) %>%  # TRUE, not T: T is an ordinary reassignable variable
  filter(n > 1) %>%
  wordcloud2(size = 0.5)

This is a word cloud of the bigrams, sized by how often they were used.

# Table of the words that follow "i" or "you", with how often each pair occurs.
first_word <- c("i", "you")  # these need to be lowercase

TheWall_bigrams %>%
  count(bigram, sort = TRUE) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%  # separate the two words
  filter(word1 %in% first_word) %>%                     # find first words from our list
  # Name the weighted total explicitly. The previous `rename(total = nn)`
  # depended on count() auto-naming its output `nn`, which newer dplyr
  # releases no longer do here, so the rename failed.
  count(word1, word2, wt = n, sort = TRUE, name = "total")
NA

Using the bigrams we can see words that follow a given word. This is a table that represents that.

# Faceted bar chart of the five most frequent words following "i" and "you".
first_word <- c("i", "you")  # these need to be lowercase

TheWall_bigrams %>%
  count(bigram, sort = TRUE) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%  # separate the two words
  filter(word1 %in% first_word) %>%                     # find first words from our list
  # Name the weighted total explicitly. The previous `rename(total = nn)`
  # depended on count() auto-naming its output `nn`, which newer dplyr
  # releases no longer do here, so the rename failed.
  count(word1, word2, wt = n, sort = TRUE, name = "total") %>%
  mutate(word2 = factor(word2, levels = rev(unique(word2)))) %>%  # put the words in order
  group_by(word1) %>%
  top_n(5) %>%       # ranks by the last column, `total`, within each first word
  ungroup() %>%      # drop the grouping once the per-group selection is done
  ggplot(aes(word2, total, fill = word1)) +
  scale_fill_viridis_d() +                                        # set the color palette
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = NULL, title = "Word following:") +
  facet_wrap(~word1, scales = "free") +
  coord_flip() +
  theme_minimal()
Selecting by total

These are graphs that represent the bigram analysis in the previous table.

