library(genius)
package ‘genius’ was built under R version 4.0.4
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
-- Attaching packages ------------------------------------------------- tidyverse 1.3.0 --
v ggplot2 3.3.3     v purrr   0.3.4
v tibble  3.1.0     v dplyr   1.0.4
v tidyr   1.1.3     v stringr 1.4.0
v readr   1.4.0     v forcats 0.5.1
package ‘tidyr’ was built under R version 4.0.4
-- Conflicts ---------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
library(tidytext)
package ‘tidytext’ was built under R version 4.0.4
library(wordcloud2)
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio
# Fetch the album's lyrics with genius_album(); the result is the raw input
# for every analysis below (it is tokenised from its `lyric` column).
judds <- genius_album(
  artist = "the judds",
  album = "rockin_with_the_rhythm"
)
`html_session()` was deprecated in rvest 1.0.0.
Please use `session()` instead.
Joining, by = c("album_name", "track_n", "track_url")

TEXT ANALYSIS OF LYRICS BY THE JUDDS

For this text analysis of lyrics, I decided to use my favorite country artists, The Judds. The Judds have been one of my musical influences for as long as I can remember. A mother-and-daughter duo, they rocked the country world in the 80s. In the following three tables, we will break down each song by word, determine which word is used the most, and then remove common words such as “the,” “and,” “you,” and “I.”

# Tokenise the lyrics: one row per word, keeping only the song title
# next to each word.
judds_words <- judds %>%
  unnest_tokens(output = word, input = lyric) %>%
  select(track_title, word)

# Auto-print the tokenised table.
judds_words
NA
# Tally every word on the album, most frequent first.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
judds_words %>%
  count(word, sort = TRUE)
NA
NA



# Same tally after removing every word listed in `stop_words`.
# The join key is left implicit, so dplyr reports the column it joined on.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
judds_words %>%
  anti_join(stop_words) %>%
  count(word, sort = TRUE)
Joining, by = "word"

The word cloud below shows that the words baby and treat were used most often.

# Word cloud of the most frequent non-stop-words.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
# NOTE(review): top_n() is superseded by slice_max() in dplyr >= 1.0.0; it is
# kept here so the "Selecting by n" message in the rendered output still applies.
judds_words %>%
  anti_join(stop_words) %>%
  count(word, sort = TRUE) %>%
  top_n(100) %>%
  wordcloud2(size = 0.5)
Joining, by = "word"
Selecting by n

# Load the "bing" sentiment lexicon; it is joined to the lyric words on `word`
# and supplies the `sentiment` column used below.
bing <- get_sentiments(lexicon = "bing")

# Auto-print the lexicon.
bing
NA

Next we will determine the sentiment behind the words that were used. The following table and graph show that more words were used with a negative connotation than a positive one.


# Keep only the lyric words that appear in the lexicon, then count each
# word/sentiment pair, most frequent first.
inner_join(judds_words, bing) %>%
  count(word, sentiment, sort = TRUE)
Joining, by = "word"

# Bar chart of the ten most frequent words per sentiment, one facet each.
inner_join(judds_words, bing) %>%
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  top_n(10) %>%
  ungroup() %>%
  # Order the bars by frequency instead of alphabetically.
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free") +
  # After coord_flip() the words run down the side and the counts run across.
  coord_flip() +
  scale_fill_viridis_d() +
  labs(
    x = NULL,
    y = "The Judds - Words That Contribute the Most to Each Sentiment"
  ) +
  theme_minimal()
Joining, by = "word"
Selecting by n

The following word clouds show that “love” and “like” were used the most with a positive sentiment, and “fall” and “cry” were used the most with a negative sentiment.

# Word cloud of the lyric words the lexicon labels "positive".
inner_join(judds_words, bing) %>%
  count(word, sentiment, sort = TRUE) %>%
  filter(sentiment == "positive") %>%
  # Pass only the word and its count on to wordcloud2().
  select(word, n) %>%
  wordcloud2()
Joining, by = "word"

# Word cloud of the lyric words the lexicon labels "negative".
inner_join(judds_words, bing) %>%
  count(word, sentiment, sort = TRUE) %>%
  filter(sentiment == "negative") %>%
  # Pass only the word and its count on to wordcloud2().
  select(word, n) %>%
  wordcloud2()
Joining, by = "word"

Now we will determine which two words were used together the most throughout the lyrics of the album.


# Preview the two-word sequences (bigrams) found in the lyrics.
judds %>%
  unnest_tokens(
    output = bigram,
    input = lyric,
    token = "ngrams",
    n = 2
  ) %>%
  select(bigram)
NA
# Store the bigrams for the analyses below. The assignment is written with a
# leading `<-` so the name being defined is visible at the start of the
# statement rather than hidden at the end of the pipe.
judds_bigrams <- judds %>%
  unnest_tokens(bigram, lyric, token = "ngrams", n = 2) %>%
  select(bigram)

# Tally the bigrams, most frequent first. Missing bigrams are dropped from the
# tally only (the stored table is unchanged) so an NA row cannot head the
# ranking.
# NOTE(review): NA bigrams are expected for lyric lines too short to form a
# pair - confirm against the tokenised data.
judds_bigrams %>%
  filter(!is.na(bigram)) %>%
  count(bigram, sort = TRUE)
NA

# Bigrams in which neither word is a stop word.
judds_bigrams %>%
  # Drop missing bigrams first: NA passes both stop-word filters below, and
  # unite() would then paste the two NAs into the literal string "NA NA".
  filter(!is.na(bigram)) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word
  ) %>%
  # Glue the two surviving words back into a single bigram column.
  unite(bigram, word1, word2, sep = " ")
NA

# Frequency of each bigram in which neither word is a stop word.
judds_bigrams %>%
  # Drop missing bigrams first: NA passes both stop-word filters below, and
  # unite() would then paste the two NAs into a countable "NA NA" entry.
  filter(!is.na(bigram)) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word
  ) %>%
  unite(bigram, word1, word2, sep = " ") %>%
  # `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
  count(bigram, sort = TRUE)
NA

# Word cloud of the stop-word-free bigrams that occur more than once.
judds_bigrams %>%
  # Drop missing bigrams first: NA passes both stop-word filters below, and
  # unite() would then paste the two NAs into a "NA NA" entry in the cloud.
  filter(!is.na(bigram)) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word
  ) %>%
  unite(bigram, word1, word2, sep = " ") %>%
  # `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
  count(bigram, sort = TRUE) %>%
  filter(n > 1) %>%
  wordcloud2(size = 0.5)
NA

In the word cloud above you can see that “river roll” and “na na” were the most common two-word combinations.


# Leading words whose followers we want to tally.
first_word <- c("i", "you")

# Count how often each word follows one of the leading words.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
judds_bigrams %>%
  count(bigram, sort = TRUE) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(word1 %in% first_word) %>%
  # Re-aggregate using the bigram counts as weights.
  count(word1, word2, wt = n, sort = TRUE)
NA

Next we will look at which words follow “I” and “you,” as well as “he” and “she,” the most.



# Leading words whose followers we want to chart.
first_word <- c("i", "you")

# Bar chart of the five most frequent words following each leading word.
judds_bigrams %>%
  # `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
  count(bigram, sort = TRUE) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(word1 %in% first_word) %>%
  count(word1, word2, wt = n, sort = TRUE) %>%
  # Fix the factor order from the sorted counts so the bars are ordered by
  # frequency once the axes are flipped.
  mutate(word2 = factor(word2, levels = rev(unique(word2)))) %>%
  group_by(word1) %>%
  top_n(5) %>%
  # Drop the grouping once the per-word selection is done.
  ungroup() %>%
  ggplot(aes(word2, n, fill = word1)) +
  scale_fill_viridis_d() +
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = NULL, title = "Words Following: I and You") +
  facet_wrap(~word1, scales = "free") +
  coord_flip() +
  theme_minimal()
Selecting by n

NA
NA

The chart above indicates that “wish” followed “I” most frequently and “that” followed “you” most often. The chart below indicates that “gave” followed “he” most often and “wouldn’t” followed “she” most frequently.

The word combinations noticed make sense, as they do have songs called “River Roll On” and “I Wish She Wouldn’t Treat You That Way.”



# Leading words whose followers we want to chart.
first_word <- c("he", "she")

# Bar chart of the five most frequent words following each leading word.
judds_bigrams %>%
  # `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
  count(bigram, sort = TRUE) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(word1 %in% first_word) %>%
  count(word1, word2, wt = n, sort = TRUE) %>%
  # Fix the factor order from the sorted counts so the bars are ordered by
  # frequency once the axes are flipped.
  mutate(word2 = factor(word2, levels = rev(unique(word2)))) %>%
  group_by(word1) %>%
  top_n(5) %>%
  # Drop the grouping once the per-word selection is done.
  ungroup() %>%
  ggplot(aes(word2, n, fill = word1)) +
  scale_fill_viridis_d() +
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = NULL, title = "Words Following: He and She") +
  facet_wrap(~word1, scales = "free") +
  coord_flip() +
  theme_minimal()
Selecting by n


# Tally every four-word sequence in the lyrics, most frequent first.
judds %>%
  unnest_tokens(words, lyric, token = "ngrams", n = 4) %>%
  # Drop missing four-grams so an NA row cannot head the ranking.
  # NOTE(review): NA is expected for lyric lines with fewer than four words -
  # confirm against the tokenised data.
  filter(!is.na(words)) %>%
  # `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
  count(words, sort = TRUE)
NA

In the last analysis, we will look at which four-word combination was used most on the album. As you can see, “Rockin With The Rhythm” was used the most. Again, this makes sense, as “Rockin With The Rhythm” is the name of both a song and one of their albums.


# Show the count for one specific four-word sequence.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
judds %>%
  unnest_tokens(words, lyric, token = "ngrams", n = 4) %>%
  count(words, sort = TRUE) %>%
  filter(words == "rockin with the rhythm")
NA
