library(geniusr) # This package gets lyrics
library(tidyverse)
library(tidytext)
library(wordcloud2)
genius_token()
search_song("Another Brick in the Wall")
This is a search for “Another Brick in the Wall” by Pink Floyd.
get_song_meta(116425)
This gathers information about the song.
TheWall_tracks <- scrape_tracklist(14831)
argument is not an atomic vector; coercing
TheWall_tracks
This obtains and displays the album information and tracklist.
TheWall_lyrics <- map_df(TheWall_tracks$song_lyrics_url, scrape_lyrics_url)
TheWall_lyrics
NA
This obtains the lyrics for the album.
TheWall_words <- TheWall_lyrics %>%
unnest_tokens(word, line) %>%
select(song_name, word)
TheWall_words
This breaks all the lyrics down to one word per line in order to make it easier to analyze.
TheWall_words %>%
anti_join(get_stopwords()) %>%
count(word, sort = T)
Joining, by = "word"
This table shows how common each word is by showing the word count, while removing stopwords.
TheWall_words %>%
anti_join(get_stopwords()) %>%
count(word, sort = T) %>%
top_n(200) %>%
wordcloud2(size = .5)
Joining, by = "word"
Selecting by n
NA
This word cloud shows the most common words, sized by their word counts.
bing <- get_sentiments("bing")
bing
This is what is called a sentiment analysis. It analyzes each word and relates it to an emotion. This step obtains the sentiment lexicon.
TheWall_words %>%
inner_join(bing) %>%
count(word, sentiment, sort = TRUE)
Joining, by = "word"
This is the sentiment analysis of the words in Pink Floyd's album “The Wall”.
TheWall_words %>%
inner_join(bing) %>%
count(word, sentiment, sort = TRUE) %>%
group_by(sentiment) %>%
top_n(10) %>%
ungroup() %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(word, n, fill = sentiment)) +
geom_col(show.legend = FALSE) +
facet_wrap(vars(sentiment), scales = "free") +
labs(y = "Pink Floyd's the Wall album: Words that contribute the most to each sentiment",
x = NULL) +
scale_fill_viridis_d() +
coord_flip() +
theme_minimal()
Joining, by = "word"
Selecting by n

This is a graph showing the sentiment data.
TheWall_words %>%
inner_join(bing) %>%
count(word, sentiment, sort = TRUE) %>%
filter(sentiment == "positive") %>%
select(word, n) %>%
wordcloud2()
Joining, by = "word"
TheWall_words %>%
inner_join(bing) %>%
count(word, sentiment, sort = TRUE) %>%
filter(sentiment == "negative") %>%
select(word, n) %>%
wordcloud2()
Joining, by = "word"
NA
This is a word cloud showing the sentiment data.
nrc <- get_sentiments("nrc")
nrc
This is another sentiment analysis, this time using the NRC Word-Emotion Association Lexicon, which was developed at the National Research Council Canada. This step obtains the sentiment lexicon.
nrc %>%
distinct(sentiment)
This lists all the different sentiments.
TheWall_words %>%
inner_join(nrc) %>%
count(word, sentiment, sort = TRUE) %>%
group_by(sentiment) %>%
top_n(3) %>%
ungroup() %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(word, n, fill = sentiment)) +
geom_col(show.legend = FALSE) +
facet_wrap(vars(sentiment), scales = "free") +
labs(y = "Pink Floyd's The Wall: Words that contribute the most to each sentiment",
x = NULL) +
scale_fill_viridis_d() +
coord_flip() +
theme_minimal()
Joining, by = "word"
Selecting by n

These are mini graphs that show the words that contribute the most to each sentiment.
TheWall_words %>%
inner_join(nrc) %>%
count(word, sentiment, sort = TRUE)
Joining, by = "word"
This lists the words, sentiments and the count of the amount of times the word was used.
TheWall_lyrics %>%
unnest_tokens(bigram, line, token = "ngrams", n = 2) %>%
select(bigram) -> TheWall_bigrams
This creates bigrams, which are pairs of adjacent words.
TheWall_bigrams %>%
count(bigram, sort = T)
This is a table showing the word pairs and the amount of times used.
TheWall_bigrams %>%
separate(bigram, c("word1", "word2"), sep = " ") %>%
filter(!word1 %in% stop_words$word) %>%
filter(!word2 %in% stop_words$word) %>%
unite(bigram, word1, word2, sep = " ") %>%
count(bigram, sort = T)
This is a table that shows the bigrams with the amount of times used while removing the stopwords.
TheWall_bigrams %>%
separate(bigram, c("word1", "word2"), sep = " ") %>%
filter(!word1 %in% stop_words$word) %>%
filter(!word2 %in% stop_words$word) %>%
unite(bigram, word1, word2, sep = " ") %>%
count(bigram, sort = T) %>%
filter(n > 1) %>%
wordcloud2(size = .5)
This is a word cloud of the bigrams, sized by how often they were used.
first_word <- c("i", "you") # these need to be lowercase
TheWall_bigrams %>%
count(bigram, sort = T) %>%
separate(bigram, c("word1", "word2"), sep = " ") %>% # separate the two words
filter(word1 %in% first_word) %>% # find first words from our list
count(word1, word2, wt = n, sort = TRUE) %>%
rename(total = nn)
NA
Using the bigrams we can see words that follow a given word. This is a table that represents that.
first_word <- c("i", "you") # these need to be lowercase
TheWall_bigrams %>%
count(bigram, sort = T) %>%
separate(bigram, c("word1", "word2"), sep = " ") %>% # separate the two words
filter(word1 %in% first_word) %>% # find first words from our list
count(word1, word2, wt = n, sort = TRUE) %>%
rename(total = nn) %>%
mutate(word2 = factor(word2, levels = rev(unique(word2)))) %>% # put the words in order
group_by(word1) %>%
top_n(5) %>%
ggplot(aes(word2, total, fill = word1)) + #
scale_fill_viridis_d() + # set the color palette
geom_col(show.legend = FALSE) +
labs(x = NULL, y = NULL, title = "Word following:") +
facet_wrap(~word1, scales = "free") +
coord_flip() +
theme_minimal()
Selecting by total

These are graphs that represent the bigram analysis in the previous table.
---
title: "R Notebook"
output: html_notebook
---

```{r}
library(geniusr)                         # This package gets lyrics
library(tidyverse)
library(tidytext)
library(wordcloud2)
```

```{r}
# Resolve the Genius API access token through geniusr.
# NOTE(review): presumably this reads a stored credential or prompts for one;
# confirm against the installed geniusr version.
geniusr::genius_token()
```

```{r}
# Query Genius for songs matching the title; the result is auto-printed so a
# song can be picked from it.
geniusr::search_song("Another Brick in the Wall")
```
This is a search for "Another Brick in the Wall" by Pink Floyd.
```{r}
# Pull the metadata for a single song, addressed by its numeric ID.
# NOTE(review): 116425 is presumably an ID taken from the song search; confirm.
geniusr::get_song_meta(116425)
```
This gathers information about the song.
```{r}
# Scrape the album's track listing and keep it for the lyric download step.
# NOTE(review): 14831 is presumably the Genius album ID for "The Wall"; confirm.
TheWall_tracks <- geniusr::scrape_tracklist(14831)

# Auto-print the tracklist.
TheWall_tracks
```
This obtains and displays the album information and tracklist.
```{r}
# Scrape the lyrics behind every track URL and row-bind the per-song results
# into a single data frame.
TheWall_lyrics <- TheWall_tracks$song_lyrics_url %>%
  map_df(scrape_lyrics_url)

TheWall_lyrics

```
This obtains the lyrics for the album.
```{r}
# Tokenise each lyric line into single words (one row per word) and keep only
# the song name alongside the word.
TheWall_words <- TheWall_lyrics %>%
  unnest_tokens(output = word, input = line) %>%
  select(song_name, word)

TheWall_words
```
This breaks all the lyrics down to one word per line in order to make it easier to analyze.
```{r}
# Word frequencies for the album with stop words removed.
# The join key is spelled out so the anti-join cannot silently change if either
# table gains another shared column, and so no "Joining, by" message is emitted.
TheWall_words %>%
  anti_join(get_stopwords(), by = "word") %>%
  count(word, sort = TRUE)
```
This table shows how common each word is by showing the word count, while removing stopwords.
```{r}
# Word cloud of the 200 most frequent non-stop-words.
# The join key and the ranking column are named explicitly instead of relying
# on dplyr's "Joining, by" / "Selecting by" guesses.
TheWall_words %>%
  anti_join(get_stopwords(), by = "word") %>%
  count(word, sort = TRUE) %>%
  top_n(200, n) %>%
  wordcloud2(size = 0.5)
          
```
This word cloud shows the most common words, sized by their word counts.
```{r}
# Load the Bing sentiment lexicon and auto-print it.
bing <- get_sentiments(lexicon = "bing")

bing
```
This is what is called a sentiment analysis. It analyzes each word and relates it to an emotion. This step obtains the sentiment lexicon.
```{r}
# Keep only the lyric words that appear in the Bing lexicon, then count each
# word/sentiment pair, most frequent first.
# The join key is explicit so the match is always on the word itself.
TheWall_words %>%
  inner_join(bing, by = "word") %>%
  count(word, sentiment, sort = TRUE)

```
This is the sentiment analysis of the words in Pink Floyd's album "The Wall".
```{r}
# Bar chart of the ten most frequent words for each Bing sentiment.
TheWall_words %>%
  inner_join(bing, by = "word") %>%          # explicit key: match on the word
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  top_n(10, n) %>%                           # rank by count within sentiment
  ungroup() %>%
  mutate(word = reorder(word, n)) %>%        # order bars by frequency
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(sentiment), scales = "free") +
  labs(
    y = "Pink Floyd's the Wall album: Words that contribute the most to each sentiment",
    x = NULL
  ) +
  scale_fill_viridis_d() +
  coord_flip() +                             # horizontal bars, words on the axis
  theme_minimal()
```
This is a graph showing the sentiment data.
```{r}
# Word cloud of the words the Bing lexicon labels "positive".
TheWall_words %>%
  inner_join(bing, by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  filter(sentiment == "positive") %>%
  select(word, n) %>%                  # wordcloud2 is given word + count only
  wordcloud2()

# Word cloud of the words the Bing lexicon labels "negative".
TheWall_words %>%
  inner_join(bing, by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  filter(sentiment == "negative") %>%
  select(word, n) %>%
  wordcloud2()
  
```
This is a word cloud showing the sentiment data.
```{r}
# Load the NRC sentiment lexicon and auto-print it.
nrc <- get_sentiments(lexicon = "nrc")

nrc
```
This is another sentiment analysis, this time using the NRC Word-Emotion Association Lexicon, which was developed at the National Research Council Canada. This step obtains the sentiment lexicon.
```{r}
# List each sentiment category present in the NRC lexicon once.
distinct(nrc, sentiment)
```
This lists all the different sentiments.
```{r}
# Small-multiple bar charts: the three most frequent words for each NRC
# sentiment category.
TheWall_words %>%
  inner_join(nrc, by = "word") %>%           # explicit key: match on the word
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  top_n(3, n) %>%                            # rank by count within sentiment
  ungroup() %>%
  mutate(word = reorder(word, n)) %>%        # order bars by frequency
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(sentiment), scales = "free") +
  labs(
    y = "Pink Floyd's The Wall: Words that contribute the most to each sentiment",
    x = NULL
  ) +
  scale_fill_viridis_d() +
  coord_flip() +
  theme_minimal()
```
These are mini graphs that show the words that contribute the most to each sentiment.
```{r}
# Count every word/sentiment pair found in the NRC lexicon, most frequent first.
# The join key is explicit so the match is always on the word itself.
TheWall_words %>%
  inner_join(nrc, by = "word") %>%
  count(word, sentiment, sort = TRUE)

```
This lists the words, their sentiments, and the number of times each word was used.

```{r}
# Tokenise each lyric line into bigrams (pairs of adjacent words) and keep only
# the bigram column.
# Lines too short to form a pair can come back as an NA bigram; those rows are
# dropped here so they do not show up as a bogus "most common" entry downstream.
TheWall_bigrams <- TheWall_lyrics %>%
  unnest_tokens(bigram, line, token = "ngrams", n = 2) %>%
  filter(!is.na(bigram)) %>%
  select(bigram)

```
This creates bigrams, which are pairs of adjacent words.
```{r}
# Frequency table of every bigram, most common first.
TheWall_bigrams %>%
  count(bigram, sort = TRUE)
```
This is a table showing the word pairs and the amount of times used.
```{r}
# Bigram frequencies after discarding any pair in which either word is a stop
# word (tidytext's stop_words list).
TheWall_bigrams %>%
  filter(!is.na(bigram)) %>%     # NA pairs would otherwise be united as "NA NA"
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word
  ) %>%
  unite(bigram, word1, word2, sep = " ") %>%
  count(bigram, sort = TRUE)
```
This is a table that shows the bigrams with the amount of times used while removing the stopwords.
```{r}
# Word cloud of stop-word-free bigrams that occur more than once.
TheWall_bigrams %>%
  filter(!is.na(bigram)) %>%     # NA pairs would otherwise be united as "NA NA"
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word
  ) %>%
  unite(bigram, word1, word2, sep = " ") %>%
  count(bigram, sort = TRUE) %>%
  filter(n > 1) %>%              # drop one-off pairs to keep the cloud readable
  wordcloud2(size = 0.5)
```
This is a word cloud of the bigrams, sized by how often they were used.
```{r}
# Words whose followers we want to inspect; these need to be lowercase to match
# the tokenised bigrams.
first_word <- c("i", "you")

# Table of the words that follow each entry of `first_word`, with how often.
# The total column is named directly via `name =` rather than renaming an
# auto-generated `nn` column: whether count() produces `nn` or `n` here depends
# on the dplyr version, so `rename(total = nn)` is not portable.
TheWall_bigrams %>%
  count(bigram, sort = TRUE) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%   # split the pair
  filter(word1 %in% first_word) %>%                      # keep chosen first words
  count(word1, word2, wt = n, sort = TRUE, name = "total")

```
Using the bigrams we can see words that follow a given word. This is a table that represents that.
```{r}
# Words whose followers we want to inspect; these need to be lowercase to match
# the tokenised bigrams.
first_word <- c("i", "you")

# Faceted bar chart: for each entry of `first_word`, the five words that most
# often follow it.
# The total column is named directly via `name =` rather than renaming an
# auto-generated `nn` column: whether count() produces `nn` or `n` here depends
# on the dplyr version, so `rename(total = nn)` is not portable.
TheWall_bigrams %>%
  count(bigram, sort = TRUE) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%   # split the pair
  filter(word1 %in% first_word) %>%                      # keep chosen first words
  count(word1, word2, wt = n, sort = TRUE, name = "total") %>%
  # Fix the factor order so the bars follow the sorted frequency order.
  mutate(word2 = factor(word2, levels = rev(unique(word2)))) %>%
  group_by(word1) %>%
  top_n(5, total) %>%                                    # top followers per word
  ungroup() %>%
  ggplot(aes(word2, total, fill = word1)) +
  scale_fill_viridis_d() +                               # set the color palette
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = NULL, title = "Word following:") +
  facet_wrap(~word1, scales = "free") +                  # one panel per first word
  coord_flip() +
  theme_minimal()

```
These are graphs that represent the bigram analysis in the previous table.































