## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
## ── Attaching packages ──────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ─────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Build the lyrics table for Queen's "A Night At the Opera" with genius_album().
# The summary printed below shows its columns: track_title, track_n, line, lyric.
album <- genius_album(
  artist = "Queen",
  album = "A Night At the Opera"
)
## Joining, by = c("track_title", "track_n", "track_url")
# Column-wise overview of the lyrics table (types and numeric ranges).
summary(object = album)
## track_title track_n line lyric
## Length:407 Min. : 1.000 Min. : 1.00 Length:407
## Class :character 1st Qu.: 4.000 1st Qu.: 10.00 Class :character
## Mode :character Median : 7.000 Median : 19.00 Mode :character
## Mean : 6.619 Mean : 25.21
## 3rd Qu.: 8.000 3rd Qu.: 32.00
## Max. :12.000 Max. :100.00
# List the distinct track titles in order of first appearance.
unique(album[["track_title"]])  # 12 track titles
## [1] "Death on Two Legs (Dedicated to...)"
## [2] "Lazing on a Sunday Afternoon"
## [3] "I’m in Love with My Car"
## [4] "You're My Best Friend"
## [5] "'39"
## [6] "Sweet Lady"
## [7] "Seaside Rendezvous"
## [8] "The Prophet's Song"
## [9] "Love of My Life"
## [10] "Good Company"
## [11] "Bohemian Rhapsody"
## [12] "God Save the Queen"
# Replace every character that is not an ASCII letter, a digit, or a space
# with a single space.
#
# x: character vector (gsub() is vectorised, so any length works).
# Returns a character vector of the same length as x.
removeSpecialCharacters <- function(x) {
  gsub(pattern = "[^a-zA-Z0-9 ]", replacement = " ", x = x)
}
# Normalise the lyrics: strip special characters, then lower-case.
# gsub() (inside removeSpecialCharacters) and tolower() are already vectorised,
# so they are applied to the whole column at once. Wrapping them in sapply()
# is unnecessary and, for character input, attaches the original strings as
# names, which would leave a named vector in album$lyric.
album$lyric <- tolower(removeSpecialCharacters(album$lyric))
# Reshape the lyrics into one row per word.
# Each lyric line first gets a running index within its track (linenumber),
# then unnest_tokens() splits the `lyric` column into a `word` column.
# NOTE(review): in the preview rows printed below, linenumber coincides with
# the existing `line` column — confirm whether both are needed.
tidy_lyrics <- album %>%
  group_by(track_title) %>%
  mutate(linenumber = row_number()) %>%
  ungroup() %>%
  unnest_tokens(output = word, input = lyric)
# Preview the tokenised table.
tidy_lyrics
## # A tibble: 2,860 x 5
## track_title track_n line linenumber word
## <chr> <int> <int> <int> <chr>
## 1 Death on Two Legs (Dedicated to...) 1 1 1 ahh
## 2 Death on Two Legs (Dedicated to...) 1 2 2 you
## 3 Death on Two Legs (Dedicated to...) 1 2 2 suck
## 4 Death on Two Legs (Dedicated to...) 1 2 2 my
## 5 Death on Two Legs (Dedicated to...) 1 2 2 blood
## 6 Death on Two Legs (Dedicated to...) 1 2 2 like
## 7 Death on Two Legs (Dedicated to...) 1 2 2 a
## 8 Death on Two Legs (Dedicated to...) 1 2 2 leech
## 9 Death on Two Legs (Dedicated to...) 1 2 2 you
## 10 Death on Two Legs (Dedicated to...) 1 2 2 break
## # … with 2,850 more rows
“Easy” and “poor” are the most frequent sentiment-bearing words (those found in the Bing lexicon) in “Bohemian Rhapsody”.
# Tally the Bing-lexicon sentiment words in "Bohemian Rhapsody",
# most frequent first.
tidy_lyrics %>%
  filter(track_title == "Bohemian Rhapsody") %>%
  # No `by` is given, so the join matches on the shared column ("word").
  inner_join(get_sentiments("bing")) %>%
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  count(word, sort = TRUE)
## Joining, by = "word"
## # A tibble: 13 x 2
## word n
## <chr> <int>
## 1 easy 4
## 2 poor 4
## 3 die 2
## 4 aching 1
## 5 cry 1
## 6 dead 1
## 7 devil 1
## 8 frightening 1
## 9 killed 1
## 10 love 1
## 11 loves 1
## 12 monstrosity 1
## 13 right 1
Count the positive and negative words in each track, then compute a net sentiment score (positive minus negative) for each track title.
# Net sentiment per track: keep only words present in the Bing lexicon, count
# them by track and polarity, widen to one row per track, then subtract.
# NOTE(review): tracks with no lexicon matches drop out at the inner join
# (the printed result has 11 of the 12 titles) — confirm this is intended.
album_sentiment <- tidy_lyrics %>%
  inner_join(get_sentiments("bing")) %>%
  count(track_title, sentiment) %>%
  # fill = 0 so a track lacking one polarity gets 0 instead of NA.
  spread(key = sentiment, value = n, fill = 0) %>%
  mutate(sentiment = positive - negative)
## Joining, by = "word"
# Auto-print the per-track sentiment table.
album_sentiment
## # A tibble: 11 x 4
## track_title negative positive sentiment
## <chr> <dbl> <dbl> <dbl>
## 1 '39 3 5 2
## 2 Bohemian Rhapsody 13 7 -6
## 3 Death on Two Legs (Dedicated to...) 18 14 -4
## 4 Good Company 3 14 11
## 5 I’m in Love with My Car 1 10 9
## 6 Lazing on a Sunday Afternoon 0 1 1
## 7 Love of My Life 4 9 5
## 8 Seaside Rendezvous 2 13 11
## 9 Sweet Lady 2 25 23
## 10 The Prophet's Song 32 17 -15
## 11 You're My Best Friend 3 15 12
# Bar chart of net sentiment per track, one colour per track.
# coord_flip() turns the bars horizontal so the track titles sit on the
# vertical axis.
album_sentiment %>%
  # The pipe already supplies the data as the first argument, so no `.` needed.
  ggplot(aes(x = track_title, y = sentiment, fill = track_title)) +
  # Legend suppressed; FALSE is spelled out because `F` can be reassigned.
  geom_col(show.legend = FALSE) +
  coord_flip()