Loading the required libraries first
library(geniusr)
library(tidyverse)
library(tidytext)
library(textdata)
library(ggplot2)
library(dplyr)
library(wordcloud)
library(RColorBrewer)
library(reshape2)
library(stringr)
This project is a sentiment analysis of the songs of the indie pop band Foster The People. I am analyzing the sentiments of the songs collectively within their various albums. The code is adapted from the class tutorial and Tom McNamara's tutorial.
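Note that the geniusr functions used below (search_artist(), get_artist_songs_df(), get_lyrics_id()) call the Genius API, so a client token is needed before any of this will run. A minimal setup sketch, assuming the token is supplied through the GENIUS_API_TOKEN environment variable as described in the geniusr documentation; the token string here is only a placeholder:
# One-time setup per session: make the Genius API token available to geniusr.
# "your-genius-api-token" is a placeholder, not a real token.
Sys.setenv(GENIUS_API_TOKEN = "your-genius-api-token")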
# Find artist ID
search_artist("Foster The People") # 703## # A tibble: 1 × 3
## artist_id artist_name artist_url
## <int> <chr> <chr>
## 1 703 Foster the People https://genius.com/artists/Foster-the-people
songs <- get_artist_songs_df(703)
# Get all song IDs
ids <- c(as.character(songs$song_id))
# Create empty dataframe to house them
allLyrics <- data.frame()
# Add lyrics to that df
#for (id in ids) {
#allLyrics <- rbind(get_lyrics_id(id), allLyrics)
#}
The loop above behaves strangely: it returns lyrics for only some of the songs, and some of those lyrics are incomplete. The workaround below wraps the call in tryCatch() and keeps retrying until every song ID has been fetched.
while (length(ids) > 0) {
  for (id in ids) {
    tryCatch({
      allLyrics <- rbind(get_lyrics_id(id), allLyrics)
      successful <- unique(allLyrics$song_id)
      ids <- ids[!ids %in% successful]
      print(paste("done - ", id))
      print(paste("New length is ", length(ids)))
    }, error = function(e){})
  }
}## [1] "done - 351472"
## [1] "New length is 81"
## [1] "done - 2915122"
## [1] "New length is 80"
## [1] "done - 380374"
## [1] "New length is 79"
## [1] "done - 377697"
## [1] "New length is 78"
## [1] "done - 356833"
## [1] "New length is 77"
## [1] "done - 2915131"
## [1] "New length is 76"
## [1] "done - 3364211"
## [1] "New length is 75"
## [1] "done - 489310"
## [1] "New length is 74"
## [1] "done - 6163946"
## [1] "New length is 73"
## [1] "done - 64214"
## [1] "New length is 72"
## [1] "done - 2258730"
## [1] "New length is 71"
## [1] "done - 2128330"
## [1] "New length is 70"
## [1] "done - 452086"
## [1] "New length is 69"
## [1] "done - 187587"
## [1] "New length is 68"
## [1] "done - 2915136"
## [1] "New length is 67"
## [1] "done - 4087035"
## [1] "New length is 66"
## [1] "done - 3067104"
## [1] "New length is 65"
## [1] "done - 64221"
## [1] "New length is 64"
## [1] "done - 1933855"
## [1] "New length is 63"
## [1] "done - 1982413"
## [1] "New length is 62"
## [1] "done - 6340738"
## [1] "New length is 61"
## [1] "done - 187616"
## [1] "New length is 60"
## [1] "done - 352103"
## [1] "New length is 59"
## [1] "done - 3121066"
## [1] "New length is 58"
## [1] "done - 64223"
## [1] "New length is 57"
## [1] "done - 2093740"
## [1] "New length is 56"
## [1] "done - 3292371"
## [1] "New length is 55"
## [1] "done - 3083173"
## [1] "New length is 54"
## [1] "done - 64216"
## [1] "New length is 53"
## [1] "done - 1821707"
## [1] "New length is 52"
## [1] "done - 3121069"
## [1] "New length is 51"
## [1] "done - 3121055"
## [1] "New length is 50"
## [1] "done - 5385028"
## [1] "New length is 49"
## [1] "done - 64215"
## [1] "New length is 48"
## [1] "done - 5584386"
## [1] "New length is 47"
## [1] "done - 64217"
## [1] "New length is 46"
## [1] "done - 3067109"
## [1] "New length is 45"
## [1] "done - 482096"
## [1] "New length is 44"
## [1] "done - 3121065"
## [1] "New length is 43"
## [1] "done - 1573181"
## [1] "New length is 42"
## [1] "done - 64218"
## [1] "New length is 41"
## [1] "done - 379762"
## [1] "New length is 40"
## [1] "done - 2915876"
## [1] "New length is 39"
## [1] "done - 3121057"
## [1] "New length is 38"
## [1] "done - 3067103"
## [1] "New length is 37"
## [1] "done - 3374644"
## [1] "New length is 36"
## [1] "done - 4842185"
## [1] "New length is 35"
## [1] "done - 356823"
## [1] "New length is 34"
## [1] "done - 2915133"
## [1] "New length is 33"
## [1] "done - 1429"
## [1] "New length is 32"
## [1] "done - 2860591"
## [1] "New length is 31"
## [1] "done - 2255415"
## [1] "New length is 30"
## [1] "done - 2257844"
## [1] "New length is 29"
## [1] "done - 460874"
## [1] "New length is 28"
## [1] "done - 3067107"
## [1] "New length is 27"
## [1] "done - 3103488"
## [1] "New length is 26"
## [1] "done - 3403256"
## [1] "New length is 25"
## [1] "done - 3403245"
## [1] "New length is 24"
## [1] "done - 3599670"
## [1] "New length is 23"
## [1] "done - 3121058"
## [1] "New length is 22"
## [1] "done - 4409461"
## [1] "New length is 21"
## [1] "done - 2016764"
## [1] "New length is 20"
## [1] "done - 2146715"
## [1] "New length is 19"
## [1] "done - 380210"
## [1] "New length is 18"
## [1] "done - 489311"
## [1] "New length is 17"
## [1] "done - 5646231"
## [1] "New length is 16"
## [1] "done - 379776"
## [1] "New length is 15"
## [1] "done - 2915139"
## [1] "New length is 14"
## [1] "done - 2387275"
## [1] "New length is 13"
## [1] "done - 3121071"
## [1] "New length is 12"
## [1] "done - 6159351"
## [1] "New length is 11"
## [1] "done - 6163945"
## [1] "New length is 10"
## [1] "done - 64222"
## [1] "New length is 9"
## [1] "done - 64219"
## [1] "New length is 8"
## [1] "done - 7501188"
## [1] "New length is 7"
## [1] "done - 4069722"
## [1] "New length is 6"
## [1] "done - 6163947"
## [1] "New length is 5"
## [1] "done - 2016159"
## [1] "New length is 4"
## [1] "done - 2898013"
## [1] "New length is 3"
## [1] "done - 4637732"
## [1] "New length is 2"
## [1] "done - 7065584"
## [1] "New length is 1"
## [1] "done - 3089129"
## [1] "New length is 0"
allIds <- data.frame(song_id = unique(allLyrics$song_id))
allIds$album <- ""
for (song in allIds$song_id) {
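# Column 12 of the get_song_df() result holds the album title (inferred from the printed output below)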
allIds[match(song,allIds$song_id),2] <- get_song_df(song)[12]
print(allIds[match(song,allIds$song_id),])
}
## song_id album
## 1 3089129 <NA>
## 2 7065584 Torches X (Deluxe Edition)
## 3 4637732 <NA>
## 4 2898013 Torches X (Deluxe Edition)
## 5 2016159 Supermodel
## 6 6163947 In the Darkest of Nights, Let the Birds Sing
## ... (one row is printed per song; output truncated, 82 rows in total)
## 82 351472 Supermodel
allLyrics <- full_join(allIds, allLyrics)
## Joining, by = "song_id"
head(allIds)
## song_id album
## 1 3089129 <NA>
## 2 7065584 Torches X (Deluxe Edition)
## 3 4637732 <NA>
## 4 2898013 Torches X (Deluxe Edition)
## 5 2016159 Supermodel
## 6 6163947 In the Darkest of Nights, Let the Birds Sing
Here, we can see that some songs are not associated with an album. This means they were released as singles and Genius has not assigned them to an album, so the code below replaces the NAs with "Single Only".
allIds$album[is.na(allIds$album)] <- "Single Only"
head(allIds)
## song_id album
## 1 3089129 Single Only
## 2 7065584 Torches X (Deluxe Edition)
## 3 4637732 Single Only
## 4 2898013 Torches X (Deluxe Edition)
## 5 2016159 Supermodel
## 6 6163947 In the Darkest of Nights, Let the Birds Sing
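For reference, the same substitution can be written with tidyr's replace_na() inside a pipe; a sketch of the equivalent:
# Equivalent sketch using tidyr::replace_na() (same effect as the base-R line above)
allIds <- allIds %>%
  mutate(album = replace_na(album, "Single Only"))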
allLyrics2 <- full_join(allLyrics, allIds)
## Joining, by = c("song_id", "album")
allLyricsTokenised <- allLyrics2 %>%
unnest_tokens(word, line)
Looking at the most common words:
head(allLyricsTokenised %>%
count(word, sort = TRUE))
## word n
## 1 you 1026
## 2 the 988
## 3 i 738
## 4 to 591
## 5 and 590
## 6 yeah 414
Interestingly, songs by Foster the People have “you” as the most common word!
There are a lot of stopwords, so the code below removes these.
# Remove stopwords
tidyLyrics <- allLyricsTokenised %>%
anti_join(stop_words)
## Joining, by = "word"
# Top words again
head(tidyLyrics %>%
count(word, sort = TRUE))
## word n
## 1 yeah 414
## 2 run 241
## 3 stop 201
## 4 ooh 172
## 5 love 129
## 6 doo 84
Now the most common word is "yeah", followed by "run", "stop", "ooh", and "love". I guess "you" was carrying a lot of the weight earlier!
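Filler words like "yeah", "ooh", and "doo" carry little meaning but are not in the standard stop_words list. If we wanted to drop them as well, a small custom stopword table would do it; a sketch (the filler list here is my own choice, not part of the analysis that follows):
# Sketch: remove hand-picked lyric filler in addition to the standard stopwords
fillerWords <- tibble(word = c("yeah", "ooh", "doo", "ah", "la"))
tidyLyricsClean <- tidyLyrics %>%
  anti_join(fillerWords, by = "word")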
topFew <- tidyLyrics %>%
group_by(album, word) %>%
mutate(n = row_number()) %>%
ungroup()
Removing extra columns from the dataframe:
topFew <- topFew[,c("album", "word", "n")]
# Taking only max for each word by album
topFew <- topFew %>%
group_by(album, word) %>%
summarise(n = max(n)) %>%
ungroup()
## `summarise()` has grouped output by 'album'. You can override using the
## `.groups` argument.
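(For reference, the row_number()/max() combination above is just a per-album word count; count() produces the same three columns in one step. A sketch of the equivalent:)
# Equivalent sketch: count each word per album directly
topFewAlt <- tidyLyrics %>%
  count(album, word)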
Summing the counts across albums and keeping only the words that appear at least 40 times. Also removing the word "ooh"!
# Subset
topFew <- topFew %>%
group_by(word) %>%
mutate(total = sum(n)) %>%
filter(total >= 40,
word != "ooh") %>%
ungroup()
Assigning a color to each album to use in the graph.
albumCol <- c("#394887", # DS(CW)Remixes
"#9e5a47", # Darkest of Nights
"#f9c784", # Sacred Hearts Club
"#cf57d4", # Sit Next to Me (Versions)
"#e8b0a5", # Spotify Sessions
"#d18943", # Supermodel
"#4C1A57", # Torches
"#52BA4A", # Torches (remix)
"#7268CC", # Torches X (delux)
"#5BBFCF") # Singles
names(albumCol) <- c("Don't Stop (Colors on the Walls) [Remixes]", "In the Darkest of Nights, Let the Birds Sing",
"Sacred Hearts Club", "Sit Next to Me (Versions)", "Spotify Sessions (Live from The Village)", "Supermodel",
"Torches", "Torches (Remixes)", "Torches X (Deluxe Edition)", "NA")
# This ensures bars are stacked in order of release date
topFew$album <- factor(topFew$album, levels = c("Torches",
"Torches (Remixes)",
"Don’t Stop (Color on the Walls) [Remixes]",
"Spotify Sessions (Live from The Village)",
"Supermodel",
"Sacred Hearts Club",
"Sit Next to Me (Versions)",
"In the Darkest of Nights, Let the Birds Sing", "Torches X (Deluxe Edition)", "Single Only"
))
wordsPlot <- ggplot(topFew) +
geom_bar(aes(x = reorder(word, total),
y = n,
fill = as.factor(album)),
colour = "black",
stat = "identity") +
coord_flip() +
labs(title = "Foster The People's most used words",
subtitle = "The words that appear more than 40 times in Foster The People's catalogue",
caption = "Source: genius.com",
y = "Number of appearances",
x = "Word",
fill = "Album")+
scale_fill_manual(values = albumCol) +
theme(title = element_text(face = "italic", size = 10),
panel.border = element_rect(colour = "black", fill=NA, size=1),
panel.background = element_rect(colour = "black", fill = "white"),
panel.grid.major.x = element_line(colour="grey90",size = 0.1, linetype = 1),
axis.title = element_text(face = "italic",size = 9, colour = "black"),
axis.ticks.length = unit(5, units = "pt"),
legend.background = NULL,
legend.position = "top",
legend.key.size = unit(8,"pt"),
legend.box.spacing = unit(5,"pt"),
legend.text = element_text(size = 8),
axis.text.y = element_text(size = 8))
wordsPlot
# Create sentiment dataframe
ftpsentiments <- tidyLyrics %>%
inner_join(get_sentiments("bing"))%>%
count(album, song_name, sentiment) %>%
spread(sentiment, n, fill = 0) %>%
mutate(sentiment = positive - negative)
## Joining, by = "word"
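spread() still works but is superseded in current tidyr; the same reshaping with pivot_wider() would look like this (just a sketch of the equivalent call, not the code that was run):
# Equivalent sketch with pivot_wider() instead of spread()
ftpsentimentsAlt <- tidyLyrics %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(album, song_name, sentiment) %>%
  pivot_wider(names_from = sentiment, values_from = n, values_fill = 0) %>%
  mutate(sentiment = positive - negative)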
# Factor as we did above
ftpsentiments$album <- factor(ftpsentiments$album,
levels = c("Torches",
"Don't Stop (Colors on the Walls) [Remixes]",
"Spotify Sessions (Live from The Village)",
"Supermodel",
"In the Darkest of Nights, Let the Birds Sing"))
# sent plot
sentPlot <- ggplot(ftpsentiments,
aes(reorder(song_name,
sentiment),
sentiment,
fill = album)) +
geom_col(show.legend = FALSE) +
facet_wrap(~album,
ncol = 3,
scales = "free")+
scale_fill_manual(values = albumCol)+
labs(title = "Foster The People's songs ranked by sentiment",
caption = "Source: genius.com",
y = "Sentiment score",
fill = "Album")+
theme(title = element_text(face = "italic", size = 10),
panel.border = element_rect(colour = "black", fill=NA, size=1),
panel.background = element_rect(colour = "black", fill = "white"),
panel.grid.major.x = element_line(colour="grey90",size = 0.1, linetype = 1),
axis.title.x = element_text(face = "italic",size = 8, colour = "black"),
axis.title.y = element_blank(),
axis.ticks.length = unit(5, units = "pt"),
legend.background = NULL,
legend.position = "top",
legend.key.size = unit(8,"pt"),
legend.box.spacing = unit(5,"pt")) +
coord_flip()
sentPlot
This section evaluates the frequency of positive and negative words in the lyrics.
bing_word_counts <- tidyLyrics %>%
inner_join(get_sentiments("bing")) %>%
count(word, sentiment, sort = TRUE) %>%
ungroup()
## Joining, by = "word"
bing_word_counts %>%
group_by(sentiment) %>%
top_n(10) %>%
ungroup() %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(word, n, fill = sentiment)) +
geom_col(show.legend = FALSE) +
facet_wrap(~sentiment, scales = "free_y") +
labs(y = "Contribution to sentiment",
x = NULL) +
coord_flip()
## Selecting by n
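The analysis above uses the Bing lexicon, which only labels words as positive or negative. The textdata package loaded at the top also provides the AFINN lexicon, which scores words from -5 to +5; a sketch of the analogous per-song score (an extra comparison, not part of the original analysis):
# Sketch: per-song sentiment using the AFINN lexicon's -5 to +5 word scores
afinnScores <- tidyLyrics %>%
  inner_join(get_sentiments("afinn"), by = "word") %>%
  group_by(album, song_name) %>%
  summarise(sentiment = sum(value), .groups = "drop")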
Creating a simple wordcloud out of the tokenized words.
tidyLyrics %>%
anti_join(stop_words) %>%
count(word) %>%
with(wordcloud(word, n, max.words = 100))
## Joining, by = "word"
Creating a stylized wordcloud depicting positive emotion words in pink and negative emotion words in gray.
tidyLyrics %>%
inner_join(get_sentiments("bing")) %>%
count(word, sentiment, sort = TRUE) %>%
acast(word ~ sentiment, value.var = "n", fill = 0) %>%
comparison.cloud(colors = c("gray40", "pink"),
max.words = 100)
## Joining, by = "word"
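One last note: wordcloud() places words with some randomness, so the clouds above will differ from run to run. Calling set.seed() before each cloud makes the layout reproducible (the seed value is arbitrary):
# Sketch: fix the random layout of the word clouds for reproducibility
set.seed(1234)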