Reading & Exploring the data file
# Download the lyrics CSV from GitHub and parse it into a tibble.
df <- read_csv(
  file = "https://raw.githubusercontent.com/bdioli/KanyeWestText/master/data/kanye_lyrics.csv"
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## year = col_double(),
## type = col_character(),
## project = col_character(),
## track_num = col_double(),
## song = col_character(),
## lyrics = col_character()
## )
# Preview the first six rows to check how the columns were parsed.
df %>%
  head(n = 6)
## # A tibble: 6 x 6
## year type project track_num song lyrics
## <dbl> <chr> <chr> <dbl> <chr> <chr>
## 1 2003 mixta… Get Well … 1 Intro "[Intro: DeRay Davis]Kanye, …
## 2 2003 mixta… Get Well … 2 Live From Irv… "Intro: Talib Kweli:\nALRIGH…
## 3 2003 mixta… Get Well … 4 Jesus Walks "[Intro]\nWe at war\nWe at w…
## 4 2003 mixta… Get Well … 5 Through The W… "[Intro]\nYo, Gee, they can'…
## 5 2003 mixta… Get Well … 6 Two Words "[Intro: Kanye West]\nWe in …
## 6 2003 mixta… Get Well … 7 Champions "[Verse 1: Quavo](Quavo!)Lif…
# Kanye West was hospitalized in 2016 and diagnosed with bipolar disorder.
# This project is going to review his lyrics before and after his hospitalization.
# I hypothesize that Kanye has incorporated more religious and biblical references
# as well as a more trusting and positive attitude in his music after his
# hospitalization in 2016.
# Number of tracks per project (with its year and type), largest first.
count(df, project, year, type, sort = TRUE)
## # A tibble: 10 x 4
## project year type n
## <chr> <dbl> <chr> <int>
## 1 Late Registration 2005 album 23
## 2 The College Dropout 2004 album 21
## 3 The Life of Pablo 2016 album 20
## 4 Graduation 2007 album 15
## 5 My Beautiful Dark Twisted Fantasy 2010 album 14
## 6 808s & Heartbreak 2008 album 12
## 7 I'm Good 2003 mixtape 12
## 8 Can't Tell Me Nothing 2007 mixtape 11
## 9 Yeezus 2013 album 10
## 10 Get Well Soon... 2003 mixtape 9
# Only one album (The Life of Pablo, with 20 songs) is dated after 2015.
# How often each song title occurs; titles with n > 1 appear more than once.
count(df, song, sort = TRUE)
## # A tibble: 140 x 2
## song n
## <chr> <int>
## 1 Two Words 3
## 2 Can't Tell Me Nothing 2
## 3 Intro 2
## 4 Jesus Walks 2
## 5 Stronger 2
## 6 Through the Wire 2
## 7 30 Hours 1
## 8 A Million and One Freestyle 1
## 9 Addiction 1
## 10 All Falls Down 1
## # … with 130 more rows
# Songs are duplicated in the mixtapes, so only albums are used going forward.
Creating most used words pre & post 2016
# Keep album tracks only and label each one with its era:
# "Post 2016" when year >= 2016 (so 2016 itself is included), else "Pre 2016".
# NOTE(review): the only album dated 2016 is The Life of Pablo; confirm its
# release date relative to the hospitalization before reading "Post 2016" as
# "after the hospitalization".
df_kw <- df %>%
  filter(type == "album") %>%
  mutate(Era = if_else(year >= 2016, "Post 2016", "Pre 2016"))
# Table of the 12 most used words of each era, shown with their counts in
# BOTH eras.
#
# Words are ranked within each era, but a row is kept for every word that is
# top-12 in either era and its real count is reported for both. A 0 therefore
# means the word does not occur in that era's albums at all, not merely that
# it fell outside that era's top 12.
df_kw %>%
  unnest_tokens(word, lyrics) %>%
  anti_join(stop_words, by = "word") %>% # remove common stop words
  count(word, Era) %>%
  group_by(Era) %>%
  # Sort within each era so row_number() is the word's rank in that era
  # (ties broken by position, like slice_max(with_ties = FALSE)).
  arrange(desc(n), .by_group = TRUE) %>%
  mutate(in_top = row_number() <= 12) %>%
  # Keep every era's row for a word as soon as it is top-12 in any era.
  group_by(word) %>%
  filter(any(in_top)) %>%
  ungroup() %>%
  select(-in_top) %>%
  pivot_wider(
    names_from = "Era",
    values_from = "n",
    values_fill = list(n = 0)
  ) %>%
  kable()
## Joining, by = "word"
| word   | Post 2016 | Pre 2016 |
|:-------|----------:|---------:|
| kanye  |        69 |      283 |
| bam    |        62 |        0 |
| feel   |        61 |        0 |
| ey     |        56 |        0 |
| west   |        50 |      258 |
| verse  |        36 |      219 |
| god    |        34 |        0 |
| love   |        32 |      129 |
| hook   |        31 |      235 |
| yeezy  |        29 |        0 |
| baby   |        26 |       90 |
| deep   |        26 |        0 |
| la     |         0 |      183 |
| shit   |         0 |      127 |
| niggas |         0 |       99 |
| life   |         0 |       98 |
| 2      |         0 |       90 |
| nigga  |         0 |       90 |
Creating chart of most used words
# Per-era word frequencies used by the charts below.
# Stop words, tokens that start with a digit, and the tokens "la", "hook" and
# "verse" are removed. The result is left grouped by Era, so later steps such
# as slice_max() operate within each era.
words <- df_kw %>%
  unnest_tokens(word, lyrics) %>%
  anti_join(stop_words) %>% # remove common stop words
  count(word, Era, sort = TRUE) %>%
  filter(
    !str_detect(word, "^[0-9]"),
    !word %in% c("la", "hook", "verse")
  ) %>%
  group_by(Era)
## Joining, by = "word"
# Bar chart of the 12 most used words in each era, one facet per era.
#
# reorder_within() + scale_y_reordered() sort the bars inside each facet, and
# scales = "free_y" gives every facet its own word axis. With one global
# fct_reorder() and a shared axis, bars were not ordered within a facet and
# each facet showed empty rows for words that were only top-12 in the other
# era.
words %>%
  group_by(Era) %>% # explicit, so the top 12 is always taken per era
  slice_max(n, n = 12, with_ties = FALSE) %>%
  ungroup() %>%
  mutate(word = reorder_within(word, n, Era)) %>%
  ggplot(aes(x = n, y = word, fill = Era)) +
  geom_col(show.legend = FALSE) +
  scale_y_reordered() + # strips the era suffix added by reorder_within()
  facet_wrap(~Era, scales = "free_y") +
  theme_calc() +
  labs(
    title = "Most used words in Kanye's song pre & post 2016",
    y = "",
    x = "Word Frequency in Song lyrics"
  )

# The chart shows that words like "god" and "deep" rank among his most used
# words post 2016, whereas words like "shit", "nigga" and "girl" no longer
# rank among his most used words post 2016.
Word Cloud for words Post 2016
# Eight-colour "Dark2" palette for the word cloud.
pal <- brewer.pal(n = 8, name = "Dark2")
# Word cloud of the "Post 2016" era: at most 100 words, each used at least
# 20 times, plotted in decreasing order of frequency.
with(
  filter(words, Era == "Post 2016"),
  wordcloud(
    words = word,
    freq = n,
    min.freq = 20,
    max.words = 100,
    random.order = FALSE,
    rot.per = 0.2,
    colors = pal
  )
)

Word Cloud for words Pre 2016
# Eight-colour "Dark2" palette for the word cloud.
pal <- brewer.pal(n = 8, name = "Dark2")
# Word cloud of the "Pre 2016" era: at most 100 words, each used at least
# 50 times, plotted in decreasing order of frequency.
with(
  filter(words, Era == "Pre 2016"),
  wordcloud(
    words = word,
    freq = n,
    min.freq = 50,
    max.words = 100,
    random.order = FALSE,
    rot.per = 0.2,
    colors = pal
  )
)

Sentiment column chart for Kanye’s lyrics Pre & post 2016
# Share of each NRC sentiment within each era, drawn as dodged bars.
#
# `words` holds one row per word and era with its frequency in `n`, so the
# tally is weighted by that frequency (wt = n). An unweighted count() would
# count distinct words and ignore how often each one is used in the lyrics.
words %>%
  ungroup() %>%
  inner_join(get_sentiments("nrc"), by = "word") %>%
  count(Era, sentiment, wt = n) %>%
  group_by(Era) %>% # percentages are computed within each era
  mutate(per = n / sum(n)) %>%
  ungroup() %>%
  mutate(sentiment = fct_reorder(sentiment, per)) %>%
  ggplot(aes(y = per, x = sentiment, fill = Era)) +
  geom_col(position = "dodge") +
  theme_calc() +
  labs(
    title = "Sentiment Analysis of Kanye's lyrics pre & post 2016",
    y = "Percent of Total",
    x = ""
  ) +
  scale_y_continuous(labels = percent_format())
## Joining, by = "word"

# We can clearly see the emotions of trust and joy increasing, and negativity
# and sadness decreasing, post 2016. It is safe to say that after his
# hospitalization in 2016 Kanye has transitioned into becoming more trusting
# and positive, as shown through his lyrics.