For this project, I gathered CSVs of the YouTube comments from each day of the trial. I hypothesize that the trial became more popular each day over time. I also think that the sentiment of the comments became more positive over time, since people grew to love Depp during the trial.
First, I am going to load all of the libraries I will be using for this project, and install the wordcloud2 package from GitHub.
library(tidyverse)
library(tidytext)
library(textdata)
library(devtools)
install_github('lchiffon/wordcloud2')
Next, I will import the data from the CSV files I found on GitHub.
library(readr)
day1 <- read_csv("~/Desktop/texxtanalysis project/day1.csv")
## Warning: One or more parsing issues, see `problems()` for details
View(day1)
day2 <- read_csv("~/Desktop/texxtanalysis project/day2.csv")
## Warning: One or more parsing issues, see `problems()` for details
View(day2)
day3 <- read_csv("~/Desktop/texxtanalysis project/day3.csv")
## Warning: One or more parsing issues, see `problems()` for details
View(day3)
day4 <- read_csv("~/Desktop/texxtanalysis project/day4.csv")
## Warning: One or more parsing issues, see `problems()` for details
View(day4)
day5 <- read_csv("~/Desktop/texxtanalysis project/day5.csv")
View(day5)
day6 <- read_csv("~/Desktop/texxtanalysis project/day6.csv")
View(day6)
day7 <- read_csv("~/Desktop/texxtanalysis project/day7.csv")
View(day7)
day8 <- read_csv("~/Desktop/texxtanalysis project/day8.csv")
View(day8)
day10 <- read_csv("~/Desktop/texxtanalysis project/day10.csv")
View(day10)
day12 <- read_csv("~/Desktop/texxtanalysis project/day12.csv")
View(day12)
day15 <- read_csv("~/Desktop/texxtanalysis project/day15.csv")
View(day15)
day16 <- read_csv("~/Desktop/texxtanalysis project/day16.csv")
View(day16)
day17 <- read_csv("~/Desktop/texxtanalysis project/day17.csv")
View(day17)
day18 <- read_csv("~/Desktop/texxtanalysis project/day18 2.csv")
View(day18)
day19 <- read_csv("~/Desktop/texxtanalysis project/day19.csv")
View(day19)
day20 <- read_csv("~/Desktop/texxtanalysis project/day20.csv")
View(day20)
day21 <- read_csv("~/Desktop/texxtanalysis project/day21.csv")
View(day21)
day22 <- read_csv("~/Desktop/texxtanalysis project/day22.csv")
View(day22)
day23 <- read_csv("~/Desktop/texxtanalysis project/day23.csv")
View(day23)
day24 <- read_csv("~/Desktop/texxtanalysis project/day24.csv")
View(day24)
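As a side note, the twenty import blocks above could be collapsed into a loop. Here is a minimal sketch, assuming the same folder layout; the names all_days and day_numbers are my own, and day 18's file ("day18 2.csv") would need special handling since it doesn't follow the pattern.
library(purrr)
# Days that actually have CSV files (9, 11, 13, and 14 are missing)
day_numbers <- c(1:8, 10, 12, 15:24)
files <- file.path("~/Desktop/texxtanalysis project",
                   paste0("day", day_numbers, ".csv"))
# Read every file into a named list: all_days$day1, all_days$day2, ...
all_days <- set_names(map(files, read_csv), paste0("day", day_numbers))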
Now I am going to use unnest_tokens to break each comment up into individual words, so that I am able to do an analysis.
day1_words <- day1 %>%
unnest_tokens(word, messages)
day2_words <- day2 %>%
unnest_tokens(word, messages)
day3_words <- day3 %>%
unnest_tokens(word, messages)
day4_words <- day4 %>%
unnest_tokens(word, messages)
day5_words <- day5 %>%
unnest_tokens(word, messages)
day6_words <- day6 %>%
unnest_tokens(word, messages)
day7_words <- day7 %>%
unnest_tokens(word, messages)
day8_words <- day8 %>%
unnest_tokens(word, messages)
day10_words <- day10 %>%
unnest_tokens(word, messages)
day12_words <- day12 %>%
unnest_tokens(word, messages)
day15_words <- day15 %>%
unnest_tokens(word, messages)
day16_words <- day16 %>%
unnest_tokens(word, messages)
day17_words <- day17 %>%
unnest_tokens(word, messages)
day18_words <- day18 %>%
unnest_tokens(word, messages)
day19_words <- day19 %>%
unnest_tokens(word, messages)
day20_words <- day20 %>%
unnest_tokens(word, messages)
day21_words <- day21 %>%
unnest_tokens(word, messages)
day22_words <- day22 %>%
unnest_tokens(word, messages)
day23_words <- day23 %>%
unnest_tokens(word, messages)
day24_words <- day24 %>%
unnest_tokens(word, messages)
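The tokenizing could likewise be done in one pass. A sketch, assuming the hypothetical all_days list from the import sketch above:
# Break every day's messages column into one word per row
all_words <- map(all_days, ~ unnest_tokens(.x, word, messages))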
Let’s find out how many words were commented on day 1 of the trial compared to day 24. This will give us insight into whether the trial became more popular, which I will analyze soon.
day1_words %>%
count -> day1count
day24_words %>%
count -> day24count
day1count
## # A tibble: 1 × 1
## n
## <int>
## 1 339109
day24count
## # A tibble: 1 × 1
## n
## <int>
## 1 1318056
Day 1 had 339,109 words written in the comments, and Day 24 had 1,318,056 words in the comments! That is roughly 3.9x the words. This shows that from the first day of the trial to the last, the number of YouTube comments increased, most likely meaning that viewership itself grew over time. Either way, this shows there was more engagement with the trial.
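We can check that ratio directly:
# Day 24 had roughly 3.89 times as many words as day 1
day24count$n / day1count$n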
Now I am going to create count variables for each remaining day so that I can later graph the word counts, giving a better visualization of the trial’s popularity over time rather than just on Day 1 and Day 24.
day2_words %>%
count -> day2count
day3_words %>%
count -> day3count
day4_words %>%
count -> day4count
day5_words %>%
count -> day5count
day6_words %>%
count -> day6count
day7_words %>%
count -> day7count
day8_words %>%
count -> day8count
day10_words %>%
count -> day10count
day12_words %>%
count -> day12count
day15_words %>%
count -> day15count
day16_words %>%
count -> day16count
day17_words %>%
count -> day17count
day18_words %>%
count -> day18count
day19_words %>%
count -> day19count
day20_words %>%
count -> day20count
day21_words %>%
count -> day21count
day22_words %>%
count -> day22count
day23_words %>%
count -> day23count
Now I am going to mutate each day’s word count to add a Day column, so I can merge them together and later graph the word volume over time to see whether engagement kept increasing.
day1count %>%
mutate(Day=1)-> day1countmerge
day2count %>%
mutate(Day=2) -> day2countmerge
day3count %>%
mutate(Day=3) -> day3countmerge
day4count %>%
mutate(Day=4) -> day4countmerge
day5count %>%
mutate(Day=5) -> day5countmerge
day6count %>%
mutate(Day=6) -> day6countmerge
day7count %>%
mutate(Day=7) -> day7countmerge
day8count %>%
mutate(Day=8) -> day8countmerge
day10count %>%
mutate(Day=10) -> day10countmerge
day12count %>%
mutate(Day=12) -> day12countmerge
day15count %>%
mutate(Day=15) -> day15countmerge
day16count %>%
mutate(Day=16) -> day16countmerge
day17count %>%
mutate(Day=17) -> day17countmerge
day18count %>%
mutate(Day=18) -> day18countmerge
day19count %>%
mutate(Day=19) -> day19countmerge
day20count %>%
mutate(Day=20) -> day20countmerge
day21count %>%
mutate(Day=21) -> day21countmerge
day22count %>%
mutate(Day=22) -> day22countmerge
day23count %>%
mutate(Day=23) -> day23countmerge
day24count %>%
mutate(Day=24) -> day24countmerge
Now I can merge my variables together to make a graph!
day1countmerge %>%
full_join(day2countmerge) %>%
full_join(day3countmerge) %>%
full_join(day4countmerge) %>%
full_join(day5countmerge) %>%
full_join(day6countmerge) %>%
full_join(day7countmerge) %>%
full_join(day8countmerge) %>%
full_join(day10countmerge) %>%
full_join(day12countmerge) %>%
full_join(day15countmerge) %>%
full_join(day16countmerge) %>%
full_join(day17countmerge) %>%
full_join(day18countmerge) %>%
full_join(day19countmerge) %>%
full_join(day20countmerge) %>%
full_join(day21countmerge) %>%
full_join(day22countmerge) %>%
full_join(day23countmerge) %>%
full_join(day24countmerge) -> mergedcount
## Joining, by = c("n", "Day")
## Joining, by = c("n", "Day")
## Joining, by = c("n", "Day")
## Joining, by = c("n", "Day")
## Joining, by = c("n", "Day")
## Joining, by = c("n", "Day")
## Joining, by = c("n", "Day")
## Joining, by = c("n", "Day")
## Joining, by = c("n", "Day")
## Joining, by = c("n", "Day")
## Joining, by = c("n", "Day")
## Joining, by = c("n", "Day")
## Joining, by = c("n", "Day")
## Joining, by = c("n", "Day")
## Joining, by = c("n", "Day")
## Joining, by = c("n", "Day")
## Joining, by = c("n", "Day")
## Joining, by = c("n", "Day")
## Joining, by = c("n", "Day")
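As an aside, the same table could be built without the long chain of full_join() calls. A sketch, assuming the countmerge tibbles above:
# Stack every per-day count into one tibble in a single call
mergedcount <- bind_rows(
  mget(paste0("day", c(1:8, 10, 12, 15:24), "countmerge"))
)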
Now I will create a line plot with the number of words on the y-axis and the day on the x-axis, so we can see comment volume over time.
mergedcount %>%
arrange(desc(n)) %>%
ggplot(aes(Day, n)) + geom_line()
We see in the graph that the word counts never drop back down to the levels of the first four days, and then they peak dramatically. The most words were commented on day 20, not day 24.
What are the most common words on day 1 compared to day 24? I will use the stop_words list to filter out words we don’t want (if, the, etc.).
day1_words %>%
anti_join(stop_words) %>%
count(word, sort=TRUE)
## Joining, by = "word"
## # A tibble: 19,205 × 2
## word n
## <chr> <int>
## 1 johnny 8569
## 2 amber 4910
## 3 justice 3552
## 4 red_heart 2948
## 5 clown_face 2325
## 6 depp 2325
## 7 face_with_tears_of_joy 1269
## 8 face_vomiting 1090
## 9 rolling_on_the_floor_laughing 1040
## 10 justiceforjohnnydepp 987
## # … with 19,195 more rows
We see the top 10 most common words include johnny, amber, justice, red_heart, clown_face, depp, face_with_tears_of_joy, face_vomiting, rolling_on_the_floor_laughing, and justiceforjohnnydepp.
Now let’s see the most common words for day 24.
day24_words %>%
anti_join(stop_words) %>%
count(word, sort=TRUE)
## Joining, by = "word"
## # A tibble: 57,943 × 2
## word n
## <chr> <int>
## 1 johnny 27228
## 2 blue_heart 25783
## 3 red_heart 22531
## 4 amber 15780
## 5 justice 9648
## 6 face_with_tears_of_joy 9582
## 7 rolling_on_the_floor_laughing 8095
## 8 depp 6491
## 9 camille 6208
## 10 verdict 6078
## # … with 57,933 more rows
The most common words included johnny, blue_heart, red_heart, amber, justice, face_with_tears_of_joy, rolling_on_the_floor_laughing, depp, camille, and verdict. It makes sense that verdict is now a popular word, since this was toward the last day of the trial.
Now I am going to create variables of the word counts for each day so I can merge them and graph word usage over time.
day1_words %>%
anti_join(stop_words) %>%
count(word, sort = TRUE) %>%
mutate(Day = 1) -> day1_to_merge
## Joining, by = "word"
day2_words %>%
anti_join(stop_words) %>%
count(word, sort = TRUE) %>%
mutate(Day = 2) -> day2_to_merge
## Joining, by = "word"
day3_words %>%
anti_join(stop_words) %>%
count(word, sort = TRUE) %>%
mutate(Day = 3) -> day3_to_merge
## Joining, by = "word"
day4_words %>%
anti_join(stop_words) %>%
count(word, sort = TRUE) %>%
mutate(Day = 4) -> day4_to_merge
## Joining, by = "word"
day5_words %>%
anti_join(stop_words) %>%
count(word, sort = TRUE) %>%
mutate(Day = 5) -> day5_to_merge
## Joining, by = "word"
day6_words %>%
anti_join(stop_words) %>%
count(word, sort = TRUE) %>%
mutate(Day = 6) -> day6_to_merge
## Joining, by = "word"
day7_words %>%
anti_join(stop_words) %>%
count(word, sort = TRUE) %>%
mutate(Day = 7) -> day7_to_merge
## Joining, by = "word"
day8_words %>%
anti_join(stop_words) %>%
count(word, sort = TRUE) %>%
mutate(Day = 8) -> day8_to_merge
## Joining, by = "word"
day10_words %>%
anti_join(stop_words) %>%
count(word, sort = TRUE) %>%
mutate(Day = 10) -> day10_to_merge
## Joining, by = "word"
day12_words %>%
anti_join(stop_words) %>%
count(word, sort = TRUE) %>%
mutate(Day = 12) -> day12_to_merge
## Joining, by = "word"
day15_words %>%
anti_join(stop_words) %>%
count(word, sort = TRUE) %>%
mutate(Day = 15) -> day15_to_merge
## Joining, by = "word"
day16_words %>%
anti_join(stop_words) %>%
count(word, sort = TRUE) %>%
mutate(Day = 16) -> day16_to_merge
## Joining, by = "word"
day17_words %>%
anti_join(stop_words) %>%
count(word, sort = TRUE) %>%
mutate(Day = 17) -> day17_to_merge
## Joining, by = "word"
day18_words %>%
anti_join(stop_words) %>%
count(word, sort = TRUE) %>%
mutate(Day = 18) -> day18_to_merge
## Joining, by = "word"
day19_words %>%
anti_join(stop_words) %>%
count(word, sort = TRUE) %>%
mutate(Day = 19) -> day19_to_merge
## Joining, by = "word"
day20_words %>%
anti_join(stop_words) %>%
count(word, sort = TRUE) %>%
mutate(Day = 20) -> day20_to_merge
## Joining, by = "word"
day21_words %>%
anti_join(stop_words) %>%
count(word, sort = TRUE) %>%
mutate(Day = 21) -> day21_to_merge
## Joining, by = "word"
day22_words %>%
anti_join(stop_words) %>%
count(word, sort = TRUE) %>%
mutate(Day = 22) -> day22_to_merge
## Joining, by = "word"
day23_words %>%
anti_join(stop_words) %>%
count(word, sort = TRUE) %>%
mutate(Day = 23) -> day23_to_merge
## Joining, by = "word"
day24_words %>%
anti_join(stop_words) %>%
count(word, sort = TRUE) %>%
mutate(Day = 24) -> day24_to_merge
## Joining, by = "word"
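These twenty near-identical blocks follow one pattern, so they could also be generated in a single pass. A sketch, again assuming the hypothetical all_words list from earlier (merged_days_alt is my own name):
# Count words per day and tag each count with its day number
merged_days_alt <- imap_dfr(all_words, function(df, nm) {
  df %>%
    anti_join(stop_words, by = "word") %>%
    count(word, sort = TRUE) %>%
    mutate(Day = as.integer(sub("day", "", nm)))
})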
Now I will actually merge these variables together.
day1_to_merge %>%
full_join(day2_to_merge) %>%
full_join(day3_to_merge) %>%
full_join(day4_to_merge) %>%
full_join(day5_to_merge) %>%
full_join(day6_to_merge) %>%
full_join(day7_to_merge) %>%
full_join(day8_to_merge) %>%
full_join(day10_to_merge) %>%
full_join(day12_to_merge) %>%
full_join(day15_to_merge) %>%
full_join(day16_to_merge) %>%
full_join(day17_to_merge) %>%
full_join(day18_to_merge) %>%
full_join(day19_to_merge) %>%
full_join(day20_to_merge) %>%
full_join(day21_to_merge) %>%
full_join(day22_to_merge) %>%
full_join(day23_to_merge) %>%
full_join(day24_to_merge) -> merged_days
## Joining, by = c("word", "n", "Day")
## Joining, by = c("word", "n", "Day")
## Joining, by = c("word", "n", "Day")
## Joining, by = c("word", "n", "Day")
## Joining, by = c("word", "n", "Day")
## Joining, by = c("word", "n", "Day")
## Joining, by = c("word", "n", "Day")
## Joining, by = c("word", "n", "Day")
## Joining, by = c("word", "n", "Day")
## Joining, by = c("word", "n", "Day")
## Joining, by = c("word", "n", "Day")
## Joining, by = c("word", "n", "Day")
## Joining, by = c("word", "n", "Day")
## Joining, by = c("word", "n", "Day")
## Joining, by = c("word", "n", "Day")
## Joining, by = c("word", "n", "Day")
## Joining, by = c("word", "n", "Day")
## Joining, by = c("word", "n", "Day")
## Joining, by = c("word", "n", "Day")
The line graph below shows the most frequent words used during the trial (listed in the key on the right), with the number of times they were used over time.
merged_days %>%
arrange(desc(n)) %>%
head(50) %>%
ggplot(aes(Day, n, color = word)) + geom_line()
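One caveat: head(50) keeps the 50 largest word-day rows, so a word’s line can appear broken where it falls out of that top 50. A sketch that instead tracks the overall top five words across every day (top_words is my own name):
# Find the five most common words across the whole trial
top_words <- merged_days %>%
  count(word, wt = n, sort = TRUE) %>%
  head(5) %>%
  pull(word)

# Plot only those words, day by day
merged_days %>%
  filter(word %in% top_words) %>%
  ggplot(aes(Day, n, color = word)) + geom_line()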
Now I will create a word cloud for day 1, capped at 50 words. This visual makes it easier to see which words were used frequently and how much they were used compared to one another.
library(wordcloud2)
day1_words %>%
anti_join(stop_words) %>%
count(word, sort=TRUE) %>%
head(50) %>%
wordcloud2()
## Joining, by = "word"
Through this I can see that ‘johnny’ and ‘amber’ are obviously noticeable. It is interesting to see ‘red_heart’ and ‘clown_face’, which represent emojis!
Now I will create a word cloud for day 24.
library(wordcloud2)
day24_words %>%
anti_join(stop_words) %>%
count(word, sort=TRUE) %>%
head(50) %>%
wordcloud2()
## Joining, by = "word"
It is interesting that ‘johnny’ and ‘red_heart’ are more noticeable compared to ‘amber’. Also, the ‘rolling_on_the_floor_laughing’ emoji is extremely noticeable and ‘justice’ is as well.
Now, I want to see the most positive words for day 1.
day1_words %>%
anti_join(stop_words) %>%
inner_join(get_sentiments('afinn')) %>%
count(word, value, sort=TRUE) %>%
arrange(desc(value)) -> day1pos
## Joining, by = "word"
## Joining, by = "word"
day1pos
## # A tibble: 1,066 × 3
## word value n
## <chr> <dbl> <int>
## 1 outstanding 5 3
## 2 win 4 397
## 3 lmao 4 274
## 4 wow 4 231
## 5 wins 4 141
## 6 funny 4 82
## 7 winning 4 78
## 8 lmfao 4 42
## 9 amazing 4 39
## 10 fun 4 34
## # … with 1,056 more rows
The most positive words include outstanding, win, lmao, wow, wins, funny, winning, lmfao, amazing, and fun. Let’s compare with the most positive words for day 24.
day24_words %>%
anti_join(stop_words) %>%
inner_join(get_sentiments('afinn')) %>%
count(word, value, sort=TRUE) %>%
arrange(desc(value))
## Joining, by = "word"
## Joining, by = "word"
## # A tibble: 1,450 × 3
## word value n
## <chr> <dbl> <int>
## 1 outstanding 5 16
## 2 superb 5 6
## 3 hurrah 5 5
## 4 win 4 4436
## 5 wins 4 1923
## 6 lmao 4 1096
## 7 wow 4 836
## 8 amazing 4 380
## 9 winning 4 361
## 10 funny 4 350
## # … with 1,440 more rows
This includes outstanding, superb, hurrah, win, wins, lmao, wow, amazing, winning, and funny. Now let’s look at the most negative words for day 1!
day1_words %>%
anti_join(stop_words) %>%
inner_join(get_sentiments('afinn')) %>%
count(word, value, sort=TRUE) %>%
arrange(desc(-value)) -> day_1_negative_words
## Joining, by = "word"
## Joining, by = "word"
day_1_negative_words
## # A tibble: 1,066 × 3
## word value n
## <chr> <dbl> <int>
## 1 motherfucking -5 1
## 2 wtf -4 362
## 3 torture -4 5
## 4 fraud -4 4
## 5 damned -4 2
## 6 damn -4 1
## 7 hell -4 1
## 8 shrew -4 1
## 9 tortured -4 1
## 10 torturing -4 1
## # … with 1,056 more rows
This includes motherfucking, wtf, torture, fraud, damned, damn, hell, shrew, tortured, and torturing. Let’s compare with the most negative words for day 24.
day24_words %>%
anti_join(stop_words) %>%
inner_join(get_sentiments('afinn')) %>%
count(word, value, sort=TRUE) %>%
arrange(desc(-value))
## Joining, by = "word"
## Joining, by = "word"
## # A tibble: 1,450 × 3
## word value n
## <chr> <dbl> <int>
## 1 wtf -4 728
## 2 fraud -4 25
## 3 torture -4 14
## 4 damned -4 6
## 5 catastrophic -4 2
## 6 tortured -4 2
## 7 damn -4 1
## 8 frauds -4 1
## 9 fraudsters -4 1
## 10 fraudulent -4 1
## # … with 1,440 more rows
This includes wtf, fraud, torture, damned, catastrophic, tortured, damn, frauds, fraudsters, and fraudulent.
Now I am going to use the bing lexicon to label the most common words on day 1 as positive or negative.
day1_words %>%
group_by(word) %>%
count(word, sort=TRUE) %>%
inner_join(get_sentiments('bing')) -> day1bing
## Joining, by = "word"
day1bing
## # A tibble: 1,668 × 3
## # Groups: word [1,667]
## word n sentiment
## <chr> <int> <chr>
## 1 like 1528 positive
## 2 love 943 positive
## 3 lies 587 negative
## 4 objection 579 negative
## 5 good 559 positive
## 6 bad 402 negative
## 7 win 397 positive
## 8 right 374 positive
## 9 support 309 positive
## 10 guilty 299 negative
## # … with 1,658 more rows
Now I am going to plot these results so we can visualize them easier.
day1bing %>%
group_by(word) %>%
arrange(desc(n)) %>%
head(10) %>%
ggplot(aes(reorder(word, n), n, fill = sentiment)) + geom_col() +
coord_flip()
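A variation worth trying: give positive and negative words separate panels, so the top ten of each are visible. A sketch:
day1bing %>%
  group_by(sentiment) %>%
  slice_max(n, n = 10) %>%                   # top 10 words per sentiment
  ggplot(aes(reorder(word, n), n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  facet_wrap(~sentiment, scales = "free_y")  # one panel per sentiment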
Now I want to group sentiment values by day, then calculate the mean for each day. This will allow us to see how positive or negative the comments are.
day1_words %>%
inner_join(get_sentiments('afinn')) -> day1_words_afinn
## Joining, by = "word"
mean(day1_words_afinn$value) -> day1mean
day2_words %>%
inner_join(get_sentiments('afinn')) -> day2_words_afinn
## Joining, by = "word"
mean(day2_words_afinn$value) -> day2mean
day3_words %>%
inner_join(get_sentiments('afinn')) -> day3_words_afinn
## Joining, by = "word"
mean(day3_words_afinn$value) -> day3mean
day4_words %>%
inner_join(get_sentiments('afinn')) -> day4_words_afinn
## Joining, by = "word"
mean(day4_words_afinn$value) -> day4mean
day5_words %>%
inner_join(get_sentiments('afinn')) -> day5_words_afinn
## Joining, by = "word"
mean(day5_words_afinn$value) -> day5mean
day6_words %>%
inner_join(get_sentiments('afinn')) -> day6_words_afinn
## Joining, by = "word"
mean(day6_words_afinn$value) -> day6mean
day7_words %>%
inner_join(get_sentiments('afinn')) -> day7_words_afinn
## Joining, by = "word"
mean(day7_words_afinn$value) -> day7mean
day8_words %>%
inner_join(get_sentiments('afinn')) -> day8_words_afinn
## Joining, by = "word"
mean(day8_words_afinn$value) -> day8mean
day10_words %>%
inner_join(get_sentiments('afinn')) -> day10_words_afinn
## Joining, by = "word"
mean(day10_words_afinn$value) -> day10mean
day12_words %>%
inner_join(get_sentiments('afinn')) -> day12_words_afinn
## Joining, by = "word"
mean(day12_words_afinn$value) -> day12mean
day15_words %>%
inner_join(get_sentiments('afinn')) -> day15_words_afinn
## Joining, by = "word"
mean(day15_words_afinn$value) -> day15mean
day16_words %>%
inner_join(get_sentiments('afinn')) -> day16_words_afinn
## Joining, by = "word"
mean(day16_words_afinn$value) -> day16mean
day17_words %>%
inner_join(get_sentiments('afinn')) -> day17_words_afinn
## Joining, by = "word"
mean(day17_words_afinn$value) -> day17mean
day18_words %>%
inner_join(get_sentiments('afinn')) -> day18_words_afinn
## Joining, by = "word"
mean(day18_words_afinn$value) -> day18mean
day19_words %>%
inner_join(get_sentiments('afinn')) -> day19_words_afinn
## Joining, by = "word"
mean(day19_words_afinn$value) -> day19mean
day20_words %>%
inner_join(get_sentiments('afinn')) -> day20_words_afinn
## Joining, by = "word"
mean(day20_words_afinn$value) -> day20mean
day21_words %>%
inner_join(get_sentiments('afinn')) -> day21_words_afinn
## Joining, by = "word"
mean(day21_words_afinn$value) -> day21mean
day22_words %>%
inner_join(get_sentiments('afinn')) -> day22_words_afinn
## Joining, by = "word"
mean(day22_words_afinn$value) -> day22mean
day23_words %>%
inner_join(get_sentiments('afinn')) -> day23_words_afinn
## Joining, by = "word"
mean(day23_words_afinn$value) -> day23mean
day24_words %>%
inner_join(get_sentiments('afinn')) -> day24_words_afinn
## Joining, by = "word"
mean(day24_words_afinn$value) -> day24mean
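All twenty of those blocks reduce to one grouped pipeline. A sketch using merged_days; note that merged_days already had stop words removed, so these means will differ somewhat from the per-day means above, which were computed over all words:
merged_days %>%
  inner_join(get_sentiments('afinn'), by = "word") %>%
  group_by(Day) %>%
  # weight each word's AFINN value by how often it appeared that day
  summarize(mean_sentiment = sum(value * n) / sum(n))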
Now I am going to graph the sentiment values, summed for each day.
merged_days %>%
inner_join(get_sentiments('afinn')) %>%
group_by(Day) %>%
summarize(total = sum(value)) %>%
ggplot(aes(Day, total, fill = total)) + geom_col() + geom_smooth()
## Joining, by = "word"
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
This graph starts at 0 at the top and extends down into negative numbers. Here we can see that the comments carried a negative overall sentiment throughout the trial. There are missing days in the data, which explains the gaps in the graph. It is interesting to see how negative the values are from day 15 on.
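Since the daily means computed above never made it into a plot, a sketch like the following would chart average sentiment per word rather than the daily total (means_by_day is my own name):
means_by_day <- tibble(
  Day = c(1:8, 10, 12, 15:24),
  mean_sentiment = c(day1mean, day2mean, day3mean, day4mean, day5mean,
                     day6mean, day7mean, day8mean, day10mean, day12mean,
                     day15mean, day16mean, day17mean, day18mean, day19mean,
                     day20mean, day21mean, day22mean, day23mean, day24mean)
)
means_by_day %>%
  ggplot(aes(Day, mean_sentiment)) + geom_line() + geom_point()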