library(tidytext)
library(RCurl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(janeaustenr)
library(stringr)
library(textdata)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:RCurl':
##
## complete
library(ggplot2)
library(rjson)
library(jsonlite)
##
## Attaching package: 'jsonlite'
## The following objects are masked from 'package:rjson':
##
## fromJSON, toJSON
library(httr)
library(XML)
library(rvest)
library(wordcloud)
## Loading required package: RColorBrewer
library(wordcloud2)
library(RColorBrewer)
library(rtweet)
##
## Attaching package: 'rtweet'
## The following object is masked from 'package:jsonlite':
##
## flatten
library(tm)
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:httr':
##
## content
## The following object is masked from 'package:ggplot2':
##
## annotate
## remove.packages("rlang")
## install.packages("rlang")
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble 3.1.6 v purrr 0.3.4
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x NLP::annotate() masks ggplot2::annotate()
## x tidyr::complete() masks RCurl::complete()
## x NLP::content() masks httr::content()
## x dplyr::filter() masks stats::filter()
## x purrr::flatten() masks rtweet::flatten(), jsonlite::flatten()
## x jsonlite::fromJSON() masks rjson::fromJSON()
## x readr::guess_encoding() masks rvest::guess_encoding()
## x dplyr::lag() masks stats::lag()
## x jsonlite::toJSON() masks rjson::toJSON()
##install.packages("tidytext")
library(tidytext)
library(dplyr)
# Preview the AFINN lexicon: one row per word with a numeric score in `value`.
get_sentiments("afinn")
## # A tibble: 2,477 x 2
## word value
## <chr> <dbl>
## 1 abandon -2
## 2 abandoned -2
## 3 abandons -2
## 4 abducted -2
## 5 abduction -2
## 6 abductions -2
## 7 abhor -3
## 8 abhorred -3
## 9 abhorrent -3
## 10 abhors -3
## # ... with 2,467 more rows
# Preview the NRC lexicon: a word appears once per category it is tagged with
# (column `sentiment`), so the same word can occupy several rows.
get_sentiments("nrc")
## # A tibble: 13,875 x 2
## word sentiment
## <chr> <chr>
## 1 abacus trust
## 2 abandon fear
## 3 abandon negative
## 4 abandon sadness
## 5 abandoned anger
## 6 abandoned fear
## 7 abandoned negative
## 8 abandoned sadness
## 9 abandonment anger
## 10 abandonment fear
## # ... with 13,865 more rows
# Preview the Bing lexicon: one row per word with a label in `sentiment`.
get_sentiments("bing")
## # A tibble: 6,786 x 2
## word sentiment
## <chr> <chr>
## 1 2-faces negative
## 2 abnormal negative
## 3 abolish negative
## 4 abominable negative
## 5 abominably negative
## 6 abominate negative
## 7 abomination negative
## 8 abort negative
## 9 aborted negative
## 10 aborts negative
## # ... with 6,776 more rows
library(janeaustenr)
library(dplyr)
library(stringr)
# Turn the novels returned by austen_books() into one word per row.
# Before tokenising, each line is tagged with its position inside its book and
# with a running chapter counter that increments on every "chapter <numeral>"
# heading (matched case-insensitively).
tidy_books <- austen_books() %>%
  group_by(book) %>%
  mutate(
    linenumber = seq_len(n()),
    chapter = cumsum(
      str_detect(text, regex("^chapter [\\divxlc]", ignore_case = TRUE))
    )
  ) %>%
  ungroup() %>%
  unnest_tokens(output = word, input = text)

# Show the first five tokens as a markdown table.
knitr::kable(head(tidy_books, n = 5))
book | linenumber | chapter | word |
---|---|---|---|
Sense & Sensibility | 1 | 0 | sense |
Sense & Sensibility | 1 | 0 | and |
Sense & Sensibility | 1 | 0 | sensibility |
Sense & Sensibility | 3 | 0 | by |
Sense & Sensibility | 3 | 0 | jane |
# NRC entries tagged with the "joy" category.
nrc_joy <- filter(get_sentiments("nrc"), sentiment == "joy")

# Joy words that occur in Emma, most frequent first.
count(
  inner_join(filter(tidy_books, book == "Emma"), nrc_joy),
  word,
  sort = TRUE
)
## Joining, by = "word"
## # A tibble: 301 x 2
## word n
## <chr> <int>
## 1 good 359
## 2 friend 166
## 3 hope 143
## 4 happy 125
## 5 love 117
## 6 deal 92
## 7 found 92
## 8 present 89
## 9 kind 82
## 10 happiness 76
## # ... with 291 more rows
library(tidyr)
# Net Bing sentiment (positive minus negative word count) for every
# 80-line section of every book.
jane_austen_sentiment <- tidy_books %>%
  inner_join(get_sentiments("bing")) %>%
  mutate(index = linenumber %/% 80) %>%
  count(book, index, sentiment) %>%
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = 0
  ) %>%
  mutate(sentiment = positive - negative)
## Joining, by = "word"
library(ggplot2)
# One panel per book; each bar is the net sentiment of one 80-line section.
ggplot(
  data = jane_austen_sentiment,
  mapping = aes(x = index, y = sentiment, fill = book)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(book), ncol = 2, scales = "free_x")
# Tokens of Pride & Prejudice only.
pride_prejudice <- filter(tidy_books, book == "Pride & Prejudice")

# AFINN: sum the numeric word scores within each 80-line section.
afinn <- mutate(
  summarise(
    group_by(
      inner_join(pride_prejudice, get_sentiments("afinn")),
      index = linenumber %/% 80
    ),
    sentiment = sum(value)
  ),
  method = "AFINN"
)
## Joining, by = "word"
# Bing and NRC only label words, so for each lexicon count positive and
# negative words per 80-line section and take the difference.
# The list names become the `method` column via `.id`.
bing_and_nrc <- bind_rows(
  list(
    "Bing et al." = inner_join(pride_prejudice, get_sentiments("bing")),
    "NRC" = inner_join(
      pride_prejudice,
      filter(
        get_sentiments("nrc"),
        sentiment == "positive" | sentiment == "negative"
      )
    )
  ),
  .id = "method"
) %>%
  count(method, index = linenumber %/% 80, sentiment) %>%
  pivot_wider(names_from = sentiment, values_from = n, values_fill = 0) %>%
  mutate(sentiment = positive - negative)
## Joining, by = "word"
## Joining, by = "word"
# Stack the three lexicon results and draw one panel per method.
ggplot(
  data = bind_rows(afinn, bing_and_nrc),
  mapping = aes(x = index, y = sentiment, fill = method)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(method), ncol = 1, scales = "free_y")
#### Visualization using word clouds
# Word cloud of up to 100 words from the novels, after dropping the
# entries of `stop_words`.
with(
  count(anti_join(tidy_books, stop_words), word),
  wordcloud(word, n, max.words = 100)
)
## Joining, by = "word"
## Warning in wordcloud(word, n, max.words = 100): miss could not be fit on page.
## It will not be plotted.
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Comparison cloud of Bing-labelled words: reshape the per-word counts into a
# word x sentiment matrix (0 where a word lacks a label) and plot it.
comparison.cloud(
  acast(
    count(
      inner_join(tidy_books, get_sentiments("bing")),
      word, sentiment,
      sort = TRUE
    ),
    word ~ sentiment,
    value.var = "n",
    fill = 0
  ),
  colors = c("blue", "green"),
  max.words = 100
)
## Joining, by = "word"
# Download the article and keep the text of every <p> element, one vector
# entry per paragraph.
speech_website <- read_html(
  "https://www.washingtonpost.com/politics/2022/03/16/text-zelensky-address-congress/"
)
speech <- html_text(html_nodes(speech_website, "p"))
## install.packages("syuzhet")
library(syuzhet)
##
## Attaching package: 'syuzhet'
## The following object is masked from 'package:rtweet':
##
## get_tokens
# Score paragraphs 2 to 50 of the scraped page, one number per paragraph,
# using get_sentiment()'s default method.
# NOTE(review): the 2:50 range is hard-coded; the long run of 0.00 scores at
# the end suggests it reaches past the speech text (or past the end of
# `speech`) -- confirm against the page.
get_sentiment(speech[2:50])
## [1] 0.25 1.55 2.90 0.45 5.15 7.55 -0.15 -1.15 3.75 2.25 -1.70 -1.75
## [13] 0.45 -0.90 3.10 1.45 1.90 0.50 0.00 0.00 0.00 0.00 0.00 0.00
## [25] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [37] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [49] 0.00
# Per-paragraph NRC counts rendered as a table: one row per paragraph, with
# columns for eight emotions plus `negative` and `positive`.
knitr::kable(get_nrc_sentiment(speech[2:50]))
## Warning: `spread_()` was deprecated in tidyr 1.2.0.
## Please use `spread()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
anger | anticipation | disgust | fear | joy | sadness | surprise | trust | negative | positive |
---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 |
0 | 2 | 1 | 2 | 3 | 0 | 0 | 3 | 1 | 3 |
3 | 0 | 0 | 2 | 2 | 1 | 0 | 3 | 3 | 4 |
2 | 1 | 1 | 1 | 3 | 0 | 0 | 3 | 2 | 7 |
0 | 1 | 0 | 0 | 2 | 0 | 1 | 5 | 0 | 6 |
3 | 1 | 3 | 3 | 0 | 4 | 1 | 3 | 4 | 3 |
2 | 2 | 1 | 4 | 3 | 3 | 2 | 2 | 5 | 6 |
2 | 2 | 1 | 4 | 3 | 0 | 0 | 3 | 1 | 9 |
4 | 2 | 0 | 4 | 0 | 0 | 0 | 4 | 5 | 9 |
4 | 3 | 1 | 4 | 2 | 2 | 2 | 10 | 9 | 10 |
4 | 2 | 2 | 4 | 1 | 3 | 1 | 2 | 6 | 9 |
3 | 4 | 1 | 4 | 4 | 3 | 2 | 9 | 8 | 11 |
1 | 1 | 0 | 3 | 0 | 0 | 0 | 0 | 1 | 1 |
1 | 5 | 0 | 2 | 2 | 1 | 0 | 5 | 3 | 4 |
1 | 1 | 0 | 0 | 1 | 0 | 0 | 2 | 1 | 3 |
1 | 1 | 0 | 1 | 1 | 1 | 0 | 4 | 1 | 5 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
# Split the same paragraphs into sentences and score each sentence.
s_v <- speech[2:50] %>% get_sentences()
s_v_sentiment <- s_v %>% get_sentiment()
s_v_sentiment
## [1] 0.25 1.55 0.50 1.55 0.85 0.45 2.80 -2.00 5.15 6.00 0.00 0.75
## [13] 0.80 -1.50 0.00 -1.70 1.30 -0.40 1.40 -1.25 -0.50 -0.35 -1.00 1.10
## [25] 0.75 0.10 0.75 2.75 0.00 0.25 0.25 0.00 0.75 0.25 1.20 2.90
## [37] -0.75 -0.10 0.40 -0.75 -2.15 -1.05 1.25 -0.50 -1.25 0.60 1.75 -0.25
## [49] -0.60 0.00 -0.90 0.00 0.00 0.90 5.20 -0.15 0.95 -0.70 -1.25 0.00
## [61] -0.40 -1.50 1.00 -0.50 1.00 1.35 0.75 2.50 -0.55 2.00 0.00 -0.90
## [73] -0.40 1.15 0.75 1.05 1.00 0.50 0.00 0.00 0.00 0.00 0.00 0.00
## [85] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [97] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## [109] 0.00 0.00
# Number of NRC lexicon entries per category, largest category first.
knitr::kable(count(get_sentiments("nrc"), sentiment, sort = TRUE))
sentiment | n |
---|---|
negative | 3318 |
positive | 2308 |
fear | 1474 |
anger | 1246 |
trust | 1230 |
sadness | 1187 |
disgust | 1056 |
anticipation | 837 |
joy | 687 |
surprise | 532 |
# Tokenise the speech paragraphs into one word per row and tag the words
# with their NRC categories.
#
# unnest_tokens() lower-cases the text and strips punctuation. Splitting on a
# single space instead leaves tokens such as "Peace," or "Freedom." that can
# never match the lower-case, punctuation-free lexicon entries, so those
# words would silently drop out of every join below.
tidy_speech <- speech[2:50]

words.df <- tibble(text = tidy_speech) %>%
  unnest_tokens(word, text) %>%
  filter(!is.na(word)) %>%                # paragraphs missing from the page
  mutate(rowNumber = row_number()) %>%    # position of the word in the speech
  select(rowNumber, word) %>%
  as.data.frame()

# Kept as standalone vectors for any later code that refers to them.
tidy_speech_words <- words.df$word
rowNumber <- words.df$rowNumber

# One row per (word occurrence, NRC category) pair.
speech_sentiment_quanteda <- inner_join(
  words.df,
  get_sentiments("nrc"),
  by = "word"
)
## Joining, by = "word"
# How many times each (word, NRC category) pair occurs in the speech,
# most frequent first.
lang_word_counts <- ungroup(
  count(
    inner_join(words.df, get_sentiments("nrc")),
    word, sentiment,
    sort = TRUE
  )
)
## Joining, by = "word"
# For every NRC category keep the five highest-count words (ties included)
# and draw them as horizontal bars, one panel per category.
top_words <- lang_word_counts %>%
  group_by(sentiment) %>%
  slice_max(order_by = n, n = 5) %>%
  ungroup() %>%
  mutate(word = reorder(word, n))

ggplot(top_words, aes(x = n, y = word, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(sentiment), scales = "free_y") +
  labs(x = "Contribution to sentiment", y = NULL)
#### This code gives a word cloud of the major words that the president used in his address.
library(reshape2)
# Comparison cloud of the speech words, grouped by NRC category and sized by
# how often each word was used.
#
# `lang_word_counts` already holds the per-(word, sentiment) frequency in `n`.
# Joining it to the lexicon again and re-running count() would count rows of
# the summary table instead, resetting every frequency to 1, so the existing
# counts are reshaped directly.
#
# One colour is supplied per sentiment column (NRC has up to ten categories);
# brewer.pal() needs at least 3 and "Paired" provides up to 12.
lang_word_counts %>%
  acast(word ~ sentiment, value.var = "n", fill = 0) %>%
  {
    comparison.cloud(
      .,
      colors = brewer.pal(max(3, ncol(.)), "Paired"),
      max.words = 100
    )
  }
## Joining, by = c("word", "sentiment")
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## terrible could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## humanitarian could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## punished could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## attacking could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## death could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## found could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## freedom could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## freely could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## kind could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## save could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## true could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## expected could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## thought could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## depend could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## watch could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## peace could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## proud could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## fell could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## defense could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## die could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## kill could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## conflict could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## resisting could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## terrorize could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## fighting could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## unjust could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## words could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## moral could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## aggression could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## aggressor could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## brutal could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## experienced could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## honor could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## important could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## innocent could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## justice could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## leader could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## provide could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## respects could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## responsible could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## sincere could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## strength could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## battlefield could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## government could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## democracy could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## foundation could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## main could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## offer could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## overwhelming could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## preserve could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## protect could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## sense could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## sky could not be fit on page. It will not be plotted.
library(reshape2)
# Comparison cloud of the speech words under the Bing lexicon, sized by how
# often each word was used.
#
# The tokens are joined to Bing by `word` only. Starting from the NRC-labelled
# count table would join on both `word` and `sentiment`, keeping just the
# words whose NRC and Bing labels coincide, and the subsequent count() would
# then report 1 for every word instead of its frequency in the speech.
words.df %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  acast(word ~ sentiment, value.var = "n", fill = 0) %>%
  comparison.cloud(
    colors = c("blue", "green"),
    max.words = 500
  )
## Joining, by = c("word", "sentiment")
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 500):
## important could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 500):
## peace could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 500):
## protect could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 500):
## proud could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 500):
## offensive could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 500):
## terrible could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 500):
## terrorize could not be fit on page. It will not be plotted.
The sentiment analysis of Ukrainian President Zelensky’s address to Congress seems to be in line with the situation that he is dealing with. He used words of anger, but there are more words that represent anticipation toward the US as well as the sadness of the situation in Ukraine. He is surprised by the invasion and the death, as shown under the surprise bucket. He is fearful but also positive about the leadership of the US. I think this kind of sentiment analysis would be helpful for members of Congress and the White House in formulating the right policy towards Ukraine.