First part:

library(tidytext)
library(RCurl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(janeaustenr)
library(stringr)
library(textdata)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:RCurl':
## 
##     complete
library(ggplot2)
library(rjson)
library(jsonlite)
## 
## Attaching package: 'jsonlite'
## The following objects are masked from 'package:rjson':
## 
##     fromJSON, toJSON
library(httr)
library(XML)
library(rvest)
library(wordcloud)
## Loading required package: RColorBrewer
library(wordcloud2)
library(RColorBrewer)
library(rtweet)
## 
## Attaching package: 'rtweet'
## The following object is masked from 'package:jsonlite':
## 
##     flatten
library(tm)
## Loading required package: NLP
## 
## Attaching package: 'NLP'
## The following object is masked from 'package:httr':
## 
##     content
## The following object is masked from 'package:ggplot2':
## 
##     annotate
  • The below code uses afinn lexicon to get the sentiments.

## remove.packages("rlang")
## install.packages("rlang")

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.6     v purrr   0.3.4
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x NLP::annotate()         masks ggplot2::annotate()
## x tidyr::complete()       masks RCurl::complete()
## x NLP::content()          masks httr::content()
## x dplyr::filter()         masks stats::filter()
## x purrr::flatten()        masks rtweet::flatten(), jsonlite::flatten()
## x jsonlite::fromJSON()    masks rjson::fromJSON()
## x readr::guess_encoding() masks rvest::guess_encoding()
## x dplyr::lag()            masks stats::lag()
## x jsonlite::toJSON()      masks rjson::toJSON()
##install.packages("tidytext")
library(tidytext)
library(dplyr)
# Print the AFINN lexicon: one row per word with a numeric `value` score.
get_sentiments("afinn")
## # A tibble: 2,477 x 2
##    word       value
##    <chr>      <dbl>
##  1 abandon       -2
##  2 abandoned     -2
##  3 abandons      -2
##  4 abducted      -2
##  5 abduction     -2
##  6 abductions    -2
##  7 abhor         -3
##  8 abhorred      -3
##  9 abhorrent     -3
## 10 abhors        -3
## # ... with 2,467 more rows
  • The below code uses nrc lexicon to get the sentiments.

# Print the NRC lexicon: one row per word/sentiment pair, so a word can be
# listed under several sentiments.
get_sentiments("nrc")
## # A tibble: 13,875 x 2
##    word        sentiment
##    <chr>       <chr>    
##  1 abacus      trust    
##  2 abandon     fear     
##  3 abandon     negative 
##  4 abandon     sadness  
##  5 abandoned   anger    
##  6 abandoned   fear     
##  7 abandoned   negative 
##  8 abandoned   sadness  
##  9 abandonment anger    
## 10 abandonment fear     
## # ... with 13,865 more rows
  • The below code uses bing lexicon to get the sentiments.

# Print the Bing lexicon: one row per word with a `sentiment` label.
get_sentiments("bing")
## # A tibble: 6,786 x 2
##    word        sentiment
##    <chr>       <chr>    
##  1 2-faces     negative 
##  2 abnormal    negative 
##  3 abolish     negative 
##  4 abominable  negative 
##  5 abominably  negative 
##  6 abominate   negative 
##  7 abomination negative 
##  8 abort       negative 
##  9 aborted     negative 
## 10 aborts      negative 
## # ... with 6,776 more rows
  • Get the code from the textbook and keep it working

library(janeaustenr)
library(dplyr)
library(stringr)

# Build a one-word-per-row table of the Austen novels. The line number and a
# running chapter counter are computed within each book before tokenising, so
# every word keeps the line and chapter it came from.
tidy_books <- austen_books() %>%
  group_by(book) %>%
  mutate(linenumber = row_number()) %>%
  # A line that starts with "chapter" followed by a digit or one of the
  # characters i, v, x, l, c counts as a heading; cumsum() turns the hits
  # into a per-book chapter index.
  mutate(chapter = cumsum(str_detect(
    text,
    regex("^chapter [\\divxlc]", ignore_case = TRUE)
  ))) %>%
  ungroup() %>%
  unnest_tokens(output = word, input = text)
# Preview the first five tokens as a formatted table.
knitr::kable(head(tidy_books, 5))
book linenumber chapter word
Sense & Sensibility 1 0 sense
Sense & Sensibility 1 0 and
Sense & Sensibility 1 0 sensibility
Sense & Sensibility 3 0 by
Sense & Sensibility 3 0 jane
  • Using sentiment dictionary nrc on the book exercise

# Keep only the NRC lexicon entries labelled "joy".
nrc_joy <- filter(get_sentiments("nrc"), sentiment == "joy")

# Count how often each joy word occurs in "Emma", most frequent first.
# The join key is left implicit, so dplyr matches on the shared `word` column.
tidy_books %>%
  filter(book == "Emma") %>%
  inner_join(nrc_joy) %>%
  count(word, sort = TRUE)
## Joining, by = "word"
## # A tibble: 301 x 2
##    word          n
##    <chr>     <int>
##  1 good        359
##  2 friend      166
##  3 hope        143
##  4 happy       125
##  5 love        117
##  6 deal         92
##  7 found        92
##  8 present      89
##  9 kind         82
## 10 happiness    76
## # ... with 291 more rows
  • Using sentiment dictionary bing on the book exercise

library(tidyr)

# Net Bing sentiment per 80-line section of each novel: tag every word with
# its Bing label, count labels per section, spread them into `positive` and
# `negative` columns, and take the difference.
jane_austen_sentiment <- tidy_books %>%
  inner_join(get_sentiments("bing")) %>%
  mutate(index = linenumber %/% 80) %>%
  count(book, index, sentiment) %>%
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = 0
  ) %>%
  mutate(sentiment = positive - negative)
## Joining, by = "word"
library(ggplot2)

# One panel per novel showing net sentiment across its 80-line sections;
# the x axis is freed per panel because the books differ in length.
jane_austen_sentiment %>%
  ggplot(aes(x = index, y = sentiment, fill = book)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(book), ncol = 2, scales = "free_x")

  • Comparing the three sentiment dictionaries

# Restrict the tokenised corpus to a single novel for the lexicon comparison.
pride_prejudice <- filter(tidy_books, book == "Pride & Prejudice")
  • Now, we can use inner_join() to calculate the sentiment in different ways. Let’s again use integer division (%/%) to define larger sections of text that span multiple lines, and we can use the same pattern with count(), pivot_wider(), and mutate() to find the net sentiment in each of these sections of text.
# AFINN gives each word a numeric score, so the section sentiment is the sum
# of the scores within each 80-line section.
afinn <- pride_prejudice %>%
  inner_join(get_sentiments("afinn")) %>%
  mutate(index = linenumber %/% 80) %>%
  group_by(index) %>%
  summarise(sentiment = sum(value)) %>%
  mutate(method = "AFINN")
## Joining, by = "word"
# Bing and NRC label words rather than scoring them, so both are handled the
# same way: stack the two labelled token tables, count positive and negative
# words per 80-line section and method, then take the difference.
bing_and_nrc <- bind_rows(
  pride_prejudice %>%
    inner_join(get_sentiments("bing")) %>%
    mutate(method = "Bing et al."),
  pride_prejudice %>%
    inner_join(
      # Only the polarity labels of NRC are kept, not its emotion categories.
      get_sentiments("nrc") %>%
        filter(sentiment == "positive" | sentiment == "negative")
    ) %>%
    mutate(method = "NRC")
) %>%
  mutate(index = linenumber %/% 80) %>%
  count(method, index, sentiment) %>%
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = 0
  ) %>%
  mutate(sentiment = positive - negative)
## Joining, by = "word"
## Joining, by = "word"
  • This is an analysis by combining the three dictionaries, afinn, bing and nrc.

# Stack the per-section results of all three lexicons and draw one panel per
# method; the y axis is freed per panel because the methods use different
# scales.
ggplot(
  bind_rows(afinn, bing_and_nrc),
  aes(x = index, y = sentiment, fill = method)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(method), ncol = 1, scales = "free_y")

* #### Visualization using Wordclouds

# Word cloud of the 100 most frequent words across the novels, after removing
# the words listed in `stop_words`.
tidy_books %>%
  anti_join(stop_words) %>%
  count(word) %>%
  with(wordcloud(words = word, freq = n, max.words = 100))
## Joining, by = "word"
## Warning in wordcloud(word, n, max.words = 100): miss could not be fit on page.
## It will not be plotted.

  • Negative vs positive analysis using bing dictionary

library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
# Comparison cloud of Bing-labelled words across the novels. acast() reshapes
# the counts into a word-by-sentiment matrix (0 where a word lacks a label),
# which is the input comparison.cloud() takes.
tidy_books %>%
  inner_join(get_sentiments("bing")) %>%
  count(word, sentiment, sort = TRUE) %>%
  acast(formula = word ~ sentiment, value.var = "n", fill = 0) %>%
  comparison.cloud(
    colors = c("blue", "green"),
    max.words = 100
  )
## Joining, by = "word"

Second Part:

# Download the Washington Post page with the transcript of the address and
# extract the text of every <p> element as a character vector.
speech_website <- read_html(
  "https://www.washingtonpost.com/politics/2022/03/16/text-zelensky-address-congress/"
)
speech <- html_text(html_nodes(speech_website, "p"))
  • This gets the sentiment using the syuzhet package, which I thought was a good fit for a political sentiment analysis.
## install.packages("syuzhet")
library(syuzhet)
## 
## Attaching package: 'syuzhet'
## The following object is masked from 'package:rtweet':
## 
##     get_tokens
# Score elements 2 through 50 of the scraped paragraphs, one value each.
# NOTE(review): the 2:50 range is hard-coded; presumably it skips a leading
# non-speech paragraph — confirm it still matches the page layout.
get_sentiment(speech[2:50])
##  [1]  0.25  1.55  2.90  0.45  5.15  7.55 -0.15 -1.15  3.75  2.25 -1.70 -1.75
## [13]  0.45 -0.90  3.10  1.45  1.90  0.50  0.00  0.00  0.00  0.00  0.00  0.00
## [25]  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00
## [37]  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00
## [49]  0.00
# Tabulate what get_nrc_sentiment() returns for the same paragraphs: one row
# per paragraph and one column per NRC category.
knitr::kable(get_nrc_sentiment(speech[2:50]))
## Warning: `spread_()` was deprecated in tidyr 1.2.0.
## Please use `spread()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
anger anticipation disgust fear joy sadness surprise trust negative positive
0 0 0 0 0 0 0 0 0 1
0 1 0 0 1 0 0 1 0 1
0 2 1 2 3 0 0 3 1 3
3 0 0 2 2 1 0 3 3 4
2 1 1 1 3 0 0 3 2 7
0 1 0 0 2 0 1 5 0 6
3 1 3 3 0 4 1 3 4 3
2 2 1 4 3 3 2 2 5 6
2 2 1 4 3 0 0 3 1 9
4 2 0 4 0 0 0 4 5 9
4 3 1 4 2 2 2 10 9 10
4 2 2 4 1 3 1 2 6 9
3 4 1 4 4 3 2 9 8 11
1 1 0 3 0 0 0 0 1 1
1 5 0 2 2 1 0 5 3 4
1 1 0 0 1 0 0 2 1 3
1 1 0 1 1 1 0 4 1 5
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
# Split the same paragraphs into sentences and score each sentence.
s_v <- speech[2:50] %>% get_sentences()
s_v_sentiment <- s_v %>% get_sentiment()
print(s_v_sentiment)
##   [1]  0.25  1.55  0.50  1.55  0.85  0.45  2.80 -2.00  5.15  6.00  0.00  0.75
##  [13]  0.80 -1.50  0.00 -1.70  1.30 -0.40  1.40 -1.25 -0.50 -0.35 -1.00  1.10
##  [25]  0.75  0.10  0.75  2.75  0.00  0.25  0.25  0.00  0.75  0.25  1.20  2.90
##  [37] -0.75 -0.10  0.40 -0.75 -2.15 -1.05  1.25 -0.50 -1.25  0.60  1.75 -0.25
##  [49] -0.60  0.00 -0.90  0.00  0.00  0.90  5.20 -0.15  0.95 -0.70 -1.25  0.00
##  [61] -0.40 -1.50  1.00 -0.50  1.00  1.35  0.75  2.50 -0.55  2.00  0.00 -0.90
##  [73] -0.40  1.15  0.75  1.05  1.00  0.50  0.00  0.00  0.00  0.00  0.00  0.00
##  [85]  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00
##  [97]  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00
## [109]  0.00  0.00
# Show how many lexicon entries NRC has per sentiment category, largest first.
get_sentiments("nrc") %>%
  count(sentiment, sort = TRUE) %>%
  knitr::kable()
sentiment n
negative 3318
positive 2308
fear 1474
anger 1246
trust 1230
sadness 1187
disgust 1056
anticipation 837
joy 687
surprise 532
# Paragraphs of the scraped page that are analysed as the speech.
tidy_speech <- speech[2:50]

# Tokenise with unnest_tokens() rather than splitting on single spaces.
# A plain strsplit() keeps capital letters and attached punctuation
# ("Freedom," or "peace."), and those tokens never match the lower-case
# entries of the sentiment lexicons, so they silently drop out of every join.
# unnest_tokens() lower-cases the text and strips punctuation.
words.df <- data.frame(text = tidy_speech, stringsAsFactors = FALSE) %>%
  unnest_tokens(word, text) %>%
  filter(!is.na(word)) %>%
  # Running token position; row_number() also copes with zero tokens, where
  # seq(1:length(x)) would yield c(1, 2).
  mutate(rowNumber = row_number()) %>%
  select(rowNumber, word)

# Kept as standalone vectors because the original script defined them.
tidy_speech_words <- words.df$word
rowNumber <- words.df$rowNumber

# Every speech token paired with each NRC sentiment it is listed under.
speech_sentiment_quanteda <- words.df %>%
  inner_join(get_sentiments("nrc"), by = "word")
## Joining, by = "word"
# Frequency of every (word, NRC sentiment) pair in the speech, most frequent
# first. `words.df` is not grouped, so count() already returns an ungrouped
# table.
lang_word_counts <- count(
  inner_join(words.df, get_sentiments("nrc")),
  word, sentiment,
  sort = TRUE
)
## Joining, by = "word"
  • This code breaks the words into different categories such as anger, anticipation, fear, and sadness.

# For each NRC sentiment, plot the five most frequent words (ties included)
# as horizontal bars, one panel per sentiment.
lang_word_counts %>%
  group_by(sentiment) %>%
  slice_max(order_by = n, n = 5) %>%
  ungroup() %>%
  # Order the word factor by count so the bars are drawn in frequency order.
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = n, y = word, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(vars(sentiment), scales = "free_y") +
  labs(x = "Contribution to sentiment", y = NULL)

* #### This code gives a word cloud of the major words that the president used in his address.

library(reshape2)

# Comparison cloud of the speech's words across the NRC sentiments.
#
# `lang_word_counts` already holds one row per (word, sentiment) with its
# frequency in `n`. Joining it to the lexicon again and re-running count()
# would replace those frequencies with 1 for every pair, so the table is
# reshaped directly into the word-by-sentiment matrix comparison.cloud() takes.
nrc_term_matrix <- lang_word_counts %>%
  acast(word ~ sentiment, value.var = "n", fill = 0)

# Supply one colour per sentiment column, since two colours cannot cover the
# up to ten NRC categories. brewer.pal() needs n >= 3, hence the max().
comparison.cloud(
  nrc_term_matrix,
  colors = brewer.pal(max(3, ncol(nrc_term_matrix)), "Paired"),
  max.words = 100
)
## Joining, by = c("word", "sentiment")
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## terrible could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## humanitarian could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## punished could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## attacking could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## death could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## found could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## freedom could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## freely could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## kind could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## save could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## true could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## expected could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## thought could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## depend could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## watch could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## peace could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## proud could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## fell could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## defense could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## die could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## kill could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## conflict could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## resisting could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## terrorize could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## fighting could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## unjust could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## words could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## moral could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## aggression could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## aggressor could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## brutal could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## experienced could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## honor could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## important could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## innocent could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## justice could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## leader could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## provide could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## respects could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## responsible could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## sincere could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## strength could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## battlefield could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## government could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## democracy could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## foundation could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## main could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## offer could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## overwhelming could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## preserve could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## protect could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## sense could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 100):
## sky could not be fit on page. It will not be plotted.

  • This code gives a wordcloud where the words are grouped as negative vs. positive.

library(reshape2)

# Negative vs. positive comparison cloud of the speech using the Bing lexicon.
#
# Start from the raw token table, not `lang_word_counts`. That table already
# has an NRC `sentiment` column, so joining it to Bing matches on both `word`
# and `sentiment`, which keeps only words where NRC and Bing agree on the
# label, and the following count() resets every frequency to 1. Joining the
# tokens to Bing on `word` alone keeps all Bing words with their frequencies.
words.df %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  acast(word ~ sentiment, value.var = "n", fill = 0) %>%
  comparison.cloud(colors = c("blue", "green"),
                   max.words = 500)
## Joining, by = c("word", "sentiment")
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 500):
## important could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 500):
## peace could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 500):
## protect could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 500):
## proud could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 500):
## offensive could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 500):
## terrible could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(., colors = c("blue", "green"), max.words = 500):
## terrorize could not be fit on page. It will not be plotted.

Conclusion:

The sentiment analysis of Ukrainian President Zelensky’s address to Congress seems to be in line with the situation that he is dealing with. He used words of anger, but there are more words that represent anticipation of help from the US, as well as sadness about the situation in Ukraine. He is surprised by the invasion and the death, as shown under the surprise bucket. He is fearful but also positive about the leadership of the US. I think this kind of sentiment analysis would be helpful for the members of Congress and the White House in formulating the right policy towards Ukraine.