The assignment was to reproduce the code from tidytextmining.com, then to choose a different corpus and perform the same sentiment analysis as the example provided, and finally to apply one additional sentiment lexicon. The book uses the sentiment lexicons nrc, bing, and afinn; one additional analysis was done here using the loughran lexicon. Ultimately, the scripts from The Lord of the Rings movies were used as the additional corpus.
The code was originally taken from https://www.tidytextmining.com/sentiment.html
Silge, J., & Robinson, D. (n.d.). 2/Sentiment analysis with tidy data. In tidytextmining. essay. Retrieved April 3, 2024, from https://www.tidytextmining.com/sentiment.html. ISBN-10 1491981652 ISBN-13 978-1491981658
The R code chunks 1-13 are copied from the chapter; only certain library calls were added.
library(tidytext)
## Warning: package 'tidytext' was built under R version 4.3.3
library(textdata)
## Warning: package 'textdata' was built under R version 4.3.3
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 4.3.3
## Loading required package: RColorBrewer
library(RCurl)
##
## Attaching package: 'RCurl'
##
## The following object is masked from 'package:tidyr':
##
## complete
library(quanteda)
## Warning: package 'quanteda' was built under R version 4.3.3
## Warning in .recacheSubclasses(def@className, def, env): undefined subclass
## "ndiMatrix" of class "replValueSp"; definition not updated
## Package version: 3.3.1
## Unicode version: 15.1
## ICU version: 74.1
## Parallel computing: 20 of 20 threads used.
## See https://quanteda.io for tutorials and examples.
library(readtext)
## Warning: package 'readtext' was built under R version 4.3.3
##
## Attaching package: 'readtext'
##
## The following object is masked from 'package:quanteda':
##
## texts
# Preview the AFINN lexicon: one row per word with a numeric `value` score.
get_sentiments("afinn")
## # A tibble: 2,477 × 2
## word value
## <chr> <dbl>
## 1 abandon -2
## 2 abandoned -2
## 3 abandons -2
## 4 abducted -2
## 5 abduction -2
## 6 abductions -2
## 7 abhor -3
## 8 abhorred -3
## 9 abhorrent -3
## 10 abhors -3
## # ℹ 2,467 more rows
# Preview the Bing lexicon: one row per word with a `sentiment` label.
get_sentiments("bing")
## # A tibble: 6,786 × 2
## word sentiment
## <chr> <chr>
## 1 2-faces negative
## 2 abnormal negative
## 3 abolish negative
## 4 abominable negative
## 5 abominably negative
## 6 abominate negative
## 7 abomination negative
## 8 abort negative
## 9 aborted negative
## 10 aborts negative
## # ℹ 6,776 more rows
library(janeaustenr)
## Warning: package 'janeaustenr' was built under R version 4.3.3
library(dplyr)
library(stringr)
# Tokenise the Austen novels into one word per row, keeping for every word
# the line of the book it came from and the chapter it belongs to.
chapter_heading <- regex("^chapter [\\divxlc]", ignore_case = TRUE)

annotated_lines <- austen_books() %>%
  group_by(book) %>%
  mutate(
    linenumber = row_number(),
    # Every line that matches the heading pattern bumps the running count.
    chapter = cumsum(str_detect(text, chapter_heading))
  ) %>%
  ungroup()

tidy_books <- unnest_tokens(annotated_lines, word, text)
# Joy words from the NRC lexicon, then how often each one occurs in "Emma".
nrc_joy <- filter(get_sentiments("nrc"), sentiment == "joy")

emma_words <- filter(tidy_books, book == "Emma")
emma_words %>%
  inner_join(nrc_joy) %>%
  count(word, sort = TRUE)
## Joining with `by = join_by(word)`
## # A tibble: 301 × 2
## word n
## <chr> <int>
## 1 good 359
## 2 friend 166
## 3 hope 143
## 4 happy 125
## 5 love 117
## 6 deal 92
## 7 found 92
## 8 present 89
## 9 kind 82
## 10 happiness 76
## # ℹ 291 more rows
library(tidyr)
# Net Bing sentiment (positive minus negative word count) for every block of
# 80 lines in each novel.
# The join key is spelled out and the relationship is declared: dplyr reported
# that at least one word matches more than one Bing row, and without the
# declaration every run emits an "unexpected many-to-many" warning.
jane_austen_sentiment <- tidy_books %>%
  inner_join(
    get_sentiments("bing"),
    by = "word",
    relationship = "many-to-many"
  ) %>%
  count(book, index = linenumber %/% 80, sentiment) %>%
  pivot_wider(names_from = sentiment, values_from = n, values_fill = 0) %>%
  mutate(sentiment = positive - negative)
library(ggplot2)
# One panel per novel; each bar is the net sentiment of one 80-line block.
ggplot(
  data = jane_austen_sentiment,
  mapping = aes(x = index, y = sentiment, fill = book)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~book, ncol = 2, scales = "free_x")
# Keep only the tokens of "Pride & Prejudice" and show them.
pride_prejudice <- filter(tidy_books, book == "Pride & Prejudice")
pride_prejudice
## # A tibble: 122,204 × 4
## book linenumber chapter word
## <fct> <int> <int> <chr>
## 1 Pride & Prejudice 1 0 pride
## 2 Pride & Prejudice 1 0 and
## 3 Pride & Prejudice 1 0 prejudice
## 4 Pride & Prejudice 3 0 by
## 5 Pride & Prejudice 3 0 jane
## 6 Pride & Prejudice 3 0 austen
## 7 Pride & Prejudice 7 1 chapter
## 8 Pride & Prejudice 7 1 1
## 9 Pride & Prejudice 10 1 it
## 10 Pride & Prejudice 10 1 is
## # ℹ 122,194 more rows
# AFINN score per 80-line block: the sum of the word values in that block.
afinn <- pride_prejudice %>%
  inner_join(get_sentiments("afinn")) %>%
  mutate(index = linenumber %/% 80) %>%
  group_by(index) %>%
  summarise(sentiment = sum(value)) %>%
  mutate(method = "AFINN")
## Joining with `by = join_by(word)`
# Net sentiment per 80-line block of "Pride & Prejudice" for the Bing and NRC
# lexicons, stacked into one table labelled by `method`.
# Join keys are explicit. The NRC join declares a many-to-many relationship
# because dplyr reported words matching more than one positive/negative row;
# leaving it undeclared produces a warning on every run.
bing_and_nrc <- bind_rows(
  pride_prejudice %>%
    inner_join(get_sentiments("bing"), by = "word") %>%
    mutate(method = "Bing et al."),
  pride_prejudice %>%
    inner_join(
      get_sentiments("nrc") %>%
        filter(sentiment %in% c("positive", "negative")),
      by = "word",
      relationship = "many-to-many"
    ) %>%
    mutate(method = "NRC")
) %>%
  count(method, index = linenumber %/% 80, sentiment) %>%
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = 0
  ) %>%
  mutate(sentiment = positive - negative)
# Stack the three methods and draw one panel per lexicon.
all_methods <- bind_rows(afinn, bing_and_nrc)

ggplot(all_methods, aes(x = index, y = sentiment, fill = method)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~method, ncol = 1, scales = "free_y")
# How many positive and negative entries the NRC lexicon contains.
nrc_polarity_words <- filter(
  get_sentiments("nrc"),
  sentiment %in% c("positive", "negative")
)
count(nrc_polarity_words, sentiment)
## # A tibble: 2 × 2
## sentiment n
## <chr> <int>
## 1 negative 3316
## 2 positive 2308
# How many entries the Bing lexicon has per sentiment label.
count(get_sentiments("bing"), sentiment)
## # A tibble: 2 × 2
## sentiment n
## <chr> <int>
## 1 negative 4781
## 2 positive 2005
# Frequency of every Bing-labelled word across all six novels, most frequent
# first.
# The join key and the many-to-many relationship are explicit (dplyr reported
# at least one word matching more than one Bing row). The trailing ungroup()
# was dropped: count() on ungrouped input already returns ungrouped output.
bing_word_counts <- tidy_books %>%
  inner_join(
    get_sentiments("bing"),
    by = "word",
    relationship = "many-to-many"
  ) %>%
  count(word, sentiment, sort = TRUE)

bing_word_counts
## # A tibble: 2,585 × 3
## word sentiment n
## <chr> <chr> <int>
## 1 miss negative 1855
## 2 well positive 1523
## 3 good positive 1380
## 4 great positive 981
## 5 like positive 725
## 6 better positive 639
## 7 enough positive 613
## 8 happy positive 534
## 9 love positive 495
## 10 pleasure positive 462
## # ℹ 2,575 more rows
# Top words per sentiment (10 each, ties kept), ordered by count, one panel
# per sentiment.
top_bing_words <- bing_word_counts %>%
  group_by(sentiment) %>%
  slice_max(order_by = n, n = 10) %>%
  ungroup() %>%
  mutate(word = reorder(word, n))

ggplot(top_bing_words, aes(x = n, y = word, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(
    x = "Contribution to sentiment",
    y = NULL
  )
# Standard stop words with "miss" prepended as a custom entry.
extra_stop_words <- tibble(word = "miss", lexicon = "custom")
custom_stop_words <- bind_rows(extra_stop_words, stop_words)
custom_stop_words
## # A tibble: 1,150 × 2
## word lexicon
## <chr> <chr>
## 1 miss custom
## 2 a SMART
## 3 a's SMART
## 4 able SMART
## 5 about SMART
## 6 above SMART
## 7 according SMART
## 8 accordingly SMART
## 9 across SMART
## 10 actually SMART
## # ℹ 1,140 more rows
library(wordcloud)
# Word cloud of the 100 most frequent non-stop-words in the novels.
austen_word_totals <- tidy_books %>%
  anti_join(stop_words) %>%
  count(word)

wordcloud(austen_word_totals$word, austen_word_totals$n, max.words = 100)
## Joining with `by = join_by(word)`
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Comparison cloud of the most frequent Bing words, split by sentiment.
# acast() turns the counts into a word-by-sentiment matrix, which is the
# input shape comparison.cloud() is given here.
# The join key and the many-to-many relationship are explicit because dplyr
# reported at least one word matching more than one Bing row.
tidy_books %>%
  inner_join(
    get_sentiments("bing"),
    by = "word",
    relationship = "many-to-many"
  ) %>%
  count(word, sentiment, sort = TRUE) %>%
  acast(word ~ sentiment, value.var = "n", fill = 0) %>%
  comparison.cloud(
    colors = c("gray20", "gray80"),
    max.words = 100
  )
# Tokenise "Pride & Prejudice" into sentences and show the second one.
p_and_p_text <- tibble(text = prideprejudice)
p_and_p_sentences <- unnest_tokens(
  p_and_p_text,
  sentence,
  text,
  token = "sentences"
)
p_and_p_sentences[["sentence"]][2]
## [1] "by jane austen"
# Split each novel into chunks at chapter headings, then count the chunks
# per book.
# NOTE(review): alternation binds loosest in a regex, so this pattern reads
# as "Chapter" OR "CHAPTER <digit/roman numeral>" - confirm that is intended.
chapter_split <- "Chapter|CHAPTER [\\dIVXLC]"

books_by_title <- group_by(austen_books(), book)
austen_chapters <- books_by_title %>%
  unnest_tokens(chapter, text, token = "regex", pattern = chapter_split) %>%
  ungroup()

chapters_by_book <- group_by(austen_chapters, book)
summarise(chapters_by_book, chapters = n())
## # A tibble: 6 × 2
## book chapters
## <fct> <int>
## 1 Sense & Sensibility 51
## 2 Pride & Prejudice 62
## 3 Mansfield Park 49
## 4 Emma 56
## 5 Northanger Abbey 32
## 6 Persuasion 25
# Negative half of the Bing lexicon, and the total word count of every
# chapter (the denominator for the ratio computed next).
bingnegative <- filter(get_sentiments("bing"), sentiment == "negative")

chapter_tokens <- group_by(tidy_books, book, chapter)
wordcounts <- summarise(chapter_tokens, words = n())
## `summarise()` has grouped output by 'book'. You can override using the
## `.groups` argument.
# For each book, find the chapter with the highest share of negative words.
negative_by_chapter <- tidy_books %>%
  semi_join(bingnegative) %>%
  group_by(book, chapter) %>%
  summarise(negativewords = n())

# The summary is still grouped by book, so slice_max() picks one chapter per
# book; chapter 0 (text before the first chapter heading) is excluded.
negative_by_chapter %>%
  left_join(wordcounts, by = c("book", "chapter")) %>%
  mutate(ratio = negativewords / words) %>%
  filter(chapter != 0) %>%
  slice_max(order_by = ratio, n = 1) %>%
  ungroup()
## Joining with `by = join_by(word)`
## `summarise()` has grouped output by 'book'. You can override using the
## `.groups` argument.
## # A tibble: 6 × 5
## book chapter negativewords words ratio
## <fct> <int> <int> <int> <dbl>
## 1 Sense & Sensibility 43 161 3405 0.0473
## 2 Pride & Prejudice 34 111 2104 0.0528
## 3 Mansfield Park 46 173 3685 0.0469
## 4 Emma 15 151 3340 0.0452
## 5 Northanger Abbey 21 149 2982 0.0500
## 6 Persuasion 4 62 1807 0.0343
library(corpustools)
## Warning: package 'corpustools' was built under R version 4.3.3
##
## Attaching package: 'corpustools'
## The following object is masked from 'package:tidytext':
##
## get_stopwords
library(tm)
## Warning: package 'tm' was built under R version 4.3.3
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following objects are masked from 'package:quanteda':
##
## meta, meta<-
## The following object is masked from 'package:ggplot2':
##
## annotate
##
## Attaching package: 'tm'
## The following object is masked from 'package:quanteda':
##
## stopwords
# Download the Star Wars Episode IV script text into a single string.
# NOTE(review): this is a signed URL (X-Goog-Date=20240403T003858Z,
# X-Goog-Expires=259200), so it presumably stopped working a few days after
# that date. `episode4` is not used in the visible code - confirm whether
# this download can be removed.
episode4 <- getURL("https://storage.googleapis.com/kagglesdsdata/datasets/25491/32521/SW_EpisodeIV.txt?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20240403%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20240403T003858Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=5ad0b59ab9ef17cb90a03d81bcdf43bfd5f42c372b6171849f317925bb374f0df4a273806d01df0b63cfd3de634fd2a6fa31f43d29b788cf468501ba8213713bc74d91f3e5ca3193debcdfd78ca7069244973a903907c28c46f8456939b1fb015a8e069f88ea93d9b1db45e40d9f65466c1688e31e4fd4ae5016b42892ef86d9944c93113640740106f1f50d1433979b7042fcb9c31be80a3aa0cec3341d54f3d922041a381e77361c6f58f167a11baf03e14eee3e8be6d81ae4fe22c5ca618bcb6b89b402de63d8e4cc5cc6d722b52b8f7f58ce60d05102301d86bef3e2634dcd25cf5335953fcdd74d1f5b22035b6804b787cac2b8579f2f256003fb47bd72")
# NOTE(review): the three strings are passed as unnamed values, so this
# builds a one-row data frame holding them as cell contents rather than an
# empty frame with those column names. `episode4df` is not used in the
# visible code - confirm the intent.
episode4df <- data.frame("line", "character", "dialogue")
# The script for Star Wars Episode IV: A New Hope was also considered as a corpus.
# Fetch the Lord of the Rings script CSV as text, parse it into a data frame
# and build a quanteda corpus whose document text is the `dialog` column.
lotr_csv_url <- "https://raw.githubusercontent.com/division-zero/Data607/main/Week%2010%20assignment/lotr_scripts.csv"
LOTR <- getURL(lotr_csv_url)
# read.csv() already returns a data frame, so no extra wrapping is needed.
LOTRdf <- read.csv(text = LOTR)
corp_LOTR <- corpus(LOTRdf, text_field = "dialog")
print(corp_LOTR)
## Corpus consisting of 2,390 documents and 3 docvars.
## text1 :
## "Oh Smeagol Ive got one! , Ive got a fish Smeagol, Smeagol! ..."
##
## text2 :
## "Pull it in! Go on, go on, go on, pull it in! "
##
## text3 :
## "Arrghh! "
##
## text4 :
## "Deagol! "
##
## text5 :
## "Deagol! "
##
## text6 :
## "Deagol! "
##
## [ reached max_ndoc ... 2,384 more documents ]
# Interactive inspection of the corpus, left disabled:
#view(corp_LOTR)
# List the distinct movie titles present in the data. The printed values
# show that the titles carry trailing whitespace.
unique(LOTRdf$movie)
## [1] "The Return of the King " "The Two Towers "
## [3] "The Fellowship of the Ring "
# Peek at the first rows: row id (X), speaker (char), dialog line and movie.
head(LOTRdf)
## X char dialog
## 1 0 DEAGOL Oh Smeagol Ive got one! , Ive got a fish Smeagol, Smeagol!
## 2 1 SMEAGOL Pull it in! Go on, go on, go on, pull it in!
## 3 2 DEAGOL Arrghh!
## 4 3 SMEAGOL Deagol!
## 5 4 SMEAGOL Deagol!
## 6 5 SMEAGOL Deagol!
## movie
## 1 The Return of the King
## 2 The Return of the King
## 3 The Return of the King
## 4 The Return of the King
## 5 The Return of the King
## 6 The Return of the King
# Number the dialog lines within each movie, then split the dialog into one
# word per row.
numbered_lines <- LOTRdf %>%
  group_by(movie) %>%
  mutate(linenumber = row_number()) %>%
  ungroup()

tidy_LOTR <- unnest_tokens(numbered_lines, word, dialog)
head(tidy_LOTR)
## # A tibble: 6 × 5
## X char movie linenumber word
## <int> <chr> <chr> <int> <chr>
## 1 0 DEAGOL "The Return of the King " 1 oh
## 2 0 DEAGOL "The Return of the King " 1 smeagol
## 3 0 DEAGOL "The Return of the King " 1 ive
## 4 0 DEAGOL "The Return of the King " 1 got
## 5 0 DEAGOL "The Return of the King " 1 one
## 6 0 DEAGOL "The Return of the King " 1 ive
# Joy words from the NRC lexicon and their counts in "The Fellowship of the
# Ring".
nrc_joy <- get_sentiments("nrc") %>%
  filter(sentiment == "joy")

# The movie titles in the CSV carry trailing whitespace; comparing the
# trimmed title keeps the filter working whether or not the data is cleaned.
tidy_LOTR %>%
  filter(trimws(movie) == "The Fellowship of the Ring") %>%
  inner_join(nrc_joy) %>%
  count(word, sort = TRUE)
## Joining with `by = join_by(word)`
## # A tibble: 46 × 2
## word n
## <chr> <int>
## 1 good 16
## 2 friend 10
## 3 gift 4
## 4 love 4
## 5 peace 4
## 6 birthday 3
## 7 found 3
## 8 merry 3
## 9 precious 3
## 10 grow 2
## # ℹ 36 more rows
# NOTE(review): this query repeats the "Fellowship of the Ring" joy-word
# count run just before it - confirm whether the duplicate is intended.
# The trimmed title is compared so the filter does not depend on the
# trailing whitespace present in the CSV's movie names.
tidy_LOTR %>%
  filter(trimws(movie) == "The Fellowship of the Ring") %>%
  inner_join(nrc_joy) %>%
  count(word, sort = TRUE)
## Joining with `by = join_by(word)`
## # A tibble: 46 × 2
## word n
## <chr> <int>
## 1 good 16
## 2 friend 10
## 3 gift 4
## 4 love 4
## 5 peace 4
## 6 birthday 3
## 7 found 3
## 8 merry 3
## 9 precious 3
## 10 grow 2
## # ℹ 36 more rows
# NRC joy words and their counts in "The Two Towers".
# The trimmed title is compared so the filter does not depend on the
# trailing whitespace present in the CSV's movie names.
tidy_LOTR %>%
  filter(trimws(movie) == "The Two Towers") %>%
  inner_join(nrc_joy) %>%
  count(word, sort = TRUE)
## Joining with `by = join_by(word)`
## # A tibble: 55 × 2
## word n
## <chr> <int>
## 1 good 23
## 2 merry 19
## 3 precious 15
## 4 friend 9
## 5 alive 8
## 6 tree 8
## 7 save 7
## 8 hope 6
## 9 found 5
## 10 child 4
## # ℹ 45 more rows
# NRC joy words and their counts in "The Return of the King".
# The trimmed title is compared so the filter does not depend on the
# trailing whitespace present in the CSV's movie names.
tidy_LOTR %>%
  filter(trimws(movie) == "The Return of the King") %>%
  inner_join(nrc_joy) %>%
  count(word, sort = TRUE)
## Joining with `by = join_by(word)`
## # A tibble: 70 × 2
## word n
## <chr> <int>
## 1 merry 15
## 2 precious 15
## 3 hope 11
## 4 love 11
## 5 peace 9
## 6 tree 9
## 7 save 8
## 8 victory 8
## 9 good 7
## 10 journey 6
## # ℹ 60 more rows
The LOTR CSV file containing the lines from the three movies was read into a data frame. Each word of the dialog in the scripts was extracted into its own row. The joy words (according to the nrc lexicon) from each movie and their counts are listed.
# Net Bing sentiment (positive minus negative word count) for every block of
# 20 dialog lines in each movie.
LOTR_sentiment <- tidy_LOTR %>%
  inner_join(get_sentiments("bing")) %>%
  mutate(index = linenumber %/% 20) %>%
  count(movie, index, sentiment) %>%
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = 0
  ) %>%
  mutate(sentiment = positive - negative)
## Joining with `by = join_by(word)`
library(ggplot2)
# One panel per movie; each bar is the net sentiment of one 20-line block.
ggplot(
  data = LOTR_sentiment,
  mapping = aes(x = index, y = sentiment, fill = movie)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~movie, ncol = 2, scales = "free_x")
# Tokens of "The Fellowship of the Ring" only.
# The trimmed title is compared so the filter does not depend on the
# trailing whitespace present in the CSV's movie names.
the_fellowship <- tidy_LOTR %>%
  filter(trimws(movie) == "The Fellowship of the Ring")
the_fellowship
## # A tibble: 5,465 × 5
## X char movie linenumber word
## <int> <chr> <chr> <int> <chr>
## 1 1400 MERRY "The Fellowship of the Ring " 1 what
## 2 1400 MERRY "The Fellowship of the Ring " 1 are
## 3 1400 MERRY "The Fellowship of the Ring " 1 they
## 4 1400 MERRY "The Fellowship of the Ring " 1 eating
## 5 1400 MERRY "The Fellowship of the Ring " 1 when
## 6 1400 MERRY "The Fellowship of the Ring " 1 they
## 7 1400 MERRY "The Fellowship of the Ring " 1 can't
## 8 1400 MERRY "The Fellowship of the Ring " 1 get
## 9 1400 MERRY "The Fellowship of the Ring " 1 hobbit
## 10 1402 FRODO "The Fellowship of the Ring " 3 who
## # ℹ 5,455 more rows
The sentiment scores were computed with the bing lexicon and plotted across each movie's running order, with each bar giving the net score (positive minus negative words) for a block of 20 dialog lines. The majority of blocks seem to score negatively across the movies.
# AFINN score per 20-line block of the Fellowship script: the sum of the
# word values in that block.
afinn <- the_fellowship %>%
  inner_join(get_sentiments("afinn")) %>%
  mutate(index = linenumber %/% 20) %>%
  group_by(index) %>%
  summarise(sentiment = sum(value)) %>%
  mutate(method = "AFINN")
## Joining with `by = join_by(word)`
# Net sentiment per 20-line block of the Fellowship script for the Bing and
# NRC lexicons, stacked into one table labelled by `method`.
# Join keys are explicit. The NRC join declares a many-to-many relationship
# because dplyr reported words matching more than one positive/negative row;
# leaving it undeclared produces a warning on every run.
bing_and_nrc <- bind_rows(
  the_fellowship %>%
    inner_join(get_sentiments("bing"), by = "word") %>%
    mutate(method = "Bing et al."),
  the_fellowship %>%
    inner_join(
      get_sentiments("nrc") %>%
        filter(sentiment %in% c("positive", "negative")),
      by = "word",
      relationship = "many-to-many"
    ) %>%
    mutate(method = "NRC")
) %>%
  count(method, index = linenumber %/% 20, sentiment) %>%
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = 0
  ) %>%
  mutate(sentiment = positive - negative)
# Stack the three methods and draw one panel per lexicon.
fellowship_methods <- bind_rows(afinn, bing_and_nrc)

ggplot(fellowship_methods, aes(x = index, y = sentiment, fill = method)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~method, ncol = 1, scales = "free_y")
# Size of the positive and negative halves of the NRC lexicon.
nrc_lexicon <- get_sentiments("nrc")
nrc_lexicon %>%
  filter(sentiment %in% c("positive", "negative")) %>%
  count(sentiment)
## # A tibble: 2 × 2
## sentiment n
## <chr> <int>
## 1 negative 3316
## 2 positive 2308
# Size of the positive and negative halves of the Bing lexicon.
count(get_sentiments("bing"), sentiment)
## # A tibble: 2 × 2
## sentiment n
## <chr> <int>
## 1 negative 4781
## 2 positive 2005
We can compare the sentiment analysis from the different sentiment lexicons (AFINN, Bing et al., and NRC) for the movie “The Fellowship of the Ring”.
# Frequency of every Bing-labelled word across the three movie scripts,
# most frequent first.
lotr_bing_tokens <- inner_join(tidy_LOTR, get_sentiments("bing"))
bing_word_counts_LOTR <- lotr_bing_tokens %>%
  count(word, sentiment, sort = TRUE) %>%
  ungroup()
## Joining with `by = join_by(word)`
# Show the Bing word counts for the LOTR scripts.
bing_word_counts_LOTR
## # A tibble: 477 × 3
## word sentiment n
## <chr> <chr> <int>
## 1 master positive 51
## 2 good positive 46
## 3 dead negative 45
## 4 death negative 37
## 5 merry positive 37
## 6 precious positive 33
## 7 well positive 31
## 8 like positive 30
## 9 great positive 27
## 10 right positive 25
## # ℹ 467 more rows
This is the list of words, their bing sentiments, and their counts for the whole trilogy of movies.
# Top words per Bing sentiment in the scripts (10 each, ties kept), ordered
# by count, one panel per sentiment.
top_lotr_bing_words <- bing_word_counts_LOTR %>%
  group_by(sentiment) %>%
  slice_max(order_by = n, n = 10) %>%
  ungroup() %>%
  mutate(word = reorder(word, n))

ggplot(top_lotr_bing_words, aes(x = n, y = word, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(
    x = "Contribution to sentiment",
    y = NULL
  )
## Bing word counts
This plot shows how much each word contributes to each sentiment using bing; it displays the ten most common negative and positive words (ties included).
# Word cloud of the 100 most frequent non-stop-words in the scripts.
lotr_word_totals <- tidy_LOTR %>%
  anti_join(stop_words) %>%
  count(word)

wordcloud(lotr_word_totals$word, lotr_word_totals$n, max.words = 100)
## Joining with `by = join_by(word)`
## Word Cloud
A word cloud can show the frequency of words in a visual way.
library(reshape2)
# Comparison cloud of the most frequent Bing words in the scripts, split by
# sentiment. acast() reshapes the counts into a word-by-sentiment matrix.
lotr_sentiment_counts <- tidy_LOTR %>%
  inner_join(get_sentiments("bing")) %>%
  count(word, sentiment, sort = TRUE)

lotr_sentiment_matrix <- acast(
  lotr_sentiment_counts,
  word ~ sentiment,
  value.var = "n",
  fill = 0
)

comparison.cloud(
  lotr_sentiment_matrix,
  colors = c("red", "gray20"),
  max.words = 100
)
## Joining with `by = join_by(word)`
## Word Cloud continued
A word cloud can be adapted to show the sentiment of the words.
# Share of Bing-negative words in each movie, highest share first.
bingnegative <- get_sentiments("bing") %>%
  filter(sentiment == "negative")

# Total number of words per movie (the denominator of the ratio).
wordcountsLOTR <- tidy_LOTR %>%
  group_by(movie) %>%
  summarize(words = n())

# The original pipeline contained a filter() with no conditions, which keeps
# every row, and a trailing ungroup() on data that is already ungrouped after
# summarising over the single grouping column; both no-ops were removed.
tidy_LOTR %>%
  semi_join(bingnegative) %>%
  group_by(movie) %>%
  summarize(negativewords = n()) %>%
  left_join(wordcountsLOTR, by = c("movie")) %>%
  mutate(ratio = negativewords / words) %>%
  slice_max(ratio, n = 3)
## Joining with `by = join_by(word)`
## # A tibble: 3 × 4
## movie negativewords words ratio
## <chr> <int> <int> <dbl>
## 1 "The Return of the King " 398 9536 0.0417
## 2 "The Two Towers " 374 9893 0.0378
## 3 "The Fellowship of the Ring " 172 5465 0.0315
According to bing, The Return of the King had both the most negative words (398) and the highest proportion of negative words (about 4.2%) of the three movies.
## One additional sentiment lexicon: “loughran”
library(syuzhet)
## Warning: package 'syuzhet' was built under R version 4.3.3
# List the distinct sentiment categories of the Loughran lexicon, selecting
# the column by name rather than by position.
loughran_lexicon <- get_sentiments("loughran")
unique(loughran_lexicon["sentiment"])
## # A tibble: 6 × 1
## sentiment
## <chr>
## 1 negative
## 2 positive
## 3 uncertainty
## 4 litigious
## 5 constraining
## 6 superfluous
# Frequency of every Loughran-labelled word across the three movie scripts,
# most frequent first.
# The join key and the many-to-many relationship are explicit: dplyr reported
# at least one word matching more than one Loughran row, and without the
# declaration every run emits a warning. The trailing ungroup() was dropped
# because count() on ungrouped input already returns ungrouped output.
loughran_word_counts_LOTR <- tidy_LOTR %>%
  inner_join(
    get_sentiments("loughran"),
    by = "word",
    relationship = "many-to-many"
  ) %>%
  count(word, sentiment, sort = TRUE)

loughran_word_counts_LOTR
## # A tibble: 202 × 3
## word sentiment n
## <chr> <chr> <int>
## 1 good positive 46
## 2 shall litigious 32
## 3 could uncertainty 30
## 4 great positive 27
## 5 may uncertainty 21
## 6 lost negative 19
## 7 fear negative 17
## 8 against negative 15
## 9 destroy negative 12
## 10 strength positive 12
## # ℹ 192 more rows
# Top words per Loughran sentiment category (10 each, ties kept), ordered by
# count, one panel per category.
top_loughran_words <- loughran_word_counts_LOTR %>%
  group_by(sentiment) %>%
  slice_max(order_by = n, n = 10) %>%
  ungroup() %>%
  mutate(word = reorder(word, n))

ggplot(top_loughran_words, aes(x = n, y = word, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(
    x = "Contribution to sentiment",
    y = NULL
  )
## Conclusion:
The loughran lexicon has additional sentiment categories (uncertainty, litigious, constraining, and superfluous), and the frequency of the words in each category across the three movies can be plotted. In this assignment we have shown how the sentiment analysis from the tidytextmining.com book can be applied to a different corpus, namely the Lord of the Rings movie scripts.