Emotional Words Analysis

Author

Satya Narayana Panda

Load the following libraries

library(quanteda)
Warning in .recacheSubclasses(def@className, def, env): undefined subclass
"ndiMatrix" of class "replValueSp"; definition not updated
Package version: 4.0.2
Unicode version: 14.0
ICU version: 71.1
Parallel computing: disabled
See https://quanteda.io for tutorials and examples.
library(tidytext)
library(gutenbergr)
library(RColorBrewer)
library(ggplot2)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ lubridate 1.9.2     ✔ tibble    3.2.1
✔ purrr     1.0.2     ✔ tidyr     1.3.0
✔ readr     2.1.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Download the Project Gutenberg texts for processing

# Fetch Project Gutenberg work 932 with gutenbergr. Presumably "The Fall of
# the House of Usher", going by the plot title used for this corpus later on
# -- confirm the ID against the Gutenberg catalogue.
usher <- gutenberg_download(932)
Determining mirror for Project Gutenberg from https://www.gutenberg.org/robot/harvest
Using mirror http://aleph.gutenberg.org
# Fetch Project Gutenberg work 2345. Presumably "The Adventure of the Red
# Circle", going by the plot title used for this corpus later on -- confirm.
red_circle <- gutenberg_download(2345)

Tokenize Texts

# Split the Usher text into word tokens: the `text` column is coerced to
# character and then unnested into a `word` column.
usher_tokens <- unnest_tokens(
  mutate(usher, text = as.character(text)),
  output = word,
  input = text
)

# Split the Red Circle text into word tokens: the `text` column is coerced to
# character and then unnested into a `word` column.
red_circle_tokens <- unnest_tokens(
  mutate(red_circle, text = as.character(text)),
  output = word,
  input = text
)

Load NRC Dictionary

# Load the NRC lexicon through tidytext. Later code joins it on its `word`
# column and reads its `sentiment` column.
# NOTE(review): get_sentiments("nrc") presumably relies on the textdata
# package and a one-time interactive download/licence prompt -- confirm before
# rendering this document non-interactively.
nrc <- get_sentiments("nrc")

Join NRC sentiment Scores

# Join the Usher tokens to the NRC lexicon and count, per word, how many
# distinct NRC categories the word is tagged with.
#
# Result: one row per matched word with columns
#   word            - the token
#   sentiment_score - number of distinct NRC categories for that word
#
# A token can occur many times in the text and a word can have several rows
# in the lexicon, so the join is many-to-many by design. Declaring that
# relationship silences dplyr's many-to-many warning specifically, instead of
# hiding every warning with a blanket suppressWarnings().
corpus1 <- usher_tokens %>%
  inner_join(distinct(nrc), by = "word", relationship = "many-to-many") %>%
  group_by(word) %>%
  summarise(sentiment_score = n_distinct(sentiment))

# Join the Red Circle tokens to the NRC lexicon and count, per word, how many
# distinct NRC categories the word is tagged with.
#
# Result: one row per matched word with columns
#   word            - the token
#   sentiment_score - number of distinct NRC categories for that word
#
# A token can occur many times in the text and a word can have several rows
# in the lexicon, so the join is many-to-many by design. Declaring that
# relationship silences dplyr's many-to-many warning specifically, instead of
# hiding every warning with a blanket suppressWarnings().
corpus2 <- red_circle_tokens %>%
  inner_join(distinct(nrc), by = "word", relationship = "many-to-many") %>%
  group_by(word) %>%
  summarise(sentiment_score = n_distinct(sentiment))

Combine corpus 1 and 2

# Combine corpus1 and corpus2 and label every word with its NRC polarity.
#
# The label is taken from the lexicon's own "positive" / "negative" rows. The
# previous version derived it from sentiment_score (a count of categories:
# 1-3 -> "negative", anything else -> "positive"), which says nothing about
# polarity, and its trailing filter() could never drop a row.
#
# Result columns: word, sentiment_score (unchanged from corpus1/corpus2) and
# sentiment ("positive" or "negative"). Words without a polarity tag are
# dropped by the inner join. A word present in both corpora, or tagged with
# both polarities, yields one row per combination, hence the explicit
# many-to-many relationship.
#
# NOTE: the printed row count and preview shown after this chunk were produced
# by the old labelling and need to be regenerated by re-rendering.
data_pos_neg <- bind_rows(corpus1, corpus2) %>%
  inner_join(
    nrc %>%
      filter(sentiment %in% c("positive", "negative")) %>%
      distinct(word, sentiment),
    by = "word",
    relationship = "many-to-many"
  )

# Report how many rows data_pos_neg holds after the positive/negative filter
print("Number of rows after filtering:")
[1] "Number of rows after filtering:"
print(nrow(data_pos_neg))
[1] 851
# Preview the first rows (head() default) of data_pos_neg as a sanity check
print("First few rows of data_pos_neg:")
[1] "First few rows of data_pos_neg:"
print(head(data_pos_neg))
# A tibble: 6 × 3
  word       sentiment_score sentiment
  <chr>                <int> <chr>    
1 abandon                  3 negative 
2 abrupt                   1 negative 
3 absolute                 1 negative 
4 accelerate               1 negative 
5 account                  1 negative 
6 accredited               2 negative 

Emotional Words Without Negative and Positive Sentiments

# Keep only the words that carry at least one NRC category other than
# "positive" / "negative", i.e. the emotion categories.
#
# sentiment_score in corpus1 / corpus2 is an integer count of categories, so
# the previous comparison against the strings "positive" and "negative" was
# always TRUE and filtered nothing. The category names live in `nrc`, so the
# selection is done with a semi_join against the non-polarity lexicon rows;
# the columns of corpus1 / corpus2 (word, sentiment_score) are kept unchanged.
# NOTE(review): this reads the section title as "words with an emotion
# category, ignoring the two polarity tags" -- confirm that is the intent.
usher_emotional <- corpus1 %>%
  semi_join(
    filter(nrc, !sentiment %in% c("positive", "negative")),
    by = "word"
  )

red_circle_emotional <- corpus2 %>%
  semi_join(
    filter(nrc, !sentiment %in% c("positive", "negative")),
    by = "word"
  )

# Bar chart of the emotional words found in the Usher text: one bar per word,
# bar height = number of rows for that word in usher_emotional.
# NOTE(review): usher_emotional comes from corpus1, which is summarised to one
# row per word, so every bar will have height 1 -- confirm this is intended.
usher_emotional_plot <- ggplot(
  data = usher_emotional,
  mapping = aes(x = word)
) +
  geom_bar() +
  theme_minimal() +
  labs(
    x = "Words",
    y = "Count",
    title = "Emotional Words Distribution in 'The Fall of the House of Usher'"
  )

# Bar chart of the emotional words found in the Red Circle text: one bar per
# word, bar height = number of rows for that word in red_circle_emotional.
# NOTE(review): red_circle_emotional comes from corpus2, which is summarised
# to one row per word, so every bar will have height 1 -- confirm intent.
red_circle_emotional_plot <- ggplot(
  data = red_circle_emotional,
  mapping = aes(x = word)
) +
  geom_bar() +
  theme_minimal() +
  labs(
    x = "Words",
    y = "Count",
    title = "Emotional Words Distribution in 'The Adventure of the Red Circle'"
  )

Create bar plots for negative and positive sentiments

# Convert the polarity label to a factor with a fixed level order
# (positive first), so legends and fills are ordered consistently.
# The conversion targets `sentiment`, the column holding "positive" /
# "negative". Applying these levels to the integer `sentiment_score`, as
# before, turned every value into NA and left the plots below nothing to draw
# on the y axis.
data_pos_neg$sentiment <- factor(
  data_pos_neg$sentiment,
  levels = c("positive", "negative")
)

# Bar plot of the words labelled "positive": one bar per word, bar height
# taken from sentiment_score, fill colour keyed on the sentiment label.
# Only the positive rows are plotted. Previously the whole of data_pos_neg was
# drawn, which made this plot identical to the negative one apart from its
# title.
positive_words_plot <- ggplot(
  filter(data_pos_neg, sentiment == "positive"),
  aes(x = word, y = sentiment_score, fill = sentiment)
) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_minimal() +
  labs(title = "Positive Sentiment Words Distribution",
       x = "Words", y = "Sentiment") +
  # sentiment is discrete, so colours are assigned per label by name
  scale_fill_manual(values = c("positive" = "#31A354", "negative" = "#E4BFBF"))

# Bar plot of the words labelled "negative": one bar per word, bar height
# taken from sentiment_score, fill colour keyed on the sentiment label.
# Only the negative rows are plotted. Previously the whole of data_pos_neg was
# drawn, which made this plot identical to the positive one apart from its
# title.
negative_words_plot <- ggplot(
  filter(data_pos_neg, sentiment == "negative"),
  aes(x = word, y = sentiment_score, fill = sentiment)
) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_minimal() +
  labs(title = "Negative Sentiment Words Distribution",
       x = "Words", y = "Sentiment") +
  # sentiment is discrete, so colours are assigned per label by name
  scale_fill_manual(values = c("positive" = "#31A354", "negative" = "#E4BFBF"))
# Write each of the four plots to a PNG file in the working directory. No
# width/height is passed, so ggsave picks the size itself (reported as 7 x 5 in
# in the messages below).
ggsave("usher_emotional_plot.png", usher_emotional_plot)
Saving 7 x 5 in image
ggsave("red_circle_emotional_plot.png", red_circle_emotional_plot)
Saving 7 x 5 in image
ggsave("positive_words_plot.png", positive_words_plot)
Saving 7 x 5 in image
ggsave("negative_words_plot.png", negative_words_plot)
Saving 7 x 5 in image

Loading saved plots

# Embed the PNG files written by the ggsave() calls above into the rendered
# document, one image per call. Paths are relative to the working directory.
knitr::include_graphics("usher_emotional_plot.png")

knitr::include_graphics("red_circle_emotional_plot.png")

knitr::include_graphics("positive_words_plot.png")

knitr::include_graphics("negative_words_plot.png")