Starting with the base code from Chapter 2 of the book “Text Mining
with R” by Silge and Robinson, which covers sentiment analysis
(Silge & Robinson, n.d.):
Silge, J., & Robinson, D. (n.d.). Sentiment analysis. In Text
Mining with R: A Tidy Approach. Retrieved March 31, 2024, from https://www.tidytextmining.com/sentiment.html
library(tidytext)
# The sentiments datasets
get_sentiments("afinn")
## # A tibble: 2,477 × 2
## word value
## <chr> <dbl>
## 1 abandon -2
## 2 abandoned -2
## 3 abandons -2
## 4 abducted -2
## 5 abduction -2
## 6 abductions -2
## 7 abhor -3
## 8 abhorred -3
## 9 abhorrent -3
## 10 abhors -3
## # ℹ 2,467 more rows
get_sentiments("bing")
## # A tibble: 6,786 × 2
## word sentiment
## <chr> <chr>
## 1 2-faces negative
## 2 abnormal negative
## 3 abolish negative
## 4 abominable negative
## 5 abominably negative
## 6 abominate negative
## 7 abomination negative
## 8 abort negative
## 9 aborted negative
## 10 aborts negative
## # ℹ 6,776 more rows
get_sentiments("nrc")
## # A tibble: 13,872 × 2
## word sentiment
## <chr> <chr>
## 1 abacus trust
## 2 abandon fear
## 3 abandon negative
## 4 abandon sadness
## 5 abandoned anger
## 6 abandoned fear
## 7 abandoned negative
## 8 abandoned sadness
## 9 abandonment anger
## 10 abandonment fear
## # ℹ 13,862 more rows
# Sentiment analysis with inner join
library(janeaustenr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
tidy_books <- austen_books() %>%
group_by(book) %>%
mutate(
linenumber = row_number(),
chapter = cumsum(str_detect(text,
regex("^chapter [\\divxlc]",
ignore_case = TRUE)))) %>%
ungroup() %>%
unnest_tokens(word, text)
nrc_joy <- get_sentiments("nrc") %>%
filter(sentiment == "joy")
tidy_books %>%
filter(book == "Emma") %>%
inner_join(nrc_joy) %>%
count(word, sort = TRUE)
## Joining with `by = join_by(word)`
## # A tibble: 301 × 2
## word n
## <chr> <int>
## 1 good 359
## 2 friend 166
## 3 hope 143
## 4 happy 125
## 5 love 117
## 6 deal 92
## 7 found 92
## 8 present 89
## 9 kind 82
## 10 happiness 76
## # ℹ 291 more rows
library(tidyr)
jane_austen_sentiment <- tidy_books %>%
inner_join(get_sentiments("bing")) %>%
count(book, index = linenumber %/% 80, sentiment) %>%
pivot_wider(names_from = sentiment, values_from = n, values_fill = 0) %>%
mutate(sentiment = positive - negative)
## Joining with `by = join_by(word)`
## Warning in inner_join(., get_sentiments("bing")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 435434 of `x` matches multiple rows in `y`.
## ℹ Row 5051 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
library(ggplot2)
ggplot(jane_austen_sentiment, aes(index, sentiment, fill = book)) +
geom_col(show.legend = FALSE) +
facet_wrap(~book, ncol = 2, scales = "free_x")

# Comparing the three sentiment dictionaries
pride_prejudice <- tidy_books %>%
filter(book == "Pride & Prejudice")
pride_prejudice
## # A tibble: 122,204 × 4
## book linenumber chapter word
## <fct> <int> <int> <chr>
## 1 Pride & Prejudice 1 0 pride
## 2 Pride & Prejudice 1 0 and
## 3 Pride & Prejudice 1 0 prejudice
## 4 Pride & Prejudice 3 0 by
## 5 Pride & Prejudice 3 0 jane
## 6 Pride & Prejudice 3 0 austen
## 7 Pride & Prejudice 7 1 chapter
## 8 Pride & Prejudice 7 1 1
## 9 Pride & Prejudice 10 1 it
## 10 Pride & Prejudice 10 1 is
## # ℹ 122,194 more rows
afinn <- pride_prejudice %>%
inner_join(get_sentiments("afinn")) %>%
group_by(index = linenumber %/% 80) %>%
summarise(sentiment = sum(value)) %>%
mutate(method = "AFINN")
## Joining with `by = join_by(word)`
bing_and_nrc <- bind_rows(
pride_prejudice %>%
inner_join(get_sentiments("bing")) %>%
mutate(method = "Bing et al."),
pride_prejudice %>%
inner_join(get_sentiments("nrc") %>%
filter(sentiment %in% c("positive",
"negative"))
) %>%
mutate(method = "NRC")) %>%
count(method, index = linenumber %/% 80, sentiment) %>%
pivot_wider(names_from = sentiment,
values_from = n,
values_fill = 0) %>%
mutate(sentiment = positive - negative)
## Joining with `by = join_by(word)`
## Joining with `by = join_by(word)`
## Warning in inner_join(., get_sentiments("nrc") %>% filter(sentiment %in% : Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 215 of `x` matches multiple rows in `y`.
## ℹ Row 5178 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
bind_rows(afinn,
bing_and_nrc) %>%
ggplot(aes(index, sentiment, fill = method)) +
geom_col(show.legend = FALSE) +
facet_wrap(~method, ncol = 1, scales = "free_y")

get_sentiments("nrc") %>%
filter(sentiment %in% c("positive", "negative")) %>%
count(sentiment)
## # A tibble: 2 × 2
## sentiment n
## <chr> <int>
## 1 negative 3316
## 2 positive 2308
get_sentiments("bing") %>%
count(sentiment)
## # A tibble: 2 × 2
## sentiment n
## <chr> <int>
## 1 negative 4781
## 2 positive 2005
# Most common positive and negative words
bing_word_counts <- tidy_books %>%
inner_join(get_sentiments("bing")) %>%
count(word, sentiment, sort = TRUE) %>%
ungroup()
## Joining with `by = join_by(word)`
## Warning in inner_join(., get_sentiments("bing")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 435434 of `x` matches multiple rows in `y`.
## ℹ Row 5051 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
bing_word_counts
## # A tibble: 2,585 × 3
## word sentiment n
## <chr> <chr> <int>
## 1 miss negative 1855
## 2 well positive 1523
## 3 good positive 1380
## 4 great positive 981
## 5 like positive 725
## 6 better positive 639
## 7 enough positive 613
## 8 happy positive 534
## 9 love positive 495
## 10 pleasure positive 462
## # ℹ 2,575 more rows
bing_word_counts %>%
group_by(sentiment) %>%
slice_max(n, n = 10) %>%
ungroup() %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(n, word, fill = sentiment)) +
geom_col(show.legend = FALSE) +
facet_wrap(~sentiment, scales = "free_y") +
labs(x = "Contribution to sentiment",
y = NULL)

custom_stop_words <- bind_rows(tibble(word = c("miss"),
lexicon = c("custom")),
stop_words)
custom_stop_words
## # A tibble: 1,150 × 2
## word lexicon
## <chr> <chr>
## 1 miss custom
## 2 a SMART
## 3 a's SMART
## 4 able SMART
## 5 about SMART
## 6 above SMART
## 7 according SMART
## 8 accordingly SMART
## 9 across SMART
## 10 actually SMART
## # ℹ 1,140 more rows
# Wordclouds
library(wordcloud)
## Loading required package: RColorBrewer
tidy_books %>%
anti_join(stop_words) %>%
count(word) %>%
with(wordcloud(word, n, max.words = 100))
## Joining with `by = join_by(word)`

library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
tidy_books %>%
inner_join(get_sentiments("bing")) %>%
count(word, sentiment, sort = TRUE) %>%
acast(word ~ sentiment, value.var = "n", fill = 0) %>%
comparison.cloud(colors = c("gray20", "gray80"),
max.words = 100)
## Joining with `by = join_by(word)`
## Warning in inner_join(., get_sentiments("bing")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 435434 of `x` matches multiple rows in `y`.
## ℹ Row 5051 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.

# Looking at units beyond just words
p_and_p_sentences <- tibble(text = prideprejudice) %>%
unnest_tokens(sentence, text, token = "sentences")
p_and_p_sentences$sentence[2]
## [1] "by jane austen"
austen_chapters <- austen_books() %>%
group_by(book) %>%
unnest_tokens(chapter, text, token = "regex",
pattern = "Chapter|CHAPTER [\\dIVXLC]") %>%
ungroup()
austen_chapters %>%
group_by(book) %>%
summarise(chapters = n())
## # A tibble: 6 × 2
## book chapters
## <fct> <int>
## 1 Sense & Sensibility 51
## 2 Pride & Prejudice 62
## 3 Mansfield Park 49
## 4 Emma 56
## 5 Northanger Abbey 32
## 6 Persuasion 25
bingnegative <- get_sentiments("bing") %>%
filter(sentiment == "negative")
wordcounts <- tidy_books %>%
group_by(book, chapter) %>%
summarize(words = n())
## `summarise()` has grouped output by 'book'. You can override using the
## `.groups` argument.
tidy_books %>%
semi_join(bingnegative) %>%
group_by(book, chapter) %>%
summarize(negativewords = n()) %>%
left_join(wordcounts, by = c("book", "chapter")) %>%
mutate(ratio = negativewords/words) %>%
filter(chapter != 0) %>%
slice_max(ratio, n = 1) %>%
ungroup()
## Joining with `by = join_by(word)`
## `summarise()` has grouped output by 'book'. You can override using the
## `.groups` argument.
## # A tibble: 6 × 5
## book chapter negativewords words ratio
## <fct> <int> <int> <int> <dbl>
## 1 Sense & Sensibility 43 161 3405 0.0473
## 2 Pride & Prejudice 34 111 2104 0.0528
## 3 Mansfield Park 46 173 3685 0.0469
## 4 Emma 15 151 3340 0.0452
## 5 Northanger Abbey 21 149 2982 0.0500
## 6 Persuasion 4 62 1807 0.0343
Now exploring the works of a few famous authors (with and without
depression) using Project Gutenberg:
I want to explore the works of famous authors who suffered from
depression and check whether the sentiment of their works differs
from that of authors not known to have had the illness. I will rely on
Project Gutenberg here too. One caveat is that listing certain famous
authors as a comparison group for “not having depression” is inherently
more challenging, because the absence of depression cannot be confirmed
(they may have had it but never been diagnosed with it, or kept it
private due to the societal stigma around mental illness in previous
historical periods). So this comparison is mostly for illustrative
purposes.
The authors I want to include in this analysis are:
- Those with depression: Friedrich Nietzsche, Virginia Woolf, Edgar Allan Poe, Ernest
Hemingway, Fyodor Dostoevsky, Leo Tolstoy.
- Those not known to have had depression (comparison): Jane Austen, Mark Twain, Agatha Christie,
George Bernard Shaw, Louisa May Alcott.
# install.packages("gutenbergr")
library(gutenbergr)
library(dplyr)
#' Download the Project Gutenberg texts catalogued under one author.
#'
#' Looks the author up with `gutenberg_works()`, downloads the matching works
#' with `gutenberg_download()` while keeping a fixed set of metadata columns,
#' and overwrites the `author` column with the name that was requested.
#'
#' @param author_name Author name to match against the `author` column of the
#'   gutenbergr catalogue (exact match, e.g. "Austen, Jane").
#' @return The result of `gutenberg_download()` with `author` set to
#'   `author_name` on every row.
download_works_with_metadata <- function(author_name) {
  metadata_columns <- c(
    "author", "title", "gutenberg_id",
    "language", "gutenberg_bookshelf", "rights"
  )
  catalogue_entries <- gutenberg_works(author == author_name)
  downloaded_text <- gutenberg_download(
    catalogue_entries,
    meta_fields = metadata_columns
  )
  # Ensure consistent author naming
  mutate(downloaded_text, author = author_name)
}
## Authors known to have had Depression
nietzsche_works <- download_works_with_metadata("Nietzsche, Friedrich Wilhelm")
woolf_works <- download_works_with_metadata("Woolf, Virginia")
poe_works <- download_works_with_metadata("Poe, Edgar Allan")
hemingway_works <- download_works_with_metadata("Hemingway, Ernest")
dostoevsky_works <- download_works_with_metadata("Dostoyevsky, Fyodor")
tolstoy_works <- download_works_with_metadata("Tolstoy, Leo, graf")
## Authors known to not have had Depression
austen_works <- download_works_with_metadata("Austen, Jane")
twain_works <- download_works_with_metadata("Twain, Mark")
christtie_works <- download_works_with_metadata("Christie, Agatha")
shaw_works <- download_works_with_metadata("Shaw, Bernard")
alcott_works <- download_works_with_metadata("Alcott, Louisa May")
Combining the Data Frames:
## Combine authors known to have had depression
depression_authors_works <- rbind(nietzsche_works, woolf_works, poe_works,
hemingway_works, dostoevsky_works, tolstoy_works)
## Combine authors known to not have had depression
non_depression_authors_works <- rbind(austen_works, twain_works,
christtie_works, shaw_works, alcott_works)
# Saving the work so far in CSV files (to reduce having to redo all the steps if need be later):
write.csv(depression_authors_works, "C:/Users/teraw/Dropbox/_CUNY SPS MSDS/-- DATA 607/5- Modules/Week 10/depression_authors_works.csv", row.names = TRUE)
write.csv(non_depression_authors_works, "C:/Users/teraw/Dropbox/_CUNY SPS MSDS/-- DATA 607/5- Modules/Week 10/non_depression_authors_works.csv", row.names = TRUE)
Now I am going to clean and tidy the data: I will convert the texts
into a tidy format where each row contains a single word. In other
words, what I am going to do next is “tokenization”. Then, I’ll remove
common stopwords (words like “the”, “and”, etc., that don’t contribute
much meaning to the sentiment analysis). I will also add steps for:
- Lowercasing all words so that the same words are recognized as the
same token, regardless of their case.
- Removing punctuation and numbers to focus only on words.
library(tidytext)
library(dplyr)
library(stringr)
# Stop-word list used by the anti_join() steps below
data("stop_words")

# Depression group: tokenize to one word per row, remove stop words,
# lowercase, then drop tokens made up solely of digits.
depression_authors_works_tidy <- depression_authors_works %>%
  unnest_tokens(word, text) %>%
  anti_join(stop_words, by = "word") %>%
  mutate(word = tolower(word)) %>%
  filter(!str_detect(word, "^[0-9]+$"))

# Non-depression group: identical steps in the same order.
non_depression_authors_works_tidy <- non_depression_authors_works %>%
  unnest_tokens(word, text) %>%
  anti_join(stop_words, by = "word") %>%
  mutate(word = tolower(word)) %>%
  filter(!str_detect(word, "^[0-9]+$"))
Performing the sentiment analysis.
Using the bing sentiment lexicon:
library(tidytext)
library(ggplot2)
library(tidyr)
library(dplyr)
# Sentiment analysis for depression group.
# Count Bing-positive and Bing-negative words per work (gutenberg_id), put the
# two counts side by side, and score each work as positive - negative.
# pivot_wider() replaces the superseded spread(), matching the earlier
# pivot_wider() calls in this document; count() on both keys avoids the
# group_by()/ungroup() round-trip.
depression_sentiment <- depression_authors_works_tidy %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(gutenberg_id, sentiment) %>%
  pivot_wider(names_from = sentiment, values_from = n, values_fill = 0) %>%
  mutate(sentiment_score = positive - negative)
## Warning in inner_join(., get_sentiments("bing"), by = "word"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 28665 of `x` matches multiple rows in `y`.
## ℹ Row 2102 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
# Sentiment analysis for non-depression group.
# Count Bing-positive and Bing-negative words per work (gutenberg_id), put the
# two counts side by side, and score each work as positive - negative.
# pivot_wider() replaces the superseded spread(), matching the earlier
# pivot_wider() calls in this document; count() on both keys avoids the
# group_by()/ungroup() round-trip.
non_depression_sentiment <- non_depression_authors_works_tidy %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(gutenberg_id, sentiment) %>%
  pivot_wider(names_from = sentiment, values_from = n, values_fill = 0) %>%
  mutate(sentiment_score = positive - negative)
## Warning in inner_join(., get_sentiments("bing"), by = "word"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 107165 of `x` matches multiple rows in `y`.
## ℹ Row 6514 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
# Merge sentiment scores with the works data to include author and title.
# The tidy frames hold one row per token, so they are first reduced to one
# metadata row per work. Merging against the token-level rows directly would
# repeat each work's score once per token, and the bar charts below (which add
# up the rows of an author) would then scale every score by the text length.
depression_work_metadata <- depression_authors_works_tidy %>%
  distinct(gutenberg_id, author, title)
non_depression_work_metadata <- non_depression_authors_works_tidy %>%
  distinct(gutenberg_id, author, title)

depression_sentiment_enriched <- merge(
  depression_sentiment,
  depression_work_metadata,
  by = "gutenberg_id",
  all.x = TRUE
)
non_depression_sentiment_enriched <- merge(
  non_depression_sentiment,
  non_depression_work_metadata,
  by = "gutenberg_id",
  all.x = TRUE
)
# Plot for depression group: horizontal bars per author, positioned with
# reorder() on sentiment_score. Rows that share an author are stacked into one
# bar (geom_col's default position); the legend is hidden.
depression_sentiment_enriched %>%
  ggplot(aes(
    x = reorder(author, sentiment_score),
    y = sentiment_score,
    fill = author
  )) +
  geom_col() +
  coord_flip() +
  ggtitle("Sentiment Score by Author for Depression Group") +
  xlab("Author") +
  ylab("Sentiment Score") +
  theme(legend.position = "none")

# Plot for non-depression group: horizontal bars per author, positioned with
# reorder() on sentiment_score. Rows that share an author are stacked into one
# bar (geom_col's default position); the legend is hidden.
non_depression_sentiment_enriched %>%
  ggplot(aes(
    x = reorder(author, sentiment_score),
    y = sentiment_score,
    fill = author
  )) +
  geom_col() +
  coord_flip() +
  ggtitle("Sentiment Score by Author for Non-Depression Group") +
  xlab("Author") +
  ylab("Sentiment Score") +
  theme(legend.position = "none")

Combining the plots for a better visual comparison:
# Label each group's scores and stack both groups into one frame.
depression_sentiment_enriched$group <- "Depression"
non_depression_sentiment_enriched$group <- "Non-Depression"
combined_sentiment_data <- rbind(depression_sentiment_enriched, non_depression_sentiment_enriched)

# Collapse to one total per author before plotting. position_dodge() only
# separates bars that belong to different groups at the same x position; here
# every row of an author has the same fill, so the dodged bars were drawn on
# top of each other instead of adding up. Summing matches what the stacked
# geom_col() charts of the individual groups display.
author_sentiment_totals <- combined_sentiment_data %>%
  group_by(group, author) %>%
  summarise(sentiment_score = sum(sentiment_score), .groups = "drop")

ggplot(
  author_sentiment_totals,
  aes(x = reorder(author, sentiment_score), y = sentiment_score, fill = group)
) +
  geom_col(width = 0.7) +
  coord_flip() +
  # One panel per group, each with its own author axis and score range
  facet_wrap(~group, scales = "free", ncol = 1) +
  scale_fill_manual(values = c("Depression" = "#FF9999", "Non-Depression" = "#9999FF")) +
  labs(
    title = "Sentiment Score by Author Group",
    x = "Author",
    y = "Sentiment Score"
  ) +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
    axis.title.x = element_text(size = 12, face = "bold"),
    axis.title.y = element_text(size = 12, face = "bold"),
    strip.text = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12, face = "bold")
  ) +
  guides(fill = guide_legend(title = "Group"))
Conclusion:
- The sentiment scores are calculated based on the words used in their
texts, with positive sentiment contributing to a higher score and
negative sentiment contributing to a lower score.
- All authors in the “Depression” group have negative sentiment
scores. This indicates that, on average, words associated with negative
sentiment outweigh those associated with positive sentiment in their
works. It’s also notable that some authors, such as Nietzsche, have
sentiment scores that are more negative than others.
- The “Non-Depression” group shows a mix of negative and positive
sentiment scores. This suggests more variability in sentiment across
these authors’ works. For example, Louisa May Alcott shows a positive
sentiment score, which implies that positive words are more prevalent in
her texts.
- There seems to be a trend where the group known to have had
depression has consistently lower sentiment scores than the group not
known to have had depression. However, it’s important to note that there
are exceptions, such as Agatha Christie, who also shows a negative
sentiment score.
Caution with the previous conclusions (i.e., limitations of
this analysis): Interpretation of sentiment scores should take
into account the context in which these authors wrote, their literary
styles, and the genres they worked in. Additionally, sentiment analysis
based on word occurrence does not capture literary nuances such as
sarcasm, irony, and complex emotional expression.
Another Way: Using the SentiWordNet sentiment lexicon:
SentiWordNet assigns to each synset of WordNet three sentiment
scores: positivity, negativity, and objectivity. SentiWordNet is
described in detail in the papers: https://github.com/aesuli/SentiWordNet/blob/master/papers/LREC06.pdf
and https://github.com/aesuli/SentiWordNet/blob/master/papers/LREC10.pdf
# SentiWordNet can be downloaded from its official repo.
# Only fetch it when no local copy exists, so re-running the document does not
# need network access every time.
sentiwordnet_url <- "https://raw.githubusercontent.com/aesuli/SentiWordNet/master/data/SentiWordNet_3.0.0.txt"
sentiwordnet_path <- "SentiWordNet_3.0.0.txt"
if (!file.exists(sentiwordnet_path)) {
  download.file(sentiwordnet_url, destfile = sentiwordnet_path)
}

# Read the data rows. Lines starting with "#" (licence text, the column header
# and the end marker) are dropped by hand rather than with comment.char = "#":
# every synset term is written as term#sense, so treating "#" as a comment
# character cuts each row off after its first term. Because the column header
# is itself a "#" line, header = TRUE would also consume the first data row.
# quote = "" keeps quotation marks inside glosses from being read as field
# delimiters.
sentiwordnet_lines <- readLines(sentiwordnet_path, warn = FALSE)
sentiwordnet_lines <- sentiwordnet_lines[!startsWith(sentiwordnet_lines, "#")]
sentiwordnet <- read.delim(
  text = sentiwordnet_lines,
  header = FALSE,
  col.names = c("POS", "ID", "PosScore", "NegScore", "SynsetTerms", "Gloss"),
  quote = "",
  comment.char = "",
  stringsAsFactors = FALSE
)

# Build a word-level lexicon:
#  1. strip the "#<sense number>" suffixes,
#  2. split each synset into its space-separated terms (one row per term),
#  3. average the scores over all synsets a word belongs to, so a token in the
#     texts is scored once instead of once per sense when the lexicon is joined.
sentiwordnet <- sentiwordnet %>%
  mutate(word = gsub("#[0-9]+", "", SynsetTerms)) %>%
  tidyr::separate_rows(word, sep = " ") %>%
  filter(!is.na(word), word != "") %>%
  group_by(word) %>%
  summarise(
    PosScore = mean(PosScore, na.rm = TRUE),
    NegScore = mean(NegScore, na.rm = TRUE),
    .groups = "drop"
  )
library(dplyr)
library(tidyr)
# Depression group: match every token against SentiWordNet, add up the
# positive and negative scores per work, then derive the net score.
depression_sentiment_analysis <- depression_authors_works_tidy %>%
  inner_join(sentiwordnet, by = "word") %>%
  group_by(gutenberg_id, author, title) %>%
  summarise(
    PositivityScore = sum(PosScore, na.rm = TRUE),
    NegativityScore = sum(NegScore, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  mutate(NetSentimentScore = PositivityScore - NegativityScore)
## Warning in inner_join(., sentiwordnet, by = "word"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 6199 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
## `summarise()` has grouped output by 'gutenberg_id', 'author'. You can override
## using the `.groups` argument.
# Non-depression group: match every token against SentiWordNet, add up the
# positive and negative scores per work, then derive the net score.
non_depression_sentiment_analysis <- non_depression_authors_works_tidy %>%
  inner_join(sentiwordnet, by = "word") %>%
  group_by(gutenberg_id, author, title) %>%
  summarise(
    PositivityScore = sum(PosScore, na.rm = TRUE),
    NegativityScore = sum(NegScore, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  mutate(NetSentimentScore = PositivityScore - NegativityScore)
## Warning in inner_join(., sentiwordnet, by = "word"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 5 of `x` matches multiple rows in `y`.
## ℹ Row 52704 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
## `summarise()` has grouped output by 'gutenberg_id', 'author'. You can override
## using the `.groups` argument.
# Visualize
library(ggplot2)
# Combine the results for visualization
combined_sentiment_analysis <- bind_rows(
  depression_sentiment_analysis %>% mutate(Group = "Depression"),
  non_depression_sentiment_analysis %>% mutate(Group = "Non-Depression")
)

# The combined frame has one row per work, so an author with several works has
# several rows. position_dodge() only separates bars that belong to different
# groups at the same x position; with a single fill per author the bars were
# drawn on top of each other instead of adding up. Total the net score per
# author so each bar shows one well-defined value.
author_net_sentiment <- combined_sentiment_analysis %>%
  group_by(Group, author) %>%
  summarise(NetSentimentScore = sum(NetSentimentScore), .groups = "drop")

ggplot(
  author_net_sentiment,
  aes(x = reorder(author, NetSentimentScore), y = NetSentimentScore, fill = Group)
) +
  geom_col(width = 0.7) +
  coord_flip() +
  scale_fill_manual(values = c("Depression" = "#E69F00", "Non-Depression" = "#56B4E9")) +
  labs(
    title = "Net Sentiment Score by Author Group",
    x = "Author",
    y = "Net Sentiment Score"
  ) +
  # One panel per group, each listing only its own authors
  facet_wrap(~Group, scales = "free_y", ncol = 1) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.title = element_text(hjust = 0.5), # centre the legend title
    axis.title.x = element_text(size = 12, face = "bold"),
    axis.title.y = element_text(size = 12, face = "bold"),
    strip.text = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 10),
    plot.title = element_text(hjust = 0.5)
  )

Conclusion: With this alternative method of sentiment
analysis, both groups display positive net sentiment scores, although the range
of scores is wider in the depression group. There does not appear to be
a clear distinction between the groups based on net sentiment alone.