Warning: package 'tidytext' was built under R version 4.5.3
library(janeaustenr)
Warning: package 'janeaustenr' was built under R version 4.5.3
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(stringr)
library(tidyr)
library(ggplot2)

# Reproduction of Chapter 2: Sentiment analysis of Jane Austen's novels
#
# Step 1: turn the novels into a one-word-per-row table, tagging each word
# with the line number and chapter it came from (both computed per book).
tidy_books <- austen_books() %>%
  group_by(book) %>%
  mutate(
    linenumber = row_number(),
    # Running count of lines that look like a chapter heading, so every line
    # carries the number of the chapter it falls in.
    chapter = cumsum(
      str_detect(text, regex("^chapter [\\divxlc]", ignore_case = TRUE))
    )
  ) %>%
  ungroup() %>%
  unnest_tokens(word, text)

# Using the Bing lexicon to find sentiment trajectory
#
# Step 2: keep only words present in the lexicon, count positive and negative
# words per 80-line chunk of each book, and compute the net score.
jane_austen_sentiment <- tidy_books %>%
  inner_join(
    get_sentiments("bing"),
    by = join_by(word),
    # dplyr reported that some words match more than one lexicon row; declare
    # the fan-out as expected instead of relying on a warning.
    relationship = "many-to-many"
  ) %>%
  count(book, index = linenumber %/% 80, sentiment) %>%
  # values_fill = 0 so chunks lacking one sentiment get 0 rather than NA.
  pivot_wider(names_from = sentiment, values_from = n, values_fill = 0) %>%
  mutate(sentiment = positive - negative)
Joining with `by = join_by(word)`
Warning in inner_join(., get_sentiments("bing")): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 435434 of `x` matches multiple rows in `y`.
ℹ Row 5051 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.
# Visualizing the results
#
# One bar per 80-line chunk, one panel per novel. The x scale is free per
# panel so each book uses its own range of chunk indices.
ggplot(jane_austen_sentiment, aes(index, sentiment, fill = book)) +
  # The panel titles already name each book, so the fill legend is dropped.
  geom_col(show.legend = FALSE) +
  facet_wrap(~book, ncol = 2, scales = "free_x") +
  labs(
    title = "Sentiment in Jane Austen's Novels",
    caption = "Source: Silge & Robinson, Text Mining with R",
    x = "Narrative Progress (80-line chunks)",
    y = "Net Sentiment (positive - negative)"
  )
Pulling from the gutenbergr package to download The War of the Worlds and The Time Machine.
library(gutenbergr)
Warning: package 'gutenbergr' was built under R version 4.5.3
# Downloading 'The War of the Worlds' (36) and 'The Time Machine' (35)
#
# meta_fields = "title" requests the title alongside the downloaded text for
# both Project Gutenberg IDs.
wells_books <- gutenberg_download(c(35, 36), meta_fields = "title")
Warning in inner_join(., loughran): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 3326 of `x` matches multiple rows in `y`.
ℹ Row 2826 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.
# Stacked bar chart of the counts in `wells_loughran`: one bar per index
# value, coloured by sentiment category, with one panel per title.
# NOTE(review): `wells_loughran` is built outside this excerpt; the axis
# labels imply it holds word counts per 80-line chunk -- confirm upstream.
ggplot(
  data = wells_loughran,
  mapping = aes(x = index, y = n, fill = sentiment)
) +
  geom_col() +
  # Each title gets its own x range.
  facet_wrap(~title, scales = "free_x") +
  labs(
    title = "Sentiment Complexity in H.G. Wells (Loughran Lexicon)",
    y = "Word Count",
    x = "Narrative Progress (80-line chunks)"
  )