Loading packages
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.0 v dplyr 1.0.5
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(tidytext)
## Warning: package 'tidytext' was built under R version 4.0.5
Sentiment lexicons
get_sentiments("afinn")
## # A tibble: 2,477 x 2
## word value
## <chr> <dbl>
## 1 abandon -2
## 2 abandoned -2
## 3 abandons -2
## 4 abducted -2
## 5 abduction -2
## 6 abductions -2
## 7 abhor -3
## 8 abhorred -3
## 9 abhorrent -3
## 10 abhors -3
## # ... with 2,467 more rows
get_sentiments("bing")
## # A tibble: 6,786 x 2
## word sentiment
## <chr> <chr>
## 1 2-faces negative
## 2 abnormal negative
## 3 abolish negative
## 4 abominable negative
## 5 abominably negative
## 6 abominate negative
## 7 abomination negative
## 8 abort negative
## 9 aborted negative
## 10 aborts negative
## # ... with 6,776 more rows
get_sentiments("nrc")
## # A tibble: 13,901 x 2
## word sentiment
## <chr> <chr>
## 1 abacus trust
## 2 abandon fear
## 3 abandon negative
## 4 abandon sadness
## 5 abandoned anger
## 6 abandoned fear
## 7 abandoned negative
## 8 abandoned sadness
## 9 abandonment anger
## 10 abandonment fear
## # ... with 13,891 more rows
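Before using any of these lexicons, it can help to see how the nrc lexicon distributes its words across sentiment categories. The short sketch below is my own addition, not from the original analysis: it simply counts the words under each sentiment, and it also illustrates that a single word such as "abandon" can appear under several sentiments at once.
# Sketch (my own addition): number of nrc words per sentiment category
get_sentiments("nrc") %>%
  count(sentiment, sort = TRUE)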
The authors provide the counts of words labeled as “joy” in the nrc lexicon that appear in Jane Austen’s books.
library(janeaustenr)
## Warning: package 'janeaustenr' was built under R version 4.0.5
library(dplyr)
library(stringr)
# Borrowed Code
# Julia Silge and David Robinson. Text Mining with R. O'Reilly, 2017.
tidy_books <- austen_books() %>%
  group_by(book) %>%
  mutate(linenumber = row_number(),
         chapter = cumsum(str_detect(text,
                                     regex("^chapter [\\divxlc]",
                                           ignore_case = TRUE)))) %>%
  ungroup() %>%
  unnest_tokens(word, text)

nrc_joy <- get_sentiments("nrc") %>%
  filter(sentiment == "joy")

tidy_books %>%
  filter(book == "Emma") %>%
  inner_join(nrc_joy, by = "word") %>%
  count(word, sort = TRUE)
## # A tibble: 303 x 2
## word n
## <chr> <int>
## 1 good 359
## 2 young 192
## 3 friend 166
## 4 hope 143
## 5 happy 125
## 6 love 117
## 7 deal 92
## 8 found 92
## 9 present 89
## 10 kind 82
## # ... with 293 more rows
I wish to try one of the other lexicons to score the words in Jane Austen’s “Emma”.
# Implementing the AFINN lexicon
afinn <- get_sentiments("afinn")

afinn_words <- tidy_books %>%
  filter(book == "Emma") %>%
  inner_join(afinn, by = "word") %>%
  group_by(word) %>%
  summarize(sum = sum(value)) %>%
  arrange(desc(sum))
head(afinn_words)
## # A tibble: 6 x 2
## word sum
## <chr> <dbl>
## 1 good 1077
## 2 great 792
## 3 dear 482
## 4 like 400
## 5 happy 375
## 6 love 351
tail(afinn_words)
## # A tibble: 6 x 2
## word sum
## <chr> <dbl>
## 1 ill -144
## 2 cried -162
## 3 bad -180
## 4 poor -272
## 5 no -742
## 6 miss -1198
# The overall sentiment of "Emma" is positive
sum(afinn_words$sum)
## [1] 5837
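As a rough cross-check (my own sketch, not part of the borrowed analysis), the bing lexicon can be scored in a similar way: counting positive and negative matches in “Emma” and taking their difference gives a net score whose sign can be compared against the AFINN total above.
# Sketch (my own addition): cross-check with the bing lexicon.
# Net sentiment = positive matches minus negative matches in "Emma".
tidy_books %>%
  filter(book == "Emma") %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(sentiment) %>%
  pivot_wider(names_from = sentiment, values_from = n, values_fill = 0) %>%
  mutate(net = positive - negative)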
How about her other books?
# Implementing AFINN on her other books
books <- tidy_books %>%
  inner_join(afinn, by = "word") %>%
  group_by(book) %>%
  summarize(sum = sum(value))

ggplot(books, aes(x = reorder(book, sum), y = sum)) +
  geom_bar(stat = "identity") +
  labs(title = "Overall Sentiment for Each of Austen's Books",
       x = "books", y = "sentiment") +
  coord_flip()
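One caveat with these raw totals is that the books differ in length, so a longer book can dominate simply because it contains more scored words. As a rough adjustment of my own (not in the original analysis), the sketch below takes the mean AFINN value per matched word instead of the sum, which makes books of different lengths easier to compare.
# Sketch (my own adjustment): average AFINN value per matched word
books_avg <- tidy_books %>%
  inner_join(afinn, by = "word") %>%
  group_by(book) %>%
  summarize(mean_value = mean(value))

ggplot(books_avg, aes(x = reorder(book, mean_value), y = mean_value)) +
  geom_bar(stat = "identity") +
  labs(title = "Average AFINN Score per Matched Word",
       x = "books", y = "mean sentiment") +
  coord_flip()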
