Loading the required packages

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.1.0     v dplyr   1.0.5
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(tidytext)
## Warning: package 'tidytext' was built under R version 4.0.5

Sentiment lexicons available through tidytext

# AFINN lexicon: one numeric `value` score per word.
get_sentiments(lexicon = "afinn")
## # A tibble: 2,477 x 2
##    word       value
##    <chr>      <dbl>
##  1 abandon       -2
##  2 abandoned     -2
##  3 abandons      -2
##  4 abducted      -2
##  5 abduction     -2
##  6 abductions    -2
##  7 abhor         -3
##  8 abhorred      -3
##  9 abhorrent     -3
## 10 abhors        -3
## # ... with 2,467 more rows
# Bing lexicon: one categorical `sentiment` label per word.
get_sentiments(lexicon = "bing")
## # A tibble: 6,786 x 2
##    word        sentiment
##    <chr>       <chr>    
##  1 2-faces     negative 
##  2 abnormal    negative 
##  3 abolish     negative 
##  4 abominable  negative 
##  5 abominably  negative 
##  6 abominate   negative 
##  7 abomination negative 
##  8 abort       negative 
##  9 aborted     negative 
## 10 aborts      negative 
## # ... with 6,776 more rows
# NRC lexicon: a word can carry several `sentiment` labels, one row each.
get_sentiments(lexicon = "nrc")
## # A tibble: 13,901 x 2
##    word        sentiment
##    <chr>       <chr>    
##  1 abacus      trust    
##  2 abandon     fear     
##  3 abandon     negative 
##  4 abandon     sadness  
##  5 abandoned   anger    
##  6 abandoned   fear     
##  7 abandoned   negative 
##  8 abandoned   sadness  
##  9 abandonment anger    
## 10 abandonment fear     
## # ... with 13,891 more rows

The authors (Silge and Robinson) count the words labeled “joy” in the NRC lexicon that appear in Jane Austen’s “Emma”.

library(janeaustenr)
## Warning: package 'janeaustenr' was built under R version 4.0.5
library(dplyr)
library(stringr)

# Borrowed code
# Julia Silge and David Robinson. Text Mining with R. O'Reilly, 2017.

# Tag every line of each novel with its position within the book and a
# running chapter counter, then split the text into one token per row
# (stored in the `word` column).
tidy_books <- austen_books() %>%
  group_by(book) %>%
  mutate(
    linenumber = row_number(),
    # Each line that opens with "chapter" followed by a digit or one of the
    # letters i/v/x/l/c (case-insensitive) bumps the chapter counter.
    chapter = cumsum(
      str_detect(text, regex("^chapter [\\divxlc]", ignore_case = TRUE))
    )
  ) %>%
  ungroup() %>%
  unnest_tokens(word, text)

# Keep only the NRC entries whose sentiment label is "joy".
nrc_joy <- filter(get_sentiments("nrc"), sentiment == "joy")

# Tally how often each joy word occurs in "Emma", most frequent first.
tidy_books %>%
  filter(book == "Emma") %>%
  inner_join(nrc_joy, by = "word") %>%
  count(word) %>%
  arrange(desc(n))
## # A tibble: 303 x 2
##    word        n
##    <chr>   <int>
##  1 good      359
##  2 young     192
##  3 friend    166
##  4 hope      143
##  5 happy     125
##  6 love      117
##  7 deal       92
##  8 found      92
##  9 present    89
## 10 kind       82
## # ... with 293 more rows

Next, I try one of the other lexicons, AFINN, to score the words in Jane Austen’s “Emma”.

# Score the words of "Emma" with the AFINN lexicon.

afinn <- get_sentiments("afinn")

# For every word in "Emma" that has an AFINN score, add up that score across
# all of its occurrences, then order the words from most positive total to
# most negative total.
# NOTE(review): "miss" has the most negative total; in Austen it is presumably
# the honorific "Miss" rather than a negative term -- confirm before
# interpreting the negative tail.
afinn_words <- tidy_books %>%
  filter(book == "Emma") %>%
  inner_join(afinn, by = "word") %>%
  group_by(word) %>%
  summarise(sum = sum(value)) %>%
  arrange(desc(sum))

# First six rows of afinn_words.
head(afinn_words, n = 6L)
## # A tibble: 6 x 2
##   word    sum
##   <chr> <dbl>
## 1 good   1077
## 2 great   792
## 3 dear    482
## 4 like    400
## 5 happy   375
## 6 love    351
# Last six rows of afinn_words.
tail(afinn_words, n = 6L)
## # A tibble: 6 x 2
##   word    sum
##   <chr> <dbl>
## 1 ill    -144
## 2 cried  -162
## 3 bad    -180
## 4 poor   -272
## 5 no     -742
## 6 miss  -1198
# Net AFINN score over every scored word in "Emma"; the total is positive,
# so the overall sentiment is positive.
sum(afinn_words[["sum"]])
## [1] 5837

How about her other books?

# Apply the AFINN lexicon to all of Austen's novels.

# Net AFINN score per book: sum the scores of every scored word in each novel.
books <- tidy_books %>%
  inner_join(afinn, by = "word") %>%
  group_by(book) %>%
  summarise(sum = sum(value))

# Horizontal bar chart of the per-book totals, ordered by score.
ggplot(books, aes(x = reorder(book, sum), y = sum)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Overall Sentiment for Each of Austen's Books",
    x = "books",
    y = "sentiment"
  )