Example from https://www.tidytextmining.com/sentiment.html. This was something very new to me, so I kept it all in the markdown.

# AFINN lexicon: one row per word with a numeric `value` score
get_sentiments("afinn")
## # A tibble: 2,477 × 2
##    word       value
##    <chr>      <dbl>
##  1 abandon       -2
##  2 abandoned     -2
##  3 abandons      -2
##  4 abducted      -2
##  5 abduction     -2
##  6 abductions    -2
##  7 abhor         -3
##  8 abhorred      -3
##  9 abhorrent     -3
## 10 abhors        -3
## # … with 2,467 more rows
# Bing lexicon: one row per word with a categorical `sentiment` label
get_sentiments("bing")
## # A tibble: 6,786 × 2
##    word        sentiment
##    <chr>       <chr>    
##  1 2-faces     negative 
##  2 abnormal    negative 
##  3 abolish     negative 
##  4 abominable  negative 
##  5 abominably  negative 
##  6 abominate   negative 
##  7 abomination negative 
##  8 abort       negative 
##  9 aborted     negative 
## 10 aborts      negative 
## # … with 6,776 more rows
# NRC lexicon: a word can appear on several rows, one per associated sentiment
get_sentiments("nrc")
## # A tibble: 13,872 × 2
##    word        sentiment
##    <chr>       <chr>    
##  1 abacus      trust    
##  2 abandon     fear     
##  3 abandon     negative 
##  4 abandon     sadness  
##  5 abandoned   anger    
##  6 abandoned   fear     
##  7 abandoned   negative 
##  8 abandoned   sadness  
##  9 abandonment anger    
## 10 abandonment fear     
## # … with 13,862 more rows
# lexicons <- c("bing", "afinn", "loughran", "nrc")
# for (lex in lexicons) {
#     print(paste0("lexicon: ", lex))
#     print(get_sentiments(lex))
# }
# Tag every line of each Austen novel with its line number and a running
# chapter count, then split the text into one token per row in `word`.
tidy_books <- austen_books() %>%
  group_by(book) %>%
  mutate(linenumber = row_number()) %>%
  mutate(
    chapter = cumsum(
      str_detect(text, regex("^chapter [\\divxlc]", ignore_case = TRUE))
    )
  ) %>%
  ungroup() %>%
  unnest_tokens(word, text)

# Keep only the NRC entries whose sentiment is "joy".
nrc_joy <- filter(get_sentiments("nrc"), sentiment == "joy")

# Count the "joy" words that occur in Emma, most frequent first.
inner_join(filter(tidy_books, book == "Emma"), nrc_joy) %>%
  count(word, sort = TRUE)
## Joining, by = "word"
## # A tibble: 301 × 2
##    word          n
##    <chr>     <int>
##  1 good        359
##  2 friend      166
##  3 hope        143
##  4 happy       125
##  5 love        117
##  6 deal         92
##  7 found        92
##  8 present      89
##  9 kind         82
## 10 happiness    76
## # … with 291 more rows
# Net Bing sentiment (positive minus negative word count) for each
# 80-line chunk of every novel.
jane_austen_sentiment <- tidy_books %>%
  inner_join(get_sentiments("bing")) %>%
  mutate(index = linenumber %/% 80) %>%
  count(book, index, sentiment) %>%
  pivot_wider(names_from = sentiment, values_from = n, values_fill = 0) %>%
  mutate(sentiment = positive - negative)
## Joining, by = "word"
# Show the per-chunk sentiment table
jane_austen_sentiment
## # A tibble: 920 × 5
##    book                index negative positive sentiment
##    <fct>               <dbl>    <int>    <int>     <int>
##  1 Sense & Sensibility     0       16       32        16
##  2 Sense & Sensibility     1       19       53        34
##  3 Sense & Sensibility     2       12       31        19
##  4 Sense & Sensibility     3       15       31        16
##  5 Sense & Sensibility     4       16       34        18
##  6 Sense & Sensibility     5       16       51        35
##  7 Sense & Sensibility     6       24       40        16
##  8 Sense & Sensibility     7       23       51        28
##  9 Sense & Sensibility     8       30       40        10
## 10 Sense & Sensibility     9       15       19         4
## # … with 910 more rows
# One panel per novel: net sentiment of each chunk, in reading order.
jane_austen_sentiment %>%
  ggplot(aes(x = index, y = sentiment, fill = book)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~book, ncol = 2, scales = "free_x")

Loading the gutenbergr library to decide which book I want to analyze

# Load the Project Gutenberg helpers and preview the first 50 catalogue rows.
library(gutenbergr)
print(gutenberg_metadata, n = 50)
## # A tibble: 51,997 × 8
##    gutenberg_id title              author guten…¹ langu…² guten…³ rights has_t…⁴
##           <int> <chr>              <chr>    <int> <chr>   <chr>   <chr>  <lgl>  
##  1            0  <NA>              <NA>        NA en      <NA>    Publi… TRUE   
##  2            1 "The Declaration … Jeffe…    1638 en      United… Publi… TRUE   
##  3            2 "The United State… Unite…       1 en      Americ… Publi… TRUE   
##  4            3 "John F. Kennedy'… Kenne…    1666 en      <NA>    Publi… TRUE   
##  5            4 "Lincoln's Gettys… Linco…       3 en      US Civ… Publi… TRUE   
##  6            5 "The United State… Unite…       1 en      Americ… Publi… TRUE   
##  7            6 "Give Me Liberty … Henry…       4 en      Americ… Publi… TRUE   
##  8            7 "The Mayflower Co… <NA>        NA en      <NA>    Publi… TRUE   
##  9            8 "Abraham Lincoln'… Linco…       3 en      US Civ… Publi… TRUE   
## 10            9 "Abraham Lincoln'… Linco…       3 en      US Civ… Publi… TRUE   
## 11           10 "The King James V… <NA>        NA en      <NA>    Publi… TRUE   
## 12           11 "Alice's Adventur… Carro…       7 en      Childr… Publi… TRUE   
## 13           12 "Through the Look… Carro…       7 en      Childr… Publi… TRUE   
## 14           13 "The Hunting of t… Carro…       7 en      Childr… Publi… TRUE   
## 15           14 "The 1990 CIA Wor… Unite…       8 en      CIA Wo… Publi… TRUE   
## 16           15 "Moby Dick"        Melvi…       9 en      Best B… Publi… TRUE   
## 17           16 "Peter Pan"        Barri…      10 en      Movie … Copyr… TRUE   
## 18           17 "The Book of Morm… <NA>        NA en      Christ… Publi… TRUE   
## 19           18 "The Federalist P… <NA>        NA en      Politi… Publi… TRUE   
## 20           19 "The Song of Hiaw… Longf…      16 en      Native… Publi… TRUE   
## 21           20 "Paradise Lost"    Milto…      17 en      Poetry… Publi… TRUE   
## 22           21 "Aesop's Fables\r… Aesop       18 en      <NA>    Publi… TRUE   
## 23           22 "Roget's Thesauru… Roget…      20 en      Refere… Publi… TRUE   
## 24           23 "Narrative of the… Dougl…   34510 en      Slaver… Publi… TRUE   
## 25           24 "O Pioneers!"      Cathe…      22 en      <NA>    Publi… TRUE   
## 26           25 "The 1991 CIA Wor… Unite…       8 en      CIA Wo… Publi… TRUE   
## 27           26 "Paradise Lost"    Milto…      17 en      <NA>    Publi… TRUE   
## 28           27 "Far from the Mad… Hardy…      23 en      <NA>    Publi… TRUE   
## 29           28 "Aesop's Fables"   Aesop       18 en      Harvar… Publi… TRUE   
## 30           29 "The 1990 United … Unite…      25 en      <NA>    Publi… TRUE   
## 31           30 "The Bible, King … <NA>        NA en      <NA>    Publi… TRUE   
## 32           31 "Plays of Sophocl… Sopho…      26 en      Harvar… Publi… TRUE   
## 33           32 "Herland"          Gilma…      27 en      Best B… Publi… TRUE   
## 34           33 "The Scarlet Lett… Hawth…      28 en      Movie … Publi… TRUE   
## 35           34 "Zen and the Art … Kehoe…      29 en      <NA>    Copyr… TRUE   
## 36           35 "The Time Machine" Wells…      30 en      Scienc… Publi… TRUE   
## 37           36 "The War of the W… Wells…      30 en      Movie … Publi… TRUE   
## 38           37 "The 1990 United … Unite…      25 en      <NA>    Publi… TRUE   
## 39           38 "The Jargon File,… <NA>        NA en      <NA>    Publi… TRUE   
## 40           39 "Hitchhiker's Gui… Krol,…      32 en      <NA>    Copyr… TRUE   
## 41           40 "NorthWestNet Use… Kochm…      33 en      <NA>    Copyr… FALSE  
## 42           41 "The Legend of Sl… Irvin…      34 en      Childr… Publi… TRUE   
## 43           42 "The Strange Case… Steve…      35 en      Horror… Publi… TRUE   
## 44           43 "The Strange Case… Steve…      35 en      Horror… Publi… TRUE   
## 45           44 "The Song of the … Cathe…      22 en      Opera   Publi… TRUE   
## 46           45 "Anne of Green Ga… Montg…      36 en      Childr… Publi… TRUE   
## 47           46 "A Christmas Caro… Dicke…      37 en      Christ… Publi… TRUE   
## 48           47 "Anne of Avonlea"  Montg…      36 en      Childr… Publi… TRUE   
## 49           48 "The 1992 CIA Wor… Unite…       8 en      CIA Wo… Publi… TRUE   
## 50           49 "Surfing the Inte… Polly…      38 en      <NA>    Copyr… TRUE   
## # … with 51,947 more rows, and abbreviated variable names ¹​gutenberg_author_id,
## #   ²​language, ³​gutenberg_bookshelf, ⁴​has_text

My analysis of The Legend of Sleepy Hollow: I have never read this book; however, I love the movie. It's such a good haunting story.

# Look up the catalogue entry for the chosen title and display it.
chosen_book <- gutenberg_works(title == "The Legend of Sleepy Hollow")
chosen_book
## # A tibble: 1 × 8
##   gutenberg_id title               author guten…¹ langu…² guten…³ rights has_t…⁴
##          <int> <chr>               <chr>    <int> <chr>   <chr>   <chr>  <lgl>  
## 1           41 The Legend of Slee… Irvin…      34 en      Childr… Publi… TRUE   
## # … with abbreviated variable names ¹​gutenberg_author_id, ²​language,
## #   ³​gutenberg_bookshelf, ⁴​has_text
# Check what kind of object the metadata lookup returned
class(chosen_book)
## [1] "tbl_df"     "tbl"        "data.frame"
# The conversion to a plain data.frame is left disabled, so the class
# reported here is unchanged from the previous check.
#chosen_book<-as.data.frame(chosen_book)
class(chosen_book)
## [1] "tbl_df"     "tbl"        "data.frame"
# I had to come back and redo this as a download, since the analysis was
# getting messed up as a data frame.
# 41 is the gutenberg_id listed for this title in the lookup result above.
sleepy <- gutenberg_download(41)
## Determining mirror for Project Gutenberg from http://www.gutenberg.org/robot/harvest
## Using mirror http://aleph.gutenberg.org
# Build the two-column tibble used for tokenising: `lines` is the 1-based row
# index into the downloaded book and `text` is the raw line content.
# seq_len() replaces 1:nrow() so that an empty download gives zero rows
# instead of the bogus index c(1, 0). The columns are named at construction,
# so no separate colnames() step is needed.
text <- tibble(lines = seq_len(nrow(sleepy)), text = sleepy$text)


# Split the `text` column into tokens, stored in a new `word` column.
book <- unnest_tokens(text, word, text)

# Count how often each word found in the Bing lexicon occurs (positive and
# negative alike), most frequent first.
inner_join(book, get_sentiments("bing")) %>%
  count(word, sort = TRUE)
## Joining, by = "word"
## # A tibble: 528 × 2
##    word         n
##    <chr>    <int>
##  1 like        26
##  2 hollow      23
##  3 great       20
##  4 good        12
##  5 favorite     9
##  6 master       8
##  7 well         8
##  8 broken       7
##  9 dark         7
## 10 fearful      7
## # … with 518 more rows

Making a word cloud of the most frequent words in the book (stop words removed).

# Word cloud of the book's non-stop words, capped at 100 words.
book %>%
  anti_join(stop_words) %>%
  count(word) %>%
  with(
    wordcloud(
      words = word,
      freq = n,
      max.words = 100,
      colors = brewer.pal(8, "Dark2")
    )
  )
## Joining, by = "word"

Making wordcloud for negative words

# Word cloud of the words that the NRC lexicon tags as negative.
# The lexicon is narrowed to its "negative" rows before joining. NRC lists a
# word once per associated sentiment, so joining the whole lexicon counted
# each word several times and mixed every sentiment category into the cloud.
book %>%
  inner_join(
    get_sentiments("nrc") %>%
      filter(sentiment == "negative")
  ) %>%
  anti_join(stop_words) %>%
  count(word) %>%
  with(wordcloud(word, n, max.words = 100, colors = brewer.pal(8, "Dark2")))
## Joining, by = "word"
## Joining, by = "word"

# Frequency of each Bing-lexicon word together with its sentiment label,
# sorted from most to least frequent.
bing_word <- book %>%
  inner_join(get_sentiments("bing")) %>%
  count(word, sentiment) %>%
  arrange(desc(n))
## Joining, by = "word"
# Top 10 words by count within each sentiment, drawn as horizontal bars with
# one panel per sentiment.
bing_word %>%
  group_by(sentiment) %>%
  top_n(10) %>%
  ggplot(aes(x = reorder(word, n), y = n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(y = "Sentiment Comparison of Negative vs Positive", x = NULL) +
  coord_flip()
## Selecting by n

The words I got for positive and negative make sense. It's quite a chilling story, although I don't know the author's syntax/style. The Legend of Sleepy Hollow is a favorite horror story of mine and I love the town of Sleepy Hollow! Sentiment analysis was an interesting topic to learn. I changed my dataset twice, and ended up choosing the book within the package because I wanted to choose something more comfortable while I am still learning a new topic.

# Count occurrences of each word that has an AFINN score, most frequent first.
book %>%
  inner_join(get_sentiments("afinn")) %>%
  count(word) %>%
  arrange(desc(n))
## Joining, by = "word"
## # A tibble: 317 × 2
##    word         n
##    <chr>    <int>
##  1 like        26
##  2 no          22
##  3 great       20
##  4 good        12
##  5 favorite     9
##  6 haunted      9
##  7 kind         9
##  8 broken       7
##  9 fearful      7
## 10 fire         7
## # … with 307 more rows
# Word cloud of the book's non-stop words, capped at 100 words.
# NOTE(review): no AFINN join happens here, so this plots all words.
book %>%
  anti_join(stop_words) %>%
  count(word) %>%
  with(wordcloud(word, n, max.words = 100, colors = brewer.pal(8, "Dark2")))
## Joining, by = "word"

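Above I started making a word cloud for the "afinn" lexicon, but I did not have time to finish, so the chunk above still plots all non-stop words rather than only AFINN-scored words. If Prof. Catlin gives me a chance to redo this assignment, I can clean this all up.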

Citations: NRC: This dataset was published in Saif M. Mohammad and Peter Turney. (2013), "Crowdsourcing a Word-Emotion Association Lexicon." Computational Intelligence, 29(3): 436-465. @article{mohammad13, author = {Mohammad, Saif M. and Turney, Peter D.}, title = {Crowdsourcing a Word-Emotion Association Lexicon}, journal = {Computational Intelligence}, volume = {29}, number = {3}, pages = {436-465}, doi = {10.1111/j.1467-8640.2012.00460.x}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/j.1467-8640.2012.00460.x}, eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.1111/j.1467-8640.2012.00460.x}, year = {2013} }