In Text Mining with R, Chapter 2 looks at Sentiment Analysis. In this assignment, you should start by getting the primary example code from chapter 2 working in an R Markdown document. You should provide a citation to this base code. You’re then asked to extend the code in two ways:
Work with a different corpus of your choosing, and incorporate at least one additional sentiment lexicon (possibly from another R package that you’ve found through research). As usual, please submit links to both an .Rmd file posted in your GitHub repository and to your code on rpubs.com. You may work on a small team on this assignment.
library(textdata)
## Warning: package 'textdata' was built under R version 4.0.3
library(tidytext)
get_sentiments("afinn")
## # A tibble: 2,477 x 2
## word value
## <chr> <dbl>
## 1 abandon -2
## 2 abandoned -2
## 3 abandons -2
## 4 abducted -2
## 5 abduction -2
## 6 abductions -2
## 7 abhor -3
## 8 abhorred -3
## 9 abhorrent -3
## 10 abhors -3
## # ... with 2,467 more rows
get_sentiments("bing")
## # A tibble: 6,786 x 2
## word sentiment
## <chr> <chr>
## 1 2-faces negative
## 2 abnormal negative
## 3 abolish negative
## 4 abominable negative
## 5 abominably negative
## 6 abominate negative
## 7 abomination negative
## 8 abort negative
## 9 aborted negative
## 10 aborts negative
## # ... with 6,776 more rows
get_sentiments("bing")
## # A tibble: 6,786 x 2
## word sentiment
## <chr> <chr>
## 1 2-faces negative
## 2 abnormal negative
## 3 abolish negative
## 4 abominable negative
## 5 abominably negative
## 6 abominate negative
## 7 abomination negative
## 8 abort negative
## 9 aborted negative
## 10 aborts negative
## # ... with 6,776 more rows
get_sentiments("nrc")
## # A tibble: 13,901 x 2
## word sentiment
## <chr> <chr>
## 1 abacus trust
## 2 abandon fear
## 3 abandon negative
## 4 abandon sadness
## 5 abandoned anger
## 6 abandoned fear
## 7 abandoned negative
## 8 abandoned sadness
## 9 abandonment anger
## 10 abandonment fear
## # ... with 13,891 more rows
library(janeaustenr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
# Tokenise the Austen novels into one word per row. Before tokenising, record
# each text line's position within its book and a running chapter counter
# (incremented every time a line matches the chapter-heading pattern).
tidy_books <- austen_books() %>%
  group_by(book) %>%
  mutate(
    linenumber = row_number(),
    chapter = cumsum(
      str_detect(text, regex("^chapter [\\divxlc]", ignore_case = TRUE))
    )
  ) %>%
  ungroup() %>%
  unnest_tokens(word, text)
# Keep only the NRC entries tagged "joy".
nrc_joy <- filter(get_sentiments("nrc"), sentiment == "joy")

# Count how often each joy word occurs in "Emma", most frequent first.
tidy_books %>%
  filter(book == "Emma") %>%
  inner_join(nrc_joy) %>%
  count(word, sort = TRUE)
## Joining, by = "word"
## # A tibble: 303 x 2
## word n
## <chr> <int>
## 1 good 359
## 2 young 192
## 3 friend 166
## 4 hope 143
## 5 happy 125
## 6 love 117
## 7 deal 92
## 8 found 92
## 9 present 89
## 10 kind 82
## # ... with 293 more rows
library(tidyr)
# Net Bing sentiment per 80-line section of each book.
# Words are matched against the Bing lexicon, counted per (book, section,
# sentiment), widened to one column per sentiment, and the net score is
# positive minus negative. Sections with no words of a given sentiment are
# filled with 0 so the subtraction never yields NA.
# pivot_wider() replaces the superseded spread().
jane_austen_sentiment <- tidy_books %>%
  inner_join(get_sentiments("bing")) %>%
  count(book, index = linenumber %/% 80, sentiment) %>%
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = list(n = 0)
  ) %>%
  mutate(sentiment = positive - negative)
## Joining, by = "word"
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.3
# One panel per book: net sentiment of each section along the narrative.
ggplot(
  jane_austen_sentiment,
  aes(x = index, y = sentiment, fill = book)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~book, ncol = 2, scales = "free_x")
# Restrict the tokenised corpus to a single novel for the lexicon comparison.
pride_prejudice <- filter(tidy_books, book == "Pride & Prejudice")
pride_prejudice
## # A tibble: 122,204 x 4
## book linenumber chapter word
## <fct> <int> <int> <chr>
## 1 Pride & Prejudice 1 0 pride
## 2 Pride & Prejudice 1 0 and
## 3 Pride & Prejudice 1 0 prejudice
## 4 Pride & Prejudice 3 0 by
## 5 Pride & Prejudice 3 0 jane
## 6 Pride & Prejudice 3 0 austen
## 7 Pride & Prejudice 7 1 chapter
## 8 Pride & Prejudice 7 1 1
## 9 Pride & Prejudice 10 1 it
## 10 Pride & Prejudice 10 1 is
## # ... with 122,194 more rows
# AFINN assigns each word a numeric value, so the score of an 80-line section
# is simply the sum of the values of its matched words.
afinn <- pride_prejudice %>%
  inner_join(get_sentiments("afinn")) %>%
  group_by(index = linenumber %/% 80) %>%
  summarise(sentiment = sum(value)) %>%
  mutate(method = "AFINN")
## Joining, by = "word"
## `summarise()` ungrouping output (override with `.groups` argument)
# Net sentiment per 80-line section for the two categorical lexicons.
# Bing is used as-is; NRC is restricted to its "positive"/"negative" labels so
# both lexicons can be scored the same way (positive count minus negative
# count). Missing sentiment counts are filled with 0.
# pivot_wider() replaces the superseded spread().
bing_and_nrc <- bind_rows(
  pride_prejudice %>%
    inner_join(get_sentiments("bing")) %>%
    mutate(method = "Bing et al."),
  pride_prejudice %>%
    inner_join(
      get_sentiments("nrc") %>%
        filter(sentiment %in% c("positive", "negative"))
    ) %>%
    mutate(method = "NRC")
) %>%
  count(method, index = linenumber %/% 80, sentiment) %>%
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = list(n = 0)
  ) %>%
  mutate(sentiment = positive - negative)
## Joining, by = "word"
## Joining, by = "word"
# Stack the three per-section scores and draw one panel per lexicon.
bind_rows(afinn, bing_and_nrc) %>%
  ggplot(aes(x = index, y = sentiment, fill = method)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~method, ncol = 1, scales = "free_y")
# How many NRC words are labelled positive vs. negative?
get_sentiments("nrc") %>%
  filter(sentiment %in% c("positive", "negative")) %>%
  count(sentiment)
## # A tibble: 2 x 2
## sentiment n
## <chr> <int>
## 1 negative 3324
## 2 positive 2312
get_sentiments("bing") %>%
count(sentiment)
## # A tibble: 2 x 2
## sentiment n
## <chr> <int>
## 1 negative 4781
## 2 positive 2005
# Frequency of every Bing-lexicon word across all books, together with its
# sentiment label, sorted from most to least frequent.
bing_word_counts <- tidy_books %>%
  inner_join(get_sentiments("bing")) %>%
  count(word, sentiment, sort = TRUE) %>%
  ungroup()
## Joining, by = "word"
bing_word_counts
## # A tibble: 2,585 x 3
## word sentiment n
## <chr> <chr> <int>
## 1 miss negative 1855
## 2 well positive 1523
## 3 good positive 1380
## 4 great positive 981
## 5 like positive 725
## 6 better positive 639
## 7 enough positive 613
## 8 happy positive 534
## 9 love positive 495
## 10 pleasure positive 462
## # ... with 2,575 more rows
# Top ten contributing words for each sentiment, drawn as horizontal bars
# ordered by frequency.
bing_word_counts %>%
  group_by(sentiment) %>%
  top_n(10) %>%
  ungroup() %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(y = "Contribution to sentiment", x = NULL) +
  coord_flip()
## Selecting by n
# Prepend "miss" (tagged with a "custom" lexicon label) to the standard
# stop-word list; Bing scores it as negative although in these novels it is
# mostly a title.
custom_stop_words <- bind_rows(
  tibble(word = "miss", lexicon = "custom"),
  stop_words
)
custom_stop_words
## # A tibble: 1,150 x 2
## word lexicon
## <chr> <chr>
## 1 miss custom
## 2 a SMART
## 3 a's SMART
## 4 able SMART
## 5 about SMART
## 6 above SMART
## 7 according SMART
## 8 accordingly SMART
## 9 across SMART
## 10 actually SMART
## # ... with 1,140 more rows
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 4.0.3
## Loading required package: RColorBrewer
# Word cloud of the 100 most frequent non-stop-words across all books.
tidy_books %>%
  anti_join(stop_words) %>%
  count(word) %>%
  with(wordcloud(word, n, max.words = 100))
## Joining, by = "word"
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Comparison cloud: reshape the Bing word counts into a word x sentiment
# matrix (0 where a word lacks that sentiment) and plot the two sentiments
# against each other.
tidy_books %>%
  inner_join(get_sentiments("bing")) %>%
  count(word, sentiment, sort = TRUE) %>%
  acast(word ~ sentiment, value.var = "n", fill = 0) %>%
  comparison.cloud(colors = c("gray20", "gray80"), max.words = 100)
## Joining, by = "word"
#### 2.6 Looking at units beyond just words
# Tokenise "Pride and Prejudice" into sentences instead of words.
PandP_sentences <- unnest_tokens(
  tibble(text = prideprejudice),
  sentence, text,
  token = "sentences"
)

# Split every book into chapter-sized tokens using a regex on the chapter
# headings.
austen_chapters <- austen_books() %>%
  group_by(book) %>%
  unnest_tokens(
    chapter, text,
    token = "regex",
    pattern = "Chapter|CHAPTER [\\dIVXLC]"
  ) %>%
  ungroup()
# Number of chapter tokens found in each book.
austen_chapters %>%
  group_by(book) %>%
  summarise(chapters = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 6 x 2
## book chapters
## <fct> <int>
## 1 Sense & Sensibility 51
## 2 Pride & Prejudice 62
## 3 Mansfield Park 49
## 4 Emma 56
## 5 Northanger Abbey 32
## 6 Persuasion 25
# Negative half of the Bing lexicon.
bingnegative <- filter(get_sentiments("bing"), sentiment == "negative")

# Total number of words in every chapter of every book (the denominator for
# the negative-word ratio).
wordcounts <- tidy_books %>%
  group_by(book, chapter) %>%
  summarize(words = n())
## `summarise()` regrouping output by 'book' (override with `.groups` argument)
# For each book, find the chapter with the highest share of negative words.
# Chapter 0 (text before the first chapter heading) is excluded.
tidy_books %>%
  semi_join(bingnegative) %>%
  group_by(book, chapter) %>%
  summarize(negativewords = n()) %>%
  left_join(wordcounts, by = c("book", "chapter")) %>%
  mutate(ratio = negativewords / words) %>%
  filter(chapter != 0) %>%
  top_n(1) %>%
  ungroup()
## Joining, by = "word"
## `summarise()` regrouping output by 'book' (override with `.groups` argument)
## Selecting by ratio
## # A tibble: 6 x 5
## book chapter negativewords words ratio
## <fct> <int> <int> <int> <dbl>
## 1 Sense & Sensibility 43 161 3405 0.0473
## 2 Pride & Prejudice 34 111 2104 0.0528
## 3 Mansfield Park 46 173 3685 0.0469
## 4 Emma 15 151 3340 0.0452
## 5 Northanger Abbey 21 149 2982 0.0500
## 6 Persuasion 4 62 1807 0.0343
library(jsonlite)
# Fetch New York Times book reviews for Stephen King from the NYT Books API
# and show the returned `results` element as a table.
#
# The API key is read from the NYT_API_KEY environment variable (e.g. set in
# ~/.Renviron) instead of being embedded in the URL, so the credential is not
# published together with the document.
nyt_api_key <- Sys.getenv("NYT_API_KEY")
if (!nzchar(nyt_api_key)) {
  stop(
    "Set the NYT_API_KEY environment variable before knitting this document.",
    call. = FALSE
  )
}
url <- paste0(
  "https://api.nytimes.com/svc/books/v3/reviews.json",
  "?author=Stephen+King&api-key=", nyt_api_key
)
data <- fromJSON(url)
df <- data$results
knitr::kable(df)
| url | publication_dt | byline | book_title | book_author | summary | uuid | uri | isbn13 |
|---|---|---|---|---|---|---|---|---|
| http://www.nytimes.com/2011/11/13/books/review/11-22-63-by-stephen-king-book-review.html | 2011-11-13 | ERROL MORRIS | 11/22/63 | Stephen King | Stephen King’s time traveler tries to undo some painful history. | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780307951434, 9780606351461, 9781442344280, 9781442344303, 9781442391635, 9781444727326, 9781451627282, 9781451627299, 9781451627305, 9781451651645, 9781501120602, 9781594135590 |
| http://www.nytimes.com/2011/10/31/books/stephen-kings-11-23-63-review.html | 2011-10-31 | JANET MASLIN | 11/22/63 | Stephen King | Stephen King’s latest novel, “11/22/63,” tells of a schoolteacher who travels back to 1958 to alter history, and falls in love as well. | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780307951434, 9780606351461, 9781442344280, 9781442344303, 9781442391635, 9781444727326, 9781451627282, 9781451627299, 9781451627305, 9781451651645, 9781501120602, 9781594135590 |
| http://www.nytimes.com/2004/01/04/books/the-quest-for-the-north-central-positronics.html | 2004-01-04 | ANDREW O’HEHIR | Wolves of the Calla | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781848941137 | |
| http://www.nytimes.com/1993/10/24/books/in-short-fiction-284093.html | 1993-10-24 | RICHARD E. NICHOLLS | Nightmares and Dreamscapes | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781441615299 | |
| http://www.nytimes.com/2001/11/04/books/books-in-brief-fiction-poetry-851302.html | 2001-11-04 | MARY ELIZABETH WILLIAMS | Black House | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780375504396 | |
| http://www.nytimes.com/1990/05/13/books/armageddon-complete-and-uncut.html | 1990-05-13 | ROBERT KIELY | The Stand | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781848940833 | |
| http://www.nytimes.com/1990/09/02/books/scared-but-safe.html | 1990-09-02 | ANDY SOLOMON | Four Past Midnight | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780816151776 | |
| http://www.nytimes.com/1991/10/20/books/l-defending-stephen-king-075091.html | 1991-10-20 | Needful Things | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781598877427 | ||
| http://www.nytimes.com/1992/06/29/books/books-of-the-times-to-be-read-in-daylight-away-from-hungry-dogs.html | 1992-06-29 | CHRISTOPHER LEHMANN-HAUPT | Gerald’s Game | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780670846504 | |
| http://www.nytimes.com/2002/04/14/books/the-horror-etc.html | 2002-04-14 | WALTER KIRN | Everything’s Eventual : 14 Dark Tales | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781439568200 | |
| http://www.nytimes.com/1986/08/21/books/books-of-the-times-547486.html | 1986-08-21 | CHRISTOPHER LEHMANN-HAUPT | It | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780451169518 | |
| http://www.nytimes.com/2010/11/28/books/review/Rafferty-t.html | 2010-11-28 | TERRENCE RAFFERTY | Full Dark, No Stars | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781439192566, 9781439192597, 9781439192603, 9781441677112, 9781442335769, 9781444712582, 9781451650600, 9781602859463 | |
| http://www.nytimes.com/1998/09/27/books/familiar-terrors.html | 1998-09-27 | DANIEL MENDELSOHN | Bag of Bones | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780671026073 | |
| http://www.nytimes.com/2002/03/18/books/books-of-the-times-storytelling-mogul-decides-to-sweep-out-odds-and-ends.html | 2002-03-18 | JANET MASLIN | Everything’s Eventual : 14 Dark Tales | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781416537816 | |
| http://www.nytimes.com/1995/07/02/books/in-short-fiction-077895.html | 1995-07-02 | RICHARD E. NICHOLLS | Rose Madder | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781568952611 | |
| http://www.nytimes.com/2013/09/16/books/doctor-sleep-is-stephen-kings-sequel-to-the-shining.html | 2013-09-16 | JANET MASLIN | Doctor Sleep | Stephen King | In “Doctor Sleep” Stephen King offers a sequel to “The Shining.” | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781451698855 |
| http://www.nytimes.com/2013/09/22/books/review/stephen-kings-shining-sequel-doctor-sleep.html | 2013-09-22 | MARGARET ATWOOD | Doctor Sleep | Stephen King | “Doctor Sleep” picks up the story of Danny, the little boy with psycho-intuitive powers from Stephen King’s 1977 novel “The Shining.” | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781451698855 |
| http://www.nytimes.com/2013/06/23/books/review/joyland-by-stephen-king.html | 2013-06-23 | WALTER KIRN | Joyland | Stephen King | Stephen King’s lovelorn narrator takes a summer job at a haunted Southern amusement park. | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781781162644 |
| http://www.nytimes.com/2009/11/08/books/review/JParker-t.html | 2009-11-08 | JAMES PARKER | Under the Dome | Stephen King | When an enormous transparent dome settles over a small town in Maine in Stephen King’s new novel, it’s just fine with Big Jim, the local tyrant-in-waiting, and his pet goon squad. | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781476735474 |
| http://www.nytimes.com/2009/11/12/books/12book.html | 2009-11-12 | JANET MASLIN | Under the Dome | Stephen King | “Under the Dome” gravely threatens Stephen King’s status as a mere chart-busting pop cultural phenomenon. | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781476735474 |
| http://www.nytimes.com/2014/11/14/books/stephen-kings-revival.html | 2014-11-14 | JANET MASLIN | Revival | Stephen King | In Stephen King’s “Revival,” a boy is befriended by a charismatic minister with whom he will reconnect in eerie and unsettling ways as an adult. | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781410473837, 9781442372764, 9781442372788, 9781476770383, 9781476770390, 9781476770406 |
| http://www.nytimes.com/2014/11/23/books/review/stephen-kings-revival.html | 2014-11-23 | DANIELLE TRUSSONI | Revival | Stephen King | Two men are locked in a battle of wills in Stephen King’s novel of fanaticism and what might exist on the other side of life. | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781410473837, 9781442372764, 9781442372788, 9781476770383, 9781476770390, 9781476770406 |
| http://www.nytimes.com/2006/11/12/books/review/Windolf.t.html | 2006-11-12 | JIM WINDOLF | Lisey’s Story | Stephen King | When Stephen King really wants to put a scare into you, he brings on his most fearsome monster of all, the writer. | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781416523352 |
| http://www.nytimes.com/2008/11/05/books/05masl.html | 2008-11-05 | JANET MASLIN | Just After Sunset | Stephen King | Everyday situations became open portals to fantasy and horror in Stephen King’s succinct, fast-moving new collection. | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781416586654 |
| http://www.nytimes.com/2008/11/23/books/review/Taylor-t.html | 2008-11-23 | CHARLES TAYLOR | Just After Sunset | Stephen King | A new short-story collection takes Stephen King back to the form that gave him his seat-of-the-pants start. | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781416586654 |
| http://www.nytimes.com/2008/01/21/books/21maslin.html | 2008-01-21 | JANET MASLIN | Duma Key | Stephen King | Stephen King’s use of horror is not what it used to be. It may still be the impetus for his stories, but it is no longer the foremost reason they’re interesting. | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781416552963 |
| http://www.nytimes.com/2008/03/02/books/review/Campbell-t.html | 2008-03-02 | JAMES CAMPBELL | Duma Key | Stephen King | In Stephen King’s new novel, a man recuperating in the Florida Keys begins to paint, with sinister results. | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781416552963 |
| http://www.nytimes.com/2006/10/23/books/23masl.html | 2006-10-23 | JANET MASLIN | Lisey’s Story | Stephen King | In his new book, Stephen King delivers his version of Joycean wordplay, idiosyncrasy, voluptuousness and stubborn, obsessive chronology. | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781416523352 |
| http://www.nytimes.com/2014/06/08/books/review/stephen-kings-mr-mercedes.html | 2014-06-08 | MEGAN ABBOTT | Mr. Mercedes | Stephen King | With one eye on Philip Marlowe, Stephen King takes a crack at the detective novel. | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781410469007, 9781442369788, 9781442371347, 9781442395404, 9781476754451, 9781476754468, 9781476754475, 9781501125607 |
| http://www.nytimes.com/1981/05/10/books/scare-tactics.html | 1981-05-10 | MICHELE SLUNG | Stephen King’s Danse Macabre | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780896960763 | |
| http://www.nytimes.com/1981/08/14/books/books-of-the-times-books-of-the-times.html | 1981-08-14 | Cujo | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780670451937 | ||
| http://www.nytimes.com/1982/08/11/books/books-of-the-times-074639.html | 1982-08-11 | Different Seasons | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780670272662 | ||
| http://www.nytimes.com/1982/08/29/books/horror-writer-s-holiday.html | 1982-08-29 | ALAN CHEUSE | Different Seasons | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780670272662 | |
| http://www.nytimes.com/1983/04/03/books/the-other-woman-was-a-car.html | 1983-04-03 | PHILLIPE VAN RJNDT | Christine | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780670220267 | |
| http://www.nytimes.com/1983/04/12/books/books-of-the-times-093019.html | 1983-04-12 | Christine | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780670220267 | ||
| http://www.nytimes.com/1983/10/21/books/books-of-the-times-243538.html | 1983-10-21 | Pet Sematary | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780385182447 | ||
| http://www.nytimes.com/1985/06/09/books/don-t-turn-your-back-on-this-book.html | 1985-06-09 | SUSAN BOLOTIN | Skeleton Crew | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | NULL | |
| http://www.nytimes.com/1987/02/22/books/what-the-wicked-magician-did.html | 1987-02-22 | BARBARA TRITEL | The Eyes of the Dragon | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | NULL | |
| http://www.nytimes.com/1987/05/31/books/summer-reading-sheldon-gets-the-ax.html | 1987-05-31 | JOHN KATZENBACH | Misery | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780670813643 | |
| http://www.nytimes.com/1987/06/08/books/books-of-the-times-301987.html | 1987-06-08 | Misery | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780670813643 | ||
| http://www.nytimes.com/1987/11/05/books/books-of-the-times-950387.html | 1987-11-05 | The Tommyknockers | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780399133145 | ||
| http://www.nytimes.com/1987/12/20/books/not-with-a-bang-but-an-eeeooooarrrhmm.html | 1987-12-20 | NINA AUERBACH | The Tommyknockers | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780399133145 | |
| http://www.nytimes.com/1989/10/23/books/books-of-the-times-from-stephen-king-a-writer-s-demon.html | 1989-10-23 | The Dark Half | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | NULL | ||
| http://www.nytimes.com/1989/10/29/books/his-alter-ego-is-a-killer.html | 1989-10-29 | GEORGE STADE | The Dark Half | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | NULL | |
| http://www.nytimes.com/1991/09/29/books/avaunt-thee-recreant-cyborg.html | 1991-09-29 | RICHARD E. NICHOLLS | The Waste Lands: The Dark Tower Book III | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780452267404 | |
| http://www.nytimes.com/1991/10/03/books/books-of-the-times-turning-favors-into-catastrophe.html | 1991-10-03 | Needful Things: The Last Castle Rock Story | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780670839537 | ||
| http://www.nytimes.com/1994/10/06/books/books-of-the-times-lack-of-sleep-the-least-of-his-problems.html | 1994-10-06 | Insomnia,Stephen King Library Ed. | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780670855032 | ||
| http://www.nytimes.com/1995/06/26/books/book-review-a-punch-in-the-nose-then-a-new-life-begins.html | 1995-06-26 | Rose Madder | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780670858699 | ||
| http://www.nytimes.com/1998/09/21/books/books-of-the-times-death-terror-and-writer-s-block.html | 1998-09-21 | Bag of Bones | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780684853505 | ||
| http://www.nytimes.com/1999/04/15/books/books-of-the-times-a-modern-fairy-tale-of-the-dark-north-woods.html | 1999-04-15 | The Girl Who Loved Tom Gordon : A Novel | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780684867625 | ||
| http://www.nytimes.com/1999/05/16/books/books-in-brief-fiction-583618.html | 1999-05-16 | ANDREW ESSEX | The Girl Who Loved Tom Gordon : A Novel | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780684867625 | |
| http://www.nytimes.com/2000/10/05/books/books-of-the-times-how-to-write-if-you-re-stephen-king.html | 2000-10-05 | JANET MASLIN | On Writing: a Memoir of the Craft | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780606222549, 9780606231862, 9780671024253, 9780684853529, 9780743204361, 9780743211536, 9780743455961, 9780743563376, 9781416549864, 9781417647019, 9781439156810, 9781439193631, 9781441658869, 9781444723250, 9781848941083, 9781931208048 | |
| http://www.nytimes.com/2000/10/08/books/making-it.html | 2000-10-08 | FREDERICK BUSCH | On Writing: a Memoir of the Craft | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780606222549, 9780606231862, 9780671024253, 9780684853529, 9780743204361, 9780743211536, 9780743455961, 9780743563376, 9781416549864, 9781417647019, 9781439156810, 9781439193631, 9781441658869, 9781444723250, 9781848941083, 9781931208048 | |
| http://www.nytimes.com/2001/03/15/books/books-of-the-times-a-fateful-step-off-a-curb-and-into-alien-territory.html | 2001-03-15 | JANET MASLIN | Dreamcatcher | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780743211383 | |
| http://www.nytimes.com/2001/04/15/books/weasel-from-another-planet.html | 2001-04-15 | COLIN HARRISON | Dreamcatcher | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780743211383 | |
| http://www.nytimes.com/2002/09/23/books/books-of-the-times-cruising-the-dark-side-forget-about-seat-belts.html | 2002-09-23 | JANET MASLIN | From a Buick 8 | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780743211376 | |
| http://www.nytimes.com/2002/09/29/books/not-your-father-s-roadmaster.html | 2002-09-29 | LAURA MILLER | From a Buick 8 | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780743211376 | |
| http://www.nytimes.com/2004/06/20/books/books-in-brief-fiction-937851.html | 2004-06-20 | BEN SISARIO | The Dark Tower VI | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781880418598 | |
| http://www.nytimes.com/2006/01/23/books/23masl.html | 2006-01-23 | JANET MASLIN | Cell | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780743292337 | |
| http://www.nytimes.com/2006/02/05/books/review/05itzkoff.html | 2006-02-05 | DAVE ITZKOFF | Cell | Stephen King | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9780743292337 | |
| http://www.nytimes.com/2015/05/31/books/review/stephen-kings-finders-keepers.html | 2015-05-31 | LAURA LIPPMAN | Finders Keepers | Stephen King | Laura Lippman reviews Stephen King’s “Finders Keepers,” the second entry in a planned trilogy that began with “Mr. Mercedes.” | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781410479501, 9781442384347, 9781501100079, 9781501100123, 9781501100130, 9781594138522 |
| http://www.nytimes.com/2016/06/12/books/review/stephen-kings-end-of-watch.html | 2016-06-12 | DENISE MINA | End of Watch | Stephen King | A retired police detective sees the return of his nemesis in this suspense story with a supernatural twist. | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781410489906, 9781501129742, 9781501134142, 9781501134159, 9781508211358 |
| https://www.nytimes.com/2018/05/22/books/review/outsider-stephen-king.html | 2018-05-22 | VICTOR LAVALLE | The Outsider | Stephen King | “The Outsider” starts out as a routine police procedural but before long transforms into something much more sinister. | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781501180989 |
| https://www.nytimes.com/2018/10/26/books/review/stephen-king-elevation.html | 2018-10-26 | GILBERT CRUZ | Elevation | Stephen King | King’s slim new novel, “Elevation,” returns us to Castle Rock, where prejudice drives a plot that blends the fantastical with the mundane. | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781982102319 |
| https://www.nytimes.com/2019/09/08/books/review-institute-stephen-king.html | 2019-09-08 | DWIGHT GARNER | The Institute | Stephen King | In his latest, King tells the story of an institution where children with special powers are cultivated — but that’s just where the nightmare begins. | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781982110567 |
| https://www.nytimes.com/2019/09/10/books/review/stephen-king-the-institute.html | 2019-09-10 | LAURA MILLER | The Institute | Stephen King | The terror doesn’t come from ghosts or fiends or clowns — it’s ordinary people, folks just like you and me, who are the evil ones. | 00000000-0000-0000-0000-000000000000 | nyt://book/00000000-0000-0000-0000-000000000000 | 9781982110567 |
I would like to use the sentimentr package, since it is designed to quickly calculate text polarity sentiment at the sentence level and optionally aggregate by rows or grouping variable(s).
library(sentimentr)
## Warning: package 'sentimentr' was built under R version 4.0.3
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:reshape2':
##
## dcast, melt
## The following objects are masked from 'package:dplyr':
##
## between, first, last
# Score every review summary with sentimentr (one result row per summary) and
# show the result as a table.
sentiment <- sentiment_by(df$summary)
knitr::kable(sentiment)
| element_id | word_count | sd | ave_sentiment |
|---|---|---|---|
| 1 | 11 | NA | -0.1959824 |
| 2 | 22 | NA | 0.2665009 |
| 3 | 0 | NA | 0.0000000 |
| 4 | 0 | NA | 0.0000000 |
| 5 | 0 | NA | 0.0000000 |
| 6 | 0 | NA | 0.0000000 |
| 7 | 0 | NA | 0.0000000 |
| 8 | 0 | NA | 0.0000000 |
| 9 | 0 | NA | 0.0000000 |
| 10 | 0 | NA | 0.0000000 |
| 11 | 0 | NA | 0.0000000 |
| 12 | 0 | NA | 0.0000000 |
| 13 | 0 | NA | 0.0000000 |
| 14 | 0 | NA | 0.0000000 |
| 15 | 0 | NA | 0.0000000 |
| 16 | 11 | NA | 0.2110579 |
| 17 | 22 | NA | 0.0234521 |
| 18 | 15 | NA | -0.1032796 |
| 19 | 35 | NA | -0.0084515 |
| 20 | 17 | NA | -0.3395499 |
| 21 | 26 | NA | -0.0294174 |
| 22 | 26 | NA | -0.1765045 |
| 23 | 22 | NA | -0.6332061 |
| 24 | 18 | NA | 0.5185450 |
| 25 | 21 | NA | 0.1963961 |
| 26 | 33 | 0.8383012 | -0.2876833 |
| 27 | 19 | NA | 0.1147079 |
| 28 | 18 | NA | -0.0824958 |
| 29 | 15 | NA | -0.0774597 |
| 30 | 0 | NA | 0.0000000 |
| 31 | 0 | NA | 0.0000000 |
| 32 | 0 | NA | 0.0000000 |
| 33 | 0 | NA | 0.0000000 |
| 34 | 0 | NA | 0.0000000 |
| 35 | 0 | NA | 0.0000000 |
| 36 | 0 | NA | 0.0000000 |
| 37 | 0 | NA | 0.0000000 |
| 38 | 0 | NA | 0.0000000 |
| 39 | 0 | NA | 0.0000000 |
| 40 | 0 | NA | 0.0000000 |
| 41 | 0 | NA | 0.0000000 |
| 42 | 0 | NA | 0.0000000 |
| 43 | 0 | NA | 0.0000000 |
| 44 | 0 | NA | 0.0000000 |
| 45 | 0 | NA | 0.0000000 |
| 46 | 0 | NA | 0.0000000 |
| 47 | 0 | NA | 0.0000000 |
| 48 | 0 | NA | 0.0000000 |
| 49 | 0 | NA | 0.0000000 |
| 50 | 0 | NA | 0.0000000 |
| 51 | 0 | NA | 0.0000000 |
| 52 | 0 | NA | 0.0000000 |
| 53 | 0 | NA | 0.0000000 |
| 54 | 0 | NA | 0.0000000 |
| 55 | 0 | NA | 0.0000000 |
| 56 | 0 | NA | 0.0000000 |
| 57 | 0 | NA | 0.0000000 |
| 58 | 0 | NA | 0.0000000 |
| 59 | 0 | NA | 0.0000000 |
| 60 | 0 | NA | 0.0000000 |
| 61 | 20 | NA | 0.0223607 |
| 62 | 18 | NA | -0.6246110 |
| 63 | 18 | NA | -0.6010997 |
| 64 | 23 | NA | -0.1876630 |
| 65 | 25 | NA | -0.1573800 |
| 66 | 26 | NA | -0.2451452 |
# Turn the sentiment_by() result into a data frame via data.table::setDF().
sentiment_df <- setDF(sentiment)
#' Classify average sentiment scores as Negative / Neutral / Positive
#'
#' Scores below 0 are "Negative", scores from 0 up to and including
#' `neutral_max` are "Neutral", and anything larger is "Positive".
#' The function is vectorised: it accepts a single score or a whole vector,
#' and missing scores yield `NA` instead of raising an error.
#'
#' @param ave_sentiment Numeric vector of average sentiment scores.
#' @param neutral_max Upper bound (inclusive) of the neutral band.
#' @return A character vector the same length as `ave_sentiment`.
get_sentiment_class <- function(ave_sentiment, neutral_max = 0.01) {
  sentiment_class <- rep(NA_character_, length(ave_sentiment))
  # which() drops NA comparisons, so missing scores keep their NA label.
  sentiment_class[which(ave_sentiment < 0)] <- "Negative"
  sentiment_class[
    which(ave_sentiment >= 0 & ave_sentiment <= neutral_max)
  ] <- "Neutral"
  sentiment_class[which(ave_sentiment > neutral_max)] <- "Positive"
  sentiment_class
}
# Replace each numeric average score with its Negative/Neutral/Positive label.
# vapply() guarantees a character vector with one label per row, whereas
# sapply()'s return type depends on its input (e.g. a list for zero rows).
sentiment_df$ave_sentiment <- vapply(
  sentiment_df$ave_sentiment,
  get_sentiment_class,
  character(1)
)
knitr::kable(sentiment_df)
| element_id | word_count | sd | ave_sentiment |
|---|---|---|---|
| 1 | 11 | NA | Negative |
| 2 | 22 | NA | Positive |
| 3 | 0 | NA | Neutral |
| 4 | 0 | NA | Neutral |
| 5 | 0 | NA | Neutral |
| 6 | 0 | NA | Neutral |
| 7 | 0 | NA | Neutral |
| 8 | 0 | NA | Neutral |
| 9 | 0 | NA | Neutral |
| 10 | 0 | NA | Neutral |
| 11 | 0 | NA | Neutral |
| 12 | 0 | NA | Neutral |
| 13 | 0 | NA | Neutral |
| 14 | 0 | NA | Neutral |
| 15 | 0 | NA | Neutral |
| 16 | 11 | NA | Positive |
| 17 | 22 | NA | Positive |
| 18 | 15 | NA | Negative |
| 19 | 35 | NA | Negative |
| 20 | 17 | NA | Negative |
| 21 | 26 | NA | Negative |
| 22 | 26 | NA | Negative |
| 23 | 22 | NA | Negative |
| 24 | 18 | NA | Positive |
| 25 | 21 | NA | Positive |
| 26 | 33 | 0.8383012 | Negative |
| 27 | 19 | NA | Positive |
| 28 | 18 | NA | Negative |
| 29 | 15 | NA | Negative |
| 30 | 0 | NA | Neutral |
| 31 | 0 | NA | Neutral |
| 32 | 0 | NA | Neutral |
| 33 | 0 | NA | Neutral |
| 34 | 0 | NA | Neutral |
| 35 | 0 | NA | Neutral |
| 36 | 0 | NA | Neutral |
| 37 | 0 | NA | Neutral |
| 38 | 0 | NA | Neutral |
| 39 | 0 | NA | Neutral |
| 40 | 0 | NA | Neutral |
| 41 | 0 | NA | Neutral |
| 42 | 0 | NA | Neutral |
| 43 | 0 | NA | Neutral |
| 44 | 0 | NA | Neutral |
| 45 | 0 | NA | Neutral |
| 46 | 0 | NA | Neutral |
| 47 | 0 | NA | Neutral |
| 48 | 0 | NA | Neutral |
| 49 | 0 | NA | Neutral |
| 50 | 0 | NA | Neutral |
| 51 | 0 | NA | Neutral |
| 52 | 0 | NA | Neutral |
| 53 | 0 | NA | Neutral |
| 54 | 0 | NA | Neutral |
| 55 | 0 | NA | Neutral |
| 56 | 0 | NA | Neutral |
| 57 | 0 | NA | Neutral |
| 58 | 0 | NA | Neutral |
| 59 | 0 | NA | Neutral |
| 60 | 0 | NA | Neutral |
| 61 | 20 | NA | Positive |
| 62 | 18 | NA | Negative |
| 63 | 18 | NA | Negative |
| 64 | 23 | NA | Negative |
| 65 | 25 | NA | Negative |
| 66 | 26 | NA | Negative |
# Bar chart counting the rows of sentiment_df per ave_sentiment value,
# with one fill colour per value.
ggplot(
  data = sentiment_df,
  mapping = aes(x = ave_sentiment, fill = ave_sentiment)
) +
  geom_bar()
I would like to see whether the NRC lexicon produces a different result.
# Put the summary text in a one-column tibble and split it into the `word`
# column with unnest_tokens().
x <- tibble(txt = df$summary) %>%
  unnest_tokens(word, txt)
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
# Inner join: keep only the tokens that appear in the NRC lexicon. A word that
# NRC tags with several categories produces one row per category.
y <- plyr::join(x, get_sentiments("nrc"), type = "inner")
## Joining by: word
y
## word sentiment
## 1 time anticipation
## 2 undo negative
## 3 painful anger
## 4 painful disgust
## 5 painful fear
## 6 painful negative
## 7 painful sadness
## 8 love joy
## 9 love positive
## 10 doctor positive
## 11 doctor trust
## 12 king positive
## 13 sequel anticipation
## 14 shining anticipation
## 15 shining joy
## 16 shining positive
## 17 doctor positive
## 18 doctor trust
## 19 boy disgust
## 20 boy negative
## 21 intuitive positive
## 22 shining anticipation
## 23 shining joy
## 24 shining positive
## 25 job positive
## 26 haunted fear
## 27 haunted negative
## 28 haunted sadness
## 29 amusement joy
## 30 amusement positive
## 31 small negative
## 32 tyrant anger
## 33 tyrant disgust
## 34 tyrant fear
## 35 tyrant negative
## 36 tyrant sadness
## 37 pet negative
## 38 status positive
## 39 chart trust
## 40 pop negative
## 41 pop surprise
## 42 revival anticipation
## 43 revival joy
## 44 revival positive
## 45 revival trust
## 46 boy disgust
## 47 boy negative
## 48 battle anger
## 49 battle negative
## 50 king positive
## 51 fanaticism fear
## 52 king positive
## 53 scare anger
## 54 scare anticipation
## 55 scare fear
## 56 scare negative
## 57 scare surprise
## 58 monster fear
## 59 monster negative
## 60 writer positive
## 61 horror anger
## 62 horror disgust
## 63 horror fear
## 64 horror negative
## 65 horror sadness
## 66 horror surprise
## 67 succinct positive
## 68 king positive
## 69 start anticipation
## 70 horror anger
## 71 horror disgust
## 72 horror fear
## 73 horror negative
## 74 horror sadness
## 75 horror surprise
## 76 reason positive
## 77 interesting positive
## 78 sinister anger
## 79 sinister disgust
## 80 sinister fear
## 81 sinister negative
## 82 king positive
## 83 king positive
## 84 crack negative
## 85 police fear
## 86 police positive
## 87 police trust
## 88 suspense anticipation
## 89 suspense fear
## 90 suspense surprise
## 91 outsider fear
## 92 routine positive
## 93 routine trust
## 94 police fear
## 95 police positive
## 96 police trust
## 97 long anticipation
## 98 sinister anger
## 99 sinister disgust
## 100 sinister fear
## 101 sinister negative
## 102 slim positive
## 103 elevation anticipation
## 104 elevation fear
## 105 elevation joy
## 106 elevation positive
## 107 elevation trust
## 108 rock positive
## 109 prejudice anger
## 110 prejudice negative
## 111 king positive
## 112 special joy
## 113 special positive
## 114 cultivated positive
## 115 nightmare fear
## 116 nightmare negative
## 117 terror fear
## 118 terror negative
## 119 evil anger
## 120 evil disgust
## 121 evil fear
## 122 evil negative
## 123 evil sadness
# Work with the joined word/sentiment table as a plain data frame.
# NOTE(review): setDF is presumably data.table::setDF - confirm it is attached.
y_df <- setDF(y)
# Classify sentiment values as "Negative", "Neutral" or "Positive".
#
# sentiment: either
#   * a character/factor vector of NRC lexicon categories, or
#   * a numeric vector of polarity scores.
# Returns a character vector the same length as `sentiment`.
#
# NRC categories are text, so they must not be compared with numbers: a
# string-versus-number comparison is done alphabetically and sent every
# category to "Positive". Categories are mapped by valence instead:
#   negative, anger, disgust, fear, sadness -> "Negative"
#   positive, joy, trust                    -> "Positive"
#   anticipation, surprise                  -> "Neutral" (no clear valence)
#   anything else                           -> NA
# Numeric scores keep the original thresholds:
#   score < 0 -> "Negative"; 0 <= score <= 0.01 -> "Neutral"; else "Positive".
get_sentiment_class <- function(sentiment) {
  sentiment_class <- rep(NA_character_, length(sentiment))

  if (is.character(sentiment) || is.factor(sentiment)) {
    label <- tolower(as.character(sentiment))
    negative_labels <- c("negative", "anger", "disgust", "fear", "sadness")
    positive_labels <- c("positive", "joy", "trust")
    neutral_labels <- c("anticipation", "surprise")

    sentiment_class[label %in% negative_labels] <- "Negative"
    sentiment_class[label %in% neutral_labels] <- "Neutral"
    sentiment_class[label %in% positive_labels] <- "Positive"
    return(sentiment_class)
  }

  # which() drops NA comparisons, so missing scores simply stay NA.
  sentiment_class[which(sentiment < 0)] <- "Negative"
  sentiment_class[which(sentiment >= 0 & sentiment <= 0.01)] <- "Neutral"
  sentiment_class[which(sentiment > 0.01)] <- "Positive"
  sentiment_class
}
# Replace each NRC category label with the class returned by
# get_sentiment_class(). vapply() guarantees one string per row, and
# USE.NAMES = FALSE stops the input labels being attached as element names.
y_df$sentiment <- vapply(
  y_df$sentiment,
  get_sentiment_class,
  FUN.VALUE = character(1),
  USE.NAMES = FALSE
)
y_df
## word sentiment
## 1 time Positive
## 2 undo Positive
## 3 painful Positive
## 4 painful Positive
## 5 painful Positive
## 6 painful Positive
## 7 painful Positive
## 8 love Positive
## 9 love Positive
## 10 doctor Positive
## 11 doctor Positive
## 12 king Positive
## 13 sequel Positive
## 14 shining Positive
## 15 shining Positive
## 16 shining Positive
## 17 doctor Positive
## 18 doctor Positive
## 19 boy Positive
## 20 boy Positive
## 21 intuitive Positive
## 22 shining Positive
## 23 shining Positive
## 24 shining Positive
## 25 job Positive
## 26 haunted Positive
## 27 haunted Positive
## 28 haunted Positive
## 29 amusement Positive
## 30 amusement Positive
## 31 small Positive
## 32 tyrant Positive
## 33 tyrant Positive
## 34 tyrant Positive
## 35 tyrant Positive
## 36 tyrant Positive
## 37 pet Positive
## 38 status Positive
## 39 chart Positive
## 40 pop Positive
## 41 pop Positive
## 42 revival Positive
## 43 revival Positive
## 44 revival Positive
## 45 revival Positive
## 46 boy Positive
## 47 boy Positive
## 48 battle Positive
## 49 battle Positive
## 50 king Positive
## 51 fanaticism Positive
## 52 king Positive
## 53 scare Positive
## 54 scare Positive
## 55 scare Positive
## 56 scare Positive
## 57 scare Positive
## 58 monster Positive
## 59 monster Positive
## 60 writer Positive
## 61 horror Positive
## 62 horror Positive
## 63 horror Positive
## 64 horror Positive
## 65 horror Positive
## 66 horror Positive
## 67 succinct Positive
## 68 king Positive
## 69 start Positive
## 70 horror Positive
## 71 horror Positive
## 72 horror Positive
## 73 horror Positive
## 74 horror Positive
## 75 horror Positive
## 76 reason Positive
## 77 interesting Positive
## 78 sinister Positive
## 79 sinister Positive
## 80 sinister Positive
## 81 sinister Positive
## 82 king Positive
## 83 king Positive
## 84 crack Positive
## 85 police Positive
## 86 police Positive
## 87 police Positive
## 88 suspense Positive
## 89 suspense Positive
## 90 suspense Positive
## 91 outsider Positive
## 92 routine Positive
## 93 routine Positive
## 94 police Positive
## 95 police Positive
## 96 police Positive
## 97 long Positive
## 98 sinister Positive
## 99 sinister Positive
## 100 sinister Positive
## 101 sinister Positive
## 102 slim Positive
## 103 elevation Positive
## 104 elevation Positive
## 105 elevation Positive
## 106 elevation Positive
## 107 elevation Positive
## 108 rock Positive
## 109 prejudice Positive
## 110 prejudice Positive
## 111 king Positive
## 112 special Positive
## 113 special Positive
## 114 cultivated Positive
## 115 nightmare Positive
## 116 nightmare Positive
## 117 terror Positive
## 118 terror Positive
## 119 evil Positive
## 120 evil Positive
## 121 evil Positive
## 122 evil Positive
## 123 evil Positive
# Bar chart counting the rows of y_df per sentiment value,
# with one fill colour per value.
ggplot(
  data = y_df,
  mapping = aes(x = sentiment, fill = sentiment)
) +
  geom_bar()
As the results above show, the sentimentr package produces better results, since it attempts to take into account valence shifters (i.e., negators, amplifiers (intensifiers), de-amplifiers (downtoners), and adversative conjunctions) while maintaining speed. Simply put, sentimentr is an augmented dictionary lookup.