# LOADING ----
# BASE ----
# Tokenise the Austen books into one word per row. Before tokenising, every
# line of text is tagged with its position inside its book and with a running
# count of the chapter headings seen so far in that book (0 until the first
# heading is reached).
tidy_books <- austen_books() %>%
  group_by(book) %>%
  mutate(linenumber = row_number()) %>%
  mutate(
    chapter = text %>%
      str_detect(regex("^chapter [\\divxlc]", ignore_case = TRUE)) %>%
      cumsum()
  ) %>%
  ungroup() %>%
  unnest_tokens(output = word, input = text)

# Preview the first rows of the tokenised table
print(head(tidy_books, n = 6L))
## # A tibble: 6 × 4
## book linenumber chapter word
## <fct> <int> <int> <chr>
## 1 Sense & Sensibility 1 0 sense
## 2 Sense & Sensibility 1 0 and
## 3 Sense & Sensibility 1 0 sensibility
## 4 Sense & Sensibility 3 0 by
## 5 Sense & Sensibility 3 0 jane
## 6 Sense & Sensibility 3 0 austen
# SCRAPING ----
# Try to load the AFINN lexicon. If loading raises an error, report the
# reason as a message and fall back to NULL so later code can test for it.
afinn <- tryCatch(
  get_sentiments("afinn"),
  error = function(err) {
    message("Error in loading the AFINN lexicon: ", err$message)
    NULL
  }
)
## Error in loading the AFINN lexicon: The textdata package is required to download the AFINN lexicon.
## Install the textdata package to access this dataset.
# Preview the AFINN lexicon when it was loaded; otherwise say it is missing.
if (!is.null(afinn)) {
  print("AFINN lexicon (first 6 rows):")
  # Print explicitly rather than relying on top-level autoprinting of the
  # `if` value, so the preview also appears when the script is source()d.
  print(head(afinn))
} else {
  message("AFINN lexicon not available.")
}
## AFINN lexicon not available.
# ANALYZING ----
# Six example tweets, written one row per tweet (integer id + text), then
# split into one word per row for the lexicon join further down.
tidy_tweets <- tribble(
  ~id, ~text,
  1L, "I love the new movie! Absolutely fantastic and thrilling.",
  2L, "The weather is gloomy. I feel so depressed and sad.",
  3L, "What an amazing day; everything is going great.",
  4L, "Totally disappointed by the service. Would not recommend.",
  5L, "Feeling happy and blessed today.",
  6L, "This is the worst experience ever. Completely awful!"
) %>%
  unnest_tokens(output = word, input = text)
# Load the Bing lexicon (one row per word with its sentiment label) and show
# its first rows.
bing <- get_sentiments(lexicon = "bing")
head(bing, n = 6L)
## # A tibble: 6 × 2
## word sentiment
## <chr> <chr>
## 1 2-faces negative
## 2 abnormal negative
## 3 abolish negative
## 4 abominable negative
## 5 abominably negative
## 6 abominate negative
# Per-tweet sentiment tally: keep only the words found in the Bing lexicon,
# count positive and negative words for each tweet id, spread the counts into
# `positive` / `negative` columns, and take their difference as the net score.
# Tweets with no lexicon match are dropped by the inner join.
tweet_sentiment <- tidy_tweets %>%
  inner_join(bing, by = "word") %>%
  # Fix both polarity levels up front so that pivot_wider() (with
  # names_expand = TRUE) always creates `positive` and `negative`, in that
  # order. Without this, input containing only one polarity would make
  # `positive - negative` fail on a missing column.
  mutate(sentiment = factor(sentiment, levels = c("positive", "negative"))) %>%
  count(id, sentiment) %>%
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = 0,
    names_expand = TRUE
  ) %>%
  mutate(net_sentiment = positive - negative)
print(tweet_sentiment)
## # A tibble: 6 × 4
## id positive negative net_sentiment
## <int> <int> <int> <int>
## 1 1 3 0 3
## 2 2 0 3 -3
## 3 3 2 0 2
## 4 4 1 1 0
## 5 5 1 0 1
## 6 6 0 2 -2
# VISUALIZING ----
# Bar chart of each tweet's net sentiment (positive minus negative word
# count), one bar per tweet id, coloured by whether the net score is above 0.
ggplot(
  tweet_sentiment,
  aes(x = factor(id), y = net_sentiment, fill = net_sentiment > 0)
) +
  # geom_col() takes bar heights directly from `y`.
  geom_col() +
  labs(
    title = "Net Sentiment of Tweets (Bing Lexicon)",
    x = "Tweet ID",
    y = "Net Sentiment"
  ) +
  # Colours are named so TRUE is always green and FALSE always red, even when
  # only one of the two values occurs in the data (unnamed colours are
  # assigned by position). `guide = "none"` hides the legend; it replaces
  # `guide = FALSE`, which is deprecated since ggplot2 3.3.4.
  scale_fill_manual(
    values = c("FALSE" = "red", "TRUE" = "green"),
    guide = "none"
  )
