Lexicon Sentiment Analysis

LOADING

BASE

# Build a one-word-per-row table from austen_books().
# Within each book, every line gets a running line number and a running
# chapter counter; the counter increases on each line that starts with
# "chapter" followed by a digit or a roman-numeral letter (case-insensitive).
tidy_books <-
  austen_books() %>%
  group_by(book) %>%
  mutate(
    linenumber = row_number(),
    chapter = cumsum(
      str_detect(text, regex("^chapter [\\divxlc]", ignore_case = TRUE))
    )
  ) %>%
  ungroup() %>%
  # Split the `text` column into individual tokens stored in `word`.
  unnest_tokens(output = word, input = text)

# Preview the first rows of the tokenised table.
tidy_books %>%
  head() %>%
  print()

## # A tibble: 6 × 4
##   book                linenumber chapter word       
##   <fct>                    <int>   <int> <chr>      
## 1 Sense & Sensibility          1       0 sense      
## 2 Sense & Sensibility          1       0 and        
## 3 Sense & Sensibility          1       0 sensibility
## 4 Sense & Sensibility          3       0 by         
## 5 Sense & Sensibility          3       0 jane       
## 6 Sense & Sensibility          3       0 austen

SCRAPING

# Try to load the AFINN lexicon. If loading fails, report the reason as a
# message and fall back to NULL so later code can test for availability.
afinn <- tryCatch(
  get_sentiments("afinn"),
  error = function(err) {
    message("Error in loading the AFINN lexicon: ", err$message)
    NULL
  }
)

## Error in loading the AFINN lexicon: The textdata package is required to download the AFINN lexicon.
## Install the textdata package to access this dataset.

# Report on the AFINN lexicon: a notice when it could not be loaded (afinn is
# NULL), otherwise a caption followed by its first rows.
if (is.null(afinn)) {
  message("AFINN lexicon not available.")
} else {
  print("AFINN lexicon (first 6 rows):")
  head(afinn)
}

## AFINN lexicon not available.

ANALYZING

# Six hand-written example tweets, tokenised into one word per row.
# The `id` column is kept so words can be traced back to their tweet.
tidy_tweets <- unnest_tokens(
  tbl = tibble(
    id = 1:6,
    text = c(
      "I love the new movie! Absolutely fantastic and thrilling.",
      "The weather is gloomy. I feel so depressed and sad.",
      "What an amazing day; everything is going great.",
      "Totally disappointed by the service. Would not recommend.",
      "Feeling happy and blessed today.",
      "This is the worst experience ever. Completely awful!"
    )
  ),
  output = word,
  input = text
)
  
# Load the Bing lexicon (one row per word with a sentiment label) and preview it.
bing <- get_sentiments(lexicon = "bing")
bing %>% head()

## # A tibble: 6 × 2
##   word       sentiment
##   <chr>      <chr>    
## 1 2-faces    negative 
## 2 abnormal   negative 
## 3 abolish    negative 
## 4 abominable negative 
## 5 abominably negative 
## 6 abominate  negative

# Score each tweet with the Bing lexicon:
#   1. keep only the words that appear in the lexicon,
#   2. count matched words per tweet and sentiment label,
#   3. spread the labels into columns (0 where a tweet has no word of a label),
#   4. net score = number of positive words minus number of negative words.
tweet_sentiment <-
  tidy_tweets %>%
  inner_join(bing, by = "word") %>%
  count(id, sentiment) %>%
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = 0
  ) %>%
  mutate(net_sentiment = positive - negative)

# Show the per-tweet scores.
print(tweet_sentiment)

## # A tibble: 6 × 4
##      id positive negative net_sentiment
##   <int>    <int>    <int>         <int>
## 1     1        3        0             3
## 2     2        0        3            -3
## 3     3        2        0             2
## 4     4        1        1             0
## 5     5        1        0             1
## 6     6        0        2            -2

VISUALIZING

# Bar chart of the net sentiment score per tweet.
# Bars are green when the net score is above zero and red otherwise
# (a score of exactly zero has no visible bar height).
ggplot(
  tweet_sentiment,
  aes(x = factor(id), y = net_sentiment, fill = net_sentiment > 0)
) +
  # geom_col() draws bar heights directly from the y values.
  geom_col() +
  labs(
    title = "Net Sentiment of Tweets (Bing Lexicon)",
    x = "Tweet ID",
    y = "Net Sentiment"
  ) +
  # Colours are named by logical level so the mapping stays correct even when
  # only one of TRUE/FALSE occurs in the data. guide = "none" hides the legend
  # (guide = FALSE is deprecated since ggplot2 3.3.4).
  scale_fill_manual(
    values = c("FALSE" = "red", "TRUE" = "green"),
    guide = "none"
  )

## Warning: The `guide` argument in `scale_*()` cannot be `FALSE`. This was deprecated in
## ggplot2 3.3.4.
## ℹ Please use "none" instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Lexicon Sentiment Analysis

Stefan Huber

LOADING

BASE

SCRAPING

ANALYZING

VISUALIZING