Sentiment Analysis on Twitter Data

The following code performs sentiment analysis on a CSV file containing tweets. We’ll use the tidytext package and the bing sentiment lexicon for this purpose.

# Install and Load Libraries
#options(repos = c(CRAN = "https://cloud.r-project.org/"))
#install.packages(c("tidyverse", "textdata", "tidytext"))
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'stringr' was built under R version 4.2.3
## Warning: package 'forcats' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.0.9     ✔ readr     2.1.2
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.1.7
## ✔ lubridate 1.8.0     ✔ tidyr     1.2.0
## ✔ purrr     0.3.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the [conflicted package](http://conflicted.r-lib.org/) to force all conflicts to become errors
library(textdata)
## Warning: package 'textdata' was built under R version 4.2.3
library(tidytext)
## Warning: package 'tidytext' was built under R version 4.2.3
# Read the raw tweets from the CSV file on disk
tweets_data <- read.csv(file = "tweets.csv")

# Coerce the tweet text to character so the tokenizer receives plain strings
tweets_data$text <- as.character(tweets_data$text)

# Load the "bing" sentiment lexicon used to label individual words
bing_lexicon <- get_sentiments(lexicon = "bing")

# Score each tweet by counting its positive and negative lexicon words.
# Steps: split the text into one word per row, keep only words found in the
# bing lexicon, tally the matches per tweet, then label the tweet by whichever
# polarity has more matches (ties are "neutral").
# NOTE(review): the inner join drops tweets that contain no lexicon words, so
# they are absent from the result rather than labelled "neutral" -- confirm
# this is intended.
tweets_sentiment <- tweets_data %>%
  unnest_tokens(word, text) %>%
  # Explicit key: no "Joining, by = ..." message and no accidental matching on
  # any other column name the two tables might share
  inner_join(bing_lexicon, by = "word") %>%
  group_by(tweet_id) %>%
  # Summing the comparisons always yields both count columns, even when one
  # polarity never occurs anywhere in the data
  summarise(
    negative = sum(sentiment == "negative"),
    positive = sum(sentiment == "positive"),
    .groups = "drop"
  ) %>%
  mutate(
    sentiment = case_when(
      positive > negative ~ "positive",
      positive < negative ~ "negative",
      TRUE ~ "neutral"
    )
  ) %>%
  # Drop the tibble class so the result prints as a plain data.frame
  as.data.frame()

# Display the first few rows of the resulting data to inspect the sentiment determination
head(tweets_sentiment)
##     tweet_id negative positive sentiment
## 1    tweet_1        0        1  positive
## 2   tweet_10        1        0  negative
## 3 tweet_1001        0        1  positive
## 4 tweet_1002        0        2  positive
## 5 tweet_1003        1        0  negative
## 6 tweet_1004        0        1  positive
# Visualization: bar chart of how many tweets received each sentiment label.
# count() tallies the rows per label into a column named "count", and
# geom_col() draws bars whose heights are those precomputed values.
tweets_sentiment %>%
  count(sentiment, name = "count") %>%
  ggplot(aes(x = sentiment, y = count)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Sentiment Distribution of Tweets",
    x = "Sentiment",
    y = "Number of Tweets"
  )