# The following code performs sentiment analysis on a CSV file
# containing tweets. It uses the tidytext package and the
# bing sentiment lexicon for this purpose.
# Install and Load Libraries
#options(repos = c(CRAN = "https://cloud.r-project.org/"))
#install.packages(c("tidyverse", "textdata", "tidytext"))
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'stringr' was built under R version 4.2.3
## Warning: package 'forcats' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.0.9 ✔ readr 2.1.2
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.1.7
## ✔ lubridate 1.8.0 ✔ tidyr 1.2.0
## ✔ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the [conflicted package](http://conflicted.r-lib.org/) to force all conflicts to become errors
library(textdata)
## Warning: package 'textdata' was built under R version 4.2.3
library(tidytext)
## Warning: package 'tidytext' was built under R version 4.2.3
# Load tweets from a CSV file into a data frame.
tweets_data <- read.csv("tweets.csv")

# Fail early, with a readable message, if a column used later in this script
# (`tweet_id` for grouping, `text` for tokenising) is absent from the file.
required_cols <- c("tweet_id", "text")
missing_cols <- setdiff(required_cols, names(tweets_data))
if (length(missing_cols) > 0) {
  stop(
    "tweets.csv is missing required column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Coerce the tweet text to character so it can be tokenised regardless of the
# type read.csv() inferred for the column.
tweets_data$text <- as.character(tweets_data$text)

# Fetch the "bing" sentiment lexicon.
bing_lexicon <- get_sentiments("bing")
# Process tweets to determine sentiment.
#
# Steps:
#   1. Split each tweet's `text` into one word per row.
#   2. Keep only the words found in the bing lexicon. The join key is given
#      explicitly so dplyr does not have to guess it.
#   3. Tally the negative and positive words of each tweet.
#   4. Label each tweet by whichever tally is larger; ties are "neutral".
#
# NOTE: a tweet with no lexicon words is removed by the inner join, so it has
# no row in `tweets_sentiment` and is not counted by anything derived from it.
tweets_sentiment <- tweets_data %>%
  unnest_tokens(word, text) %>%
  inner_join(bing_lexicon, by = "word") %>%
  group_by(tweet_id) %>%
  # Summing logical tests always produces both tally columns, even when the
  # matched words are all positive or all negative. Reshaping the counts with
  # spread() only created a column for sentiments actually present, which made
  # the labelling step below fail on such data.
  summarise(
    negative = sum(sentiment == "negative"),
    positive = sum(sentiment == "positive"),
    .groups = "drop"
  ) %>%
  # Keep the result a plain data frame rather than a tibble.
  as.data.frame() %>%
  mutate(
    sentiment = case_when(
      positive > negative ~ "positive",
      positive < negative ~ "negative",
      TRUE ~ "neutral"
    )
  )
## Joining, by = "word"
# Print the first six rows of the result to inspect the per-tweet tallies and labels
head(tweets_sentiment, n = 6L)
## tweet_id negative positive sentiment
## 1 tweet_1 0 1 positive
## 2 tweet_10 1 0 negative
## 3 tweet_1001 0 1 positive
## 4 tweet_1002 0 2 positive
## 5 tweet_1003 1 0 negative
## 6 tweet_1004 0 1 positive
# Visualization: bar chart of the number of tweets carrying each sentiment label
tweets_sentiment %>%
  count(sentiment, name = "count") %>%
  ggplot(aes(x = sentiment, y = count)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Sentiment Distribution of Tweets",
    x = "Sentiment",
    y = "Number of Tweets"
  )