1️⃣ Setup and Install Required Packages

# Install Required Packages
target_packages <- c("tidyverse", "tidytext", "tm", "SentimentAnalysis", "ggplot2", "readr", "quantmod", "tokenizers")
missing_packages <- target_packages[!(target_packages %in% installed.packages()[,"Package"])]
if(length(missing_packages)) install.packages(missing_packages)

# Load Required Libraries
library(tidyverse)
library(tidytext)
library(tm)
library(SentimentAnalysis)
library(ggplot2)  # already attached by tidyverse; loaded explicitly for completeness
library(readr)    # already attached by tidyverse; loaded explicitly for completeness
library(quantmod)
library(tokenizers)
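
These packages print attach messages and masking warnings when loaded (the tidyverse conflict notes, the xts warning about dplyr::lag(), and so on), which clutter a knitted document. A minimal alternative, using base R's suppressPackageStartupMessages(), loads everything quietly:

# Load every package without attach messages; invisible() hides lapply()'s return value
invisible(lapply(target_packages, function(pkg) {
  suppressPackageStartupMessages(library(pkg, character.only = TRUE))
}))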

2️⃣ Load and Preprocess FOMC Text Data

# Function to Read Text Files with UTF-8 Encoding
read_text_files <- function(path) {
  files <- list.files(path, pattern = "\\.txt$", full.names = TRUE)
  texts <- tibble(
    file = basename(files),
    content = map_chr(files, ~ read_file(.x, locale = locale(encoding = "UTF-8")))
  )
  return(texts)
}

# Load FOMC Statements from the same directory as the R Markdown file
fomc_statements <- read_text_files("./")

# Scrub any stray non-UTF-8 bytes that survive the initial read
fomc_statements$content <- iconv(fomc_statements$content, from = "latin1", to = "UTF-8", sub = "")

# Keep the file name as a document ID so tokens can be traced back to their statement
fomc_df <- data.frame(doc_id = fomc_statements$file,
                      text = as.character(fomc_statements$content),
                      stringsAsFactors = FALSE)
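
If the statement files encode their release date in the name (for example, a hypothetical fomc_20240131.txt convention; the pattern below assumes an 8-digit date and should be adapted to the actual file names), extracting it now makes it easy to align sentiment with market data from quantmod later:

# Pull an 8-digit date out of file names like "fomc_20240131.txt"
# (hypothetical naming convention; adjust the pattern to your files)
fomc_df$date <- as.Date(str_extract(fomc_df$doc_id, "\\d{8}"), format = "%Y%m%d")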

3️⃣ Text Cleaning: Tokenization & Preprocessing

# Tokenization and Removing Stopwords
fomc_tokens <- fomc_df %>%
  unnest_tokens(word, text) %>%
  anti_join(get_stopwords(), by = "word")

# Remove tokens that contain digits
fomc_tokens <- fomc_tokens %>% filter(!str_detect(word, "\\d"))

# Perform Stemming (Optional: Replace with Lemmatization if Needed)
fomc_tokens <- fomc_tokens %>% mutate(word = SnowballC::wordStem(word))
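
Stemming can leave truncated tokens such as "inflat" or "polici". If readable lemmas are preferred, one option, assuming the textstem package is installed, is its lemmatize_words() helper, applied here to a fresh copy of the tokens for comparison:

# Dictionary-based lemmatization via textstem (assumes install.packages("textstem") has been run)
fomc_tokens_lemma <- fomc_df %>%
  unnest_tokens(word, text) %>%
  anti_join(get_stopwords(), by = "word") %>%
  filter(!str_detect(word, "\\d")) %>%
  mutate(word = textstem::lemmatize_words(word))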

# Create Document-Term Matrix (DTM)
# doc_id (the source file name) carries through unnest_tokens(), so each
# statement is one document rather than each token getting its own row
dtm <- fomc_tokens %>%
  count(doc_id, word) %>%
  cast_dtm(document = doc_id, term = word, value = n)
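
A quick look at the DTM's dimensions confirms that one row per statement (not per token) came through; tm's removeSparseTerms() can additionally drop rare terms, with the 0.99 threshold below chosen arbitrarily:

# Rows should equal the number of statements; columns the number of distinct stems
dim(dtm)

# Optionally drop terms absent from more than 99% of documents (arbitrary threshold)
dtm_trimmed <- removeSparseTerms(dtm, sparse = 0.99)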
# Count word frequencies and keep the 20 most frequent terms
top_words <- fomc_tokens %>%
  count(word, sort = TRUE) %>%
  slice_max(n, n = 20)  # slice_max() supersedes the older top_n()

# Plot Top 20 Words
ggplot(top_words, aes(x = reorder(word, n), y = n)) +
  geom_col(fill = "steelblue") +  # geom_col() is the idiomatic bar-of-values geom
  coord_flip() +  # Flip axes so long words stay readable
  labs(title = "Top 20 Most Frequent Words in FOMC Statements",
       x = "Word",
       y = "Frequency") +
  theme_minimal()
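
Raw counts are dominated by terms common to every statement. A tf-idf weighting with tidytext's bind_tf_idf() instead surfaces the words that distinguish one statement from the rest:

# Weight terms by tf-idf to highlight statement-specific vocabulary
fomc_tfidf <- fomc_tokens %>%
  count(doc_id, word) %>%
  bind_tf_idf(word, doc_id, n) %>%
  arrange(desc(tf_idf))

head(fomc_tfidf, 10)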

4️⃣ Sentiment Analysis: LM & GI Dictionaries

# Load Sentiment Dictionaries (LM = Loughran-McDonald, finance-specific; GI = Harvard General Inquirer)
dict_lm <- loadDictionaryLM()
dict_gi <- loadDictionaryGI()

# Run Sentiment Analysis on FOMC Statements
# analyzeSentiment() scores each text with its built-in dictionaries (LM and GI among them)
sentiment_scores <- analyzeSentiment(fomc_df$text)

# analyzeSentiment() returns a data.frame with one row per document and one
# column per dictionary score. Note that is.list() is TRUE for a data.frame,
# so testing is.list() here would wrongly trigger an rbind() that mangles the
# result; test is.data.frame() instead.
if (!is.data.frame(sentiment_scores)) {
  sentiment_scores <- as.data.frame(sentiment_scores)
}
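
With the scores in a data.frame (one row per statement), the finance-specific LM column can be compared against GI and converted into a positive/negative label with SentimentAnalysis's convertToBinaryResponse(); a brief sketch:

# Attach document IDs and compare the LM (finance-specific) and GI scores
sentiment_scores$doc_id <- fomc_df$doc_id
summary(sentiment_scores[, c("SentimentLM", "SentimentGI")])

# Classify each statement as positive or negative by its LM polarity
sentiment_scores$direction_lm <- convertToBinaryResponse(sentiment_scores$SentimentLM)
table(sentiment_scores$direction_lm)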