1️⃣ Setup and Install Required Packages
# Install Required Packages
# SnowballC (called directly for stemming) and stopwords (which tidytext's
# get_stopwords() relies on) are used later in this script, so they are listed
# explicitly instead of being left to arrive as indirect dependencies.
target_packages <- c(
  "tidyverse", "tidytext", "tm", "SentimentAnalysis", "ggplot2", "readr",
  "quantmod", "tokenizers", "SnowballC", "stopwords"
)
# Only install what is not already present in the library.
missing_packages <- setdiff(target_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
# Load Required Libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidytext)
library(tm)
## Loading required package: NLP
##
## Attaching package: 'NLP'
##
## The following object is masked from 'package:ggplot2':
##
## annotate
library(SentimentAnalysis)
##
## Attaching package: 'SentimentAnalysis'
##
## The following object is masked from 'package:base':
##
## write
library(ggplot2)
library(readr)
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tokenizers)
2️⃣ Load and Preprocess FOMC Text Data
# Read every ".txt" file found directly under `path`, decoding each as UTF-8.
# Returns a tibble with one row per file:
#   file    - the file's base name
#   content - the full text of the file as a single string
read_text_files <- function(path) {
  txt_paths <- list.files(path, pattern = "\\.txt$", full.names = TRUE)
  utf8_locale <- locale(encoding = "UTF-8")
  file_contents <- map_chr(
    txt_paths,
    function(one_path) read_file(one_path, locale = utf8_locale)
  )
  tibble(file = basename(txt_paths), content = file_contents)
}
# Load FOMC Statements from the same directory as the R Markdown file
fomc_statements <- read_text_files("./")
# Sanitise the text while keeping it UTF-8.
# The files were already decoded as UTF-8 by read_text_files(); converting
# "from latin1" here would re-interpret every multi-byte character (curly
# quotes, dashes, ...) byte-by-byte and turn it into mojibake. A UTF-8 -> UTF-8
# pass with sub = "" leaves valid text untouched and only drops invalid bytes.
fomc_statements$content <- iconv(
  fomc_statements$content,
  from = "UTF-8", to = "UTF-8", sub = ""
)
# Plain character vector and one-column data frame used by the later steps
fomc_texts <- as.character(fomc_statements$content)
fomc_df <- data.frame(text = fomc_texts, stringsAsFactors = FALSE)
3️⃣ Text Cleaning: Tokenization & Preprocessing
# Tokenization and Removing Stopwords
# Each statement is tagged with its document ID *before* tokenizing, so every
# token keeps track of the statement it came from. (Numbering rows after
# tokenization would give every single token its own "document" and make the
# document-term matrix meaningless.)
fomc_tokens <- fomc_df %>%
  mutate(doc_id = row_number()) %>%
  unnest_tokens(word, text) %>%
  anti_join(get_stopwords(), by = "word")
# Remove tokens that contain digits (numbers, dates, percentages)
fomc_tokens <- fomc_tokens %>%
  filter(!str_detect(word, "\\d"))
# Perform Stemming (Optional: Replace with Lemmatization if Needed)
fomc_tokens <- fomc_tokens %>%
  mutate(word = SnowballC::wordStem(word))
# Create Document-Term Matrix (DTM): one row per statement, one column per
# stemmed term, cell value = number of occurrences in that statement
dtm <- fomc_tokens %>%
  count(doc_id, word) %>%
  cast_dtm(document = doc_id, term = word, value = n)
# Tally occurrences of each stemmed word across all statements and keep the
# 20 most frequent ones (ties at the cut-off are retained)
top_words <- count(fomc_tokens, word, sort = TRUE) %>%
  top_n(20, n)
# Horizontal bar chart of the most frequent words, ordered by frequency
ggplot(data = top_words, mapping = aes(x = reorder(word, n), y = n)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Top 20 Most Frequent Words in FOMC Statements",
    x = "Word",
    y = "Frequency"
  ) +
  theme_minimal()

4️⃣ Sentiment Analysis: LM & GI Dictionaries
# Load Sentiment Dictionaries
# (kept for reference; they are not passed to analyzeSentiment() below, which
# is called with its default rules)
dict_lm <- loadDictionaryLM()
dict_gi <- loadDictionaryGI()
# Run Sentiment Analysis on FOMC Statements
# analyzeSentiment() returns a data frame with one row per document and one
# column per metric (word count plus several dictionary-based scores).
sentiment_scores <- analyzeSentiment(fomc_df$text)
# Keep only the Loughran-McDonald sentiment score, as a one-column matrix with
# one row per statement.
# A data frame is also a list, so flattening it with
# do.call(rbind, lapply(...)) would stack *all* metric columns (including the
# raw word count) on top of each other and the later "SentimentLM" label would
# then be attached to that mixture.
sentiment_scores <- as.matrix(
  sentiment_scores[, "SentimentLM", drop = FALSE]
)
5️⃣ Visualizing Sentiment Trends
# Parse the statement date from the 8-digit run (YYYYMMDD) in each file name
fomc_statements$date <- as.Date(
  str_extract(fomc_statements$file, "\\d{8}"),
  format = "%Y%m%d"
)
# Work with the scores as a data frame whose single column is "SentimentLM"
sentiment_scores <- as.data.frame(sentiment_scores)
names(sentiment_scores) <- "SentimentLM"
# Collapse the scores to one value per statement.
# Document IDs are assigned cyclically (1..number of statements, repeated as
# needed to cover every score row); rows sharing an ID are averaged and the
# result is sorted by ID so it lines up with the rows of fomc_statements.
sentiment_aggregated <- sentiment_scores %>%
  mutate(
    doc_id = rep(seq_len(nrow(fomc_statements)), length.out = n())
  ) %>%
  group_by(doc_id) %>%
  summarize(SentimentLM = mean(SentimentLM, na.rm = TRUE)) %>%
  arrange(doc_id)
# Attach the per-statement score to the statement metadata (matched by row
# position, so both tables must have the same number of rows)
sentiment_data <- mutate(
  fomc_statements,
  sentiment = sentiment_aggregated[["SentimentLM"]]
)
# Debugging: inspect the first rows before plotting
print(head(sentiment_data))
## # A tibble: 6 × 4
## file content date sentiment
## <chr> <chr> <date> <dbl>
## 1 20080122.txt "The Federal Open Market Committee has deci… 2008-01-22 10.8
## 2 20080130.txt "The Federal Open Market Committee decided … 2008-01-30 11.0
## 3 20080311.txt "Since the coordinated actions taken in Dec… 2008-03-11 14.7
## 4 20080318.txt "The Federal Open Market Committee decided … 2008-03-18 12.7
## 5 20080430.txt "The Federal Open Market Committee decided … 2008-04-30 13.6
## 6 20080625.txt "The Federal Open Market Committee decided … 2008-06-25 11.2
# Line chart of the per-statement sentiment score against the statement date;
# the red points mark the individual statements so gaps are easy to spot
ggplot(data = sentiment_data, mapping = aes(x = date, y = sentiment)) +
  geom_line(colour = "blue") +
  geom_point(colour = "red") +
  ggtitle("FOMC Statement Sentiment Over Time") +
  xlab("Date") +
  ylab("Sentiment Score") +
  theme_minimal()

## **6️⃣ Discussion & Conclusion**
``` r
# Summary Statistics
# Five-number summary and mean of the single SentimentLM column of
# sentiment_scores.
# NOTE(review): in the recorded output below, the maximum (560) and the mean
# (~23) sit far above the third quartile (~0.13), which suggests non-sentiment
# values are mixed into this column - verify how sentiment_scores was built.
summary(sentiment_scores)
## SentimentLM
## Min. : -0.0703
## 1st Qu.: 0.0279
## Median : 0.0596
## Mean : 23.0332
## 3rd Qu.: 0.1339
## Max. :560.0000
# Discussion
# Print the written interpretation of the sentiment trend into the report
# output (cat() emits the text as-is, without quotes).
cat("The sentiment analysis of FOMC statements shows a trend in tone over time, which can be linked to macroeconomic conditions, monetary policy changes, and financial market reactions. A more positive sentiment may correspond with expansionary policies, while a negative sentiment could indicate economic concerns or tightening measures.")
## The sentiment analysis of FOMC statements shows a trend in tone over time, which can be linked to macroeconomic conditions, monetary policy changes, and financial market reactions. A more positive sentiment may correspond with expansionary policies, while a negative sentiment could indicate economic concerns or tightening measures.