### Loading the required libraries

suppressWarnings({
  suppressPackageStartupMessages({
    library(markovchain)
library(tidyverse)
library(quantmod)
library(tsbox)
library(TSstudio)
library(vars)
  })
})

###Importing the Data

# Download three monthly economic series from FRED: Unemployment Rate
# (UNRATE), Industrial Production (INDPRO) and Retail Sales (RSAFS).
# getSymbols() creates one object per symbol in the workspace.
getSymbols(Symbols = c("UNRATE", "INDPRO", "RSAFS"), src = "FRED")
## [1] "UNRATE" "INDPRO" "RSAFS"

# Turn each downloaded series into a data frame of dates and values.
unrate <- data.frame(
  Date = index(UNRATE),
  Value = coredata(UNRATE)
)
indpro <- data.frame(
  Date = index(INDPRO),
  Value = coredata(INDPRO)
)
rsafs <- data.frame(
  Date = index(RSAFS),
  Value = coredata(RSAFS)
)

# Fold the three data frames into one with successive full outer joins on
# Date, so a date present in any of the series is kept (gaps become NA).
economic_data <- Reduce(
  function(left, right) merge(left, right, by = "Date", all = TRUE),
  list(unrate, indpro, rsafs)
)

# Name the columns after the series they hold.
names(economic_data) <- c("Date", "UNRATE", "INDPRO", "RSAFS")

## Calculate the Diffusion Index

# For every series, record the direction of its month-to-month change:
# 1 for an increase, -1 for a decrease, 0 for no change, and NA when the
# current or the previous observation is missing.
#
# The index is the net share of rising series, in percent: the sum of the
# available directions divided by the number of series that actually have a
# direction for that month. Dividing by a fixed 3 would treat a series that
# has no data yet as "no change" and pull the index toward zero. Months with
# no usable direction at all get NA instead of a misleading 0.
#
# lag() is namespaced, like select() below, so the dplyr vector shift is used
# whatever other packages are attached.
#
# NOTE(review): a rise in UNRATE is counted as +1, exactly like a rise in
# INDPRO or RSAFS. An activity-style index would normally count rising
# unemployment as a negative signal -- confirm the intended sign.
diffusion_index <- economic_data %>%
  # Directions compare each row with the previous one, so rows must be in
  # date order.
  dplyr::arrange(Date) %>%
  mutate(
    UNRATE_change = sign(UNRATE - dplyr::lag(UNRATE)),
    INDPRO_change = sign(INDPRO - dplyr::lag(INDPRO)),
    RSAFS_change = sign(RSAFS - dplyr::lag(RSAFS)),
    # Rises minus falls across the series available in each month.
    net_rising = rowSums(
      cbind(UNRATE_change, INDPRO_change, RSAFS_change),
      na.rm = TRUE
    ),
    # Number of series with a known direction in each month.
    n_available = rowSums(
      !is.na(cbind(UNRATE_change, INDPRO_change, RSAFS_change))
    ),
    Diffusion = ifelse(
      n_available > 0,
      net_rising / n_available * 100,
      NA_real_
    )
  ) %>%
  dplyr::select(Date, Diffusion)

## Retrieve CFNAIDIFF data
# Fetch the Chicago Fed National Activity Index: Diffusion Index from FRED.
getSymbols(Symbols = "CFNAIDIFF", src = "FRED")
## [1] "CFNAIDIFF"

# Convert CFNAIDIFF to a data frame of dates and values.
cfnaidiff <- data.frame(
  Date = index(CFNAIDIFF),
  CFNAIDIFF = coredata(CFNAIDIFF)
)

# Combine the computed Diffusion Index with CFNAIDIFF by Date for comparison.
# The full outer join keeps dates that appear in only one of the two inputs.
merged_data <- merge(
  x = diffusion_index,
  y = cfnaidiff,
  by = "Date",
  all = TRUE
)

# Correlation coefficient between the two indices, computed only over the
# dates where both values are present.
correlation <- with(
  merged_data,
  cor(Diffusion, CFNAIDIFF, use = "complete.obs")
)
print(paste0("Correlation coefficient: ", correlation))
## [1] "Correlation coefficient: 0.192576848009578"

## Plot both indices over time

# One line per index on a shared date axis; the colour legend maps each
# label to a fixed colour.
ggplot(merged_data, aes(x = Date)) +
  geom_line(aes(y = Diffusion, colour = "Your Index")) +
  geom_line(aes(y = CFNAIDIFF, colour = "CFNAIDIFF")) +
  scale_colour_manual(
    values = c("Your Index" = "blue", "CFNAIDIFF" = "green")
  ) +
  ggtitle("Comparison of Diffusion Indices") +
  xlab("Date") +
  ylab("Value") +
  theme_minimal()
## Warning: Removed 581 rows containing missing values or values outside the scale range
## (`geom_line()`).

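## Conclusion
# A strong positive correlation would indicate that the two indices behave
# similarly, reflecting similar economic trends. The coefficient recorded in
# the run above (about 0.19) is only a weak positive correlation, so this
# three-series index tracks CFNAIDIFF only loosely.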