# Author: GEETHIKA SAI YALAMANCHI

suppressWarnings({
  suppressPackageStartupMessages({
    library(tidyverse)
    library(quantmod)
    library(tsbox)
    library(zoo)
  })
})

# Console formatting: 3 significant digits, and a large `scipen` penalty so
# numbers are printed in fixed rather than scientific notation.
options(digits = 3, scipen = 99999)

# Download the three FRED series from 2010-01-01 up to today. getSymbols()
# assigns each series into the workspace under its FRED code and returns the
# codes, which is the output echoed below.
getSymbols(
  c("PAYEMS", "INDPRO", "UMCSENT"),
  src = "FRED",
  return.class = "xts",
  from = "2010-01-01",
  to = Sys.Date()
)
## [1] "PAYEMS"  "INDPRO"  "UMCSENT"

# Descriptive aliases for the downloaded series.
employment <- PAYEMS
industrial_production <- INDPRO
consumer_sentiment <- UMCSENT

# Combine the series into a single xts object (rows matched on the time
# index) and give the columns readable names.
economic_data <- merge(
  employment,
  industrial_production,
  consumer_sentiment
)
colnames(economic_data) <- c(
  "Employment",
  "Industrial_Production",
  "Consumer_Sentiment"
)

# Diffusion index ----

# Period-over-period change of every indicator. Rows containing an NA (the
# first row, which has no predecessor, and any row where a series is missing)
# are dropped.
monthly_changes <- diff(economic_data) |> na.omit()

# Capture the observation dates while the data is still an xts object.
# Calling index() on the plain matrix built below would fall through to the
# default method and return row numbers instead of dates.
dates <- index(monthly_changes)

# Direction of each change: +1 for a rise, -1 for a fall, 0 for no change.
diffusion_data <- sign(coredata(monthly_changes))

# Net share of rising indicators per row, in percent (-100 to 100).
diffusion_index <- rowSums(diffusion_data) / ncol(diffusion_data) * 100

# Trailing 7-observation mean. With align = "right" and fill = NA the first
# 6 values are NA by construction.
smoothed_index <- rollmean(diffusion_index, k = 7, align = "right", fill = NA)

plot_data <- data.frame(Date = dates, Diffusion_Index = smoothed_index)

# na.rm = TRUE drops the 6 leading NA rows silently instead of warning, and
# the explicit formula avoids geom_smooth()'s informational message.
ggplot(plot_data, aes(x = Date, y = Diffusion_Index)) +
  geom_line(color = "blue", na.rm = TRUE) +
  geom_smooth(
    method = "loess", formula = y ~ x,
    color = "red", se = FALSE, na.rm = TRUE
  ) +
  labs(
    title = "Diffusion Index for U.S. Economic Variables",
    x = "Date", y = "Diffusion Index (%)",
    caption = "Data Source: FRED"
  ) +
  theme_minimal()

# Comparison with CFNAIDIFF ----

# getSymbols() assigns the series into the workspace as `CFNAIDIFF`.
getSymbols(
  "CFNAIDIFF",
  src = "FRED", return.class = "xts", from = "2010-01-01"
)

# Align the two series on their dates, not on their position in the vector.
# The diffusion index loses its first observation to diff(), and the two
# downloads need not start or end on the same date, so pairing element i with
# element i would compare different periods.
smoothed_xts <- xts(as.numeric(smoothed_index), order.by = dates)
aligned <- merge(smoothed_xts, CFNAIDIFF, join = "inner")
colnames(aligned) <- c("Diffusion_Index", "CFNAIDIFF")
stopifnot(
  "No overlapping dates between the diffusion index and CFNAIDIFF" =
    nrow(aligned) > 0
)

# As before, these three objects end up as equal-length, mutually aligned
# vectors covering only the common dates.
aligned_dates <- index(aligned)
smoothed_index <- as.numeric(aligned$Diffusion_Index)
cfnaidiff <- as.numeric(aligned$CFNAIDIFF)

comparison_data <- data.frame(
  Date = aligned_dates,
  Diffusion_Index = smoothed_index,
  CFNAIDIFF = cfnaidiff
)

# "complete.obs" skips the leading NA values of the smoothed index.
correlation <- cor(
  comparison_data$Diffusion_Index,
  comparison_data$CFNAIDIFF,
  use = "complete.obs"
)
print(paste("Correlation Coefficient:", round(correlation, 3)))

# NOTE(review): Diffusion_Index is expressed in percent; CFNAIDIFF may be on
# a different scale, in which case one line will look flat on a shared axis.
# Confirm whether rescaling or a second axis is wanted.
ggplot(comparison_data, aes(x = Date)) +
  geom_line(
    aes(y = Diffusion_Index, color = "Diffusion Index"),
    na.rm = TRUE
  ) +
  geom_line(aes(y = CFNAIDIFF, color = "CFNAIDIFF")) +
  labs(
    title = "Comparison of Diffusion Indices",
    subtitle = paste("Correlation Coefficient:", round(correlation, 3)),
    x = "Date", y = "Index",
    color = "Index"
  ) +
  theme_minimal()