options(digits = 3, scipen = 99999)
remove(list = ls())
graphics.off()
suppressWarnings({
suppressPackageStartupMessages({
library(markovchain)
library(tidyverse)
library(quantmod)
library(tsbox)
library(vars)
})
})
# Fetch three alternative economic variables from FRED
getSymbols(c("PAYEMS", "CPIAUCSL", "RETAILSMSA"),
freq = "monthly",
src = "FRED", return.class = 'xts',
index.class = 'Date',
from = "2010-01-01",
to = Sys.Date(),
periodicity = "monthly")
## [1] "PAYEMS" "CPIAUCSL" "RETAILSMSA"
# Convert to monthly data
payems <- to.monthly(PAYEMS)[, 4]
cpi <- to.monthly(CPIAUCSL)[, 4]
retail <- to.monthly(RETAILSMSA)[, 4]
# Subset data (for aesthetic purposes)
payems_ss <- payems["2010-01-31/2024-09-01"] |> ts_ts()
cpi_ss <- cpi["2010-01-31/2024-09-01"]|> ts_ts()
retail_ss <- retail["2010-01-31/2024-09-01"]|> ts_ts()
# Assemble data into a single dataframe
mydata <- cbind.data.frame(payems_ss, cpi_ss, retail_ss)
colnames(mydata) <- c("payems", "cpi", "retail")
head(mydata, 3)
# Compute first differences and handle missing values
mydf <- mydata %>%
mutate(payemsD1 = tsibble::difference(payems, differences = 1),
cpiD1 = tsibble::difference(cpi, differences = 1),
retailD1 = tsibble::difference(retail, differences = 1)) %>%
dplyr::select(c(payemsD1, cpiD1, retailD1)) %>%
na.omit()
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
# Check if `mydf` has rows and columns
if (nrow(mydf) == 0 || ncol(mydf) == 0) {
stop("The transformed dataset `mydf` is empty. Check the data processing steps for potential issues.")
}
# Convert to up (+1), down (-1), or no change (0)
mydf_mat <- apply(mydf, 2, sign)
head(mydf_mat, 3)
## payemsD1 cpiD1 retailD1
## 2 1 1 1
## 3 1 1 1
## 4 1 -1 -1
# Calculate diffusion index
pos <- apply(mydf_mat, 1, function(row) sum(row > 0))
neg <- apply(mydf_mat, 1, function(row) sum(row < 0))
tot <- pos + neg
index <- (pos / tot - neg / tot) * 100
# Apply a moving average for smoothing
ma_index <- zoo::rollmean(index, 7, align = "right")
## Visualization of Diffusion Index
Date <- seq.Date(from = as.Date("2010-05-01"), length.out = length(ma_index), by = "month")
data_viz <- cbind.data.frame(Date, ma_index)
# Plot the diffusion index
ggplot(data_viz, aes(x = Date, y = ma_index)) +
geom_line(color = "blue") +
geom_smooth(color = "red", method = "loess") +
labs(title = "Diffusion Index for U.S. Economy",
x = "Date",
y = "Index Value") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

#the graph represents the diffusion index constructed using three key economic variables: PAYEMS (Nonfarm Payrolls), CPIAUCSL (Consumer Price Index), and RETAILSMSA (Retail Sales). The blue line shows the raw diffusion index, while the red LOESS-smoothed curve highlights long-term trends.
#The smoothed diffusion index (using a 7-period moving average) provides clearer visibility into the broader trends, filtering out noise.
#The constructed index suggests positive momentum for the U.S. economy, as indicated by a positive value in the most recent observation. Specifically, the tail-end value of the smoothed index is positive
## Comparison with CFNAI Diffusion Index
# Fetch CFNAI Diffusion Index
getSymbols("CFNAIDIFF", src = "FRED", from = "2010-01-01", to = "2024-09-01", return.class = 'xts')
## [1] "CFNAIDIFF"
cfnaidi <- CFNAIDIFF["2010-05-01/2024-09-01"] |> ts_ts()
cfnaidi <- cfnaidi[-c(1,2,3,4)]
# Align lengths of both indices
comparison_data <- data.frame(
Date = Date,
MyIndex = ma_index[1:length(cfnaidi)],
CFNAI = cfnaidi
)
# Calculate correlation
correlation <- cor(comparison_data$MyIndex, comparison_data$CFNAI, use = "complete.obs")
correlation
## [1] 0.257
# Plot the indices side by side
ggplot(comparison_data, aes(x = Date)) +
geom_line(aes(y = MyIndex, color = "My Index")) +
geom_line(aes(y = CFNAI, color = "CFNAI Diffusion Index")) +
labs(title = "Comparison of Diffusion Indices",
x = "Date",
y = "Index Value") +
scale_color_manual(name = "Legend", values = c("My Index" = "blue", "CFNAI Diffusion Index" = "green")) +
theme_minimal()

# this graph compares the constructed diffusion index (blue line) with the CFNAI (Chicago Fed National Activity Index) Diffusion Index (green line) over the same period. This comparison evaluates the validity of the constructed index as a proxy for macroeconomic trends.
#Both indices generally move in the same direction, reflecting broad economic conditions.
#However, the correlation between the two indices is calculated as 0.257, suggesting a low to moderate relationship. This indicates that while there is some alignment, differences in methodology and variables result in divergence during certain periods.