We will use the FRED-MD dataset to forecast US Real Personal Income (RPI) growth. The forecast period spans from October 2015 to September 2025. We will consider three forecasting methods: ARMA, Factor models, and LASSO.
# load data
data <- read_excel("C:/Users/leesa/Desktop/FRED-MD.xlsx", col_names = FALSE)
# 1. Define indices (Out-of-Sample period: 120 months)
oos_period <- 120
n <- nrow(data)
train_idx <- 1:(n - oos_period)
test_idx <- (n - oos_period + 1):n
# 2. Prepare Y (Target) - Convert to numeric vector
y_all <- as.numeric(unlist(data[, 1]))
y_train <- y_all[train_idx]
y_test <- y_all[test_idx]
# 3. Prepare X (Features) - Convert to matrix and scale
X_all <- data.matrix(data[, -1])
X_scaled <- scale(X_all)
X_train <- X_scaled[train_idx, ]
X_test <- X_scaled[test_idx, ]
# Strategy 1: ARMA (Time Series)
arma_fit <- auto.arima(y_train)
# Strategy 2: Factor Model (PCA)
pca_train <- prcomp(X_train)
# Use top 3 Principal Components
factors_train <- pca_train$x[, 1:3]
fm_fit <- lm(y_train ~ factors_train)
# Strategy 3: LASSO (Machine Learning)
cv_lasso <- cv.glmnet(X_train, y_train, alpha = 1)
# Project test data onto the training rotation matrix
# Strategy 1: ARMA Forecast
y_hat_arma <- as.numeric(forecast(arma_fit, h = oos_period)$mean)
# Strategy 2: Factor Model Forecast
factors_test <- X_test %*% pca_train$rotation[, 1:3]
colnames(factors_test) <- colnames(factors_train)
y_hat_fm <- predict(fm_fit, newdata = as.data.frame(factors_test))
# Strategy 3: LASSO Forecast
y_hat_lasso <- as.numeric(predict(cv_lasso, newx = X_test, s = "lambda.min"))
# Out-Of-Sample MSFE
msfe_arma <- mean((y_test - y_hat_arma)^2)
msfe_fm <- mean((y_test - y_hat_fm)^2)
msfe_lasso <- mean((y_test - y_hat_lasso)^2)
results <- data.frame(
Model = c("ARMA", "Factor Model", "LASSO"),
MSFE = c(msfe_fm, msfe_lasso, msfe_arma)
)
knitr::kable(results, caption = "Out-of-Sample Performance (MSFE)")
| Model | MSFE |
|---|---|
| ARMA | 0.0009122 |
| Factor Model | 0.0004134 |
| LASSO | 0.0007236 |
# [Step 1] Cleanse Data Structures
y_true_plot <- as.numeric(y_test)
y_arma_plot <- as.numeric(y_hat_arma)
y_fm_plot <- as.numeric(y_hat_fm)
y_lasso_plot <- as.numeric(y_hat_lasso)
# [Step 2] Strict Length Alignment
expected_len <- length(y_true_plot)
actual_len <- min(length(y_true_plot), length(y_arma_plot),
length(y_fm_plot), length(y_lasso_plot))
y_true_plot <- tail(y_true_plot, actual_len)
y_arma_plot <- tail(y_arma_plot, actual_len)
y_fm_plot <- tail(y_fm_plot, actual_len)
y_lasso_plot <- tail(y_lasso_plot, actual_len)
# [Step 3] Timeline Generation
dates <- seq(from = as.Date("2025-09-01"),
length.out = actual_len,
by = "-1 month") %>% rev()
# [Step 4] Execute Plotting
plot(dates, y_true_plot, type = "l", lwd = 2, col = "black", xaxt = "n",
main = "Out-of-Sample Forecast Horse Race: 2015.10 - 2025.09",
xlab = "Timeline (Year.Month)", ylab = "RPI Growth Rate",
ylim = range(c(y_true_plot, y_arma_plot, y_fm_plot, y_lasso_plot), na.rm = TRUE))
# Add Model Layers
lines(dates, y_arma_plot, col = "blue", lty = 2, lwd = 2)
lines(dates, y_fm_plot, col = "darkgreen", lwd = 2)
lines(dates, y_lasso_plot, col = "red", lwd = 2)
# X-axis
tick_indices <- round(seq(1, length(dates), length.out = 15))
axis(1, at = dates[tick_indices],
labels = format(dates[tick_indices], "%Y.%m"),
las = 2, cex.axis = 0.7)
# Add Legend
legend("topleft", legend = c("Actual", "ARMA", "Factor", "LASSO"),
col = c("black", "blue", "darkgreen", "red"), lty = c(1, 2, 1, 1),
lwd = 2, bty = "n", cex = 0.8)