1. Load Packages

required_packages <- c(
  "quantmod", "forecast", "tseries", "lmtest",
  "vars", "ggplot2", "dplyr", "tidyr", "lubridate", "knitr"
)

installed <- rownames(installed.packages())
for (p in required_packages) {
  if (!(p %in% installed)) install.packages(p)
}

library(quantmod)
library(forecast)
library(tseries)
library(lmtest)
library(vars)
library(ggplot2)
library(dplyr)
library(tidyr)
library(lubridate)
library(knitr)

2. Pull Monthly Data from FRED

symbols <- c("RSAFS", "PAYEMS", "INDPRO")
getSymbols(symbols, src = "FRED")
## [1] "RSAFS"  "PAYEMS" "INDPRO"
data_xts <- na.omit(merge(RSAFS, PAYEMS, INDPRO))
colnames(data_xts) <- c("retail_sales", "payems", "indpro")

data_df <- data.frame(
  date = index(data_xts),
  coredata(data_xts)
)

head(data_df)
##         date retail_sales payems  indpro
## 1 1992-01-01       159177 108365 61.4616
## 2 1992-02-01       159189 108312 61.8955
## 3 1992-03-01       158647 108367 62.4232
## 4 1992-04-01       159921 108516 62.8970
## 5 1992-05-01       160471 108646 63.1040
## 6 1992-06-01       161205 108706 63.1363
tail(data_df)
##           date retail_sales payems   indpro
## 405 2025-09-01       732192 158548 101.6680
## 406 2025-10-01       731051 158408 101.2075
## 407 2025-11-01       734718 158449 101.3605
## 408 2025-12-01       734717 158432 101.6781
## 409 2026-01-01       733955 158592 102.3963
## 410 2026-02-01       738366 158459 102.5510

3. Visualize the Series

data_long <- data_df %>%
  pivot_longer(-date, names_to = "series", values_to = "value")

ggplot(data_long, aes(x = date, y = value)) +
  geom_line() +
  facet_wrap(~ series, scales = "free_y", ncol = 1) +
  labs(
    title = "Monthly FRED Series",
    x = "Date",
    y = "Value"
  ) +
  theme_minimal()

4. Convert to Time Series Objects

start_year  <- year(min(data_df$date))
start_month <- month(min(data_df$date))

y_ts <- ts(
  data_df$retail_sales,
  start = c(start_year, start_month),
  frequency = 12
)

x_ts <- ts(
  cbind(
    payems = data_df$payems,
    indpro = data_df$indpro
  ),
  start = c(start_year, start_month),
  frequency = 12
)

summary(y_ts)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  158647  267897  356291  383359  465240  738366

5. Train/Test Split

h <- 24
n <- length(y_ts)

train_y <- window(y_ts, end = time(y_ts)[n - h])
test_y  <- window(y_ts, start = time(y_ts)[n - h + 1])

train_x <- window(x_ts, end = time(x_ts)[n - h])
test_x  <- window(x_ts, start = time(x_ts)[n - h + 1])

length(train_y)
## [1] 386
length(test_y)
## [1] 24

6. Helper Function for Manual Metrics

calc_test_metrics <- function(actual, predicted, train_series = NULL) {
  actual <- as.numeric(actual)
  predicted <- as.numeric(predicted)
  err <- actual - predicted

  rmse <- sqrt(mean(err^2, na.rm = TRUE))
  mae  <- mean(abs(err), na.rm = TRUE)
  mape <- mean(abs(err / actual), na.rm = TRUE) * 100

  if (!is.null(train_series)) {
    train_series <- as.numeric(train_series)
    scale_denom <- mean(abs(diff(train_series)), na.rm = TRUE)
    mase <- mae / scale_denom
  } else {
    mase <- NA_real_
  }

  c(RMSE = rmse, MAE = mae, MAPE = mape, MASE = mase)
}

7. Part I — ETS vs ARIMA

7.1 Fit ETS

fit_ets <- ets(train_y)
fit_ets
## ETS(M,A,N) 
## 
## Call:
## ets(y = train_y)
## 
##   Smoothing parameters:
##     alpha = 0.7678 
##     beta  = 0.0016 
## 
##   Initial states:
##     l = 157212.7187 
##     b = 883.6035 
## 
##   sigma:  0.0176
## 
##      AIC     AICc      BIC 
## 9014.165 9014.323 9033.944

7.2 Fit ARIMA

fit_arima <- auto.arima(
  train_y,
  seasonal = TRUE,
  stepwise = FALSE,
  approximation = FALSE
)
fit_arima
## Series: train_y 
## ARIMA(0,1,2) with drift 
## 
## Coefficients:
##           ma1      ma2      drift
##       -0.0334  -0.2424  1371.5306
## s.e.   0.0495   0.0497   282.1368
## 
## sigma^2 = 58667293:  log likelihood = -3988.17
## AIC=7984.34   AICc=7984.45   BIC=8000.15

7.3 Forecast on the Test Set

fc_ets   <- forecast(fit_ets, h = h)
fc_arima <- forecast(fit_arima, h = h)

7.4 Plot Forecasts

autoplot(window(y_ts, start = time(y_ts)[length(train_y) - 60])) +
  autolayer(fc_ets$mean, series = "ETS Forecast") +
  autolayer(fc_arima$mean, series = "ARIMA Forecast") +
  autolayer(test_y, series = "Actual Test Data") +
  labs(
    title = "ETS vs ARIMA Forecasts",
    x = "Time",
    y = "Retail Sales"
  ) +
  theme_minimal()

7.5 Accuracy Comparison

acc_ets   <- accuracy(fc_ets, test_y)
acc_arima <- accuracy(fc_arima, test_y)

baseline_results <- rbind(
  ETS   = acc_ets["Test set", c("RMSE", "MAE", "MAPE", "MASE")],
  ARIMA = acc_arima["Test set", c("RMSE", "MAE", "MAPE", "MASE")]
)

kable(round(baseline_results, 3), caption = "Test-Set Accuracy: ETS vs ARIMA")
Test-Set Accuracy: ETS vs ARIMA
RMSE MAE MAPE MASE
ETS 17105.02 15159.68 2.096 0.764
ARIMA 13793.45 12021.92 1.661 0.606

8. Part II — Dynamic Regression / ARIMAX

8.1 Linear Regression with Trend, Season, and External Regressors

train_df <- data.frame(
  y = as.numeric(train_y),
  payems = as.numeric(train_x[, "payems"]),
  indpro = as.numeric(train_x[, "indpro"]),
  trend = seq_along(train_y),
  season = factor(cycle(train_y))
)

fit_lm <- lm(y ~ trend + season + payems + indpro, data = train_df)
summary(fit_lm)
## 
## Call:
## lm(formula = y ~ trend + season + payems + indpro, data = train_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -107542   -9777    1732   12851   73570 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3.347e+05  3.930e+04  -8.517 4.14e-16 ***
## trend        8.602e+02  3.399e+01  25.303  < 2e-16 ***
## season2     -9.984e+02  6.724e+03  -0.148    0.882    
## season3     -1.527e+03  6.777e+03  -0.225    0.822    
## season4     -7.793e+02  6.781e+03  -0.115    0.909    
## season5      7.663e+02  6.779e+03   0.113    0.910    
## season6      1.831e+03  6.778e+03   0.270    0.787    
## season7      1.420e+03  6.778e+03   0.210    0.834    
## season8      1.672e+03  6.778e+03   0.247    0.805    
## season9      2.809e+02  6.777e+03   0.041    0.967    
## season10     7.232e+02  6.777e+03   0.107    0.915    
## season11    -1.684e+01  6.778e+03  -0.002    0.998    
## season12    -9.272e+02  6.778e+03  -0.137    0.891    
## payems       6.737e+00  4.366e-01  15.433  < 2e-16 ***
## indpro      -4.067e+03  2.973e+02 -13.679  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27310 on 371 degrees of freedom
## Multiple R-squared:  0.961,  Adjusted R-squared:  0.9595 
## F-statistic: 652.5 on 14 and 371 DF,  p-value: < 2.2e-16

8.2 Forecast Linear Model on Test Set

test_df <- data.frame(
  payems = as.numeric(test_x[, "payems"]),
  indpro = as.numeric(test_x[, "indpro"]),
  trend = length(train_y) + seq_len(h),
  season = factor(cycle(test_y), levels = levels(train_df$season))
)

lm_pred <- predict(fit_lm, newdata = test_df)

fc_lm <- ts(
  lm_pred,
  start = start(test_y),
  frequency = frequency(test_y)
)

fc_lm
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 2024                   649023.1 651933.1 652344.5 654729.5 659270.5 658591.9
## 2025 666152.9 662089.9 663112.2 665092.1 668245.9 667949.7 667140.3 668876.0
## 2026 669175.2 667511.7                                                      
##           Sep      Oct      Nov      Dec
## 2024 661636.9 664540.0 666282.8 663620.9
## 2025 668681.0 670913.0 670687.1 669230.7
## 2026

8.3 Fit ARIMAX

fit_arimax <- auto.arima(
  train_y,
  xreg = train_x,
  seasonal = TRUE,
  stepwise = FALSE,
  approximation = FALSE
)
fit_arimax
## Series: train_y 
## Regression with ARIMA(0,1,2) errors 
## 
## Coefficients:
##           ma1      ma2     drift  payems     indpro
##       -0.0175  -0.2089  822.7790  2.0176  2895.5577
## s.e.   0.0509   0.0507  238.5831  0.4210   468.3907
## 
## sigma^2 = 36105000:  log likelihood = -3893.69
## AIC=7799.39   AICc=7799.61   BIC=7823.11

8.4 Forecast ARIMAX on Test Set

fc_arimax <- forecast(fit_arimax, xreg = test_x, h = h)

8.5 Compare All Models

acc_arimax <- accuracy(fc_arimax, test_y)

lm_metrics <- calc_test_metrics(
  actual = test_y,
  predicted = fc_lm,
  train_series = train_y
)

all_results <- rbind(
  ETS    = acc_ets["Test set", c("RMSE", "MAE", "MAPE", "MASE")],
  ARIMA  = acc_arima["Test set", c("RMSE", "MAE", "MAPE", "MASE")],
  LM     = lm_metrics[c("RMSE", "MAE", "MAPE", "MASE")],
  ARIMAX = acc_arimax["Test set", c("RMSE", "MAE", "MAPE", "MASE")]
)

kable(round(all_results, 3), caption = "Test-Set Accuracy: All Models")
Test-Set Accuracy: All Models
RMSE MAE MAPE MASE
ETS 17105.02 15159.68 2.096 0.764
ARIMA 13793.45 12021.92 1.661 0.606
LM 52802.08 51711.45 7.200 14.494
ARIMAX 17599.73 15460.07 2.137 0.779

8.6 Best Model

best_model <- rownames(all_results)[which.min(all_results[, "RMSE"])]
best_model
## [1] "ARIMA"

8.7 Forecast Plot for All Models

plot_start <- time(y_ts)[length(train_y) - 60]

autoplot(window(y_ts, start = plot_start)) +
  autolayer(fc_ets$mean, series = "ETS") +
  autolayer(fc_arima$mean, series = "ARIMA") +
  autolayer(fc_lm, series = "LM") +
  autolayer(fc_arimax$mean, series = "ARIMAX") +
  autolayer(test_y, series = "Actual Test Data") +
  labs(
    title = "Forecast Comparison on Test Period",
    x = "Time",
    y = "Retail Sales"
  ) +
  theme_minimal()

8.8 Residual Diagnostics

checkresiduals(fit_arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,2) with drift
## Q* = 57.502, df = 22, p-value = 5.192e-05
## 
## Model df: 2.   Total lags used: 24
checkresiduals(fit_arimax)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(0,1,2) errors
## Q* = 56.153, df = 22, p-value = 8.13e-05
## 
## Model df: 2.   Total lags used: 24

9. Part III — Granger Causality

9.1 Build Stationary Transformed Data

granger_df <- data.frame(
  date = data_df$date[-1],
  dlog_retail = diff(log(data_df$retail_sales)),
  dlog_payems = diff(log(data_df$payems)),
  dlog_indpro = diff(log(data_df$indpro))
) %>%
  na.omit()

head(granger_df)
##         date   dlog_retail   dlog_payems  dlog_indpro
## 1 1992-02-01  7.538493e-05 -0.0004892074 0.0070348896
## 2 1992-03-01 -3.410567e-03  0.0005076634 0.0084895220
## 3 1992-04-01  7.998335e-03  0.0013740129 0.0075614667
## 4 1992-05-01  3.433298e-03  0.0011972630 0.0032856912
## 5 1992-06-01  4.563606e-03  0.0005520998 0.0005117225
## 6 1992-07-01  1.018339e-02  0.0008000037 0.0091649381

9.2 Stationarity Checks

adf_retail <- adf.test(granger_df$dlog_retail)
adf_payems <- adf.test(granger_df$dlog_payems)
adf_indpro <- adf.test(granger_df$dlog_indpro)

adf_results <- data.frame(
  Series = c("dlog_retail", "dlog_payems", "dlog_indpro"),
  p_value = c(adf_retail$p.value, adf_payems$p.value, adf_indpro$p.value)
)

adf_results$p_value <- round(adf_results$p_value, 5)
kable(adf_results, caption = "ADF Test p-values")
ADF Test p-values
Series p_value
dlog_retail 0.01
dlog_payems 0.01
dlog_indpro 0.01

9.3 Choose Lag Length

gc_mat <- granger_df %>%
  select(dlog_retail, dlog_payems, dlog_indpro)

lag_selection <- VARselect(gc_mat, lag.max = 12, type = "const")
lag_selection$selection
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##     12      3      1     12
p_lag <- lag_selection$selection["AIC(n)"]
p_lag <- as.numeric(p_lag)

if (is.na(p_lag)) p_lag <- 6
p_lag
## [1] 12

9.4 Granger Tests

gc_payems_to_retail <- grangertest(
  dlog_retail ~ dlog_payems,
  order = p_lag,
  data = granger_df
)

gc_retail_to_payems <- grangertest(
  dlog_payems ~ dlog_retail,
  order = p_lag,
  data = granger_df
)

gc_indpro_to_retail <- grangertest(
  dlog_retail ~ dlog_indpro,
  order = p_lag,
  data = granger_df
)

gc_retail_to_indpro <- grangertest(
  dlog_indpro ~ dlog_retail,
  order = p_lag,
  data = granger_df
)

gc_payems_to_retail
## Granger causality test
## 
## Model 1: dlog_retail ~ Lags(dlog_retail, 1:12) + Lags(dlog_payems, 1:12)
## Model 2: dlog_retail ~ Lags(dlog_retail, 1:12)
##   Res.Df  Df      F    Pr(>F)    
## 1    372                         
## 2    384 -12 13.147 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
gc_retail_to_payems
## Granger causality test
## 
## Model 1: dlog_payems ~ Lags(dlog_payems, 1:12) + Lags(dlog_retail, 1:12)
## Model 2: dlog_payems ~ Lags(dlog_payems, 1:12)
##   Res.Df  Df  F    Pr(>F)    
## 1    372                     
## 2    384 -12 11 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
gc_indpro_to_retail
## Granger causality test
## 
## Model 1: dlog_retail ~ Lags(dlog_retail, 1:12) + Lags(dlog_indpro, 1:12)
## Model 2: dlog_retail ~ Lags(dlog_retail, 1:12)
##   Res.Df  Df      F Pr(>F)  
## 1    372                    
## 2    384 -12 1.6864 0.0677 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
gc_retail_to_indpro
## Granger causality test
## 
## Model 1: dlog_indpro ~ Lags(dlog_indpro, 1:12) + Lags(dlog_retail, 1:12)
## Model 2: dlog_indpro ~ Lags(dlog_indpro, 1:12)
##   Res.Df  Df      F    Pr(>F)    
## 1    372                         
## 2    384 -12 5.6253 6.786e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

9.5 Summary Table

extract_p <- function(gt) {
  gt$`Pr(>F)`[2]
}

gc_summary <- data.frame(
  Test = c(
    "PAYEMS -> Retail Sales",
    "Retail Sales -> PAYEMS",
    "INDPRO -> Retail Sales",
    "Retail Sales -> INDPRO"
  ),
  p_value = c(
    extract_p(gc_payems_to_retail),
    extract_p(gc_retail_to_payems),
    extract_p(gc_indpro_to_retail),
    extract_p(gc_retail_to_indpro)
  )
)

gc_summary$p_value <- round(gc_summary$p_value, 5)
kable(gc_summary, caption = "Granger Causality Test Summary")
Granger Causality Test Summary
Test p_value
PAYEMS -> Retail Sales 0.0000
Retail Sales -> PAYEMS 0.0000
INDPRO -> Retail Sales 0.0677
Retail Sales -> INDPRO 0.0000