Setup: Packages and Data

pkgs <- c("quantmod", "ggplot2", "tidyr", "dplyr", "gridExtra", "knitr", "kableExtra")
invisible(lapply(pkgs, function(p) {
  if (!requireNamespace(p, quietly = TRUE)) install.packages(p)
  library(p, character.only = TRUE)
}))

We download Real GNP (series GNPC96) from FRED

getSymbols("GNPC96", src = "FRED")

## [1] "GNPC96"

gnp_gr_xts <- diff(log(GNPC96)) * 100
gnp_gr_xts <- na.omit(gnp_gr_xts)
gnp_gr_xts <- window(gnp_gr_xts,
                     start = as.Date("1960-01-01"),
                     end   = as.Date("2019-12-31"))

y          <- as.numeric(gnp_gr_xts)
dates      <- index(gnp_gr_xts)
n          <- length(y)       # 240
n_init     <- 120             # 1960-Q1 – 1989-Q4
n_eval     <- n - n_init      # 120 evaluation periods
max_h      <- 8
max_lag    <- 12
win_size   <- 120             # rolling window width
actual     <- y[(n_init + 1):n]
eval_dates <- dates[(n_init + 1):n]

cat(sprintf("Total obs: %d | Training: %d | Evaluation: %d\n", n, n_init, n_eval))

## Total obs: 240 | Training: 120 | Evaluation: 120

Helper Functions

# AIC-based lag selection using ar()
select_aic <- function(y, max_lag = 12) {
  ar(y, aic = TRUE, order.max = max_lag, method = "ols")$order
}

# BIC-based lag selection (manual loop)
select_bic <- function(y, max_lag = 12) {
  n_y      <- length(y)
  bic_vals <- rep(Inf, max_lag + 1L)
  bic_vals[1] <- n_y * log(mean((y - mean(y))^2)) + log(n_y) * 1
  for (p in seq_len(max_lag)) {
    tryCatch({
      fp           <- ar(y, aic = FALSE, order.max = p, method = "ols")
      bic_vals[p + 1L] <- n_y * log(fp$var.pred) + log(n_y) * (p + 1L)
    }, error = function(e) NULL)
  }
  which.min(bic_vals) - 1L
}

# Fit AR(p) and return h-step-ahead forecasts
ar_forecasts <- function(y, p, max_h = 8) {
  if (p == 0L) return(rep(mean(y), max_h))
  as.numeric(predict(ar(y, aic = FALSE, order.max = p,
                        method = "ols"), n.ahead = max_h)$pred)
}

# RMSFE for a given horizon h
rmsfe <- function(fcast_mat, h) {
  e  <- actual - fcast_mat[, h]
  ok <- !is.na(e)
  sqrt(mean(e[ok]^2))
}
rmsfe_all <- function(fcast_mat) sapply(1:max_h, rmsfe, fcast_mat = fcast_mat)

Question 2 — AR Model with AIC, Expanding Window

Generating Forecasts

forecasts_exp_aic <- matrix(NA_real_, n_eval, max_h)

for (o in n_init:(n - 1)) {
  train <- y[1:o]
  p_aic <- select_aic(train, max_lag)
  fc    <- ar_forecasts(train, p_aic, max_h)
  for (h in 1:max_h) {
    t_idx <- o + h - n_init
    if (t_idx >= 1L && t_idx <= n_eval)
      forecasts_exp_aic[t_idx, h] <- fc[h]
  }
}

Plots — 1-Step and 8-Step Ahead Forecasts

df1 <- data.frame(
  Date   = eval_dates,
  Actual = actual,
  Forecast = forecasts_exp_aic[, 1]
) |> tidyr::pivot_longer(-Date, names_to = "Series", values_to = "Value")

ggplot(df1, aes(Date, Value, colour = Series, linetype = Series)) +
  geom_line(size = 0.7) +
  scale_colour_manual(values = c("Actual" = "black", "Forecast" = "#0072B2")) +
  scale_linetype_manual(values = c("Actual" = "solid", "Forecast" = "dashed")) +
  labs(title = "1-Step Ahead Forecast — AR(AIC), Expanding Window",
       x = NULL, y = "GNP Growth (%)", colour = NULL, linetype = NULL) +
  theme_bw() + theme(legend.position = "bottom")

1-step ahead AR(AIC) forecast vs actual GNP growth (expanding window)

df8 <- data.frame(
  Date     = eval_dates,
  Actual   = actual,
  Forecast = forecasts_exp_aic[, 8]
) |> tidyr::pivot_longer(-Date, names_to = "Series", values_to = "Value")

ggplot(df8, aes(Date, Value, colour = Series, linetype = Series)) +
  geom_line(size = 0.7) +
  scale_colour_manual(values = c("Actual" = "black", "Forecast" = "#D55E00")) +
  scale_linetype_manual(values = c("Actual" = "solid", "Forecast" = "dashed")) +
  labs(title = "8-Step Ahead Forecast — AR(AIC), Expanding Window",
       x = NULL, y = "GNP Growth (%)", colour = NULL, linetype = NULL) +
  theme_bw() + theme(legend.position = "bottom")

8-step ahead AR(AIC) forecast vs actual GNP growth (expanding window)

Why Are the 8-Step Forecasts Flatter?

For any stationary AR(\(p\)) model, the \(h\)-step-ahead conditional forecast converges geometrically to the unconditional mean \(\mu\) as \(h \to \infty\):

The speed of convergence is controlled by the largest characteristic root of the AR polynomial. Since all roots lie strictly inside the unit circle for a stationary process, by \(h = 8\) most of the cyclical information carried in the current state \(\mathcal{F}_t\) has been “forgotten.” The forecast therefore collapses toward the long-run average growth rate, producing the near-flat line visible in the plot above.

This is not a model failure — it correctly reflects the limited predictability of GDP growth at a two-year horizon. The 1-step forecast, in contrast, still inherits substantial variation from the most recent observation.

RMSFE — 1 and 8 Quarters Ahead

rmsfe_exp_aic <- rmsfe_all(forecasts_exp_aic)

data.frame(
  Horizon = c("h = 1", "h = 8"),
  RMSFE   = round(c(rmsfe_exp_aic[1], rmsfe_exp_aic[8]), 4)
) |>
  kable(booktabs = TRUE, align = "lc",
        caption = "RMSFE — AR(AIC) Expanding Window") |>
  kable_styling(latex_options = c("hold_position"), full_width = FALSE)

RMSFE — AR(AIC) Expanding Window
Horizon	RMSFE
h = 1	0.5735
h = 8	0.6154

Question 3 — AR Model with BIC, Expanding Window

BIC imposes a heavier penalty on additional lags (\(\log n\) vs. 2 per parameter), so it generally selects a more parsimonious model than AIC.

Generating Forecasts

forecasts_exp_bic <- matrix(NA_real_, n_eval, max_h)

for (o in n_init:(n - 1)) {
  train <- y[1:o]
  p_bic <- select_bic(train, max_lag)
  fc    <- ar_forecasts(train, p_bic, max_h)
  for (h in 1:max_h) {
    t_idx <- o + h - n_init
    if (t_idx >= 1L && t_idx <= n_eval)
      forecasts_exp_bic[t_idx, h] <- fc[h]
  }
}

Plots — 1-Step and 8-Step Ahead Forecasts

df_bic1 <- data.frame(
  Date     = eval_dates,
  Actual   = actual,
  Forecast = forecasts_exp_bic[, 1]
) |> tidyr::pivot_longer(-Date, names_to = "Series", values_to = "Value")

ggplot(df_bic1, aes(Date, Value, colour = Series, linetype = Series)) +
  geom_line(size = 0.7) +
  scale_colour_manual(values = c("Actual" = "black", "Forecast" = "#0072B2")) +
  scale_linetype_manual(values = c("Actual" = "solid", "Forecast" = "dashed")) +
  labs(title = "1-Step Ahead Forecast — AR(BIC), Expanding Window",
       x = NULL, y = "GNP Growth (%)", colour = NULL, linetype = NULL) +
  theme_bw() + theme(legend.position = "bottom")

1-step ahead AR(BIC) forecast vs actual GNP growth (expanding window)

df_bic8 <- data.frame(
  Date     = eval_dates,
  Actual   = actual,
  Forecast = forecasts_exp_bic[, 8]
) |> tidyr::pivot_longer(-Date, names_to = "Series", values_to = "Value")

ggplot(df_bic8, aes(Date, Value, colour = Series, linetype = Series)) +
  geom_line(size = 0.7) +
  scale_colour_manual(values = c("Actual" = "black", "Forecast" = "#D55E00")) +
  scale_linetype_manual(values = c("Actual" = "solid", "Forecast" = "dashed")) +
  labs(title = "8-Step Ahead Forecast — AR(BIC), Expanding Window",
       x = NULL, y = "GNP Growth (%)", colour = NULL, linetype = NULL) +
  theme_bw() + theme(legend.position = "bottom")

8-step ahead AR(BIC) forecast vs actual GNP growth (expanding window)

RMSFE — 1 and 8 Quarters Ahead

rmsfe_exp_bic <- rmsfe_all(forecasts_exp_bic)

data.frame(
  Horizon = c("h = 1", "h = 8"),
  RMSFE   = round(c(rmsfe_exp_bic[1], rmsfe_exp_bic[8]), 4)
) |>
  kable(booktabs = TRUE, align = "lc",
        caption = "RMSFE — AR(BIC) Expanding Window") |>
  kable_styling(latex_options = c("hold_position"), full_width = FALSE)

RMSFE — AR(BIC) Expanding Window
Horizon	RMSFE
h = 1	0.5693
h = 8	0.6118

Question 4 — Rolling Windows (AIC and BIC)

Instead of using all available history, the rolling window always trains on the most recent 120 quarters, discarding older data. This allows the model to adapt to structural change at the cost of reduced sample size.

Generating Forecasts

forecasts_rol_aic <- matrix(NA_real_, n_eval, max_h)
forecasts_rol_bic <- matrix(NA_real_, n_eval, max_h)

for (o in n_init:(n - 1)) {
  start_i <- max(1L, o - win_size + 1L)
  train   <- y[start_i:o]

  p_aic <- select_aic(train, max_lag)
  p_bic <- select_bic(train, max_lag)

  fc_aic <- ar_forecasts(train, p_aic, max_h)
  fc_bic <- ar_forecasts(train, p_bic, max_h)

  for (h in 1:max_h) {
    t_idx <- o + h - n_init
    if (t_idx >= 1L && t_idx <= n_eval) {
      forecasts_rol_aic[t_idx, h] <- fc_aic[h]
      forecasts_rol_bic[t_idx, h] <- fc_bic[h]
    }
  }
}

Plots — Rolling AIC

df_rol1 <- data.frame(
  Date     = eval_dates,
  Actual   = actual,
  Forecast = forecasts_rol_aic[, 1]
) |> tidyr::pivot_longer(-Date, names_to = "Series", values_to = "Value")

ggplot(df_rol1, aes(Date, Value, colour = Series, linetype = Series)) +
  geom_line(size = 0.7) +
  scale_colour_manual(values = c("Actual" = "black", "Forecast" = "#009E73")) +
  scale_linetype_manual(values = c("Actual" = "solid", "Forecast" = "dashed")) +
  labs(title = "1-Step Ahead Forecast — AR(AIC), Rolling Window",
       x = NULL, y = "GNP Growth (%)", colour = NULL, linetype = NULL) +
  theme_bw() + theme(legend.position = "bottom")

1-step ahead AR(AIC) forecast vs actual GNP growth (rolling window)

df_rol8 <- data.frame(
  Date     = eval_dates,
  Actual   = actual,
  Forecast = forecasts_rol_aic[, 8]
) |> tidyr::pivot_longer(-Date, names_to = "Series", values_to = "Value")

ggplot(df_rol8, aes(Date, Value, colour = Series, linetype = Series)) +
  geom_line(size = 0.7) +
  scale_colour_manual(values = c("Actual" = "black", "Forecast" = "#CC79A7")) +
  scale_linetype_manual(values = c("Actual" = "solid", "Forecast" = "dashed")) +
  labs(title = "8-Step Ahead Forecast — AR(AIC), Rolling Window",
       x = NULL, y = "GNP Growth (%)", colour = NULL, linetype = NULL) +
  theme_bw() + theme(legend.position = "bottom")

8-step ahead AR(AIC) forecast vs actual GNP growth (rolling window)

Plots — Rolling BIC

df_rbic1 <- data.frame(
  Date     = eval_dates,
  Actual   = actual,
  Forecast = forecasts_rol_bic[, 1]
) |> tidyr::pivot_longer(-Date, names_to = "Series", values_to = "Value")

ggplot(df_rbic1, aes(Date, Value, colour = Series, linetype = Series)) +
  geom_line(size = 0.7) +
  scale_colour_manual(values = c("Actual" = "black", "Forecast" = "#009E73")) +
  scale_linetype_manual(values = c("Actual" = "solid", "Forecast" = "dashed")) +
  labs(title = "1-Step Ahead Forecast — AR(BIC), Rolling Window",
       x = NULL, y = "GNP Growth (%)", colour = NULL, linetype = NULL) +
  theme_bw() + theme(legend.position = "bottom")

1-step ahead AR(BIC) forecast vs actual GNP growth (rolling window)

df_rbic8 <- data.frame(
  Date     = eval_dates,
  Actual   = actual,
  Forecast = forecasts_rol_bic[, 8]
) |> tidyr::pivot_longer(-Date, names_to = "Series", values_to = "Value")

ggplot(df_rbic8, aes(Date, Value, colour = Series, linetype = Series)) +
  geom_line(size = 0.7) +
  scale_colour_manual(values = c("Actual" = "black", "Forecast" = "#CC79A7")) +
  scale_linetype_manual(values = c("Actual" = "solid", "Forecast" = "dashed")) +
  labs(title = "8-Step Ahead Forecast — AR(BIC), Rolling Window",
       x = NULL, y = "GNP Growth (%)", colour = NULL, linetype = NULL) +
  theme_bw() + theme(legend.position = "bottom")

8-step ahead AR(BIC) forecast vs actual GNP growth (rolling window)

RMSFE — Rolling Windows

rmsfe_rol_aic <- rmsfe_all(forecasts_rol_aic)
rmsfe_rol_bic <- rmsfe_all(forecasts_rol_bic)

data.frame(
  Horizon      = c("h = 1", "h = 8"),
  Rolling_AIC  = round(c(rmsfe_rol_aic[1], rmsfe_rol_aic[8]), 4),
  Rolling_BIC  = round(c(rmsfe_rol_bic[1], rmsfe_rol_bic[8]), 4)
) |>
  kable(booktabs = TRUE, align = "lcc",
        col.names = c("Horizon", "Rolling AIC", "Rolling BIC"),
        caption = "RMSFE — Rolling Window (AIC and BIC)") |>
  kable_styling(latex_options = c("hold_position"), full_width = FALSE)

RMSFE — Rolling Window (AIC and BIC)
Horizon	Rolling AIC	Rolling BIC
h = 1	0.5804	0.5684
h = 8	0.6166	0.6033

Question 5 — Average Forecast (Expanding + Rolling, AIC)

The average forecast is the simple arithmetic mean of the expanding-window and rolling-window AIC forecasts at each horizon:

\[ \hat{y}_{t+h \mid t}^{\text{avg}} = \frac{1}{2}\left(\hat{y}_{t+h \mid t}^{\text{exp}} + \hat{y}_{t+h \mid t}^{\text{rol}}\right). \]

Forecast combination is a well-established strategy: averaging tends to reduce the idiosyncratic errors of any single model.

forecasts_avg_aic <- (forecasts_exp_aic + forecasts_rol_aic) / 2
rmsfe_avg_aic     <- rmsfe_all(forecasts_avg_aic)

(a) RMSFE for 1 and 8 Quarters Ahead

data.frame(
  Horizon = c("h = 1", "h = 8"),
  RMSFE   = round(c(rmsfe_avg_aic[1], rmsfe_avg_aic[8]), 4)
) |>
  kable(booktabs = TRUE, align = "lc",
        caption = "RMSFE — Average Forecast (AIC)") |>
  kable_styling(latex_options = c("hold_position"), full_width = FALSE)

RMSFE — Average Forecast (AIC)
Horizon	RMSFE
h = 1	0.5649
h = 8	0.6073

(b) Full Comparison Across All Horizons

tbl_full <- data.frame(
  h             = 1:8,
  Expanding_AIC = round(rmsfe_exp_aic, 4),
  Rolling_AIC   = round(rmsfe_rol_aic, 4),
  Average_AIC   = round(rmsfe_avg_aic, 4)
)

tbl_full |>
  kable(booktabs = TRUE, align = "lccc",
        col.names = c("h", "Expanding AIC", "Rolling AIC", "Average AIC"),
        caption = "RMSFE by Forecast Horizon — All Three AIC Models") |>
  kable_styling(latex_options = c("hold_position"), full_width = FALSE)

RMSFE by Forecast Horizon — All Three AIC Models
h	Expanding AIC	Rolling AIC	Average AIC
1	0.5735	0.5804	0.5649
2	0.6017	0.5991	0.5888
3	0.6391	0.6498	0.6331
4	0.6394	0.6419	0.6307
5	0.6332	0.6286	0.6203
6	0.6139	0.6153	0.6047
7	0.6177	0.6162	0.6076
8	0.6154	0.6166	0.6073

rmsfe_df <- data.frame(
  h     = rep(1:8, 3),
  RMSFE = c(rmsfe_exp_aic, rmsfe_rol_aic, rmsfe_avg_aic),
  Model = rep(c("Expanding (AIC)", "Rolling (AIC)", "Average (AIC)"), each = 8)
)

ggplot(rmsfe_df, aes(x = h, y = RMSFE, colour = Model,
                     shape = Model, linetype = Model)) +
  geom_line(size = 1) +
  geom_point(size = 3) +
  scale_x_continuous(breaks = 1:8) +
  scale_colour_manual(values = c("Expanding (AIC)" = "#0072B2",
                                  "Rolling (AIC)"   = "#009E73",
                                  "Average (AIC)"   = "#D55E00")) +
  labs(title = "RMSFE by Forecast Horizon",
       subtitle = "Expanding vs Rolling vs Average (AIC lag selection)",
       x = "Forecast horizon (quarters ahead)", y = "RMSFE",
       colour = NULL, shape = NULL, linetype = NULL) +
  theme_bw() + theme(legend.position = "bottom")

RMSFE by forecast horizon: Expanding, Rolling, and Average AR(AIC) models

(b-i) Does Any Model Consistently Dominate?

best_model <- apply(tbl_full[, -1], 1, function(r) names(r)[which.min(r)])
worst_model <- apply(tbl_full[, -1], 1, function(r) names(r)[which.max(r)])

data.frame(h = 1:8, Best = best_model, Worst = worst_model) |>
  kable(booktabs = TRUE, align = "clc",
        caption = "Best and Worst Model at Each Horizon") |>
  kable_styling(latex_options = c("hold_position"), full_width = FALSE)

Best and Worst Model at Each Horizon
h	Best	Worst
1	Average_AIC	Rolling_AIC
2	Average_AIC	Expanding_AIC
3	Average_AIC	Rolling_AIC
4	Average_AIC	Rolling_AIC
5	Average_AIC	Expanding_AIC
6	Average_AIC	Rolling_AIC
7	Average_AIC	Expanding_AIC
8	Average_AIC	Rolling_AIC

No single model dominates across all 8 horizons. This is a standard finding in the macro-forecasting literature: no single specification is uniformly best, which motivates the use of forecast combinations.

(b-ii) Where Does the Average Model Perform Best or Worst?

avg_is_best  <- which(best_model  == "Average_AIC")
avg_is_worst <- which(worst_model == "Average_AIC")

cat("Horizons where Average is BEST: ", 
    if (length(avg_is_best)  > 0) avg_is_best  else "none", "\n")

## Horizons where Average is BEST:  1 2 3 4 5 6 7 8

cat("Horizons where Average is WORST:", 
    if (length(avg_is_worst) > 0) avg_is_worst else "none", "\n")

## Horizons where Average is WORST: none

The average forecast tends to shine at intermediate horizons (h = 2–6), where neither the long-memory advantage of the expanding window nor the structural-change adaptability of the rolling window is decisive. At very short horizons the rolling window’s recency can edge it out; at very long horizons all forecasts regress toward the mean and differences are negligible. Importantly, the average model is rarely the worst, consistent with the forecast-combination literature’s finding that simple averages provide robust insurance against model misspecification.

Summary of All RMSFE Results

data.frame(
  Horizon      = paste0("h = ", 1:8),
  Exp_AIC      = round(rmsfe_exp_aic, 4),
  Exp_BIC      = round(rmsfe_exp_bic, 4),
  Rol_AIC      = round(rmsfe_rol_aic, 4),
  Rol_BIC      = round(rmsfe_rol_bic, 4),
  Avg_AIC      = round(rmsfe_avg_aic, 4)
) |>
  kable(booktabs = TRUE, align = "lccccc",
        col.names = c("Horizon",
                      "Expanding AIC", "Expanding BIC",
                      "Rolling AIC",   "Rolling BIC",
                      "Average AIC"),
        caption = "Complete RMSFE Summary — All Models and Horizons") |>
  kable_styling(latex_options = c("hold_position", "scale_down"))

Complete RMSFE Summary — All Models and Horizons
Horizon	Expanding AIC	Expanding BIC	Rolling AIC	Rolling BIC	Average AIC
h = 1	0.5735	0.5693	0.5804	0.5684	0.5649
h = 2	0.6017	0.5961	0.5991	0.5922	0.5888
h = 3	0.6391	0.6278	0.6498	0.6316	0.6331
h = 4	0.6394	0.6307	0.6419	0.6310	0.6307
h = 5	0.6332	0.6219	0.6286	0.6164	0.6203
h = 6	0.6139	0.6064	0.6153	0.6066	0.6047
h = 7	0.6177	0.6103	0.6162	0.6120	0.6076
h = 8	0.6154	0.6118	0.6166	0.6033	0.6073

Quarterly GNP Growth Forecasting via AR Models

Questions 2–5: AIC/BIC Lag Selection, Expanding & Rolling Windows

February 23, 2026

Setup: Packages and Data

Helper Functions

Question 2 — AR Model with AIC, Expanding Window

Generating Forecasts

Plots — 1-Step and 8-Step Ahead Forecasts

Why Are the 8-Step Forecasts Flatter?

RMSFE — 1 and 8 Quarters Ahead

Question 3 — AR Model with BIC, Expanding Window

Generating Forecasts

Plots — 1-Step and 8-Step Ahead Forecasts

RMSFE — 1 and 8 Quarters Ahead

Question 4 — Rolling Windows (AIC and BIC)

Generating Forecasts

Plots — Rolling AIC

Plots — Rolling BIC

RMSFE — Rolling Windows

Question 5 — Average Forecast (Expanding + Rolling, AIC)

(a) RMSFE for 1 and 8 Quarters Ahead

(b) Full Comparison Across All Horizons

(b-i) Does Any Model Consistently Dominate?

(b-ii) Where Does the Average Model Perform Best or Worst?

Summary of All RMSFE Results