Forescast Strategy - Expanding Window


library(zoo)
library(forecast)
library(tidyverse)
library(dplyr)
data <- read.csv("M4_Forecast uncertainty/Thailand_M.csv")
data$date <- as.Date(data$dateid01)
inflation <- zoo(data$p, order.by = data$date)


Parameters


length of out-of-sample forecast = 4
number of rolling samples = 50
last date in the first estimation period = December, 2008

start_year <- as.numeric(format(index(inflation)[1], "%Y"))
start_month <- as.numeric(format(index(inflation)[1], "%m"))

# Parameters
h <- 4
num_forecasts <- 50
initial_end <- as.Date("2008-12-01")

# Find the index of initial_end
initial_index <- which(index(inflation) == initial_end)


Loop for number of forecasts assigned for AR(1)

results <- data.frame(
  step = 1:num_forecasts,
  est_end = as.Date(NA),
  forecast_target = as.Date(NA),
  forecast_value = NA,
  actual_value = NA
)

for (i in 0:(num_forecasts - 1)) {
  est_end_index <- initial_index + i
  forecast_index <- est_end_index + h
  
  if (forecast_index > length(inflation)) break
  
  est_end_date <- index(inflation)[est_end_index]
  forecast_target_date <- index(inflation)[forecast_index]
  
  # Extract estimation window
  train_series <- window(inflation, end = est_end_date)
  train_ts <- ts(as.numeric(train_series), start = c(2003, 1), frequency = 12)
  
  # Fit AR(1) model
  fit <- Arima(train_ts, order = c(1, 0, 0))
  
  # Forecast 4 steps ahead
  fc <- forecast(fit, h = h)
  forecast_value <- fc$mean[h]
  
  # Actual value
  actual_value <- as.numeric(inflation[forecast_index])
  
  # Store results
  results[i + 1, ] <- list(
    step = i + 1,
    est_end = est_end_date,
    forecast_target = forecast_target_date,
    forecast_value = forecast_value,
    actual_value = actual_value
  )
}

print(head(results, 10))
##    step    est_end forecast_target forecast_value actual_value
## 1     1 2008-12-01      2009-04-01      1.1190860        -0.99
## 2     2 2009-01-01      2009-05-01      0.2094913        -3.29
## 3     3 2009-02-01      2009-06-01      0.5027692        -3.96
## 4     4 2009-03-01      2009-07-01      0.3727968        -4.32
## 5     5 2009-04-01      2009-08-01     -0.3105390        -1.06
## 6     6 2009-05-01      2009-09-01     -2.6180143        -0.91
## 7     7 2009-06-01      2009-10-01     -3.3746018         0.39
## 8     8 2009-07-01      2009-11-01     -3.7907355         1.94
## 9     9 2009-08-01      2009-12-01     -0.3608350         3.46
## 10   10 2009-09-01      2010-01-01     -0.2478609         3.98


Calculate RMSE

results$error <- results$forecast_value - results$actual_value
results$sq_error <- results$error^2
rmse <- sqrt(mean(results$sq_error, na.rm = TRUE))
cat("RMSE for AR(1), Expanding Window, Horizon = 4:", round(rmse, 4), "\n")
## RMSE for AR(1), Expanding Window, Horizon = 4: 1.8139



Loop for number of forecasts assigned for AR(2)


results_ar2 <- data.frame(
  step = 1:num_forecasts,
  est_end = as.Date(NA),
  forecast_target = as.Date(NA),
  forecast_value = NA,
  actual_value = NA
)
for (i in 0:(num_forecasts - 1)) {
  est_end_index <- initial_index + i
  forecast_index <- est_end_index + h
  
  if (forecast_index > length(inflation)) break
  
  est_end_date <- index(inflation)[est_end_index]
  forecast_target_date <- index(inflation)[forecast_index]
  
  # Extract estimation window
  train_series <- window(inflation, end = est_end_date)
  train_ts <- ts(as.numeric(train_series), start = c(2003, 1), frequency = 12)
  
  # Fit AR(2) model
  fit <- Arima(train_ts, order = c(2, 0, 0))
  
  # Forecast 4 steps ahead
  fc <- forecast(fit, h = h)
  forecast_value <- fc$mean[h]
  
  # Actual value
  actual_value <- as.numeric(inflation[forecast_index])
  
  # Store results
  results_ar2[i + 1, ] <- list(
    step = i + 1,
    est_end = est_end_date,
    forecast_target = forecast_target_date,
    forecast_value = forecast_value,
    actual_value = actual_value
  )
}

print(head(results_ar2, 10))
##    step    est_end forecast_target forecast_value actual_value
## 1     1 2008-12-01      2009-04-01      0.9735803        -0.99
## 2     2 2009-01-01      2009-05-01      0.7178403        -3.29
## 3     3 2009-02-01      2009-06-01      1.7891562        -3.96
## 4     4 2009-03-01      2009-07-01      1.2800083        -4.32
## 5     5 2009-04-01      2009-08-01      0.2835954        -1.06
## 6     6 2009-05-01      2009-09-01     -3.0123276        -0.91
## 7     7 2009-06-01      2009-10-01     -2.3685061         0.39
## 8     8 2009-07-01      2009-11-01     -2.6655211         1.94
## 9     9 2009-08-01      2009-12-01      3.1109590         3.46
## 10   10 2009-09-01      2010-01-01      0.4934973         3.98


Calculate RMSE

results_ar2$error <- results_ar2$forecast_value - results_ar2$actual_value
results_ar2$sq_error <- results_ar2$error^2
rmse_ar2 <- sqrt(mean(results_ar2$sq_error, na.rm = TRUE))
cat("RMSE for AR(2), Expanding Window, Horizon = 4:", round(rmse_ar2, 4), "\n")
## RMSE for AR(2), Expanding Window, Horizon = 4: 1.7258


Using an expanding window forecast strategy with a 4-month horizon, the AR(2) model yields a lower RMSE (1.73) compared to the AR(1) model (1.81). This suggests that the AR(2) model performs better at capturing short-term inflation dynamics, likely because the inclusion of a second lag provides additional information that improves forecast accuracy. Since the AR(1) model is a restricted version of the AR(2), the latter has more flexibility to adapt to patterns in the data, resulting in more accurate forecasts at this horizon.

Modifying number of forecasts from 50 to 54 for AR(1)

Finding: The RMSE for this modification will decrease the RMSE for the AR(1) model.The number of observations in the estimation sample is larger than before. Thus, assuming no structural change, the coefficients are estimated more precisely and the RMSE falls.

num_forecasts1 <- 54

# Create results table
results_ar1_54 <- data.frame(
  step = 1:num_forecasts1,
  est_end = as.Date(NA),
  forecast_target = as.Date(NA),
  forecast_value = NA,
  actual_value = NA
)

# Expanding window forecast (AR(1))
for (i in 0:(num_forecasts1 - 1)) {
  est_end_index <- initial_index + i
  forecast_index <- est_end_index + h
  
  if (forecast_index > length(inflation)) break
  
  est_end_date <- index(inflation)[est_end_index]
  forecast_target_date <- index(inflation)[forecast_index]
  
  train_series <- window(inflation, end = est_end_date)
  train_ts <- ts(as.numeric(train_series), start = c(2003, 1), frequency = 12)
  
  fit <- Arima(train_ts, order = c(1, 0, 0))
  fc <- forecast(fit, h = h)
  
  forecast_value <- fc$mean[h]
  actual_value <- as.numeric(inflation[forecast_index])
  
  results_ar1_54[i + 1, ] <- list(
    step = i + 1,
    est_end = est_end_date,
    forecast_target = forecast_target_date,
    forecast_value = forecast_value,
    actual_value = actual_value
  )
}

# Calculate RMSE for AR(1) with 54 expanding steps
results_ar1_54$error <- results_ar1_54$forecast_value - results_ar1_54$actual_value
results_ar1_54$sq_error <- results_ar1_54$error^2
rmse_ar1_54 <- sqrt(mean(results_ar1_54$sq_error, na.rm = TRUE))
cat("MSE for AR(1), Expanding Window, Horizon = 4, 54 expanding steps:", round(rmse_ar1_54, 4), "\n")
## MSE for AR(1), Expanding Window, Horizon = 4, 54 expanding steps: 1.7631