library(zoo)
library(forecast)
library(tidyverse)
library(dplyr)
data <- read.csv("M4_Forecast uncertainty/Thailand_M.csv")
data$date <- as.Date(data$dateid01)
inflation <- zoo(data$p, order.by = data$date)
length of out-of-sample forecast = 4
number of rolling
samples = 50
last date in the first estimation period = December,
2008
start_year <- as.numeric(format(index(inflation)[1], "%Y"))
start_month <- as.numeric(format(index(inflation)[1], "%m"))
# Parameters
h <- 4
num_forecasts <- 50
initial_end <- as.Date("2008-12-01")
# Find the index of initial_end
initial_index <- which(index(inflation) == initial_end)
results <- data.frame(
step = 1:num_forecasts,
est_end = as.Date(NA),
forecast_target = as.Date(NA),
forecast_value = NA,
actual_value = NA
)
for (i in 0:(num_forecasts - 1)) {
est_end_index <- initial_index + i
forecast_index <- est_end_index + h
if (forecast_index > length(inflation)) break
est_end_date <- index(inflation)[est_end_index]
forecast_target_date <- index(inflation)[forecast_index]
# Extract estimation window
train_series <- window(inflation, end = est_end_date)
train_ts <- ts(as.numeric(train_series), start = c(2003, 1), frequency = 12)
# Fit AR(1) model
fit <- Arima(train_ts, order = c(1, 0, 0))
# Forecast 4 steps ahead
fc <- forecast(fit, h = h)
forecast_value <- fc$mean[h]
# Actual value
actual_value <- as.numeric(inflation[forecast_index])
# Store results
results[i + 1, ] <- list(
step = i + 1,
est_end = est_end_date,
forecast_target = forecast_target_date,
forecast_value = forecast_value,
actual_value = actual_value
)
}
print(head(results, 10))
## step est_end forecast_target forecast_value actual_value
## 1 1 2008-12-01 2009-04-01 1.1190860 -0.99
## 2 2 2009-01-01 2009-05-01 0.2094913 -3.29
## 3 3 2009-02-01 2009-06-01 0.5027692 -3.96
## 4 4 2009-03-01 2009-07-01 0.3727968 -4.32
## 5 5 2009-04-01 2009-08-01 -0.3105390 -1.06
## 6 6 2009-05-01 2009-09-01 -2.6180143 -0.91
## 7 7 2009-06-01 2009-10-01 -3.3746018 0.39
## 8 8 2009-07-01 2009-11-01 -3.7907355 1.94
## 9 9 2009-08-01 2009-12-01 -0.3608350 3.46
## 10 10 2009-09-01 2010-01-01 -0.2478609 3.98
results$error <- results$forecast_value - results$actual_value
results$sq_error <- results$error^2
rmse <- sqrt(mean(results$sq_error, na.rm = TRUE))
cat("RMSE for AR(1), Expanding Window, Horizon = 4:", round(rmse, 4), "\n")
## RMSE for AR(1), Expanding Window, Horizon = 4: 1.8139
results_ar2 <- data.frame(
step = 1:num_forecasts,
est_end = as.Date(NA),
forecast_target = as.Date(NA),
forecast_value = NA,
actual_value = NA
)
for (i in 0:(num_forecasts - 1)) {
est_end_index <- initial_index + i
forecast_index <- est_end_index + h
if (forecast_index > length(inflation)) break
est_end_date <- index(inflation)[est_end_index]
forecast_target_date <- index(inflation)[forecast_index]
# Extract estimation window
train_series <- window(inflation, end = est_end_date)
train_ts <- ts(as.numeric(train_series), start = c(2003, 1), frequency = 12)
# Fit AR(2) model
fit <- Arima(train_ts, order = c(2, 0, 0))
# Forecast 4 steps ahead
fc <- forecast(fit, h = h)
forecast_value <- fc$mean[h]
# Actual value
actual_value <- as.numeric(inflation[forecast_index])
# Store results
results_ar2[i + 1, ] <- list(
step = i + 1,
est_end = est_end_date,
forecast_target = forecast_target_date,
forecast_value = forecast_value,
actual_value = actual_value
)
}
print(head(results_ar2, 10))
## step est_end forecast_target forecast_value actual_value
## 1 1 2008-12-01 2009-04-01 0.9735803 -0.99
## 2 2 2009-01-01 2009-05-01 0.7178403 -3.29
## 3 3 2009-02-01 2009-06-01 1.7891562 -3.96
## 4 4 2009-03-01 2009-07-01 1.2800083 -4.32
## 5 5 2009-04-01 2009-08-01 0.2835954 -1.06
## 6 6 2009-05-01 2009-09-01 -3.0123276 -0.91
## 7 7 2009-06-01 2009-10-01 -2.3685061 0.39
## 8 8 2009-07-01 2009-11-01 -2.6655211 1.94
## 9 9 2009-08-01 2009-12-01 3.1109590 3.46
## 10 10 2009-09-01 2010-01-01 0.4934973 3.98
results_ar2$error <- results_ar2$forecast_value - results_ar2$actual_value
results_ar2$sq_error <- results_ar2$error^2
rmse_ar2 <- sqrt(mean(results_ar2$sq_error, na.rm = TRUE))
cat("RMSE for AR(2), Expanding Window, Horizon = 4:", round(rmse_ar2, 4), "\n")
## RMSE for AR(2), Expanding Window, Horizon = 4: 1.7258
Using an expanding window forecast strategy with a 4-month
horizon, the AR(2) model yields a lower RMSE (1.73) compared to the
AR(1) model (1.81). This suggests that the AR(2) model performs better
at capturing short-term inflation dynamics, likely because the inclusion
of a second lag provides additional information that improves forecast
accuracy. Since the AR(1) model is a restricted version of the AR(2),
the latter has more flexibility to adapt to patterns in the data,
resulting in more accurate forecasts at this horizon.
Finding: The RMSE for this modification will decrease the RMSE for
the AR(1) model.The number of observations in the estimation sample is
larger than before. Thus, assuming no structural change, the
coefficients are estimated more precisely and the RMSE falls.
num_forecasts1 <- 54
# Create results table
results_ar1_54 <- data.frame(
step = 1:num_forecasts1,
est_end = as.Date(NA),
forecast_target = as.Date(NA),
forecast_value = NA,
actual_value = NA
)
# Expanding window forecast (AR(1))
for (i in 0:(num_forecasts1 - 1)) {
est_end_index <- initial_index + i
forecast_index <- est_end_index + h
if (forecast_index > length(inflation)) break
est_end_date <- index(inflation)[est_end_index]
forecast_target_date <- index(inflation)[forecast_index]
train_series <- window(inflation, end = est_end_date)
train_ts <- ts(as.numeric(train_series), start = c(2003, 1), frequency = 12)
fit <- Arima(train_ts, order = c(1, 0, 0))
fc <- forecast(fit, h = h)
forecast_value <- fc$mean[h]
actual_value <- as.numeric(inflation[forecast_index])
results_ar1_54[i + 1, ] <- list(
step = i + 1,
est_end = est_end_date,
forecast_target = forecast_target_date,
forecast_value = forecast_value,
actual_value = actual_value
)
}
# Calculate RMSE for AR(1) with 54 expanding steps
results_ar1_54$error <- results_ar1_54$forecast_value - results_ar1_54$actual_value
results_ar1_54$sq_error <- results_ar1_54$error^2
rmse_ar1_54 <- sqrt(mean(results_ar1_54$sq_error, na.rm = TRUE))
cat("MSE for AR(1), Expanding Window, Horizon = 4, 54 expanding steps:", round(rmse_ar1_54, 4), "\n")
## MSE for AR(1), Expanding Window, Horizon = 4, 54 expanding steps: 1.7631