Evaluating Forecast Performance


Setup

library(zoo)
library(forecast)
library(tidyverse)
library(dplyr)
data <- read.csv("M4_Forecast uncertainty/India_Q.csv")
data$date <- as.Date(data$dateid01) 
data$date <- as.yearqtr(data$date, format = "%YQ%q")

head(data, 10)
##      dateid01                  dateid    rgdp       ygr    date
## 1  2003-01-01 2003-03-31 23:59:59.999 7028.06  3.210846 2003 Q1
## 2  2003-04-01 2003-06-30 23:59:59.999 7221.13  5.046689 2003 Q2
## 3  2003-07-01 2003-09-30 23:59:59.999 7412.13  7.369267 2003 Q3
## 4  2003-10-01 2003-12-31 23:59:59.999 7748.63 11.823654 2003 Q4
## 5  2004-01-01 2004-03-31 23:59:59.999 7635.01  8.636096 2004 Q1
## 6  2004-04-01 2004-06-30 23:59:59.999 7841.48  8.590761 2004 Q2
## 7  2004-07-01 2004-09-30 23:59:59.999 8060.99  8.754029 2004 Q3
## 8  2004-10-01 2004-12-31 23:59:59.999 8148.17  5.156266 2004 Q4
## 9  2005-01-01 2005-03-31 23:59:59.999 8334.76  9.165017 2005 Q1
## 10 2005-04-01 2005-06-30 23:59:59.999 8578.76  9.402307 2005 Q2
tail(data,10)
##      dateid01                  dateid     rgdp      ygr    date
## 39 2012-07-01 2012-09-30 23:59:59.999 14620.60 4.836853 2012 Q3
## 40 2012-10-01 2012-12-31 23:59:59.999 14896.50 5.265873 2012 Q4
## 41 2013-01-01 2013-03-31 23:59:59.999 14979.57 4.602064 2013 Q1
## 42 2013-04-01 2013-06-30 23:59:59.999 15075.86 4.511158 2013 Q2
## 43 2013-07-01 2013-09-30 23:59:59.999 15374.32 5.155192 2013 Q3
## 44 2013-10-01 2013-12-31 23:59:59.999 15561.92 4.466955 2013 Q4
## 45 2014-01-01 2014-03-31 23:59:59.999 15865.85 5.916592 2014 Q1
## 46 2014-04-01 2014-06-30 23:59:59.999 15985.67 6.034880 2014 Q2
## 47 2014-07-01 2014-09-30 23:59:59.999 16299.59 6.018282 2014 Q3
## 48 2014-10-01 2014-12-31 23:59:59.999       NA       NA 2014 Q4
ygr_ts <- ts(data$ygr, start = c(2003, 1), end = c(2014, 3), frequency = 4)

plot(ygr_ts, 
     main = "Year-on-Year quarterly GDP, India", 
     ylab = "", 
     xlab = "Year")


Four years of such rapid growth eventually led Goldman Sachs to conclude that “India’s high growth rate since 2003 represents a structural increase rather than simply a cyclical upturn. IMF project India’s potential or sustainable growth rate at about 8% until 2020.”

Many forecasters in 2007 would agree with this statement. However this period of high growth ended with the global financial crisis and India grew on average in the range of 5-6% per year for the last 2 years. Clearly, this rate is significantly lower than the rate in 2003-2007, which was in the vicinity of 9% per year.

Expanding Window Forecast

Parameters

T_end <- c(2008, 4)
horizon <- 2
num_expanding <- 21
end_index <- which(as.character(as.yearqtr(time(ygr_ts))) == "2008 Q4")
results <- data.frame(
  step = 1:num_expanding,
  est_end = rep(NA, num_expanding),
  forecast_1q = rep(NA, num_expanding),
  forecast_2q = rep(NA, num_expanding),
  actual_1q = rep(NA, num_expanding),
  actual_2q = rep(NA, num_expanding)
)

for (i in 0:(num_expanding - 1)) {
  est_end_idx <- end_index + i
  forecast_1q_idx <- est_end_idx + 1
  forecast_2q_idx <- est_end_idx + 2
  
  if (forecast_2q_idx > length(ygr_ts)) break
  
  train_data <- window(ygr_ts, end = time(ygr_ts)[est_end_idx])
  model <- Arima(train_data, order = c(1, 0, 0), include.mean = TRUE)
  forecast_out <- forecast(model, h = 2)
  
  results$est_end[i + 1] <- as.character(as.yearqtr(time(ygr_ts)[est_end_idx]))
  results$forecast_1q[i + 1] <- forecast_out$mean[1]
  results$forecast_2q[i + 1] <- forecast_out$mean[2]
  results$actual_1q[i + 1] <- ygr_ts[forecast_1q_idx]
  results$actual_2q[i + 1] <- ygr_ts[forecast_2q_idx]
}

head(results, 10)
##    step est_end forecast_1q forecast_2q  actual_1q actual_2q
## 1     1 2008 Q4    4.397887    5.785819  0.1617662  4.803595
## 2     2 2009 Q1    1.674107    2.832185  4.8035954  6.870901
## 3     3 2009 Q2    5.689663    6.263688  6.8709013  8.627214
## 4     4 2009 Q3    7.084859    7.219756  8.6272139 12.950306
## 5     5 2009 Q4    8.235849    7.991921 12.9503055 10.207099
## 6     6 2010 Q1   11.143313    9.992772 10.2070993  9.865107
## 7     7 2010 Q2    9.327593    8.784742  9.8651070 10.895331
## 8     8 2010 Q3    9.141534    8.690960 10.8953314  9.888993
## 9     9 2010 Q4    9.866620    9.212943  9.8889929  8.438769
## 10   10 2011 Q1    9.229183    8.809331  8.4387686  6.965544
results$error_1q <- results$forecast_1q - results$actual_1q
results$error_2q <- results$forecast_2q - results$actual_2q

mae_2q <- mean(abs(results$error_2q), na.rm = TRUE)   # for half-year ahead
rmse_1q <- sqrt(mean((results$error_1q)^2, na.rm = TRUE))  # for 1 quarter ahead

cat("MAE for AR(1), Horizon = 2 quarters (half-year ahead):", round(mae_2q, 4), "\n") 
## MAE for AR(1), Horizon = 2 quarters (half-year ahead): 1.7955
cat("RMSE for AR(1), Horizon = 1 quarter (3 months ahead):", round(rmse_1q, 4), "\n")
## RMSE for AR(1), Horizon = 1 quarter (3 months ahead): 1.862
train_data1 <- window(ygr_ts, end = c(2014, 3)) 
model1 <- Arima(train_data1, order = c(1, 0, 0), include.mean = TRUE)
fc1 <- forecast(model1, h = 25) # 25 Quaters ahead to 2020 Q4
forecast_dates <- as.yearqtr("2014 Q4") + 0:24

forecast_2020 <- data.frame(
  quarter = forecast_dates,
  forecast = as.numeric(fc1$mean)
)

head(forecast_2020,10)
##    quarter forecast
## 1  2014 Q4 6.343533
## 2  2015 Q4 6.576542
## 3  2016 Q4 6.743471
## 4  2017 Q4 6.863059
## 5  2018 Q4 6.948732
## 6  2019 Q4 7.010109
## 7  2020 Q4 7.054079
## 8  2021 Q4 7.085580
## 9  2022 Q4 7.108146
## 10 2023 Q4 7.124313



Rolling Window Forecast

Parameters

h_rolling <- 2
w_rolling <- 24
num_rolling_steps <- 21
first_end_date_rolling <- "2008 Q4"


first_end_idx_rolling <- which(as.character(as.yearqtr(time(ygr_ts))) == first_end_date_rolling)
first_start_idx_rolling <- first_end_idx_rolling - w_rolling + 1
rolling_results <- data.frame(
  step = 1:num_rolling_steps,
  roll_est_start = rep(as.yearqtr(NA), num_rolling_steps),
  roll_est_end   = rep(as.yearqtr(NA), num_rolling_steps),
  roll_fc_1q     = rep(NA, num_rolling_steps),
  roll_fc_2q     = rep(NA, num_rolling_steps),
  roll_actual_1q = rep(NA, num_rolling_steps),
  roll_actual_2q = rep(NA, num_rolling_steps)
)

for (i in 0:(num_rolling_steps - 1)) {
  est_start_idx <- first_start_idx_rolling + i
  est_end_idx <- est_start_idx + w_rolling - 1
  fc1_idx <- est_end_idx + 1
  fc2_idx <- est_end_idx + 2
  
  if (fc2_idx > length(ygr_ts)) break
  
  # Define training window
  train_roll <- window(ygr_ts,
                       start = time(ygr_ts)[est_start_idx],
                       end = time(ygr_ts)[est_end_idx])
  
  # Fit AR(1) model
  fit_roll <- Arima(train_roll, order = c(1, 0, 0), include.mean = TRUE)
  fc_roll <- forecast(fit_roll, h = h_rolling)
  
  # Store results
  rolling_results$roll_est_start[i + 1] <- format(as.yearqtr(time(ygr_ts)[est_start_idx]), "%Y Q%q")
  rolling_results$roll_est_end[i + 1] <- format(as.yearqtr(time(ygr_ts)[est_end_idx]), "%Y Q%q")
  rolling_results$roll_fc_1q[i + 1] <- fc_roll$mean[1]
  rolling_results$roll_fc_2q[i + 1] <- fc_roll$mean[2]
  rolling_results$roll_actual_1q[i + 1] <- ygr_ts[fc1_idx]
  rolling_results$roll_actual_2q[i + 1] <- ygr_ts[fc2_idx]
}

head(rolling_results,10)
##    step roll_est_start roll_est_end roll_fc_1q roll_fc_2q roll_actual_1q
## 1     1        2003 Q1      2008 Q4   4.397887   5.785819      0.1617662
## 2     2        2003 Q2      2009 Q1   2.126751   3.538373      4.8035954
## 3     3        2003 Q3      2009 Q2   6.054252   6.795831      6.8709013
## 4     4        2003 Q4      2009 Q3   7.358726   7.672643      8.6272139
## 5     5        2004 Q1      2009 Q4   8.400656   8.259147     12.9503055
## 6     6        2004 Q2      2010 Q1  11.316328  10.276493     10.2070993
## 7     7        2004 Q3      2010 Q2   9.476146   9.030656      9.8651070
## 8     8        2004 Q4      2010 Q3   9.264278   8.872106     10.8953314
## 9     9        2005 Q1      2010 Q4  10.203125   9.730119      9.8889929
## 10   10        2005 Q2      2011 Q1   9.499716   9.235150      8.4387686
##    roll_actual_2q
## 1        4.803595
## 2        6.870901
## 3        8.627214
## 4       12.950306
## 5       10.207099
## 6        9.865107
## 7       10.895331
## 8        9.888993
## 9        8.438769
## 10       6.965544
rolling_results$roll_err_1q <- rolling_results$roll_fc_1q - rolling_results$roll_actual_1q
rolling_results$roll_err_2q <- rolling_results$roll_fc_2q - rolling_results$roll_actual_2q

rmse_roll_1q <- sqrt(mean(rolling_results$roll_err_1q^2, na.rm = TRUE))
mae_roll_2q <- mean(abs(rolling_results$roll_err_2q), na.rm = TRUE)

cat("MAE for AR(1), Horizon = 2 quarters (half-year ahead):", round(mae_roll_2q, 4), "\n") 
## MAE for AR(1), Horizon = 2 quarters (half-year ahead): 1.7112
cat("RMSE for AR(1), Horizon = 1 quarter (3 months ahead):", round(rmse_roll_1q, 4), "\n")
## RMSE for AR(1), Horizon = 1 quarter (3 months ahead): 1.7872
roll_end_qtr <- "2014 Q3"
roll_end_idx <- which(as.character(as.yearqtr(time(ygr_ts))) == roll_end_qtr)

roll_start_idx <- roll_end_idx - 23  # 24 quarters total
roll_start_qtr <- time(ygr_ts)[roll_start_idx]

train_roll_2014 <- window(ygr_ts,
                          start = roll_start_qtr,
                          end = time(ygr_ts)[roll_end_idx])

fit_roll_2014 <- Arima(train_roll_2014, order = c(1, 0, 0), include.mean = TRUE)

fc_2020q4 <- forecast(fit_roll_2014, h = 25)

print(fc_2020q4)
##         Point Forecast    Lo 80    Hi 80       Lo 95     Hi 95
## 2014 Q4       5.972573 3.730517 8.214630  2.54364385  9.401503
## 2015 Q1       5.935326 3.043136 8.827516  1.51210306 10.358549
## 2015 Q2       5.904974 2.652090 9.157857  0.93011780 10.879830
## 2015 Q3       5.880240 2.408489 9.351992  0.57065433 11.189827
## 2015 Q4       5.860086 2.250324 9.469848  0.33943107 11.380740
## 2016 Q1       5.843662 2.145101 9.542223  0.18720171 11.500123
## 2016 Q2       5.830279 2.073913 9.586645  0.08541308 11.575145
## 2016 Q3       5.819373 2.025110 9.613637  0.01654780 11.622199
## 2016 Q4       5.810486 1.991265 9.629707 -0.03050806 11.651481
## 2017 Q1       5.803245 1.967541 9.638948 -0.06295784 11.669447
## 2017 Q2       5.797344 1.950734 9.643953 -0.08553813 11.680225
## 2017 Q3       5.792535 1.938700 9.646369 -0.10139618 11.686466
## 2017 Q4       5.788616 1.929992 9.647241 -0.11264033 11.689873
## 2018 Q1       5.785423 1.923621 9.647225 -0.12069285 11.691539
## 2018 Q2       5.782821 1.918911 9.646731 -0.12651942 11.692162
## 2018 Q3       5.780701 1.915391 9.646010 -0.13077997 11.692182
## 2018 Q4       5.778973 1.912735 9.645212 -0.13392851 11.691875
## 2019 Q1       5.777565 1.910710 9.644420 -0.13627968 11.691410
## 2019 Q2       5.776418 1.909153 9.643683 -0.13805323 11.690889
## 2019 Q3       5.775483 1.907946 9.643020 -0.13940396 11.690370
## 2019 Q4       5.774721 1.907004 9.642438 -0.14044189 11.689884
## 2020 Q1       5.774100 1.906263 9.641937 -0.14124602 11.689447
## 2020 Q2       5.773594 1.905678 9.641511 -0.14187364 11.689062
## 2020 Q3       5.773182 1.905213 9.641152 -0.14236670 11.688731
## 2020 Q4       5.772846 1.904842 9.640851 -0.14275630 11.688449
forecast_2020q4 <- as.numeric(fc_2020q4$mean[25])

cat("Rolling Forecast for 2020 Q4 (AR(1), trained on 2008 Q4–2014 Q3):",
    round(forecast_2020q4, 4), "\n")
## Rolling Forecast for 2020 Q4 (AR(1), trained on 2008 Q4–2014 Q3): 5.7728



Computing bias, MSE, RMSE, SE, MAE, MAPE

compute_metrics <- function(error, actual) {
  bias  <- mean(error, na.rm = TRUE)
  mse   <- mean(error^2, na.rm = TRUE)
  rmse  <- sqrt(mse)
  se    <- sd(error, na.rm = TRUE)
  mae   <- mean(abs(error), na.rm = TRUE)
  mape  <- mean(abs(error / actual), na.rm = TRUE) * 100
  return(c(Bias = bias, MSE = mse, RMSE = rmse, SE = se, MAE = mae, MAPE = mape))
}

exp_1q_metrics <- compute_metrics(results$error_1q, results$actual_1q)
exp_2q_metrics <- compute_metrics(results$error_2q, results$actual_2q)
roll_1q_metrics <- compute_metrics(rolling_results$roll_err_1q, rolling_results$roll_actual_1q)
roll_2q_metrics <- compute_metrics(rolling_results$roll_err_2q, rolling_results$roll_actual_2q)

comparison_table <- rbind(
  Expanding_1Q_ahead = exp_1q_metrics,
  Expanding_2Q_ahead = exp_2q_metrics,
  Rolling_1Q_ahead = roll_1q_metrics,
  Rolling_2Q_ahead = roll_2q_metrics
)

print(round(comparison_table, 4))
##                      Bias    MSE   RMSE     SE    MAE     MAPE
## Expanding_1Q_ahead 0.1395 3.4669 1.8620 1.9026 1.4325 143.7539
## Expanding_2Q_ahead 0.0881 4.8502 2.2023 2.2549 1.7955  27.4563
## Rolling_1Q_ahead   0.2026 3.1940 1.7872 1.8195 1.3711 142.4641
## Rolling_2Q_ahead   0.1970 4.2660 2.0654 2.1068 1.7112  26.2147