A Vector Autoregression (VAR) is a statistical model used to capture
the dynamic relationships among multiple time series variables. Each
variable in a VAR is explained by its own past values and the past
values of all other variables in the system.
The reduced-form VAR
model can be written as:
X_t = A₀ + A₁ * X_{t−1} + A₂ * X_{t−2} + …
+ A_p * X_{t−p} + u_t -> Shocks Correlated with u_t (Purely
statistical)
Note:
- When variables are non-stationary
and not cointegrated, difference the series or use a VECM if they are
cointegrated.
- When you have too many variables and too few
observations, use Bayesian VAR (BVAR) or reduce
variables/dimensions.
- When you care about structural
relationships, use Structural VARs (SVARs) or DSGE models for structural
interpretation.
library(vars)
library(tidyverse)
library(zoo)
library(readr)
library(mFilter)
data <- read.csv("M5_Vector_Autoregression/MFx Module 5, Part I VAR Workfile/MFx_Module5-Part I_VAR_Workfile_CAN.csv")
data$date <- as.Date(data$dateid01)
data$date <- as.yearqtr(data$date, format = "%YQ%q")
For GDP
data$lgdp <- log(data$gdp)
hp_lgdp <- hpfilter(data$lgdp, freq = 1600, type = "lambda")
data$lgdp_gap <- hp_lgdp$cycle
data$lgdp_trend <- hp_lgdp$trend
ggplot(data, aes(x = date)) +
geom_line(aes(y = lgdp, color = "Log GDP")) +
geom_line(aes(y = lgdp_trend, color = "Trend")) +
labs(title = "Log GDP and HP Filtered Trend", y = "log(GDP)", x = "Date") +
scale_color_manual(values = c("Log GDP" = "black", "Trend" = "red")) +
theme_minimal()
ggplot(data, aes(x = date, y = lgdp_gap)) +
geom_line(color = "steelblue", linewidth = 1) +
geom_hline(yintercept = 0, linetype = "dashed") +
labs(title = "Output Gap (Cyclical Component of Log GDP)",
y = "Log GDP Gap", x = "Date") +
theme_minimal()
For Real Exchange Rate
hp_rer <- hpfilter(data$rer, freq = 1600, type = "lambda")
data$rer_gap <- hp_rer$cycle
data$rer_trend <- hp_rer$trend
ggplot(data, aes(x = date)) +
geom_line(aes(y = rer, color = "RER")) +
geom_line(aes(y = rer_trend, color = "Trend")) +
labs(title = "Real Exchange Rate and HP Filtered Trend",
y = "Real Exchange Rate", x = "Date") +
scale_color_manual(values = c("RER" = "black", "Trend" = "red")) +
theme_minimal()
ggplot(data, aes(x = date, y = rer_gap)) +
geom_line(color = "darkgreen", linewidth = 1) +
geom_hline(yintercept = 0, linetype = "dashed") +
labs(title = "Real Exchange Rate Gap (Cyclical Component)",
y = "RER Gap", x = "Date") +
theme_minimal()
Note: If all roots of the characteristic polynomial are < 1
→ VAR is stable and stationary.
var_data <- data %>%
filter(date >= as.yearqtr("1993 Q1") & date <= as.yearqtr("2012 Q4")) %>%
dplyr::select(date, lgdp_gap, rer_gap, infl, mpr) %>%
filter(complete.cases(.))
ts_data <- ts(var_data[, -1], start = c(1993, 1), frequency = 4)
var_model <- VAR(ts_data, p = 4, type = "const")
summary(var_model)
##
## VAR Estimation Results:
## =========================
## Endogenous variables: lgdp_gap, rer_gap, infl, mpr
## Deterministic variables: const
## Sample size: 72
## Log Likelihood: 38.545
## Roots of the characteristic polynomial:
## 0.8493 0.8493 0.8295 0.8295 0.8208 0.8208 0.7894 0.7894 0.7812 0.7812 0.7007 0.7007 0.6531 0.6531 0.3589 0.3589
## Call:
## VAR(y = ts_data, p = 4, type = "const")
##
##
## Estimation results for equation lgdp_gap:
## =========================================
## lgdp_gap = lgdp_gap.l1 + rer_gap.l1 + infl.l1 + mpr.l1 + lgdp_gap.l2 + rer_gap.l2 + infl.l2 + mpr.l2 + lgdp_gap.l3 + rer_gap.l3 + infl.l3 + mpr.l3 + lgdp_gap.l4 + rer_gap.l4 + infl.l4 + mpr.l4 + const
##
## Estimate Std. Error t value Pr(>|t|)
## lgdp_gap.l1 1.447e+00 1.628e-01 8.887 3.21e-12 ***
## rer_gap.l1 4.969e-04 4.516e-04 1.100 0.27602
## infl.l1 -4.841e-03 2.337e-03 -2.071 0.04305 *
## mpr.l1 5.905e-04 2.595e-03 0.228 0.82080
## lgdp_gap.l2 -8.121e-01 2.353e-01 -3.451 0.00108 **
## rer_gap.l2 -3.481e-04 6.459e-04 -0.539 0.59216
## infl.l2 1.734e-03 2.524e-03 0.687 0.49500
## mpr.l2 1.528e-03 4.308e-03 0.355 0.72415
## lgdp_gap.l3 1.637e-01 2.466e-01 0.664 0.50963
## rer_gap.l3 6.702e-04 6.434e-04 1.042 0.30218
## infl.l3 2.491e-03 2.479e-03 1.005 0.31952
## mpr.l3 -2.969e-05 3.836e-03 -0.008 0.99385
## lgdp_gap.l4 -4.431e-02 1.745e-01 -0.254 0.80050
## rer_gap.l4 -7.005e-04 4.779e-04 -1.466 0.14836
## infl.l4 -7.471e-04 2.051e-03 -0.364 0.71704
## mpr.l4 -1.454e-03 2.140e-03 -0.679 0.49987
## const 5.934e-04 5.452e-03 0.109 0.91372
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 0.008636 on 55 degrees of freedom
## Multiple R-Squared: 0.8601, Adjusted R-squared: 0.8194
## F-statistic: 21.13 on 16 and 55 DF, p-value: < 2.2e-16
##
##
## Estimation results for equation rer_gap:
## ========================================
## rer_gap = lgdp_gap.l1 + rer_gap.l1 + infl.l1 + mpr.l1 + lgdp_gap.l2 + rer_gap.l2 + infl.l2 + mpr.l2 + lgdp_gap.l3 + rer_gap.l3 + infl.l3 + mpr.l3 + lgdp_gap.l4 + rer_gap.l4 + infl.l4 + mpr.l4 + const
##
## Estimate Std. Error t value Pr(>|t|)
## lgdp_gap.l1 89.68895 50.59237 1.773 0.0818 .
## rer_gap.l1 0.96572 0.14038 6.879 5.94e-09 ***
## infl.l1 -0.55858 0.72648 -0.769 0.4452
## mpr.l1 -0.17655 0.80647 -0.219 0.8275
## lgdp_gap.l2 -134.01347 73.14267 -1.832 0.0723 .
## rer_gap.l2 -0.44922 0.20077 -2.237 0.0293 *
## infl.l2 0.70430 0.78452 0.898 0.3732
## mpr.l2 1.23516 1.33905 0.922 0.3603
## lgdp_gap.l3 24.33927 76.65158 0.318 0.7520
## rer_gap.l3 0.13290 0.19999 0.665 0.5091
## infl.l3 0.60334 0.77062 0.783 0.4370
## mpr.l3 -1.18229 1.19245 -0.991 0.3258
## lgdp_gap.l4 7.53485 54.23418 0.139 0.8900
## rer_gap.l4 -0.05594 0.14853 -0.377 0.7079
## infl.l4 -0.71940 0.63746 -1.129 0.2640
## mpr.l4 0.11275 0.66525 0.169 0.8660
## const 0.12526 1.69449 0.074 0.9413
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 2.684 on 55 degrees of freedom
## Multiple R-Squared: 0.6974, Adjusted R-squared: 0.6093
## F-statistic: 7.921 on 16 and 55 DF, p-value: 2.866e-09
##
##
## Estimation results for equation infl:
## =====================================
## infl = lgdp_gap.l1 + rer_gap.l1 + infl.l1 + mpr.l1 + lgdp_gap.l2 + rer_gap.l2 + infl.l2 + mpr.l2 + lgdp_gap.l3 + rer_gap.l3 + infl.l3 + mpr.l3 + lgdp_gap.l4 + rer_gap.l4 + infl.l4 + mpr.l4 + const
##
## Estimate Std. Error t value Pr(>|t|)
## lgdp_gap.l1 47.78657 9.24282 5.170 3.37e-06 ***
## rer_gap.l1 -0.08500 0.02565 -3.314 0.001629 **
## infl.l1 0.21963 0.13272 1.655 0.103657
## mpr.l1 -0.09943 0.14734 -0.675 0.502587
## lgdp_gap.l2 -21.03914 13.36257 -1.574 0.121111
## rer_gap.l2 0.01791 0.03668 0.488 0.627208
## infl.l2 0.04707 0.14333 0.328 0.743833
## mpr.l2 0.23317 0.24463 0.953 0.344689
## lgdp_gap.l3 -3.76776 14.00362 -0.269 0.788893
## rer_gap.l3 -0.01454 0.03654 -0.398 0.692218
## infl.l3 0.30707 0.14079 2.181 0.033470 *
## mpr.l3 -0.21929 0.21785 -1.007 0.318532
## lgdp_gap.l4 -13.76148 9.90814 -1.389 0.170460
## rer_gap.l4 -0.01607 0.02714 -0.592 0.556090
## infl.l4 -0.22856 0.11646 -1.963 0.054764 .
## mpr.l4 0.10282 0.12154 0.846 0.401196
## const 1.24027 0.30957 4.006 0.000187 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 0.4904 on 55 degrees of freedom
## Multiple R-Squared: 0.7452, Adjusted R-squared: 0.6711
## F-statistic: 10.05 on 16 and 55 DF, p-value: 3.923e-11
##
##
## Estimation results for equation mpr:
## ====================================
## mpr = lgdp_gap.l1 + rer_gap.l1 + infl.l1 + mpr.l1 + lgdp_gap.l2 + rer_gap.l2 + infl.l2 + mpr.l2 + lgdp_gap.l3 + rer_gap.l3 + infl.l3 + mpr.l3 + lgdp_gap.l4 + rer_gap.l4 + infl.l4 + mpr.l4 + const
##
## Estimate Std. Error t value Pr(>|t|)
## lgdp_gap.l1 14.110452 7.290201 1.936 0.058072 .
## rer_gap.l1 0.003901 0.020228 0.193 0.847789
## infl.l1 -0.254183 0.104683 -2.428 0.018476 *
## mpr.l1 1.343532 0.116209 11.561 2.4e-16 ***
## lgdp_gap.l2 -2.428290 10.539628 -0.230 0.818639
## rer_gap.l2 -0.050024 0.028930 -1.729 0.089395 .
## infl.l2 -0.035159 0.113047 -0.311 0.756968
## mpr.l2 -0.596052 0.192953 -3.089 0.003146 **
## lgdp_gap.l3 -2.546084 11.045251 -0.231 0.818548
## rer_gap.l3 0.002923 0.028819 0.101 0.919578
## infl.l3 0.113665 0.111044 1.024 0.310502
## mpr.l3 0.546086 0.171828 3.178 0.002433 **
## lgdp_gap.l4 -12.000790 7.814974 -1.536 0.130366
## rer_gap.l4 0.023868 0.021403 1.115 0.269619
## infl.l4 0.013647 0.091856 0.149 0.882440
## mpr.l4 -0.348872 0.095861 -3.639 0.000605 ***
## const 0.473354 0.244171 1.939 0.057686 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 0.3868 on 55 degrees of freedom
## Multiple R-Squared: 0.9645, Adjusted R-squared: 0.9542
## F-statistic: 93.46 on 16 and 55 DF, p-value: < 2.2e-16
##
##
##
## Covariance matrix of residuals:
## lgdp_gap rer_gap infl mpr
## lgdp_gap 7.458e-05 0.007061 0.002200 0.0004499
## rer_gap 7.061e-03 7.205575 0.007083 0.0753016
## infl 2.200e-03 0.007083 0.240496 -0.0121156
## mpr 4.499e-04 0.075302 -0.012116 0.1496158
##
## Correlation matrix of residuals:
## lgdp_gap rer_gap infl mpr
## lgdp_gap 1.0000 0.30457 0.51948 0.13468
## rer_gap 0.3046 1.00000 0.00538 0.07252
## infl 0.5195 0.00538 1.00000 -0.06387
## mpr 0.1347 0.07252 -0.06387 1.00000
lag_selection <- VARselect(ts_data, lag.max = 8, type = "const")
print(lag_selection)
## $selection
## AIC(n) HQ(n) SC(n) FPE(n)
## 3 2 2 3
##
## $criteria
## 1 2 3 4 5
## AIC(n) -1.007545e+01 -1.095356e+01 -1.102013e+01 -1.092064e+01 -1.075926e+01
## HQ(n) -9.816791e+00 -1.048798e+01 -1.034762e+01 -1.004120e+01 -9.672901e+00
## SC(n) -9.422652e+00 -9.778527e+00 -9.322860e+00 -8.701134e+00 -8.017519e+00
## FPE(n) 4.214540e-05 1.760528e-05 1.668370e-05 1.888128e-05 2.309494e-05
## 6 7 8
## AIC(n) -1.057830e+01 -1.067131e+01 -1.092060e+01
## HQ(n) -9.285010e+00 -9.171097e+00 -9.213454e+00
## SC(n) -7.314318e+00 -6.885094e+00 -6.612140e+00
## FPE(n) 2.941873e-05 2.928367e-05 2.583297e-05
Comparing the lag criteria, a lag length of 2 (which is the lowest in the lag_selection) proves to be the best fit.
# Estimate 1-lag VAR
var_model1 <- VAR(ts_data, p = 1, type = "const")
# Estimate 2-lag VAR
var_model2 <- VAR(ts_data, p = 2, type = "const") #As per lowest lag_selection
lag_1 <- serial.test(var_model1, lags.pt = 12, type = "PT.asymptotic")
lag_2 <- serial.test(var_model2, lags.pt = 12, type = "PT.asymptotic")
lag_4 <- serial.test(var_model, lags.pt = 12, type = "PT.asymptotic")
summary_table <- data.frame(
Model = c("VAR(1)", "VAR(2)", "VAR(4)"),
Lags = c(1, 2, 4),
Chi_squared = c(lag_1$serial$statistic, lag_2$serial$statistic, lag_4$serial$statistic),
DF = c(lag_1$serial$parameter, lag_2$serial$parameter, lag_4$serial$parameter),
P_value = c(lag_1$serial$p.value, lag_2$serial$p.value, lag_4$serial$p.value)
)
print(summary_table)
## Model Lags Chi_squared DF P_value
## 1 VAR(1) 1 212.2999 176 0.03210192
## 2 VAR(2) 2 164.0685 160 0.39637665
## 3 VAR(4) 4 159.7747 128 0.02986308
library(MTS)
forecast_horizon <- 8
forecast_var <- predict(var_model2, n.ahead = forecast_horizon, ci = 0.95)
quarter_labels <- paste(
rep(2013:2014, each = 4),
rep(paste0("Q", 1:4), times = 2)
)
# Function to build one forecast dataframe
get_forecast_df <- function(varname) {
df <- as.data.frame(forecast_var$fcst[[varname]][, 1:3])
colnames(df) <- c("Forecast", "Lower_95", "Upper_95")
df$Quarter <- quarter_labels
df <- df[, c("Quarter", "Forecast", "Lower_95", "Upper_95")]
return(df)
}
# dataframe per variable
lgdp_forecast <- get_forecast_df("lgdp_gap")
rer_forecast <- get_forecast_df("rer_gap")
infl_forecast <- get_forecast_df("infl")
mpr_forecast <- get_forecast_df("mpr")
print(lgdp_forecast)
## Quarter Forecast Lower_95 Upper_95
## 1 2013 Q1 -0.001433666 -0.01813176 0.01526443
## 2 2013 Q2 -0.001605072 -0.02905950 0.02584936
## 3 2013 Q3 -0.003529385 -0.03716367 0.03010490
## 4 2013 Q4 -0.006715405 -0.04352778 0.03009697
## 5 2014 Q1 -0.009542119 -0.04778571 0.02870148
## 6 2014 Q2 -0.010750412 -0.04963323 0.02813240
## 7 2014 Q3 -0.010064346 -0.04951148 0.02938279
## 8 2014 Q4 -0.008026854 -0.04816171 0.03210800
print(rer_forecast)
## Quarter Forecast Lower_95 Upper_95
## 1 2013 Q1 1.88506771 -3.178393 6.948528
## 2 2013 Q2 -0.20967253 -7.428769 7.009424
## 3 2013 Q3 -1.40308378 -9.352383 6.546215
## 4 2013 Q4 -1.75603426 -9.847076 6.335008
## 5 2014 Q1 -1.54853572 -9.740563 6.643491
## 6 2014 Q2 -1.04508858 -9.427562 7.337385
## 7 2014 Q3 -0.45293143 -9.033161 8.127298
## 8 2014 Q4 0.07549147 -8.633708 8.784691
print(infl_forecast)
## Quarter Forecast Lower_95 Upper_95
## 1 2013 Q1 1.099815 -0.05079331 2.250423
## 2 2013 Q2 1.381828 -0.17728478 2.940940
## 3 2013 Q3 1.604991 -0.11729321 3.327274
## 4 2013 Q4 1.691276 -0.07314624 3.455698
## 5 2014 Q1 1.688544 -0.09108533 3.468174
## 6 2014 Q2 1.675163 -0.11977709 3.470103
## 7 2014 Q3 1.697965 -0.11514146 3.511071
## 8 2014 Q4 1.760712 -0.07234263 3.593766
print(mpr_forecast)
## Quarter Forecast Lower_95 Upper_95
## 1 2013 Q1 1.141501 0.2488894 2.034113
## 2 2013 Q2 1.285519 -0.1773365 2.748375
## 3 2013 Q3 1.422391 -0.4766295 3.321412
## 4 2013 Q4 1.539580 -0.6780751 3.757235
## 5 2014 Q1 1.635219 -0.8118151 4.082254
## 6 2014 Q2 1.722411 -0.8937767 4.338599
## 7 2014 Q3 1.815382 -0.9291720 4.559936
## 8 2014 Q4 1.917774 -0.9258196 4.761367
historical_data <- data[data$date >= as.yearqtr("2010 Q1") & data$date <= as.yearqtr("2014 Q4"), ]
# Create Quarter labels
historical_data$Quarter <- paste(format(historical_data$date, "%Y"),
paste0("Q", as.numeric(cycle(historical_data$date))))
forecast_quarters <- paste(rep(2013:2014, each = 4), paste0("Q", 1:4))
plot_spliced_series <- function(forecast_df, historical_values, full_quarters, forecast_quarters, varname) {
# Format forecast
forecast_df$Type <- "Forecast"
forecast_df <- forecast_df[, c("Quarter", "Forecast", "Type")]
colnames(forecast_df) <- c("Quarter", "Value", "Type")
actual_df <- data.frame(
Quarter = full_quarters,
Value = historical_values,
Type = "Actual"
)
full_series <- rbind(actual_df, forecast_df)
full_series$Quarter <- factor(full_series$Quarter, levels = unique(c(full_quarters, forecast_quarters)))
ggplot(full_series, aes(x = Quarter, y = Value, group = Type, color = Type)) +
geom_line(size = 1.2) +
geom_vline(xintercept = "2013 Q1", linetype = "dashed", color = "gray50") +
scale_color_manual(values = c("Actual" = "black", "Forecast" = "red")) +
labs(title = paste("Historical + Forecast:", varname),
x = "Quarter", y = varname, color = "") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(size = 14, face = "bold"))
}
full_quarters <- historical_data$Quarter
print(plot_spliced_series(lgdp_forecast, historical_data$lgdp_gap, full_quarters, forecast_quarters, "lgdp_gap"))
print(plot_spliced_series(rer_forecast, historical_data$rer_gap, full_quarters, forecast_quarters, "rer_gap"))
print(plot_spliced_series(infl_forecast, historical_data$infl, full_quarters, forecast_quarters, "infl"))
print(plot_spliced_series(mpr_forecast, historical_data$mpr, full_quarters, forecast_quarters, "mpr"))
Conclusions: The forecast results from the VAR model show
notable deviations from the actual data over the forecast horizon
(2013Q1 to 2014Q4). For instance, the model consistently overpredicts
the monetary policy rate, projecting a steady increase while the actual
rate remained flat at 1%. Similarly, it underpredicts the output gap,
especially in 2014, and shows a mismatch in the real exchange rate
gap—underpredicting in 2013 and overpredicting in 2014. While inflation
forecasts appear somewhat closer, the model still misses key
fluctuations i.e. consider SVAR.