Vector Autoregression

A Vector Autoregression (VAR) is a statistical model used to capture the dynamic relationships among multiple time series variables. Each variable in a VAR is explained by its own past values and the past values of all other variables in the system.
The reduced-form VAR model can be written as:
X_t = A₀ + A₁ * X_{t−1} + A₂ * X_{t−2} + … + A_p * X_{t−p} + u_t -> Shocks Correlated with u_t (Purely statistical)

Note:
- When variables are non-stationary and not cointegrated, difference the series or use a VECM if they are cointegrated.
- When you have too many variables and too few observations, use Bayesian VAR (BVAR) or reduce variables/dimensions.
- When you care about structural relationships, use Structural VARs (SVARs) or DSGE models for structural interpretation.

library(vars)
library(tidyverse)
library(zoo)
library(readr)
library(mFilter)
data <- read.csv("M5_Vector_Autoregression/MFx Module 5, Part I VAR Workfile/MFx_Module5-Part I_VAR_Workfile_CAN.csv")

data$date <- as.Date(data$dateid01) 
data$date <- as.yearqtr(data$date, format = "%YQ%q")


Hodrick-Prescott (HP) filter (lambda=1600) to estimate trend and cyclical component.

For GDP

data$lgdp <- log(data$gdp)
hp_lgdp <- hpfilter(data$lgdp, freq = 1600, type = "lambda")
data$lgdp_gap <- hp_lgdp$cycle
data$lgdp_trend <- hp_lgdp$trend


Plot trend and cyclical component.

  • Peaks above zero: GDP was running above its long-run potential (tight economy, possibly inflationary pressure).
  • Troughs below zero: Economic slack — underutilization of resources (common after recessions, like the 2008 crisis dip).
ggplot(data, aes(x = date)) +
  geom_line(aes(y = lgdp, color = "Log GDP")) +
  geom_line(aes(y = lgdp_trend, color = "Trend")) +
  labs(title = "Log GDP and HP Filtered Trend", y = "log(GDP)", x = "Date") +
  scale_color_manual(values = c("Log GDP" = "black", "Trend" = "red")) +
  theme_minimal()

ggplot(data, aes(x = date, y = lgdp_gap)) +
  geom_line(color = "steelblue", linewidth = 1) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  labs(title = "Output Gap (Cyclical Component of Log GDP)",
       y = "Log GDP Gap", x = "Date") +
  theme_minimal()


For Real Exchange Rate

hp_rer <- hpfilter(data$rer, freq = 1600, type = "lambda")
data$rer_gap <- hp_rer$cycle
data$rer_trend <- hp_rer$trend


Plot trend and cyclical component.

  • Above zero: RER is above trend — possibly overvalued currency.
  • Below zero: RER is undervalued — exports more competitive.
ggplot(data, aes(x = date)) +
  geom_line(aes(y = rer, color = "RER")) +
  geom_line(aes(y = rer_trend, color = "Trend")) +
  labs(title = "Real Exchange Rate and HP Filtered Trend",
       y = "Real Exchange Rate", x = "Date") +
  scale_color_manual(values = c("RER" = "black", "Trend" = "red")) +
  theme_minimal()

ggplot(data, aes(x = date, y = rer_gap)) +
  geom_line(color = "darkgreen", linewidth = 1) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  labs(title = "Real Exchange Rate Gap (Cyclical Component)",
       y = "RER Gap", x = "Date") +
  theme_minimal()



Estimate VARs


Note: If all roots of the characteristic polynomial are < 1 → VAR is stable and stationary.

var_data <- data %>%
  filter(date >= as.yearqtr("1993 Q1") & date <= as.yearqtr("2012 Q4")) %>%
  dplyr::select(date, lgdp_gap, rer_gap, infl, mpr) %>%
  filter(complete.cases(.))

ts_data <- ts(var_data[, -1], start = c(1993, 1), frequency = 4)

var_model <- VAR(ts_data, p = 4, type = "const") 
summary(var_model)
## 
## VAR Estimation Results:
## ========================= 
## Endogenous variables: lgdp_gap, rer_gap, infl, mpr 
## Deterministic variables: const 
## Sample size: 72 
## Log Likelihood: 38.545 
## Roots of the characteristic polynomial:
## 0.8493 0.8493 0.8295 0.8295 0.8208 0.8208 0.7894 0.7894 0.7812 0.7812 0.7007 0.7007 0.6531 0.6531 0.3589 0.3589
## Call:
## VAR(y = ts_data, p = 4, type = "const")
## 
## 
## Estimation results for equation lgdp_gap: 
## ========================================= 
## lgdp_gap = lgdp_gap.l1 + rer_gap.l1 + infl.l1 + mpr.l1 + lgdp_gap.l2 + rer_gap.l2 + infl.l2 + mpr.l2 + lgdp_gap.l3 + rer_gap.l3 + infl.l3 + mpr.l3 + lgdp_gap.l4 + rer_gap.l4 + infl.l4 + mpr.l4 + const 
## 
##               Estimate Std. Error t value Pr(>|t|)    
## lgdp_gap.l1  1.447e+00  1.628e-01   8.887 3.21e-12 ***
## rer_gap.l1   4.969e-04  4.516e-04   1.100  0.27602    
## infl.l1     -4.841e-03  2.337e-03  -2.071  0.04305 *  
## mpr.l1       5.905e-04  2.595e-03   0.228  0.82080    
## lgdp_gap.l2 -8.121e-01  2.353e-01  -3.451  0.00108 ** 
## rer_gap.l2  -3.481e-04  6.459e-04  -0.539  0.59216    
## infl.l2      1.734e-03  2.524e-03   0.687  0.49500    
## mpr.l2       1.528e-03  4.308e-03   0.355  0.72415    
## lgdp_gap.l3  1.637e-01  2.466e-01   0.664  0.50963    
## rer_gap.l3   6.702e-04  6.434e-04   1.042  0.30218    
## infl.l3      2.491e-03  2.479e-03   1.005  0.31952    
## mpr.l3      -2.969e-05  3.836e-03  -0.008  0.99385    
## lgdp_gap.l4 -4.431e-02  1.745e-01  -0.254  0.80050    
## rer_gap.l4  -7.005e-04  4.779e-04  -1.466  0.14836    
## infl.l4     -7.471e-04  2.051e-03  -0.364  0.71704    
## mpr.l4      -1.454e-03  2.140e-03  -0.679  0.49987    
## const        5.934e-04  5.452e-03   0.109  0.91372    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 0.008636 on 55 degrees of freedom
## Multiple R-Squared: 0.8601,  Adjusted R-squared: 0.8194 
## F-statistic: 21.13 on 16 and 55 DF,  p-value: < 2.2e-16 
## 
## 
## Estimation results for equation rer_gap: 
## ======================================== 
## rer_gap = lgdp_gap.l1 + rer_gap.l1 + infl.l1 + mpr.l1 + lgdp_gap.l2 + rer_gap.l2 + infl.l2 + mpr.l2 + lgdp_gap.l3 + rer_gap.l3 + infl.l3 + mpr.l3 + lgdp_gap.l4 + rer_gap.l4 + infl.l4 + mpr.l4 + const 
## 
##               Estimate Std. Error t value Pr(>|t|)    
## lgdp_gap.l1   89.68895   50.59237   1.773   0.0818 .  
## rer_gap.l1     0.96572    0.14038   6.879 5.94e-09 ***
## infl.l1       -0.55858    0.72648  -0.769   0.4452    
## mpr.l1        -0.17655    0.80647  -0.219   0.8275    
## lgdp_gap.l2 -134.01347   73.14267  -1.832   0.0723 .  
## rer_gap.l2    -0.44922    0.20077  -2.237   0.0293 *  
## infl.l2        0.70430    0.78452   0.898   0.3732    
## mpr.l2         1.23516    1.33905   0.922   0.3603    
## lgdp_gap.l3   24.33927   76.65158   0.318   0.7520    
## rer_gap.l3     0.13290    0.19999   0.665   0.5091    
## infl.l3        0.60334    0.77062   0.783   0.4370    
## mpr.l3        -1.18229    1.19245  -0.991   0.3258    
## lgdp_gap.l4    7.53485   54.23418   0.139   0.8900    
## rer_gap.l4    -0.05594    0.14853  -0.377   0.7079    
## infl.l4       -0.71940    0.63746  -1.129   0.2640    
## mpr.l4         0.11275    0.66525   0.169   0.8660    
## const          0.12526    1.69449   0.074   0.9413    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 2.684 on 55 degrees of freedom
## Multiple R-Squared: 0.6974,  Adjusted R-squared: 0.6093 
## F-statistic: 7.921 on 16 and 55 DF,  p-value: 2.866e-09 
## 
## 
## Estimation results for equation infl: 
## ===================================== 
## infl = lgdp_gap.l1 + rer_gap.l1 + infl.l1 + mpr.l1 + lgdp_gap.l2 + rer_gap.l2 + infl.l2 + mpr.l2 + lgdp_gap.l3 + rer_gap.l3 + infl.l3 + mpr.l3 + lgdp_gap.l4 + rer_gap.l4 + infl.l4 + mpr.l4 + const 
## 
##              Estimate Std. Error t value Pr(>|t|)    
## lgdp_gap.l1  47.78657    9.24282   5.170 3.37e-06 ***
## rer_gap.l1   -0.08500    0.02565  -3.314 0.001629 ** 
## infl.l1       0.21963    0.13272   1.655 0.103657    
## mpr.l1       -0.09943    0.14734  -0.675 0.502587    
## lgdp_gap.l2 -21.03914   13.36257  -1.574 0.121111    
## rer_gap.l2    0.01791    0.03668   0.488 0.627208    
## infl.l2       0.04707    0.14333   0.328 0.743833    
## mpr.l2        0.23317    0.24463   0.953 0.344689    
## lgdp_gap.l3  -3.76776   14.00362  -0.269 0.788893    
## rer_gap.l3   -0.01454    0.03654  -0.398 0.692218    
## infl.l3       0.30707    0.14079   2.181 0.033470 *  
## mpr.l3       -0.21929    0.21785  -1.007 0.318532    
## lgdp_gap.l4 -13.76148    9.90814  -1.389 0.170460    
## rer_gap.l4   -0.01607    0.02714  -0.592 0.556090    
## infl.l4      -0.22856    0.11646  -1.963 0.054764 .  
## mpr.l4        0.10282    0.12154   0.846 0.401196    
## const         1.24027    0.30957   4.006 0.000187 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 0.4904 on 55 degrees of freedom
## Multiple R-Squared: 0.7452,  Adjusted R-squared: 0.6711 
## F-statistic: 10.05 on 16 and 55 DF,  p-value: 3.923e-11 
## 
## 
## Estimation results for equation mpr: 
## ==================================== 
## mpr = lgdp_gap.l1 + rer_gap.l1 + infl.l1 + mpr.l1 + lgdp_gap.l2 + rer_gap.l2 + infl.l2 + mpr.l2 + lgdp_gap.l3 + rer_gap.l3 + infl.l3 + mpr.l3 + lgdp_gap.l4 + rer_gap.l4 + infl.l4 + mpr.l4 + const 
## 
##               Estimate Std. Error t value Pr(>|t|)    
## lgdp_gap.l1  14.110452   7.290201   1.936 0.058072 .  
## rer_gap.l1    0.003901   0.020228   0.193 0.847789    
## infl.l1      -0.254183   0.104683  -2.428 0.018476 *  
## mpr.l1        1.343532   0.116209  11.561  2.4e-16 ***
## lgdp_gap.l2  -2.428290  10.539628  -0.230 0.818639    
## rer_gap.l2   -0.050024   0.028930  -1.729 0.089395 .  
## infl.l2      -0.035159   0.113047  -0.311 0.756968    
## mpr.l2       -0.596052   0.192953  -3.089 0.003146 ** 
## lgdp_gap.l3  -2.546084  11.045251  -0.231 0.818548    
## rer_gap.l3    0.002923   0.028819   0.101 0.919578    
## infl.l3       0.113665   0.111044   1.024 0.310502    
## mpr.l3        0.546086   0.171828   3.178 0.002433 ** 
## lgdp_gap.l4 -12.000790   7.814974  -1.536 0.130366    
## rer_gap.l4    0.023868   0.021403   1.115 0.269619    
## infl.l4       0.013647   0.091856   0.149 0.882440    
## mpr.l4       -0.348872   0.095861  -3.639 0.000605 ***
## const         0.473354   0.244171   1.939 0.057686 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 0.3868 on 55 degrees of freedom
## Multiple R-Squared: 0.9645,  Adjusted R-squared: 0.9542 
## F-statistic: 93.46 on 16 and 55 DF,  p-value: < 2.2e-16 
## 
## 
## 
## Covariance matrix of residuals:
##           lgdp_gap  rer_gap      infl        mpr
## lgdp_gap 7.458e-05 0.007061  0.002200  0.0004499
## rer_gap  7.061e-03 7.205575  0.007083  0.0753016
## infl     2.200e-03 0.007083  0.240496 -0.0121156
## mpr      4.499e-04 0.075302 -0.012116  0.1496158
## 
## Correlation matrix of residuals:
##          lgdp_gap rer_gap     infl      mpr
## lgdp_gap   1.0000 0.30457  0.51948  0.13468
## rer_gap    0.3046 1.00000  0.00538  0.07252
## infl       0.5195 0.00538  1.00000 -0.06387
## mpr        0.1347 0.07252 -0.06387  1.00000


Do we have the correct number of lags?

lag_selection <- VARselect(ts_data, lag.max = 8, type = "const")
print(lag_selection)
## $selection
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##      3      2      2      3 
## 
## $criteria
##                    1             2             3             4             5
## AIC(n) -1.007545e+01 -1.095356e+01 -1.102013e+01 -1.092064e+01 -1.075926e+01
## HQ(n)  -9.816791e+00 -1.048798e+01 -1.034762e+01 -1.004120e+01 -9.672901e+00
## SC(n)  -9.422652e+00 -9.778527e+00 -9.322860e+00 -8.701134e+00 -8.017519e+00
## FPE(n)  4.214540e-05  1.760528e-05  1.668370e-05  1.888128e-05  2.309494e-05
##                    6             7             8
## AIC(n) -1.057830e+01 -1.067131e+01 -1.092060e+01
## HQ(n)  -9.285010e+00 -9.171097e+00 -9.213454e+00
## SC(n)  -7.314318e+00 -6.885094e+00 -6.612140e+00
## FPE(n)  2.941873e-05  2.928367e-05  2.583297e-05


VAR Diagnostics

Comparing the lag criteria, a lag length of 2 (which is the lowest in the lag_selection) proves to be the best fit.

# Estimate 1-lag VAR
var_model1 <- VAR(ts_data, p = 1, type = "const")

# Estimate 2-lag VAR
var_model2 <- VAR(ts_data, p = 2, type = "const") #As per lowest lag_selection
lag_1 <- serial.test(var_model1, lags.pt = 12, type = "PT.asymptotic")
lag_2 <- serial.test(var_model2, lags.pt = 12, type = "PT.asymptotic")
lag_4 <- serial.test(var_model,  lags.pt = 12, type = "PT.asymptotic")
summary_table <- data.frame(
  Model       = c("VAR(1)", "VAR(2)", "VAR(4)"),
  Lags        = c(1, 2, 4),
  Chi_squared = c(lag_1$serial$statistic, lag_2$serial$statistic, lag_4$serial$statistic),
  DF          = c(lag_1$serial$parameter, lag_2$serial$parameter, lag_4$serial$parameter),
  P_value     = c(lag_1$serial$p.value, lag_2$serial$p.value, lag_4$serial$p.value)
)

print(summary_table)
##    Model Lags Chi_squared  DF    P_value
## 1 VAR(1)    1    212.2999 176 0.03210192
## 2 VAR(2)    2    164.0685 160 0.39637665
## 3 VAR(4)    4    159.7747 128 0.02986308


Forecasting with VARs From 2013Q1 to 2014Q4.

library(MTS)
forecast_horizon <- 8
forecast_var <- predict(var_model2, n.ahead = forecast_horizon, ci = 0.95)

quarter_labels <- paste(
  rep(2013:2014, each = 4),
  rep(paste0("Q", 1:4), times = 2)
)

# Function to build one forecast dataframe
get_forecast_df <- function(varname) {
  df <- as.data.frame(forecast_var$fcst[[varname]][, 1:3])
  colnames(df) <- c("Forecast", "Lower_95", "Upper_95")
  df$Quarter <- quarter_labels
  df <- df[, c("Quarter", "Forecast", "Lower_95", "Upper_95")]
  return(df)
}

# dataframe per variable
lgdp_forecast <- get_forecast_df("lgdp_gap")
rer_forecast  <- get_forecast_df("rer_gap")
infl_forecast <- get_forecast_df("infl")
mpr_forecast  <- get_forecast_df("mpr")

print(lgdp_forecast)
##   Quarter     Forecast    Lower_95   Upper_95
## 1 2013 Q1 -0.001433666 -0.01813176 0.01526443
## 2 2013 Q2 -0.001605072 -0.02905950 0.02584936
## 3 2013 Q3 -0.003529385 -0.03716367 0.03010490
## 4 2013 Q4 -0.006715405 -0.04352778 0.03009697
## 5 2014 Q1 -0.009542119 -0.04778571 0.02870148
## 6 2014 Q2 -0.010750412 -0.04963323 0.02813240
## 7 2014 Q3 -0.010064346 -0.04951148 0.02938279
## 8 2014 Q4 -0.008026854 -0.04816171 0.03210800
print(rer_forecast)
##   Quarter    Forecast  Lower_95 Upper_95
## 1 2013 Q1  1.88506771 -3.178393 6.948528
## 2 2013 Q2 -0.20967253 -7.428769 7.009424
## 3 2013 Q3 -1.40308378 -9.352383 6.546215
## 4 2013 Q4 -1.75603426 -9.847076 6.335008
## 5 2014 Q1 -1.54853572 -9.740563 6.643491
## 6 2014 Q2 -1.04508858 -9.427562 7.337385
## 7 2014 Q3 -0.45293143 -9.033161 8.127298
## 8 2014 Q4  0.07549147 -8.633708 8.784691
print(infl_forecast)
##   Quarter Forecast    Lower_95 Upper_95
## 1 2013 Q1 1.099815 -0.05079331 2.250423
## 2 2013 Q2 1.381828 -0.17728478 2.940940
## 3 2013 Q3 1.604991 -0.11729321 3.327274
## 4 2013 Q4 1.691276 -0.07314624 3.455698
## 5 2014 Q1 1.688544 -0.09108533 3.468174
## 6 2014 Q2 1.675163 -0.11977709 3.470103
## 7 2014 Q3 1.697965 -0.11514146 3.511071
## 8 2014 Q4 1.760712 -0.07234263 3.593766
print(mpr_forecast)
##   Quarter Forecast   Lower_95 Upper_95
## 1 2013 Q1 1.141501  0.2488894 2.034113
## 2 2013 Q2 1.285519 -0.1773365 2.748375
## 3 2013 Q3 1.422391 -0.4766295 3.321412
## 4 2013 Q4 1.539580 -0.6780751 3.757235
## 5 2014 Q1 1.635219 -0.8118151 4.082254
## 6 2014 Q2 1.722411 -0.8937767 4.338599
## 7 2014 Q3 1.815382 -0.9291720 4.559936
## 8 2014 Q4 1.917774 -0.9258196 4.761367


Plot spliced line chart for historical and forecast.

historical_data <- data[data$date >= as.yearqtr("2010 Q1") & data$date <= as.yearqtr("2014 Q4"), ]

# Create Quarter labels
historical_data$Quarter <- paste(format(historical_data$date, "%Y"),
                                 paste0("Q", as.numeric(cycle(historical_data$date))))

forecast_quarters <- paste(rep(2013:2014, each = 4), paste0("Q", 1:4))

plot_spliced_series <- function(forecast_df, historical_values, full_quarters, forecast_quarters, varname) {
  # Format forecast
  forecast_df$Type <- "Forecast"
  forecast_df <- forecast_df[, c("Quarter", "Forecast", "Type")]
  colnames(forecast_df) <- c("Quarter", "Value", "Type")
  
  actual_df <- data.frame(
    Quarter = full_quarters,
    Value = historical_values,
    Type = "Actual"
  )
  
  full_series <- rbind(actual_df, forecast_df)
  full_series$Quarter <- factor(full_series$Quarter, levels = unique(c(full_quarters, forecast_quarters)))
  
  ggplot(full_series, aes(x = Quarter, y = Value, group = Type, color = Type)) +
    geom_line(size = 1.2) +
    geom_vline(xintercept = "2013 Q1", linetype = "dashed", color = "gray50") +
    scale_color_manual(values = c("Actual" = "black", "Forecast" = "red")) +
    labs(title = paste("Historical + Forecast:", varname),
         x = "Quarter", y = varname, color = "") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          plot.title = element_text(size = 14, face = "bold"))
}

full_quarters <- historical_data$Quarter

print(plot_spliced_series(lgdp_forecast, historical_data$lgdp_gap, full_quarters, forecast_quarters, "lgdp_gap"))

print(plot_spliced_series(rer_forecast,  historical_data$rer_gap,  full_quarters, forecast_quarters, "rer_gap"))

print(plot_spliced_series(infl_forecast, historical_data$infl,     full_quarters, forecast_quarters, "infl"))

print(plot_spliced_series(mpr_forecast,  historical_data$mpr,      full_quarters, forecast_quarters, "mpr"))


Conclusions: The forecast results from the VAR model show notable deviations from the actual data over the forecast horizon (2013Q1 to 2014Q4). For instance, the model consistently overpredicts the monetary policy rate, projecting a steady increase while the actual rate remained flat at 1%. Similarly, it underpredicts the output gap, especially in 2014, and shows a mismatch in the real exchange rate gap—underpredicting in 2013 and overpredicting in 2014. While inflation forecasts appear somewhat closer, the model still misses key fluctuations i.e. consider SVAR.