Dataset

Canada Unemployment Rate (1991-2022)
External Regressor : Inflation Rate
Obtained from World Bank Open Data

Load Packages & Dataset

# load packages
library(readr)
library(ggplot2)
library(forecast)
library(ggfortify)
# import dataset
df <- read_csv("/Users/laykhoonyu/Documents/MSAA_AE/predictive_analytics/week_4/CND_INF_UNEMP.csv")

Data Cleaning & Visualization

# generate summary of dataset
summary(df)
##       Date              Inflation       Unemployment   
##  Min.   :1991-01-01   Min.   :0.1656   Min.   : 5.280  
##  1st Qu.:1998-10-07   1st Qu.:1.5093   1st Qu.: 6.812  
##  Median :2006-07-14   Median :1.8859   Median : 7.460  
##  Mean   :2006-07-14   Mean   :2.0842   Mean   : 7.805  
##  3rd Qu.:2014-04-19   3rd Qu.:2.2937   3rd Qu.: 8.620  
##  Max.   :2022-01-24   Max.   :6.8028   Max.   :11.380
# Convert Date variable to Date format
df$Date <- as.Date(df$Date, format = "%Y/%d/%m")
# Convert dataset to time series
ts_data <- ts(df, start = c(1991-01-01), frequency = 1)
# plot time series of inflation and unemployment
autoplot(ts_data[,2:3], facets=TRUE) +
  xlab("Year") + ylab("") +
  ggtitle("Yearly Inflation and Unemployment Rate in Canada")

# Create the training and test datasets
train_data <- window(ts_data, end = c(2011, 01))
test_data <- window(ts_data, start = c(2012, 01))

ARIMA Model with External Regressor

# build ARIMA model with inflation as external regressor
fit_arima <- auto.arima(train_data[,3], xreg = train_data[,2])
# generate summary of ARIMA model
summary(fit_arima)
## Series: train_data[, 3] 
## Regression with ARIMA(0,1,0) errors 
## 
## Coefficients:
##          xreg
##       -0.2544
## s.e.   0.0999
## 
## sigma^2 = 0.3947:  log likelihood = -20.48
## AIC=44.96   AICc=45.59   BIC=47.14
## 
## Training set error measures:
##                      ME      RMSE       MAE       MPE     MAPE      MASE
## Training set -0.1896008 0.6003182 0.4474376 -2.399285 5.544001 0.8327941
##                   ACF1
## Training set 0.2342905
# recover and plot estimates for regression and ARIMA error
cbind("Regression Errors" = residuals(fit_arima, type="regression"),
      "ARIMA errors" = residuals(fit_arima, type="innovation")) %>%
  autoplot(facets=TRUE)

checkresiduals(fit_arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(0,1,0) errors
## Q* = 4.1446, df = 5, p-value = 0.5288
## 
## Model df: 0.   Total lags used: 5
arima_forecast <- forecast(fit_arima, xreg = test_data[,2], h = 9)
autoplot(arima_forecast)

Basic ARIMA Model

# build ARIMA model 
fit_basic_arima <- auto.arima(train_data[,3])
summary(fit_basic_arima)
## Series: train_data[, 3] 
## ARIMA(0,1,0) 
## 
## sigma^2 = 0.4879:  log likelihood = -23.32
## AIC=48.64   AICc=48.84   BIC=49.73
## 
## Training set error measures:
##                      ME      RMSE       MAE       MPE     MAPE      MASE
## Training set -0.1378122 0.6831292 0.5143617 -1.950286 6.273343 0.9573569
##                   ACF1
## Training set 0.1950374
checkresiduals(fit_basic_arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,0)
## Q* = 3.593, df = 5, p-value = 0.6094
## 
## Model df: 0.   Total lags used: 5
basic_arima_forecast <- forecast(fit_basic_arima, h = 9)
autoplot(basic_arima_forecast)

ETS Model

# build ets model
fit_ets <- ets(train_data[, 3])
# generate summary of ETS model
summary(fit_ets)
## ETS(A,N,N) 
## 
## Call:
##  ets(y = train_data[, 3]) 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
## 
##   Initial states:
##     l = 10.3212 
## 
##   sigma:  0.7149
## 
##      AIC     AICc      BIC 
## 60.58790 61.85106 63.99439 
## 
## Training set error measures:
##                      ME      RMSE       MAE       MPE     MAPE      MASE
## Training set -0.1383258 0.6831413 0.5139729 -1.955337 6.269605 0.9566331
##                   ACF1
## Training set 0.1940272
ets_forecast <- forecast(fit_ets, h = 9)
autoplot(ets_forecast)

Visual Comparison

# build plot to compare forecast between all models
autoplot(train_data[, 3]) +
  autolayer(ets_forecast,
    series="ETS", PI=FALSE) +
  autolayer(arima_forecast,
    series="ARIMA w External Regressor", PI=FALSE) +
  autolayer(basic_arima_forecast,
    series="Basic ARIMA", PI=FALSE) +
  autolayer(test_data[, 3],
    series = "Actual") +
  ggtitle("Comparison between Forecasts for Canada's Unemployment Rate") +
  xlab("Year") + ylab("Unemployment") +
  guides(colour=guide_legend(title="Forecast"))

Analysis

Visually, the ARIMA model with external regressor does a better job than the ETS model and basic ARIMA model at capturing the patterns of the time series. Even though it is not exactly the same as the actual data, it peaks and drops at approximately the same time. In terms of MAE and RMSE, the ARIMA model with external regressor also has lower values which indicates better model performance. Lastly, the ARIMA model also has lower AIC and BIC values which indicates better-fitting models. For these reasons, the ARIMA model with external regressor is the better model for this data set.