Canada Unemployment Rate (1991-2022)
External Regressor : Inflation Rate
Obtained from World Bank Open Data
# load packages
library(readr)
library(ggplot2)
library(forecast)
library(ggfortify)
# import dataset
df <- read_csv("/Users/laykhoonyu/Documents/MSAA_AE/predictive_analytics/week_4/CND_INF_UNEMP.csv")
# generate summary of dataset
summary(df)
## Date Inflation Unemployment
## Min. :1991-01-01 Min. :0.1656 Min. : 5.280
## 1st Qu.:1998-10-07 1st Qu.:1.5093 1st Qu.: 6.812
## Median :2006-07-14 Median :1.8859 Median : 7.460
## Mean :2006-07-14 Mean :2.0842 Mean : 7.805
## 3rd Qu.:2014-04-19 3rd Qu.:2.2937 3rd Qu.: 8.620
## Max. :2022-01-24 Max. :6.8028 Max. :11.380
# Convert Date variable to Date format
df$Date <- as.Date(df$Date, format = "%Y/%d/%m")
# Convert dataset to time series
ts_data <- ts(df, start = c(1991-01-01), frequency = 1)
# plot time series of inflation and unemployment
autoplot(ts_data[,2:3], facets=TRUE) +
xlab("Year") + ylab("") +
ggtitle("Yearly Inflation and Unemployment Rate in Canada")
# Create the training and test datasets
train_data <- window(ts_data, end = c(2011, 01))
test_data <- window(ts_data, start = c(2012, 01))
# build ARIMA model with inflation as external regressor
fit_arima <- auto.arima(train_data[,3], xreg = train_data[,2])
# generate summary of ARIMA model
summary(fit_arima)
## Series: train_data[, 3]
## Regression with ARIMA(0,1,0) errors
##
## Coefficients:
## xreg
## -0.2544
## s.e. 0.0999
##
## sigma^2 = 0.3947: log likelihood = -20.48
## AIC=44.96 AICc=45.59 BIC=47.14
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1896008 0.6003182 0.4474376 -2.399285 5.544001 0.8327941
## ACF1
## Training set 0.2342905
# recover and plot estimates for regression and ARIMA error
cbind("Regression Errors" = residuals(fit_arima, type="regression"),
"ARIMA errors" = residuals(fit_arima, type="innovation")) %>%
autoplot(facets=TRUE)
checkresiduals(fit_arima)
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,1,0) errors
## Q* = 4.1446, df = 5, p-value = 0.5288
##
## Model df: 0. Total lags used: 5
arima_forecast <- forecast(fit_arima, xreg = test_data[,2], h = 9)
autoplot(arima_forecast)
# build ARIMA model
fit_basic_arima <- auto.arima(train_data[,3])
summary(fit_basic_arima)
## Series: train_data[, 3]
## ARIMA(0,1,0)
##
## sigma^2 = 0.4879: log likelihood = -23.32
## AIC=48.64 AICc=48.84 BIC=49.73
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1378122 0.6831292 0.5143617 -1.950286 6.273343 0.9573569
## ACF1
## Training set 0.1950374
checkresiduals(fit_basic_arima)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0)
## Q* = 3.593, df = 5, p-value = 0.6094
##
## Model df: 0. Total lags used: 5
basic_arima_forecast <- forecast(fit_basic_arima, h = 9)
autoplot(basic_arima_forecast)
# build ets model
fit_ets <- ets(train_data[, 3])
# generate summary of ETS model
summary(fit_ets)
## ETS(A,N,N)
##
## Call:
## ets(y = train_data[, 3])
##
## Smoothing parameters:
## alpha = 0.9999
##
## Initial states:
## l = 10.3212
##
## sigma: 0.7149
##
## AIC AICc BIC
## 60.58790 61.85106 63.99439
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1383258 0.6831413 0.5139729 -1.955337 6.269605 0.9566331
## ACF1
## Training set 0.1940272
ets_forecast <- forecast(fit_ets, h = 9)
autoplot(ets_forecast)
# build plot to compare forecast between all models
autoplot(train_data[, 3]) +
autolayer(ets_forecast,
series="ETS", PI=FALSE) +
autolayer(arima_forecast,
series="ARIMA w External Regressor", PI=FALSE) +
autolayer(basic_arima_forecast,
series="Basic ARIMA", PI=FALSE) +
autolayer(test_data[, 3],
series = "Actual") +
ggtitle("Comparison between Forecasts for Canada's Unemployment Rate") +
xlab("Year") + ylab("Unemployment") +
guides(colour=guide_legend(title="Forecast"))
Visually, the ARIMA model with external regressor does a better job than the ETS model and basic ARIMA model at capturing the patterns of the time series. Even though it is not exactly the same as the actual data, it peaks and drops at approximately the same time. In terms of MAE and RMSE, the ARIMA model with external regressor also has lower values which indicates better model performance. Lastly, the ARIMA model also has lower AIC and BIC values which indicates better-fitting models. For these reasons, the ARIMA model with external regressor is the better model for this data set.