Daniel RG

11/18/2020

Series: Exports: Value Goods for the United States, National currency, Monthly Level, Not Seasonally Adjusted (XTEXVA01USM664N)

Source: OECD Main Economic Indicators database. Accessed through FRED, Federal Reserve Bank of St. Louis.

library(fpp2)
library(ggplot2)
library(ggthemes)
library(tidyverse)
library(kableExtra)

# Load the FRED CSV and sort observations into chronological order
xus <- read.csv("F:/MSAE/2020fall_PredictiveAnalytics/Discussions/XTEXVA01USM664N.csv")
xus <- xus[order(xus$DATE),]
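
The FRED CSV ships DATE as ISO-formatted text (YYYY-MM-DD), so the character sort above happens to be chronological. Converting to a proper Date first makes the ordering explicit; a small sketch:

# Sketch: sort on a real Date instead of relying on ISO string order
xus$DATE <- as.Date(xus$DATE)
xus <- xus[order(xus$DATE), ]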


# Monthly series, January 1990 through September 2020, rescaled to US$ billions
xusts1 <- ts(xus[,2]/1000000000,start=c(1990,1),end=c(2020,9),frequency=12)


plot1 <- autoplot(xusts1) +
  xlab("Year") + ylab("") +
  ggtitle("Exports: Value Goods for the United States, US$ Billions") +
  theme_classic()

plot1

ggseasonplot(xusts1)

From the charts it is clear there is a seasonal component, and the recessions of the early 2000s, 2009, and the recent pandemic are easy to pick out as sharp drops in the level.
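
Two quick ways to confirm the seasonal pattern, using standard fpp2 helpers:

# One mini-series per calendar month; level differences across panels indicate seasonality
ggsubseriesplot(xusts1) + ylab("") + ggtitle("Seasonal subseries plot")

# The slow ACF decay reflects the trend; the scalloped bumps every 12 lags reflect seasonality
ggAcf(xusts1, lag.max = 36)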

Decompositions

# Classical decompositions into trend, seasonal and remainder components
decadd <- decompose(xusts1, type = "additive")
decmul <- decompose(xusts1, type = "multiplicative")

autoplot(decadd)

autoplot(decmul)

As expected, the remainder component is larger during the recessions and the subsequent recoveries, which will make prediction in the current environment less accurate. To avoid the pandemic period, I will only use data through 2019.
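
This is easy to see by zooming the remainder in on a recession window; a sketch using the multiplicative decomposition above:

# Remainder component around the 2008-09 recession, where the swings are widest
autoplot(window(decmul$random, start = c(2006, 1), end = c(2012, 12))) +
  xlab("Year") + ylab("") +
  ggtitle("Multiplicative decomposition: remainder, 2006-2012") +
  theme_classic()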

ETS

# Rebuild the series through December 2019, excluding the pandemic period
xusts1 <- ts(xus[,2]/1000000000,start=c(1990,1),end=c(2019,12),frequency=12)

# Training set: the last six months of 2019 are held out for testing
xusts <- window(xusts1, start = c(1990,1), end = c(2019, 6))


ets1 <- ets(xusts, model = "ZZZ")  # "ZZZ": error, trend and seasonal terms chosen automatically

summary(ets1)
## ETS(M,Ad,M) 
## 
## Call:
##  ets(y = xusts, model = "ZZZ") 
## 
##   Smoothing parameters:
##     alpha = 0.7722 
##     beta  = 0.02 
##     gamma = 1e-04 
##     phi   = 0.9796 
## 
##   Initial states:
##     l = 32.083 
##     b = 0.1991 
##     s = 0.9989 1.0077 1.0548 0.9888 0.9943 0.9463
##            1.0258 1.0188 1.0009 1.0807 0.9494 0.9337
## 
##   sigma:  0.03
## 
##      AIC     AICc      BIC 
## 2664.863 2666.905 2734.511 
## 
## Training set error measures:
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.1559424 2.492378 1.832065 0.1817695 2.305664 0.2571659
##                    ACF1
## Training set 0.03645486
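The selected ETS(M,Ad,M) pairs multiplicative errors with a damped additive trend and multiplicative seasonality. To skip the search on later runs, the same specification can be pinned down directly; a minimal sketch:

# Sketch: fit the selected ETS(M,Ad,M) directly instead of re-running the search
ets1_fixed <- ets(xusts, model = "MAM", damped = TRUE)
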
autoplot(ets1)

checkresiduals(ets1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(M,Ad,M)
## Q* = 192.33, df = 7, p-value < 2.2e-16
## 
## Model df: 17.   Total lags used: 24
ets1 %>% forecast(h=6) %>%
  autoplot() + autolayer(ts(ets1$fitted, start=c(1990,1),end=c(2019,6),frequency=12), series="Fitted Values")

kable(round(accuracy(ets1),2), caption = "ETS") %>% 
  kable_classic(full_width = F)
ETS

              ME    RMSE  MAE   MPE   MAPE  MASE  ACF1
Training set  0.16  2.49  1.83  0.18  2.31  0.26  0.04

Auto Arima

aa1 <- auto.arima(xusts)

summary(aa1)
## Series: xusts 
## ARIMA(2,0,3)(0,1,2)[12] with drift 
## 
## Coefficients:
##          ar1      ar2      ma1     ma2     ma3     sma1     sma2   drift
##       1.6918  -0.7285  -0.9645  0.3218  0.1119  -0.4481  -0.2657  0.3109
## s.e.  0.0940   0.0911   0.1082  0.0890  0.0717   0.0618   0.0565  0.0468
## 
## sigma^2 estimated as 6.532:  log likelihood=-807.26
## AIC=1632.53   AICc=1633.07   BIC=1667.04
## 
## Training set error measures:
##                       ME     RMSE      MAE        MPE     MAPE     MASE
## Training set -0.01354908 2.482566 1.838812 -0.1640364 2.292511 0.258113
##                      ACF1
## Training set -0.001366234
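As with the ETS model, the selected ARIMA(2,0,3)(0,1,2)[12] with drift can be pinned down once known, since auto.arima's stepwise search may land elsewhere as data or package versions change; a minimal sketch:

# Sketch: refit the selected order directly with Arima()
aa1_fixed <- Arima(xusts, order = c(2, 0, 3), seasonal = c(0, 1, 2),
                   include.drift = TRUE)
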
checkresiduals(aa1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,0,3)(0,1,2)[12] with drift
## Q* = 69.172, df = 16, p-value = 1.393e-08
## 
## Model df: 8.   Total lags used: 24
aa1 %>% forecast(h=6) %>%
  autoplot() + autolayer(ts(aa1$fitted, start=c(1990,1),end=c(2019,6),frequency=12), series="Fitted Values")

kable(round(accuracy(aa1),2), caption = "Auto Arima") %>% 
  kable_classic(full_width = F)
Auto Arima

               ME    RMSE  MAE    MPE   MAPE  MASE  ACF1
Training set  -0.01  2.48  1.84  -0.16  2.29  0.26  0.00

Selection Summary

# Collect information criteria from both fitted models
c1 <- c("ETS Selection (M,Ad,M)", "Auto Arima")
c2 <- c(ets1$aic, aa1$aic)
c3 <- c(ets1$aicc, aa1$aicc)
c4 <- c(ets1$bic, aa1$bic)


table1 <- data.frame(c1,c2,c3,c4)
colnames(table1) <- c("Model", "AIC", "AICc", "BIC" )

kable(table1, caption = "Summary Table") %>% kable_classic(full_width = F)
Summary Table

Model                    AIC       AICc      BIC
ETS Selection (M,Ad,M)   2664.863  2666.905  2734.511
Auto Arima               1632.525  1633.068  1667.039

The Auto Arima model has much lower AIC, AICc and BIC values, but these criteria are only comparable within a model class: the ARIMA likelihood is computed on the seasonally differenced series, so its information criteria cannot be set against the ETS model's directly. Out-of-sample accuracy is a safer basis for choosing between the two.
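
Time-series cross-validation is one class-agnostic check; a sketch using the specifications selected above (slow, since both models are refit at every forecast origin):

# Sketch: one-step CV errors for both models; tsCV() returns NA where a fit fails
fets   <- function(x, h) forecast(ets(x, model = "MAM", damped = TRUE), h = h)
farima <- function(x, h) forecast(Arima(x, order = c(2, 0, 3),
                                        seasonal = c(0, 1, 2),
                                        include.drift = TRUE), h = h)
e_ets   <- tsCV(xusts, fets,   h = 1, initial = 120)  # skip the first 10 years
e_arima <- tsCV(xusts, farima, h = 1, initial = 120)
sqrt(mean(e_ets^2,   na.rm = TRUE))   # CV RMSE, ETS
sqrt(mean(e_arima^2, na.rm = TRUE))   # CV RMSE, ARIMA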

# Actuals for plotting (2015-2019) and the held-out test set (Jul-Dec 2019)
xusts2 <- window(xusts1, start = c(2015,1), end = c(2019, 12))
xusts3 <- window(xusts1, start = c(2019,7), end = c(2019, 12))


f1 <- forecast(ets1, h=6)

autoplot(f1$mean, series = "ETS Model") + autolayer(xusts2, series = "Actual Data") + ggtitle("ETS Model vs Actual Data")

kable(round(accuracy(f1,xusts3),2), caption = "ETS Model") %>% 
  kable_classic(full_width = F)
ETS Model

              ME    RMSE  MAE   MPE   MAPE  MASE  ACF1  Theil’s U
Training set  0.16  2.49  1.83  0.18  2.31  0.26  0.04  NA
Test set      1.29  2.50  1.63  0.96  1.20  0.23  0.43  0.31

f2 <- forecast(aa1, h=6)

autoplot(f2$mean, series = "Arima Model") + autolayer(xusts2, series = "Actual Data") + ggtitle("Arima Model vs Actual Data")

kable(round(accuracy(f2,xusts3),2), caption = "Arima Model") %>% 
  kable_classic(full_width = F)
Arima Model

               ME    RMSE  MAE    MPE   MAPE  MASE  ACF1  Theil’s U
Training set  -0.01  2.48  1.84  -0.16  2.29  0.26  0.00  NA
Test set       2.40  2.67  2.40   1.76  1.76  0.34  0.15  0.46

However, the ARIMA model performed worse on the test set (July to December 2019): its RMSE, MAE, MAPE and Theil’s U are all higher than the ETS model's, so the ETS model is preferable for this six-month horizon.
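
Stacking the two test-set rows makes the comparison explicit; a small sketch reusing the forecast objects above:

# Sketch: side-by-side test-set accuracy for both models
acc <- rbind("ETS" = accuracy(f1, xusts3)["Test set", ],
             "Auto Arima" = accuracy(f2, xusts3)["Test set", ])
kable(round(acc, 2), caption = "Test-set accuracy comparison") %>%
  kable_classic(full_width = F)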