Series: Exports: Value Goods for the United States, National currency, Monthly Level, Not Seasonally Adjusted (XTEXVA01USM664N)
Source: OECD Main Economic Indicators database. Accessed through FRED, Federal Reserve Bank of St. Louis.
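As an alternative to reading a downloaded CSV, the same series can be pulled straight from FRED. A minimal sketch is shown below, assuming the quantmod package is installed and internet access is available (quantmod is not used elsewhere in this analysis).
library(quantmod)
# Download the monthly export series directly from FRED by its series ID
getSymbols("XTEXVA01USM664N", src = "FRED")
# getSymbols assigns an xts object named after the series ID
head(XTEXVA01USM664N)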
library(fpp2)
library(ggplot2)
library(ggthemes)
library(tidyverse)
library(kableExtra)
xus <- read.csv("F:/MSAE/2020fall_PredictiveAnalytics/Discussions/XTEXVA01USM664N.csv")
xus <- xus[order(xus$DATE), ]  # ensure observations are in chronological order
xusts1 <- ts(xus[, 2] / 1000000000,  # convert to US$ billions
             start = c(1990, 1), end = c(2020, 9), frequency = 12)
plot1 <- autoplot(xusts1) +
xlab("Year") + ylab("") + ggtitle("Exports: Value Goods for the United States, US$ Billions") + theme_classic()
plot1
ggseasonplot(xusts1)
From the charts it is clear there is a seasonal component, and we can also identify the recessions of the early 2000s, 2009, and the recent pandemic.
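The visual impression of seasonality can be checked a bit more formally. A short sketch using functions from the forecast package (loaded via fpp2) is below.
# Subseries plot: one panel per month highlights the recurring seasonal pattern
ggsubseriesplot(xusts1) + ylab("US$ Billions")
# nsdiffs() estimates how many seasonal differences are needed; a value of 1
# indicates a seasonal pattern strong enough to require differencing
nsdiffs(xusts1)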
decadd <- decompose(xusts1, type = "additive")
decmul <- decompose(xusts1, type = "multiplicative")
autoplot(decadd)
autoplot(decmul)
As expected, the errors (the remainder component) are larger during recessions and the subsequent recoveries, which will make prediction in the current environment less accurate. To sidestep this, I will only use data through 2019.
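To see where the decomposition struggles, the remainder component can be inspected directly. A short sketch using the additive decomposition already computed is below; the windowed standard deviation is an assumption-free way to quantify the claim rather than a result reported here.
# Plot the remainder (irregular) component of the additive decomposition
autoplot(decadd$random) + xlab("Year") + ylab("Remainder") + theme_classic()
# Compare the remainder's spread around the 2008-09 recession with the full sample
sd(window(decadd$random, start = c(2008, 1), end = c(2010, 12)), na.rm = TRUE)
sd(decadd$random, na.rm = TRUE)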
xusts1 <- ts(xus[,2]/1000000000,start=c(1990,1),end=c(2019,12),frequency=12)
xusts <- window(xusts1, start = c(1990,1), end = c(2019, 6))
ets1 <- ets(xusts, model = "ZZZ")
summary(ets1)
## ETS(M,Ad,M)
##
## Call:
## ets(y = xusts, model = "ZZZ")
##
## Smoothing parameters:
## alpha = 0.7722
## beta = 0.02
## gamma = 1e-04
## phi = 0.9796
##
## Initial states:
## l = 32.083
## b = 0.1991
## s = 0.9989 1.0077 1.0548 0.9888 0.9943 0.9463
## 1.0258 1.0188 1.0009 1.0807 0.9494 0.9337
##
## sigma: 0.03
##
## AIC AICc BIC
## 2664.863 2666.905 2734.511
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1559424 2.492378 1.832065 0.1817695 2.305664 0.2571659
## ACF1
## Training set 0.03645486
autoplot(ets1)
checkresiduals(ets1)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,Ad,M)
## Q* = 192.33, df = 7, p-value < 2.2e-16
##
## Model df: 17. Total lags used: 24
ets1 %>% forecast(h=6) %>%
autoplot() + autolayer(fitted(ets1), series="Fitted Values")
kable(round(accuracy(ets1),2), caption = "ETS") %>%
kable_classic(full_width = F)
|  | ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 |
|---|---|---|---|---|---|---|---|
| Training set | 0.16 | 2.49 | 1.83 | 0.18 | 2.31 | 0.26 | 0.04 |
aa1 <- auto.arima(xusts)
summary(aa1)
## Series: xusts
## ARIMA(2,0,3)(0,1,2)[12] with drift
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3 sma1 sma2 drift
## 1.6918 -0.7285 -0.9645 0.3218 0.1119 -0.4481 -0.2657 0.3109
## s.e. 0.0940 0.0911 0.1082 0.0890 0.0717 0.0618 0.0565 0.0468
##
## sigma^2 estimated as 6.532: log likelihood=-807.26
## AIC=1632.53 AICc=1633.07 BIC=1667.04
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01354908 2.482566 1.838812 -0.1640364 2.292511 0.258113
## ACF1
## Training set -0.001366234
checkresiduals(aa1)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,3)(0,1,2)[12] with drift
## Q* = 69.172, df = 16, p-value = 1.393e-08
##
## Model df: 8. Total lags used: 24
aa1 %>% forecast(h=6) %>%
autoplot() + autolayer(fitted(aa1), series="Fitted Values")
kable(round(accuracy(aa1),2), caption = "Auto Arima") %>%
kable_classic(full_width = F)
|  | ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 |
|---|---|---|---|---|---|---|---|
| Training set | -0.01 | 2.48 | 1.84 | -0.16 | 2.29 | 0.26 | 0 |
c1 <- c("ETS Selection (M,Ad,M)", "Auto Arima")
c2 <- c(ets1$aic, aa1$aic)
c3 <- c(ets1$aicc, aa1$aicc)
c4 <- c(ets1$bic, aa1$bic)
table1 <- data.frame(c1,c2,c3,c4)
colnames(table1) <- c("Model", "AIC", "AICc", "BIC" )
kable(table1, caption = "Summary Table") %>% kable_classic(full_width = F)
| Model | AIC | AICc | BIC |
|---|---|---|---|
| ETS Selection (M,Ad,M) | 2664.863 | 2666.905 | 2734.511 |
| Auto Arima | 1632.525 | 1633.068 | 1667.039 |
The Auto ARIMA model has the lowest AIC, AICc, and BIC values, which would suggest a better fit. However, information criteria are not directly comparable between ETS and ARIMA models (the ARIMA model also takes a seasonal difference, so it is fit to a shorter effective sample), so the comparison below relies on out-of-sample accuracy.
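Because information criteria cannot settle the ETS-versus-ARIMA question on their own, one way to put the two model classes on a common footing is time-series cross-validation. A minimal (and computationally slow) sketch with one-step-ahead errors is shown below; the helper functions fets and farima are introduced here only for illustration.
# One-step-ahead time series cross-validation, re-estimating each model at every origin
fets <- function(x, h) forecast(ets(x), h = h)
farima <- function(x, h) forecast(auto.arima(x), h = h)
e_ets <- tsCV(xusts, fets, h = 1)
e_arima <- tsCV(xusts, farima, h = 1)
# Cross-validated RMSE for each model class
sqrt(mean(e_ets^2, na.rm = TRUE))
sqrt(mean(e_arima^2, na.rm = TRUE))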
xusts2 <- window(xusts1, start = c(2015,1), end = c(2019, 12))
xusts3 <- window(xusts1, start = c(2019,7), end = c(2019, 12))
f1 <- forecast(ets1, h=6)
autoplot(f1$mean, series = "ETS Model") + autolayer(xusts2, series = "Actual Data")+ ggtitle("ETS Model vs Actual Data")
kable(round(accuracy(f1,xusts3),2), caption = "ETS Model") %>%
kable_classic(full_width = F)
|  | ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U |
|---|---|---|---|---|---|---|---|---|
| Training set | 0.16 | 2.49 | 1.83 | 0.18 | 2.31 | 0.26 | 0.04 | NA |
| Test set | 1.29 | 2.50 | 1.63 | 0.96 | 1.20 | 0.23 | 0.43 | 0.31 |
f2 <- forecast(aa1, h=6)
autoplot(f2$mean, series = "ARIMA Model") + autolayer(xusts2, series = "Actual Data")+ ggtitle("ARIMA Model vs Actual Data")
kable(round(accuracy(f2,xusts3),2), caption = "Arima Model") %>%
kable_classic(full_width = F)
|  | ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U |
|---|---|---|---|---|---|---|---|---|
| Training set | -0.01 | 2.48 | 1.84 | -0.16 | 2.29 | 0.26 | 0.00 | NA |
| Test set | 2.40 | 2.67 | 2.40 | 1.76 | 1.76 | 0.34 | 0.15 | 0.46 |
However, the ARIMA model performed worse than the ETS model on the test set (July 2019 through December 2019), with higher RMSE, MAE, and MAPE.
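For a side-by-side view over the holdout window, the test-set accuracy rows can be combined into one table and both point forecasts overlaid on the actuals. A short sketch reusing the objects above is below.
# Stack the test-set accuracy rows of both models for direct comparison
test_acc <- rbind(ETS = accuracy(f1, xusts3)["Test set", ],
                  ARIMA = accuracy(f2, xusts3)["Test set", ])
kable(round(test_acc, 2), caption = "Test-set accuracy, July-December 2019") %>%
  kable_classic(full_width = F)
# Overlay both point forecasts on the actual data
autoplot(xusts3) +
  autolayer(f1$mean, series = "ETS") +
  autolayer(f2$mean, series = "ARIMA") +
  ggtitle("ETS vs ARIMA forecasts, second half of 2019")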