Series: Exports: Value Goods for the United States, National currency, Monthly Level, Not Seasonally Adjusted (XTEXVA01USM664N)
Source: OECD Main Economic Indicators database. Accessed through FRED, Federal Reserve Bank of St. Louis.
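As an alternative to reading a downloaded CSV, the same series can be pulled straight from FRED. A minimal sketch is shown below, assuming the quantmod package is installed and internet access is available (quantmod is not used elsewhere in this analysis).
library(quantmod)
# Download the monthly export series directly from FRED by its series ID
getSymbols("XTEXVA01USM664N", src = "FRED")
# getSymbols assigns an xts object named after the series ID
head(XTEXVA01USM664N)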
library(fpp2)
library(ggplot2)
library(ggthemes)
library(tidyverse)
library(kableExtra)
xus <- read.csv("F:/MSAE/2020fall_PredictiveAnalytics/Discussions/XTEXVA01USM664N.csv")
xus <- xus[order(xus$DATE), ]  # ensure observations are in chronological order
xusts1 <- ts(xus[, 2] / 1000000000,  # convert to US$ billions
             start = c(1990, 1), end = c(2020, 9), frequency = 12)
plot1 <- autoplot(xusts1) +
xlab("Year") + ylab("") + ggtitle("Exports: Value Goods for the United States, US$ Billions") + theme_classic()
plot1
ggseasonplot(xusts1)
From the charts it is clear there is a seasonal component, and we can also identify the recessions of the early 2000s, 2009, and the recent pandemic.
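The visual impression of seasonality can be checked a bit more formally. A short sketch using functions from the forecast package (loaded via fpp2) is below.
# Subseries plot: one panel per month highlights the recurring seasonal pattern
ggsubseriesplot(xusts1) + ylab("US$ Billions")
# nsdiffs() estimates how many seasonal differences are needed; a value of 1
# indicates a seasonal pattern strong enough to require differencing
nsdiffs(xusts1)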
decadd <- decompose(xusts1, type = "additive")
decmul <- decompose(xusts1, type = "multiplicative")
autoplot(decadd)
autoplot(decmul)
As expected, the errors (the remainder component) are larger during recessions and the subsequent recoveries, which will make prediction in the current environment less accurate. To sidestep this, I will only use data through 2019.
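To see where the decomposition struggles, the remainder component can be inspected directly. A short sketch using the additive decomposition already computed is below; the windowed standard deviation is an assumption-free way to quantify the claim rather than a result reported here.
# Plot the remainder (irregular) component of the additive decomposition
autoplot(decadd$random) + xlab("Year") + ylab("Remainder") + theme_classic()
# Compare the remainder's spread around the 2008-09 recession with the full sample
sd(window(decadd$random, start = c(2008, 1), end = c(2010, 12)), na.rm = TRUE)
sd(decadd$random, na.rm = TRUE)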
xusts1 <- ts(xus[,2]/1000000000,start=c(1990,1),end=c(2019,12),frequency=12)
xusts <- window(xusts1, start = c(1990,1), end = c(2019, 6))
ets1 <- ets(xusts, model = "ZZZ")
summary(ets1)
## ETS(M,Ad,M)
##
## Call:
## ets(y = xusts, model = "ZZZ")
##
## Smoothing parameters:
## alpha = 0.7722
## beta = 0.02
## gamma = 1e-04
## phi = 0.9796
##
## Initial states:
## l = 32.083
## b = 0.1991
## s = 0.9989 1.0077 1.0548 0.9888 0.9943 0.9463
## 1.0258 1.0188 1.0009 1.0807 0.9494 0.9337
##
## sigma: 0.03
##
## AIC AICc BIC
## 2664.863 2666.905 2734.511
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1559424 2.492378 1.832065 0.1817695 2.305664 0.2571659
## ACF1
## Training set 0.03645486
autoplot(ets1)
checkresiduals(ets1)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,Ad,M)
## Q* = 192.33, df = 7, p-value < 2.2e-16
##
## Model df: 17. Total lags used: 24
ets1 %>% forecast(h=6) %>%
autoplot() + autolayer(fitted(ets1), series="Fitted Values")
kable(round(accuracy(ets1),2), caption = "ETS") %>%
kable_classic(full_width = F)
|  | ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 |
|---|---|---|---|---|---|---|---|
| Training set | 0.16 | 2.49 | 1.83 | 0.18 | 2.31 | 0.26 | 0.04 |
aa1 <- auto.arima(xusts)
summary(aa1)
## Series: xusts
## ARIMA(2,0,3)(0,1,2)[12] with drift
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3 sma1 sma2 drift
## 1.6918 -0.7285 -0.9645 0.3218 0.1119 -0.4481 -0.2657 0.3109
## s.e. 0.0940 0.0911 0.1082 0.0890 0.0717 0.0618 0.0565 0.0468
##
## sigma^2 estimated as 6.532: log likelihood=-807.26
## AIC=1632.53 AICc=1633.07 BIC=1667.04
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01354908 2.482566 1.838812 -0.1640364 2.292511 0.258113
## ACF1
## Training set -0.001366234
checkresiduals(aa1)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,3)(0,1,2)[12] with drift
## Q* = 69.172, df = 16, p-value = 1.393e-08
##
## Model df: 8. Total lags used: 24
aa1 %>% forecast(h=6) %>%
autoplot() + autolayer(fitted(aa1), series="Fitted Values")
kable(round(accuracy(aa1),2), caption = "Auto Arima") %>%
kable_classic(full_width = F)
|  | ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 |
|---|---|---|---|---|---|---|---|
| Training set | -0.01 | 2.48 | 1.84 | -0.16 | 2.29 | 0.26 | 0 |
c1 <- c("ETS Selection (M,Ad,M)", "Auto Arima")
c2 <- c(ets1$aic, aa1$aic)
c3 <- c(ets1$aicc, aa1$aicc)
c4 <- c(ets1$bic, aa1$bic)
table1 <- data.frame(c1,c2,c3,c4)
colnames(table1) <- c("Model", "AIC", "AICc", "BIC" )
kable(table1, caption = "Summary Table") %>% kable_classic(full_width = F)
| Model | AIC | AICc | BIC |
|---|---|---|---|
| ETS Selection (M,Ad,M) | 2664.863 | 2666.905 | 2734.511 |
| Auto Arima | 1632.525 | 1633.068 | 1667.039 |
The Auto ARIMA model has the lowest AIC, AICc, and BIC values, which would suggest a better fit. However, information criteria are not directly comparable between ETS and ARIMA models (the ARIMA model also takes a seasonal difference, so it is fit to a shorter effective sample), so the comparison below relies on out-of-sample accuracy.
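Because information criteria cannot settle the ETS-versus-ARIMA question on their own, one way to put the two model classes on a common footing is time-series cross-validation. A minimal (and computationally slow) sketch with one-step-ahead errors is shown below; the helper functions fets and farima are introduced here only for illustration.
# One-step-ahead time series cross-validation, re-estimating each model at every origin
fets <- function(x, h) forecast(ets(x), h = h)
farima <- function(x, h) forecast(auto.arima(x), h = h)
e_ets <- tsCV(xusts, fets, h = 1)
e_arima <- tsCV(xusts, farima, h = 1)
# Cross-validated RMSE for each model class
sqrt(mean(e_ets^2, na.rm = TRUE))
sqrt(mean(e_arima^2, na.rm = TRUE))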
xusts2 <- window(xusts1, start = c(2015,1), end = c(2019, 12))
xusts3 <- window(xusts1, start = c(2019,7), end = c(2019, 12))
f1 <- forecast(ets1, h=6)
autoplot(f1$mean, series = "ETS Model") + autolayer(xusts2, series = "Actual Data")+ ggtitle("ETS Model vs Actual Data")
kable(round(accuracy(f1,xusts3),2), caption = "ETS Model") %>%
kable_classic(full_width = F)
|  | ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U |
|---|---|---|---|---|---|---|---|---|
| Training set | 0.16 | 2.49 | 1.83 | 0.18 | 2.31 | 0.26 | 0.04 | NA |
| Test set | 1.29 | 2.50 | 1.63 | 0.96 | 1.20 | 0.23 | 0.43 | 0.31 |
f2 <- forecast(aa1, h=6)
autoplot(f2$mean, series = "ARIMA Model") + autolayer(xusts2, series = "Actual Data")+ ggtitle("ARIMA Model vs Actual Data")
kable(round(accuracy(f2,xusts3),2), caption = "Arima Model") %>%
kable_classic(full_width = F)
|  | ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U |
|---|---|---|---|---|---|---|---|---|
| Training set | -0.01 | 2.48 | 1.84 | -0.16 | 2.29 | 0.26 | 0.00 | NA |
| Test set | 2.40 | 2.67 | 2.40 | 1.76 | 1.76 | 0.34 | 0.15 | 0.46 |
However, the ARIMA model performed worse than the ETS model on the test set (July 2019 through December 2019), with higher RMSE, MAE, and MAPE.
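For a side-by-side view over the holdout window, the test-set accuracy rows can be combined into one table and both point forecasts overlaid on the actuals. A short sketch reusing the objects above is below.
# Stack the test-set accuracy rows of both models for direct comparison
test_acc <- rbind(ETS = accuracy(f1, xusts3)["Test set", ],
                  ARIMA = accuracy(f2, xusts3)["Test set", ])
kable(round(test_acc, 2), caption = "Test-set accuracy, July-December 2019") %>%
  kable_classic(full_width = F)
# Overlay both point forecasts on the actual data
autoplot(xusts3) +
  autolayer(f1$mean, series = "ETS") +
  autolayer(f2$mean, series = "ARIMA") +
  ggtitle("ETS vs ARIMA forecasts, second half of 2019")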