ZEngineer Discussion Week 4

Build an auto.arima model as well as an ETS model. Which performed better? Now hold out 6 months of data for a test set and try to forecast using the ETS and the auto.arima. Which performs better on the hold-out set?

The dataset used here is the quarterly US Producer Price Index from January 1960 to March 2002.

Reading and plotting the data:

library(ggplot2)
library(tsdl)
library(forecast)
library(seasonal)
library(magrittr)
library(dplyr)

 ppi<- read.csv("timeseries_ppi.csv", header=TRUE)
 ppi<- ppi[,"ppi"]
 
str(ppi)

##  num [1:169] 25.4 25.4 25.4 25.4 25.5 ...

ppi_ts<- ts(ppi, frequency=4, start=1960)

d.y<- diff(ppi_ts)


autoplot(ppi_ts) +
  ggtitle("Producer Price Index: 1960-2002")+
  theme_bw()

We use ndiffs() to determine the appropriate number of first differences,

ppi_ts %>% ndiffs()

## [1] 1

The figures below show the differenced series and the lag plots for both the original and the differenced series. Te d.y plot (with The original series has a slow decay. The diff series has 4 significant lags.

plot(d.y)

ggAcf(ppi_ts)

ggAcf(d.y)

Auto Arima Fit

For the first ARIMA model, we have selected the automatic option to let the function select the terms for p,d,q.

fita<- auto.arima(ppi_ts, seasonal= FALSE)
summary(fita)

## Series: ppi_ts 
## ARIMA(1,1,0) with drift 
## 
## Coefficients:
##          ar1   drift
##       0.5522  0.4557
## s.e.  0.0640  0.1307
## 
## sigma^2 estimated as 0.5911:  log likelihood=-193.39
## AIC=392.79   AICc=392.94   BIC=402.16
## 
## Training set error measures:
##                       ME      RMSE       MAE         MPE      MAPE      MASE
## Training set 0.001938637 0.7619869 0.5184711 -0.04513644 0.8242747 0.2092962
##                    ACF1
## Training set -0.0294358

autoplot(ppi_ts, series="Data") +
  autolayer(fitted(fita), series="Fitted") +
  xlab("Year") + ylab("") +
  ggtitle("Production Pice Index") + theme_bw()+
  guides(colour=guide_legend(title=" "))

The model shows a good fit. Looking at the residuals, The small p-value for the Ljung Box test shows that there is autocorrelation in the data.

checkresiduals(fita)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,0) with drift
## Q* = 20.376, df = 6, p-value = 0.002373
## 
## Model df: 2.   Total lags used: 8

fita$aicc

## [1] 392.9359

ETS FIT

#ppi_ts %>%
#  stl( s.window="periodic", robust=TRUE) %>%
 # autoplot()

fite<- ets(ppi_ts,model="ANN")
summary(fite)

## ETS(A,N,N) 
## 
## Call:
##  ets(y = ppi_ts, model = "ANN") 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
## 
##   Initial states:
##     l = 25.3999 
## 
##   sigma:  1.0319
## 
##      AIC     AICc      BIC 
## 881.5457 881.6912 890.9354 
## 
## Training set error measures:
##                     ME     RMSE       MAE       MPE     MAPE      MASE
## Training set 0.4615855 1.025749 0.7169765 0.8176022 1.117954 0.2894288
##                   ACF1
## Training set 0.5531706

autoplot(ppi_ts, series="Data") +
  autolayer(fitted(fite), series="Fitted") +
  xlab("Year") + ylab("") +
  ggtitle("Production Price Index") + theme_bw() +
  guides(colour=guide_legend(title=" "))

checkresiduals(fite)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(A,N,N)
## Q* = 106.36, df = 6, p-value < 2.2e-16
## 
## Model df: 2.   Total lags used: 8

The ETS model shows a lot of autocorrelation as well as varioation in seasonality.

We will now use a training set of PPI data till 1999 to train and validate the ARIMA and ETS models.

Training and Validation Set

ppi_tr<- window(ppi_ts, end = 1999)

ARIMA on Training Set

summary(fit_at<- auto.arima(ppi_tr, seasonal = c(0,1,2)))

## Series: ppi_tr 
## ARIMA(1,1,1) with drift 
## 
## Coefficients:
##          ar1      ma1   drift
##       0.8834  -0.5414  0.4082
## s.e.  0.0564   0.0961  0.2066
## 
## sigma^2 estimated as 0.4634:  log likelihood=-160.12
## AIC=328.24   AICc=328.51   BIC=340.44
## 
## Training set error measures:
##                       ME     RMSE       MAE         MPE    MAPE      MASE
## Training set 0.006093706 0.671987 0.4521775 0.006522846 0.73742 0.1936277
##                    ACF1
## Training set 0.02817474

fita_tr<- arima(ppi_tr, order = c(1,1,1))

summary(fita_tr)

## 
## Call:
## arima(x = ppi_tr, order = c(1, 1, 1))
## 
## Coefficients:
##          ar1      ma1
##       0.9306  -0.5822
## s.e.  0.0359   0.0817
## 
## sigma^2 estimated as 0.4606:  log likelihood = -161.31,  aic = 328.62
## 
## Training set error measures:
##                      ME      RMSE       MAE       MPE      MAPE      MASE
## Training set 0.06929615 0.6764783 0.4523804 0.1659514 0.7267734 0.6920792
##                    ACF1
## Training set 0.02495214

fita_tr %>% forecast(h = 9) %>% 
  autoplot() + theme_bw()

checkresiduals(fita_tr)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,1)
## Q* = 21.162, df = 6, p-value = 0.001716
## 
## Model df: 2.   Total lags used: 8

ARIMA on differenced series

summary(fit_df<- arima(d.y, order = c(1,0,1)))

## 
## Call:
## arima(x = d.y, order = c(1, 0, 1))
## 
## Coefficients:
##          ar1      ma1  intercept
##       0.7245  -0.2547     0.4397
## s.e.  0.1152   0.1682     0.1576
## 
## sigma^2 estimated as 0.5783:  log likelihood = -192.59,  aic = 393.17
## 
## Training set error measures:
##                       ME      RMSE       MAE MPE MAPE      MASE       ACF1
## Training set 0.002810349 0.7604917 0.5152447 NaN  Inf 0.8900982 0.02378196

checkresiduals(fit_df)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,0,1) with non-zero mean
## Q* = 19.986, df = 5, p-value = 0.001258
## 
## Model df: 3.   Total lags used: 8

ETS on Training Set

fite_tr<- ets(ppi_tr, model = 'MAN')

summary(fite_tr)

## ETS(M,A,N) 
## 
## Call:
##  ets(y = ppi_tr, model = "MAN") 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.4008 
## 
##   Initial states:
##     l = 25.402 
##     b = -0.0053 
## 
##   sigma:  0.0107
## 
##      AIC     AICc      BIC 
## 625.2419 625.6392 640.5231 
## 
## Training set error measures:
##                        ME      RMSE       MAE        MPE      MAPE      MASE
## Training set -0.009602627 0.6865292 0.4573016 0.03118253 0.7298413 0.1958218
##                    ACF1
## Training set 0.01273637

fite_tr %>% forecast(h = 9) %>% 
  autoplot() +theme_bw()

checkresiduals(fite_tr)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(M,A,N)
## Q* = 10.564, df = 4, p-value = 0.03193
## 
## Model df: 4.   Total lags used: 8

fita$aicc

## [1] 392.9359

fite$aicc

## [1] 881.6912

fit_at$aicc

## [1] 328.5056

fita_tr$aic

## [1] 328.6211

fit_df$aic

## [1] 393.1732

fite_tr$aicc

## [1] 625.6392

Not surprisingly, the auto.arima model gave the lowest AICc. Applying further seasonal parameters to the auto.arima model did not improve the result.