Import and Chart Nonstore Retail Sales (A good measure of e-commerce sales)

library(Quandl)
## Warning: package 'Quandl' was built under R version 4.0.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.0.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.0.3
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(fpp2)
## Warning: package 'fpp2' was built under R version 4.0.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## -- Attaching packages --------------------------------------------------------------------------------- fpp2 2.4 --
## v ggplot2   3.3.2     v fma       2.4  
## v forecast  8.13      v expsmooth 2.3
## Warning: package 'forecast' was built under R version 4.0.3
## Warning: package 'fma' was built under R version 4.0.3
## Warning: package 'expsmooth' was built under R version 4.0.3
## 
ecommerce<-Quandl("FRED/RSNSRN", api_key="t_QEjkEczJraydsfJGw6", type="ts")
autoplot(ecommerce)+
  ggtitle("Nonstore Retail Sales")

Looks like a log transformation would make sense, but lets make sure.

BoxCox.lambda(ecommerce)
## [1] -0.009867549

Use essentially a simple log transformation here and plot again

ecommerce.bc<-BoxCox(ecommerce, lambda=BoxCox.lambda(ecommerce))
autoplot(ecommerce.bc)

That got ride of a lot of the heterskadicity present in the seasonality as well as the exponential growth of the series. Lets get the ets and ARIMA models, and check their residuals

#get models
(ecomm.ets<-ets(ecommerce.bc))
## ETS(M,A,A) 
## 
## Call:
##  ets(y = ecommerce.bc) 
## 
##   Smoothing parameters:
##     alpha = 0.4013 
##     beta  = 1e-04 
##     gamma = 0.393 
## 
##   Initial states:
##     l = 8.367 
##     b = 0.0068 
##     s = 0.2438 0.141 0.073 -0.0235 -0.1306 -0.1529
##            -0.1164 -0.0862 -0.0377 0.0402 -0.0066 0.056
## 
##   sigma:  0.0039
## 
##       AIC      AICc       BIC 
## -244.4703 -242.5988 -179.1301
(ecomm.arima<-auto.arima(ecommerce.bc))
## Series: ecommerce.bc 
## ARIMA(3,0,2)(1,1,2)[12] with drift 
## 
## Coefficients:
##           ar1     ar2     ar3     ma1     ma2     sar1     sma1     sma2
##       -0.1361  0.1173  0.7276  0.6085  0.4615  -0.3275  -0.0659  -0.3484
## s.e.   0.0718  0.0596  0.0573  0.1099  0.1016   0.2662   0.2466   0.1209
##        drift
##       0.0066
## s.e.  0.0005
## 
## sigma^2 estimated as 0.001077:  log likelihood=667.42
## AIC=-1314.83   AICc=-1314.15   BIC=-1276.75
#check residuals
checkresiduals(ecomm.ets)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(M,A,A)
## Q* = 280.26, df = 8, p-value < 2.2e-16
## 
## Model df: 16.   Total lags used: 24
checkresiduals(ecomm.arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(3,0,2)(1,1,2)[12] with drift
## Q* = 52.258, df = 15, p-value = 5.12e-06
## 
## Model df: 9.   Total lags used: 24
ggtsdisplay(residuals(ecomm.ets))

ggtsdisplay(residuals(ecomm.arima))

The Forecasts here are very similar

autoplot(forecast(ecomm.ets, h=6))+xlim(c(2010, 2021))
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 216 row(s) containing missing values (geom_path).

autoplot(forecast(ecomm.arima, h=6))+xlim(c(2010, 2021))
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 216 row(s) containing missing values (geom_path).

MSE for ARIMA does slightly better here, but we should try with training and test sets

fets <- function(x, h) {
  forecast(ets(x), h = h)
}
farima <- function(x, h) {
  forecast(Arima(x, order=c(1,0,0), seasonal=c(1,1,1)), h=h)
}


# Compute CV errors for ETS as e1
e1 <- tsCV(ecommerce.bc, fets, h=1)
# Compute CV errors for ARIMA as e2
e2 <- tsCV(ecommerce.bc, farima, h=1)
# Find MSE of each model class
mean(e1^2, na.rm=TRUE)
## [1] 0.002219352
mean(e2^2, na.rm=TRUE)
## [1] 0.001962564

Now to test these models with training and test sets. The ETS function gives a (M,A,A) model while the ARIMA model gives a (1,0,5)(1,1,2) model. The 5 in the MA part of the ARIMA model indicates a lot of reliance on previous observations. The ARIMA model performs slightly worse here, but there is some extenuating circumstances. E-commerce has seen a huge boost from the pandemic, and broke from its normal pattern in the spring, as e-commerce adoption increased notably

library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.0.3
ecommerce.train = window(ecommerce.bc,end=c(2020,02),frequency=12)
ecommerce.test = window(ecommerce.bc,start=c(2020,03),frequency=12)


#get models
(ecomm.ets<-ets(ecommerce.train))
## ETS(M,A,A) 
## 
## Call:
##  ets(y = ecommerce.train) 
## 
##   Smoothing parameters:
##     alpha = 0.3298 
##     beta  = 1e-04 
##     gamma = 0.3015 
## 
##   Initial states:
##     l = 8.3778 
##     b = 0.0065 
##     s = 0.2512 0.1585 0.0563 -0.0416 -0.1071 -0.1539
##            -0.1164 -0.0747 -0.0361 0.0425 -0.0187 0.0399
## 
##   sigma:  0.0038
## 
##       AIC      AICc       BIC 
## -264.2777 -262.3652 -199.2859
(ecomm.arima<-auto.arima(ecommerce.train))
## Series: ecommerce.train 
## ARIMA(1,0,5)(1,1,2)[12] with drift 
## 
## Coefficients:
##          ar1      ma1     ma2     ma3      ma4     ma5     sar1     sma1
##       0.8845  -0.5114  0.0527  0.2278  -0.3143  0.2532  -0.0669  -0.2839
## s.e.  0.0442   0.0681  0.0639  0.0619   0.0589  0.0658   0.2144   0.2061
##          sma2   drift
##       -0.2900  0.0063
## s.e.   0.1132  0.0004
## 
## sigma^2 estimated as 0.00105:  log likelihood=657.83
## AIC=-1293.66   AICc=-1292.82   BIC=-1252.01
ets.forecast=forecast(ecomm.ets, h=6)
arima.forecast=forecast(ecomm.arima, h=6)

autoplot(ets.forecast)+
  autolayer(ecommerce.bc, series="Actual Data")+
  xlim(c(2015, 2021))
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 276 row(s) containing missing values (geom_path).
## Warning: Removed 276 row(s) containing missing values (geom_path).

autoplot(arima.forecast)+
  autolayer(ecommerce.bc, series="Actual Data")+
  xlim(c(2015, 2021))
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 276 row(s) containing missing values (geom_path).

## Warning: Removed 276 row(s) containing missing values (geom_path).

kable(accuracy(ets.forecast,ecommerce.test))
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set -0.0003515 0.0348224 0.0278574 -0.0042324 0.2958359 0.3296773 -0.0426027 NA
Test set 0.1193730 0.1314928 0.1193730 1.1175846 1.1175846 1.4127123 0.3099617 2.990593
kable(accuracy(arima.forecast,ecommerce.test))
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set 0.0006668 0.0313323 0.0247043 0.0073893 0.262589 0.2923615 0.0029924 NA
Test set 0.1218041 0.1318671 0.1218041 1.1407759 1.140776 1.4414830 0.0459839 2.998573