Week 4 discussion

Import and Chart Nonstore Retail Sales (A good measure of e-commerce sales)

library(Quandl)

## Warning: package 'Quandl' was built under R version 4.0.3

## Loading required package: xts

## Warning: package 'xts' was built under R version 4.0.3

## Loading required package: zoo

## Warning: package 'zoo' was built under R version 4.0.3

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(fpp2)

## Warning: package 'fpp2' was built under R version 4.0.3

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## -- Attaching packages --------------------------------------------------------------------------------- fpp2 2.4 --

## v ggplot2   3.3.2     v fma       2.4  
## v forecast  8.13      v expsmooth 2.3

## Warning: package 'forecast' was built under R version 4.0.3

## Warning: package 'fma' was built under R version 4.0.3

## Warning: package 'expsmooth' was built under R version 4.0.3

##

ecommerce<-Quandl("FRED/RSNSRN", api_key="t_QEjkEczJraydsfJGw6", type="ts")
autoplot(ecommerce)+
  ggtitle("Nonstore Retail Sales")

Looks like a log transformation would make sense, but lets make sure.

BoxCox.lambda(ecommerce)

## [1] -0.009867549

Use essentially a simple log transformation here and plot again

ecommerce.bc<-BoxCox(ecommerce, lambda=BoxCox.lambda(ecommerce))
autoplot(ecommerce.bc)

That got ride of a lot of the heterskadicity present in the seasonality as well as the exponential growth of the series. Lets get the ets and ARIMA models, and check their residuals

#get models
(ecomm.ets<-ets(ecommerce.bc))

## ETS(M,A,A) 
## 
## Call:
##  ets(y = ecommerce.bc) 
## 
##   Smoothing parameters:
##     alpha = 0.4013 
##     beta  = 1e-04 
##     gamma = 0.393 
## 
##   Initial states:
##     l = 8.367 
##     b = 0.0068 
##     s = 0.2438 0.141 0.073 -0.0235 -0.1306 -0.1529
##            -0.1164 -0.0862 -0.0377 0.0402 -0.0066 0.056
## 
##   sigma:  0.0039
## 
##       AIC      AICc       BIC 
## -244.4703 -242.5988 -179.1301

(ecomm.arima<-auto.arima(ecommerce.bc))

## Series: ecommerce.bc 
## ARIMA(3,0,2)(1,1,2)[12] with drift 
## 
## Coefficients:
##           ar1     ar2     ar3     ma1     ma2     sar1     sma1     sma2
##       -0.1361  0.1173  0.7276  0.6085  0.4615  -0.3275  -0.0659  -0.3484
## s.e.   0.0718  0.0596  0.0573  0.1099  0.1016   0.2662   0.2466   0.1209
##        drift
##       0.0066
## s.e.  0.0005
## 
## sigma^2 estimated as 0.001077:  log likelihood=667.42
## AIC=-1314.83   AICc=-1314.15   BIC=-1276.75

#check residuals
checkresiduals(ecomm.ets)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(M,A,A)
## Q* = 280.26, df = 8, p-value < 2.2e-16
## 
## Model df: 16.   Total lags used: 24

checkresiduals(ecomm.arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(3,0,2)(1,1,2)[12] with drift
## Q* = 52.258, df = 15, p-value = 5.12e-06
## 
## Model df: 9.   Total lags used: 24

ggtsdisplay(residuals(ecomm.ets))

ggtsdisplay(residuals(ecomm.arima))

The Forecasts here are very similar

autoplot(forecast(ecomm.ets, h=6))+xlim(c(2010, 2021))

## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.

## Warning: Removed 216 row(s) containing missing values (geom_path).

autoplot(forecast(ecomm.arima, h=6))+xlim(c(2010, 2021))

## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.

## Warning: Removed 216 row(s) containing missing values (geom_path).

MSE for ARIMA does slightly better here, but we should try with training and test sets

fets <- function(x, h) {
  forecast(ets(x), h = h)
}
farima <- function(x, h) {
  forecast(Arima(x, order=c(1,0,0), seasonal=c(1,1,1)), h=h)
}


# Compute CV errors for ETS as e1
e1 <- tsCV(ecommerce.bc, fets, h=1)
# Compute CV errors for ARIMA as e2
e2 <- tsCV(ecommerce.bc, farima, h=1)
# Find MSE of each model class
mean(e1^2, na.rm=TRUE)

## [1] 0.002219352

mean(e2^2, na.rm=TRUE)

## [1] 0.001962564

Now to test these models with training and test sets. The ETS function gives a (M,A,A) model while the ARIMA model gives a (1,0,5)(1,1,2) model. The 5 in the MA part of the ARIMA model indicates a lot of reliance on previous observations. The ARIMA model performs slightly worse here, but there is some extenuating circumstances. E-commerce has seen a huge boost from the pandemic, and broke from its normal pattern in the spring, as e-commerce adoption increased notably

library(kableExtra)

## Warning: package 'kableExtra' was built under R version 4.0.3

ecommerce.train = window(ecommerce.bc,end=c(2020,02),frequency=12)
ecommerce.test = window(ecommerce.bc,start=c(2020,03),frequency=12)


#get models
(ecomm.ets<-ets(ecommerce.train))

## ETS(M,A,A) 
## 
## Call:
##  ets(y = ecommerce.train) 
## 
##   Smoothing parameters:
##     alpha = 0.3298 
##     beta  = 1e-04 
##     gamma = 0.3015 
## 
##   Initial states:
##     l = 8.3778 
##     b = 0.0065 
##     s = 0.2512 0.1585 0.0563 -0.0416 -0.1071 -0.1539
##            -0.1164 -0.0747 -0.0361 0.0425 -0.0187 0.0399
## 
##   sigma:  0.0038
## 
##       AIC      AICc       BIC 
## -264.2777 -262.3652 -199.2859

(ecomm.arima<-auto.arima(ecommerce.train))

## Series: ecommerce.train 
## ARIMA(1,0,5)(1,1,2)[12] with drift 
## 
## Coefficients:
##          ar1      ma1     ma2     ma3      ma4     ma5     sar1     sma1
##       0.8845  -0.5114  0.0527  0.2278  -0.3143  0.2532  -0.0669  -0.2839
## s.e.  0.0442   0.0681  0.0639  0.0619   0.0589  0.0658   0.2144   0.2061
##          sma2   drift
##       -0.2900  0.0063
## s.e.   0.1132  0.0004
## 
## sigma^2 estimated as 0.00105:  log likelihood=657.83
## AIC=-1293.66   AICc=-1292.82   BIC=-1252.01

ets.forecast=forecast(ecomm.ets, h=6)
arima.forecast=forecast(ecomm.arima, h=6)

autoplot(ets.forecast)+
  autolayer(ecommerce.bc, series="Actual Data")+
  xlim(c(2015, 2021))

## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.

## Warning: Removed 276 row(s) containing missing values (geom_path).

## Warning: Removed 276 row(s) containing missing values (geom_path).

autoplot(arima.forecast)+
  autolayer(ecommerce.bc, series="Actual Data")+
  xlim(c(2015, 2021))

## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.

## Warning: Removed 276 row(s) containing missing values (geom_path).

## Warning: Removed 276 row(s) containing missing values (geom_path).

kable(accuracy(ets.forecast,ecommerce.test))

	ME	RMSE	MAE	MPE	MAPE	MASE	ACF1	Theil’s U
Training set	-0.0003515	0.0348224	0.0278574	-0.0042324	0.2958359	0.3296773	-0.0426027	NA
Test set	0.1193730	0.1314928	0.1193730	1.1175846	1.1175846	1.4127123	0.3099617	2.990593

kable(accuracy(arima.forecast,ecommerce.test))

	ME	RMSE	MAE	MPE	MAPE	MASE	ACF1	Theil’s U
Training set	0.0006668	0.0313323	0.0247043	0.0073893	0.262589	0.2923615	0.0029924	NA
Test set	0.1218041	0.1318671	0.1218041	1.1407759	1.140776	1.4414830	0.0459839	2.998573

Week 4 discussion

Joey Biasi

11/16/2020