Import and Chart Nonstore Retail Sales (A good measure of e-commerce sales)
library(Quandl)
## Warning: package 'Quandl' was built under R version 4.0.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.0.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.0.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(fpp2)
## Warning: package 'fpp2' was built under R version 4.0.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## -- Attaching packages ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ fpp2 2.4 --
## v ggplot2 3.3.2 v fma 2.4
## v forecast 8.13 v expsmooth 2.3
## Warning: package 'forecast' was built under R version 4.0.3
## Warning: package 'fma' was built under R version 4.0.3
## Warning: package 'expsmooth' was built under R version 4.0.3
##
library(rugarch)
## Warning: package 'rugarch' was built under R version 4.0.3
## Loading required package: parallel
##
## Attaching package: 'rugarch'
## The following object is masked from 'package:stats':
##
## sigma
ecommerce<-Quandl("FRED/RSNSRN", api_key="t_QEjkEczJraydsfJGw6", type="ts")
autoplot(ecommerce)+
ggtitle("Nonstore Retail Sales")
Looks like a log transformation would make sense, but lets make sure.
BoxCox.lambda(ecommerce)
## [1] 0.02334151
Use essentially a simple log transformation here and plot again
ecommerce.bc<-diff(BoxCox(ecommerce, lambda=BoxCox.lambda(ecommerce)))
autoplot(ecommerce.bc)
That got ride of a lot of the heterskadicity present in the seasonality as well as the exponential growth of the series. Lets get the ets and ARIMA models, and check their residuals
spec = ugarchspec() #the empty function specifies the default model.
#get models
(ecomm.ets<-ets(ecommerce.bc))
## ETS(A,N,A)
##
## Call:
## ets(y = ecommerce.bc)
##
## Smoothing parameters:
## alpha = 1e-04
## gamma = 0.2757
##
## Initial states:
## l = 0.0084
## s = -0.3271 0.166 0.1227 0.1446 0.0696 0.0471
## -0.0205 -0.0619 -0.0221 -0.1043 0.1103 -0.1246
##
## sigma: 0.0614
##
## AIC AICc BIC
## 106.0365 107.4955 163.6897
(ecomm.arima<-auto.arima(ecommerce.bc))
## Series: ecommerce.bc
## ARIMA(2,0,2)(1,1,2)[12]
##
## Coefficients:
## ar1 ar2 ma1 ma2 sar1 sma1 sma2
## -1.0077 -0.7576 0.5055 0.3458 -0.3299 -0.0661 -0.3359
## s.e. 0.0648 0.0745 0.1030 0.1147 0.2824 0.2627 0.1275
##
## sigma^2 estimated as 0.002151: log likelihood=551.26
## AIC=-1086.52 AICc=-1086.08 BIC=-1056.06
(ecomm.garch<-ugarchfit(spec = spec, data = ecommerce.bc))
##
## *---------------------------------*
## * GARCH Model Fit *
## *---------------------------------*
##
## Conditional Variance Dynamics
## -----------------------------------
## GARCH Model : sGARCH(1,1)
## Mean Model : ARFIMA(1,0,1)
## Distribution : norm
##
## Optimal Parameters
## ------------------------------------
## Estimate Std. Error t value Pr(>|t|)
## mu 0.008807 0.001178 7.47601 0.00000
## ar1 0.519566 0.076933 6.75349 0.00000
## ma1 -0.965565 0.028271 -34.15424 0.00000
## omega 0.000024 0.000210 0.11545 0.90809
## alpha1 0.000000 0.011580 0.00000 1.00000
## beta1 0.999000 0.000287 3477.80068 0.00000
##
## Robust Standard Errors:
## Estimate Std. Error t value Pr(>|t|)
## mu 0.008807 0.002812 3.131969 0.001736
## ar1 0.519566 0.163862 3.170747 0.001520
## ma1 -0.965565 0.069474 -13.898239 0.000000
## omega 0.000024 0.000518 0.046929 0.962570
## alpha1 0.000000 0.028492 0.000000 1.000000
## beta1 0.999000 0.000789 1266.934383 0.000000
##
## LogLikelihood : 202.0125
##
## Information Criteria
## ------------------------------------
##
## Akaike -1.1363
## Bayes -1.0695
## Shibata -1.1369
## Hannan-Quinn -1.1097
##
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.9609 3.270e-01
## Lag[2*(p+q)+(p+q)-1][5] 7.1482 5.409e-07
## Lag[4*(p+q)+(p+q)-1][9] 14.9946 3.833e-05
## d.o.f=2
## H0 : No serial correlation
##
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 39.80 2.819e-10
## Lag[2*(p+q)+(p+q)-1][5] 54.82 2.554e-15
## Lag[4*(p+q)+(p+q)-1][9] 73.07 0.000e+00
## d.o.f=2
##
## Weighted ARCH LM Tests
## ------------------------------------
## Statistic Shape Scale P-Value
## ARCH Lag[3] 11.00 0.500 2.000 9.125e-04
## ARCH Lag[5] 22.61 1.440 1.667 4.703e-06
## ARCH Lag[7] 32.83 2.315 1.543 2.888e-08
##
## Nyblom stability test
## ------------------------------------
## Joint Statistic: 6.2061
## Individual Statistics:
## mu 0.1474
## ar1 0.2795
## ma1 0.1344
## omega 0.1539
## alpha1 0.1503
## beta1 0.1474
##
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic: 1.49 1.68 2.12
## Individual Statistic: 0.35 0.47 0.75
##
## Sign Bias Test
## ------------------------------------
## t-value prob sig
## Sign Bias 1.6602 9.779e-02 *
## Negative Sign Bias 0.1243 9.011e-01
## Positive Sign Bias 7.0328 1.117e-11 ***
## Joint Effect 141.9813 1.413e-30 ***
##
##
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
## group statistic p-value(g-1)
## 1 20 62.25 1.699e-06
## 2 30 72.83 1.233e-05
## 3 40 104.04 7.909e-08
## 4 50 131.09 2.044e-09
##
##
## Elapsed time : 0.09208393
#check residuals
checkresiduals(ecomm.ets)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,A)
## Q* = 706.23, df = 10, p-value < 2.2e-16
##
## Model df: 14. Total lags used: 24
checkresiduals(ecomm.arima)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,2)(1,1,2)[12]
## Q* = 59.117, df = 17, p-value = 1.469e-06
##
## Model df: 7. Total lags used: 24
ggtsdisplay(residuals(ecomm.ets))
ggtsdisplay(residuals(ecomm.arima))
ggtsdisplay(residuals(ecomm.garch))
The Forecasts here are very similar for the old sets, but the GARCH model is linear
autoplot(forecast(ecomm.ets, h=6))+xlim(c(2010, 2021))
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 215 row(s) containing missing values (geom_path).
autoplot(forecast(ecomm.arima, h=6))+xlim(c(2010, 2021))
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 215 row(s) containing missing values (geom_path).
bootp=ugarchboot(ecomm.garch,method=c("Partial","Full")[1],n.ahead = 6,n.bootpred=1000,n.bootfit=1000)
plot(bootp,which=2)
Now to test these models with training and test sets. The ETS function gives a (M,A,A) model while the ARIMA model gives a (1,0,5)(1,1,2) model. The 5 in the MA part of the ARIMA model indicates a lot of reliance on previous observations. The ARIMA model performs slightly worse here, but there is some extenuating circumstances. E-commerce has seen a huge boost from the pandemic, and broke from its normal pattern in the spring, as e-commerce adoption increased notably. The GARCH model performs the worse on the test set in terms of RMSE.
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.0.3
ecommerce.train = window(ecommerce.bc,end=c(2020,04),frequency=12)
ecommerce.test = window(ecommerce.bc,start=c(2020,05),frequency=12)
#get models
(ecomm.ets<-ets(ecommerce.train))
## ETS(A,Ad,A)
##
## Call:
## ets(y = ecommerce.train)
##
## Smoothing parameters:
## alpha = 1e-04
## beta = 1e-04
## gamma = 0.2414
## phi = 0.9669
##
## Initial states:
## l = -0.0018
## b = 5e-04
## s = -0.3264 0.1219 0.1154 0.1551 0.0818 0.0726
## -0.0422 -0.0644 -0.0316 -0.1282 0.1132 -0.0672
##
## sigma: 0.062
##
## AIC AICc BIC
## 108.1184 110.2559 176.9864
(ecomm.arima<-auto.arima(ecommerce.train))
## Series: ecommerce.train
## ARIMA(2,0,2)(1,1,2)[12]
##
## Coefficients:
## ar1 ar2 ma1 ma2 sar1 sma1 sma2
## -0.9787 -0.7098 0.4231 0.2533 -0.2898 -0.1102 -0.3154
## s.e. 0.0750 0.0923 0.1152 0.1422 0.3255 0.3055 0.1493
##
## sigma^2 estimated as 0.002153: log likelihood=541.19
## AIC=-1066.38 AICc=-1065.93 BIC=-1036.06
(ecomm.garch<-ugarchfit(spec = spec, data = ecommerce.train))
##
## *---------------------------------*
## * GARCH Model Fit *
## *---------------------------------*
##
## Conditional Variance Dynamics
## -----------------------------------
## GARCH Model : sGARCH(1,1)
## Mean Model : ARFIMA(1,0,1)
## Distribution : norm
##
## Optimal Parameters
## ------------------------------------
## Estimate Std. Error t value Pr(>|t|)
## mu 0.008316 0.000081 1.0315e+02 0.000000
## ar1 0.548279 0.044114 1.2429e+01 0.000000
## ma1 -1.000000 0.000095 -1.0489e+04 0.000000
## omega 0.000025 0.000011 2.2987e+00 0.021523
## alpha1 0.000000 0.001572 8.0000e-06 0.999994
## beta1 0.999000 0.000150 6.6778e+03 0.000000
##
## Robust Standard Errors:
## Estimate Std. Error t value Pr(>|t|)
## mu 0.008316 0.000069 1.2037e+02 0.00000
## ar1 0.548279 0.034906 1.5707e+01 0.00000
## ma1 -1.000000 0.000062 -1.6115e+04 0.00000
## omega 0.000025 0.000024 1.0265e+00 0.30466
## alpha1 0.000000 0.001232 1.0000e-05 0.99999
## beta1 0.999000 0.000087 1.1538e+04 0.00000
##
## LogLikelihood : 198.6803
##
## Information Criteria
## ------------------------------------
##
## Akaike -1.1368
## Bayes -1.0690
## Shibata -1.1374
## Hannan-Quinn -1.1098
##
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.6391 4.240e-01
## Lag[2*(p+q)+(p+q)-1][5] 6.4678 1.107e-05
## Lag[4*(p+q)+(p+q)-1][9] 12.1858 9.494e-04
## d.o.f=2
## H0 : No serial correlation
##
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 32.79 1.025e-08
## Lag[2*(p+q)+(p+q)-1][5] 46.66 5.739e-13
## Lag[4*(p+q)+(p+q)-1][9] 61.51 4.441e-16
## d.o.f=2
##
## Weighted ARCH LM Tests
## ------------------------------------
## Statistic Shape Scale P-Value
## ARCH Lag[3] 9.111 0.500 2.000 2.541e-03
## ARCH Lag[5] 19.418 1.440 1.667 3.003e-05
## ARCH Lag[7] 27.175 2.315 1.543 8.906e-07
##
## Nyblom stability test
## ------------------------------------
## Joint Statistic: 8.3409
## Individual Statistics:
## mu 0.11677
## ar1 0.43640
## ma1 0.08425
## omega 0.20920
## alpha1 0.20714
## beta1 0.19947
##
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic: 1.49 1.68 2.12
## Individual Statistic: 0.35 0.47 0.75
##
## Sign Bias Test
## ------------------------------------
## t-value prob sig
## Sign Bias 1.5938 1.119e-01
## Negative Sign Bias 0.1472 8.831e-01
## Positive Sign Bias 6.8162 4.381e-11 ***
## Joint Effect 126.7370 2.730e-27 ***
##
##
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
## group statistic p-value(g-1)
## 1 20 59.76 4.221e-06
## 2 30 74.01 8.412e-06
## 3 40 96.75 8.214e-07
## 4 50 111.00 1.041e-06
##
##
## Elapsed time : 0.2227061
ets.forecast <- forecast(ecomm.ets, h=6)
arima.forecast <- forecast(ecomm.arima, h=6)
bootp=ugarchboot(ecomm.garch,method=c("Partial","Full")[1],n.ahead = 6,n.bootpred=1000,n.bootfit=1000)
s_f=bootp@forc@forecast$seriesFor
s_f1=as.vector(s_f)
autoplot(ets.forecast)+
autolayer(ecommerce.bc, series="Actual Data")+
xlim(c(2015, 2021))
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 275 row(s) containing missing values (geom_path).
## Warning: Removed 275 row(s) containing missing values (geom_path).
autoplot(arima.forecast)+
autolayer(ecommerce.bc, series="Actual Data")+
xlim(c(2015, 2021))
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 275 row(s) containing missing values (geom_path).
## Warning: Removed 275 row(s) containing missing values (geom_path).
length(s_f1)
## [1] 6
length(ecommerce.test)
## [1] 6
kable(forecast::accuracy(ets.forecast,ecommerce.test)[,"RMSE"])
| x | |
|---|---|
| Training set | 0.0604082 |
| Test set | 0.0292916 |
kable(forecast::accuracy(arima.forecast,ecommerce.test)[,"RMSE"])
| x | |
|---|---|
| Training set | 0.0450793 |
| Test set | 0.0436421 |
kable(forecast::accuracy(s_f1,ecommerce.test)[,"RMSE"])
| x |
|---|
| 0.0905256 |