Week 4 discussion

Import and Chart Nonstore Retail Sales (A good measure of e-commerce sales)

library(Quandl)

## Warning: package 'Quandl' was built under R version 4.0.3

## Loading required package: xts

## Warning: package 'xts' was built under R version 4.0.3

## Loading required package: zoo

## Warning: package 'zoo' was built under R version 4.0.3

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(fpp2)

## Warning: package 'fpp2' was built under R version 4.0.3

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## -- Attaching packages ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ fpp2 2.4 --

## v ggplot2   3.3.2     v fma       2.4  
## v forecast  8.13      v expsmooth 2.3

## Warning: package 'forecast' was built under R version 4.0.3

## Warning: package 'fma' was built under R version 4.0.3

## Warning: package 'expsmooth' was built under R version 4.0.3

##

library(rugarch)

## Warning: package 'rugarch' was built under R version 4.0.3

## Loading required package: parallel

## 
## Attaching package: 'rugarch'

## The following object is masked from 'package:stats':
## 
##     sigma

ecommerce<-Quandl("FRED/RSNSRN", api_key="t_QEjkEczJraydsfJGw6", type="ts")
autoplot(ecommerce)+
  ggtitle("Nonstore Retail Sales")

Looks like a log transformation would make sense, but lets make sure.

BoxCox.lambda(ecommerce)

## [1] 0.02334151

Use essentially a simple log transformation here and plot again

ecommerce.bc<-diff(BoxCox(ecommerce, lambda=BoxCox.lambda(ecommerce)))
autoplot(ecommerce.bc)

That got ride of a lot of the heterskadicity present in the seasonality as well as the exponential growth of the series. Lets get the ets and ARIMA models, and check their residuals

spec = ugarchspec() #the empty function specifies the default model. 

#get models
(ecomm.ets<-ets(ecommerce.bc))

## ETS(A,N,A) 
## 
## Call:
##  ets(y = ecommerce.bc) 
## 
##   Smoothing parameters:
##     alpha = 1e-04 
##     gamma = 0.2757 
## 
##   Initial states:
##     l = 0.0084 
##     s = -0.3271 0.166 0.1227 0.1446 0.0696 0.0471
##            -0.0205 -0.0619 -0.0221 -0.1043 0.1103 -0.1246
## 
##   sigma:  0.0614
## 
##      AIC     AICc      BIC 
## 106.0365 107.4955 163.6897

(ecomm.arima<-auto.arima(ecommerce.bc))

## Series: ecommerce.bc 
## ARIMA(2,0,2)(1,1,2)[12] 
## 
## Coefficients:
##           ar1      ar2     ma1     ma2     sar1     sma1     sma2
##       -1.0077  -0.7576  0.5055  0.3458  -0.3299  -0.0661  -0.3359
## s.e.   0.0648   0.0745  0.1030  0.1147   0.2824   0.2627   0.1275
## 
## sigma^2 estimated as 0.002151:  log likelihood=551.26
## AIC=-1086.52   AICc=-1086.08   BIC=-1056.06

(ecomm.garch<-ugarchfit(spec = spec, data = ecommerce.bc))

## 
## *---------------------------------*
## *          GARCH Model Fit        *
## *---------------------------------*
## 
## Conditional Variance Dynamics    
## -----------------------------------
## GARCH Model  : sGARCH(1,1)
## Mean Model   : ARFIMA(1,0,1)
## Distribution : norm 
## 
## Optimal Parameters
## ------------------------------------
##         Estimate  Std. Error    t value Pr(>|t|)
## mu      0.008807    0.001178    7.47601  0.00000
## ar1     0.519566    0.076933    6.75349  0.00000
## ma1    -0.965565    0.028271  -34.15424  0.00000
## omega   0.000024    0.000210    0.11545  0.90809
## alpha1  0.000000    0.011580    0.00000  1.00000
## beta1   0.999000    0.000287 3477.80068  0.00000
## 
## Robust Standard Errors:
##         Estimate  Std. Error     t value Pr(>|t|)
## mu      0.008807    0.002812    3.131969 0.001736
## ar1     0.519566    0.163862    3.170747 0.001520
## ma1    -0.965565    0.069474  -13.898239 0.000000
## omega   0.000024    0.000518    0.046929 0.962570
## alpha1  0.000000    0.028492    0.000000 1.000000
## beta1   0.999000    0.000789 1266.934383 0.000000
## 
## LogLikelihood : 202.0125 
## 
## Information Criteria
## ------------------------------------
##                     
## Akaike       -1.1363
## Bayes        -1.0695
## Shibata      -1.1369
## Hannan-Quinn -1.1097
## 
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
##                         statistic   p-value
## Lag[1]                     0.9609 3.270e-01
## Lag[2*(p+q)+(p+q)-1][5]    7.1482 5.409e-07
## Lag[4*(p+q)+(p+q)-1][9]   14.9946 3.833e-05
## d.o.f=2
## H0 : No serial correlation
## 
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
##                         statistic   p-value
## Lag[1]                      39.80 2.819e-10
## Lag[2*(p+q)+(p+q)-1][5]     54.82 2.554e-15
## Lag[4*(p+q)+(p+q)-1][9]     73.07 0.000e+00
## d.o.f=2
## 
## Weighted ARCH LM Tests
## ------------------------------------
##             Statistic Shape Scale   P-Value
## ARCH Lag[3]     11.00 0.500 2.000 9.125e-04
## ARCH Lag[5]     22.61 1.440 1.667 4.703e-06
## ARCH Lag[7]     32.83 2.315 1.543 2.888e-08
## 
## Nyblom stability test
## ------------------------------------
## Joint Statistic:  6.2061
## Individual Statistics:             
## mu     0.1474
## ar1    0.2795
## ma1    0.1344
## omega  0.1539
## alpha1 0.1503
## beta1  0.1474
## 
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic:          1.49 1.68 2.12
## Individual Statistic:     0.35 0.47 0.75
## 
## Sign Bias Test
## ------------------------------------
##                     t-value      prob sig
## Sign Bias            1.6602 9.779e-02   *
## Negative Sign Bias   0.1243 9.011e-01    
## Positive Sign Bias   7.0328 1.117e-11 ***
## Joint Effect       141.9813 1.413e-30 ***
## 
## 
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
##   group statistic p-value(g-1)
## 1    20     62.25    1.699e-06
## 2    30     72.83    1.233e-05
## 3    40    104.04    7.909e-08
## 4    50    131.09    2.044e-09
## 
## 
## Elapsed time : 0.09208393

#check residuals
checkresiduals(ecomm.ets)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(A,N,A)
## Q* = 706.23, df = 10, p-value < 2.2e-16
## 
## Model df: 14.   Total lags used: 24

checkresiduals(ecomm.arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,0,2)(1,1,2)[12]
## Q* = 59.117, df = 17, p-value = 1.469e-06
## 
## Model df: 7.   Total lags used: 24

ggtsdisplay(residuals(ecomm.ets))

ggtsdisplay(residuals(ecomm.arima))

ggtsdisplay(residuals(ecomm.garch))

The Forecasts here are very similar for the old sets, but the GARCH model is linear

autoplot(forecast(ecomm.ets, h=6))+xlim(c(2010, 2021))

## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.

## Warning: Removed 215 row(s) containing missing values (geom_path).

autoplot(forecast(ecomm.arima, h=6))+xlim(c(2010, 2021))

## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.

## Warning: Removed 215 row(s) containing missing values (geom_path).

bootp=ugarchboot(ecomm.garch,method=c("Partial","Full")[1],n.ahead = 6,n.bootpred=1000,n.bootfit=1000)
plot(bootp,which=2)

Now to test these models with training and test sets. The ETS function gives a (M,A,A) model while the ARIMA model gives a (1,0,5)(1,1,2) model. The 5 in the MA part of the ARIMA model indicates a lot of reliance on previous observations. The ARIMA model performs slightly worse here, but there is some extenuating circumstances. E-commerce has seen a huge boost from the pandemic, and broke from its normal pattern in the spring, as e-commerce adoption increased notably. The GARCH model performs the worse on the test set in terms of RMSE.

library(kableExtra)

## Warning: package 'kableExtra' was built under R version 4.0.3

ecommerce.train = window(ecommerce.bc,end=c(2020,04),frequency=12)
ecommerce.test = window(ecommerce.bc,start=c(2020,05),frequency=12)


#get models
(ecomm.ets<-ets(ecommerce.train))

## ETS(A,Ad,A) 
## 
## Call:
##  ets(y = ecommerce.train) 
## 
##   Smoothing parameters:
##     alpha = 1e-04 
##     beta  = 1e-04 
##     gamma = 0.2414 
##     phi   = 0.9669 
## 
##   Initial states:
##     l = -0.0018 
##     b = 5e-04 
##     s = -0.3264 0.1219 0.1154 0.1551 0.0818 0.0726
##            -0.0422 -0.0644 -0.0316 -0.1282 0.1132 -0.0672
## 
##   sigma:  0.062
## 
##      AIC     AICc      BIC 
## 108.1184 110.2559 176.9864

(ecomm.arima<-auto.arima(ecommerce.train))

## Series: ecommerce.train 
## ARIMA(2,0,2)(1,1,2)[12] 
## 
## Coefficients:
##           ar1      ar2     ma1     ma2     sar1     sma1     sma2
##       -0.9787  -0.7098  0.4231  0.2533  -0.2898  -0.1102  -0.3154
## s.e.   0.0750   0.0923  0.1152  0.1422   0.3255   0.3055   0.1493
## 
## sigma^2 estimated as 0.002153:  log likelihood=541.19
## AIC=-1066.38   AICc=-1065.93   BIC=-1036.06

(ecomm.garch<-ugarchfit(spec = spec, data = ecommerce.train))

## 
## *---------------------------------*
## *          GARCH Model Fit        *
## *---------------------------------*
## 
## Conditional Variance Dynamics    
## -----------------------------------
## GARCH Model  : sGARCH(1,1)
## Mean Model   : ARFIMA(1,0,1)
## Distribution : norm 
## 
## Optimal Parameters
## ------------------------------------
##         Estimate  Std. Error     t value Pr(>|t|)
## mu      0.008316    0.000081  1.0315e+02 0.000000
## ar1     0.548279    0.044114  1.2429e+01 0.000000
## ma1    -1.000000    0.000095 -1.0489e+04 0.000000
## omega   0.000025    0.000011  2.2987e+00 0.021523
## alpha1  0.000000    0.001572  8.0000e-06 0.999994
## beta1   0.999000    0.000150  6.6778e+03 0.000000
## 
## Robust Standard Errors:
##         Estimate  Std. Error     t value Pr(>|t|)
## mu      0.008316    0.000069  1.2037e+02  0.00000
## ar1     0.548279    0.034906  1.5707e+01  0.00000
## ma1    -1.000000    0.000062 -1.6115e+04  0.00000
## omega   0.000025    0.000024  1.0265e+00  0.30466
## alpha1  0.000000    0.001232  1.0000e-05  0.99999
## beta1   0.999000    0.000087  1.1538e+04  0.00000
## 
## LogLikelihood : 198.6803 
## 
## Information Criteria
## ------------------------------------
##                     
## Akaike       -1.1368
## Bayes        -1.0690
## Shibata      -1.1374
## Hannan-Quinn -1.1098
## 
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
##                         statistic   p-value
## Lag[1]                     0.6391 4.240e-01
## Lag[2*(p+q)+(p+q)-1][5]    6.4678 1.107e-05
## Lag[4*(p+q)+(p+q)-1][9]   12.1858 9.494e-04
## d.o.f=2
## H0 : No serial correlation
## 
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
##                         statistic   p-value
## Lag[1]                      32.79 1.025e-08
## Lag[2*(p+q)+(p+q)-1][5]     46.66 5.739e-13
## Lag[4*(p+q)+(p+q)-1][9]     61.51 4.441e-16
## d.o.f=2
## 
## Weighted ARCH LM Tests
## ------------------------------------
##             Statistic Shape Scale   P-Value
## ARCH Lag[3]     9.111 0.500 2.000 2.541e-03
## ARCH Lag[5]    19.418 1.440 1.667 3.003e-05
## ARCH Lag[7]    27.175 2.315 1.543 8.906e-07
## 
## Nyblom stability test
## ------------------------------------
## Joint Statistic:  8.3409
## Individual Statistics:              
## mu     0.11677
## ar1    0.43640
## ma1    0.08425
## omega  0.20920
## alpha1 0.20714
## beta1  0.19947
## 
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic:          1.49 1.68 2.12
## Individual Statistic:     0.35 0.47 0.75
## 
## Sign Bias Test
## ------------------------------------
##                     t-value      prob sig
## Sign Bias            1.5938 1.119e-01    
## Negative Sign Bias   0.1472 8.831e-01    
## Positive Sign Bias   6.8162 4.381e-11 ***
## Joint Effect       126.7370 2.730e-27 ***
## 
## 
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
##   group statistic p-value(g-1)
## 1    20     59.76    4.221e-06
## 2    30     74.01    8.412e-06
## 3    40     96.75    8.214e-07
## 4    50    111.00    1.041e-06
## 
## 
## Elapsed time : 0.2227061

ets.forecast <- forecast(ecomm.ets, h=6)
arima.forecast <- forecast(ecomm.arima, h=6)
bootp=ugarchboot(ecomm.garch,method=c("Partial","Full")[1],n.ahead = 6,n.bootpred=1000,n.bootfit=1000)
s_f=bootp@forc@forecast$seriesFor
s_f1=as.vector(s_f)


autoplot(ets.forecast)+
  autolayer(ecommerce.bc, series="Actual Data")+
  xlim(c(2015, 2021))

## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.

## Warning: Removed 275 row(s) containing missing values (geom_path).

## Warning: Removed 275 row(s) containing missing values (geom_path).

autoplot(arima.forecast)+
  autolayer(ecommerce.bc, series="Actual Data")+
  xlim(c(2015, 2021))

## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.

## Warning: Removed 275 row(s) containing missing values (geom_path).

## Warning: Removed 275 row(s) containing missing values (geom_path).

length(s_f1)

## [1] 6

length(ecommerce.test)

## [1] 6

kable(forecast::accuracy(ets.forecast,ecommerce.test)[,"RMSE"])

	x
Training set	0.0604082
Test set	0.0292916

kable(forecast::accuracy(arima.forecast,ecommerce.test)[,"RMSE"])

	x
Training set	0.0450793
Test set	0.0436421

kable(forecast::accuracy(s_f1,ecommerce.test)[,"RMSE"])

x
0.0905256

Week 4 discussion

Joey Biasi

11/16/2020