Introduction

This is a continuation of the dataset I have been using, which is Egg Laying Hens in the U.S. published by the USDA at the beginning of each month. It provides some continuity and testing this on highly seasonal and volatile dataset by incorporating some basic time-series approaches, like ARIMA, and ETS.

Data Exploration Quick

Perform some basic and quick initial manipulation and data exploration.

# Inspect Data
head(egg_layers) %>%
  kbl()%>%
  kable_material(c("striped","hover", full_width = T))
Date layers
2014-01-31 304.292
2014-02-28 302.343
2014-03-31 303.008
2014-04-30 304.398
2014-05-31 304.258
2014-06-30 302.943
#convert to tsibble for plotting

egg_layers_tsble <- egg_layers %>%
  mutate(Date = yearmonth(Date)) %>%
  as_tsibble(index = Date)

egg_layers_tsble %>%
  autoplot(layers) +
  labs(x = "Month",
       y = "Egg Laying Hens",
       title = "Table Egg Laying Hens in the U.S.",
       subtitle = "On-hand first of the month"
       )

# Review and split data
head(egg_layers)
##         Date  layers
## 1 2014-01-31 304.292
## 2 2014-02-28 302.343
## 3 2014-03-31 303.008
## 4 2014-04-30 304.398
## 5 2014-05-31 304.258
## 6 2014-06-30 302.943
egg_layers$Date = as.Date(egg_layers$Date)
head(egg_layers)
##         Date  layers
## 1 2014-01-31 304.292
## 2 2014-02-28 302.343
## 3 2014-03-31 303.008
## 4 2014-04-30 304.398
## 5 2014-05-31 304.258
## 6 2014-06-30 302.943
egg_layers_train <-egg_layers[1:77,]
tail(egg_layers_train)
##          Date  layers
## 72 2019-12-31 340.652
## 73 2020-01-31 340.296
## 74 2020-02-29 334.695
## 75 2020-03-31 330.520
## 76 2020-04-30 331.337
## 77 2020-05-31 324.252
egg_layers_test <-egg_layers[78:97,]
head(egg_layers_test)
##          Date  layers
## 78 2020-06-30 320.999
## 79 2020-07-31 316.927
## 80 2020-08-31 318.028
## 81 2020-09-30 319.529
## 82 2020-10-31 322.932
## 83 2020-11-30 325.445
#Convert to ts

egg_layers_ts = ts(egg_layers$layers, start=c(2014,1), frequency = 12)

egg_layers_train_ts <-ts(egg_layers_ts[1:77], start=c(2014,1), frequency = 12)
egg_layers_test_ts <-ts(egg_layers_ts[78:97], start=c(2020,6), frequency =12)

Quick Decomposition

In previous posts I had already decompose the data with ETS and ARIMA, but will do again so that we can see quick an ETS, “ANA” decomposition.

egg_layers_tsble_ETS_ANA <-
egg_layers_tsble %>%
  model(ETS_ANA = ETS(layers ~ error("A") + trend("N") + season("A"))
  ) %>%
  components()

autoplot(egg_layers_tsble_ETS_ANA) +
  labs(title = "ETS decomposition, 'ANA', of Egg Layers")

mydec1=decompose(egg_layers_ts)
autoplot(mydec1) +
  labs(title = "Decomposition of Additive TS for Egg Laying Hens")

ETS ANA

par(mfrow=c(1,1))
myets=ets(egg_layers_train_ts, model="ANA")
myetsf=forecast(myets,20)
myetsa=accuracy(myetsf, egg_layers_test_ts)
autoplot(myetsf)+
  theme_ipsum_ps()+ 
  labs(y = "Egg Laying Hens"
       , x = "Date")

myets$model
myetsa

checkresiduals(myets)

ETS Model AAN

par(mfrow=c(1,1))
myets2=stlf(egg_layers_train_ts, method="ets")
myetsf2=forecast(myets2,20)
myetsa2=accuracy(myetsf2, egg_layers_test_ts)
autoplot(myetsf2) +
  theme_ipsum_ps()+ 
  labs(y = "Egg Laying Hens"
       , x = "Date")

myets2$model
myetsa2

checkresiduals(myets2)

ETS ANN

par(mfrow=c(1,1))
myets3=stlf(egg_layers_train_ts, method="ets", etsmodel="ANA")
myetsf3=forecast(myets3,20)
myetsa3=accuracy(myetsf3, egg_layers_test_ts)
autoplot(myetsf3) + 
  theme_ipsum_ps()+ 
  labs(y = "Egg Laying Hens"
       , x = "Date")

myets3$model
myetsa3

checkresiduals(myets3)

ARIMA

par(mfrow=c(1,1))
myarima=stlf(egg_layers_train_ts, method="arima")
myarimaf=forecast(myarima,20)
myarimaa=accuracy(myarimaf, egg_layers_test_ts)
autoplot(myarima) + 
  theme_ipsum_ps()+ 
  labs(y = "Egg Laying Hens"
       , x = "Date")

myarima$model
## Series: x 
## ARIMA(1,1,0) 
## 
## Coefficients:
##          ar1
##       0.3238
## s.e.  0.1083
## 
## sigma^2 estimated as 9.112:  log likelihood=-191.35
## AIC=386.7   AICc=386.87   BIC=391.37
myarimaa
##                     ME     RMSE      MAE        MPE      MAPE      MASE
## Training set 0.2076586 2.979187 1.869509 0.06395646 0.6079444 0.1416140
## Test set     1.1714868 2.809312 2.346997 0.36072732 0.7258326 0.1777833
##                     ACF1 Theil's U
## Training set -0.03420609        NA
## Test set      0.47807071  1.150953
checkresiduals(myarima)

## 
##  Ljung-Box test
## 
## data:  Residuals from STL +  ARIMA(1,1,0)
## Q* = 15.475, df = 14, p-value = 0.3465
## 
## Model df: 1.   Total lags used: 15

GARCH

Kept getting errors with garchFit, so I improvised here. Not sure of the results, but it appears that there is high degree of autocorrelation but not a great deal of improvement due to an alpha very close to 1 of Sigma E t-1 compared to any of the previous models. Volatility appears relatively low with sporadic jumps that appear very random.

myts1 = scale(egg_layers_train_ts)

mygarch = ugarchspec(mean.model = list(armaOrder = c(0,0)), 
                                       variance.model = list(model = "sGARCH"),
                                       distribution.model = "norm")

garch1 <-ugarchfit(data = myts1, spec = mygarch)
show(garch1)
coef(garch1)
summary(garch1)

plot(garch1, which ="all")

#   data=myts1, formula=garch(1,1),cond.dist="QMLE", trace=FALSE)
# # Added QMLE as in the example because all residuals of all models appear normally distributed
# coef(mygarch)
# summary(mygarch)
# plot(mygarch@residuals)
# mypred=predict(mygarch,1)
# plot(mypred$meanForecast, type="l")

Forecast

fcst <- ugarchforecast(fitORspec = garch1, n.ahead = 20)

plot(fitted(fcst))

plot(sigma(fcst))