This is a continuation of the dataset I have been using, which is Egg Laying Hens in the U.S. published by the USDA at the beginning of each month. It provides some continuity and testing this on highly seasonal and volatile dataset by incorporating some basic time-series approaches, like ARIMA, and ETS.
Perform some basic and quick initial manipulation and data exploration.
# Inspect Data
head(egg_layers) %>%
kbl()%>%
kable_material(c("striped","hover", full_width = T))
| Date | layers |
|---|---|
| 2014-01-31 | 304.292 |
| 2014-02-28 | 302.343 |
| 2014-03-31 | 303.008 |
| 2014-04-30 | 304.398 |
| 2014-05-31 | 304.258 |
| 2014-06-30 | 302.943 |
#convert to tsibble for plotting
egg_layers_tsble <- egg_layers %>%
mutate(Date = yearmonth(Date)) %>%
as_tsibble(index = Date)
egg_layers_tsble %>%
autoplot(layers) +
labs(x = "Month",
y = "Egg Laying Hens",
title = "Table Egg Laying Hens in the U.S.",
subtitle = "On-hand first of the month"
)
# Review and split data
head(egg_layers)
## Date layers
## 1 2014-01-31 304.292
## 2 2014-02-28 302.343
## 3 2014-03-31 303.008
## 4 2014-04-30 304.398
## 5 2014-05-31 304.258
## 6 2014-06-30 302.943
egg_layers$Date = as.Date(egg_layers$Date)
head(egg_layers)
## Date layers
## 1 2014-01-31 304.292
## 2 2014-02-28 302.343
## 3 2014-03-31 303.008
## 4 2014-04-30 304.398
## 5 2014-05-31 304.258
## 6 2014-06-30 302.943
egg_layers_train <-egg_layers[1:77,]
tail(egg_layers_train)
## Date layers
## 72 2019-12-31 340.652
## 73 2020-01-31 340.296
## 74 2020-02-29 334.695
## 75 2020-03-31 330.520
## 76 2020-04-30 331.337
## 77 2020-05-31 324.252
egg_layers_test <-egg_layers[78:97,]
head(egg_layers_test)
## Date layers
## 78 2020-06-30 320.999
## 79 2020-07-31 316.927
## 80 2020-08-31 318.028
## 81 2020-09-30 319.529
## 82 2020-10-31 322.932
## 83 2020-11-30 325.445
#Convert to ts
egg_layers_ts = ts(egg_layers$layers, start=c(2014,1), frequency = 12)
egg_layers_train_ts <-ts(egg_layers_ts[1:77], start=c(2014,1), frequency = 12)
egg_layers_test_ts <-ts(egg_layers_ts[78:97], start=c(2020,6), frequency =12)
In previous posts I had already decompose the data with ETS and ARIMA, but will do again so that we can see quick an ETS, “ANA” decomposition.
egg_layers_tsble_ETS_ANA <-
egg_layers_tsble %>%
model(ETS_ANA = ETS(layers ~ error("A") + trend("N") + season("A"))
) %>%
components()
autoplot(egg_layers_tsble_ETS_ANA) +
labs(title = "ETS decomposition, 'ANA', of Egg Layers")
mydec1=decompose(egg_layers_ts)
autoplot(mydec1) +
labs(title = "Decomposition of Additive TS for Egg Laying Hens")
par(mfrow=c(1,1))
myets=ets(egg_layers_train_ts, model="ANA")
myetsf=forecast(myets,20)
myetsa=accuracy(myetsf, egg_layers_test_ts)
autoplot(myetsf)+
theme_ipsum_ps()+
labs(y = "Egg Laying Hens"
, x = "Date")
myets$model
myetsa
checkresiduals(myets)
par(mfrow=c(1,1))
myets2=stlf(egg_layers_train_ts, method="ets")
myetsf2=forecast(myets2,20)
myetsa2=accuracy(myetsf2, egg_layers_test_ts)
autoplot(myetsf2) +
theme_ipsum_ps()+
labs(y = "Egg Laying Hens"
, x = "Date")
myets2$model
myetsa2
checkresiduals(myets2)
par(mfrow=c(1,1))
myets3=stlf(egg_layers_train_ts, method="ets", etsmodel="ANA")
myetsf3=forecast(myets3,20)
myetsa3=accuracy(myetsf3, egg_layers_test_ts)
autoplot(myetsf3) +
theme_ipsum_ps()+
labs(y = "Egg Laying Hens"
, x = "Date")
myets3$model
myetsa3
checkresiduals(myets3)
par(mfrow=c(1,1))
myarima=stlf(egg_layers_train_ts, method="arima")
myarimaf=forecast(myarima,20)
myarimaa=accuracy(myarimaf, egg_layers_test_ts)
autoplot(myarima) +
theme_ipsum_ps()+
labs(y = "Egg Laying Hens"
, x = "Date")
myarima$model
## Series: x
## ARIMA(1,1,0)
##
## Coefficients:
## ar1
## 0.3238
## s.e. 0.1083
##
## sigma^2 estimated as 9.112: log likelihood=-191.35
## AIC=386.7 AICc=386.87 BIC=391.37
myarimaa
## ME RMSE MAE MPE MAPE MASE
## Training set 0.2076586 2.979187 1.869509 0.06395646 0.6079444 0.1416140
## Test set 1.1714868 2.809312 2.346997 0.36072732 0.7258326 0.1777833
## ACF1 Theil's U
## Training set -0.03420609 NA
## Test set 0.47807071 1.150953
checkresiduals(myarima)
##
## Ljung-Box test
##
## data: Residuals from STL + ARIMA(1,1,0)
## Q* = 15.475, df = 14, p-value = 0.3465
##
## Model df: 1. Total lags used: 15
Kept getting errors with garchFit, so I improvised here. Not sure of the results, but it appears that there is high degree of autocorrelation but not a great deal of improvement due to an alpha very close to 1 of Sigma E t-1 compared to any of the previous models. Volatility appears relatively low with sporadic jumps that appear very random.
myts1 = scale(egg_layers_train_ts)
mygarch = ugarchspec(mean.model = list(armaOrder = c(0,0)),
variance.model = list(model = "sGARCH"),
distribution.model = "norm")
garch1 <-ugarchfit(data = myts1, spec = mygarch)
show(garch1)
coef(garch1)
summary(garch1)
plot(garch1, which ="all")
# data=myts1, formula=garch(1,1),cond.dist="QMLE", trace=FALSE)
# # Added QMLE as in the example because all residuals of all models appear normally distributed
# coef(mygarch)
# summary(mygarch)
# plot(mygarch@residuals)
# mypred=predict(mygarch,1)
# plot(mypred$meanForecast, type="l")
fcst <- ugarchforecast(fitORspec = garch1, n.ahead = 20)
plot(fitted(fcst))
plot(sigma(fcst))