This is a dataset I have been using, which is Egg Laying Hens in the U.S. published by the USDA at the beginning of each month. I test ETS modeling and data manipulation using different techniques.
#convert to tsibble and plot
egg_layers <- egg_layers %>%
mutate(Date = yearmonth(Date)) %>%
as_tsibble(index = Date)
egg_layers %>%
autoplot(layers) +
labs(x = "Month",
y = "Egg Laying Hens",
title = "Table Egg Laying Hens in the U.S.",
subtitle = "On-hand first of the month"
)
#Split the data
train_end_date <- round(length(rownames(egg_layers))*0.8,0)
test_start_date <- train_end_date+1
egg_layers_train <-egg_layers[1:train_end_date,]
egg_layers_test <-egg_layers[test_start_date:length(rownames(egg_layers)),]
This is a ts appraoch as opposed to a tsibble.
m.stats <- function(m.n, m.s){ # model.type, model.stats
aic <- round(m.s$aic, 2)
aicc <- round(m.s$aicc, 2)
bic <- round(m.s$bic, 2)
return(c(m.n, aic,aicc,bic))
}
myts.3 <-ts(egg_layers$layers, frequency = 12)
tsdisplay(myts.3)
# ANN
ANN <-ets(myts.3, model="ANN")
autoplot(ANN)
#ANA
ANA <-ets(myts.3, model="ANA")
autoplot(ANA)
#AAN
AAN <-ets(myts.3, model = "AAN")
autoplot(AAN)
#AAA
AAA <-ets(myts.3, model = "AAA")
autoplot(AAA)
#MAM
MAM <-ets(myts.3, model = "MAM")
autoplot(MAM)
#Test the models
m.e <- data.frame(matrix(ncol=4)) # model.evaluation
names(m.e)[1:4] <- c("Model", "aic", "aicc", "bic")
m.e[1,] <- m.stats("ANN", ANN)
m.e[2,] <- m.stats("ANA", ANA)
m.e[3,] <- m.stats("AAN", AAN)
m.e[4,] <- m.stats("AAA", AAA)
m.e[5,] <- m.stats("MAM", MAM)
m.e %>%
kbl()%>%
kable_material(c("striped","hover", full_width = T))
| Model | aic | aicc | bic |
|---|---|---|---|
| ANN | 714.32 | 714.58 | 722.05 |
| ANA | 706.19 | 712.12 | 744.81 |
| AAN | 713 | 713.94 | 728.45 |
| AAA | 712.36 | 720.1 | 756.13 |
| MAM | 724.26 | 733.03 | 770.6 |
autoplot(ANA)
myts.3 <- ts(egg_layers, frequency = 13)
test <- forecast(ANA)
autoplot(test) +
labs(y="Million Egg Laying Hens"
, x = "Date") +
theme_ipsum_es()
### Unable to make dates appear on the chart
A tsibble approach.
egg_lay_ETS <-
egg_layers_train %>%
model(ETS_AAM = ETS(layers ~ error("A") + trend("A") + season("M")),
ETS_AAA = ETS(layers ~ error("A") + trend("A") + season("A")),
ETS_MNA = ETS(layers ~ error("M") + trend("N") + season("M")),
ETS_ANA = ETS(layers ~ error("A") + trend("N") + season("A")))
#Metrics
metrics<-glance(egg_lay_ETS)
metrics%>%
kbl()%>%
kable_material(c("striped","hover", full_width = T))
| .model | sigma2 | log_lik | AIC | AICc | BIC | MSE | AMSE | MAE |
|---|---|---|---|---|---|---|---|---|
| ETS_AAM | 26.3234305 | -288.5062 | 611.0123 | 621.2123 | 651.0764 | 20.92375 | 43.91165 | 3.2226647 |
| ETS_AAA | 17.0683374 | -271.6100 | 577.2200 | 587.4200 | 617.2841 | 13.56714 | 32.35771 | 2.3346500 |
| ETS_MNA | 0.0001644 | -271.0800 | 572.1600 | 579.9020 | 607.5107 | 12.50975 | 32.08485 | 0.0072078 |
| ETS_ANA | 14.7381198 | -267.1235 | 564.2470 | 571.9889 | 599.5976 | 12.09282 | 30.74961 | 2.2039305 |
#knitr::kable(glance(egg_lay_ETS), "html")
#Plot forecast using ANA from Above
egg_lay_fcst <-
egg_lay_ETS %>%
forecast(h = length(rownames(egg_layers_test)))
egg_lay_fcst %>%
filter(.model == "ETS_ANA") %>%
autoplot(egg_layers_test) +
labs(x = "Month",
y = "Egg Laying Hens",
title = "Egg Laying Hens",
subtitle = "ETS using ANA"
, legend = TRUE) +
theme_ipsum_es()
The model that had the best performance was ETS ANA.Although there appears to be a trend, we can see that the mean appears constant according to the test–although not conclusive. I will try an ARIMA model instead in another post.