Introduction

This is a dataset I have been using, which is Egg Laying Hens in the U.S. published by the USDA at the beginning of each month. I test ETS modeling and data manipulation using different techniques.

Quick Data Exploration

#convert to tsibble and plot

egg_layers <- egg_layers %>%
  mutate(Date = yearmonth(Date)) %>%
  as_tsibble(index = Date)

egg_layers %>%
  autoplot(layers) +
  labs(x = "Month",
       y = "Egg Laying Hens",
       title = "Table Egg Laying Hens in the U.S.",
       subtitle = "On-hand first of the month"
       )

#Split the data

train_end_date <- round(length(rownames(egg_layers))*0.8,0)
test_start_date <- train_end_date+1

egg_layers_train <-egg_layers[1:train_end_date,]
egg_layers_test <-egg_layers[test_start_date:length(rownames(egg_layers)),]

ETS Model 1.0

This is a ts appraoch as opposed to a tsibble.

m.stats <- function(m.n, m.s){ # model.type, model.stats
  aic <- round(m.s$aic, 2)
  aicc <- round(m.s$aicc, 2)
  bic <- round(m.s$bic, 2)
  
  return(c(m.n, aic,aicc,bic))
}


myts.3 <-ts(egg_layers$layers, frequency = 12)
tsdisplay(myts.3)

# ANN
ANN <-ets(myts.3, model="ANN")
autoplot(ANN)

#ANA
ANA <-ets(myts.3, model="ANA")
autoplot(ANA)

#AAN
AAN <-ets(myts.3, model = "AAN")
autoplot(AAN)

#AAA
AAA <-ets(myts.3, model = "AAA")
autoplot(AAA)

#MAM
MAM <-ets(myts.3, model = "MAM")

autoplot(MAM)

Test ETS Models 1.0, and forecasts

#Test the models
m.e <- data.frame(matrix(ncol=4)) # model.evaluation
names(m.e)[1:4] <- c("Model", "aic", "aicc", "bic")

m.e[1,] <- m.stats("ANN", ANN)
m.e[2,] <- m.stats("ANA", ANA)
m.e[3,] <- m.stats("AAN", AAN)
m.e[4,] <- m.stats("AAA", AAA)
m.e[5,] <- m.stats("MAM", MAM)

m.e %>%
  kbl()%>%
  kable_material(c("striped","hover", full_width = T))
Model aic aicc bic
ANN 714.32 714.58 722.05
ANA 706.19 712.12 744.81
AAN 713 713.94 728.45
AAA 712.36 720.1 756.13
MAM 724.26 733.03 770.6
autoplot(ANA)

myts.3 <- ts(egg_layers, frequency = 13)
test <- forecast(ANA)

autoplot(test) + 
  labs(y="Million Egg Laying Hens"
       , x = "Date") +
  theme_ipsum_es()

### Unable to make dates appear on the chart

ETS Model 2.0

A tsibble approach.

egg_lay_ETS <-
  egg_layers_train %>% 
  model(ETS_AAM = ETS(layers ~ error("A") + trend("A") + season("M")),
        ETS_AAA = ETS(layers ~ error("A") + trend("A") + season("A")),
        ETS_MNA = ETS(layers ~ error("M") + trend("N") + season("M")),
        ETS_ANA = ETS(layers ~ error("A") + trend("N") + season("A")))

#Metrics
metrics<-glance(egg_lay_ETS)

metrics%>%
  kbl()%>%
  kable_material(c("striped","hover", full_width = T))
.model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
ETS_AAM 26.3234305 -288.5062 611.0123 621.2123 651.0764 20.92375 43.91165 3.2226647
ETS_AAA 17.0683374 -271.6100 577.2200 587.4200 617.2841 13.56714 32.35771 2.3346500
ETS_MNA 0.0001644 -271.0800 572.1600 579.9020 607.5107 12.50975 32.08485 0.0072078
ETS_ANA 14.7381198 -267.1235 564.2470 571.9889 599.5976 12.09282 30.74961 2.2039305
#knitr::kable(glance(egg_lay_ETS), "html")

ETS model 2.0 test, and forecast.

#Plot forecast using ANA from Above

egg_lay_fcst <-
  egg_lay_ETS %>%
  forecast(h = length(rownames(egg_layers_test)))

egg_lay_fcst %>%
  filter(.model == "ETS_ANA") %>%
  autoplot(egg_layers_test) +
  labs(x = "Month",
       y = "Egg Laying Hens",
       title = "Egg Laying Hens",
       subtitle = "ETS using ANA"
       , legend = TRUE) +
  theme_ipsum_es()

Conclusion

The model that had the best performance was ETS ANA.Although there appears to be a trend, we can see that the mean appears constant according to the test–although not conclusive. I will try an ARIMA model instead in another post.