Working with Hungarian Chicken Pox Time Series Data.
knitr::opts_chunk$set(echo = TRUE)
library(ggplot2)
library(data.table)
library(fpp2)
## Warning: package 'fpp2' was built under R version 4.0.5
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## -- Attaching packages ---------------------------------------------- fpp2 2.4 --
## v forecast 8.14 v expsmooth 2.3
## v fma 2.4
## Warning: package 'fma' was built under R version 4.0.5
## Warning: package 'expsmooth' was built under R version 4.0.5
##
# Read In ----
# The code below relies on the CSV having a `Date` column in day/month/year
# format and a numeric `SUM` column.
hchicken <- fread("hungarychickenpox.csv")

# Format Date Variable ----
hchicken$Date <- as.Date(hchicken$Date, "%d/%m/%Y")
# as.Date() yields NA for unparseable input instead of failing; stop here so
# bad rows cannot silently turn into a bogus "NA" month in the aggregation.
if (anyNA(hchicken$Date)) {
  stop("Some values of `Date` could not be parsed as %d/%m/%Y.", call. = FALSE)
}
# Reduce every date to its "YYYY-MM" month key.
hchicken$Date <- format(hchicken$Date, "%Y-%m")

# Monthly totals ----
hchickenmonthly <- hchicken[, .(SUM = sum(SUM)), by = .(Date)]
# `by` keeps groups in order of first appearance, so sort explicitly: the ts
# object below assigns time stamps purely by position. "YYYY-MM" keys sort
# chronologically as plain strings.
hchickenmonthly <- hchickenmonthly[order(Date)]
# Represent each month by its first day.
hchickenmonthly$Date <- as.Date(paste0(hchickenmonthly$Date, "-01"), "%Y-%m-%d")

# Time Series ----
# Take the start period from the data instead of hard-coding it. A series
# that begins in January 2005 gives start = c(2005, 1), i.e. the same as
# start = 2005.
# NOTE(review): months missing from the file are not detected here; ts()
# assumes the rows are consecutive months.
first_month <- hchickenmonthly$Date[1]
hchicken.ts <- ts(
  hchickenmonthly$SUM,
  start = c(
    as.integer(format(first_month, "%Y")),
    as.integer(format(first_month, "%m"))
  ),
  frequency = 12
)

# Plot ----
plot(hchicken.ts)
# Train/test split ----
# The first 80% of the observations are used for fitting and the remaining
# 20% are held out. For a 120-month series starting in January 2005 this is
# 96 training months (2005-2012) and a 24-month hold-out (2013-2014), which
# matches the previously hard-coded indices 1:96 / 97:120.
n_obs <- length(hchicken.ts)
# Integer arithmetic avoids floating-point surprises from 0.8 * n_obs.
n_train <- (n_obs * 4L) %/% 5L
obs_times <- time(hchicken.ts)

# window() keeps the ts attributes (start, frequency), so the start years do
# not have to be restated by hand.
# Train set (80%)
train <- window(hchicken.ts, end = obs_times[n_train])
# Test set (20%)
test <- window(hchicken.ts, start = obs_times[n_train + 1L])
# ETS ----
# Model: "ZZZ" lets ets() choose the error/trend/season components, and
# lambda = "auto" lets it choose the transformation parameter.
ets.hc <- ets(y = train, model = "ZZZ", lambda = "auto")

# Forecast: 24 steps ahead, reusing the lambda stored on the fitted model.
for.ets.hc <- forecast(object = ets.hc, h = 24, lambda = ets.hc$lambda)

# Plot
print(autoplot(for.ets.hc))

# Accuracy/error against the first 24 hold-out observations. `[` drops the
# ts attributes, so the comparison is by position.
acc.ets.hc <- accuracy(for.ets.hc, test[seq_len(24)])
print(acc.ets.hc)
## ME RMSE MAE MPE MAPE MASE
## Training set -5.272879 652.7642 474.8033 -2.255532 14.69469 0.3913169
## Test set -263.056702 693.0194 468.1334 -8.567726 22.03965 0.3858198
## ACF1
## Training set 0.07646836
## Test set NA
# Auto ARIMA ----
# Model: auto.arima() selects the model, and lambda = "auto" lets it choose
# the transformation parameter.
aarima.hc <- auto.arima(y = train, lambda = "auto")

# Forecast: 24 steps ahead.
for.aarima.hc <- forecast(object = aarima.hc, h = 24)

# Prediction Plot
print(autoplot(for.aarima.hc))

# Accuracy/error against the first 24 hold-out observations. `[` drops the
# ts attributes, so the comparison is by position.
acc.aarima.hc <- accuracy(for.aarima.hc, test[seq_len(24)])
print(acc.aarima.hc)
## ME RMSE MAE MPE MAPE MASE
## Training set 11.38508 657.0006 444.8895 -2.203238 15.24394 0.3666629
## Test set -50.93296 602.9462 464.2824 -2.559108 21.25686 0.3826459
## ACF1
## Training set 0.007981979
## Test set NA
# NNAR ----
# nnetar() trains its networks from random starting weights, so the fit (and
# every number derived from it) changes from run to run unless the RNG is
# seeded. Fix the seed so the reported accuracy can be reproduced.
set.seed(2021)

# Model
nnar.hc <- nnetar(train, lambda = "auto")

# Forecast: 24 steps ahead.
for.nnar.hc <- forecast(nnar.hc, h = 24)

# Plot
autoplot(for.nnar.hc)

# Accuracy/error against the first 24 hold-out observations. `[` drops the
# ts attributes, so the comparison is by position.
acc.nnar.hc <- accuracy(for.nnar.hc, test[1:24])
acc.nnar.hc
## ME RMSE MAE MPE MAPE MASE
## Training set 79.97464 804.5350 614.6841 -2.254994 19.36619 0.5066019
## Test set -393.32639 851.9048 551.6152 -12.508741 23.94422 0.4546227
## ACF1
## Training set 0.2072735
## Test set NA
# Hold-out observations vs. the three forecasts ----
# x positions: months 1..24 of the hold-out period.
myseq <- seq_len(24)
actual <- test[myseq]

# Scale the y-axis to cover the observations AND all three forecasts;
# scaling from the observations alone clips any forecast line that leaves
# the observed range.
y_limits <- range(
  actual, for.ets.hc$mean, for.aarima.hc$mean, for.nnar.hc$mean,
  na.rm = TRUE
)

plot(myseq, actual,
  type = "l",
  col = "black", xlab = "Month",
  ylab = "Total Cases",
  ylim = y_limits,
  main = "Hungarian Chicken Pox Cases 2013-2014"
)
lines(myseq, for.ets.hc$mean, col = "red")
lines(myseq, for.aarima.hc$mean, col = "blue")
lines(myseq, for.nnar.hc$mean, col = "green")
# The legend's top-left corner is placed at (month 3, 2500 cases) in data
# coordinates.
legend(3, 2500,
  legend = c("Real", "ETS", "Auto Arima", "NNAR"),
  col = c("black", "red", "blue", "green"),
  lty = 1, cex = 0.8
)
With a test MASE of 0.383, the ARIMA model outperforms both the ETS (test MASE of 0.386) and the NNAR (test MASE of 0.455). The ARIMA and ETS perform nearly identically, while the NNAR performs distinctly worse. It’s clear that the neural network approach generally gets the seasonality right, but it misses the irregularities within the seasonality that ARIMA and ETS capture better. The ARIMA succeeds best in dealing with this, due to the moving average component, while the ETS comes in a close second with its smoothing functionality.