6.9 Exercise #7
library(fpp2)  # loads the forecast package and the fma data sets used below
writing.series <- writing
# plot data
plot.ts(writing.series)

# use STL decomposition to calculate the trend-cycle and seasonal indices
mstl(writing.series) %>%
  autoplot()

# The seasonal swings grow with the level of the series, so the relationship
# looks multiplicative and a Box-Cox transformation is warranted. For the
# seasonally adjusted series we forecast with a random walk with drift: a
# plain naive forecast just repeats the last observation and cannot follow
# the upward trend. (Note: lambda = T would be coerced to lambda = 1, i.e.
# no transformation, so we select lambda explicitly.)
plot(stlf(writing.series, method = "rwdrift", lambda = BoxCox.lambda(writing.series)))
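
# A sketch of making the transformation explicit (assumes the forecast
# package loaded above; the variable name lambda.writing is illustrative).
# BoxCox.lambda() selects lambda automatically, and the transformed series
# should show roughly constant seasonal variation.
lambda.writing <- BoxCox.lambda(writing.series)
lambda.writing
plot.ts(BoxCox(writing.series, lambda.writing))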

6.9 Exercise #8
fancy.series <- fancy
# plot data
plot.ts(fancy.series)

# use STL decomposition to calculate the trend-cycle and seasonal indices
mstl(fancy.series) %>%
  autoplot()

# As with the writing series, the seasonal swings grow with the level, so a
# Box-Cox transformation is warranted, and we use a random walk with drift
# on the seasonally adjusted series to capture the trend.
plot(stlf(fancy.series, method = "rwdrift", lambda = BoxCox.lambda(fancy.series)))
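
# Sketch (not part of the exercise): check whether drift actually helps by
# comparing in-sample accuracy against a naive benchmark on the seasonally
# adjusted series; accuracy() on a forecast object reports training-set errors.
lambda.fancy <- BoxCox.lambda(fancy.series)
accuracy(stlf(fancy.series, method = "rwdrift", lambda = lambda.fancy))
accuracy(stlf(fancy.series, method = "naive", lambda = lambda.fancy))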

7.8 Exercise #13
#prepare data
ausbeer.series <- ausbeer
bricksq.series <- bricksq
dole.series <- dole
a10.series <- a10
h02.series <- h02
usmelec.series <- usmelec
# train and test splits; window() keeps the original dates, whereas
# re-creating the series with ts(start = ...) would mis-stamp the test
# observations (it recycles the data from the beginning of the series)
ausbeer.train <- window(ausbeer.series, end = c(2006, 4))
ausbeer.test  <- window(ausbeer.series, start = c(2007, 1))
bricksq.train <- window(bricksq.series, end = c(1990, 4))
bricksq.test  <- window(bricksq.series, start = c(1991, 1))
dole.train <- window(dole.series, end = c(1988, 12))
dole.test  <- window(dole.series, start = c(1989, 1))
a10.train <- window(a10.series, end = c(2004, 12))
a10.test  <- window(a10.series, start = c(2005, 1))
h02.train <- window(h02.series, end = c(2004, 12))
h02.test  <- window(h02.series, start = c(2005, 1))
usmelec.train <- window(usmelec.series, end = c(2008, 12))
usmelec.test  <- window(usmelec.series, start = c(2009, 1))
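
# Quick sanity check (sketch): each training window should end immediately
# before the matching test window begins.
end(ausbeer.train)    # expect 2006 Q4
start(ausbeer.test)   # expect 2007 Q1
end(usmelec.train)    # expect Dec 2008
start(usmelec.test)   # expect Jan 2009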
# Build a function that runs ets(), snaive() and stlf() and prints the accuracy
compare <- function(traindata, testdata, hforecast = 12, boxcox = FALSE) {
  # select a Box-Cox lambda only when asked; passing TRUE/FALSE straight to
  # stlf() would be coerced to lambda = 1/0, which is not what we want
  lam <- if (boxcox) BoxCox.lambda(traindata) else NULL
  # ETS with automatic model selection
  a1 <- traindata %>% ets(model = "ZZZ") %>% forecast(h = hforecast) %>%
    accuracy(testdata)
  print(a1[, c("RMSE", "MAE", "MAPE", "MASE")])
  # seasonal naive
  a2 <- snaive(traindata, h = hforecast) %>%
    accuracy(testdata)
  print(a2[, c("RMSE", "MAE", "MAPE", "MASE")])
  # STL decomposition forecast
  a3 <- stlf(traindata, h = hforecast, lambda = lam) %>%
    accuracy(testdata)
  print(a3[, c("RMSE", "MAE", "MAPE", "MASE")])
}
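
# Note on reading the tables below: MASE scales each error by the in-sample
# MAE of the seasonal naive method, so it is comparable across the three
# models (values below 1 beat seasonal naive in-sample). A sketch of the
# scaling denominator for the ausbeer training data:
m <- frequency(ausbeer.train)
mean(abs(diff(as.numeric(ausbeer.train), lag = m)))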
# ausbeer
plot.ts(ausbeer.train)

compare(ausbeer.train, ausbeer.test, hforecast = 14)
## RMSE MAE MAPE MASE
## Training set 15.9596 12.20538 2.921393 0.762111
## Test set 165.7248 165.40930 65.755363 10.328251
## RMSE MAE MAPE MASE
## Training set 19.85314 16.01523 3.814617 1.0000
## Test set 172.14661 171.90909 68.475947 10.7341
## RMSE MAE MAPE MASE
## Training set 13.74698 10.60279 2.539117 0.6620441
## Test set 167.62370 167.23776 66.420164 10.4424210
# => ets got the best result on Test set
# bricksq
plot.ts(bricksq.train)

compare(bricksq.train, bricksq.test, hforecast = 14, boxcox = TRUE)
## RMSE MAE MAPE MASE
## Training set 21.53635 15.34056 3.814251 0.4372684
## Test set 309.73410 308.97589 146.924088 8.8070711
## RMSE MAE MAPE MASE
## Training set 48.75811 35.08271 8.638005 1.000000
## Test set 325.97602 324.36364 153.975118 9.245684
## RMSE MAE MAPE MASE
## Training set 19.6667 13.95614 3.458193 0.3978068
## Test set 309.5347 308.74632 146.823636 8.8005272
# => stlf got the best result on Test set
# dole
plot.ts(dole.train)

compare(dole.train, dole.test, hforecast = 38, boxcox = TRUE)
## RMSE MAE MAPE MASE
## Training set 16565.6 9323.031 6.289154 0.3201815
## Test set 440880.2 439414.676 4031.172512 15.0908512
## RMSE MAE MAPE MASE
## Training set 54891.12 29117.95 28.19218 1.00000
## Test set 530775.02 528899.22 4808.59064 18.16403
## RMSE MAE MAPE MASE
## Training set 6406.613 3879.101 5.16386 0.1332202
## Test set 379993.734 378980.688 3469.93952 13.0153622
# => stlf got the best result on the test set (lowest errors on all four measures)
# a10.train
plot.ts(a10.train)

compare(a10.train, a10.test, hforecast = 38, boxcox = TRUE)
## RMSE MAE MAPE MASE
## Training set 0.4726965 0.3353551 3.996674 0.3698757
## Test set 13.8274319 13.7075375 355.419516 15.1185578
## RMSE MAE MAPE MASE
## Training set 1.052731 0.9066696 11.11561 1.00000
## Test set 10.254965 10.1843937 267.80801 11.23275
## RMSE MAE MAPE MASE
## Training set 0.4617717 0.3425413 4.320592 0.3778017
## Test set 13.0889944 13.0687409 342.914258 14.4140051
# => snaive got the best result on Test set
# h02.train
plot.ts(h02.train)

compare(h02.train, h02.test, hforecast = 38, boxcox = TRUE)
## RMSE MAE MAPE MASE
## Training set 0.04410859 0.03256959 4.397991 0.5564493
## Test set 0.49887462 0.48749438 100.927086 8.3288092
## RMSE MAE MAPE MASE
## Training set 0.07058749 0.0585311 8.171465 1.000000
## Test set 0.47062025 0.4592449 95.960370 7.846169
## RMSE MAE MAPE MASE
## Training set 0.04019418 0.03032672 4.375312 0.518130
## Test set 0.56174084 0.55767827 117.103341 9.527896
# => snaive got the best result on Test set
# usmelec.train
plot.ts(usmelec.train)

compare(usmelec.train, usmelec.test, hforecast = 38, boxcox = TRUE)
## RMSE MAE MAPE MASE
## Training set 7.231095 5.444088 2.140116 0.6204376
## Test set 195.353477 194.022361 122.780708 22.1118338
## RMSE MAE MAPE MASE
## Training set 11.32383 8.774594 3.475778 1.00000
## Test set 185.10317 183.919158 116.547828 20.96042
## RMSE MAE MAPE MASE
## Training set 6.198652 4.713299 1.869173 0.537153
## Test set 193.984058 192.891050 122.132056 21.982904
# => snaive got the best result on Test set
8.11 Exercise #18
#a. Select a time series from DataMarket, copy its short URL, and import the data with rdatamarket::dmseries():
iceland.unemployed <- ts(rdatamarket::dmseries("https://datamarket.com/data/set/ybz/number-of-unemployed#!ds=ybz!2m5e=1&display=line")[,1],
                         start = 2001, frequency = 12)
#b. Plot graphs of the data, and try to identify an appropriate ARIMA model.
plot.ts(iceland.unemployed)

# Test for stationarity with an augmented Dickey-Fuller test
library(tseries)
adf.test(iceland.unemployed, alternative = "stationary", k = 12)
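
# Cross-check with the unit-root helpers in the forecast package (a sketch;
# these use their own default tests rather than the ADF test above):
ndiffs(iceland.unemployed)   # suggested number of first differences
nsdiffs(iceland.unemployed)  # suggested number of seasonal differences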
##
## Augmented Dickey-Fuller Test
##
## data: iceland.unemployed
## Dickey-Fuller = -3.1098, Lag order = 12, p-value = 0.1128
## alternative hypothesis: stationary
# p-value = 0.11 > 0.05: we cannot reject the unit root, so the series is
# non-stationary and needs differencing. First look at the ACF/PACF:
ggAcf(iceland.unemployed)

ggPacf(iceland.unemployed)

# Build ARIMA
# first difference (d = 1)
diff1 <- diff(iceland.unemployed)
# plot data
plot.ts(diff1)

ggAcf(diff1)

ggPacf(diff1)
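
# The model fitted below also takes one seasonal difference (D = 1), so it
# is worth inspecting the doubly differenced series as well (sketch):
diff12 <- diff(diff1, lag = 12)
ggtsdisplay(diff12)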

fit.arima <- Arima(iceland.unemployed, order=c(1,1,0), seasonal=c(1,1,1))
summary(fit.arima)
## Series: iceland.unemployed
## ARIMA(1,1,0)(1,1,1)[12]
##
## Coefficients:
## ar1 sar1 sma1
## 0.7794 0.2162 -0.9822
## s.e. 0.0492 0.1046 0.5604
##
## sigma^2 estimated as 79819: log likelihood=-1141.14
## AIC=2290.29 AICc=2290.55 BIC=2302.59
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -4.872463 269.1403 171.2051 0.2226207 3.683212 0.08736699
## ACF1
## Training set -0.009152117
tsdisplay(residuals(fit.arima))
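
# Sanity check on the hand-picked order (sketch; fit.auto is an illustrative
# name, and auto.arima may well choose a different specification):
fit.auto <- auto.arima(iceland.unemployed)
summary(fit.auto)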

#c. Do residual diagnostic checking of your ARIMA model. Are the residuals white noise? No, they do not look like white noise: the Ljung-Box test below rejects the white-noise hypothesis (p = 0.006).
#check residual
ggAcf(fit.arima$residuals)

ggPacf(fit.arima$residuals)

checkresiduals(fit.arima)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,0)(1,1,1)[12]
## Q* = 40.514, df = 21, p-value = 0.006436
##
## Model df: 3. Total lags used: 24
#d. Use your chosen ARIMA model to forecast the next four years.
arima.forecast <- forecast(fit.arima, h = 48)
plot(arima.forecast)

#e. Now try to identify an appropriate ETS model.
fit.ets <- ets(iceland.unemployed, model = "AAN")  # damped trend also considered (damped = NULL by default)
#f. Do residual diagnostic checking of your ETS model. Are the residuals white noise? No, the residuals do not look like white noise: there are several outliers and the Ljung-Box test rejects decisively (p < 2.2e-16).
checkresiduals(fit.ets)

##
## Ljung-Box test
##
## data: Residuals from ETS(A,Ad,N)
## Q* = 135.8, df = 19, p-value < 2.2e-16
##
## Model df: 5. Total lags used: 24
#g. Use your chosen ETS model to forecast the next four years.
ets.forecast <- forecast(fit.ets, h = 48)
plot(ets.forecast)

#h. Which of the two models do you prefer? The ARIMA model's residual diagnostics (ACF/PACF and Ljung-Box) look considerably better than the ETS model's, so I would go with the ARIMA model.
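
# A more even-handed comparison (sketch, can be slow): time-series
# cross-validation of one-step-ahead errors for both models; AIC values are
# not comparable across model classes, but CV errors are.
f.arima <- function(x, h) forecast(Arima(x, order = c(1, 1, 0), seasonal = c(1, 1, 1)), h = h)
f.ets <- function(x, h) forecast(ets(x, model = "AAN"), h = h)
e.arima <- tsCV(iceland.unemployed, f.arima, h = 1)
e.ets <- tsCV(iceland.unemployed, f.ets, h = 1)
sqrt(mean(e.arima^2, na.rm = TRUE))  # CV RMSE, ARIMA
sqrt(mean(e.ets^2, na.rm = TRUE))    # CV RMSE, ETS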