Chapter 7
Question 1
#(a)
head(pigs, 20)
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct
## 1980 76378 71947 33873 96428 105084 95741 110647 100331 94133 103055
## 1981 76889 81291 91643 96228 102736 100264 103491 97027
## Nov Dec
## 1980 90595 101457
## 1981
pigs.data <- window(pigs, start=1980)
fcast <- ses(pigs.data, h = 4)
fcast
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Sep 1995 98816.41 85605.43 112027.4 78611.97 119020.8
## Oct 1995 98816.41 85034.52 112598.3 77738.83 119894.0
## Nov 1995 98816.41 84486.34 113146.5 76900.46 120732.4
## Dec 1995 98816.41 83958.37 113674.4 76092.99 121539.8
#(b)
s <- sd(fcast$residuals)
fcast_mean <- fcast$mean[1]
up <- fcast_mean + 1.96*s
low <- fcast_mean - 1.96*s
print(paste("95% prediction interval is ", round(low,2), round(up,2)))
## [1] "95% prediction interval is 78679.97 118952.84"
## The manually computed interval is very close to the one produced by ses().
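## As a cross-check, the model-based interval for the first horizon can be
## pulled straight from the ses() output (the Sep 1995 values shown above):
fcast$lower[1, "95%"]
fcast$upper[1, "95%"]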
Question 2
ses.func <- function(y, alpha, level, h){
  n <- length(y)
  yhat <- numeric(n + h)
  yhat[1] <- level # initial level l0
  # SES recursion: l_t = alpha*y_t + (1 - alpha)*l_{t-1}
  for(i in 1:n){
    yhat[i + 1] <- alpha*y[i] + (1 - alpha)*yhat[i]
  }
  # beyond the data, SES forecasts are flat at the last estimated level
  if(h > 1) yhat[(n + 2):(n + h)] <- yhat[n + 1]
  return(yhat[(n + 1):(n + h)])
}
a <- fcast$model$par[1]
l0 <- fcast$model$par[2]
print(paste("forecast at h = 1:", round(ses.func(pigs, alpha = a, level = l0, h = 1), 2)))
## [1] "forecast at h = 1: 98816.41"
fcast$mean
## Sep Oct Nov Dec
## 1995 98816.41 98816.41 98816.41 98816.41
Question 3
ses.func2 <- function(pars = c(alpha, l), y) {
  SSE <- 0
  alpha <- pars[1]
  l0 <- pars[2]
  y_hat <- l0
  # accumulate squared one-step errors, updating the level as in SES
  for(index in 1:length(y)){
    error <- y[index] - y_hat
    SSE <- SSE + error^2
    y_hat <- alpha*y[index] + (1 - alpha)*y_hat
  }
  return(SSE)
}
gpo <- optim(par = c(0.5, pigs[1]), y = pigs, fn = ses.func2)
print(paste("alpha is ",gpo$par[1], "l0 is ", gpo$par[2]))
## [1] "alpha is 0.299008094014243 l0 is 76379.2653476235"
# values from ses() function
print(paste("alpha is ", fcast$model$par[1], "l0 is ", fcast$model$par[2]))
## [1] "alpha is 0.297148833372095 l0 is 77260.0561458528"
## The estimates are not identical, but they are very similar.
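## The near-equivalence can be made concrete by comparing the SSE that
## ses.func2 assigns to each parameter set (reusing the objects above):
ses.func2(gpo$par, pigs)
ses.func2(c(fcast$model$par[1], fcast$model$par[2]), pigs)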
Question 4
ses.func3 <- function(y){
  # use the parameters fitted by ses(); the optim() estimates from Question 3
  # are nearly identical and could be plugged in instead
  a <- fcast$model$par[1]
  l <- fcast$model$par[2]
  pred <- ses.func(y, a, l, 1)
  return(list(alpha = a, l0 = l, forecast = pred))
}
ses.func3(pigs)
## $alpha
## alpha
## 0.2971488
##
## $l0
## l
## 77260.06
##
## $forecast
## [1] 98816.41
print(paste("alpha is ", ses.func3(pigs)[1], "l0 is ", ses.func3(pigs)[2],"Forecast is ", ses.func3(pigs)[3]))
## [1] "alpha is c(alpha = 0.297148833372095) l0 is c(l = 77260.0561458528) Forecast is c(98314.3452587773)"
Question 5
# (a)
head(books)
## Time Series:
## Start = 1
## End = 6
## Frequency = 1
## Paperback Hardcover
## 1 199 139
## 2 172 128
## 3 111 172
## 4 209 139
## 5 161 191
## 6 119 168
summary(books)
## Paperback Hardcover
## Min. :111.0 Min. :128.0
## 1st Qu.:167.2 1st Qu.:170.5
## Median :189.0 Median :200.5
## Mean :186.4 Mean :198.8
## 3rd Qu.:207.2 3rd Qu.:222.0
## Max. :247.0 Max. :283.0
autoplot(books)

## Both series show an increasing pattern over time. It is difficult to tell whether there is any seasonality.
# (b)
autoplot(ses(books[,1], h = 4))

autoplot(ses(books[,2], h = 4))

# (c)
# rmse(actual, predicted) is assumed to come from the Metrics package
# Paperback's rmse
rmse(books[,1], ses(books[,1])$fitted)
## [1] 33.63769
#Hardcover's rmse
rmse(books[,2], ses(books[,2])$fitted)
## [1] 31.93101
Question 6
# (a)
fc1 <- holt(books[,1], h=4)
fc1
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 209.4668 166.6035 252.3301 143.9130 275.0205
## 32 210.7177 167.8544 253.5811 145.1640 276.2715
## 33 211.9687 169.1054 254.8320 146.4149 277.5225
## 34 213.2197 170.3564 256.0830 147.6659 278.7735
fc2 <- holt(books[,2], h =4)
fc2
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 250.1739 212.7390 287.6087 192.9222 307.4256
## 32 253.4765 216.0416 290.9113 196.2248 310.7282
## 33 256.7791 219.3442 294.2140 199.5274 314.0308
## 34 260.0817 222.6468 297.5166 202.8300 317.3334
# (b)
# Paperback
rmse(books[,1], holt(books[,1], h=4)$fitted)
## [1] 31.13692
# Hardcover
rmse(books[,2], holt(books[,2], h=4)$fitted)
## [1] 27.19358
## Holt's method additionally estimates the slope (trend) of the series, making
## it better suited to these trending data; hence the better fit and lower RMSE.
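## The extra trend state shows up in the fitted parameters (same accessor as
## in Question 2):
holt(books[, 1], h = 4)$model$par # alpha, beta, initial level l and slope b
ses(books[, 1], h = 4)$model$par # alpha and initial level l only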
# (c)
# Paperback
ses.1 <- ses(books[,1],h = 4)
# Hardcover
ses.2 <- ses(books[,2],h = 4)
# Paperback
holt.1 <- holt(books[,1], h = 4)
# Hardcover
holt.2 <- holt(books[,2], h = 4)
autoplot(ses.1) + autolayer(fitted(ses.1), series = "SES") + autolayer(fitted(holt.1), series = "HOLT") + labs(title = "Paperback")

autoplot(ses.2) + autolayer(fitted(ses.2), series = "SES") + autolayer(fitted(holt.2), series = "HOLT") + labs(title = "Hardcover")

## While ses() produces wider prediction intervals, holt() looks more appropriate for these trending series.
# (d)
Rmse.1 <- rmse(books[,1],ses.1$fitted)
Rmse.2 <- rmse(books[,1],holt.1$fitted)
Rmse.3 <- rmse(books[,2],ses.2$fitted)
Rmse.4 <- rmse(books[,2],holt.2$fitted)
# 95% prediction intervals computed by hand from the in-sample RMSE
# for paperback
print(paste("Upper is", round(ses.1$mean[1]+1.96*Rmse.1,2), "Lower is",
round(ses.1$mean[1]-1.96*Rmse.1,2)))
## [1] "Upper is 273.04 Lower is 141.18"
print(paste("Upper is", round(holt.1$mean[1]+1.96*Rmse.2,2), "Lower is",
round(holt.1$mean[1]-1.96*Rmse.2,2)))
## [1] "Upper is 270.5 Lower is 148.44"
# for hardcover
print(paste("Upper is", round(ses.2$mean[1]+1.96*Rmse.3,2), "Lower is",
round(ses.2$mean[1]-1.96*Rmse.3,2)))
## [1] "Upper is 302.14 Lower is 176.98"
print(paste("Upper is", round(holt.2$mean[1]+1.96*Rmse.4,2), "Lower is",
round(holt.2$mean[1]-1.96*Rmse.4,2)))
## [1] "Upper is 303.47 Lower is 196.87"
# 95% prediction intervals reported by ses() and holt()
# for paperback ses() method
print(paste("Upper is", round(ses.1$upper[1, "95%"],2), "Lower is",
round(ses.1$lower[1,"95%"],2)))
## [1] "Upper is 275.35 Lower is 138.87"
# for paperback holt() method
print(paste("Upper is", round(holt.1$upper[1, "95%"],2), "Lower is",
round(holt.1$lower[1,"95%"],2)))
## [1] "Upper is 275.02 Lower is 143.91"
# for hardcover ses() method
print(paste("Upper is", round(ses.2$upper[1, "95%"],2), "Lower is",
round(ses.2$lower[1,"95%"],2)))
## [1] "Upper is 304.34 Lower is 174.78"
# for hardcover holt() method
print(paste("Upper is", round(holt.2$upper[1, "95%"],2), "Lower is",
round(holt.2$lower[1,"95%"],2)))
## [1] "Upper is 307.43 Lower is 192.92"
## The intervals reported by ses() and holt() are slightly wider than the ones
## calculated by hand, since the functions estimate the forecast standard
## deviation from the fitted model rather than from the raw in-sample RMSE.
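## The gap can be inspected directly: the model's estimated sigma, used by the
## functions, is slightly larger than the in-sample RMSE used in the hand
## calculation (a quick check for the paperback SES fit):
sqrt(ses.1$model$sigma2)
Rmse.1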
Question 7
head(eggs)
## Time Series:
## Start = 1900
## End = 1905
## Frequency = 1
## [1] 276.79 315.42 314.87 321.25 314.54 317.92
m1 <- holt(eggs, h=100)
m2 <- holt(eggs, h=100, damped = TRUE)
m3 <- holt(eggs, h=100, exponential= TRUE)
m4 <- holt(eggs, h=100, lambda = "auto", biasadj = T)
m5 <- holt(eggs, h=100, damped = TRUE, lambda = "auto", biasadj = TRUE)
autoplot(eggs)+ autolayer(m1, series = "Default", PI = F)+
autolayer(m2, series= "Damped", PI =F) + autolayer(m3, series= "Exponential", PI =F)+
autolayer(m4, series= "Lambda+biasadj", PI =F)+ autolayer(m5, series= "Damped+Box-cox", PI =F)

print(paste("Model 1", round(rmse(eggs, m1$fitted),2), "Model 2", round(rmse(eggs, m2$fitted),2),
"Model 3", round(rmse(eggs, m3$fitted),2), "Model 4", round(rmse(eggs, m4$fitted),2),
"Model 5", round(rmse(eggs, m5$fitted),2)))
## [1] "Model 1 26.58 Model 2 26.54 Model 3 26.5 Model 4 26.39 Model 5 26.59"
## Based on RMSE, Model 4 (Box-Cox transformation with bias adjustment) performs best.
Question 8
# (a)
retaildata <- readxl::read_excel("retail.xlsx", skip=1)
myts <- ts(retaildata[,"A3349873A"],
frequency=12, start=c(1982,4))
autoplot(myts)

## The plot shows seasonal variation that grows with the level of the series, so multiplicative seasonality is necessary.
# (b)
fit1 <- hw(myts,seasonal="multiplicative", damped = FALSE)
fit2 <- hw(myts,seasonal="multiplicative", damped = TRUE)
autoplot(myts)+
autolayer(fit1, series="Without Damped")+autolayer(fit2, series="With Damped")

# (c)
print(paste("Without damped", round(rmse(myts, fit1$fitted),4), "With damped", round(rmse(myts, fit2$fitted),4)))
## [1] "Without damped 13.2938 With damped 13.3049"
## Based on RMSE, I prefer the non-damped method, which has the lower error.
# (d)
checkresiduals(fit1)

##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' multiplicative method
## Q* = 40.405, df = 8, p-value = 2.692e-06
##
## Model df: 16. Total lags used: 24
## The Ljung-Box test strongly rejects the null hypothesis (p = 2.7e-06), and the residual ACF shows significant spikes, so the residuals do not look like white noise.
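## The same statistic can be reproduced directly with Box.test(); fitdf = 16
## matches the model degrees of freedom reported above:
Box.test(residuals(fit1), lag = 24, type = "Ljung-Box", fitdf = 16)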
# (e)
myts.train <- window(myts, end=c(2010,12))
myts.test <- window(myts, start=2011)
fc <- snaive(myts.train)
rmse(myts.test, fc$mean)
## [1] 71.44309
hw.no <- hw(myts.train,seasonal="multiplicative", damped = FALSE)
rmse(myts.test, hw.no$mean)
## [1] 70.11659
print(paste("Seasonal Naive ", round(rmse(myts.test, fc$mean),4), "Holt without damped ", round(rmse(myts.test, hw.no$mean),4)))
## [1] "Seasonal Naive 71.4431 Holt without damped 70.1166"
## Based on test-set RMSE, Holt-Winters' multiplicative method (without damping) beats the seasonal naive approach.
Question 9
train <- window(myts, end = c(2010, 12))
lam <- BoxCox.lambda(train)
box.train <- BoxCox(train, lam)
stl.m <- stl(box.train, s.window='periodic', robust=T)
autoplot(stl.m) +
ggtitle('STL Decomposition')

# note: the seasonal component is on the Box-Cox scale while train is on the
# original scale, so this adjustment mixes the two scales
box.train.adj <- train - stl.m$time.series[,'seasonal']
autoplot(train, series='Unadjusted Data') +
autolayer(box.train.adj, series='Seasonally Adjusted') +
ylab('')

ets.m <- ets(box.train.adj)
summary(ets.m)
## ETS(M,Ad,M)
##
## Call:
## ets(y = box.train.adj)
##
## Smoothing parameters:
## alpha = 0.6374
## beta = 0.0042
## gamma = 1e-04
## phi = 0.9794
##
## Initial states:
## l = 62.7778
## b = 0.8092
## s = 0.9416 0.9061 0.9359 1.5071 1.087 1.0113
## 0.9656 0.9447 0.9228 0.8963 0.9622 0.9194
##
## sigma: 0.0418
##
## AIC AICc BIC
## 3439.414 3441.512 3508.598
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.48757 8.792609 6.292699 0.262504 3.121933 0.3943595 0.01376926
fc.ets <- forecast(ets.m)$mean
rmse(myts.test, fc.ets)
## [1] 80.57372
## The ETS model has a much higher RMSE than the previous models, so it would not be preferred.
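## A cleaner version of this workflow (a sketch, results not shown) lets
## stlf() handle the Box-Cox transformation, the STL decomposition, the ETS
## fit on the seasonally adjusted series, and the back-transformation:
fc.stlf <- stlf(myts.train, h = 36, method = "ets",
lambda = BoxCox.lambda(myts.train))
rmse(myts.test, fc.stlf$mean)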
Question 10
# (a)
head(ukcars)
## Qtr1 Qtr2 Qtr3 Qtr4
## 1977 330.371 371.051 270.670 343.880
## 1978 358.491 362.822
autoplot(ukcars)

## The plot shows an increasing trend, with a large dip around 2001. Visually, there is seasonality with a period of one year.
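## The yearly seasonal pattern can be checked with a season plot:
ggseasonplot(ukcars)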
# (b)
stl.cars <- ukcars %>%
stl(t.window=13, s.window="periodic", robust=TRUE)
ukcars.adj <- seasadj(stl.cars)
# (c)
stlf.dam <- ukcars.adj %>% stlf(h = 8, etsmodel = "AAN", damped = TRUE)
autoplot(stlf.dam)

# (d)
holt.lin <- ukcars.adj %>% stlf(h = 8, etsmodel = "AAN", damped = FALSE)
autoplot(holt.lin)

# (e)
ets.cars <- ets(ukcars)
ets.cars
## ETS(A,N,A)
##
## Call:
## ets(y = ukcars)
##
## Smoothing parameters:
## alpha = 0.6199
## gamma = 1e-04
##
## Initial states:
## l = 314.2568
## s = -1.7579 -44.9601 21.1956 25.5223
##
## sigma: 25.9302
##
## AIC AICc BIC
## 1277.752 1278.819 1296.844
ets.cars.fc <- forecast(ets.cars, h = 8)
# (f)
# note: ets.cars was fitted to the unadjusted series, so comparing its fitted
# values against the seasonally adjusted data overstates its error
rmse(ets.cars$fitted,ukcars.adj)
## [1] 37.14855
rmse(holt.lin$fitted,ukcars.adj)
## [1] 22.86408
rmse(stlf.dam$fitted,ukcars.adj)
## [1] 22.81499
## By RMSE, the STL + damped trend method fits slightly better than Holt's linear method, and both beat the ETS model (subject to the caveat above).
# (g)
autoplot(ukcars.adj)+
autolayer(stlf.dam, series= "stlf", PI=F)+autolayer(holt.lin, series="holt", PI=F)+
autolayer(ets.cars.fc, series="ets", PI=F)

## From the plot, Holt's linear trend forecasts look slightly more plausible than those of the other two methods.
# (h)
checkresiduals(holt.lin)
## Warning in checkresiduals(holt.dam): The fitted degrees of freedom is based on
## the model used for the seasonally adjusted data.

##
## Ljung-Box test
##
## data: Residuals from STL + ETS(A,A,N)
## Q* = 24.992, df = 4, p-value = 5.05e-05
##
## Model df: 4. Total lags used: 8
Question 11
# (a)
head(visitors)
## May Jun Jul Aug Sep Oct
## 1985 75.7 75.4 83.1 82.9 77.3 105.7
autoplot(visitors)

## The plot shows an increasing trend and seasonality with a frequency of one year.
# (b)
train.vis <- window(visitors, start=c(1985,5), end = c(2003,4))
test.vis <- window(visitors, start = c(2003,5)) # the 24 months after training
hw.multi <- hw(train.vis, h=24, seasonal ="multiplicative")
hw.multi
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## May 2003 293.5512 272.0790 315.0235 260.7123 326.3902
## Jun 2003 311.3817 286.3466 336.4168 273.0939 349.6695
## Jul 2003 379.6865 346.4414 412.9316 328.8425 430.5305
## Aug 2003 341.5672 309.2321 373.9023 292.1150 391.0195
## Sep 2003 330.3820 296.7647 363.9994 278.9688 381.7953
## Oct 2003 371.9441 331.4615 412.4267 310.0313 433.8569
## Nov 2003 387.1607 342.2705 432.0509 318.5070 455.8144
## Dec 2003 471.3185 413.3054 529.3316 382.5951 560.0419
## Jan 2004 348.1741 302.8173 393.5309 278.8068 417.5414
## Feb 2004 390.3863 336.7055 444.0672 308.2885 472.4841
## Mar 2004 377.6150 322.9349 432.2951 293.9890 461.2410
## Apr 2004 337.3049 285.9782 388.6316 258.8075 415.8024
## May 2004 284.8762 239.4088 330.3437 215.3398 354.4127
## Jun 2004 302.1571 251.6621 352.6521 224.9316 379.3825
## Jul 2004 368.4105 304.0470 432.7739 269.9751 466.8459
## Aug 2004 331.3981 270.9579 391.8384 238.9627 423.8335
## Sep 2004 320.5215 259.5777 381.4653 227.3160 413.7270
## Oct 2004 360.8154 289.3783 432.2525 251.5618 470.0690
## Nov 2004 375.5478 298.2123 452.8832 257.2733 493.8222
## Dec 2004 457.1458 359.3352 554.9564 307.5574 606.7342
## Jan 2005 337.6781 262.6845 412.6717 222.9853 452.3709
## Feb 2005 378.5881 291.3958 465.7805 245.2390 511.9373
## Mar 2005 366.1740 278.7938 453.5542 232.5375 499.8105
## Apr 2005 327.0594 246.2591 407.8596 203.4861 450.6326
# (c)
## The seasonal variation grows with the level of the series, so multiplicative seasonality is necessary: it scales the seasonal component with the level, whereas additive seasonality would keep the seasonal swings constant.
# (d)
ets.vis <- ets(train.vis)
ets.vis
## ETS(M,Ad,M)
##
## Call:
## ets(y = train.vis)
##
## Smoothing parameters:
## alpha = 0.6396
## beta = 8e-04
## gamma = 1e-04
## phi = 0.9799
##
## Initial states:
## l = 87.7602
## b = 3.0639
## s = 0.9392 1.0538 1.0792 0.9675 1.3275 1.091
## 1.0329 0.8867 0.9399 1.0231 0.8488 0.8103
##
## sigma: 0.0539
##
## AIC AICc BIC
## 2300.157 2303.629 2360.912
ets.mam <- forecast(ets.vis, h =24)
# ETS(AAA) on the Box-Cox transformed data
# note: these forecasts stay on the transformed scale (no back-transformation),
# which inflates the test RMSE reported below
ets.vis.aaa <- ets(BoxCox(train.vis, lambda = BoxCox.lambda(train.vis)), model = "AAA")
ets.aaa <- forecast(ets.vis.aaa, h=24)
# SNaive
naive.vis <- snaive(train.vis, h =24)
# STL decomposition Box-Cox
lambda <- BoxCox.lambda(train.vis)
box.train <- BoxCox(train.vis, lambda)
stl.m <- stl(box.train, s.window='periodic', robust=T)
autoplot(stl.m) +
ggtitle('STL Decomposition')

# note: as in Question 9, the seasonal component is on the Box-Cox scale while
# train.vis is on the original scale
box.train.adj <- train.vis - stl.m$time.series[,'seasonal']
ets.m <- ets(box.train.adj)
summary(ets.m)
## ETS(M,Ad,M)
##
## Call:
## ets(y = box.train.adj)
##
## Smoothing parameters:
## alpha = 0.6367
## beta = 1e-04
## gamma = 1e-04
## phi = 0.98
##
## Initial states:
## l = 88.0983
## b = 3.0013
## s = 0.9419 1.0561 1.0761 0.9701 1.3136 1.0893
## 1.0269 0.8922 0.9401 1.0215 0.854 0.8182
##
## sigma: 0.0537
##
## AIC AICc BIC
## 2298.417 2301.889 2359.172
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.8655531 14.43875 10.53325 0.1298963 3.978464 0.4037624
## ACF1
## Training set -0.05100681
fc.ets <- forecast(ets.m, h=24)
fc.ets
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## May 2003 291.6205 271.5537 311.6872 260.9310 322.3099
## Jun 2003 304.3876 279.5465 329.2287 266.3964 342.3788
## Jul 2003 364.1297 330.3853 397.8741 312.5221 415.7373
## Aug 2003 335.1671 300.7929 369.5413 282.5963 387.7379
## Sep 2003 318.1115 282.6153 353.6076 263.8248 372.3982
## Oct 2003 366.1690 322.2496 410.0883 299.0001 433.3378
## Nov 2003 388.4597 338.8278 438.0915 312.5543 464.3651
## Dec 2003 468.4604 405.1498 531.7710 371.6352 565.2856
## Jan 2004 345.9877 296.8047 395.1707 270.7687 421.2067
## Feb 2004 383.8303 326.7018 440.9589 296.4597 471.2010
## Mar 2004 376.7345 318.2486 435.2203 287.2881 466.1808
## Apr 2004 336.0082 281.7751 390.2413 253.0658 418.9506
## May 2004 291.9302 243.0766 340.7838 217.2150 366.6454
## Jun 2004 304.7044 251.9627 357.4461 224.0429 385.3659
## Jul 2004 364.5011 299.3799 429.6223 264.9068 464.0954
## Aug 2004 335.5020 273.7481 397.2559 241.0576 429.9465
## Sep 2004 318.4230 258.1384 378.7076 226.2257 410.6203
## Oct 2004 366.5204 295.2523 437.7884 257.5253 475.5155
## Nov 2004 388.8250 311.2767 466.3732 270.2251 507.4248
## Dec 2004 468.8921 373.0860 564.6981 322.3693 615.4148
## Jan 2005 346.3001 273.8893 418.7109 235.5574 457.0428
## Feb 2005 384.1700 302.0453 466.2946 258.5711 509.7688
## Mar 2005 377.0611 294.7294 459.3928 251.1457 502.9766
## Apr 2005 336.2937 261.3533 411.2341 221.6822 450.9051
# (e)
rmse(test.vis, ets.mam$mean)
## [1] 80.23124
rmse(test.vis, ets.aaa$mean)
## [1] 415.0766
rmse(test.vis, naive.vis$mean)
## [1] 50.30097
rmse(test.vis, fc.ets$mean)
## [1] 80.87764
checkresiduals(naive.vis)

##
## Ljung-Box test
##
## data: Residuals from Seasonal naive method
## Q* = 295.02, df = 24, p-value < 2.2e-16
##
## Model df: 0. Total lags used: 24
## The seasonal naive model gives the best forecasts in terms of test RMSE (the ETS(AAA) figure is inflated because its forecasts were never back-transformed). Its residuals, however, clearly fail the Ljung-Box test (p < 2.2e-16), so they are not white noise.
# (f)
fets_add_BoxCox <- function(y, h) {
forecast(ets(
y,
lambda = BoxCox.lambda(y),
additive.only = TRUE
),
h = h)
}
fstlm <- function(y, h) {
forecast(stlm(
y,
lambda = BoxCox.lambda(y),
s.window = frequency(y) + 1,
robust = TRUE,
method = "ets"
),
h = h)
}
fets <- function(y, h) {
forecast(ets(y),
h = h)
}
sqrt(mean(tsCV(visitors, snaive, h = 1)^2, na.rm = TRUE))
## [1] 32.78675
sqrt(mean(tsCV(visitors, fets_add_BoxCox, h = 1)^2,
na.rm = TRUE))
## [1] 18.86439
sqrt(mean(tsCV(visitors, fstlm, h = 1)^2,
na.rm = TRUE))
## [1] 17.49642
sqrt(mean(tsCV(visitors, fets, h = 1)^2, na.rm = TRUE))
## [1] 18.52985
sqrt(mean(tsCV(visitors, hw, h = 1,
seasonal = "multiplicative")^2,
na.rm = TRUE))
## [1] 19.62107
Question 12
fets <- function(y, h) {
forecast(ets(y), h = h)
}
# (a)
head(qcement)
## Qtr1 Qtr2 Qtr3 Qtr4
## 1956 0.465 0.532 0.561 0.570
## 1957 0.529 0.604
ets.cv <- tsCV(qcement, fets, h = 4)
snaive.cv <- tsCV(qcement, snaive, h = 4)
# (b)
mean(ets.cv^2,na.rm=T)
## [1] 0.01250954
mean(snaive.cv^2,na.rm=T)
## [1] 0.01792147
## The ETS model appears more accurate than the seasonal naive method, with a lower cross-validated MSE.
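## With h = 4, tsCV() returns a matrix whose j-th column holds the j-step-ahead
## errors, so the comparison can also be made per horizon:
colMeans(ets.cv^2, na.rm = TRUE)
colMeans(snaive.cv^2, na.rm = TRUE)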
Question 13
head(ausbeer)
## Qtr1 Qtr2 Qtr3 Qtr4
## 1956 284 213 227 308
## 1957 262 228
head(bricksq)
## Qtr1 Qtr2 Qtr3 Qtr4
## 1956 189 204 208 197
## 1957 187 214
head(dole)
## Jan Feb Mar Apr May Jun
## 1956 4742 6128 6494 5379 6011 7003
head(a10)
## Jul Aug Sep Oct Nov Dec
## 1991 3.526591 3.180891 3.252221 3.611003 3.565869 4.306371
head(usmelec)
## Jan Feb Mar Apr May Jun
## 1973 160.218 143.539 148.158 139.589 147.395 161.244
# ausbeer
ausbeer.train <- window(ausbeer, end = c(2007,2))
ausbeer.test <- window(ausbeer, start = c(2007,3))
#ets
ets.aus <- forecast(ets(ausbeer.train), h = 12)
#snaive
snaive.aus <- snaive(ausbeer.train, h = 12)
#stlf
stlf.aus <- stlf(ausbeer.train, h = 12, lambda = BoxCox.lambda(ausbeer.train))
forecast::accuracy(ets.aus, ausbeer.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.3466095 15.781973 11.979955 -0.064073435 2.864311 0.7567076
## Test set 0.1272571 9.620828 8.919347 0.009981598 2.132836 0.5633859
## ACF1 Theil's U
## Training set -0.1942276 NA
## Test set 0.3763918 0.1783972
forecast::accuracy(snaive.aus, ausbeer.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 3.336634 19.67005 15.83168 0.9044009 3.771370 1.0000000
## Test set -2.916667 10.80509 9.75000 -0.6505927 2.338012 0.6158537
## ACF1 Theil's U
## Training set 0.0009690785 NA
## Test set 0.3254581810 0.1981463
forecast::accuracy(stlf.aus, ausbeer.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.5421438 13.330464 10.237474 0.1268498 2.454628 0.6466447
## Test set -2.5989512 9.790852 8.939692 -0.6576791 2.153152 0.5646710
## ACF1 Theil's U
## Training set -0.1594160 NA
## Test set 0.2871443 0.1691628
## All three methods give similar results, but the ETS model has the best test-set RMSE.
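## The comparison is easier to read when the test-set RMSEs are pulled out of
## the accuracy tables directly (a small sketch, applicable to each series):
c(ets = forecast::accuracy(ets.aus, ausbeer.test)["Test set", "RMSE"],
snaive = forecast::accuracy(snaive.aus, ausbeer.test)["Test set", "RMSE"],
stlf = forecast::accuracy(stlf.aus, ausbeer.test)["Test set", "RMSE"])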
# bricksq
bricksq.train <- window(bricksq, end = c(1991,1))
bricksq.test <- window(bricksq, start = c(1991,2))
ets.bricksq <- forecast(ets(bricksq.train), h = 36)
snaive.bricksq <- snaive(bricksq.train, h = 36)
stlf.bricksq <- stlf(bricksq.train, h = 36, lambda = BoxCox.lambda(bricksq.train))
forecast::accuracy(ets.bricksq, bricksq.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.420627 21.77283 15.37691 -0.1306659 3.788064 0.4254112
## Test set 9.185507 17.16519 15.11577 1.9865479 3.404188 0.4181867
## ACF1 Theil's U
## Training set 0.1682149 NA
## Test set 0.3082903 0.4933751
forecast::accuracy(snaive.bricksq, bricksq.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 7.708029 49.66883 36.14599 1.724113 8.858339 1.000000
## Test set -32.857143 53.61103 44.71429 -7.422967 10.329959 1.237047
## ACF1 Theil's U
## Training set 0.7956728 NA
## Test set -0.1296336 1.491617
forecast::accuracy(stlf.bricksq, bricksq.test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.217074 22.49498 16.21792 0.3352736 3.895654 0.4486784 0.1769356
## Test set 25.355750 32.76304 28.16652 5.6133717 6.287438 0.7792435 0.5383942
## Theil's U
## Training set NA
## Test set 0.9328939
## Based on the error metrics, the ETS model clearly dominates the other two on test-set RMSE.
# dole
dole.train <- window(dole, end = c(1989,7))
dole.test <- window(dole, start = c(1989,8))
ets.dole <- forecast(ets(dole.train), h = 36)
snaive.dole<- snaive(dole.train, h = 36)
stlf.dole <- stlf(dole.train, h = 36, lambda = BoxCox.lambda(dole.train))
forecast::accuracy(ets.dole, dole.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 98.00253 16130.58 9427.497 0.5518769 6.20867 0.2995409
## Test set 221647.42595 279668.65 221647.426 32.5447637 32.54476 7.0424271
## ACF1 Theil's U
## Training set 0.5139536 NA
## Test set 0.9394267 11.29943
forecast::accuracy(snaive.dole, dole.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 12606.58 56384.06 31473.16 3.350334 27.73651 1.000000
## Test set 160370.33 240830.92 186201.00 20.496197 27.38678 5.916184
## ACF1 Theil's U
## Training set 0.9781058 NA
## Test set 0.9208325 9.479785
forecast::accuracy(stlf.dole, dole.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 137.7633 6019.681 3490.508 0.2754407 3.674711 0.1109043
## Test set 204490.0015 267551.477 206293.106 29.1688279 29.632258 6.5545727
## ACF1 Theil's U
## Training set -0.0792675 NA
## Test set 0.9382601 10.65525
## The seasonal naive model performs best on all test-set error metrics.
# a10
a10.train <- window(a10, end = c(2005,6))
a10.test <- window(a10, start = c(2005,7))
ets.a10<- forecast(ets(a10.train ), h = 36)
snaive.a10 <- snaive(a10.train , h = 36)
stlf.a10<- stlf(a10.train, h = 36, lambda = BoxCox.lambda(a10.train ))
forecast::accuracy(ets.a10, a10.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.04378049 0.488989 0.3542285 0.2011915 3.993267 0.3611299
## Test set 1.62559331 2.562046 2.0101563 7.0011207 9.410027 2.0493197
## ACF1 Theil's U
## Training set -0.05238173 NA
## Test set 0.23062972 0.6929693
forecast::accuracy(snaive.a10, a10.test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.9577207 1.177528 0.9808895 10.86283 11.15767 1.000000 0.3779746
## Test set 4.3181585 5.180738 4.3306974 19.80542 19.89352 4.415071 0.6384822
## Theil's U
## Training set NA
## Test set 1.383765
forecast::accuracy(stlf.a10, a10.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.009379657 0.4034157 0.298922 -0.04447829 3.490468 0.3047458
## Test set 1.170547699 2.1254012 1.776014 4.62679885 8.426817 1.8106159
## ACF1 Theil's U
## Training set -0.1371340 NA
## Test set 0.1693221 0.5694937
## The STLF model outperforms the others on the test set.
# h02
h02.train <- window(h02, end = c(2005,7))
h02.test <- window(h02, start = c(2005,8))
ets.h02 <- forecast(ets(h02.train), h = 36)
snaive.h02 <- snaive(h02.train, h = 36)
stlf.h02 <- stlf(h02.train, h = 36, lambda = BoxCox.lambda(h02.train))
forecast::accuracy(ets.h02, h02.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0004513622 0.04579930 0.03426080 -0.2209876 4.560572 0.5745005
## Test set 0.0342751950 0.08019185 0.06763825 2.8172154 7.267771 1.1341883
## ACF1 Theil's U
## Training set 0.05057522 NA
## Test set -0.12581913 0.4581788
forecast::accuracy(snaive.h02, h02.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.037749040 0.07171709 0.05963581 5.0942482 8.197262 1.000000
## Test set -0.004309813 0.08238325 0.06787933 -0.1376051 7.444182 1.138231
## ACF1 Theil's U
## Training set 0.40080565 NA
## Test set 0.09840611 0.4929215
forecast::accuracy(stlf.h02, h02.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.00120577 0.03829965 0.02889214 -0.1362664 3.907902 0.4844764
## Test set 0.04481819 0.07945414 0.06413379 3.9816962 6.832377 1.0754241
## ACF1 Theil's U
## Training set -0.1524301 NA
## Test set -0.1620946 0.4519207
## The STLF model gives slightly better forecasts, with the lowest test RMSE.
# usmelec
usmelec.train <- window(usmelec, end = c(2010,6))
usmelec.test <- window(usmelec, start = c(2010,7))
ets.usmelec <- forecast(ets(usmelec.train), h = 36)
snaive.usmelec <- snaive(usmelec.train, h = 36)
stlf.usmelec<- stlf(usmelec.train, h = 36, lambda = BoxCox.lambda(usmelec.train))
forecast::accuracy(ets.usmelec, usmelec.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.07834851 7.447167 5.601963 -0.1168709 2.163495 0.6225637
## Test set -10.20080652 15.291359 13.123441 -3.1908161 3.926338 1.4584491
## ACF1 Theil's U
## Training set 0.2719249 NA
## Test set 0.4679496 0.4859495
forecast::accuracy(snaive.usmelec, usmelec.test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 4.921564 11.58553 8.998217 2.000667 3.511648 1.000000 0.4841860
## Test set 5.475972 17.20037 13.494750 1.391437 3.767514 1.499714 0.2692544
## Theil's U
## Training set NA
## Test set 0.4765145
forecast::accuracy(stlf.usmelec, usmelec.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.03002591 6.198606 4.648595 -0.01658299 1.799262 0.5166129
## Test set -18.99138915 23.249466 19.962853 -5.77781982 6.025763 2.2185343
## ACF1 Theil's U
## Training set 0.06453086 NA
## Test set 0.60201957 0.7417298
## The ETS model performs best, with the lowest test-set error metrics overall.
Question 14
# (a)
#bicoal
autoplot(forecast(ets(bicoal)))

#chicken
autoplot(forecast(ets(chicken)))

#dole
autoplot(forecast(ets(dole)))

#usdeaths
autoplot(forecast(ets(usdeaths)))

#lynx
autoplot(forecast(ets(lynx)))

#ibmclose
autoplot(forecast(ets(ibmclose)))

#eggs
autoplot(forecast(ets(eggs)))

## No, ets() does not consistently give good forecasts; each series has its own structure, and some patterns fall outside what exponential smoothing can model.
# (b)
## For bicoal, chicken, lynx, ibmclose and eggs, the ETS forecasts are essentially flat lines: these series have no trend or seasonality for ETS to extrapolate (lynx's cycles, in particular, cannot be captured by exponential smoothing). For dole and usdeaths, which show clear trend and seasonality, the forecasts look more reasonable.
Question 15
ets.usdeaths <- ets(usdeaths, model = "MAM")
fc.ets <- forecast(ets.usdeaths, h=1)
hw.usdeaths <- hw(usdeaths, h=1, seasonal = "multiplicative")
fc.ets
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 1979 8233.107 7883.699 8582.515 7698.734 8767.481
hw.usdeaths
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 1979 8217.64 7845.352 8589.928 7648.274 8787.005
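## The two forecasts are close but not identical. A plausible reason: with
## model = "MAM", ets() is still free to choose a damped trend, while hw()
## here fits an undamped one. Forcing the same specification should bring the
## two in line (a sketch, output not shown):
fc.ets.nodamp <- forecast(ets(usdeaths, model = "MAM", damped = FALSE), h = 1)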
Question 16
# ibmclose is used here because ets(ibmclose) selects an ETS(A,N,N) model
ibmclose.ets <- ets(ibmclose, model = "ANN")
ibmclose.ets
## ETS(A,N,N)
##
## Call:
## ets(y = ibmclose, model = "ANN")
##
## Smoothing parameters:
## alpha = 0.9999
##
## Initial states:
## l = 459.9339
##
## sigma: 7.2637
##
## AIC AICc BIC
## 3648.450 3648.515 3660.182
fc.ibm <- forecast(ibmclose.ets,h=1)
fc.ibm
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 370 356.9995 347.6907 366.3083 342.7629 371.2361
# From the model output: sigma = 7.2637 and alpha = 0.9999
sigm <- 7.2637
alpha <- 0.9999
h <- 1
# forecast variance for ETS(A,N,N): sigma_h^2 = sigma^2 * (1 + alpha^2 * (h - 1))
fv <- sigm^2*(1 + (alpha^2)*(h - 1))
# hand-computed 95% interval: 356.9995 +/- 1.96*sqrt(fv) = (342.76, 371.24)
#Point forecast
fc.ibm$mean[1]
## [1] 356.9995
# 95% prediction interval
fc.ibm$lower[1,"95%"]
## 95%
## 342.7629
fc.ibm$upper[1,"95%"]
## 95%
## 371.2361
## The hand-computed point forecast and 95% prediction interval match the values produced by forecast(), confirming that the forecast variance for ETS(A,N,N) is sigma^2*(1 + alpha^2*(h - 1)).
Question 17
# For an ETS(A,N,N) model the point forecast is l_T for every horizon h, and
# (from Question 16) sigma_h = sigma*sqrt(1 + alpha^2*(h - 1)), so the 95%
# prediction interval is:
# l_T + 1.96*sigma*sqrt(1 + alpha^2*(h - 1))
# l_T - 1.96*sigma*sqrt(1 + alpha^2*(h - 1))
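## As a function (a small sketch; plugging in the ETS(A,N,N) values from
## Question 16 reproduces the interval computed there):
pi95 <- function(l, alpha, h, sigma) {
  s_h <- sigma*sqrt(1 + alpha^2*(h - 1))
  c(lower = l - 1.96*s_h, upper = l + 1.96*s_h)
}
pi95(l = 356.9995, alpha = 0.9999, h = 1, sigma = 7.2637)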