library(fpp2)
## Warning: package 'fpp2' was built under R version 4.0.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## -- Attaching packages ------------------------------------ fpp2 2.4 --
## v ggplot2 3.3.0 v fma 2.4
## v forecast 8.12 v expsmooth 2.3
## Warning: package 'forecast' was built under R version 4.0.2
## Warning: package 'fma' was built under R version 4.0.3
## Warning: package 'expsmooth' was built under R version 4.0.3
##
1.Consider the pigs series — the number of pigs slaughtered in Victoria each month.
1a. Use the ses() function in R to find the optimal values of α and ℓ0, and generate forecasts for the next four months.
str(pigs)
## Time-Series [1:188] from 1980 to 1996: 76378 71947 33873 96428 105084 ...
ses1 = ses(pigs, h=4)
ses1$model
## Simple exponential smoothing
##
## Call:
## ses(y = pigs, h = 4)
##
## Smoothing parameters:
## alpha = 0.2971
##
## Initial states:
## l = 77260.0561
##
## sigma: 10308.58
##
## AIC AICc BIC
## 4462.955 4463.086 4472.665
forecast(ses1)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Sep 1995 98816.41 85605.43 112027.4 78611.97 119020.8
## Oct 1995 98816.41 85034.52 112598.3 77738.83 119894.0
## Nov 1995 98816.41 84486.34 113146.5 76900.46 120732.4
## Dec 1995 98816.41 83958.37 113674.4 76092.99 121539.8
1b.Compute a 95% prediction interval for the first forecast using y±1.96s where s is the standard deviation of the residuals. Compare your interval with the interval produced by R.
ses.ci = sd(ses1$residuals)
sesmean = ses1$mean[1]
ses.ci1 = c(sesmean - (1.96*ses.ci),sesmean + (1.96*ses.ci))
ses.ci1
## [1] 78679.97 118952.84
2.Write your own function to implement simple exponential smoothing. The function should take arguments y (the time series), alpha (the smoothing parameter α) and level (the initial level ℓ0). It should return the forecast of the next observation in the series. Does it give the same forecast as ses()?
myfunc = function(y, alpha, level, h) {
yHat = level
for(index in 1:length(y)+h){
yHat = alpha*y[index] +(1-alpha)*yHat[i-1]}
print(paste0('myfunc forecast:',as.character(yHat)))}
3.Modify your function from the previous exercise to return the sum of squared errors rather than the forecast of the next observation. Then use the optim() function to find the optimal values of α and ℓ0. Do you get the same values as the ses() function?
myfunc1 = function(pars = c(alpha,1), y) {
error = 0
sse = 0
alpha = pars[1]
l0 = pars[2]
yHat= 10
for(index in 1:length(y)){
error = y[index] - yHat
sse = sse + error^2
yHat = alpha*y[index] + (1 - alpha)*yHat
}
return(sse)}
myfunc2 = function (y){
opt = optim(par= c(0.5, y[1]), y=y, fn = myfunc1)
a = opt$par[1]
l = opt$par[2]
func = myfunc(y, a, l,1)
return(func)
}
5a. Plot the series and discuss the main features of the data.
str(books)
## Time-Series [1:30, 1:2] from 1 to 30: 199 172 111 209 161 119 195 195 131 183 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:2] "Paperback" "Hardcover"
autoplot(books)
I do not see any seasonality. There seems to be an increasing trend.
5b. Use the ses() function to forecast each series, and plot the forecasts.
pb1 = ses(books[,1], h=4)
hc1 = ses(books[,2], h=4)
forecast(pb1)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 207.1097 162.4882 251.7311 138.8670 275.3523
## 32 207.1097 161.8589 252.3604 137.9046 276.3147
## 33 207.1097 161.2382 252.9811 136.9554 277.2639
## 34 207.1097 160.6259 253.5935 136.0188 278.2005
forecast(hc1)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 239.5601 197.2026 281.9176 174.7799 304.3403
## 32 239.5601 194.9788 284.1414 171.3788 307.7414
## 33 239.5601 192.8607 286.2595 168.1396 310.9806
## 34 239.5601 190.8347 288.2855 165.0410 314.0792
par(mfrow = c(1,2))
plot(pb1)
plot(hc1)
5c. Compute the RMSE values for the training data in each case.
pbrmse = accuracy(pb1)
pbrmse
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 7.175981 33.63769 27.8431 0.4736071 15.57784 0.7021303 -0.2117522
hcrmse = accuracy(hc1)
hcrmse
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 9.166735 31.93101 26.77319 2.636189 13.39487 0.7987887 -0.1417763
hhc = holt(books[,1], h=4)
hhc
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 209.4668 166.6035 252.3301 143.9130 275.0205
## 32 210.7177 167.8544 253.5811 145.1640 276.2715
## 33 211.9687 169.1054 254.8320 146.4149 277.5225
## 34 213.2197 170.3564 256.0830 147.6659 278.7735
hpb = holt(books[,2], h=4)
hpb
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 250.1739 212.7390 287.6087 192.9222 307.4256
## 32 253.4765 216.0416 290.9113 196.2248 310.7282
## 33 256.7791 219.3442 294.2140 199.5274 314.0308
## 34 260.0817 222.6468 297.5166 202.8300 317.3334
6b.Compare the RMSE measures of Holt’s method for the two series to those of simple exponential smoothing in the previous question. (Remember that Holt’s method is using one more parameter than SES.) Discuss the merits of the two forecasting methods for these data sets.
rmse.hhc = accuracy(hhc)[2]
rmse.hhc
## [1] 31.13692
rmse.hpb = accuracy(hpb)[2]
rmse.hpb
## [1] 27.19358
6c.Compare the forecasts for the two series using both methods. Which do you think is best?
Hardcover sales are better than paperback because we can see that rmse is lower for hardcover.
6d.Calculate a 95% prediction interval for the first forecast for each series, using the RMSE values and assuming normal errors. Compare your intervals with those produced using ses and holt.
hhc$upper[1, "95%"]
## 95%
## 275.0205
hhc$lower[1, "95%"]
## 95%
## 143.913
hhc$mean[1] + 1.96*rmse.hhc
## [1] 270.4951
hhc$mean[1] - 1.96*rmse.hhc
## [1] 148.4384
hpb$upper[1, "95%"]
## 95%
## 307.4256
hpb$lower[1, "95%"]
## 95%
## 192.9222
hpb$mean[1] + 1.96*rmse.hpb
## [1] 303.4733
hpb$mean[1] - 1.96*rmse.hpb
## [1] 196.8745
The prediction interval was quite close but was different than the ses.
7.For this exercise use data set eggs, the price of a dozen eggs in the United States from 1900–1993. Experiment with the various options in the holt() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each argument is doing to the forecasts.Which model gives the best RMSE?
str(eggs)
## Time-Series [1:94] from 1900 to 1993: 277 315 315 321 315 ...
e1 = holt(eggs, h=100)
e2 = holt(eggs, h=100, damped = T)
e3 = holt(eggs, h=100, exponential = T)
e4 = holt(eggs, h=100, biasadj = T)
e5 = holt(eggs, h=100, damped = F, exponential = T)
e6 = holt(eggs, h=100, biasadj = T, lambda = 'auto')
autoplot(eggs) +
autolayer(e1, series = 'e1') +
autolayer(e2, series = 'e2') +
autolayer(e3, series = 'e3') +
autolayer(e4, series = 'e4') +
autolayer(e5, series = 'e5') +
autolayer(e6, series = 'e6')
accuracy(e1)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.04499087 26.58219 19.18491 -1.142201 9.653791 0.9463626
## ACF1
## Training set 0.01348202
accuracy(e2)
## ME RMSE MAE MPE MAPE MASE
## Training set -2.891496 26.54019 19.2795 -2.907633 10.01894 0.9510287
## ACF1
## Training set -0.003195358
accuracy(e3)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.4918791 26.49795 19.29399 -1.263235 9.766049 0.9517436 0.0103908
accuracy(e4)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.04499087 26.58219 19.18491 -1.142201 9.653791 0.9463626
## ACF1
## Training set 0.01348202
accuracy(e5)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.4918791 26.49795 19.29399 -1.263235 9.766049 0.9517436 0.0103908
accuracy(e6)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.2015298 26.38689 18.99362 -1.63043 9.713172 0.9369265 0.0383996
#the 6th model, "e1" gives the best rmse.
8.Recall your retail time series data (from Exercise 3 in Section 2.10). 8a. Why is multiplicative seasonality necessary for this series?
library(readxl)
library(seasonal)
## Warning: package 'seasonal' was built under R version 4.0.3
retail= read_excel("C:/Users/burtkb/Downloads/retail.xlsx", skip=1)
head(retail)
## # A tibble: 6 x 190
## `Series ID` A3349335T A3349627V A3349338X A3349398A A3349468W
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1982-04-01 00:00:00 303. 41.7 63.9 409. 65.8
## 2 1982-05-01 00:00:00 298. 43.1 64 405. 65.8
## 3 1982-06-01 00:00:00 298 40.3 62.7 401 62.3
## 4 1982-07-01 00:00:00 308. 40.9 65.6 414. 68.2
## 5 1982-08-01 00:00:00 299. 42.1 62.6 404. 66
## 6 1982-09-01 00:00:00 305. 42 64.4 412. 62.3
## # ... with 184 more variables: A3349336V <dbl>, A3349337W <dbl>,
## # A3349397X <dbl>, A3349399C <dbl>, A3349874C <dbl>, A3349871W <dbl>,
## # A3349790V <dbl>, A3349556W <dbl>, A3349791W <dbl>, A3349401C <dbl>,
## # A3349873A <dbl>, A3349872X <dbl>, A3349709X <dbl>, A3349792X <dbl>,
## # A3349789K <dbl>, A3349555V <dbl>, A3349565X <dbl>, A3349414R <dbl>,
## # A3349799R <dbl>, A3349642T <dbl>, A3349413L <dbl>, A3349564W <dbl>,
## # A3349416V <dbl>, A3349643V <dbl>, A3349483V <dbl>, A3349722T <dbl>,
## # A3349727C <dbl>, A3349641R <dbl>, A3349639C <dbl>, A3349415T <dbl>,
## # A3349349F <dbl>, A3349563V <dbl>, A3349350R <dbl>, A3349640L <dbl>,
## # A3349566A <dbl>, A3349417W <dbl>, A3349352V <dbl>, A3349882C <dbl>,
## # A3349561R <dbl>, A3349883F <dbl>, A3349721R <dbl>, A3349478A <dbl>,
## # A3349637X <dbl>, A3349479C <dbl>, A3349797K <dbl>, A3349477X <dbl>,
## # A3349719C <dbl>, A3349884J <dbl>, A3349562T <dbl>, A3349348C <dbl>,
## # A3349480L <dbl>, A3349476W <dbl>, A3349881A <dbl>, A3349410F <dbl>,
## # A3349481R <dbl>, A3349718A <dbl>, A3349411J <dbl>, A3349638A <dbl>,
## # A3349654A <dbl>, A3349499L <dbl>, A3349902A <dbl>, A3349432V <dbl>,
## # A3349656F <dbl>, A3349361W <dbl>, A3349501L <dbl>, A3349503T <dbl>,
## # A3349360V <dbl>, A3349903C <dbl>, A3349905J <dbl>, A3349658K <dbl>,
## # A3349575C <dbl>, A3349428C <dbl>, A3349500K <dbl>, A3349577J <dbl>,
## # A3349433W <dbl>, A3349576F <dbl>, A3349574A <dbl>, A3349816F <dbl>,
## # A3349815C <dbl>, A3349744F <dbl>, A3349823C <dbl>, A3349508C <dbl>,
## # A3349742A <dbl>, A3349661X <dbl>, A3349660W <dbl>, A3349909T <dbl>,
## # A3349824F <dbl>, A3349507A <dbl>, A3349580W <dbl>, A3349825J <dbl>,
## # A3349434X <dbl>, A3349822A <dbl>, A3349821X <dbl>, A3349581X <dbl>,
## # A3349908R <dbl>, A3349743C <dbl>, A3349910A <dbl>, A3349435A <dbl>,
## # A3349365F <dbl>, A3349746K <dbl>, ...
retail.ts = ts(retail[,"A3349627V"], frequency = 12, start = c(1982,4))
autoplot(retail.ts)
As time increases, the seasonality changes more. Multiplicative seasonality can handle these changes, but additive cannot.
8b.Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.
retail.hw = hw(retail.ts, seasonal = "multiplicative")
autoplot(retail.hw)
retail.hw.damped = hw(retail.ts, seasonal = "multiplicative", damped = TRUE)
autoplot(retail.hw.damped)
The damped forecast made the forecast increase slower than the original hw forecast.
8c.
r.hw = forecast(retail.hw, h=1)
accuracy(r.hw)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.3076124 5.916807 4.201477 0.07243871 3.779909 0.4485563
## ACF1
## Training set -0.03122619
r.hw.d = forecast(retail.hw.damped, h=1)
accuracy(r.hw.d)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.3511596 5.628031 4.018041 0.1776603 3.685373 0.4289724
## ACF1
## Training set -0.05517684
The values are quite similar, but the damped model performed slightly better.
8d.Check that the residuals from the best method look like white noise.
#the best model was the damped model
checkresiduals(r.hw.d)
##
## Ljung-Box test
##
## data: Residuals from Damped Holt-Winters' multiplicative method
## Q* = 100.48, df = 7, p-value < 2.2e-16
##
## Model df: 17. Total lags used: 24
The residuals from the damped hw model do not look like white noise. This is evident by looking at the ACF plot.
8e. Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 8 in Section 3.7?
r.train = window(retail.ts, end = c(2010, 12))
r.test = window(retail.ts, start = c(2011, 1))
r.train.hw = hw(r.train, seasonal = "multiplicative", damped = TRUE)
autoplot(r.train.hw)
accuracy(r.train.hw, r.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.3202296 5.390419 3.821149 0.1422629 3.776242 0.4296760
## Test set 5.2081580 8.694021 6.560525 1.9292741 2.469011 0.7377101
## ACF1 Theil's U
## Training set -0.06928801 NA
## Test set 0.43321618 0.2242362
r.train.sn = snaive(r.train)
autoplot(r.train.sn)
accuracy(r.train.sn, r.test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 6.870871 12.27525 8.893093 5.476112 7.780981 1.00000 0.6617306
## Test set 28.400000 29.39091 28.400000 11.015822 11.015822 3.19349 0.5697915
## Theil's U
## Training set NA
## Test set 0.7493485
retail.stl = BoxCox.lambda(r.train)
r.stl1 = stlf(r.train, lambda = 1)
r.ets1 = ets(seasadj(decompose(r.train, "multiplicative")))
summary(r.stl1)
##
## Forecast method: STL + ETS(M,A,N)
##
## Model Information:
## ETS(M,A,N)
##
## Call:
## ets(y = na.interp(x), model = etsmodel, allow.multiplicative.trend = allow.multiplicative.trend)
##
## Smoothing parameters:
## alpha = 0.4031
## beta = 1e-04
##
## Initial states:
## l = 45.3575
## b = 0.3963
##
## sigma: 0.0581
##
## AIC AICc BIC
## 3216.637 3216.814 3235.854
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.522178 5.781816 3.901909 0.006932324 3.779216 0.4387572
## ACF1
## Training set 0.08551028
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2011 259.5389 240.5854 278.4923 230.5520 288.5257
## Feb 2011 236.1379 215.6684 256.6074 204.8324 267.4433
## Mar 2011 253.5219 231.6353 275.4085 220.0492 286.9945
## Apr 2011 249.2427 226.0199 272.4655 213.7265 284.7590
## May 2011 244.1163 219.6249 268.6077 206.6599 281.5727
## Jun 2011 238.9683 213.2658 264.6707 199.6598 278.2768
## Jul 2011 238.2179 211.3542 265.0816 197.1334 279.3023
## Aug 2011 240.8553 212.8739 268.8367 198.0615 283.6491
## Sep 2011 246.5025 217.4420 275.5630 202.0582 290.9467
## Oct 2011 259.5594 229.4541 289.6646 213.5174 305.6014
## Nov 2011 271.4966 240.3775 302.6157 223.9040 319.0892
## Dec 2011 358.6144 326.5094 390.7194 309.5141 407.7147
## Jan 2012 264.5102 231.4448 297.5756 213.9410 315.0794
## Feb 2012 241.1093 207.1067 275.1118 189.1069 293.1116
## Mar 2012 258.4932 223.5750 293.4115 205.0904 311.8961
## Apr 2012 254.2141 218.3999 290.0283 199.4410 308.9872
## May 2012 249.0876 212.3958 285.7795 192.9723 305.2030
## Jun 2012 243.9396 206.3872 281.4921 186.5081 301.3712
## Jul 2012 243.1892 204.7920 281.5865 184.4658 301.9127
## Aug 2012 245.8267 206.5995 285.0538 185.8339 305.8194
## Sep 2012 251.4738 211.4306 291.5170 190.2331 312.7146
## Oct 2012 264.5308 223.6846 305.3769 202.0619 326.9996
## Nov 2012 276.4680 234.8311 318.1048 212.7899 340.1460
## Dec 2012 363.5858 321.1699 406.0017 298.7162 428.4553
summary(r.ets1)
## ETS(M,A,N)
##
## Call:
## ets(y = seasadj(decompose(r.train, "multiplicative")))
##
## Smoothing parameters:
## alpha = 0.4583
## beta = 1e-04
##
## Initial states:
## l = 43.4686
## b = 0.403
##
## sigma: 0.0505
##
## AIC AICc BIC
## 3127.879 3128.056 3147.097
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.4021907 5.260437 3.826952 -0.09876248 3.754738 0.4328953
## ACF1
## Training set 0.03481627
Based on RMSE values, it seems to be similar to the others.
10.For this exercise use data set ukcars, the quarterly UK passenger vehicle production data from 1977Q1–2005Q1.
10a.Plot the data and describe the main features of the series.
autoplot(ukcars)
ggseasonplot(ukcars)
#There is an upward trend, and there is seasonality. The data tends to increase between Q1-Q2 and from Q3-Q4, and there is a sharp decrease from Q2-Q3.
10b.Decompose the series using STL and obtain the seasonally adjusted data.
sa.ukcars = stl(ukcars, t.window=13, s.window='periodic', robust=TRUE)
autoplot(sa.ukcars)
10c.Forecast the next two years of the series using an additive damped trend method applied to the seasonally adjusted data. (This can be done in one step using stlf() with arguments etsmodel=“AAN”, damped=TRUE.)
stlf.ukcars = stlf(ukcars, h=8, etsmodel = "AAN", damped = TRUE)
autoplot(stlf.ukcars)
10d.Forecast the next two years of the series using Holt’s linear method applied to the seasonally adjusted data (as before but with damped=FALSE).
hw.ukcars = hw(ukcars, h=8, etsmodel = "AAN", damped = FALSE)
autoplot(hw.ukcars)
10e.Now use ets() to choose a seasonal model for the data.
ets.ukcars = ets(ukcars)
autoplot(forecast(ets.ukcars))
summary(ets.ukcars)
## ETS(A,N,A)
##
## Call:
## ets(y = ukcars)
##
## Smoothing parameters:
## alpha = 0.6199
## gamma = 1e-04
##
## Initial states:
## l = 314.2568
## s = -1.7579 -44.9601 21.1956 25.5223
##
## sigma: 25.9302
##
## AIC AICc BIC
## 1277.752 1278.819 1296.844
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 1.313884 25.23244 20.17907 -0.1570979 6.629003 0.6576259
## ACF1
## Training set 0.02573334
10f.Compare the RMSE of the ETS model with the RMSE of the models you obtained using STL decompositions. Which gives the better in-sample fits?
accuracy(ets.ukcars)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.313884 25.23244 20.17907 -0.1570979 6.629003 0.6576259
## ACF1
## Training set 0.02573334
accuracy(hw.ukcars)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.706018 25.53765 20.42416 -0.0008705662 6.68854 0.6656134
## ACF1
## Training set 0.01923023
accuracy(stlf.ukcars)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.551267 23.32113 18.48987 0.04121971 6.042764 0.602576 0.02262668
#the ets model was best
10g. Compare the forecasts from the three approaches? Which seems most reasonable?
f.ets.uk = forecast(ets.ukcars)
f.ets.uk
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2005 Q2 427.4885 394.2576 460.7195 376.6662 478.3109
## 2005 Q3 361.3329 322.2353 400.4305 301.5383 421.1275
## 2005 Q4 404.5358 360.3437 448.7280 336.9497 472.1219
## 2006 Q1 431.8154 383.0568 480.5741 357.2455 506.3854
## 2006 Q2 427.4885 374.5571 480.4200 346.5369 508.4401
## 2006 Q3 361.3329 304.5345 418.1313 274.4672 448.1986
## 2006 Q4 404.5358 344.1174 464.9542 312.1338 496.9378
## 2007 Q1 431.8154 367.9809 495.6500 334.1890 529.4419
f.hw.uk = forecast(ets.ukcars)
f.hw.uk
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2005 Q2 427.4885 394.2576 460.7195 376.6662 478.3109
## 2005 Q3 361.3329 322.2353 400.4305 301.5383 421.1275
## 2005 Q4 404.5358 360.3437 448.7280 336.9497 472.1219
## 2006 Q1 431.8154 383.0568 480.5741 357.2455 506.3854
## 2006 Q2 427.4885 374.5571 480.4200 346.5369 508.4401
## 2006 Q3 361.3329 304.5345 418.1313 274.4672 448.1986
## 2006 Q4 404.5358 344.1174 464.9542 312.1338 496.9378
## 2007 Q1 431.8154 367.9809 495.6500 334.1890 529.4419
f.stlf.uk = forecast(stlf.ukcars)
f.stlf.uk
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2005 Q2 415.0289 384.4576 445.6001 368.2742 461.7836
## 2005 Q3 364.2543 326.8700 401.6386 307.0799 421.4287
## 2005 Q4 400.8059 357.6702 443.9416 334.8355 466.7762
## 2006 Q1 432.5663 384.3596 480.7731 358.8404 506.2922
## 2006 Q2 415.0250 362.2312 467.8189 334.2838 495.7663
## 2006 Q3 364.2507 307.2369 421.2646 277.0556 451.4459
## 2006 Q4 400.8026 339.8596 461.7456 307.5983 494.0069
## 2007 Q1 432.5633 367.9289 497.1976 333.7136 531.4130
#the ets model is best
10h.Check the residuals of your preferred model.
checkresiduals(f.ets.uk)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,A)
## Q* = 18.324, df = 3, p-value = 0.0003772
##
## Model df: 6. Total lags used: 9
11.For this exercise use data set visitors, the monthly Australian short-term overseas visitors data, May 1985–April 2005.
11a.Make a time plot of your data and describe the main features of the series.
str(visitors)
## Time-Series [1:240] from 1985 to 2005: 75.7 75.4 83.1 82.9 77.3 ...
autoplot(visitors)
ggseasonplot(visitors)
#there is an upward trend and a decent amount of seasonality.
11b.Split your data into a training set and a test set comprising the last two years of available data. Forecast the test set using Holt-Winters’ multiplicative method.
v.train = window(visitors, end = c(2003,4))
v.test = window(visitors, end = c(2005,4))
v.hw = hw(v.train, seasonal = 'multiplicative', h=24)
autoplot(v.hw)
11c.Why is multiplicative seasonality necessary here?
As time increases, the trend inccreases.
11d.Forecast the two-year test set using each of the following methods
#ets
v.ets = forecast(ets(v.train), h=24)
autoplot(v.ets)
#additive ets
v.add = ets(BoxCox(v.train, lambda = BoxCox.lambda(v.train)))
autoplot(v.add)
#snaive
v.sn = snaive(v.train, h=24)
autoplot(v.sn)
11e.Which method gives the best forecasts? Does it pass the residual tests?
accuracy(v.ets)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.7640074 14.5348 10.57657 0.1048224 3.994788 0.405423 -0.05311217
accuracy(v.add)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.03372402 0.3871656 0.298103 -0.1970547 1.673245 0.3827982
## ACF1
## Training set -0.0002543787
accuracy(v.sn)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 17.29363 31.15613 26.08775 7.192445 10.28596 1 0.6327669
checkresiduals(v.ets)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,Ad,M)
## Q* = 20.677, df = 7, p-value = 0.004278
##
## Model df: 17. Total lags used: 24
checkresiduals(v.add)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,A,A)
## Q* = 15.191, df = 8, p-value = 0.05554
##
## Model df: 16. Total lags used: 24
checkresiduals(v.sn)
##
## Ljung-Box test
##
## data: Residuals from Seasonal naive method
## Q* = 295.02, df = 24, p-value < 2.2e-16
##
## Model df: 0. Total lags used: 24
#v.ets was best
12.The fets() function below returns ETS forecasts.
fets <- function(y, h) {
forecast(ets(y), h = h)
}
12a.Apply tsCV() for a forecast horizon of h = 4, for both ETS and seasonal naïve methods to the qcement data, (Hint: use the newly created fets() and the existing snaive() functions as your forecast function arguments.)
str(qcement)
## Time-Series [1:233] from 1956 to 2014: 0.465 0.532 0.561 0.57 0.529 0.604 0.603 0.582 0.554 0.62 ...
ets.tscv = tsCV(qcement, fets, h=4)
sn.tscv = tsCV(qcement, snaive, h=4)
summary(snaive(qcement, h=4))
##
## Forecast method: Seasonal naive method
##
## Model Information:
## Call: snaive(y = qcement, h = 4)
##
## Residual sd: 0.1297
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.03419651 0.1338564 0.1021528 2.338509 6.768085 1 0.613902
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2014 Q2 2.528 2.356456 2.699544 2.265646 2.790354
## 2014 Q3 2.637 2.465456 2.808544 2.374646 2.899354
## 2014 Q4 2.565 2.393456 2.736544 2.302646 2.827354
## 2015 Q1 2.229 2.057456 2.400544 1.966646 2.491354
summary(fets(qcement, h=4))
##
## Forecast method: ETS(M,A,M)
##
## Model Information:
## ETS(M,A,M)
##
## Call:
## ets(y = y)
##
## Smoothing parameters:
## alpha = 0.7505
## beta = 0.003
## gamma = 1e-04
##
## Initial states:
## l = 0.5043
## b = 0.0081
## s = 1.0297 1.0488 1.0164 0.9052
##
## sigma: 0.0473
##
## AIC AICc BIC
## 6.783846 7.591021 37.843192
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0006237224 0.07829922 0.05603263 -0.08782872 3.606505 0.5485176
## ACF1
## Training set -0.03632985
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2014 Q2 2.521670 2.368825 2.674515 2.287913 2.755427
## 2014 Q3 2.610968 2.412978 2.808958 2.308169 2.913767
## 2014 Q4 2.571906 2.344302 2.799510 2.223816 2.919996
## 2015 Q1 2.268522 2.042601 2.494443 1.923006 2.614039
12b.Compute the MSE of the resulting 4-step-ahead errors. (Hint: make sure you remove missing values.) Why are there missing values? Comment on which forecasts are more accurate. Is this what you expected?
mean(ets.tscv^2, na.rm=TRUE)
## [1] 0.01250954
mean(sn.tscv^2, na.rm=TRUE)
## [1] 0.01792147
The ETS model looks more accurate which is what was expected since it includes more in the fets function than the snaive function included.
str(ausbeer)
## Time-Series [1:218] from 1956 to 2010: 284 213 227 308 262 228 236 320 272 233 ...
plot(ausbeer)
ausbeer.train = window(ausbeer, end = c(2007,2))
ausbeer.test = window(ausbeer, start = c(2007,3))
#ets
ausbeer.ets = ets(ausbeer.train)
f.ausbeer.ets = forecast(ausbeer.ets, h=12)
#sn
ausbeer.sn = snaive(ausbeer.train)
f.ausbeer.sn = forecast(ausbeer.train, h=12)
#stlf
ausbeer.stlf = stlf(ausbeer.train)
f.ausbeer.stlf = stlf(ausbeer.train, h=12)
#rmse
forecast::accuracy(f.ausbeer.ets, ausbeer.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.3466095 15.781973 11.979955 -0.064073435 2.864311 0.7567076
## Test set 0.1272571 9.620828 8.919347 0.009981598 2.132836 0.5633859
## ACF1 Theil's U
## Training set -0.1942276 NA
## Test set 0.3763918 0.1783972
forecast::accuracy(f.ausbeer.sn, ausbeer.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.3466095 15.781973 11.979955 -0.064073435 2.864311 0.7567076
## Test set 0.1272571 9.620828 8.919347 0.009981598 2.132836 0.5633859
## ACF1 Theil's U
## Training set -0.1942276 NA
## Test set 0.3763918 0.1783972
forecast::accuracy(f.ausbeer.stlf, ausbeer.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.6449182 13.77953 10.641492 0.1643171 2.552590 0.6721643
## Test set -3.9074898 10.13010 8.597595 -0.9365677 2.089001 0.5430626
## ACF1 Theil's U
## Training set -0.1469508 NA
## Test set 0.3443458 0.1777939
#the ets model was best
str(bricksq)
## Time-Series [1:155] from 1956 to 1994: 189 204 208 197 187 214 227 223 199 229 ...
plot(bricksq)
b.bricksq = BoxCox(bricksq, BoxCox.lambda(bricksq))
bricksq.train = window(b.bricksq, end = c(1991,1))
bricksq.test = window(b.bricksq, start = c(1991,2))
#ets
bricksq.ets = ets(bricksq.train)
f.bricksq.ets = forecast(bricksq.ets, h=12)
#sn
bricksq.sn = snaive(bricksq.train)
f.bricksq.sn = forecast(bricksq.train, h=12)
#stlf
bricksq.stlf = stlf(bricksq.train)
f.bricksq.stlf = stlf(bricksq.train, h=12)
#rmse
forecast::accuracy(f.bricksq.ets, bricksq.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01996121 0.2419629 0.1755865 0.1409763 1.247006 0.4329129
## Test set 0.20641501 0.2854117 0.2458899 1.4176042 1.692439 0.6062480
## ACF1 Theil's U
## Training set 0.1434416 NA
## Test set 0.3798615 0.793706
forecast::accuracy(f.bricksq.sn, bricksq.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01996121 0.2419629 0.1755865 0.1409763 1.247006 0.4329129
## Test set 0.20641501 0.2854117 0.2458899 1.4176042 1.692439 0.6062480
## ACF1 Theil's U
## Training set 0.1434416 NA
## Test set 0.3798615 0.793706
forecast::accuracy(f.bricksq.stlf, bricksq.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.02095131 0.2281125 0.1675486 0.1506378 1.188061 0.4130953
## Test set 0.14549872 0.2641646 0.2239398 0.9999206 1.546620 0.5521294
## ACF1 Theil's U
## Training set 0.1966409 NA
## Test set 0.3346748 0.7433275
#the stlf model was best
str(dole)
## Time-Series [1:439] from 1956 to 1992: 4742 6128 6494 5379 6011 ...
plot(dole)
dole.train = window(dole, end=c(1989,7))
dole.test = window(dole, start=c(1989,8))
#ets
dole.ets = ets(dole.train)
f.dole.ets = forecast(dole.ets, h=12)
#sn
dole.sn = snaive(dole.train)
f.dole.sn = forecast(dole.train, h=12)
#stlf
dole.stlf = stlf(dole.train)
f.dole.stlf = stlf(dole.train, h=12)
#rmse
forecast::accuracy(f.dole.ets, dole.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 98.00253 16130.58 9427.497 0.5518769 6.20867 0.2995409
## Test set 27788.36057 33298.90 27788.361 6.9987596 6.99876 0.8829225
## ACF1 Theil's U
## Training set 0.5139536 NA
## Test set 0.4229686 2.661928
forecast::accuracy(f.dole.sn, dole.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 98.00253 16130.58 9427.497 0.5518769 6.20867 0.2995409
## Test set 27788.36057 33298.90 27788.361 6.9987596 6.99876 0.8829225
## ACF1 Theil's U
## Training set 0.5139536 NA
## Test set 0.4229686 2.661928
forecast::accuracy(f.dole.stlf, dole.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 272.8467 6394.784 3898.066 0.5609317 4.972168 0.1238537
## Test set 11231.3788 29250.310 19848.816 2.6019142 4.808377 0.6306585
## ACF1 Theil's U
## Training set 0.02000331 NA
## Test set 0.64284568 2.318964
#the stlf model was best
str(a10)
## Time-Series [1:204] from 1992 to 2008: 3.53 3.18 3.25 3.61 3.57 ...
plot(a10)
b.a10 = BoxCox(a10, BoxCox.lambda(a10))
a10.train = window(b.a10, end = c(2005,6))
a10.test = window(b.a10, start = c(2005,7))
#ets
a10.ets = ets(a10.train)
f.a10.ets = forecast(a10.ets, h=12)
#sn
a10.sn = snaive(a10.train)
f.a10.sn = forecast(a10.train, h=12)
#stlf
a10.stlf = stlf(a10.train)
f.a10.stlf = stlf(a10.train, h=12)
#rmse
forecast::accuracy(f.a10.ets, a10.test)
## ME RMSE MAE MPE MAPE
## Training set 7.659502e-05 0.06455211 0.05014221 -0.002655551 2.224162
## Test set -1.804857e-02 0.09455491 0.08455091 -0.654955029 2.511472
## MASE ACF1 Theil's U
## Training set 0.3212884 -0.04890245 NA
## Test set 0.5417637 -0.30185972 0.3111355
forecast::accuracy(f.a10.sn, a10.test)
## ME RMSE MAE MPE MAPE
## Training set 7.659502e-05 0.06455211 0.05014221 -0.002655551 2.224162
## Test set -1.804857e-02 0.09455491 0.08455091 -0.654955029 2.511472
## MASE ACF1 Theil's U
## Training set 0.3212884 -0.04890245 NA
## Test set 0.5417637 -0.30185972 0.3111355
forecast::accuracy(f.a10.stlf, a10.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -9.590892e-05 0.05937144 0.04677082 -0.04963522 2.090263 0.2996861
## Test set -1.477002e-02 0.08713335 0.07866002 -0.55445249 2.333908 0.5040176
## ACF1 Theil's U
## Training set -0.09757243 NA
## Test set -0.31562275 0.2836165
#the stlf model was best
str(h02)
## Time-Series [1:204] from 1992 to 2008: 0.43 0.401 0.432 0.493 0.502 ...
plot(h02)
h02.train = window(h02, end = c(2005,7))
h02.test = window(h02, start = c(2005,8))
#ets
h02.ets = ets(h02.train)
f.h02.ets = forecast(h02.ets, h=12)
#sn
h02.sn = snaive(h02.train)
f.h02.sn = forecast(h02.train, h=12)
#stlf
h02.stlf = stlf(h02.train)
f.h02.stlf = stlf(h02.train, h=12)
#rmse
forecast::accuracy(f.h02.ets, h02.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0004513622 0.04579930 0.03426080 -0.2209876 4.560572 0.5745005
## Test set 0.0232785622 0.06402092 0.05309284 1.7574790 5.577604 0.8902845
## ACF1 Theil's U
## Training set 0.05057522 NA
## Test set -0.38030773 0.3337125
forecast::accuracy(f.h02.sn, h02.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0004513622 0.04579930 0.03426080 -0.2209876 4.560572 0.5745005
## Test set 0.0232785622 0.06402092 0.05309284 1.7574790 5.577604 0.8902845
## ACF1 Theil's U
## Training set 0.05057522 NA
## Test set -0.38030773 0.3337125
forecast::accuracy(f.h02.stlf, h02.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -9.933146e-05 0.04222490 0.03158308 -0.1504343 4.456942 0.5295993
## Test set 1.804533e-02 0.06816579 0.05728146 0.7004321 6.188342 0.9605212
## ACF1 Theil's U
## Training set -0.06290209 NA
## Test set -0.07832555 0.3495893
#the stlf model was best
str(usmelec)
## Time-Series [1:486] from 1973 to 2013: 160 144 148 140 147 ...
plot(usmelec)
usmelec.train = window(usmelec, end = c(2010,6))
usmelec.test = window(usmelec, start = c(2010,7))
#ets
usmelec.ets = ets(usmelec.train)
f.usmelec.ets = forecast(usmelec.ets, h=12)
#sn
usmelec.sn = snaive(usmelec.train)
f.usmelec.sn = forecast(usmelec.train, h=12)
#stlf
usmelec.stlf = stlf(usmelec.train)
f.usmelec.stlf = stlf(usmelec.train, h=12)
#rmse
forecast::accuracy(f.usmelec.ets, usmelec.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.07834851 7.447167 5.601963 -0.1168709 2.163495 0.6225637
## Test set -1.17486480 7.974503 6.633269 -0.5181480 1.933084 0.7371760
## ACF1 Theil's U
## Training set 0.2719249 NA
## Test set 0.2142856 0.2257995
forecast::accuracy(f.usmelec.sn, usmelec.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.07834851 7.447167 5.601963 -0.1168709 2.163495 0.6225637
## Test set -1.17486480 7.974503 6.633269 -0.5181480 1.933084 0.7371760
## ACF1 Theil's U
## Training set 0.2719249 NA
## Test set 0.2142856 0.2257995
forecast::accuracy(f.usmelec.stlf, usmelec.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.06835845 6.327110 4.786752 -0.06755695 1.872096 0.5319668
## Test set -3.76217952 9.442505 8.219468 -1.32097598 2.470746 0.9134552
## ACF1 Theil's U
## Training set 0.1180312 NA
## Test set 0.2533619 0.2807631
#the stlf model was best
14a. Use ets() on the following series: bicoal, chicken, dole, usdeaths, lynx, ibmclose, eggs. Does it always give good forecasts?
autoplot(forecast(ets(bicoal)))
autoplot(forecast(ets(chicken)))
autoplot(forecast(ets(dole)))
autoplot(forecast(ets(usdeaths)))
autoplot(forecast(ets(lynx)))
autoplot(forecast(ets(ibmclose)))
autoplot(forecast(ets(eggs)))
No, it does not always give good forecasts.
14b.Find an example where it does not work well. Can you figure out why?
In the examples where it doesn’t work well, it is unable to pick up trends or seasonality, so snaive may be the better option in those cases.
15.Show that the point forecasts from an ETS(M,A,M) model are the same as those obtained using Holt-Winters’ multiplicative method.
#ETS(M,A,M) model
usp = ets(usdeaths, model = "MAM")
#hw model
usphw = hw(usdeaths, seasonal = 'multiplicative', h=1)
accuracy(usp)
## ME RMSE MAE MPE MAPE MASE
## Training set 22.42166 247.1924 193.8055 0.2077024 2.225981 0.4435586
## ACF1
## Training set 0.005044861
accuracy(usphw)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 37.44648 263.7717 204.3406 0.3811538 2.367304 0.4676699 0.2187199
16.Show that the forecast variance for an ETS(A,N,N) model is given by sigma^2[1+alpha^2(h-1)]
###for some reason, this was working well in the console, but there is an error when I try to knit. I will just include the code here.
ap1 = ets(AirPassengers, model = “ANN”) ap1f = forecast(ap1,1) ap1f summary(ap1f)
sigma = 0.0392 alpha = 0.7096 h = 2 ci = sigma(1+alpha(h-1)) ap1f\(mean[1] ap1f\)mean[1] - ci ap1f$lower[1, ‘95%’]
(0.0392^2)[1+((0.7096)^2)(2-1)]