Consider the pigs series — the number of pigs slaughtered in Victoria each month.
library(fpp2)
library(ggplot2)
library(ggthemes)
library(tidyverse)
library(kableExtra)
library(Metrics)
library(urca)
autoplot(pigs) + ggtitle("Number of Pigs Slaughtered in Victoria, Australia") + theme_classic()
ses1 <- ses(pigs, h=4)
ses1$model
## Simple exponential smoothing
##
## Call:
## ses(y = pigs, h = 4)
##
## Smoothing parameters:
## alpha = 0.2971
##
## Initial states:
## l = 77260.0561
##
## sigma: 10308.58
##
## AIC AICc BIC
## 4462.955 4463.086 4472.665
\(\alpha = 0.297\) \(\ell_0 = 77260.0561\)
s <- sd(ses1$residuals)
ul <- ses1$mean + 1.96*s
ll <- ses1$mean - 1.96*s
c1 <- c("Upper", "Lower")
c2 <- c(ses1$upper[1, "95%"], ses1$lower[1, "95%"])
c3 <- c(ses1$upper[2, "95%"], ses1$lower[2, "95%"])
c4 <- c(ses1$upper[3, "95%"], ses1$lower[3, "95%"])
c5 <- c(ses1$upper[4, "95%"], ses1$lower[4, "95%"])
c6 <- c(ul[1], ll[1])
c7 <- c(ul[2], ll[2])
c8 <- c(ul[3], ll[3])
c9 <- c(ul[4], ll[4])
table11 <- data.frame(c1,c2,c3,c4, c5)
colnames(table11) <- c("", "Sep", "Oct", "Nov", "Dec" )
kable(table11, caption = "R Forecast 95% Interval") %>% kable_classic(full_width = F)
| Sep | Oct | Nov | Dec | |
|---|---|---|---|---|
| Upper | 119020.84 | 119893.98 | 120732.35 | 121539.82 |
| Lower | 78611.97 | 77738.83 | 76900.46 | 76092.99 |
table12 <- data.frame(c1,c6,c7,c8, c9)
colnames(table12) <- c("", "Sep", "Oct", "Nov", "Dec" )
kable(table12, caption = "Formula Forecast 95% Interval") %>% kable_classic(full_width = F)
| Sep | Oct | Nov | Dec | |
|---|---|---|---|---|
| Upper | 118952.84 | 118952.84 | 118952.84 | 118952.84 |
| Lower | 78679.97 | 78679.97 | 78679.97 | 78679.97 |
autoplot(ses1) + autolayer(ul, series = "95% Formula (Upper)") + autolayer(ll, series = "95% Formula (Lower)")
There are small differences, but overall similar results.
Write your own function to implement simple exponential smoothing. The function should take arguments y (the time series), alpha (the smoothing parameter α) and level (the initial level ℓ0). It should return the forecast of the next observation in the series. Does it give the same forecast as ses()?
fses <- function(y, alpha, l0) {
y_hat <- l0
for(index in 1:length(y)){
y_hat <- alpha*y[index] + (1 - alpha)*y_hat
}
}
alpha <- ses1$model$par[1]
l0 <- ses1$model$par[2]
results2 <- fses(pigs, alpha, l0)
c21 <- c("Function", "R")
c22 <- c(results2, ses1$mean[1])
table21 <- data.frame(c21, c22)
colnames(table21) <- c("" , "Forecast Value")
kable(table21) %>% kable_classic(full_width = F)
| Forecast Value | |
|---|---|
| Function | 98816.41 |
| R | 98816.41 |
Both results are the same.
fses3 <- function(pars = c(alpha, l0), y ){
error <- 0
SSE <- 0
alpha <- pars[1]
l0 <- pars[2]
y_hat <- l0
for(index in 1:length(y)){
error <- y[index] - y_hat
SSE <- SSE + error^2
y_hat <- alpha*y[index] + (1 - alpha)*y_hat
}
return(SSE)
}
optim3 <- optim(par = c(0.5, pigs[1]), y = pigs, fn = fses3)
c31 <- c("Function", "R")
c32 <- c(round(optim3$par[1],3), round(ses1$model$par[1],3))
c33 <- c(round(optim3$par[2],3), round(ses1$model$par[2],3))
table31 <- data.frame(c31, c32, c33)
colnames(table31) <- c("" , "alpha", "L0")
kable(table31,) %>% kable_classic(full_width = F)
| alpha | L0 | ||
|---|---|---|---|
| Function | 0.299 | 76379.26 | |
| alpha | R | 0.297 | 77260.06 |
They’re not the same values, but are very close.
fses4 <- function(init_pars, data){
fc <- 0
SSE <- function(pars, data){
error <- 0
SSE <- 0
alpha <- pars[1]
l0 <- pars[2]
y_hat <- l0
for(index in 1:length(data)){
error <- data[index] - y_hat
SSE <- SSE + error^2
y_hat <- alpha*data[index] + (1 - alpha)*y_hat
}
fc <<- y_hat
return(SSE)
}
optim_pars <- optim(par = init_pars, data = data, fn = SSE)
return(list(
forecast = fc,
alpha = optim_pars$par[1],
l0 = optim_pars$par[2]
))
}
fses4(c(0.5, pigs[1]), pigs)
## $forecast
## [1] 98814.63
##
## $alpha
## [1] 0.2990081
##
## $l0
## [1] 76379.27
# str(books)
autoplot(books) + theme_classic()
Both series have an upward trend, with some likely seasonality.
sespb <- ses(books[, "Paperback"], h = 4)
seshc <- ses(books[, "Hardcover"], h = 4)
autoplot(books[, "Paperback"], series = "Paperback") +
autolayer(sespb, series = "Paperback Forecast", PI = FALSE ) +
autolayer(books[, "Hardcover"], series = "Hardcover") +
autolayer(seshc, series = "Hardcover Forecast", PI = FALSE) +
ylab("Sales") + ggtitle("Book Sales and Forecast")
c. Compute the RMSE values for the training data in each case.
library(Metrics)
c51 <- c("Paperback", "Hardcover")
c52 <- c(rmse(books[,"Paperback"], sespb$fitted), rmse(books[,"Hardcover"], seshc$fitted))
table51 <- data.frame(c51, round(c52,2))
colnames(table51) <- c("", "RMSE")
kable(table51) %>% kable_classic(full_width = F)
| RMSE | |
|---|---|
| Paperback | 33.64 |
| Hardcover | 31.93 |
6.We will continue with the daily sales of paperback and hardcover books in data set books.
holtpb <- holt(books[, "Paperback"], h = 4)
holthc <- holt(books[, "Hardcover"], h = 4)
autoplot(books[, "Paperback"], series = "Paperback") +
autolayer(holtpb, series = "Paperback Forecast", PI = FALSE) +
autolayer(books[, "Hardcover"], series = "Hardcover") +
autolayer(holthc, series = "Hardcover Forecast", PI = FALSE) +
ylab("Sales") + ggtitle("Book Sales and Forecast (Holt)")
b.Compare the RMSE measures of Holt’s method for the two series to those of simple exponential smoothing in the previous question. (Remember that Holt’s method is using one more parameter than SES.) Discuss the merits of the two forecasting methods for these data sets.
c6b1 <- c("SES Paperback", "Holt Paperback", "SES Hardcover", "Holt Hardcover")
c6b2 <- c(rmse(books[,"Paperback"], sespb$fitted), rmse(books[,"Paperback"], holtpb$fitted), rmse(books[,"Hardcover"], seshc$fitted), rmse(books[,"Hardcover"], holthc$fitted))
table6b <- data.frame(c6b1, round(c6b2,2))
colnames(table6b) <- c("", "RMSE")
kable(table6b) %>% kable_classic(full_width = F)
| RMSE | |
|---|---|
| SES Paperback | 33.64 |
| Holt Paperback | 31.14 |
| SES Hardcover | 31.93 |
| Holt Hardcover | 27.19 |
Holt’s method is better than SES, as it incorporates a trend, which the books data has.
plot6c1 <- autoplot(books[, "Paperback"], series = "Actual") +
autolayer(sespb, series = "SES", PI = FALSE) +
autolayer(holtpb, series = "Holt", PI = FALSE) +
ylab("Sales") + ggtitle("Paperback Sales and Forecasts") +
theme(legend.position = "bottom")
plot6c2 <- autoplot(books[, "Hardcover"], series = "Actual") +
autolayer(sespb, series = "SES", PI = FALSE) +
autolayer(holtpb, series = "Holt", PI = FALSE) +
ylab("Sales") + ggtitle("Hardcover Sales and Forecasts") +
theme(legend.position = "bottom")
gridExtra::grid.arrange(plot6c1, plot6c2, ncol=2)
rmsesespb <- rmse(books[,"Paperback"], sespb$fitted)
rmseseshc <- rmse(books[,"Hardcover"], seshc$fitted)
rmseholtpb <- rmse(books[,"Paperback"], holtpb$fitted)
rmseholthc <- rmse(books[,"Hardcover"], holthc$fitted)
c6d1 <- c("SES - R", "SES - Function", "Holt - R", "Holt - Function")
c6d2 <- c(sespb$upper[1, "95%"], sespb$mean[1]+1.96*rmsesespb, holtpb$upper[1, "95%"], holtpb$mean[1]+1.96*rmseholthc)
c6d3 <- c(sespb$lower[1, "95%"], sespb$mean[1]-1.96*rmsesespb, holtpb$lower[1, "95%"], holtpb$mean[1]-1.96*rmseholthc)
table6d1 <- data.frame(c6d1,c6d2,c6d3)
colnames(table6d1) <- c("", "Upper", "Lower")
kable(table6d1, caption = "Paperback Forecast (1st Period)") %>% kable_classic(full_width = F)
| Upper | Lower | |
|---|---|---|
| SES - R | 275.3523 | 138.8670 |
| SES - Function | 273.0395 | 141.1798 |
| Holt - R | 275.0205 | 143.9130 |
| Holt - Function | 262.7662 | 156.1674 |
c6d4 <- c(seshc$upper[1, "95%"], seshc$mean[1]+1.96*rmseseshc, holthc$upper[1, "95%"], holthc$mean[1]+1.96*rmseholthc)
c6d5 <- c(seshc$lower[1, "95%"], seshc$mean[1]-1.96*rmseseshc, holthc$lower[1, "95%"], holthc$mean[1]-1.96*rmseholthc)
table6d1 <- data.frame(c6d1,c6d4,c6d5)
colnames(table6d1) <- c("", "Upper", "Lower")
kable(table6d1, caption = "Hardcover Forecast (1st Period)") %>% kable_classic(full_width = F)
| Upper | Lower | |
|---|---|---|
| SES - R | 304.3403 | 174.7799 |
| SES - Function | 302.1449 | 176.9753 |
| Holt - R | 307.4256 | 192.9222 |
| Holt - Function | 303.4733 | 196.8745 |
As expected, both SES and Holt values are similar, although not the same. Holt PI are narrower.
[Hint: use h=100 when calling holt() so you can clearly see the differences between the various options when plotting the forecasts.]
Which model gives the best RMSE?
autoplot(eggs) + theme_classic() + ggtitle("Price of a Dozen of Eggs, 1900 - 1993")
holt1 <- holt(eggs, h=100)
autoplot(holt1) + autolayer(holt1$fitted) + theme_classic()
holt2 <- holt(eggs, h=100, damped = T)
holt3 <- holt(eggs, h=100, damped = F, biasadj = T)
holt4 <- holt(eggs, h=100, damped = T, biasadj = T)
autoplot(holt2) + autolayer(holt2$fitted) + ggtitle("Damped") + theme_classic()
autoplot(holt3) + autolayer(holt3$fitted) + ggtitle("BiasAdj") + theme_classic()
autoplot(holt4) + autolayer(holt4$fitted) + ggtitle("Damped and BiasAdj") + theme_classic()
rmse1 <- rmse(eggs, holt1$fitted)
rmse2 <- rmse(eggs, holt2$fitted)
rmse3 <- rmse(eggs, holt3$fitted)
rmse4 <- rmse(eggs, holt4$fitted)
c71 <- c("Holt", "Holt - Damped", "Holt - BiasAdj", "Holt - Damped and Biasadj")
c72 <- c(rmse1, rmse2, rmse3, rmse4)
table7 <- data.frame(c71, c72)
colnames(table7) <- c("", "RMSE")
kable(table7, caption = "Egg RMSE") %>% kable_classic(full_width = F)
| RMSE | |
|---|---|
| Holt | 26.58219 |
| Holt - Damped | 26.54019 |
| Holt - BiasAdj | 26.58219 |
| Holt - Damped and Biasadj | 26.54019 |
Recall your retail time series data (from Exercise 3 in Section 2.10).
library(openxlsx)
retaildata <- read.xlsx("F:/MSAE/2020fall_PredictiveAnalytics/retail.xlsx", startRow = 2)
tsretaildata <- ts(retaildata[,"A3349873A"], frequency=12, start=c(1982,4))
autoplot(tsretaildata) + theme_classic()
The seasonal effects increase with time.
hw8 <- hw(tsretaildata, seasonal = "multiplicative")
hw8d <- hw(tsretaildata, seasonal = "multiplicative", damped = T)
autoplot(tsretaildata) + autolayer(hw8, series = "HW") +
autolayer(hw8$fitted) + theme_classic() + ggtitle("HW Forecast")
autoplot(tsretaildata) + autolayer(hw8d, series = "HW - Damped") +
autolayer(hw8d$fitted) + theme_classic() + ggtitle("HW - Damped Forecast")
rmsehw8 <- rmse(tsretaildata, hw8$fitted)
rmsehw8d <- rmse(tsretaildata, hw8d$fitted)
c81 <- c("Holt", "Holt - Damped")
c82 <- c(rmsehw8, rmsehw8d)
table8 <- data.frame(c81, c82)
colnames(table8) <- c("", "RMSE")
kable(table8, caption = "Retail RMSE") %>% kable_classic(full_width = F)
| RMSE | |
|---|---|
| Holt | 13.29378 |
| Holt - Damped | 13.30494 |
Very similar RMSE, but damped might be preferred for longer term forecasts.
checkresiduals(hw8d)
##
## Ljung-Box test
##
## data: Residuals from Damped Holt-Winters' multiplicative method
## Q* = 42.932, df = 7, p-value = 3.437e-07
##
## Model df: 17. Total lags used: 24
retail_train <- window(tsretaildata, end = c(2010, 12))
retail_valid <- window(tsretaildata, start = 2011)
naive <- snaive(retail_train, h=36)
naiveacc <- forecast::accuracy(naive, retail_valid)
kable(round(naiveacc,2), caption = "Naive Model") %>% kable_classic(full_width = F)
| ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|---|
| Training set | 7.77 | 20.25 | 15.96 | 4.70 | 8.11 | 1.00 | 0.74 | NA |
| Test set | 81.74 | 100.01 | 82.07 | 20.55 | 20.67 | 5.14 | 0.68 | 1.67 |
hw8e <- hw(retail_train, damped = T)
hw8eacc <- forecast::accuracy(hw8e, retail_valid)
kable(round(hw8eacc,2), caption = "HW Model") %>% kable_classic(full_width = F)
| ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|---|
| Training set | 1.02 | 12.55 | 9.14 | 0.56 | 5.20 | 0.57 | 0.19 | NA |
| Test set | 63.42 | 78.15 | 63.42 | 17.44 | 17.44 | 3.97 | 0.48 | 1.45 |
The HW model performed better.
For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?
lambda1 <- BoxCox.lambda(retail_train)
bc9stl <-BoxCox(retail_train, lambda1) %>%
mstl(s.window = 13, robust = T)%>%
forecast(h=36,lambda=lambda1)
bc9ets <-BoxCox(retail_train, lambda1) %>%
ets()%>%
forecast(h=36,lambda=lambda1)
c91 <- c("BC STL", "BC ETS")
c92 <- c(rmse(bc9stl$mean, retail_valid), rmse(bc9ets$mean, retail_valid))
table9 <- data.frame(c91, c92)
colnames(table9) <- c("", "RMSE")
kable(table9, caption = "RMSE") %>% kable_classic(full_width = F)
| RMSE | |
|---|---|
| BC STL | 98.38422 |
| BC ETS | 95.31287 |
For this exercise use data set ukcars, the quarterly UK passenger vehicle production data from 1977Q1–2005Q1.
autoplot(ukcars) + theme_classic() + ggtitle("Vehicle Production Data 1977Q1 - 2005Q1")
There is a clear seasonal pattern, and an upward trend from the early 1980’s.
stl10 <- stl(ukcars, t.window=13, s.window='periodic', robust = T)
autoplot(stl10)
saukcars <- seasadj(stl10)
autoplot(saukcars) + autolayer(ukcars, series = "Original Data") +
ggtitle("Seasonally Adjusted Vehicle Production Data 1977Q1 - 2005Q1") +
theme_classic()
hw10 <- hw(saukcars, h=8, seasonal = "additive", damped = T)
autoplot(hw10)
hw10d <- holt(saukcars, h=8, damped = F)
autoplot(hw10d)
etscars <- ets(saukcars, damped = T)
autoplot(etscars)
summary(etscars)
## ETS(A,Ad,N)
##
## Call:
## ets(y = saukcars, damped = T)
##
## Smoothing parameters:
## alpha = 0.5696
## beta = 2e-04
## phi = 0.9143
##
## Initial states:
## l = 347.3368
## b = -8.8748
##
## sigma: 25.7803
##
## AIC AICc BIC
## 1275.493 1276.285 1291.857
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 2.398058 25.2035 20.43629 0.2677076 6.536475 0.6660087 0.03749911
f.Compare the RMSE of the ETS model with the RMSE of the models you obtained using STL decompositions. Which gives the better in-sample fits?
c10f1 <- c("HW - Additive", "Holt", "ETS")
c10f2 <- c(rmse(hw10$fitted, saukcars), rmse(hw10d$fitted, saukcars), rmse(etscars$fitted, saukcars))
table10 <- data.frame(c10f1, c10f2)
colnames(table10) <- c("", "RMSE")
kable(table10, caption = "RMSE") %>% kable_classic(full_width = F)
| RMSE | |
|---|---|
| HW - Additive | 25.18399 |
| Holt | 25.28237 |
| ETS | 25.20350 |
g.Compare the forecasts from the three approaches? Which seems most reasonable?
fcets <- forecast(etscars, h=8)
autoplot(saukcars) + autolayer(hw10$mean, series = "Holt", PI = F) +
autolayer(hw10d$mean, series = "Holt Winter", PI = F) +
autolayer (fcets, series = "ETS", PI = F)
They all are somewhat plausible, but Holt-Winter had a lower RMSE.
checkresiduals(hw10d)
##
## Ljung-Box test
##
## data: Residuals from Holt's method
## Q* = 17.568, df = 4, p-value = 0.001499
##
## Model df: 4. Total lags used: 8
For this exercise use data set visitors, the monthly Australian short-term overseas visitors data, May 1985–April 2005.
autoplot(visitors) + theme_classic() +
ggtitle("Monthly Australian short-term overseas visitors data, May 1985–April 2005")
A clear upward trend, with seasonality. Seasonality effects are clearly increasing with time, making this series ideal for a multiplicative model.
visitors_train <- window(visitors, end = c(2003,4))
visitors_valid <- window(visitors, start = c(2003,5))
hwvisitors<- hw(visitors_train, seasonal = 'multiplicative', h=24)
summary(hwvisitors)
##
## Forecast method: Holt-Winters' multiplicative method
##
## Model Information:
## Holt-Winters' multiplicative method
##
## Call:
## hw(y = visitors_train, h = 24, seasonal = "multiplicative")
##
## Smoothing parameters:
## alpha = 0.4379
## beta = 0.0164
## gamma = 1e-04
##
## Initial states:
## l = 90.9387
## b = 2.584
## s = 0.945 1.0553 1.0882 0.9681 1.3072 1.0711
## 1.0264 0.9095 0.938 1.0401 0.8509 0.8001
##
## sigma: 0.0571
##
## AIC AICc BIC
## 2326.608 2329.699 2383.988
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.9749466 14.06539 10.35763 -0.5792169 4.223204 0.3970304
## ACF1
## Training set 0.1356528
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## May 2003 293.5512 272.0790 315.0235 260.7123 326.3902
## Jun 2003 311.3817 286.3466 336.4168 273.0939 349.6695
## Jul 2003 379.6865 346.4414 412.9316 328.8425 430.5305
## Aug 2003 341.5672 309.2321 373.9023 292.1150 391.0195
## Sep 2003 330.3820 296.7647 363.9994 278.9688 381.7953
## Oct 2003 371.9441 331.4615 412.4267 310.0313 433.8569
## Nov 2003 387.1607 342.2705 432.0509 318.5070 455.8144
## Dec 2003 471.3185 413.3054 529.3316 382.5951 560.0419
## Jan 2004 348.1741 302.8173 393.5309 278.8068 417.5414
## Feb 2004 390.3863 336.7055 444.0672 308.2885 472.4841
## Mar 2004 377.6150 322.9349 432.2951 293.9890 461.2410
## Apr 2004 337.3049 285.9782 388.6316 258.8075 415.8024
## May 2004 284.8762 239.4088 330.3437 215.3398 354.4127
## Jun 2004 302.1571 251.6621 352.6521 224.9316 379.3825
## Jul 2004 368.4105 304.0470 432.7739 269.9751 466.8459
## Aug 2004 331.3981 270.9579 391.8384 238.9627 423.8335
## Sep 2004 320.5215 259.5777 381.4653 227.3160 413.7270
## Oct 2004 360.8154 289.3783 432.2525 251.5618 470.0690
## Nov 2004 375.5478 298.2123 452.8832 257.2733 493.8222
## Dec 2004 457.1458 359.3352 554.9564 307.5574 606.7342
## Jan 2005 337.6781 262.6845 412.6717 222.9853 452.3709
## Feb 2005 378.5881 291.3958 465.7805 245.2390 511.9373
## Mar 2005 366.1740 278.7938 453.5542 232.5375 499.8105
## Apr 2005 327.0594 246.2591 407.8596 203.4861 450.6326
autoplot(hwvisitors)
Seasonality increases with time.
# ETS
visitorsets <- ets(visitors_train)
visitorsets_fc <- forecast(visitorsets, h = 24)
# an additive ETS model applied to a Box-Cox transformed series
visitorsetsa <- ets(model = "AAA", BoxCox(visitors_train, lambda = BoxCox.lambda(visitors_train)))
visitorseta_fc <- forecast(visitorsetsa, h =24)
# a seasonal naïve method
visitorsnaive <- snaive(visitors_train, h = 24)
# an STL decomposition applied to the Box-Cox transformed data followed
# by an ETS model applied to the seasonally adjusted (transformed) data.
visitorsbcsa <- seasadj(BoxCox(visitors_train, lambda = BoxCox.lambda(visitors_train)) %>%
stl(t.window=13, s.window='periodic', robust = T))
visitorsetsbca <- ets(visitorsbcsa)
visitorsetsbca_fc <- forecast(visitorsetsbca, h = 24)
e.Which method gives the best forecasts? Does it pass the residual tests?
rmsesets <- rmse(visitors_valid, visitorsets_fc$mean)
rmseseta <- rmse(visitors_valid, visitorseta_fc$mean)
rmsesnaive <- rmse(visitors_valid, visitorsnaive$mean)
rmsesetsbca <- rmse(visitors_valid, visitorsetsbca_fc$mean)
c111 <- c("ETS", "ETS Box Cox", "Seasonal Naive", "STL, Box Cox")
c112 <- c(rmsesets, rmseseta, rmsesnaive, rmsesetsbca)
table11 <- data.frame(c111, c112)
colnames(table11) <- c("Model", "Validation RMSE")
kable(table11) %>% kable_classic(full_width = F)
| Model | Validation RMSE |
|---|---|
| ETS | 80.23124 |
| ETS Box Cox | 415.07656 |
| Seasonal Naive | 50.30097 |
| STL, Box Cox | 414.64789 |
checkresiduals(visitorsets)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,Ad,M)
## Q* = 20.677, df = 7, p-value = 0.004278
##
## Model df: 17. Total lags used: 24
checkresiduals(visitorsetsa)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,Ad,A)
## Q* = 15.206, df = 7, p-value = 0.03344
##
## Model df: 17. Total lags used: 24
checkresiduals(visitorsnaive)
##
## Ljung-Box test
##
## data: Residuals from Seasonal naive method
## Q* = 295.02, df = 24, p-value < 2.2e-16
##
## Model df: 0. Total lags used: 24
checkresiduals(visitorsetsbca)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,A,N)
## Q* = 26.159, df = 20, p-value = 0.1606
##
## Model df: 4. Total lags used: 24
f.Compare the same four methods using time series cross-validation with the tsCV() function instead of using a training and test set. Do you come to the same conclusions?
fets_BC <- function(y, h) {
forecast(ets(
y,
lambda = BoxCox.lambda(y),
additive.only = TRUE
),
h = h)
}
fstlm <- function(y, h) {
forecast(stlm(
y,
lambda = BoxCox.lambda(y),
s.window = frequency(y) + 1,
robust = TRUE,
method = "ets"
),
h = h)
}
fets <- function(y, h) {
forecast(ets(y),
h = h)
}
rmse1 <- sqrt(mean(tsCV(visitors, snaive, h = 1)^2, na.rm = TRUE))
rmse2 <- sqrt(mean(tsCV(visitors, fets_BC, h = 1)^2, na.rm = TRUE))
rmse3 <- sqrt(mean(tsCV(visitors, fstlm, h = 1)^2, na.rm = TRUE))
rmse4 <- sqrt(mean(tsCV(visitors, fets, h = 1)^2, na.rm = TRUE))
c113 <- c("ETS", "ETS Box Cox", "Seasonal Naive", "STL, Box Cox")
c114 <- c(rmse4, rmse2, rmse1, rmse3)
table12 <- data.frame(c113, c114)
colnames(table12) <- c("Model", "tsCV RMSE")
kable(table12) %>% kable_classic(full_width = F)
| Model | tsCV RMSE |
|---|---|
| ETS | 18.52985 |
| ETS Box Cox | 18.86439 |
| Seasonal Naive | 32.78675 |
| STL, Box Cox | 17.49642 |
The fets() function below returns ETS forecasts.
fets <- function(y, h) { forecast(ets(y), h = h) }
fets <- function(y, h) {
forecast(ets(y),
h = h)
}
ets_tscv <- tsCV(qcement, fets, h=4)
sNaive_tscv <- tsCV(qcement, snaive, h= 4)
mse1 <- mean(ets_tscv^2,na.rm=T)
mse2 <- mean(sNaive_tscv^2,na.rm=T)
c115 <- c("ETS", "Seasonal Naive")
c116 <- c(mse1, mse2)
table13 <- data.frame(c115, c116)
colnames(table13) <- c("Model", "MSE")
kable(table13) %>% kable_classic(full_width = F)
| Model | MSE |
|---|---|
| ETS | 0.0125095 |
| Seasonal Naive | 0.0179215 |
Compare ets(), snaive() and stlf() on the following six time series. For stlf(), you might need to use a Box-Cox transformation. Use a test set of three years to decide what gives the best forecasts. ausbeer, bricksq, dole, a10, h02, usmelec.
## Ausbeer
autoplot(ausbeer)
ausbeer_train <- subset(ausbeer, end = c(199))
ausbeer_test <- subset(ausbeer, start = c(200))
ets_ausbeer_train <- forecast(ets(ausbeer_train), h = 12)
snaive_ausbeer_train <- snaive(ausbeer_train, h = 12)
stlf_ausbeer_train <- stlf(ausbeer_train, h = 12, lambda = BoxCox.lambda(ausbeer_train))
forecast::accuracy(ets_ausbeer_train, ausbeer_test) %>% kable(caption = "ETS") %>% kable_classic()
| ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|---|
| Training set | -0.4049581 | 15.997418 | 12.216762 | -0.0752874 | 2.922013 | 0.7672362 | -0.1925426 | NA |
| Test set | 3.6462120 | 7.706877 | 6.202539 | 0.8431747 | 1.449081 | 0.3895314 | 0.0981837 | 0.1549774 |
forecast::accuracy(snaive_ausbeer_train, ausbeer_test) %>% kable(caption = "Naive") %>% kable_classic()
| ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|---|
| Training set | 3.328205 | 19.79109 | 15.92308 | 0.9155298 | 3.798192 | 1.0000000 | 0.0009618 | NA |
| Test set | 4.666667 | 18.71274 | 15.83333 | 0.7622802 | 3.637855 | 0.9943639 | 0.0603677 | 0.3495382 |
forecast::accuracy(stlf_ausbeer_train, ausbeer_test) %>% kable(caption = "STLF") %>% kable_classic()
| ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|---|
| Training set | 0.5358377 | 13.85012 | 10.675853 | 0.1282484 | 2.556999 | 0.6704642 | -0.1600355 | NA |
| Test set | 2.6130090 | 8.22296 | 6.778516 | 0.6482497 | 1.575916 | 0.4257039 | 0.1040930 | 0.1614592 |
## bricksq
autoplot(bricksq)
bricksq_train <- subset(bricksq, end = c(143))
bricksq_test <- subset(bricksq, start = c(144))
ets_bricksq_train <- forecast(ets(bricksq_train), h = 12)
snaive_bricksq_train <- snaive(bricksq_train, h = 12)
stlf_bricksq_train <- stlf(bricksq_train, h = 12, lambda = BoxCox.lambda(bricksq_train))
forecast::accuracy(ets_bricksq_train, bricksq_test) %>% kable(caption = "ETS") %>% kable_classic()
| ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|---|
| Training set | 1.228708 | 21.96146 | 15.81586 | 0.2606171 | 3.912882 | 0.4299638 | 0.2038074 | NA |
| Test set | 37.682916 | 42.36696 | 37.68292 | 8.4157549 | 8.415755 | 1.0244329 | 0.6190546 | 1.116608 |
forecast::accuracy(snaive_bricksq_train, bricksq_test) %>% kable(caption = "Naive") %>% kable_classic()
| ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|---|
| Training set | 6.438849 | 50.25482 | 36.78417 | 1.430237 | 8.999949 | 1.0000000 | 0.8116983 | NA |
| Test set | 15.333333 | 37.15284 | 33.66667 | 3.231487 | 7.549326 | 0.9152487 | -0.0931487 | 0.9538702 |
forecast::accuracy(stlf_bricksq_train, bricksq_test) %>% kable(caption = "STLF") %>% kable_classic()
| ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|---|
| Training set | 1.307145 | 20.65231 | 14.88304 | 0.3317424 | 3.630985 | 0.4046045 | 0.2143369 | NA |
| Test set | 39.841978 | 44.31471 | 40.65759 | 8.9561467 | 9.153632 | 1.1053013 | 0.5207064 | 1.152856 |
## dole
autoplot(dole)
dole_train <- subset(dole, end = c(427))
dole_test <- subset(dole, start = c(428))
ets_dole_train <- forecast(ets(dole_train), h = 12)
snaive_dole_train <- snaive(dole_train, h = 12)
stlf_dole_train <- stlf(dole_train, h = 12, lambda = BoxCox.lambda(dole_train))
forecast::accuracy(ets_dole_train, dole_test) %>% kable(caption = "ETS") %>% kable_classic()
| ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|---|
| Training set | 419.4624 | 18184.20 | 10856.63 | 0.5111136 | 6.268750 | 0.3014767 | 0.5191139 | NA |
| Test set | 5143.1303 | 46330.29 | 40948.53 | 1.0361996 | 5.319934 | 1.1370959 | 0.8678915 | 2.792915 |
forecast::accuracy(snaive_dole_train, dole_test) %>% kable(caption = "Naive") %>% kable_classic()
| ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|---|
| Training set | 16033.54 | 63643.2 | 36011.5 | 3.781301 | 27.34644 | 1.00000 | 0.9655291 | NA |
| Test set | 222180.50 | 224777.8 | 222180.5 | 28.712922 | 28.71292 | 6.16971 | 0.6869117 | 13.15477 |
forecast::accuracy(stlf_dole_train, dole_test) %>% kable(caption = "STLF") %>% kable_classic()
| ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|---|
| Training set | 428.6905 | 6534.903 | 3816.433 | 0.2205941 | 3.619427 | 0.1059782 | -0.0390089 | NA |
| Test set | 12861.3268 | 29442.441 | 22340.560 | 1.5036966 | 2.719281 | 0.6203730 | 0.6992449 | 1.692465 |
## a10
autoplot(a10)
a10_train <- subset(a10, end = c(192))
a10_test <- subset(a10, start = c(193))
ets_a10_train <- forecast(ets(a10_train), h = 12)
snaive_a10_train <- snaive(a10_train, h = 12)
stlf_a10_train <- stlf(a10_train, h = 12, lambda = BoxCox.lambda(a10_train))
forecast::accuracy(ets_a10_train, a10_test) %>% kable(caption = "ETS") %>% kable_classic()
| ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|---|
| Training set | 0.0814201 | 0.6834087 | 0.4666522 | 0.3611560 | 4.366536 | 0.4031628 | -0.1088647 | NA |
| Test set | 0.2374966 | 1.9938413 | 1.6968091 | 0.8537386 | 7.596603 | 1.4659532 | -0.3483479 | 0.6186824 |
forecast::accuracy(snaive_a10_train, a10_test) %>% kable(caption = "Naive") %>% kable_classic()
| ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|---|
| Training set | 1.118474 | 1.475289 | 1.157478 | 10.85668 | 11.22859 | 1.000000 | 0.4420062 | NA |
| Test set | 2.899299 | 3.901300 | 3.362144 | 12.21499 | 14.68060 | 2.904715 | -0.1958294 | 1.191566 |
forecast::accuracy(stlf_a10_train, a10_test) %>% kable(caption = "STLF") %>% kable_classic()
| ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|---|
| Training set | 0.0217968 | 0.5926686 | 0.406025 | -0.1409417 | 3.874051 | 0.3507841 | -0.0632069 | NA |
| Test set | 0.3720647 | 1.8798559 | 1.469918 | 1.2852494 | 6.785370 | 1.2699313 | -0.4319743 | 0.596385 |
## h02
autoplot(h02)
h02_train <- subset(h02, end = c(192))
h02_test <- subset(h02, start = c(193))
ets_h02_train <- forecast(ets(h02_train), h = 12)
snaive_h02_train <- snaive(h02_train, h = 12)
stlf_h02_train <- stlf(h02_train, h = 12, lambda = BoxCox.lambda(h02_train))
forecast::accuracy(ets_h02_train, h02_test) %>% kable(caption = "ETS") %>% kable_classic()
| ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|---|
| Training set | 0.0031124 | 0.0489702 | 0.0372233 | 0.1184611 | 4.857064 | 0.6309385 | 0.0016808 | NA |
| Test set | 0.0710635 | 0.0978407 | 0.0871653 | 7.1647044 | 9.303321 | 1.4774620 | -0.3141474 | 0.7109327 |
forecast::accuracy(snaive_h02_train, h02_test)%>% kable(caption = "Naive") %>% kable_classic()
| ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|---|
| Training set | 0.0313121 | 0.0713216 | 0.0589967 | 4.255045 | 7.910159 | 1.00000 | 0.3673519 | NA |
| Test set | 0.0528741 | 0.1080945 | 0.0849428 | 5.696945 | 9.686182 | 1.43979 | -0.4599591 | 0.9266397 |
forecast::accuracy(stlf_h02_train, h02_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.002432362 0.04132193 0.03191974 -0.03385613 4.167429 0.5410431
## Test set 0.069847415 0.09469091 0.08224503 7.14698241 8.911649 1.3940625
## ACF1 Theil's U
## Training set -0.2106805 NA
## Test set -0.4354237 0.7158866
##usmelec
autoplot(usmelec)
usmelec_train <- subset(usmelec, end = c(442))
usmelec_test <- subset(usmelec, start = c(443))
ets_usmelec_train <- forecast(ets(usmelec_train), h = 12)
snaive_usmelec_train <- snaive(usmelec_train, h = 12)
stlf_usmelec_train <- stlf(usmelec_train, h = 12, lambda = BoxCox.lambda(usmelec_train))
forecast::accuracy(ets_usmelec_train, usmelec_test)%>% kable(caption = "ETS") %>% kable_classic()
| ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|---|
| Training set | -0.0606942 | 7.306048 | 5.494686 | -0.0920879 | 2.140894 | 0.6139547 | 0.2570422 | NA |
| Test set | 12.8593581 | 16.595362 | 14.029488 | 3.5113260 | 3.900340 | 1.5676000 | 0.2517775 | 0.4468797 |
forecast::accuracy(snaive_usmelec_train, usmelec_test)%>% kable(caption = "Naive") %>% kable_classic()
| ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|---|
| Training set | 4.867619 | 11.52392 | 8.949661 | 1.997519 | 3.512776 | 1.000000 | 0.4875339 | NA |
| Test set | 12.245000 | 18.66867 | 14.769667 | 3.239457 | 4.093555 | 1.650305 | 0.4328251 | 0.4711825 |
forecast::accuracy(stlf_usmelec_train, usmelec_test)%>% kable(caption = "STLF") %>% kable_classic()
| ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|---|
| Training set | -0.125221 | 6.220615 | 4.676775 | -0.0459582 | 1.828004 | 0.5225646 | 0.0387364 | NA |
| Test set | 11.688806 | 15.885954 | 13.471455 | 3.1653927 | 3.756877 | 1.5052476 | 0.2738507 | 0.4305592 |
ets_model <- function(data) {
data %>% ets -> fit_ets
print(fit_ets)
print(fit_ets %>% autoplot)
print(fit_ets %>% forecast %>% autoplot)
}
ets_model(bicoal)
## ETS(M,N,N)
##
## Call:
## ets(y = .)
##
## Smoothing parameters:
## alpha = 0.8205
##
## Initial states:
## l = 542.665
##
## sigma: 0.1262
##
## AIC AICc BIC
## 595.2499 595.7832 600.9253
ets_model(chicken)
## ETS(M,N,N)
##
## Call:
## ets(y = .)
##
## Smoothing parameters:
## alpha = 0.98
##
## Initial states:
## l = 159.8322
##
## sigma: 0.1691
##
## AIC AICc BIC
## 635.2382 635.6018 641.9836
ets_model(dole)
## ETS(M,Ad,M)
##
## Call:
## ets(y = .)
##
## Smoothing parameters:
## alpha = 0.697
## beta = 0.124
## gamma = 0.303
## phi = 0.902
##
## Initial states:
## l = 2708.6621
## b = 836.017
## s = 1.0404 0.8893 0.9103 1.0301 1.0576 1.0584
## 0.9801 0.9632 1.021 0.9838 1.0145 1.0514
##
## sigma: 0.0935
##
## AIC AICc BIC
## 10602.67 10604.30 10676.19
ets_model(usdeaths)
## ETS(A,N,A)
##
## Call:
## ets(y = .)
##
## Smoothing parameters:
## alpha = 0.5972
## gamma = 0.0019
##
## Initial states:
## l = 9195.6403
## s = -62.6129 -270.0351 263.3823 -89.4907 1005.529 1662.647
## 795.2585 333.326 -551.161 -737.5102 -1552.872 -796.4611
##
## sigma: 294.4663
##
## AIC AICc BIC
## 1141.016 1149.587 1175.166
ets_model(bricksq)
## ETS(M,A,M)
##
## Call:
## ets(y = .)
##
## Smoothing parameters:
## alpha = 0.973
## beta = 0.0032
## gamma = 1e-04
##
## Initial states:
## l = 196.0437
## b = 4.9839
## s = 1.0041 1.0598 1.0269 0.9093
##
## sigma: 0.0514
##
## AIC AICc BIC
## 1725.473 1726.714 1752.864
ets_model(lynx)
## ETS(M,N,N)
##
## Call:
## ets(y = .)
##
## Smoothing parameters:
## alpha = 0.9999
##
## Initial states:
## l = 2372.8047
##
## sigma: 0.9594
##
## AIC AICc BIC
## 2058.138 2058.356 2066.346
ets_model(ibmclose)
## ETS(A,N,N)
##
## Call:
## ets(y = .)
##
## Smoothing parameters:
## alpha = 0.9999
##
## Initial states:
## l = 459.9339
##
## sigma: 7.2637
##
## AIC AICc BIC
## 3648.450 3648.515 3660.182
ets_model(eggs)
## ETS(M,N,N)
##
## Call:
## ets(y = .)
##
## Smoothing parameters:
## alpha = 0.8198
##
## Initial states:
## l = 278.8889
##
## sigma: 0.1355
##
## AIC AICc BIC
## 1043.286 1043.553 1050.916
Show that the point forecasts from an ETS(M,A,M) model are the same as those obtained using Holt-Winters’ multiplicative method.
bricksqsets <- ets(bricksq,model = "MAM")
bricksqsets_fc <- forecast(bricksqsets, h=1)
bricksqhw_fc <- hw(bricksq, seasonal = 'multiplicative', h=1)
bricksqsets_fc$mean
## Qtr4
## 1994 471.545
bricksqhw_fc$mean
## Qtr4
## 1994 470.7108
Show that the forecast variance for an ETS(A,N,N) model is given by σ2[1+α2(h−1)].
usmelec_ets <- ets(usmelec, model = "ANN")
usmelec_ets_fc <- forecast(usmelec_ets,h=1)
usmelec_ets_fc %>% kable() %>% kable_classic(full_width=F)
| Point Forecast | Lo 80 | Hi 80 | Lo 95 | Hi 95 | |
|---|---|---|---|---|---|
| Jul 2013 | 356.3966 | 325.7519 | 387.0413 | 309.5295 | 403.2636 |
alpha <- usmelec_ets$par[1]
sigma <- sqrt(usmelec_ets$sigma2)
h <- 2
a1 <- usmelec_ets_fc$mean[1]-((sigma)*(1+(alpha^2)*(h-1)))
a2 <-usmelec_ets_fc$lower[1,'95%']
table16 <- data.frame(c("Equation", "R"), c(a1, a2))
colnames(table16) <- c("Method", "Value")
table16 %>%kable() %>%kable_classic(full_width=F)
| Method | Value | |
|---|---|---|
| alpha | Equation | 308.5770 |
| 95% | R | 309.5295 |
Write down 95% prediction intervals for an ETS(A,N,N) model as a function of ℓT, α, h and σ, assuming normally distributed errors.
\(Upper = \ell_{T} + (1-\alpha)*\ell_{t-1} + (\sigma^{2}[1+\alpha^{2}*(h-1)])\)
\(Lower = \ell_{T} + (1-\alpha)*\ell_{t-1} - (\sigma^{2}[1+\alpha^{2}*(h-1)])\)
Figure 8.31 shows the ACFs for 36 random numbers, 360 random numbers and 1,000 random numbers.
The figures show white noise, since all the ACF bars fall within the critical values for the ACF to have statistical significance (dashed lines).
Since this is a randomly generated sample of numbers, the value of the critical value becomes smaller as the number of observations grows larger. Intuitively, a smaller data sets is more likely to exhibit “random” correlation by chance than a larger data set.
A classic example of a non-stationary series is the daily closing IBM stock price series (data set ibmclose). Use R to plot the daily closing prices for IBM stock and the ACF and PACF. Explain how each plot shows that the series is non-stationary and should be differenced. A nonstationary time series will have an ACF plot that decreases slowly and have values outside of the 95% limit
autoplot(ibmclose) + ggtitle("Daily IBM Stock Closing Price") + theme_classic()
ggAcf(ibmclose) + ggtitle("ACF Plot: IBM Close") + theme_classic()
ggPacf(ibmclose) + ggtitle("PACF Plot: IBM Close") + theme_classic()
The ACF plot values are not random and are larger than the critical values, suggesting correlation.
For the following series, find an appropriate Box-Cox transformation and order of differencing in order to obtain stationary data.
a.usnetelec
autoplot(usnetelec)+ theme_classic()
bc_usnetelec <- BoxCox(usnetelec, BoxCox.lambda(usnetelec))
summary(ur.kpss(bc_usnetelec))
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 3 lags.
##
## Value of test-statistic is: 1.4583
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
# Data is not stationary.
diff(bc_usnetelec) %>% ur.kpss %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 3 lags.
##
## Value of test-statistic is: 0.4315
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
ggAcf(diff(bc_usnetelec))
ggPacf(diff(bc_usnetelec))
autoplot(usgdp)+ theme_classic()
bc_usgdp <- BoxCox(usgdp, BoxCox.lambda(usgdp))
summary(ur.kpss(bc_usgdp))
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 4 lags.
##
## Value of test-statistic is: 4.8114
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
diff(bc_usgdp, differences = 2) %>% ur.kpss %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 4 lags.
##
## Value of test-statistic is: 0.012
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
ggAcf(diff(bc_usgdp, differences = 2))
ggPacf(diff(bc_usgdp, differences = 2))
autoplot(mcopper)+ theme_classic()
bc_mcopper <- BoxCox(mcopper, BoxCox.lambda(mcopper))
summary(ur.kpss(bc_mcopper))
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 6 lags.
##
## Value of test-statistic is: 6.2659
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
diff(bc_mcopper) %>% ur.kpss %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 6 lags.
##
## Value of test-statistic is: 0.0573
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
ggAcf(diff(bc_mcopper))
ggPacf(diff(bc_mcopper))
d. enplanements
autoplot(enplanements)+ theme_classic()
bc_enplanements <- BoxCox(enplanements, BoxCox.lambda(enplanements))
summary(ur.kpss(bc_enplanements))
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 4.3785
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
diff(bc_enplanements) %>% ur.kpss %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 0.0151
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
ggAcf(diff(bc_enplanements))
ggPacf(diff(bc_enplanements))
autoplot(visitors)+ theme_classic()
bc_visitors <- BoxCox(visitors, BoxCox.lambda(visitors))
summary(ur.kpss(bc_visitors))
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 4 lags.
##
## Value of test-statistic is: 4.5233
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
diff(bc_visitors) %>% ur.kpss %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 4 lags.
##
## Value of test-statistic is: 0.0519
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
ggAcf(diff(bc_visitors))
ggPacf(diff(bc_visitors))
For your retail data (from Exercise 3 in Section 2.10), find the appropriate order of differencing (after transformation if necessary) to obtain stationary data.
autoplot(tsretaildata) + theme_classic()
bc_tsretaildata <- BoxCox(tsretaildata, BoxCox.lambda(tsretaildata))
summary(ur.kpss(bc_tsretaildata))
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 5.8234
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
diff(bc_tsretaildata) %>% ur.kpss %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 0.0198
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
ggAcf(diff(bc_tsretaildata))
ggPacf(diff(bc_tsretaildata))
Use R to simulate and plot some data from simple ARIMA models.
y <- ts(numeric(100))
e <- rnorm(100)
AR1Model <- function(phi, y, e){
for (i in 2:100){
y[i] <- phi*y[i-1]+e[i]
}
return(y)
}
ar1series <- AR1Model(0.6, y, e)
autoplot(ar1series)
ar1series2 <- AR1Model(0.1, y, e)
ar1series3 <- AR1Model(0.5, y, e)
ar1series4 <- AR1Model(0.75, y, e)
ar1series5 <- AR1Model(0.9, y, e)
autoplot(ar1series, series = "0.6") + autolayer(ar1series2, series = "0.1") + autolayer(ar1series3, series = "0.5") + autolayer(ar1series4, series = "0.75") + autolayer(ar1series5, series = "0.9") +
ggtitle("Time Series with Different Phi Values ") + theme_classic()
It seems variance increases with \(\Phi\)
Write your own code to generate data from an MA(1) model with θ1=0.6 and σ2=1.
Produce a time plot for the series. How does the plot change as you change θ1?
ma1model <- function(theta, sd=1, n=100){
y <- ts(numeric(n))
e <- rnorm(n, sd=sd)
e[1] <- 0
for(i in 2:n)
y[i] <- theta*e[i-1] + e[i]
return(y)
}
ma1model1 <- ma1model(0.6)
ma1model2 <- ma1model(0.2)
ma1model3 <- ma1model(0.4)
ma1model4 <- ma1model(0.9)
autoplot(ma1model1, series = "0.6") + autolayer(ma1model2, series = "0.2") +
autolayer(ma1model3, series = "0.4") + autolayer(ma1model4, series = "0.9") +
ggtitle("Time Series with Different Theta Values") + theme_classic()
Generate data from an ARMA(1,1) model with ϕ1=0.6, θ1=0.6 and σ2=1.
Generate data from an AR(2) model with ϕ1=−0.8, ϕ2=0.3 and σ2=1. (Note that these parameters will give a non-stationary series.)
Graph the latter two series and compare them.
arma1model <- function(theta, phi, n=100){
y <- ts(numeric(n))
e <- rnorm(n)
e[1]<-0
for(i in 2:n)
y[i] <- phi*y[i-1]+theta*e[i-1] + e[i]
return(y)
}
arma1 <- arma1model(0.6, 0.6)
arma2model <- function( phi1, phi2, n=100){
y <- ts(numeric(n))
e <- rnorm(n)
e[1] <- 0
for(i in 3:n)
y[i] <- phi1*y[i-1]+phi2*e[i-2] + e[i]
return(y)
}
arma2 <- arma2model(-0.8, 0.3)
autoplot(arma1, series = "ARMA(1,1)") + autolayer(arma2, series = "AR2") + ggtitle("AR Models") + theme_classic()
ARMA (1,1) seems like a “better” series, given that AR(2) has high volatility and variation.
autoplot(wmurders) + ggtitle("US: Number of women murdered each year (per 100,000 standard population)") + theme_classic()
bc_wmurders <- BoxCox(wmurders, BoxCox.lambda(wmurders))
summary(ur.kpss(wmurders))
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 3 lags.
##
## Value of test-statistic is: 0.6331
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
diff(wmurders) %>% ur.kpss %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 3 lags.
##
## Value of test-statistic is: 0.4697
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
ggAcf(diff(wmurders))
ggPacf(diff(wmurders))
auto.arima(bc_wmurders)
## Series: bc_wmurders
## ARIMA(1,2,1)
##
## Coefficients:
## ar1 ma1
## -0.3006 -0.786
## s.e. 0.1529 0.119
##
## sigma^2 estimated as 0.002851: log likelihood=80.37
## AIC=-154.74 AICc=-154.25 BIC=-148.83
No. Residuals average to ~0, and adding a constant term might add drift to the series.
checkresiduals(auto.arima(bc_wmurders))
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,2,1)
## Q* = 10.378, df = 8, p-value = 0.2395
##
## Model df: 2. Total lags used: 10
Forecast three times ahead. Check your forecasts by hand to make sure that you know how they have been calculated.
Create a plot of the series with forecasts and prediction intervals for the next three periods shown.
Does auto.arima() give the same model you have chosen? If not, which model do you think is better?
forecastbcm <- forecast(auto.arima(bc_wmurders), 3)
forecastbcm
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2005 0.8727642 0.8043418 0.9411866 0.7681212 0.9774072
## 2006 0.8394207 0.7467510 0.9320904 0.6976947 0.9811468
## 2007 0.8050389 0.6833171 0.9267607 0.6188814 0.9911964
autoplot(forecastbcm)
Auto Arima selected the same model (1,2,1)
Consider austa, the total international visitors to Australia (in millions) for the period 1980-2015.
autoplot(austa)+ theme_classic()
autoarima1 <- auto.arima(austa)
checkresiduals(autoarima1)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1) with drift
## Q* = 2.297, df = 5, p-value = 0.8067
##
## Model df: 2. Total lags used: 7
forecastausta <- forecast(autoarima1, 10)
autoplot(forecastausta)+ theme_classic()
b.Plot forecasts from an ARIMA(0,1,1) model with no drift and compare these to part a. Remove the MA term and plot again.
arima2 <- Arima(austa, order = c(0,1,0), include.drift = F)
arima2
## Series: austa
## ARIMA(0,1,0)
##
## sigma^2 estimated as 0.06423: log likelihood=-1.62
## AIC=5.24 AICc=5.36 BIC=6.8
autoplot(forecast(arima2, 10)) + theme_classic()
arima3 <- Arima(austa, order = c(2,1,3), include.drift = T)
arima3
## Series: austa
## ARIMA(2,1,3) with drift
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3 drift
## 1.7648 -0.8513 -1.7684 0.5571 0.2224 0.1725
## s.e. 0.1136 0.1182 0.2433 0.4351 0.2150 0.0051
##
## sigma^2 estimated as 0.0276: log likelihood=13.72
## AIC=-13.44 AICc=-9.29 BIC=-2.55
autoplot(forecast(arima3, 10)) + theme_classic()
arima4 <- Arima(austa, order = c(2,1,0), include.drift = T)
arima4
## Series: austa
## ARIMA(2,1,0) with drift
##
## Coefficients:
## ar1 ar2 drift
## 0.2910 -0.1373 0.1726
## s.e. 0.1717 0.1768 0.0356
##
## sigma^2 estimated as 0.03462: log likelihood=10.71
## AIC=-13.42 AICc=-12.09 BIC=-7.2
autoplot(forecast(arima4, 10)) + theme_classic() + ggtitle("Forecasts from ARIMA(2,1,0) Without Constant")
autoplot(forecast(arima(austa, c(0,0,1))),h=10)
autoplot(forecast(arima(austa, c(0,0,0))),h=10)
fcarima <- forecast(Arima(austa, order = c(0,2,1)))
autoplot(fcarima, h=10)
For the usgdp series
autoplot(usgdp) + ggtitle("US GDP") + theme_classic()
bc_usgdp <- BoxCox(usgdp, BoxCox.lambda(usgdp))
autoplot(bc_usgdp) + ggtitle("Box Cox: US GDP") + theme_classic()
gdparima <- auto.arima(usgdp, lambda = BoxCox.lambda(usgdp))
summary(gdparima)
## Series: usgdp
## ARIMA(2,1,0) with drift
## Box Cox transformation: lambda= 0.366352
##
## Coefficients:
## ar1 ar2 drift
## 0.2795 0.1208 0.1829
## s.e. 0.0647 0.0648 0.0202
##
## sigma^2 estimated as 0.03518: log likelihood=61.56
## AIC=-115.11 AICc=-114.94 BIC=-101.26
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 1.195275 39.2224 29.29521 -0.01363259 0.6863491 0.1655687
## ACF1
## Training set -0.03824844
checkresiduals(gdparima)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,0) with drift
## Q* = 6.5772, df = 5, p-value = 0.254
##
## Model df: 3. Total lags used: 8
gdparima2 <- arima(usgdp, order = c(1,1,0))
summary(gdparima2)
##
## Call:
## arima(x = usgdp, order = c(1, 1, 0))
##
## Coefficients:
## ar1
## 0.6847
## s.e. 0.0481
##
## sigma^2 estimated as 2068: log likelihood = -1236.04, aic = 2476.09
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 13.54033 45.38063 34.92976 0.2725113 0.7815987 0.7001914
## ACF1
## Training set -0.3735535
checkresiduals(gdparima2)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,0)
## Q* = 50.316, df = 7, p-value = 1.252e-08
##
## Model df: 1. Total lags used: 8
gdparima3 <- arima(usgdp, order = c(0,1,0))
summary(gdparima3)
##
## Call:
## arima(x = usgdp, order = c(0, 1, 0))
##
##
## sigma^2 estimated as 3832: log likelihood = -1308.5, aic = 2619.01
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 41.4965 61.77259 49.68215 0.8286231 1.04986 0.9959134 0.4070805
checkresiduals(gdparima3)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0)
## Q* = 108.19, df = 8, p-value < 2.2e-16
##
## Model df: 0. Total lags used: 8
gdparima4 <- arima(usgdp, order = c(1,1,1))
summary(gdparima4)
##
## Call:
## arima(x = usgdp, order = c(1, 1, 1))
##
## Coefficients:
## ar1 ma1
## 0.9624 -0.6426
## s.e. 0.0256 0.0871
##
## sigma^2 estimated as 1717: log likelihood = -1214.39, aic = 2434.79
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 5.498932 41.35172 31.44568 0.1166333 0.7380672 0.6303506
## ACF1
## Training set 0.01309587
checkresiduals(gdparima4)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,1)
## Q* = 17.319, df = 6, p-value = 0.008179
##
## Model df: 2. Total lags used: 8
ARIMA (2,1,0) seems like the best. See above for results.
arimagdp_fc <- forecast(gdparima, h=10)
autoplot(arimagdp_fc)
Results are reasonable.
ets_usgdp <- ets(usgdp)
checkresiduals(ets_usgdp)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,A,N)
## Q* = 21.461, df = 4, p-value = 0.0002565
##
## Model df: 4. Total lags used: 8
etsgdp_fc <- forecast(ets_usgdp, h=10)
autoplot(etsgdp_fc)
forecast::accuracy(usgdp,gdparima$fitted) %>% kable(caption = "Arima Accuracy") %>% kable_classic()
| ME | RMSE | MAE | MPE | MAPE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|
| Test set | -1.195275 | 39.2224 | 29.29521 | 0.0050345 | 0.6858867 | -0.0382484 | 0.6305061 |
forecast::accuracy(usgdp,ets_usgdp$fitted) %>% kable(caption = "ETS Accuracy") %>% kable_classic()
| ME | RMSE | MAE | MPE | MAPE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|
| Test set | -1.30185 | 41.53448 | 31.85324 | -0.033319 | 0.7555654 | 0.0759649 | 0.6758224 |
Arima model is better.
Consider austourists, the quarterly visitor nights (in millions) spent by international tourists to Australia for the period 1999–2015.
autoplot(austourists) + theme_classic()
Clear and strong seasonality, with an upward trend.
What can you learn from the ACF graph?
What can you learn from the PACF graph?
tsdisplay(austourists)
From the ACF chart we can see a strong seasonal component every 4 lags. From the PACF we see strong correlations for the first 4 lags. Since it’s quarterly data, we can see it’s dependent on the previous year.
autoplot(diff(austourists, lag=4, differences=1))
tsdisplay(diff(austourists, lag=4, differences=1))
auto.arima(austourists)
## Series: austourists
## ARIMA(1,0,0)(1,1,0)[4] with drift
##
## Coefficients:
## ar1 sar1 drift
## 0.4705 -0.5305 0.5489
## s.e. 0.1154 0.1122 0.0864
##
## sigma^2 estimated as 5.15: log likelihood=-142.48
## AIC=292.97 AICc=293.65 BIC=301.6
It’s the same model as part d.
Consider usmelec, the total net generation of electricity (in billion kilowatt hours) by the U.S. electric industry (monthly for the period January 1973 – June 2013). In general there are two peaks per year: in mid-summer and mid-winter.
maelec <- ma(usmelec, order = 12)
autoplot(usmelec, series = "USMELEC") + autolayer(maelec, series = "USMELEC, 12MMA") + theme_classic()
bc_usmelec <- BoxCox(usmelec, BoxCox.lambda(usmelec))
autoplot(bc_usmelec)
ggtsdisplay(bc_usmelec)
Data is not stationary. Diff of 12 might be a good option.
bc_usmelec_diff <- diff(diff(bc_usmelec, lag=12))
ggtsdisplay(bc_usmelec_diff)
arima_elec <- auto.arima(bc_usmelec_diff)
arima_elec
## Series: bc_usmelec_diff
## ARIMA(1,0,1)(2,0,1)[12] with non-zero mean
##
## Coefficients:
## ar1 ma1 sar1 sar2 sma1 mean
## 0.3994 -0.8398 0.0434 -0.0947 -0.8560 0e+00
## s.e. 0.0630 0.0374 0.0586 0.0550 0.0362 1e-04
##
## sigma^2 estimated as 1.194e-06: log likelihood=2548.5
## AIC=-5083 AICc=-5082.76 BIC=-5053.89
arima_elec1 <- arima(bc_usmelec_diff, order = c(1,1,1), seasonal = c(0,1,0))
arima_elec1
##
## Call:
## arima(x = bc_usmelec_diff, order = c(1, 1, 1), seasonal = c(0, 1, 0))
##
## Coefficients:
## ar1 ma1
## -0.3196 -1.0000
## s.e. 0.0442 0.0055
##
## sigma^2 estimated as 6.573e-06: log likelihood = 2088.36, aic = -4170.73
arima_elec2 <- arima(bc_usmelec_diff, order = c(1,1,1), seasonal = c(0,1,1))
arima_elec2
##
## Call:
## arima(x = bc_usmelec_diff, order = c(1, 1, 1), seasonal = c(0, 1, 1))
##
## Coefficients:
## ar1 ma1 sma1
## -0.3056 -1.0000 -1.0000
## s.e. 0.0444 0.0085 0.0197
##
## sigma^2 estimated as 2.357e-06: log likelihood = 2299.75, aic = -4591.5
arima_elec3 <- arima(bc_usmelec_diff, order = c(1,1,2), seasonal = c(0,1,0))
arima_elec3
##
## Call:
## arima(x = bc_usmelec_diff, order = c(1, 1, 2), seasonal = c(0, 1, 0))
##
## Coefficients:
## ar1 ma1 ma2
## 0.5062 -1.9845 0.9847
## s.e. 0.0453 0.0165 0.0164
##
## sigma^2 estimated as 5.487e-06: log likelihood = 2126.68, aic = -4245.37
arima_elec4 <- arima(bc_usmelec_diff, order = c(1,1,2), seasonal = c(0,1,1))
arima_elec4
##
## Call:
## arima(x = bc_usmelec_diff, order = c(1, 1, 2), seasonal = c(0, 1, 1))
##
## Coefficients:
## ar1 ma1 ma2 sma1
## 0.3292 -1.8225 0.8226 -0.9999
## s.e. 0.0859 0.0635 0.0631 0.0197
##
## sigma^2 estimated as 2.039e-06: log likelihood = 2331.75, aic = -4653.51
c8111 <- c("Auto Arima (1,0,1)(2,0,1)", "Arima (1,1,1)(0,0,1)",
"Arima (1,1,1)(0,1,1)","Arima (1,1,2)(0,1,0)","Arima (1,1,2)(0,1,1)")
c8112 <- c(arima_elec$aic, arima_elec1$aic, arima_elec2$aic, arima_elec3$aic, arima_elec4$aic)
c8113 <- c(arima_elec$aicc, arima_elec1$aicc, arima_elec2$aicc, arima_elec3$aicc, arima_elec4$aicc)
c8114 <- c(arima_elec$bic, arima_elec1$bic, arima_elec2$bic, arima_elec3$bic, arima_elec4$bic)
table811 <- data.frame(c8111, c8112, c8113, c8114)
colnames(table811) <- c("Model", "AIC", "AICC", "BIC")
kable(table811) %>% kable_classic()
| Model | AIC | AICC | BIC |
|---|---|---|---|
| Auto Arima (1,0,1)(2,0,1) | -5082.999 | -5082.758 | -5053.885 |
| Arima (1,1,1)(0,0,1) | -4170.729 | -5082.758 | -5053.885 |
| Arima (1,1,1)(0,1,1) | -4591.498 | -5082.758 | -5053.885 |
| Arima (1,1,2)(0,1,0) | -4245.369 | -5082.758 | -5053.885 |
| Arima (1,1,2)(0,1,1) | -4653.509 | -5082.758 | -5053.885 |
Auto Arima has the best AIC value.
summary(arima_elec)
## Series: bc_usmelec_diff
## ARIMA(1,0,1)(2,0,1)[12] with non-zero mean
##
## Coefficients:
## ar1 ma1 sar1 sar2 sma1 mean
## 0.3994 -0.8398 0.0434 -0.0947 -0.8560 0e+00
## s.e. 0.0630 0.0374 0.0586 0.0550 0.0362 1e-04
##
## sigma^2 estimated as 1.194e-06: log likelihood=2548.5
## AIC=-5083 AICc=-5082.76 BIC=-5053.89
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 1.677983e-05 0.001085716 0.0008355439 29.61005 152.7566 0.4004158
## ACF1
## Training set 0.008433402
checkresiduals(arima_elec)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,1)(2,0,1)[12] with non-zero mean
## Q* = 32.631, df = 18, p-value = 0.01849
##
## Model df: 6. Total lags used: 24
arima_elec_fc <- forecast(arima_elec, h=180)
autoplot(arima_elec_fc)
A model of this type is probably valuable in the short term, around a year or so.
For the mcopper data:
a.if necessary, find a suitable Box-Cox transformation for the data;
d.choose what you think is the best model and check the residual diagnostics;
autoplot(mcopper)
bc_mcopper <- BoxCox(mcopper, BoxCox.lambda(mcopper))
autoplot(bc_mcopper)
ggtsdisplay(bc_mcopper)
arima_mcopper <- auto.arima(bc_mcopper)
summary(arima_mcopper)
## Series: bc_mcopper
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## 0.3720
## s.e. 0.0388
##
## sigma^2 estimated as 0.04997: log likelihood=45.05
## AIC=-86.1 AICc=-86.08 BIC=-77.43
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01254827 0.2231365 0.1592271 0.08049384 1.140225 0.1997301
## ACF1
## Training set -0.004184621
checkresiduals(arima_mcopper)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)
## Q* = 22.913, df = 23, p-value = 0.4659
##
## Model df: 1. Total lags used: 24
arima_mcopper$aic
## [1] -86.0969
e.produce forecasts of your fitted model. Do the forecasts look reasonable?
mcopper_fc <- forecast(arima_mcopper, h = 60)
autoplot(mcopper_fc) + autolayer(mcopper_fc$fitted, series = "Fitted")
forecast::accuracy(mcopper_fc) %>% kable() %>% kable_classic()
| ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | |
|---|---|---|---|---|---|---|---|
| Training set | 0.0125483 | 0.2231365 | 0.1592271 | 0.0804938 | 1.140225 | 0.1997301 | -0.0041846 |
mcopper_ets <- ets(bc_mcopper)
checkresiduals(mcopper_ets)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,Ad,N)
## Q* = 80.996, df = 19, p-value = 1.252e-09
##
## Model df: 5. Total lags used: 24
autoplot(forecast(mcopper_ets, h = 60))
forecast::accuracy(forecast(mcopper_ets, h = 60)) %>% kable() %>% kable_classic()
| ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | |
|---|---|---|---|---|---|---|---|
| Training set | 0.008893 | 0.2330528 | 0.1662713 | 0.0639587 | 1.19264 | 0.2085661 | 0.1508258 |
Choose one of the following seasonal time series: hsales, auscafe, qauselec, qcement, qgas.
Do the data need transforming? If so, find a suitable transformation.
Are the data stationary? If not, find an appropriate differencing which yields stationary data. The data is not stationary
autoplot(qgas)
ggtsdisplay(qgas)
bc_qgas <- BoxCox(qgas, BoxCox.lambda(qgas))
diff_bcqgas <-diff(diff(bc_qgas, lag = 4))
ggtsdisplay(diff_bcqgas)
Identify a couple of ARIMA models that might be useful in describing the time series. Which of your models is the best according to their AIC values?
Estimate the parameters of your best model and do diagnostic testing on the residuals. Do the residuals resemble white noise? If not, try to find another ARIMA model which fits better. Residuals look good enough
arima_qgas <- auto.arima(diff_bcqgas)
summary(arima_qgas)
## Series: diff_bcqgas
## ARIMA(0,0,0)(0,0,1)[4] with zero mean
##
## Coefficients:
## sma1
## -0.694
## s.e. 0.069
##
## sigma^2 estimated as 0.00464: log likelihood=269.17
## AIC=-534.35 AICc=-534.29 BIC=-527.62
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -8.92166e-05 0.06796024 0.05006815 89.24758 149.051 0.4630326
## ACF1
## Training set -0.02559135
qgas_fc <- forecast(arima_qgas, h=12)
autoplot(qgas_fc)
For the same time series you used in the previous exercise, try using a non-seasonal model applied to the seasonally adjusted data obtained from STL. The stlf() function will make the calculations easy (with method=“arima”). Compare the forecasts with those obtained in the previous exercise. Which do you think is the best approach?
qgas_stl <- stl(qgas, t.window = 13, s.window="period")
qgas_sa <- seasadj(qgas_stl)
qgas_stl_fc <- stlf(qgas_sa, method = "arima")
qgas_fc1 <- forecast(qgas_stl_fc, 13)
autoplot(qgas_fc1)
checkresiduals(qgas_stl_fc)
##
## Ljung-Box test
##
## data: Residuals from STL + ARIMA(2,1,2) with drift
## Q* = 0.81919, df = 3, p-value = 0.8449
##
## Model df: 5. Total lags used: 8
For your retail time series (Exercise 5 above)
develop an appropriate seasonal ARIMA model; compare the forecasts with those you obtained in earlier chapters;
compare the forecasts with those you obtained in earlier chapters; The model I used in problem 5 is also seasonal, though it does not have the (1,0,1) ARIMA, but a (1,0,0), so it is slightly worse
Obtain up-to-date retail data from the ABS website (Cat 8501.0, Table 11), and compare your forecasts with the actual numbers. How good were the forecasts from the various models?
retail_new <- read_csv("F:/MSAE/2020fall_PredictiveAnalytics/retailcsv.csv")
retail_test <- ts(retail_new,frequency=12, start=c(1982,4))
retail_train1 <- window(retail_test, end = c(2019, 9))
arimaretail <- auto.arima(retail_train1)
summary(arimaretail)
## Series: retail_train1
## ARIMA(0,1,2)(0,1,1)[12]
##
## Coefficients:
## ma1 ma2 sma1
## -0.3550 -0.1807 -0.3976
## s.e. 0.0473 0.0481 0.0452
##
## sigma^2 estimated as 233: log likelihood=-1810.77
## AIC=3629.54 AICc=3629.63 BIC=3645.86
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1267375 14.98961 10.07035 -0.09731141 3.755033 0.4741941
## ACF1
## Training set 0.003221027
arimaretail_fc <- forecast(arimaretail, h=12)
forecast::accuracy(arimaretail_fc, retail_test) %>% kable() %>% kable_classic()
| ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|---|
| Training set | 0.1267375 | 14.98961 | 10.07035 | -0.0973114 | 3.755033 | 0.4741941 | 0.0032210 | NA |
| Test set | 20.5024039 | 29.70698 | 24.03942 | 3.2713101 | 3.865004 | 1.1319722 | 0.3780508 | 0.4663265 |
autoplot(sheep)
ARIMA (3, 1, 0)
ggtsdisplay(diff(sheep))
Now fit the model in R and obtain the forecasts using forecast. How are they different from yours? Why?
sheepval <- c(1648,1665,1627,1791,1797)
phi1 <- 0.42
phi2 <- -0.2
phi3 <- -0.3
sheep1940 = 1797 + phi1*(1797 - 1791) + phi2*(1791 - 1627) + phi3*(1627 - 1665)
sheep1941 = sheep1940 + phi1*(sheep1940 - 1797) + phi2*(1797 - 1791) + phi3*(1791 - 1627)
sheep1942 = sheep1941 + phi1*(sheep1941 - sheep1940) + phi2*(sheep1940 - 1797) + phi3*(1797 - 1791)
sheepnew <- c(sheep1940, sheep1941, sheep1942)
sheepnew %>% kable(caption = "Calculated Forecast") %>%
kable_classic(full_width = F)
| x |
|---|
| 1778.120 |
| 1719.790 |
| 1697.268 |
sheep_arima_fc <- forecast(arima(sheep, order = c(3,1,0)), h=3)
sheep_arima_fc$mean %>% kable(caption = "R Forecasts") %>%
kable_classic(full_width = F)
| x |
|---|
| 1777.996 |
| 1718.869 |
| 1695.985 |
(sheep_arima_fc$mean - sheepnew) %>% kable(caption = "Differences in Forecasts") %>%
kable_classic(full_width = F)
| x |
|---|
| -0.1242328 |
| -0.9218464 |
| -1.2829667 |
They are only slightly different, and they are likely to increase as the coefficients are slightly different, whcih magnifies differences.
Annual bituminous coal production in the United States from 1920 to 1968 is in data set bicoal. a. Plot the annual bituminous coal production in the United States from 1920 to 1968 (data set bicoal).
autoplot(bicoal)
ARIMA(4, 0, 0)
ggtsdisplay(bicoal)
The last five values of the series are given below.
Now fit the model in R and obtain the forecasts from the same model. How are they different from yours? Why?
coalval <- c(467,512,534,552,545)
c <- 162
phi1 <- 0.83
phi2 <- -0.34
phi3 <- 0.55
phi4 <- -0.38
coal1969 = c + phi1*545 + phi2*552 + phi3*534 + phi4*512
coal1970 <- c + phi1*coal1969 + phi2*545 + phi3*552 + phi4*534
coal1971 = c + phi1*coal1970 + phi2*coal1969 + phi3*545 + phi4*552
coalnew <- c(coal1969, coal1970, coal1971)
coalnew %>% kable(caption = "Calculated Forecast") %>%
kable_classic(full_width = F)
| x |
|---|
| 525.8100 |
| 513.8023 |
| 499.6705 |
coal_arima_fc <- forecast(arima(bicoal, order = c(4,0,0)), h=3)
coal_arima_fc$mean %>% kable(caption = "R Forecasts") %>%
kable_classic(full_width = F)
| x |
|---|
| 527.6291 |
| 517.1923 |
| 503.8051 |
(coal_arima_fc$mean - coalnew) %>% kable(caption = "Differences in Forecasts") %>%
kable_classic(full_width = F)
| x |
|---|
| 1.819145 |
| 3.390011 |
| 4.134563 |
Before doing this exercise, you will need to install the Quandl package in R using install.packages(“Quandl”)
Select a time series from Quandl. Then copy its short URL and import the data using y <- Quandl(“?????”, api_key=“?????”, type=“ts”) (Replace each ????? with the appropriate values.)
Plot graphs of the data, and try to identify an appropriate ARIMA model.
library(Quandl)
Quandl.api_key('skpzv9x3Vztpi9qqbFy3')
zillow <- Quandl.datatable('ZILLOW/DATA', indicator_id='ZSFH', region_id='99999')
zillow <- zillow[order(zillow$date),]
tszillow <- ts(zillow[,"value"], start = c(2005,1), frequency = 12)
autoplot(tszillow) + ggtitle("Zillow: Single Family Home Value") + theme_classic()
ggtsdisplay((tszillow))
arimazillow <- auto.arima((tszillow))
summary(arimazillow)
## Series: (tszillow)
## ARIMA(0,1,0) with drift
##
## Coefficients:
## drift
## 953.5426
## s.e. 200.8497
##
## sigma^2 estimated as 7624731: log likelihood=-1755.87
## AIC=3515.73 AICc=3515.8 BIC=3522.21
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.8679067 2746.643 1329.621 -0.01569936 0.5367952 0.08939714
## ACF1
## Training set 0.03062656
checkresiduals(arimazillow)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0) with drift
## Q* = 19.359, df = 23, p-value = 0.6802
##
## Model df: 1. Total lags used: 24
forecast(arimazillow, h=48)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Oct 2020 345207.5 341668.8 348746.3 339795.5 350619.6
## Nov 2020 346161.1 341156.6 351165.6 338507.3 353814.9
## Dec 2020 347114.6 340985.4 353243.9 337740.7 356488.5
## Jan 2021 348068.2 340990.7 355145.6 337244.1 358892.2
## Feb 2021 349021.7 341108.9 356934.6 336920.0 361123.4
## Mar 2021 349975.3 341307.2 358643.4 336718.5 363232.0
## Apr 2021 350928.8 341566.2 360291.4 336609.9 365247.7
## May 2021 351882.3 341873.3 361891.4 336574.8 367189.9
## Jun 2021 352835.9 342219.7 363452.1 336599.8 369072.0
## Jul 2021 353789.4 342599.0 364979.9 336675.1 370903.8
## Aug 2021 354743.0 343006.3 366479.6 336793.3 372692.6
## Sep 2021 355696.5 343438.0 367955.1 336948.7 374444.3
## Oct 2021 356650.1 343891.0 369409.2 337136.7 376163.4
## Nov 2021 357603.6 344362.9 370844.3 337353.6 377853.6
## Dec 2021 358557.1 344851.7 372262.6 337596.4 379517.8
## Jan 2022 359510.7 345355.7 373665.6 337862.6 381158.8
## Feb 2022 360464.2 345873.6 375054.8 338149.8 382778.6
## Mar 2022 361417.8 346404.2 376431.4 338456.5 384379.1
## Apr 2022 362371.3 346946.3 377796.3 338780.8 385961.8
## May 2022 363324.9 347499.1 379150.6 339121.5 387528.2
## Jun 2022 364278.4 348061.9 380494.9 339477.3 389079.4
## Jul 2022 365231.9 348633.8 381830.1 339847.3 390616.6
## Aug 2022 366185.5 349214.3 383156.7 340230.3 392140.7
## Sep 2022 367139.0 349802.8 384475.2 340625.6 393652.5
## Oct 2022 368092.6 350398.9 385786.3 341032.4 395152.7
## Nov 2022 369046.1 351002.0 387090.2 341450.0 396642.2
## Dec 2022 369999.6 351611.8 388387.5 341877.9 398121.4
## Jan 2023 370953.2 352228.0 389678.4 342315.4 399591.0
## Feb 2023 371906.7 352850.0 390963.4 342762.0 401051.4
## Mar 2023 372860.3 353477.8 392242.7 343217.4 402503.2
## Apr 2023 373813.8 354111.0 393516.7 343680.9 403946.7
## May 2023 374767.4 354749.2 394785.5 344152.3 405382.4
## Jun 2023 375720.9 355392.4 396049.4 344631.1 406810.7
## Jul 2023 376674.4 356040.2 397308.7 345117.1 408231.7
## Aug 2023 377628.0 356692.5 398563.4 345610.0 409646.0
## Sep 2023 378581.5 357349.1 399814.0 346109.3 411053.7
## Oct 2023 379535.1 358009.8 401060.4 346615.0 412455.2
## Nov 2023 380488.6 358674.4 402302.9 347126.6 413850.6
## Dec 2023 381442.2 359342.8 403541.6 347644.0 415240.3
## Jan 2024 382395.7 360014.8 404776.6 348167.0 416624.4
## Feb 2024 383349.2 360690.3 406008.2 348695.3 418003.2
## Mar 2024 384302.8 361369.1 407236.4 349228.8 419376.8
## Apr 2024 385256.3 362051.3 408461.4 349767.3 420745.4
## May 2024 386209.9 362736.5 409683.2 350310.5 422109.2
## Jun 2024 387163.4 363424.8 410902.0 350858.4 423468.4
## Jul 2024 388117.0 364116.1 412117.8 351410.8 424823.1
## Aug 2024 389070.5 364810.1 413330.9 351967.5 426173.5
## Sep 2024 390024.0 365506.9 414541.1 352528.4 427519.7
autoplot(forecast(arimazillow, h=48))
Now try to identify an appropriate ETS model.
Do residual diagnostic checking of your ETS model. Are the residuals white noise?
g, Use your chosen ETS model to forecast the next four years.
zillowets <- ets(tszillow)
fc_zillowets <- forecast(zillowets, h=48)
checkresiduals(zillowets)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,A,N)
## Q* = 19.92, df = 20, p-value = 0.4629
##
## Model df: 4. Total lags used: 24
autoplot(fc_zillowets, series = "ETS") + autolayer(forecast(arimazillow, h=48), series = "ARIMA", PI = F)
I prefer the Arima model. Even though it’s simple, it fits the data better. The bands are also narrower.