Chapter 7 Exercises
1. Consider the pigs series — the number of pigs slaughtered in Victoria each month.
library(fpp2)
library(Metrics)
##
## Attaching package: 'Metrics'
## The following object is masked from 'package:forecast':
##
## accuracy
library(ggplot2)
library(tseries)
library(forecast)
autoplot(decompose(pigs))
fit1 <- ses(pigs, h = 4)
summary(fit1)
##
## Forecast method: Simple exponential smoothing
##
## Model Information:
## Simple exponential smoothing
##
## Call:
## ses(y = pigs, h = 4)
##
## Smoothing parameters:
## alpha = 0.2971
##
## Initial states:
## l = 77260.0561
##
## sigma: 10308.58
##
## AIC AICc BIC
## 4462.955 4463.086 4472.665
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 385.8721 10253.6 7961.383 -0.922652 9.274016 0.7966249 0.01282239
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Sep 1995 98816.41 85605.43 112027.4 78611.97 119020.8
## Oct 1995 98816.41 85034.52 112598.3 77738.83 119894.0
## Nov 1995 98816.41 84486.34 113146.5 76900.46 120732.4
## Dec 1995 98816.41 83958.37 113674.4 76092.99 121539.8
l <- fit1$model$par[2]
a <- fit1$model$par[1]
print(paste("alpha:", a, "l0:", l))
## [1] "alpha: 0.297148833372095 l0: 77260.0561458528"
s <- sd(fit1$residuals)
#Upper
fit1$mean[1] + 1.96*s
## [1] 118952.8
#Lower
fit1$mean[1] - 1.96*s
## [1] 78679.97
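These hand-computed bounds are slightly narrower than the model's own 95% interval (78611.97 to 119020.8 in the summary above), presumably because sd() of the residuals does not adjust for the estimated parameters the way the model's sigma does. The stored bounds can also be read straight off the forecast object:
fit1$lower[1,'95%']
fit1$upper[1,'95%']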
# Simple exponential smoothing by hand:
#   l_t = alpha * y_t + (1 - alpha) * l_{t-1}, starting from l_0 = level.
# The h-step-ahead forecast is flat at the final level.
mySES <- function(y, alpha, level, h) {
  l <- level
  for (i in seq_along(y)) {
    l <- alpha * y[i] + (1 - alpha) * l
  }
  rep(l, h)
}
print(paste('forecast h=1:', round(mySES(pigs, alpha = a, level = l, h = 1), 2)))
## [1] "forecast h=1: 98816.41"
fit1$mean
## Sep Oct Nov Dec
## 1995 98816.41 98816.41 98816.41 98816.41
# Sum of squared one-step errors for SES; minimised over pars = c(alpha, l0) by optim().
mySES2 <- function(pars, y) {
  alpha <- pars[1]
  l0 <- pars[2]
  SSE <- 0
  y_hat <- l0
  for (index in seq_along(y)) {
    error <- y[index] - y_hat
    SSE <- SSE + error^2
    y_hat <- alpha * y[index] + (1 - alpha) * y_hat
  }
  return(SSE)
}
optParam <- optim(par = c(0.5, pigs[1]), y = pigs, fn = mySES2)
#Parameters found by optim:
print(paste("Alpha:", optParam$par[1], "l0:", optParam$par[2]))
## [1] "Alpha: 0.299008094014243 l0: 76379.2653476235"
#Parameters estimated by ses():
print(paste("Alpha:", a, "l0",l))
## [1] "Alpha: 0.297148833372095 l0 77260.0561458528"
mySES3 <- function(y){
  optParam <- optim(par = c(0.5, y[1]), y = y, fn = mySES2)
  a <- optParam$par[1]
  l <- optParam$par[2]
  preds <- mySES(y, a, l, 1)
  return(preds)
}
mySES3(pigs)
## [1] 98308.8
ses(pigs, h = 1)$mean
## Sep
## 1995 98816.41
autoplot(books)
autoplot(ses(books[,1], h = 4))
autoplot(ses(books[,2], h = 4))
rmse(books[,1],ses(books[,1])$fitted)
## [1] 33.63769
rmse(books[,2],ses(books[,2])$fitted)
## [1] 31.93101
#Paperback
holt(books[,1], h= 4)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 209.4668 166.6035 252.3301 143.9130 275.0205
## 32 210.7177 167.8544 253.5811 145.1640 276.2715
## 33 211.9687 169.1054 254.8320 146.4149 277.5225
## 34 213.2197 170.3564 256.0830 147.6659 278.7735
#Hardcover
holt(books[,2], h= 4)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 250.1739 212.7390 287.6087 192.9222 307.4256
## 32 253.4765 216.0416 290.9113 196.2248 310.7282
## 33 256.7791 219.3442 294.2140 199.5274 314.0308
## 34 260.0817 222.6468 297.5166 202.8300 317.3334
#Paperback
rmse(books[,1],holt(books[,1])$fitted)
## [1] 31.13692
#Hardcover
rmse(books[,2],holt(books[,2])$fitted)
## [1] 27.19358
#Paperback
Paperback.ses <- ses(books[,1],h=4)
#Hardcover
Hardcover.ses <- ses(books[,2],h=4)
#Paperback
Paperback.h <- holt(books[,1], h= 4)
#Hardcover
Hardcover.h <- holt(books[,2], h= 4)
autoplot(books) +
autolayer(Paperback.ses, series = 'SES Paperback') +
autolayer(Hardcover.ses, series = 'SES Hardcover') +
autolayer(Paperback.h, series = 'Holt Paperback') +
autolayer(Hardcover.h, series = 'Holt Hardcover')
paperback.ses.rmse <- rmse(books[,1],Paperback.ses$fitted)
paperback.holt.rmse <- rmse(books[,1], Paperback.h$fitted)
hardcover.ses.rmse <- rmse(books[,2],Hardcover.ses$fitted)
hardcover.holt.rmse <- rmse(books[,2],Hardcover.h$fitted)
Paperback.ses$mean[1] + 1.96*paperback.ses.rmse
## [1] 273.0395
Paperback.ses$mean[1] - 1.96*paperback.ses.rmse
## [1] 141.1798
Paperback.ses$upper[1,'95%']
## 95%
## 275.3523
Paperback.ses$lower[1,'95%']
## 95%
## 138.867
Paperback.h$mean[1] + 1.96*paperback.holt.rmse
## [1] 270.4951
Paperback.h$mean[1] - 1.96*paperback.holt.rmse
## [1] 148.4384
Paperback.h$upper[1,'95%']
## 95%
## 275.0205
Paperback.h$lower[1,'95%']
## 95%
## 143.913
Hardcover.ses$mean[1] + 1.96*hardcover.ses.rmse
## [1] 302.1449
Hardcover.ses$mean[1] - 1.96*hardcover.ses.rmse
## [1] 176.9753
Hardcover.ses$upper[1,'95%']
## 95%
## 304.3403
Hardcover.ses$lower[1,'95%']
## 95%
## 174.7799
Hardcover.h$mean[1] + 1.96*hardcover.holt.rmse
## [1] 303.4733
Hardcover.h$mean[1] - 1.96*hardcover.holt.rmse
## [1] 196.8745
Hardcover.h$upper[1,'95%']
## 95%
## 307.4256
Hardcover.h$lower[1,'95%']
## 95%
## 192.9222
fit1 <- holt(eggs,h=100)
fit2 <- holt(eggs,h=100, damped = T)
fit3 <- holt(eggs,h=100, damped = F, biasadj = T)
fit4 <- holt(eggs,h=100, damped = T, biasadj = T)
fit5 <- holt(eggs,h=100, damped = T, biasadj = T, lambda = 'auto')
fit6 <- holt(eggs,h=100, damped = F, biasadj = T, lambda = 'auto')
fit7 <- holt(eggs,h=100, damped = F, biasadj = F, lambda = 'auto')
autoplot(eggs) +
autolayer(fit1, series = 'Base') +
autolayer(fit2, series = 'Damped') +
autolayer(fit3, series = 'BiasAdj') +
autolayer(fit4, series = 'Damped + Biasadj') +
autolayer(fit5, series = 'Damped + Biasadj + Lambda') +
autolayer(fit6, series = 'Biasadj + Lambda') +
autolayer(fit7, series = 'Lambda')
fit1.rmse <- rmse(eggs, fit1$fitted)
fit2.rmse <- rmse(eggs, fit2$fitted)
fit3.rmse <- rmse(eggs, fit3$fitted)
fit4.rmse <- rmse(eggs, fit4$fitted)
fit5.rmse <- rmse(eggs, fit5$fitted)
fit6.rmse <- rmse(eggs, fit6$fitted)
fit7.rmse <- rmse(eggs, fit7$fitted)
fit1.rmse
## [1] 26.58219
fit2.rmse
## [1] 26.54019
fit3.rmse
## [1] 26.58219
fit4.rmse
## [1] 26.54019
fit5.rmse
## [1] 26.58953
fit6.rmse
## [1] 26.38704
fit7.rmse
## [1] 26.39364
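Note that biasadj only changes the point forecasts when a Box-Cox transformation (lambda) is involved, which is why the Base and BiasAdj fits, and likewise the two damped fits, have identical training RMSEs here.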
library(readxl)
retail <- read_excel("Downloads/retail.xlsx", skip= 1)
retail_ts <- ts(retail[, "A3349873A"], frequency = 12, start = c(1982, 4))
autoplot(retail_ts)
b. Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.
fit.hw <- hw(retail_ts, h=120, seasonal = "multiplicative")
fit.hw.damped <- hw(retail_ts, h=120, seasonal = "multiplicative", damped = TRUE)
autoplot(retail_ts) +
autolayer(fit.hw, series="Holt's method", PI=FALSE) +
autolayer(fit.hw.damped, series="Damped Holt's method", PI=FALSE) +
ggtitle("Multiplicative seasonal forecast") + xlab("Year") +
ylab("MyTs")
fit.hw.errors <- tsCV(retail_ts, hw, h=1, seasonal="multiplicative")
fit.hw.damped.errors <- tsCV(retail_ts, hw, h = 1, seasonal = "multiplicative", damped = TRUE)
print(paste('HW RMSE = ', sqrt(mean(fit.hw.errors ^2, na.rm = TRUE))))
## [1] "HW RMSE = 14.7276169362671"
print(paste('HW Damped RMSE = ', sqrt(mean(fit.hw.damped.errors ^2, na.rm = TRUE))))
## [1] "HW Damped RMSE = 14.9430568939693"
checkresiduals(fit.hw.damped)
##
## Ljung-Box test
##
## data: Residuals from Damped Holt-Winters' multiplicative method
## Q* = 42.932, df = 7, p-value = 3.437e-07
##
## Model df: 17. Total lags used: 24
e. Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 8 in Section 3.7?
retail.ts.train <- window(retail_ts, end = c(2010,12))
retail.ts.valid <- window(retail_ts, start = c(2011, 1))
#Seasonally Naive method
seasNaive <- snaive(retail.ts.train)
seasNaive.acc <- forecast::accuracy(seasNaive,retail.ts.valid)
#RMSE of Seasonally Naive
seasNaive.acc[2,2]
## [1] 30
myLambda <- BoxCox.lambda(retail.ts.train)
retail.bcSTL <-BoxCox(retail.ts.train, myLambda) %>%
mstl(s.window = 13, robust = T)%>%
forecast(h=36,lambda=myLambda)
## Warning in InvBoxCox(fcast$mean, lambda, biasadj, fcast): biasadj information
## not found, defaulting to FALSE.
retail.bcETS <-BoxCox(retail.ts.train, myLambda) %>%
ets()%>%
forecast(h=36,lambda=myLambda)
## Warning in forecast.ets(., h = 36, lambda = myLambda): biasadj information not
## found, defaulting to FALSE.
Metrics::rmse(retail.bcSTL$mean, retail.ts.valid)
## [1] 39.92529
Metrics::rmse(retail.bcETS$mean, retail.ts.valid)
## [1] 32.32293
plot(ukcars)
ukcars %>% stl(t.window=13,s.window='periodic',robust = T)%>%autoplot()
ukcars.seasadj <- seasadj(ukcars %>% stl(t.window=13,s.window='periodic',robust = T))
ukcars.hw <- hw(ukcars.seasadj,h=8,seasonal = 'additive', damped = T)
autoplot(ukcars.hw)
ukcars.holt <- holt(ukcars.seasadj, h= 8, damped= F)
autoplot(ukcars.holt)
ukcars.ets <- ets(ukcars.seasadj, damped = T)
summary(ukcars.ets)
## ETS(A,Ad,N)
##
## Call:
## ets(y = ukcars.seasadj, damped = T)
##
## Smoothing parameters:
## alpha = 0.5696
## beta = 2e-04
## phi = 0.9143
##
## Initial states:
## l = 347.3368
## b = -8.8748
##
## sigma: 25.7803
##
## AIC AICc BIC
## 1275.493 1276.285 1291.857
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 2.398058 25.2035 20.43629 0.2677076 6.536475 0.6660087 0.03749911
rmse(ukcars.ets$fitted, ukcars.seasadj)
## [1] 25.2035
rmse(ukcars.hw$fitted, ukcars.seasadj)
## [1] 25.18399
rmse(ukcars.holt$fitted, ukcars.seasadj)
## [1] 25.28237
ukcars.ets.forecast <- forecast(ukcars.ets, h = 8)
autoplot(ukcars.seasadj) +
autolayer(ukcars.ets.forecast, series = "ETS")+
autolayer(ukcars.hw$mean, series = "HW")+
autolayer(ukcars.holt$mean, series = "Holt")
checkresiduals(ukcars.hw)
##
## Ljung-Box test
##
## data: Residuals from Damped Holt-Winters' additive method
## Q* = 32.225, df = 3, p-value = 4.692e-07
##
## Model df: 9. Total lags used: 12
autoplot(visitors)
visitors.train <- window(visitors, end = c(2003,4))
visitors.valid <- window(visitors, start = c(2003, 5))
visitors.hw <- hw(visitors.train, seasonal = 'multiplicative',h=24)
visitors.hw$mean
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 2003                                     293.5512 311.3817 379.6865 341.5672
## 2004 348.1741 390.3863 377.6150 337.3049 284.8762 302.1571 368.4105 331.3981
## 2005 337.6781 378.5881 366.1740 327.0594
##           Sep      Oct      Nov      Dec
## 2003 330.3820 371.9441 387.1607 471.3185
## 2004 320.5215 360.8154 375.5478 457.1458
## 2005
Forecast the two-year test set using each of the following methods:
an ETS model;
an additive ETS model applied to a Box-Cox transformed series;
a seasonal naïve method;
an STL decomposition applied to the Box-Cox transformed series followed by an ETS model applied to the seasonally adjusted data.
visitors.ets <- ets(visitors.train)
visitors.ets.forecast <- forecast(visitors.ets, h = 24)
visitors.etsAdd <- ets(BoxCox(visitors.train, lambda = BoxCox.lambda(visitors.train)), model = "AAA")
visitors.etsAdd.forecast <- forecast(visitors.etsAdd, h =24)
visitors.snaive <- snaive(visitors.train, h = 24)
visitors.BCseasadj<-seasadj(BoxCox(visitors.train, lambda = BoxCox.lambda(visitors.train)) %>%
stl(t.window=13,s.window='periodic',robust = T))
visitors.ets.BCSA <- ets(visitors.BCseasadj)
visitors.ets.BCSA.for <- forecast(visitors.ets.BCSA, h = 24)
ETS
rmse(visitors.ets.forecast$mean, visitors.valid)
## [1] 80.23124
checkresiduals(visitors.ets)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,Ad,M)
## Q* = 20.677, df = 7, p-value = 0.004278
##
## Model df: 17. Total lags used: 24
ETS Additive
rmse(visitors.etsAdd.forecast$mean, visitors.valid)
## [1] 415.0766
checkresiduals(visitors.etsAdd)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,Ad,A)
## Q* = 15.206, df = 7, p-value = 0.03344
##
## Model df: 17. Total lags used: 24
Seasonally Naive
rmse(visitors.snaive$mean,visitors.valid)
## [1] 50.30097
checkresiduals(visitors.snaive)
##
## Ljung-Box test
##
## data: Residuals from Seasonal naive method
## Q* = 295.02, df = 24, p-value < 2.2e-16
##
## Model df: 0. Total lags used: 24
ETS STL Seasonally Adjusted
rmse(visitors.ets.BCSA.for$mean, visitors.valid)
## [1] 414.6479
checkresiduals(visitors.ets.BCSA)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,A,N)
## Q* = 26.159, df = 20, p-value = 0.1606
##
## Model df: 4. Total lags used: 24
fets_add_BoxCox <- function(y, h) {
  forecast(ets(y,
               lambda = BoxCox.lambda(y),
               additive.only = TRUE),
           h = h)
}
fstlm <- function(y, h) {
  forecast(stlm(y,
                lambda = BoxCox.lambda(y),
                s.window = frequency(y) + 1,
                robust = TRUE,
                method = "ets"),
           h = h)
}
fets <- function(y, h) {
  forecast(ets(y), h = h)
}
# Comparing the models using RMSE
sqrt(mean(tsCV(visitors, snaive, h = 1)^2, na.rm = TRUE))
## [1] 32.78675
sqrt(mean(tsCV(visitors, fets_add_BoxCox, h = 1)^2,
na.rm = TRUE))
## [1] 18.86439
sqrt(mean(tsCV(visitors, fstlm, h = 1)^2,
na.rm = TRUE))
## [1] 17.49642
sqrt(mean(tsCV(visitors, fets, h = 1)^2, na.rm = TRUE))
## [1] 18.52985
sqrt(mean(tsCV(visitors, hw, h = 1,
seasonal = "multiplicative")^2,
na.rm = TRUE))
## [1] 19.62107
fets <- function(y, h) {
  forecast(ets(y), h = h)
}
ets.tscv <- tsCV(qcement, fets, h=4)
sNaive.tscv <- tsCV(qcement, snaive, h= 4)
mean(ets.tscv^2,na.rm=T)
## [1] 0.01250954
mean(sNaive.tscv^2,na.rm=T)
## [1] 0.01792147
plot(ausbeer)
plot(bricksq)
plot(dole)
plot(a10)
plot(h02)
plot(usmelec)
#Making transformations
bricksq.BC <- BoxCox(bricksq, BoxCox.lambda(bricksq))
a10.BC <- BoxCox(a10, BoxCox.lambda(a10))
#validation sets
ausbeer.train <- window(ausbeer, end = c(2007,2))
ausbeer.test <- window(ausbeer, start = c(2007,3))
bricksq.BC.train <- window(bricksq.BC, end = c(1991,1))
bricksq.BC.test <- window(bricksq.BC, start = c(1991,2))
dole.train <- window(dole, end=c(1989,7))
dole.test <- window(dole, start=c(1989,8))
a10.train <- window(a10.BC, end = c(2005,6))
a10.test <- window(a10.BC, start = c(2005,7))
h02.train <- window(h02, end = c(2005,7))
h02.test <- window(h02, start = c(2005,8))
usmelec.train <- window(usmelec, end = c(2010,6))
usmelec.test <- window(usmelec, start = c(2010,7))
#ETS
ausbeer.ets <- ets(ausbeer.train)
bricksq.BC.ets <- ets(bricksq.BC.train)
dole.ets <- ets(dole.train)
a10.ets <- ets(a10.train)
h02.ets <- ets(h02.train)
usmelec.ets <- ets(usmelec.train)
#ETS forecasts
ausbeer.etsF <- forecast(ausbeer.ets, h=12)
bricksq.BC.etsF <- forecast(bricksq.BC.ets, h=12)
dole.etsF <- forecast(dole.ets, h=36)
a10.etsF <- forecast(a10.ets, h=36)
h02.etsF <- forecast(h02.ets, h=36)
usmelec.etsF <- forecast(usmelec.ets, h = 36)
#snaive
ausbeer.sn <- snaive(ausbeer.train, h = 12)
bricksq.BC.sn <- snaive(bricksq.BC.train, h= 12)
dole.sn <- snaive(dole.train, h= 36)
a10.sn <- snaive(a10.train, h = 36)
h02.sn <- snaive(h02.train, h = 36)
usmelec.sn <- snaive(usmelec.train, h= 36)
#stlf
ausbeer.stlf <- stlf(ausbeer.train, h = 12)
bricksq.BC.stlf <- stlf(bricksq.BC.train, h= 12)
dole.stlf <- stlf(dole.train, h= 36)
a10.stlf <- stlf(a10.train, h = 36)
h02.stlf <- stlf(h02.train, h = 36)
usmelec.stlf <- stlf(usmelec.train, h= 36)
#RMSE
#Ausbeer
forecast::accuracy(ausbeer.etsF,ausbeer.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.3466095 15.781973 11.979955 -0.064073435 2.864311 0.7567076
## Test set 0.1272571 9.620828 8.919347 0.009981598 2.132836 0.5633859
## ACF1 Theil's U
## Training set -0.1942276 NA
## Test set 0.3763918 0.1783972
forecast::accuracy(ausbeer.sn,ausbeer.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 3.336634 19.67005 15.83168 0.9044009 3.771370 1.0000000
## Test set -2.916667 10.80509 9.75000 -0.6505927 2.338012 0.6158537
## ACF1 Theil's U
## Training set 0.0009690785 NA
## Test set 0.3254581810 0.1981463
forecast::accuracy(ausbeer.stlf,ausbeer.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.08449615 13.640828 10.495557 -0.02319029 2.523657 0.6629464
## Test set -0.74491615 9.596294 9.014064 -0.20867707 2.159142 0.5693686
## ACF1 Theil's U
## Training set -0.1623773 NA
## Test set 0.3154543 0.1712144
forecast::accuracy(bricksq.BC.etsF,bricksq.BC.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01996121 0.2419629 0.1755865 0.1409763 1.247006 0.4329129
## Test set 0.20641501 0.2854117 0.2458899 1.4176042 1.692439 0.6062480
## ACF1 Theil's U
## Training set 0.1434416 NA
## Test set 0.3798615 0.793706
forecast::accuracy(bricksq.BC.sn,bricksq.BC.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1051226 0.5448411 0.4055930 0.7368813 2.873281 1.000000
## Test set -0.3353677 0.5781175 0.4975619 -2.2995568 3.439080 1.226752
## ACF1 Theil's U
## Training set 0.77776982 NA
## Test set -0.05416358 1.488247
forecast::accuracy(bricksq.BC.stlf,bricksq.BC.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0208919 0.2258669 0.1671388 0.150274 1.185157 0.4120850
## Test set 0.1568390 0.2683405 0.2296609 1.077773 1.585394 0.5662348
## ACF1 Theil's U
## Training set 0.2125626 NA
## Test set 0.3479158 0.7546849
forecast::accuracy(dole.etsF,dole.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 98.00253 16130.58 9427.497 0.5518769 6.20867 0.2995409
## Test set 221647.42595 279668.65 221647.426 32.5447637 32.54476 7.0424271
## ACF1 Theil's U
## Training set 0.5139536 NA
## Test set 0.9394267 11.29943
forecast::accuracy(dole.sn,dole.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 12606.58 56384.06 31473.16 3.350334 27.73651 1.000000
## Test set 160370.33 240830.92 186201.00 20.496197 27.38678 5.916184
## ACF1 Theil's U
## Training set 0.9781058 NA
## Test set 0.9208325 9.479785
forecast::accuracy(dole.stlf,dole.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 256.3644 6248.781 3836.768 0.5425694 4.804379 0.121906
## Test set 205359.4652 268646.638 208074.403 29.3215555 30.017034 6.611170
## ACF1 Theil's U
## Training set 0.01337877 NA
## Test set 0.93746400 10.71493
forecast::accuracy(a10.etsF,a10.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 7.659502e-05 0.06455211 0.05014221 -0.002655551 2.224162 0.3212884
## Test set 5.129718e-02 0.13739413 0.11893283 1.217786471 3.234877 0.7620674
## ACF1 Theil's U
## Training set -0.04890245 NA
## Test set -0.01633399 0.4906605
forecast::accuracy(a10.sn,a10.test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.1523955 0.1731097 0.1560660 6.682003 6.842931 1.000000 0.2212723
## Test set 0.3393671 0.4025493 0.3406072 8.979124 9.018149 2.182456 0.6073193
## Theil's U
## Training set NA
## Test set 1.423661
forecast::accuracy(a10.stlf,a10.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0001106891 0.05780298 0.04565029 -0.0493025 2.039699 0.2925062
## Test set 0.0557901445 0.13819615 0.11837298 1.3422799 3.212405 0.7584802
## ACF1 Theil's U
## Training set -0.09571412 NA
## Test set 0.04878539 0.4893801
forecast::accuracy(h02.etsF,h02.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0004513622 0.04579930 0.03426080 -0.2209876 4.560572 0.5745005
## Test set 0.0342751950 0.08019185 0.06763825 2.8172154 7.267771 1.1341883
## ACF1 Theil's U
## Training set 0.05057522 NA
## Test set -0.12581913 0.4581788
forecast::accuracy(h02.sn,h02.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.037749040 0.07171709 0.05963581 5.0942482 8.197262 1.000000
## Test set -0.004309813 0.08238325 0.06787933 -0.1376051 7.444182 1.138231
## ACF1 Theil's U
## Training set 0.40080565 NA
## Test set 0.09840611 0.4929215
forecast::accuracy(h02.stlf,h02.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.000830773 0.04053338 0.03033702 -0.2685748 4.274993 0.5087047
## Test set -0.006211506 0.07075988 0.05886616 -2.0093569 6.971515 0.9870941
## ACF1 Theil's U
## Training set -0.06958533 NA
## Test set -0.06144624 0.4425916
forecast::accuracy(usmelec.etsF,usmelec.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.07834851 7.447167 5.601963 -0.1168709 2.163495 0.6225637
## Test set -10.20080652 15.291359 13.123441 -3.1908161 3.926338 1.4584491
## ACF1 Theil's U
## Training set 0.2719249 NA
## Test set 0.4679496 0.4859495
forecast::accuracy(usmelec.sn,usmelec.test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 4.921564 11.58553 8.998217 2.000667 3.511648 1.000000 0.4841860
## Test set 5.475972 17.20037 13.494750 1.391437 3.767514 1.499714 0.2692544
## Theil's U
## Training set NA
## Test set 0.4765145
forecast::accuracy(usmelec.stlf,usmelec.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0657726 6.19216 4.682101 -0.06613967 1.826421 0.5203366
## Test set -13.0124627 17.48270 15.358336 -4.06887002 4.655355 1.7068199
## ACF1 Theil's U
## Training set 0.1106876 NA
## Test set 0.5244502 0.5593233
#Bicoal
mean(tsCV(bicoal, fets, h=10)^2, na.rm = T)
## [1] 11565.88
mean(tsCV(bicoal, snaive, h=10)^2, na.rm = T)
## [1] 12081.3
#Chicken
mean(tsCV(chicken, fets, h=10)^2, na.rm = T)
## [1] 1339.194
mean(tsCV(chicken, snaive, h=10)^2, na.rm = T)
## [1] 1212.813
#dole
mean(tsCV(dole, fets, h=10)^2, na.rm = T)
## [1] 2758010177
mean(tsCV(dole, snaive, h=10)^2, na.rm = T)
## [1] 5331238533
#usdeaths
mean(tsCV(usdeaths, fets, h=10)^2, na.rm = T)
## [1] 1789494
mean(tsCV(usdeaths, snaive, h=10)^2, na.rm = T)
## [1] 556867.2
#lynx
mean(tsCV(lynx, fets, h=10)^2, na.rm = T)
## [1] 5928549
mean(tsCV(lynx, snaive, h=10)^2, na.rm = T)
## [1] 4777830
#ibmclose
mean(tsCV(ibmclose, fets, h=10)^2, na.rm = T)
## [1] 346.7712
mean(tsCV(ibmclose, snaive, h=10)^2, na.rm = T)
## [1] 322.2696
#eggs
mean(tsCV(eggs, fets, h=10)^2, na.rm = T)
## [1] 2257.868
mean(tsCV(eggs, snaive, h=10)^2, na.rm = T)
## [1] 2361.556
usdeaths.ets2 <- ets(usdeaths,model = "MAM")
usdeaths.ets2F <- forecast(usdeaths.ets2, h=1)
usdeaths.hwM <- hw(usdeaths, seasonal = 'multiplicative', h=1)
usdeaths.ets2F$mean
## Jan
## 1979 8233.107
usdeaths.hwM$mean
## Jan
## 1979 8217.64
ibmclose.ets2 <- ets(ibmclose, model = "ANN")
ibmclose.ets2.F <- forecast(ibmclose.ets2,1)
ibmclose.ets2.F
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 370 356.9995 347.6907 366.3083 342.7629 371.2361
mysigma <- 7.2637  # sigma from the ETS(A,N,N) fit above
alpha <- 0.9999    # smoothing parameter from the fit
h <- 1
# For SES the forecast standard deviation at horizon h is
# sigma * sqrt(1 + alpha^2 * (h - 1)), so at h = 1 the 95% half-width is 1.96 * sigma.
ci <- 1.96 * mysigma * sqrt(1 + alpha^2 * (h - 1))
ibmclose.ets2.F$mean[1]
## [1] 356.9995
round(ibmclose.ets2.F$mean[1] - ci, 2)
## [1] 342.76
ibmclose.ets2.F$lower[1,'95%']
## 95%
## 342.7629
Chapter 8 Exercises
Figure 8.31: Left: ACF for a white noise series of 36 numbers. Middle: ACF for a white noise series of 360 numbers. Right: ACF for a white noise series of 1,000 numbers.
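All three panels are consistent with white noise: the dashed significance bounds are +/- 1.96/sqrt(T), roughly +/- 0.33 for T = 36, +/- 0.10 for T = 360 and +/- 0.06 for T = 1,000. The critical values shrink as T grows, and the sample autocorrelations of a white noise series shrink toward zero at the same rate, so almost all spikes stay inside the bounds in each panel. A quick sketch of the idea (the seed is arbitrary):
set.seed(123)
for (T in c(36, 360, 1000)) {
  print(ggAcf(ts(rnorm(T))) + ggtitle(paste0('White noise, T = ', T)))
}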
autoplot(ibmclose)
acf(ibmclose)
pacf(ibmclose)
library(urca)
Box.test(usnetelec, lag = 10, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: usnetelec
## X-squared = 329.22, df = 10, p-value < 2.2e-16
usnetelec %>% ur.kpss() %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 3 lags.
##
## Value of test-statistic is: 1.464
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
usnetelec %>% diff() %>% ur.kpss() %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 3 lags.
##
## Value of test-statistic is: 0.1585
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
Box.test(usgdp, lag =10, type = 'Ljung-Box')
##
## Box-Ljung test
##
## data: usgdp
## X-squared = 2078.3, df = 10, p-value < 2.2e-16
usgdp %>% ur.kpss() %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 4 lags.
##
## Value of test-statistic is: 4.6556
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
usgdp %>% diff() %>% ur.kpss() %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 4 lags.
##
## Value of test-statistic is: 1.7909
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
usgdp %>% diff() %>% diff() %>% ur.kpss() %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 4 lags.
##
## Value of test-statistic is: 0.021
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
Box.test(mcopper, lag =10, type = 'Ljung-Box')
##
## Box-Ljung test
##
## data: mcopper
## X-squared = 3819, df = 10, p-value < 2.2e-16
mcopper %>% ur.kpss() %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 6 lags.
##
## Value of test-statistic is: 5.01
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
mcopper %>% diff() %>% ur.kpss() %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 6 lags.
##
## Value of test-statistic is: 0.1843
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
Box.test(enplanements, lag =10, type = 'Ljung-Box')
##
## Box-Ljung test
##
## data: enplanements
## X-squared = 2122.7, df = 10, p-value < 2.2e-16
enplanements %>% ur.kpss() %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 4.4423
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
enplanements %>% diff() %>% ur.kpss() %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 0.0086
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
Box.test(visitors, lag =10, type = 'Ljung-Box')
##
## Box-Ljung test
##
## data: visitors
## X-squared = 1522.6, df = 10, p-value < 2.2e-16
visitors %>% ur.kpss() %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 4 lags.
##
## Value of test-statistic is: 4.6025
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
visitors %>% diff() %>% ur.kpss() %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 4 lags.
##
## Value of test-statistic is: 0.0191
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
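The forecast package can automate this differencing search: ndiffs() repeats the KPSS test until the differenced series is stationary, and nsdiffs() estimates the number of seasonal differences needed. A quick cross-check of the results above:
ndiffs(usnetelec)
ndiffs(usgdp)
ndiffs(mcopper)
nsdiffs(enplanements) #number of seasonal differences for the monthly series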
autoplot(retail_ts)
retail_ts %>% ur.kpss() %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 5.9286
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
retail_ts %>% diff() %>% ur.kpss %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 0.0522
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
y <- ts(numeric(100))
e <- rnorm(100)
# Simulate an AR(1) process: y_t = phi * y_{t-1} + e_t.
quickAR <- function(phi, y, e){
  for (i in 2:100){
    y[i] <- phi*y[i-1] + e[i]
  }
  return(y)
}
myseries <- quickAR(.6,y,e)
autoplot(myseries)
myseries <- quickAR(.1,y,e)
autoplot(myseries)
myseries <- quickAR(1,y,e)
autoplot(myseries)
# Same AR(1) simulator, but seeded so that different phi values are comparable.
quickAR <- function(phi, seed.nr){
  set.seed(seed.nr)
  y <- ts(numeric(100))
  e <- rnorm(100)
  for (i in 2:100){
    y[i] <- phi*y[i-1] + e[i]
  }
  return(autoplot(y))
}
quickAR(.1,1)
quickAR(.5,1)
quickAR(1,1)
# Simulate an ARMA(1,1): y_t = phi*y_{t-1} + theta*e_{t-1} + e_t.
quickARIMA <- function(n.obs, theta, seed.nr, phi){
  set.seed(seed.nr)
  y <- ts(numeric(n.obs))
  e <- rnorm(n.obs)
  for (i in 2:n.obs)
    y[i] <- phi*y[i-1] + theta*e[i-1] + e[i]
  return(autoplot(y))
}
quickARIMA(100,.6,1,.6)
# Simulate an AR(2): y_t = phi1*y_{t-1} + phi2*y_{t-2} + e_t.
quickAR2 <- function(phi1, phi2, seed.nr){
  set.seed(seed.nr)
  y <- ts(numeric(100))
  e <- rnorm(100)
  for (i in 3:100){
    y[i] <- phi1*y[i-1] + phi2*y[i-2] + e[i]
  }
  return(autoplot(y))
}
quickAR2(-.8,-.3,1)
quickAR2(-.8,-.3,1)
quickARIMA(100,.6,1,.6)
plot(wmurders)
wmurders.BC <- BoxCox(wmurders, BoxCox.lambda(wmurders))
acf(wmurders.BC)
pacf(wmurders.BC)
wmurders.BC %>% ur.kpss() %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 3 lags.
##
## Value of test-statistic is: 0.6745
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
wmurders.BC %>% diff() %>% ur.kpss %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 3 lags.
##
## Value of test-statistic is: 0.5466
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
wmurders.BC.diff <- diff(wmurders.BC)
acf(wmurders.BC.diff)
pacf(wmurders.BC.diff)
wmurders.BC.diff2 <- diff(wmurders.BC.diff)
wmurders.BC.diff2 %>% ur.kpss %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 3 lags.
##
## Value of test-statistic is: 0.0532
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
auto.arima(wmurders.BC)
## Series: wmurders.BC
## ARIMA(1,2,1)
##
## Coefficients:
## ar1 ma1
## -0.3006 -0.786
## s.e. 0.1529 0.119
##
## sigma^2 estimated as 0.002851: log likelihood=80.37
## AIC=-154.74 AICc=-154.25 BIC=-148.83
fit1 <- auto.arima(wmurders.BC)
checkresiduals(fit1)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,2,1)
## Q* = 10.378, df = 8, p-value = 0.2395
##
## Model df: 2. Total lags used: 10
forecast(fit1,3)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2005 0.8727642 0.8043418 0.9411866 0.7681212 0.9774072
## 2006 0.8394207 0.7467510 0.9320904 0.6976947 0.9811468
## 2007 0.8050389 0.6833171 0.9267607 0.6188814 0.9911964
autoplot(forecast(fit1, 3))
fit2 <- auto.arima(austa)
checkresiduals(fit2)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1) with drift
## Q* = 2.297, df = 5, p-value = 0.8067
##
## Model df: 2. Total lags used: 7
autoplot(forecast(fit2,10))
autoplot(forecast(arima(austa, c(0,1,1)), h = 10))
autoplot(forecast(arima(austa, c(0,1,0)), h = 10))
autoplot(forecast(arima(austa, c(2,1,0)), h = 10))
autoplot(forecast(arima(austa, c(0,0,1)), h = 10))
autoplot(forecast(arima(austa, c(0,2,1)), h = 10))
autoplot(usgdp)
autoplot(BoxCox(usgdp,BoxCox.lambda(usgdp)))
usgdp.bc <-BoxCox.lambda(usgdp)
aARfit <- auto.arima(usgdp, lambda = usgdp.bc)
summary(aARfit)
## Series: usgdp
## ARIMA(2,1,0) with drift
## Box Cox transformation: lambda= 0.366352
##
## Coefficients:
## ar1 ar2 drift
## 0.2795 0.1208 0.1829
## s.e. 0.0647 0.0648 0.0202
##
## sigma^2 estimated as 0.03518: log likelihood=61.56
## AIC=-115.11 AICc=-114.94 BIC=-101.26
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 1.195275 39.2224 29.29521 -0.01363259 0.6863491 0.1655687
## ACF1
## Training set -0.03824844
usgdp.arima1<-arima(usgdp, order=c(1,1,0))
checkresiduals(usgdp.arima1)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,0)
## Q* = 50.316, df = 7, p-value = 1.252e-08
##
## Model df: 1. Total lags used: 8
usgdp.arima2<-arima(usgdp, order=c(0,1,0))
checkresiduals(usgdp.arima2)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0)
## Q* = 108.19, df = 8, p-value < 2.2e-16
##
## Model df: 0. Total lags used: 8
usgdp.arima3<-arima(usgdp, order=c(2,1,0))
checkresiduals(usgdp.arima3)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,0)
## Q* = 16.013, df = 6, p-value = 0.01368
##
## Model df: 2. Total lags used: 8
checkresiduals(aARfit)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,0) with drift
## Q* = 6.5772, df = 5, p-value = 0.254
##
## Model df: 3. Total lags used: 8
autoplot(forecast(aARfit, h= 10))
usgdp.ets <- ets(usgdp)
checkresiduals(usgdp.ets)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,A,N)
## Q* = 21.461, df = 4, p-value = 0.0002565
##
## Model df: 4. Total lags used: 8
autoplot(forecast(usgdp.ets,h=10))
tsdisplay(austourists)
- Based on the ACF, there are significant spikes at every 4th lag (the seasonal lags for quarterly data), which motivates the lag-4 seasonal difference taken below.
autoplot(diff(austourists, lag=4, differences=1))
par(mfrow=c(1,2))
acf(diff(austourists,lag=4, differences=1))
pacf(diff(austourists,lag=4, differences=1))
auto.arima(austourists)
## Series: austourists
## ARIMA(1,0,0)(1,1,0)[4] with drift
##
## Coefficients:
## ar1 sar1 drift
## 0.4705 -0.5305 0.5489
## s.e. 0.1154 0.1122 0.0864
##
## sigma^2 estimated as 5.15: log likelihood=-142.48
## AIC=292.97 AICc=293.65 BIC=301.6
movAvg <- ma(usmelec, order=12)
plot(usmelec, col='gray', main="Electricity Net Generation",
ylab="Billions of kilowatt hours (kWh)")
lines(movAvg)
lambda <- BoxCox.lambda(usmelec)
netusage <- BoxCox(usmelec, lambda=lambda)
autoplot(netusage)
plot(log(usmelec) - log(movAvg))
plot(netusage - BoxCox(movAvg, lambda=lambda))
tsdisplay(diff(netusage, 1))
tsdisplay(diff(netusage,lag=12,1))
tsdisplay(diff(diff(netusage,lag=12,1)))
kpss.test(diff(netusage,12))
## Warning in kpss.test(diff(netusage, 12)): p-value smaller than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff(netusage, 12)
## KPSS Level = 1.1122, Truncation lag parameter = 5, p-value = 0.01
kpss.test(diff(diff(netusage,12)))
## Warning in kpss.test(diff(diff(netusage, 12))): p-value greater than printed p-
## value
##
## KPSS Test for Level Stationarity
##
## data: diff(diff(netusage, 12))
## KPSS Level = 0.01669, Truncation lag parameter = 5, p-value = 0.1
d. Identify a couple of ARIMA models that might be useful in describing the time series. Which of your models is the best according to their AIC values?
test.arima <- function(t.series, params){
  order <- as.numeric(params[1:3])
  seasonal <- as.numeric(params[4:6])
  df <- data.frame(model = paste0("ARIMA(", paste0(params, collapse = ","), ")"),
                   AIC = arima(t.series, order = order, seasonal = seasonal)$aic)
  return(df)
}
gridSearch <- expand.grid(c(0, 1, 2), #p
                          c(2),       #d
                          c(0, 1),    #q
                          c(1, 2),    #P
                          c(1),       #D
                          c(0, 1))    #Q
df.list <- apply(gridSearch, MARGIN=1, FUN=function(x) {test.arima(usmelec, x)})
df <- do.call(rbind, df.list)
df
## model AIC
## 1 ARIMA(0,2,0,1,1,0) 3925.638
## 2 ARIMA(1,2,0,1,1,0) 3773.422
## 3 ARIMA(2,2,0,1,1,0) 3617.919
## 4 ARIMA(0,2,1,1,1,0) 3489.316
## 5 ARIMA(1,2,1,1,1,0) 3452.156
## 6 ARIMA(2,2,1,1,1,0) 3403.781
## 7 ARIMA(0,2,0,2,1,0) 3889.878
## 8 ARIMA(1,2,0,2,1,0) 3723.813
## 9 ARIMA(2,2,0,2,1,0) 3590.935
## 10 ARIMA(0,2,1,2,1,0) 3450.950
## 11 ARIMA(1,2,1,2,1,0) 3411.202
## 12 ARIMA(2,2,1,2,1,0) 3372.929
## 13 ARIMA(0,2,0,1,1,1) 3812.535
## 14 ARIMA(1,2,0,1,1,1) 3659.566
## 15 ARIMA(2,2,0,1,1,1) 3544.265
## 16 ARIMA(0,2,1,1,1,1) 3393.268
## 17 ARIMA(1,2,1,1,1,1) 3363.035
## 18 ARIMA(2,2,1,1,1,1) 3330.714
## 19 ARIMA(0,2,0,2,1,1) 3811.630
## 20 ARIMA(1,2,0,2,1,1) 3655.111
## 21 ARIMA(2,2,0,2,1,1) 3544.726
## 22 ARIMA(0,2,1,2,1,1) 3391.470
## 23 ARIMA(1,2,1,2,1,1) 3360.434
## 24 ARIMA(2,2,1,2,1,1) 3329.810
df[which.min(df$AIC),]
## model AIC
## 24 ARIMA(2,2,1,2,1,1) 3329.81
fit1 <- arima(usmelec, order = c(2,2,1), seasonal = c(2,1,1))
checkresiduals(fit1)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,2,1)(2,1,1)[12]
## Q* = 60.543, df = 18, p-value = 1.671e-06
##
## Model df: 6. Total lags used: 24
kpss.test(fit1$residuals)
## Warning in kpss.test(fit1$residuals): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: fit1$residuals
## KPSS Level = 0.022049, Truncation lag parameter = 5, p-value = 0.1
f. Forecast the next 15 years of electricity generation by the U.S. electric industry. Get the latest figures from the EIA to check the accuracy of your forecasts.
library(readxl)
electOver <- read_excel("Downloads/electricity_overview.xlsx")
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * `` -> ...6
## * ...
electOver <- electOver[, 1:2]  # assuming the first two columns are the month and total net generation
names(electOver) <- c('month', 'elec')
electOver.ts <- ts(electOver$elec, start= c(1973,1), frequency = 12)
plot(electOver.ts)
## Warning in xy.coords(x, NULL, log = log, setLab = FALSE): NAs introduced by
## coercion
## Warning in xy.coords(x, y): NAs introduced by coercion
fcast <- forecast(fit1, h=15*12)
plot(fcast)
lines(electOver.ts, col='red')
## Warning in xy.coords(x, y): NAs introduced by coercion
g. Eventually, the prediction intervals are so wide that the forecasts are not particularly useful. How many years of forecasts do you think are sufficiently accurate to be usable?
Judging by the width of the prediction intervals, only about the first 5 years of forecasts seem accurate enough to be usable.
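One way to make that judgement concrete is to plot how the 95% interval width grows with the horizon (a rough sketch using the fcast object above):
#Relative width of the 95% interval at each of the 180 forecast months
width95 <- fcast$upper[,2] - fcast$lower[,2]
plot(as.numeric(width95/fcast$mean), type = 'l',
     xlab = 'Months ahead', ylab = 'Relative 95% interval width')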
autoplot(mcopper)
movAvg <- ma(mcopper, order=12)
lambda <- BoxCox.lambda(mcopper)
netprices <- BoxCox(mcopper, lambda=lambda)
autoplot(netprices)
plot(log(mcopper) - log(movAvg))
plot(netprices - BoxCox(movAvg, lambda=lambda))
fit12 <- auto.arima(mcopper, lambda= lambda)
fit12
## Series: mcopper
## ARIMA(0,1,1)
## Box Cox transformation: lambda= 0.1919047
##
## Coefficients:
## ma1
## 0.3720
## s.e. 0.0388
##
## sigma^2 estimated as 0.04997: log likelihood=45.05
## AIC=-86.1 AICc=-86.08 BIC=-77.43
df.list <- apply(gridSearch, MARGIN=1, FUN=function(x)
{test.arima(mcopper, x)})
df <- do.call(rbind, df.list)
df
## model AIC
## 1 ARIMA(0,2,0,1,1,0) 6771.437
## 2 ARIMA(1,2,0,1,1,0) 6717.869
## 3 ARIMA(2,2,0,1,1,0) 6660.403
## 4 ARIMA(0,2,1,1,1,0) 6579.486
## 5 ARIMA(1,2,1,1,1,0) 6538.480
## 6 ARIMA(2,2,1,1,1,0) 6535.362
## 7 ARIMA(0,2,0,2,1,0) 6737.754
## 8 ARIMA(1,2,0,2,1,0) 6684.161
## 9 ARIMA(2,2,0,2,1,0) 6629.310
## 10 ARIMA(0,2,1,2,1,0) 6551.373
## 11 ARIMA(1,2,1,2,1,0) 6508.398
## 12 ARIMA(2,2,1,2,1,0) 6505.775
## 13 ARIMA(0,2,0,1,1,1) 6656.245
## 14 ARIMA(1,2,0,1,1,1) 6589.775
## 15 ARIMA(2,2,0,1,1,1) 6529.324
## 16 ARIMA(0,2,1,1,1,1) 6456.676
## 17 ARIMA(1,2,1,1,1,1) 6422.693
## 18 ARIMA(2,2,1,1,1,1) 6422.826
## 19 ARIMA(0,2,0,2,1,1) 6658.071
## 20 ARIMA(1,2,0,2,1,1) 6591.294
## 21 ARIMA(2,2,0,2,1,1) 6530.839
## 22 ARIMA(0,2,1,2,1,1) 6458.553
## 23 ARIMA(1,2,1,2,1,1) 6424.635
## 24 ARIMA(2,2,1,2,1,1) 6424.788
df[which.min(df$AIC),]
## model AIC
## 17 ARIMA(1,2,1,1,1,1) 6422.693
checkresiduals(fit12)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)
## Q* = 22.913, df = 23, p-value = 0.4659
##
## Model df: 1. Total lags used: 24
autoplot(forecast(fit12,h=5*12))
fit12.ets <- ets(mcopper)
fit12.ets
## ETS(M,Ad,N)
##
## Call:
## ets(y = mcopper)
##
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.2141
## phi = 0.8
##
## Initial states:
## l = 265.0541
## b = -3.9142
##
## sigma: 0.0632
##
## AIC AICc BIC
## 8052.038 8052.189 8078.049
fit12
## Series: mcopper
## ARIMA(0,1,1)
## Box Cox transformation: lambda= 0.1919047
##
## Coefficients:
## ma1
## 0.3720
## s.e. 0.0388
##
## sigma^2 estimated as 0.04997: log likelihood=45.05
## AIC=-86.1 AICc=-86.08 BIC=-77.43
autoplot(hsales)
movAvg <- ma(hsales, order=12)
lambda <- BoxCox.lambda(hsales)
netprices <- BoxCox(hsales, lambda=lambda)
autoplot(netprices)
plot(log(hsales) - log(movAvg))
plot(netprices - BoxCox(movAvg, lambda=lambda))
tsdisplay(diff(netprices, 1))
tsdisplay(diff(netprices,lag=12,1))
tsdisplay(diff(diff(netprices,lag=12,1)))
kpss.test(diff(netprices,12))
## Warning in kpss.test(diff(netprices, 12)): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff(netprices, 12)
## KPSS Level = 0.087312, Truncation lag parameter = 5, p-value = 0.1
kpss.test(diff(diff(netprices,12)))
## Warning in kpss.test(diff(diff(netprices, 12))): p-value greater than printed p-
## value
##
## KPSS Test for Level Stationarity
##
## data: diff(diff(netprices, 12))
## KPSS Level = 0.045168, Truncation lag parameter = 5, p-value = 0.1
test.arima <- function(t.series, params){
  order <- as.numeric(params[1:3])
  seasonal <- as.numeric(params[4:6])
  df <- data.frame(model = paste0("ARIMA(", paste0(params, collapse = ","), ")"),
                   AIC = forecast::Arima(t.series, order = order,
                                         seasonal = seasonal, lambda = lambda)$aic)
  return(df)
}
df.list <- apply(gridSearch, MARGIN = 1, FUN = function(x) test.arima(hsales, x))
df <- do.call(rbind, df.list)
df
## model AIC
## 1 ARIMA(0,2,0,1,1,0) 12.16749
## 2 ARIMA(1,2,0,1,1,0) -24.55333
## 3 ARIMA(2,2,0,1,1,0) -92.34166
## 4 ARIMA(0,2,1,1,1,0) -143.71916
## 5 ARIMA(1,2,1,1,1,0) -203.36169
## 6 ARIMA(2,2,1,1,1,0) -208.96125
## 7 ARIMA(0,2,0,2,1,0) -28.45613
## 8 ARIMA(1,2,0,2,1,0) -62.80340
## 9 ARIMA(2,2,0,2,1,0) -129.10728
## 10 ARIMA(0,2,1,2,1,0) -183.02174
## 11 ARIMA(1,2,1,2,1,0) -242.98150
## 12 ARIMA(2,2,1,2,1,0) -250.21483
## 13 ARIMA(0,2,0,1,1,1) -149.36815
## 14 ARIMA(1,2,0,1,1,1) -187.35099
## 15 ARIMA(2,2,0,1,1,1) -256.30073
## 16 ARIMA(0,2,1,1,1,1) -307.75285
## 17 ARIMA(1,2,1,1,1,1) -363.80940
## 18 ARIMA(2,2,1,1,1,1) -369.73701
## 19 ARIMA(0,2,0,2,1,1) -147.43467
## 20 ARIMA(1,2,0,2,1,1) -185.44033
## 21 ARIMA(2,2,0,2,1,1) -254.33181
## 22 ARIMA(0,2,1,2,1,1) -305.78918
## 23 ARIMA(1,2,1,2,1,1) -361.91351
## 24 ARIMA(2,2,1,2,1,1) -367.78545
df[which.min(df$AIC),]
## model AIC
## 18 ARIMA(2,2,1,1,1,1) -369.737
fit13.1 <- forecast::Arima(hsales,order = c(1,2,1), seasonal = c(1,1,1),lambda= lambda)
fit13.2 <- auto.arima(hsales)
checkresiduals(fit13.1)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,2,1)(1,1,1)[12]
## Q* = 18.978, df = 20, p-value = 0.5233
##
## Model df: 4. Total lags used: 24
checkresiduals(fit13.2)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,0)(1,1,0)[12] with drift
## Q* = 39.66, df = 21, p-value = 0.008177
##
## Model df: 3. Total lags used: 24
fcast <- forecast(fit13.1, h=24)
fArima <- function(y, h) {
  # Fit to the training data tsCV() supplies, not to the full hsales series.
  fit <- forecast::Arima(y, order = c(1,2,1), seasonal = c(1,1,1), lambda = lambda)
  return(forecast(fit, h = h))
}
mean(tsCV(hsales, fArima, h= 24)^2,na.rm=T)
## [1] 197.4602
mean(tsCV(hsales, fets_add_BoxCox, h=24)^2, na.rm=T)
## [1] 222.4153
mean(tsCV(hsales, stlf, method ='arima', h= 24)^2, na.rm=T)
## [1] 158.4764
fit15 <- auto.arima(retail.ts.train)
retail.Arima <- forecast(fit15, h= 36)
Metrics::rmse(retail.bcSTL$mean, retail.ts.valid)
## [1] 39.92529
Metrics::rmse(retail.Arima$mean, retail.ts.valid)
## [1] 42.71018
autoplot(sheep)
Assume you decide to fit the following model:
y_t = y_{t-1} + phi1*(y_{t-1} - y_{t-2}) + phi2*(y_{t-2} - y_{t-3}) + phi3*(y_{t-3} - y_{t-4}) + e_t,
where e_t is a white noise series. What sort of ARIMA model is this (i.e., what are p, d, and q)?
Moving everything except e_t to the left-hand side and writing it with the backshift operator B:
(y_t - y_{t-1}) - phi1*(y_{t-1} - y_{t-2}) - phi2*(y_{t-2} - y_{t-3}) - phi3*(y_{t-3} - y_{t-4}) = e_t
(1 - B)y_t - phi1*B(1 - B)y_t - phi2*B^2(1 - B)y_t - phi3*B^3(1 - B)y_t = e_t
(1 - phi1*B - phi2*B^2 - phi3*B^3)(1 - B)y_t = e_t
This is an ARIMA(3, 1, 0) model.
By examining the ACF and PACF of the differenced data, explain why this model is appropriate.
ggtsdisplay(diff(sheep))
sheep.1940 = 1797 + 0.42*(1797 - 1791) -0.20*(1791 - 1627) - 0.30*(1627 - 1665)
sheep.1941 = sheep.1940 + 0.42*(sheep.1940 - 1797) -0.20*(1797 - 1791) - 0.30*(1791 - 1627)
sheep.1942 = sheep.1941 + 0.42*(sheep.1941 - sheep.1940) -0.20*(sheep.1940 - 1797) - 0.30*(1797 - 1791)
c(sheep.1940, sheep.1941, sheep.1942)
## [1] 1778.120 1719.790 1697.268
fc_sheep_arima.3.1.0 <- forecast(
Arima(sheep, order = c(3, 1, 0)),
h = 3
)
fc_sheep_arima.3.1.0$mean
## Time Series:
## Start = 1940
## End = 1942
## Frequency = 1
## [1] 1777.996 1718.869 1695.985
ar1 <- fc_sheep_arima.3.1.0$model$coef[1]
ar2 <- fc_sheep_arima.3.1.0$model$coef[2]
ar3 <- fc_sheep_arima.3.1.0$model$coef[3]
sheep.1940.new = 1797 + ar1*(1797 - 1791) + ar2*(1791 - 1627) + ar3*(1627 - 1665)
sheep.1941.new = sheep.1940.new + ar1*(sheep.1940.new - 1797) + ar2*(1797 - 1791) + ar3*(1791 - 1627)
sheep.1942.new = sheep.1941.new + ar1*(sheep.1941.new - sheep.1940.new) + ar2*(sheep.1940.new - 1797) + ar3*(1797 - 1791)
c(sheep.1940.new, sheep.1941.new, sheep.1942.new)
## ar1 ar1 ar1
## 1777.996 1718.869 1695.985
autoplot(bicoal)
(1 - phi1*B - phi2*B^2 - phi3*B^3 - phi4*B^4)y_t = c + e_t
If mu is the mean of y_t, then c = mu*(1 - phi1 - phi2 - phi3 - phi4).
The model is ARIMA(4, 0, 0), i.e., an AR(4).
ggAcf(bicoal, lag.max = 36)
ggPacf(bicoal, lag.max = 36)
c = 162.00
phi1 = 0.83
phi2 = -0.34
phi3 = 0.55
phi4 = -0.38
bicoal.1969 <- c + phi1*545 + phi2*552 + phi3*534 + phi4*512
bicoal.1970 <- c + phi1*bicoal.1969 + phi2*545 + phi3*552 + phi4*534
bicoal.1971 <- c + phi1*bicoal.1970 + phi2*bicoal.1969 + phi3*545 + phi4*552
c(bicoal.1969, bicoal.1970, bicoal.1971)
## [1] 525.8100 513.8023 499.6705
fc_bicoal_ar4 <- forecast(ar(bicoal, aic = FALSE, order.max = 4), h = 3)
fc_bicoal_ar4$mean
## Time Series:
## Start = 1969
## End = 1971
## Frequency = 1
## [1] 526.2057 514.0658 500.0111
phi1 <- fc_bicoal_ar4$model$ar[1]
phi2 <- fc_bicoal_ar4$model$ar[2]
phi3 <- fc_bicoal_ar4$model$ar[3]
phi4 <- fc_bicoal_ar4$model$ar[4]
c <- fc_bicoal_ar4$model$x.mean*(1 - phi1 - phi2 - phi3 - phi4)
bicoal.1969.new <- c + phi1*545 + phi2*552 + phi3*534 + phi4*512
bicoal.1970.new <- c + phi1*bicoal.1969.new + phi2*545 + phi3*552 + phi4*534
bicoal.1971.new <- c + phi1*bicoal.1970.new + phi2*bicoal.1969.new + phi3*545 + phi4*552
c(bicoal.1969.new, bicoal.1970.new, bicoal.1971.new)
## [1] 526.2057 514.0658 500.0111