library(fpp3)
miss.piggy <- aus_livestock %>%
filter(State == "Victoria" & (Animal == "Pigs") )
fit.piggy <- miss.piggy %>%
model(ets = ETS(Count)) %>%
report()
## Series: Count
## Model: ETS(A,N,A)
## Smoothing parameters:
## alpha = 0.3579401
## gamma = 0.0001000139
##
## Initial states:
## l[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5] s[-6]
## 95487.5 2066.235 6717.311 -2809.562 -1341.299 -7750.173 -8483.418 5610.89
## s[-7] s[-8] s[-9] s[-10] s[-11]
## -579.8107 1215.464 -2827.091 1739.465 6441.989
##
## sigma^2: 60742898
##
## AIC AICc BIC
## 13545.38 13546.26 13610.24
fit.piggy
## # A mable: 1 x 3
## # Key: Animal, State [1]
## Animal State ets
## <fct> <fct> <model>
## 1 Pigs Victoria <ETS(A,N,A)>
forecast.piggy <- fit.piggy %>%
forecast(h = 4) %>%
autoplot(miss.piggy)
fc.piggy <- fit.piggy %>%
forecast(h=4)
forecast.piggy
#b
firstfore <- fc.piggy %>%
pull(Count) %>%
head(1)
standard.dev <- augment(fit.piggy) %>%
pull(.resid) %>%
sd()
bottom <- firstfore - 1.96 * standard.dev
top <- firstfore + 1.96 * standard.dev
results <- c(bottom, top)
names(results) <- c('Lower', 'Upper')
results
## <distribution[2]>
## Lower Upper
## N(69328, 6.1e+07) N(99521, 6.1e+07)
hilo(fc.piggy$Count, 95)
## <hilo[4]>
## [1] [69149.19, 99700.22]95 [68933.39, 101382.56]95 [74445.66, 108687.93]95
## [4] [72125.42, 108071.45]95
#a
iceland <- global_economy %>%
filter(Country == "Iceland") %>%
select("Exports")
autoplot
## function (object, ...)
## {
## UseMethod("autoplot")
## }
## <bytecode: 0x0000000015127a58>
## <environment: namespace:ggplot2>
#There is a lot of fluctuation in the data. and no clear trend despite the massive spike at the end. I don't spot a clear seasonality or cycle.
#b
icelandfit <- iceland %>%
model(ETS(Exports ~ error("A") + trend("N") + season("N")))
icelandfc <- icelandfit %>%
forecast(h=10) %>%
autoplot(iceland)
icelandfc
#c
accuracy(icelandfit)
## # A tibble: 1 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Exports ~ error(~ Trai~ 0.0980 3.47 2.61 -0.192 6.88 0.983 0.991 0.257
#RMSE = 3.47
#d
icelandfit2 <- iceland %>%
model(ETS(Exports ~ error("A") + trend("A") + season("N")))
accuracy(icelandfit2)
## # A tibble: 1 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Exports ~ error(~ Trai~ 0.0929 3.50 2.67 -0.222 7.03 1.01 0.999 0.254
#RMSE = 3.50 which is higher than the rmse from the A,N,N model. Trying to adjust for trend actually made our forecast less accurate, which mean's there isn't a clear trend. So we'll go with the A,N,N model.
#e
icelandfc2 <- icelandfit2 %>%
forecast(h=10) %>%
autoplot(iceland)
icelandfc2
#The forecast from the A,A,N has a slight upward slope to it while the forecast from the A,N,N is relatively flat (constant). This is of course because the A.A.N is accounting for the trend in the data while the A.N,N is not.
#The A,N,N yields a lower rmse and mae, but it's hard to believe exports will stay constant given the fluctuation in the data. I would say I trust the A,N,N more because there isn't really a clear trend to the data.
china <- global_economy %>%
filter(Country == "China") %>%
mutate(GDP = GDP/1e9) %>%
select(GDP)
autoplot(china)
## Plot variable not specified, automatically selected `.vars = GDP`
#There is a clear upward trend with no seasonality. So we want an ETS that accounts for trend but no seasonality.
fit.china <- china %>%
model(ets = ETS(GDP))
fit.china
## # A mable: 1 x 1
## ets
## <model>
## 1 <ETS(M,A,N)>
# M,A,N is the proposed ETS model let's look at it.
fit.china.man <- china %>%
model(ETS(GDP ~ error("M") + trend("A") + season("N")))
fit.china.manfc <- fit.china.man %>%
forecast(h= 20) %>%
autoplot(china)
fit.china.manfc
damp.chinese.man <- china %>%
model(hw = ETS(GDP ~ error("M") + trend("Ad") + season("N"))) %>%
forecast(h = 20) %>%
autoplot(china)
damp.chinese.man
chinese.fits <- china %>%
model(
SES = ETS(GDP ~ error("A") + trend("N") + season("N")),
Holt = ETS(GDP ~ error("A") + trend("A") + season("N")),
Damped = ETS(GDP ~ error("A") + trend("Ad") + season("N"))
)
chinese.fits.fc <- chinese.fits %>%
forecast(h=20) %>%
autoplot(china)
chinese.fits.fc
accuracy(chinese.fits)
## # A tibble: 3 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SES Training 210. 416. 213. 8.25 10.9 0.983 0.991 0.789
## 2 Holt Training 23.6 190. 96.0 1.36 7.72 0.443 0.453 0.00882
## 3 Damped Training 29.4 190. 94.8 1.86 7.56 0.437 0.454 -0.00684
china %>%
features(GDP, features = guerrero)
## # A tibble: 1 x 1
## lambda_guerrero
## <dbl>
## 1 -0.0345
#Lambda = -.0345
lambda <- china %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)
china.box.cox <- china %>%
model(
SES = ETS(box_cox(GDP, lambda) ~ error("A") + trend("N") + season("N")),
Holt = ETS(box_cox(GDP, lambda) ~ error("A") + trend("A") + season("N")),
Damped = ETS(box_cox(GDP, lambda) ~ error("A") + trend("Ad") + season("N"))
)
china.box.coxfc <- china.box.cox %>%
forecast(h=20) %>%
autoplot(china)
china.box.coxfc
aus_production
## # A tsibble: 218 x 7 [1Q]
## Quarter Beer Tobacco Bricks Cement Electricity Gas
## <qtr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1956 Q1 284 5225 189 465 3923 5
## 2 1956 Q2 213 5178 204 532 4436 6
## 3 1956 Q3 227 5297 208 561 4806 7
## 4 1956 Q4 308 5681 197 570 4418 6
## 5 1957 Q1 262 5577 187 529 4339 5
## 6 1957 Q2 228 5651 214 604 4811 7
## 7 1957 Q3 236 5317 227 603 5259 7
## 8 1957 Q4 320 6152 222 582 4735 6
## 9 1958 Q1 272 5758 199 554 4608 5
## 10 1958 Q2 233 5641 229 620 5196 7
## # ... with 208 more rows
## # i Use `print(n = ...)` to see more rows
gas <- aus_production %>%
select(Gas)
autoplot(gas)
## Plot variable not specified, automatically selected `.vars = Gas`
fit.gas <- gas %>%
model(ets = ETS(Gas))
fit.gas
## # A mable: 1 x 1
## ets
## <model>
## 1 <ETS(M,A,M)>
# M,A,M is the reccommended ETS model
fit.gas2 <- gas %>%
model(ETS( Gas ~ error("M") + trend("A") + season("M")))
accuracy(fit.gas2)
## # A tibble: 1 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Gas ~ error(\"M~ Trai~ -0.115 4.60 3.02 0.199 4.08 0.542 0.606 -0.0131
#rmse = 4.60
fit.gas.fc <- fit.gas2 %>%
forecast(h=36) %>%
autoplot(gas)
fit.gas.fc
#Multiplicative seasonality is necessary here because our variance is increasing! Clearly seen in the plot of gas.
damp.gas <- gas %>%
model(ETS(Gas ~ error("M") + trend("Ad") + season("M")))
accuracy(damp.gas)
## # A tibble: 1 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Gas ~ error(\~ Trai~ -0.00439 4.59 3.03 0.326 4.10 0.544 0.606 -0.0217
#rmse = 4.59
#The rmse from the damped trend model is lower than that of the non-damped, but the mae is higher.
damp.gas.fc <- damp.gas %>%
forecast(h=36) %>%
autoplot(gas)
damp.gas.fc
#I would say the forecast are about identical and the rmse values are really close to one another. So I would not say it improves the forecast no.
set.seed(241241241)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
myseries %>% autoplot(Turnover)
#a
#Multiplicative seasonality will be necessary because the variance is increasing over time.
#b
multi.model <- myseries %>%
model(Multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M")))
accuracy(multi.model)
## # A tibble: 1 x 12
## State Indus~1 .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Victor~ Other ~ Multi~ Trai~ 1.05 20.2 14.0 0.205 2.91 0.379 0.424 -0.0892
## # ... with abbreviated variable name 1: Industry
#rmse = 20.2
multi.model.fc <- multi.model %>%
forecast(h=36) %>%
autoplot(myseries)
multi.model.fc
multi.model.damp <- myseries %>%
model(Multiplicative = ETS(Turnover ~ error("M") + trend("Ad") + season("M")))
accuracy(multi.model.damp)
## # A tibble: 1 x 12
## State Indus~1 .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Victoria Other ~ Multi~ Trai~ 1.73 20.3 14.1 0.314 2.92 0.382 0.425 -0.115
## # ... with abbreviated variable name 1: Industry
#rmse = 20.3
multi.damp.fc <- multi.model.damp %>%
forecast(h=36) %>%
autoplot(myseries)
multi.damp.fc
#c
#The rmse and mae are lower without the damp than with the damp. So I'll go with the non-damp model.
#d
gg_tsresiduals(multi.model)
#The autocorrelation is significant. A ljung-box test would surely clear that up further even though it may not even be necessary given how blatant that is.
#The innovation residuals are not bad. There isn't a clear trend and it's close to being homoskedastic. The residuals appear to be close to a normal distribution maybe with a slight skew to the right.
#e
train.turnover <- myseries %>%
filter(year(Month) < 2011)
test.turnover <- myseries %>%
filter(year(Month) > 2010)
turnover.fit <- train.turnover %>%
model(
"Holt-Winters' Damped" = ETS(Turnover ~ error("M") +
trend("Ad") +
season("M")),
"Holt-Winters' Multiplicative" = ETS(Turnover ~ error("M") +
trend("A") +
season("M")),
"Seasonal Naïve Forecast" = SNAIVE(Turnover)
)
fc.turnover <- turnover.fit %>%
forecast(h = 36)
accuracy(fc.turnover, test.turnover)
## # A tibble: 3 x 12
## .model State Indus~1 .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Holt-Wint~ Vict~ Other ~ Test -34.6 72.2 57.0 -4.63 7.15 NaN NaN 0.672
## 2 Holt-Wint~ Vict~ Other ~ Test -71.2 111. 82.8 -9.20 10.5 NaN NaN 0.731
## 3 Seasonal ~ Vict~ Other ~ Test 17.6 60.6 51.1 2.06 6.23 NaN NaN 0.653
## # ... with abbreviated variable name 1: Industry
#No I could not. The seasonal naive model produced the lowest rmse and mae therefore I did not beat the seasonal naive.
#a
aus.trips <- tourism %>%
summarise(Trips = sum(Trips))
autoplot(aus.trips)
## Plot variable not specified, automatically selected `.vars = Trips`
#A small declining trend until around 2010 where it begins to increase pretty dramatically. There is definitely seasonality to the data, it spikes in Q1 and then decreases until the next Q1.
#b
stl.trips <- aus.trips %>%
model(STL(Trips)) %>%
components() %>%
as_tsibble() %>%
autoplot(season_adjust)
stl.trips
#c
aus.aadn <- aus.trips %>%
model(
decomposition_model(STL(Trips),
ETS(season_adjust ~ error("A") +
trend("Ad") +
season("N")
)
)
)
accuracy(aus.aadn)
## # A tibble: 1 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "decomposition_model(~ Trai~ 103. 763. 576. 0.367 2.72 0.607 0.629 -0.0174
#rmse = 763, mae = 576
aus.aadn.fc <- aus.aadn %>%
forecast(h = "2 years") %>%
autoplot(aus.trips) + labs(title = "A,Ad,N Forecast")
aus.aadn.fc
#d
aus.aan <- aus.trips %>%
model(
decomposition_model(STL(Trips),
ETS(season_adjust ~ error("A") +
trend("A") +
season("N")
)
)
)
accuracy(aus.aan)
## # A tibble: 1 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "decomposition_model(~ Trai~ 99.7 763. 574. 0.359 2.71 0.604 0.628 -0.0182
#rmse = 763, mae = 574
aus.aan.fc <- aus.aan %>%
forecast(h = "2 years") %>%
autoplot(aus.trips) + labs( title = "A,A,N Forecast")
aus.aan.fc
#e
aus.ets.model <- aus.trips %>%
model(ETS(Trips))
aus.ets.model
## # A mable: 1 x 1
## `ETS(Trips)`
## <model>
## 1 <ETS(A,A,A)>
#A,A,A
accuracy(aus.ets.model)
## # A tibble: 1 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Trips) Training 105. 794. 604. 0.379 2.86 0.636 0.653 -0.00151
#rmse = 794
aus.ets.fc <- aus.ets.model %>%
forecast(h = "2 years") %>%
autoplot(aus.trips) + labs(title = "A,A,A Forecast")
aus.ets.fc
#f
#The A,A,N model gives the best in sample fits. It has the same rmse as the A,Ad,N but a lower mae than it.
#g
#The forecasts are honestly really similar for all 3, so I'm going to stick with the one with the lowest rmse. The A,A.N model.
#H
gg_tsresiduals(aus.aan)
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).
#The autocorellation looks good with the exeption of lag 14, but that's pretty far out, so it's still good. The residuals are definetely skewed to the right. The residuals look close to white noise with some questionable points.
###Exercise 7
new.zeal <- aus_arrivals %>%
filter(Origin == 'NZ')
#a
autoplot(new.zeal)
## Plot variable not specified, automatically selected `.vars = Arrivals`
#Looks like it has a slightly increasing seasonal variation that levels off around 2005. An upward trend is present for sure.
#b
zeal.training <- new.zeal %>%
filter(year(Quarter) < 2011)
zeal.test <- new.zeal %>%
filter(year(Quarter) > 2010)
zeal.training %>%
model(
ETS(Arrivals ~ error("M") +
trend("A") +
season("M"))
) %>%
forecast(h="2 years") %>%
autoplot(level = NULL) +
autolayer(new.zeal, Arrivals) +
labs(title = "Train forecast comparison to the actual data.")
#c
#Multiplicative seasonality is necessary because we have an increasing variance.
#d
fit.zeal <- zeal.training %>%
model(
'ETS model' = ETS(Arrivals),
'Additive to a Log Transformed' = ETS(log(Arrivals) ~ error("A") +
trend("A") +
season("A")),
'Seasonal Naïve Method' = SNAIVE(Arrivals),
'STL decomposition applied to the log transformed' = decomposition_model(STL(log(Arrivals)),
ETS(season_adjust))
)
zeal.fc <- fit.zeal %>%
forecast(h= "2 years")
zeal.fc %>%
autoplot(level = NULL) +
autolayer(zeal.test, Arrivals) +
guides(colour=guide_legend(title="Forecast")) +
labs(title = "New Zealand Visitors Train Forecast Comparison to the Actual Data.")
#e
accuracy(zeal.fc, zeal.test)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2012 Q4
## # A tibble: 4 x 11
## .model Origin .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Additive t~ NZ Test -20920. 27624. 20920. -6.78 6.78 NaN NaN -0.0169
## 2 ETS model NZ Test -10467. 18302. 12746. -3.27 4.16 NaN NaN -0.0857
## 3 Seasonal N~ NZ Test 5692. 14962. 14203. 2.27 4.97 NaN NaN -0.116
## 4 STL decomp~ NZ Test -22350. 28963. 22350. -7.28 7.28 NaN NaN -0.0118
#The seasonal naive method produced the lowest rmse.
Snaive.zeal <- zeal.training %>%
model(SNAIVE(Arrivals))
gg_tsresiduals(Snaive.zeal)
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).
#We have autocorrelation from the beginning. The residuals do not appear to be normally distributed around 0. They also do not appear to be resemble white noise. This model does not look great., so I would say it does not pass the test.
#f
zeal.cv <- new.zeal %>%
slice(1:(n() - 3)) %>%
stretch_tsibble(.init = 36, .step = 3)
zeal.cv %>%
model(
'ETS model' = ETS(Arrivals),
'Additive to a Log Transformed' = ETS(log(Arrivals) ~ error("A") +
trend("A") +
season("A")),
'Seasonal Naïve Method' = SNAIVE(Arrivals),
'STL decomposition applied to the log transformed' = decomposition_model(STL(log(Arrivals)),
ETS(season_adjust))
) %>%
forecast(h = 3) %>%
accuracy(new.zeal)
## # A tibble: 4 x 11
## .model Origin .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Additive to a ~ NZ Test -375. 14347. 11021. 0.200 5.83 0.741 0.746 0.309
## 2 ETS model NZ Test 4627. 15327. 11799. 2.23 6.45 0.793 0.797 0.283
## 3 Seasonal Naïve~ NZ Test 8244. 18768. 14422. 3.83 7.76 0.970 0.976 0.566
## 4 STL decomposit~ NZ Test 4252. 15618. 11873. 2.04 6.25 0.798 0.812 0.244
#Now the additive to a log transformed is giving the lowest rmse instead of the seasonal naive.
###Exercise 8
#a
cement <- aus_production %>%
select(Quarter, Cement) %>%
as_tsibble(index = Quarter)
autoplot(cement)
## Plot variable not specified, automatically selected `.vars = Cement`
cement.cv <- cement %>%
slice(1:(n()-4)) %>%
stretch_tsibble(.init = 5, .step = 1)
cement.model <- cement.cv %>%
model(ETS(Cement),
SNAIVE(Cement)) %>%
forecast(h = "1 year")
#b
cement.cv2 <- cement %>%
stretch_tsibble(.init = 5, .step = 4)
cement.model2 <- cement.cv2 %>%
model(ETS(Cement),
SNAIVE(Cement)) %>%
forecast(h= "1 year")
cement.accuracy <- accuracy(cement.model2, cement) %>%
mutate(MSE = (RMSE)^2) %>%
select(MSE, RMSE)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 3 observations are missing between 2010 Q3 and 2011 Q1
cement.accuracy
## # A tibble: 2 x 2
## MSE RMSE
## <dbl> <dbl>
## 1 9962. 99.8
## 2 18028. 134.
#Looks like our ETS model has the smaller mse and rmse so we'll go with that one. Looking at the data I would say that makes sense. The seasonal naive can't account for the clear upward trend of the data.