Do exercises 8.1, 8.5, 8.6, 8.7, 8.8, 8.9 in Forecasting: Principles and Practice. Please submit both your RPubs link and the .Rmd file with your code.
library(fpp3)
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble 3.1.8 ✔ tsibble 1.1.3
## ✔ dplyr 1.1.0 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.0 ✔ feasts 0.3.0
## ✔ lubridate 1.9.2 ✔ fable 0.3.2
## ✔ ggplot2 3.4.1 ✔ fabletools 0.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ readr 2.1.4 ✔ stringr 1.5.0
## ✔ purrr 1.0.1 ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::as.difftime() masks base::as.difftime()
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks lubridate::intersect(), base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks lubridate::setdiff(), base::setdiff()
## ✖ tsibble::union() masks lubridate::union(), base::union()
Consider the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.
head(aus_livestock)
## # A tsibble: 6 x 4 [1M]
## # Key: Animal, State [1]
## Month Animal State Count
## <mth> <fct> <fct> <dbl>
## 1 1976 Jul Bulls, bullocks and steers Australian Capital Territory 2300
## 2 1976 Aug Bulls, bullocks and steers Australian Capital Territory 2100
## 3 1976 Sep Bulls, bullocks and steers Australian Capital Territory 2100
## 4 1976 Oct Bulls, bullocks and steers Australian Capital Territory 1900
## 5 1976 Nov Bulls, bullocks and steers Australian Capital Territory 2100
## 6 1976 Dec Bulls, bullocks and steers Australian Capital Territory 1800
aus_livestock %>%
distinct(Animal)
## # A tibble: 7 × 1
## Animal
## <fct>
## 1 Bulls, bullocks and steers
## 2 Calves
## 3 Cattle (excl. calves)
## 4 Cows and heifers
## 5 Lambs
## 6 Pigs
## 7 Sheep
aus_livestock %>%
distinct(State)
## # A tibble: 8 × 1
## State
## <fct>
## 1 Australian Capital Territory
## 2 New South Wales
## 3 Northern Territory
## 4 Queensland
## 5 South Australia
## 6 Tasmania
## 7 Victoria
## 8 Western Australia
# filter to pigs slaughtered in Victoria, rescale counts to thousands, and preview the result
vic_pigs <- aus_livestock %>%
  filter(State == 'Victoria', Animal == 'Pigs') %>%
  summarise(Count = sum(Count / 1e3))
head(vic_pigs)
## # A tsibble: 6 x 2 [1M]
## Month Count
## <mth> <dbl>
## 1 1972 Jul 94.2
## 2 1972 Aug 106.
## 3 1972 Sep 96.5
## 4 1972 Oct 117.
## 5 1972 Nov 105.
## 6 1972 Dec 100.
tail(vic_pigs)
## # A tsibble: 6 x 2 [1M]
## Month Count
## <mth> <dbl>
## 1 2018 Jul 101.
## 2 2018 Aug 102.
## 3 2018 Sep 82.6
## 4 2018 Oct 101.
## 5 2018 Nov 98.5
## 6 2018 Dec 92.3
#model
fit <- vic_pigs %>%
model(ETS(Count ~ error("A") + trend("N") + season("N")))
fc <- fit %>%
forecast(h = 4)
report(fit)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3219175
##
## Initial states:
## l[0]
## 100.4949
##
## sigma^2: 87.4807
##
## AIC AICc BIC
## 6028.040 6028.084 6041.013
The estimated parameters are \(\alpha = 0.3219\) and \(\ell_0 = 100.4949\).
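These values can also be extracted directly from the fitted model; a small sketch using fabletools' tidy(), which returns the same estimates that report() prints:
# pull the estimated smoothing parameter and initial level as a tibble
tidy(fit)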
As we can observe below, the fitted values from the simple exponential smoothing model are one-step-ahead forecasts, so they track the original series with a lag of one step.
fc %>%
autoplot(vic_pigs) +
geom_line(aes(y = .fitted), col="#D55E00",
data = augment(fit)) +
labs(y="Count", title="Number of Pigs in Victoria") +
guides(colour = "none")
For a 95% prediction interval the multiplier is \(c = 1.96\), so the interval for the first forecast is \(\hat{y}_{T+1|T} \pm 1.96\,s\), where \(s\) is the standard deviation of the residuals.
# four-month forecast
fc <- fit %>% forecast(h = 4)
95% prediction interval computed by hand
#get the mean
y_hat <- fc$.mean[1]
#get the residuals
aug_fit <- augment(fit)
#get standard dev using residuals
s <- sd(aug_fit$.resid)
# Calculate the 95% prediction intervals
upper_95 <- y_hat + (s * 1.96)
lower_95 <- y_hat - (s * 1.96)
#lower interval
lower_95
## [1] 76.87131
#upper interval
upper_95
## [1] 113.5024
95% prediction interval generated by R
# Determine the model forecast 95% intervals
fc_hilo <- fc %>% hilo()
# Output model interval values
fc_hilo$`95%`[1]
## <hilo[1]>
## [1] [76.85509, 113.5186]95
We can see that the interval computed by hand and the one generated by R differ only by a few hundredths, so they are practically identical.
Data set global_economy contains the annual Exports from many countries. Select one country to analyse.
us_econ <- global_economy %>%
filter(Country == 'United States')
#sum(is.na(us_econ))
us_econ <- na.omit(us_econ)
us_econ %>% autoplot(Exports)+
labs(y = "% of GDP", title = "Exports: United States")
I picked the United States for this exercise. The series shows an upward trend with two large dips over the years. The trend also begins to decline around 2004, but it is unclear whether this is another dip, since the data end in 2017 and we cannot see whether exports recovered afterwards.
# Estimate parameters
fit <- us_econ %>%
model(ETS(Exports ~ error("A") + trend("N") + season("N")))
#create the forecast
fc <- fit %>%
forecast(h = 5)
fc %>%
autoplot(us_econ) +
geom_line(aes(y = .fitted), col="#D55E00",
data = augment(fit)) +
labs(y="% of GDP", title="Exports: United States") +
guides(colour = "none")
fit %>% accuracy()
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 United States "ETS(Expo… Trai… 0.125 0.632 0.468 1.35 5.09 0.982 0.991 0.239
fit_compare <- us_econ %>%
model(
ANN = ETS(Exports ~ error("A") + trend("N") + season("N")),
AAN = ETS(Exports ~ error("A") + trend("A") + season("N"))
)
accuracy(fit_compare)
## # A tibble: 2 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 United States ANN Train… 0.125 0.632 0.468 1.35 5.09 0.982 0.991 0.239
## 2 United States AAN Train… 0.00170 0.621 0.476 -0.123 5.30 0.997 0.974 0.233
fit_compare %>%
forecast(h=4) %>%
autoplot(us_econ, level=NULL) +
labs(title="Forecast Comparison",
subtitle = "United States Exports")
Using the RMSE to compare the models, the AAN model has a smaller RMSE than the ANN model (0.621 vs 0.632, a difference of roughly 0.011), suggesting AAN is the better-performing model of the two on the training data.
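Since the two training RMSEs are this close, a time series cross-validation comparison may be more convincing than training accuracy alone. A sketch using stretch_tsibble() (the .init value of 20 years is an arbitrary choice on my part):
# time series cross-validation: refit both models on stretching windows
# and compare one-step-ahead forecast accuracy
us_econ %>%
  stretch_tsibble(.init = 20, .step = 1) %>%
  model(
    ANN = ETS(Exports ~ error("A") + trend("N") + season("N")),
    AAN = ETS(Exports ~ error("A") + trend("A") + season("N"))
  ) %>%
  forecast(h = 1) %>%
  accuracy(us_econ)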
95% prediction interval calculated by hand
#get the mean
y_hat <- fc$.mean[1]
#get the residuals
aug_fit <- augment(fit)
#get standard dev using residuals
s <- sd(aug_fit$.resid)
# Calculate the 95% prediction intervals
upper_95 <- y_hat + (s * 1.96)
lower_95 <- y_hat - (s * 1.96)
#lower interval
lower_95
## [1] 10.66541
#upper interval
upper_95
## [1] 13.11596
95% prediction interval generated by R
# Determine the model forecast 95% intervals
fc_hilo <- fc %>% hilo()
# Output model interval values
fc_hilo$`95%`[1]
## <hilo[1]>
## [1] [10.62928, 13.15209]95
While the two intervals agree to roughly the first decimal place, they are not identical beyond that. The interval produced by R is based on the model's estimated forecast variance (\(\hat{\sigma}^2\) from the ETS fit) and the exact normal quantile, whereas the manual calculation uses the sample standard deviation of the residuals (which divides by \(n-1\)) and the rounded multiplier 1.96, so small differences are expected.
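The remaining gap is mostly down to the variance estimate. A sketch re-doing the manual interval with the model's own \(\hat{\sigma}^2\) (taken from glance()) and the exact normal quantile, which should land much closer to the fable interval:
# redo the manual interval using the model's estimated variance
s_model <- sqrt(glance(fit)$sigma2)
y_hat + c(-1, 1) * qnorm(0.975) * s_model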
Forecast the Chinese GDP from the global_economy data
set using an ETS model. Experiment with the various options in the
ETS() function to see how much the forecasts change with
damped trend, or with a Box-Cox transformation. Try to develop an
intuition of what each is doing to the forecasts.
[Hint: use a relatively large value of h when
forecasting, so you can clearly see the differences between the various
options when plotting the forecasts.]
china <- global_economy %>%
filter(Country == "China")
china %>% autoplot(GDP) +
labs(title="Chinese GDP")
The Chinese GDP series shows a strong upward trend, with no evidence of cyclicity or seasonality; the behaviour resembles exponential growth. GDP is almost stagnant from 1960 until roughly the mid-1990s, then increases dramatically from there onwards, with a brief flattening around 2015.
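A quick way to confirm the exponential-growth impression is to plot the series on a log scale, where exponential growth looks roughly linear; a small sketch:
# log scale: exponential growth appears approximately linear
china %>% autoplot(log(GDP)) +
  labs(title = "Chinese GDP (log scale)")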
Let’s take a look at the components of these data:
#STL decomposition
dcmp <- china %>%
model(stl = STL(GDP))
components(dcmp) %>% autoplot()
A Box-Cox transformation may be useful for stabilising the non-constant variance visible in the Chinese GDP data.
#obtain optimal lambda for BoxCox transform
lambda <- china %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)
fit <- china %>%
model(
# ETS
ETS = ETS(GDP),
# Log Transformation
`Log` = ETS(log(GDP)),
# Damped Model
`Damped` = ETS(GDP ~ trend("Ad")),
# Box-Cox Transformation
`Box-Cox` = ETS(box_cox(GDP, lambda)),
# Damped Model w Box-Cox Transformation
`Box-Cox, Damped` = ETS(box_cox(GDP, lambda) ~ trend("Ad"))
)
fit %>%
forecast(h="20 years") %>%
autoplot(china, level = NULL)+
labs(title="20 Year Forecast",
subtitle= "Chinese GDP") +
guides(colour = guide_legend(title = "Forecast"))
Based on the plot, the Box-Cox and log-transformed forecasts appear to slightly over-forecast Chinese GDP. The plain ETS forecast falls between the two damped models (the damped ETS and the damped ETS on the Box-Cox transformed data). The damped forecasts exhibit slower growth, while the transformed forecasts resemble continued exponential growth.
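To see how strongly the damped models flatten the trend, we can inspect the estimated damping parameter \(\phi\) (values close to 1 mean very little damping); a sketch pulling it from the fitted models with tidy():
# look at the estimated damping parameter for the damped models
tidy(fit) %>%
  filter(term == "phi")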
Find an ETS model for the Gas data from aus_production
and forecast the next few years. Why is multiplicative seasonality
necessary here? Experiment with making the trend damped. Does it improve
the forecasts?
aus_production %>% autoplot(Gas) +
  labs(title="Australian Gas Production")
Multiplicative seasonality is needed here because the size of the seasonal variation increases with the level of the series over time.
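As a sanity check, we can let ETS() choose the specification automatically; if multiplicative seasonality really is required, the selected model should include a multiplicative seasonal component (a sketch, output not shown here):
# let ETS() select the error/trend/season specification by AICc
aus_production %>%
  model(auto = ETS(Gas)) %>%
  report()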
fit <- aus_production %>%
model(
# Multiplicative
Multiplicative = ETS(Gas ~ error("M") + trend("A") + season("M")),
# Damped multiplicative
`Multiplicative, Damped` = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
)
fc <- fit %>% forecast(h = "5 years")
fc %>%
autoplot(aus_production, level = NULL) +
labs(title="Australian Gas Production") +
guides(colour = guide_legend(title = "Forecast"))
report(fit)
## # A tibble: 2 × 9
## .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Multiplicative 0.00324 -831. 1681. 1682. 1711. 21.1 32.2 0.0413
## 2 Multiplicative, Damped 0.00329 -832. 1684. 1685. 1718. 21.1 32.0 0.0417
Based on the AIC values from report() on our fit, as well as the plot, there doesn't seem to be much difference between the damped and un-damped forecasts. Both produce about the same results, with only a small difference in their AIC values; the un-damped model performs slightly better than the damped one, by 3.09909 AIC units.
Recall your retail time series data (from Exercise 8 in Section 2.10).
set.seed(123)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
a. Why is multiplicative seasonality necessary for this series?
As with the gas production data, this series shows an increasing trend, clear seasonality, and seasonal variation that grows over time. As discussed earlier, the multiplicative method is preferred when the seasonal variation changes in proportion to the level of the series.
#STL decomposition
dcmp3 <- myseries %>%
model(stl = STL(Turnover))
components(dcmp3) %>% autoplot()
We can also compare the multiplicative and additive methods below and conclude that the multiplicative method is better.
fit5 <- myseries %>%
model(
"Additive" = ETS(Turnover ~ error('A') + trend('A') + season('A')),
"Multiplicative" = ETS(Turnover ~ error('M') + trend('A') + season('M'))
)
fc5 <- fit5 %>%
forecast(h = 10)
fc5 %>% autoplot(myseries, level = NULL) +
labs(title = "Household Goods Turnover") +
guides(colour = guide_legend(title = "Forecasts"))
accuracy(fit5)
## # A tibble: 2 × 12
## State Indus…¹ .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Victo… Househ… Addit… Trai… -0.0163 26.4 20.0 -0.336 4.04 0.517 0.529 0.244
## 2 Victo… Househ… Multi… Trai… 2.13 24.3 18.3 0.125 3.40 0.473 0.488 0.316
## # … with abbreviated variable name ¹Industry
b. Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.
fit6 <- myseries %>%
  model(
    # Holt-Winters' multiplicative method with multiplicative errors and a damped trend
    "Holt-Winter" = ETS(Turnover ~ error('M') + trend('Ad') + season('M')),
    # same damped trend and multiplicative seasonality, but with additive errors
    "Damped Holt's Method" = ETS(Turnover ~ error('A') + trend('Ad') + season('M'))
  )
fc6 <- fit6 %>%
forecast(h = 15)
fc6 %>% autoplot(myseries, level = NULL) +
labs(title = "Household Goods Turnover") +
guides(colour = guide_legend(title = "Forecasts"))
c. Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?
In this case the "Damped Holt's Method" specification (additive errors) performs slightly better than the "Holt-Winter" specification (multiplicative errors); note that both models here use a damped trend with multiplicative seasonality. Both also outperform the multiplicative model from the previous part (training RMSE 23.6 and 23.7 vs 24.3).
accuracy(fit6)
## # A tibble: 2 × 12
## State Indus…¹ .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Victoria Househ… Holt-… Trai… 2.37 23.7 17.1 0.331 3.12 0.441 0.475 0.0862
## 2 Victoria Househ… Dampe… Trai… 2.35 23.6 16.9 0.258 3.16 0.436 0.472 0.0250
## # … with abbreviated variable name ¹Industry
d. Check that the residuals from the best method look like white noise.
The residual time plot shows a slight increase in variability from the year 2000 onwards. There is also some autocorrelation visible in the ACF of the residuals, although the histogram looks close to normal, so the residuals are not quite white noise.
best_fit <- myseries %>%
model(
"Damped Holt's Method" = ETS(Turnover ~ error('A') + trend('Ad') + season('M'))
)
best_fc <- best_fit %>%
forecast(h = 15)
best_fit %>% gg_tsresiduals()
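Beyond eyeballing the ACF, a Ljung-Box test gives a more formal check of the remaining autocorrelation; a sketch using lag = 24 (two years of monthly data), where a small p-value would indicate the residuals are not white noise:
# Ljung-Box test on the innovation residuals
augment(best_fit) %>%
  features(.innov, ljung_box, lag = 24)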
e. Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?
myseries_train <- myseries %>%
filter(year(Month) < 2011)
fit_train <- myseries_train %>%
model(
"SNAIVE" = SNAIVE(Turnover),
"Damped Holt's Method" = ETS(Turnover ~ error('A') + trend('Ad') + season('M'))
)
fc_train <- fit_train %>%
forecast(h = 15)
fc_train %>% autoplot(myseries_train, level = NULL) +
labs(title = "Household Goods Turnover") +
guides(colour = guide_legend(title = "Forecasts"))
fc_train %>% accuracy(myseries)
## # A tibble: 2 × 12
## .model State Indus…¹ .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Damped Ho… Vict… Househ… Test -28.2 41.8 33.6 -3.39 3.92 0.947 0.918 0.660
## 2 SNAIVE Vict… Househ… Test 33.7 42.3 36.0 3.73 4.00 1.02 0.928 0.411
## # … with abbreviated variable name ¹Industry
We can conclude from the graph and the accuracy metrics above that the damped Holt-Winters method outperforms the seasonal naïve approach on the test set (RMSE 41.8 vs 42.3).
For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?
#lambda for boxcox
lambda2 <- myseries_train %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
# Box-Cox transform Turnover (note: this replaces Turnover with the transformed values)
myseries_train_bx <- myseries_train
myseries_train_bx$Turnover <- box_cox(myseries_train$Turnover, lambda2)
As we can observe below, the Box-Cox transformation makes the seasonal variation much more constant, and it also stabilises the variability in the remainder.
#STL decomposition
dcmp4 <- myseries_train_bx %>%
model(stl = STL(Turnover))
components(dcmp4) %>% autoplot()
fit_train2 <- myseries_train_bx %>%
model(
"Damped Holt's Method" = ETS(Turnover ~ error('A') + trend('Ad') + season('M'))
)
fc_train2 <- fit_train2 %>%
forecast(h = 15)
fc_train2 %>% autoplot(myseries_train_bx, level = NULL) +
labs(title = "Household Goods Turnover") +
guides(colour = guide_legend(title = "Forecasts"))
accuracy(fit_train)
## # A tibble: 2 × 12
## State Indus…¹ .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Victor… Househ… SNAIVE Trai… 25.1 45.6 35.4 5.05 7.29 1 1 0.695
## 2 Victor… Househ… Dampe… Trai… 2.12 20.3 14.8 0.315 3.33 0.417 0.445 -0.0345
## # … with abbreviated variable name ¹Industry
accuracy(fit_train2)
## # A tibble: 1 × 12
## State Indus…¹ .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Victo… Househ… Dampe… Trai… 0.0255 0.240 0.182 0.143 1.16 0.435 0.466 0.00910
## # … with abbreviated variable name ¹Industry
According to the accuracy metrics above, the model fitted to the Box-Cox transformed series appears to have improved on the best model from the previous exercise: taken at face value it has the smallest training RMSE of the three (0.240). However, that RMSE is measured on the Box-Cox transformed scale, so the forecasts would need to be back-transformed to the original scale before the errors are directly comparable with the other models.
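An approach that follows the exercise wording more directly, and keeps the forecasts on the original scale so the test-set RMSE is comparable, would be fabletools' decomposition_model(): an STL decomposition of the Box-Cox transformed series, with ETS applied to the seasonally adjusted component (seasonal components are forecast with SNAIVE by default). A sketch, not run here; fable should back-transform the forecasts automatically because the transformation is part of the model specification:
# STL on the Box-Cox scale, ETS on the seasonally adjusted series
fit_stl_ets <- myseries_train %>%
  model(
    "STL + ETS" = decomposition_model(
      STL(box_cox(Turnover, lambda2)),
      ETS(season_adjust ~ error("A") + trend("Ad") + season("N"))
    )
  )
# test-set accuracy on the original scale
fit_stl_ets %>%
  forecast(h = 15) %>%
  accuracy(myseries)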