Homework 5: Exponential Smoothing

Instructions

Do exercises 8.1, 8.5, 8.6, 8.7, 8.8, 8.9 in Forecasting: Principles and Practice. Please submit both your Rpubs link as well as attach the .rmd file with your code.

Packages

library(fpp3)
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble      3.1.8     ✔ tsibble     1.1.3
## ✔ dplyr       1.1.0     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.0     ✔ feasts      0.3.0
## ✔ lubridate   1.9.2     ✔ fable       0.3.2
## ✔ ggplot2     3.4.1     ✔ fabletools  0.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2
## ──
## ✔ readr   2.1.4     ✔ stringr 1.5.0
## ✔ purrr   1.0.1     ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::as.difftime() masks base::as.difftime()
## ✖ lubridate::date()        masks base::date()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ tsibble::intersect()     masks lubridate::intersect(), base::intersect()
## ✖ tsibble::interval()      masks lubridate::interval()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ tsibble::setdiff()       masks lubridate::setdiff(), base::setdiff()
## ✖ tsibble::union()         masks lubridate::union(), base::union()

Exercises

8.1

Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.

  1. Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of \(\alpha\) and \(\ell_0\), and generate forecasts for the next four months.
head(aus_livestock)
## # A tsibble: 6 x 4 [1M]
## # Key:       Animal, State [1]
##      Month Animal                     State                        Count
##      <mth> <fct>                      <fct>                        <dbl>
## 1 1976 Jul Bulls, bullocks and steers Australian Capital Territory  2300
## 2 1976 Aug Bulls, bullocks and steers Australian Capital Territory  2100
## 3 1976 Sep Bulls, bullocks and steers Australian Capital Territory  2100
## 4 1976 Oct Bulls, bullocks and steers Australian Capital Territory  1900
## 5 1976 Nov Bulls, bullocks and steers Australian Capital Territory  2100
## 6 1976 Dec Bulls, bullocks and steers Australian Capital Territory  1800
aus_livestock %>%
  distinct(Animal)
## # A tibble: 7 × 1
##   Animal                    
##   <fct>                     
## 1 Bulls, bullocks and steers
## 2 Calves                    
## 3 Cattle (excl. calves)     
## 4 Cows and heifers          
## 5 Lambs                     
## 6 Pigs                      
## 7 Sheep
aus_livestock %>%
  distinct(State)
## # A tibble: 8 × 1
##   State                       
##   <fct>                       
## 1 Australian Capital Territory
## 2 New South Wales             
## 3 Northern Territory          
## 4 Queensland                  
## 5 South Australia             
## 6 Tasmania                    
## 7 Victoria                    
## 8 Western Australia
#filter data and preview it
vic_pigs <- aus_livestock %>% 
  filter(State=='Victoria', Animal=='Pigs') %>% 
  summarise(Count = sum(Count/1e3))

head(vic_pigs)
## # A tsibble: 6 x 2 [1M]
##      Month Count
##      <mth> <dbl>
## 1 1972 Jul  94.2
## 2 1972 Aug 106. 
## 3 1972 Sep  96.5
## 4 1972 Oct 117. 
## 5 1972 Nov 105. 
## 6 1972 Dec 100.
tail(vic_pigs)
## # A tsibble: 6 x 2 [1M]
##      Month Count
##      <mth> <dbl>
## 1 2018 Jul 101. 
## 2 2018 Aug 102. 
## 3 2018 Sep  82.6
## 4 2018 Oct 101. 
## 5 2018 Nov  98.5
## 6 2018 Dec  92.3
#model
fit <- vic_pigs %>%
  model(ETS(Count ~ error("A") + trend("N") + season("N")))
fc <- fit %>%
  forecast(h = 4)

report(fit)
## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3219175 
## 
##   Initial states:
##      l[0]
##  100.4949
## 
##   sigma^2:  87.4807
## 
##      AIC     AICc      BIC 
## 6028.040 6028.084 6041.013

\(\alpha\) = 0.3219
\(\ell\) = 100.4949

As we can observe below, the simple exponential smoothing function that we applied to model these data follows the original graph one step ahead.

fc %>%
  autoplot(vic_pigs) +
  geom_line(aes(y = .fitted), col="#D55E00",
            data = augment(fit)) +
  labs(y="Count", title="Number of Pigs in Victoria") +
  guides(colour = "none")

  1. Compute a 95% prediction interval for the first forecast using \(\hat{y} \pm 1.96s\) where \(s\) is the standard deviation of the residuals. Compare your interval with the interval produced by R.

Multiplier for 95% prediction interval, \(c\): 1.96

#four month forecast
fc <-fit %>% forecast(h = 4)

Computed 95% prediction interval

#get the mean
y_hat <- fc$.mean[1]

#get the residuals
aug_fit <- augment(fit)

#get standard dev using residuals
s <- sd(aug_fit$.resid)

# Calculate the 95% prediction intervals
upper_95 <- y_hat + (s * 1.96)
lower_95 <- y_hat - (s * 1.96)


#lower interval
lower_95
## [1] 76.87131
#upper interval
upper_95
## [1] 113.5024

R generated 95% prediction interval

# Determine the model forecast 95% intervals
fc_hilo <- fc %>% hilo()

# Output model interval values
fc_hilo$`95%`[1]
## <hilo[1]>
## [1] [76.85509, 113.5186]95

We can see that our results by hand and those of R are off by just a few decimals, thus they are practically identical.

8.5

Data set global_economy contains the annual Exports from many countries. Select one country to analyse.

us_econ <- global_economy %>% 
  filter(Country == 'United States') 

#sum(is.na(us_econ))

us_econ <- na.omit(us_econ)
  1. Plot the Exports series and discuss the main features of the data.
us_econ %>% autoplot(Exports)+
  labs(y = "% of GDP", title = "Exports: United States")

I have picked US for this time series. We can see an upward trend with 2 big dips throughout the years. We can also observe that this trend starts to go down around the year 2004, but it is unclear whether this is another dip as we don’t have data past 2017. Thus we can’t see if it has gone back up.

  1. Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.
# Estimate parameters
fit <- us_econ %>%
  model(ETS(Exports ~ error("A") + trend("N") + season("N")))

#create the forecast
fc <- fit %>%
  forecast(h = 5)
fc %>%
  autoplot(us_econ) +
  geom_line(aes(y = .fitted), col="#D55E00",
            data = augment(fit)) +
  labs(y="% of GDP", title="Exports: United States") +
  guides(colour = "none")

  1. Compute the RMSE values for the training data.
fit %>% accuracy()
## # A tibble: 1 × 11
##   Country       .model     .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <fct>         <chr>      <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 United States "ETS(Expo… Trai… 0.125 0.632 0.468  1.35  5.09 0.982 0.991 0.239
  1. Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.
fit_compare <- us_econ %>%
  model(
    ANN = ETS (Exports ~ error("A") + trend("N") + season("N")),
    AAN = ETS (Exports ~ error("A") + trend("A") + season("N"))
    )
accuracy(fit_compare)
## # A tibble: 2 × 11
##   Country       .model .type       ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <fct>         <chr>  <chr>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 United States ANN    Train… 0.125   0.632 0.468  1.35   5.09 0.982 0.991 0.239
## 2 United States AAN    Train… 0.00170 0.621 0.476 -0.123  5.30 0.997 0.974 0.233
  1. Compare the forecasts from both methods. Which do you think is best?
fit_compare %>% 
  forecast(h=4) %>% 
  autoplot(us_econ, level=NULL) +
  labs(title="Forecast Comparison",
       subtitle = "United States Exports")

Using the RMSE to compare the models, the AAN model provides a smaller RMSE value than that of the ANN model suggesting AAN is the better performing model of the two. The AAN performed better by approximately 0.011 over the ANN.

  1. Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.

Calculated 95% prediction interval

#get the mean
y_hat <- fc$.mean[1]

#get the residuals
aug_fit <- augment(fit)

#get standard dev using residuals
s <- sd(aug_fit$.resid)

# Calculate the 95% prediction intervals
upper_95 <- y_hat + (s * 1.96)
lower_95 <- y_hat - (s * 1.96)

#lower interval
lower_95
## [1] 10.66541
#upper interval
upper_95
## [1] 13.11596

R generated 95% prediction interval

# Determine the model forecast 95% intervals
fc_hilo <- fc %>% hilo()

# Output model interval values
fc_hilo$`95%`[1]
## <hilo[1]>
## [1] [10.62928, 13.15209]95

While the two intervals are similar to the first decimal place (without rounding), they are not identical beyond that. The variance of the residuals generated using R use a more accurate critical value than the 1.96 used in our manual calculation in addition to degrees of freedom being taken into account in the R generated interval.

8.6

Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.

[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]

china <- global_economy %>%
  filter(Country == "China")

china %>% autoplot(GDP) +
  labs(title="Chinese GDP")

The Chinese GDP data shows a strong upward trend, with no evidence of a cycle or season. The activity resembles that of exponential growth. GDP is almost stagnant from the year 1960 up until what looks like the mid 1990s and then shows an incredible increase from there on wards with a minor stagnant point around 2015.

Let’s take a look at the components of these data:

#STL decomposition
dcmp <- china %>%
  model(stl = STL(GDP))

components(dcmp) %>% autoplot()

A Box-Cox transformation may be beneficial in eliminating the non-constant variance shown in the Chinese GDP data.

#obtain optimal lambda for BoxCox transform
lambda <- china %>%
  features(GDP, features = guerrero) %>%
  pull(lambda_guerrero)
fit <- china %>% 
  model(
    # ETS
    ETS = ETS(GDP),
    # Log Transformation
    `Log` = ETS(log(GDP)),
    # Damped Model
    `Damped` = ETS(GDP ~ trend("Ad")),
    # Box-Cox Transformation
    `Box-Cox` = ETS(box_cox(GDP, lambda)),
    # Damped Model w Box-Cox Transformation
    `Box-Cox, Damped` = ETS(box_cox(GDP, lambda) ~ trend("Ad"))
)

fit %>%
  forecast(h="20 years") %>%
  autoplot(china, level = NULL)+
  labs(title="20 Year Forecast",
       subtitle= "Chinese GDP") +
  guides(colour = guide_legend(title = "Forecast"))

Based on the plot, our Box-Cox and Log transformed forecast appear slightly to over-forecast for the Chinese GDP data. The generic ETS model falls between our damped models which are simple damped model of the ETS, and a Box-Cox with dampening. The dampened forecast plots exhibit slower growth than the transformed forecasts where the transformed forecasts resemble continued exponential growth.

8.7

Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?

aus_production %>% autoplot(Gas)+
  labs(title="Austrailian Gas Production")

Multiplicative seasonality is needed in this case because there is seasonal variation that increases with time.

fit <- aus_production %>%
  model(
    # Multiplicative
    Multiplicative = ETS(Gas ~ error("M") + trend("A") + season("M")),
    # Damped multiplicative
    `Multiplicative, Damped` = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
  )
fc <- fit %>% forecast(h = "5 years")

fc %>%
  autoplot(aus_production, level = NULL) +
  labs(title="Australian Gas Production") +
  guides(colour = guide_legend(title = "Forecast"))

report(fit)
## # A tibble: 2 × 9
##   .model                  sigma2 log_lik   AIC  AICc   BIC   MSE  AMSE    MAE
##   <chr>                    <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 Multiplicative         0.00324   -831. 1681. 1682. 1711.  21.1  32.2 0.0413
## 2 Multiplicative, Damped 0.00329   -832. 1684. 1685. 1718.  21.1  32.0 0.0417

Based on the AIC values derived from the report() of our fit as well as our plot, there doesn’t seem to be much difference between the damped and un-damped forecast. Both produce about the same results, with very small difference in their AIC values where the un-damped forecast performs slightly better than the damped by 3.09909.

8.8

Recall your retail time series data (from Exercise 8 in Section 2.10).

set.seed(123)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

a. Why is multiplicative seasonality necessary for this series?

As observed previously with the data from gas production, we can observe that these data have an increasing trend, seasonality as well as variation. As discussed earlier, the Multiplicative method is preferred when the seasonal variations are changing proportional to the level of the series.

#STL decomposition
dcmp3 <- myseries %>%
  model(stl = STL(Turnover))

components(dcmp3) %>% autoplot()

we can also compare below the Multipliatie and Additive methods and conclude that the Multiplicative method is better.

fit5 <- myseries %>%
  model(
    "Additive" = ETS(Turnover ~ error('A') + trend('A') + season('A')),
    "Multiplicative" = ETS(Turnover ~ error('M') + trend('A') + season('M'))
  )

fc5 <- fit5 %>%
  forecast(h = 10)

fc5 %>% autoplot(myseries, level = NULL) +
  labs(title = "Household Goods Turnover") +
  guides(colour = guide_legend(title = "Forecasts"))

accuracy(fit5)
## # A tibble: 2 × 12
##   State  Indus…¹ .model .type      ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>   <chr>  <chr>   <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Victo… Househ… Addit… Trai… -0.0163  26.4  20.0 -0.336  4.04 0.517 0.529 0.244
## 2 Victo… Househ… Multi… Trai…  2.13    24.3  18.3  0.125  3.40 0.473 0.488 0.316
## # … with abbreviated variable name ¹​Industry

b. Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.

fit6 <- myseries %>%
  model(
    "Holt-Winter" = ETS(Turnover ~ error('M') + trend('Ad') + season('M')),
    "Damped Holt's Method" = ETS(Turnover ~ error('A') + trend('Ad') + season('M'))
  )

fc6 <- fit6 %>%
  forecast(h = 15)

fc6 %>% autoplot(myseries, level = NULL) +
  labs(title = "Household Goods Turnover") +
  guides(colour = guide_legend(title = "Forecasts"))

c. Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?

In this case the damped methods seems to be slightly better than the Holt-Winter method, and they both clearly outperform the Multiplicative method in our previous example.

accuracy(fit6)
## # A tibble: 2 × 12
##   State    Indus…¹ .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>    <chr>   <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 Victoria Househ… Holt-… Trai…  2.37  23.7  17.1 0.331  3.12 0.441 0.475 0.0862
## 2 Victoria Househ… Dampe… Trai…  2.35  23.6  16.9 0.258  3.16 0.436 0.472 0.0250
## # … with abbreviated variable name ¹​Industry

d. Check that the residuals from the best method look like white noise.

It seems that the residuals on the time plot show a slight increase in variability from the year 2000 and on. We can also observe that there is some correlation on the ACF of the residuals but the histogram seems to be close to normal.

best_fit <- myseries %>%
  model(
    "Damped Holt's Method" = ETS(Turnover ~ error('A') + trend('Ad') + season('M'))
  )

best_fc <- best_fit %>%
  forecast(h = 15)

best_fit %>% gg_tsresiduals()

e. Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?

myseries_train <- myseries %>%
  filter(year(Month) < 2011)

fit_train <- myseries_train %>%
  model(
    "SNAIVE" = SNAIVE(Turnover),
    "Damped Holt's Method" = ETS(Turnover ~ error('A') + trend('Ad') + season('M'))
  )

fc_train <- fit_train %>%
  forecast(h = 15)

fc_train %>% autoplot(myseries_train, level = NULL) +
  labs(title = "Household Goods Turnover") +
  guides(colour = guide_legend(title = "Forecasts"))

fc_train %>% accuracy(myseries)
## # A tibble: 2 × 12
##   .model     State Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Damped Ho… Vict… Househ… Test  -28.2  41.8  33.6 -3.39  3.92 0.947 0.918 0.660
## 2 SNAIVE     Vict… Househ… Test   33.7  42.3  36.0  3.73  4.00 1.02  0.928 0.411
## # … with abbreviated variable name ¹​Industry

We can conclude from the above graph and accuracy metrics that the damped method is superior to the seasonal naive approach.

8.9

For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?

#lambda for boxcox
lambda2 <- myseries_train %>%
  features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero)
#boxcox transformation
myseries_train_bx <- myseries_train
myseries_train_bx$Turnover <- box_cox(myseries_train$Turnover,lambda2)

As we can observe below, the Box-Cox transformation made the seasonality a lot more constant as well as the variability in the remainder.

#STL decomposition
dcmp4 <- myseries_train_bx %>%
  model(stl = STL(Turnover))

components(dcmp4) %>% autoplot()

fit_train2 <- myseries_train_bx %>%
  model(
    "Damped Holt's Method" = ETS(Turnover ~ error('A') + trend('Ad') + season('M'))
  )

fc_train2 <- fit_train2 %>%
  forecast(h = 15)

fc_train2 %>% autoplot(myseries_train_bx, level = NULL) +
  labs(title = "Household Goods Turnover") +
  guides(colour = guide_legend(title = "Forecasts"))

accuracy(fit_train)
## # A tibble: 2 × 12
##   State   Indus…¹ .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>   <chr>   <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 Victor… Househ… SNAIVE Trai… 25.1   45.6  35.4 5.05   7.29 1     1      0.695 
## 2 Victor… Househ… Dampe… Trai…  2.12  20.3  14.8 0.315  3.33 0.417 0.445 -0.0345
## # … with abbreviated variable name ¹​Industry
accuracy(fit_train2)
## # A tibble: 1 × 12
##   State  Indus…¹ .model .type     ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>  <chr>   <chr>  <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 Victo… Househ… Dampe… Trai… 0.0255 0.240 0.182 0.143  1.16 0.435 0.466 0.00910
## # … with abbreviated variable name ¹​Industry

According to our accuracy metrics above, it appears that the model improved with the Box-Cox transformation and it now outperforms the best version of our model in the previous exercise. However, I believe we would have to scale the data back to their original form to make a more accurate comparison.

Based on the RMSE values, the Box-Cox STL model is the best performing out of the three with an RMSE of 0.048.