library(fpp3)
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## âś” tibble      3.2.1     âś” tsibble     1.1.4
## âś” dplyr       1.1.2     âś” tsibbledata 0.4.1
## âś” tidyr       1.3.0     âś” feasts      0.3.1
## âś” lubridate   1.9.2     âś” fable       0.3.3
## âś” ggplot2     3.4.4     âś” fabletools  0.3.4
## Warning: package 'tsibble' was built under R version 4.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## âś– lubridate::date()    masks base::date()
## âś– dplyr::filter()      masks stats::filter()
## âś– tsibble::intersect() masks base::intersect()
## âś– tsibble::interval()  masks lubridate::interval()
## âś– dplyr::lag()         masks stats::lag()
## âś– tsibble::setdiff()   masks base::setdiff()
## âś– tsibble::union()     masks base::union()
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

Ask

Do exercises 8.1, 8.5, 8.6, 8.7, 8.8, 8.9 in Hyndman. Please submit both the link to your Rpubs and the .pdf file with your run code


8.1 - Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.

pigs <- aus_livestock |>
  filter(State == "Victoria" & Animal == "Pigs")

head(pigs)
autoplot(pigs)
## Plot variable not specified, automatically selected `.vars = Count`

Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of a and l_0, and generate forecasts for the next four months.

# Estimate parameters
fit <- pigs |>
  model(ETS(Count ~ error("A") + trend("N") + season("N")))
fit |>
  report()
## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3221247 
## 
##   Initial states:
##      l[0]
##  100646.6
## 
##   sigma^2:  87480760
## 
##      AIC     AICc      BIC 
## 13737.10 13737.14 13750.07

The optimal values for alpha is 0.3221247. This means that about 32% of the forecast is based on the most recent observationand initial states is 100646.6.

fc <- fit |>
  forecast(h = 4)

fc |>
  autoplot(pigs) +
  geom_line(aes(y = .fitted), col="#D55E00",
            data = augment(fit)) +
  labs(y="Count", title="Victorian Pigs Slaughtered") +
  guides(colour = "none")

The forecast for the next four months is shown above.

Compute a 95% prediction interval for the first forecast using y +- 1.96s where s is the standard deviation of the residuals. Compare your interval with the interval produced by R.

fc
y_hat <- fc$.mean[1]
resid(fit)
fit_df <- augment(fit)
sd<- sd(fit_df$.resid)
upper <- y_hat + (1.96 * sd)
lower <- y_hat - (1.96 * sd)
c(lower,upper)
## [1]  76871.01 113502.10
fc_hilo <- fc %>% hilo()
fc_hilo$`95%`[1]
## <hilo[1]>
## [1] [76854.79, 113518.3]95

The hilo function and my computed values differ but do not have a significant difference.

8.5 Data set global_economy contains the annual Exports from many countries. Select one country to analyse.

JA <- global_economy |>
  filter(Country == "Jamaica")
head(JA)

Plot the Exports series and discuss the main features of the data.

JA |>
  autoplot(Exports) +
  labs(x= "Year", y="Exports", title="Jamaican Exports") +
  guides(colour = "none")

Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.

fit <- JA |>
  model(ETS(Exports ~ error("A") + trend("N") + season("N")))
fit |>
  report()
## Series: Exports 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.7074404 
## 
##   Initial states:
##      l[0]
##  33.26479
## 
##   sigma^2:  29.5677
## 
##      AIC     AICc      BIC 
## 435.8980 436.3424 442.0793
fc <- fit |>
  forecast(h = 4)

fc |>
  autoplot(JA) +
  geom_line(aes(y = .fitted), col="#D55E00",
            data = augment(fit)) +
  labs(y="Exports", title="Jamaican Exports") +
  guides(colour = "none")

Compute the RMSE values for the training data.

accuracy(fit)

The RMSE value for the model is 5.343044.

Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.

models <- JA |>
  model(
    AAN = ETS(Exports ~ error("A") + trend("A") + season("N")),
    ANN = ETS(Exports ~ error("A") + trend("N") + season("N"))
    )
accuracy(models)

The AAN model had a higher RMSE than the ANN model.

Compare the forecasts from both methods. Which do you think is best?

fc <- models |>
  forecast(h= 8)
fc |>
  autoplot(JA, level = NULL)

The AAN model shows an increase in the amount of Exports in its forecast. This is more plausible than the ANN model which shows no increase or decrease over the forecast.

Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.

Fit 1

fit1 <- JA |>
  model(
    AAN = ETS(Exports ~ error("A") + trend("A") + season("N"))
    )
accuracy(fit1)
fc1 <- fit1 |>
  forecast(h= 8)
fc1 |>
  autoplot(JA, level = NULL)

fc1
y_hat_1 <- fc1$.mean[1]
resid(fit1)
fit_df <- augment(fit1)
sd<- sd(fit_df$.resid)
upper <- y_hat_1 + (1.96 * sd)
lower <- y_hat_1 - (1.96 * sd)
c(lower,upper)
## [1] 22.62591 43.76015
fc_hilo <- fc1 %>% hilo()
fc_hilo$`95%`[1]
## <hilo[1]>
## [1] [22.33654, 44.04952]95

Fit 2

fit2 <- JA |>
  model(ANN = ETS(Exports ~ error("A") + trend("N") + season("N"))
    )
accuracy(fit2)
fit2
fc2 <- fit2 |>
  forecast(h= 8)
fc2 |>
  autoplot(JA, level = NULL)

fc2
y_hat_2 <- fc2$.mean[1]
head(resid(fit2))
fit_df <- augment(fit2)
sd<- sd(fit_df$.resid)
upper <- y_hat_2 + (1.96 * sd)
lower <- y_hat_2 - (1.96 * sd)
upper
## [1] 43.72768
lower
## [1] 22.60003

The upper value is 43.7276848 and the lower value is 22.6000288.

Using the hilo function the results differ from my computed values by less than 0.1.

fc_hilo <- fc2 %>% hilo()
fc_hilo$`95%`[1]
## <hilo[1]>
## [1] [22.50632, 43.82139]95

Fit1 had an R computed interval of [22.33654, 44.04952]. I calculated for the first model [22.62591, 43.76015]. My interval for the second fit was [22.60003, 43.72768]. Fit 2 had an R computed interval of [22.50632, 43.82139].

My computed intervals were close to the R computed value.

8.6 Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.

global_economy |>
  filter(Country == "China") |>
  autoplot(GDP)

Before using BoxCox, we can use the guerrero formula to determine the optimal lambda value.

lambda <- global_economy |>
  filter(Country == "China") |>
  features(GDP, features = guerrero) |>
  pull(lambda_guerrero)
fit <- global_economy |>
  filter(Country == "China") |>
  model(
    # ETS
    ETS = ETS(GDP),
    `Damped` = ETS(GDP ~ trend("Ad")),
    `Box-Cox` = ETS(box_cox(GDP, lambda))
)

CH <- global_economy |>
  filter(Country == "China")  
fit %>%
  forecast(h=10) %>%
  autoplot(CH)

8.7 Find an ETS model for the Gas data from aus_production and forecast the next few years.

aus_production |>
  autoplot(Gas)

Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?

Multiplicative seasonality is needed because there are changes to the heights of seasonal periods over time.

fit <- aus_production |>
  model(
    Multiplicative = ETS(Gas ~ error("M") + trend("A") + season("M")),
    Damped = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
  )

fc <- fit %>% forecast(h = "5 years")

fc %>%
  autoplot(aus_production, level = NULL) +
  labs(title="Australian Gas Production") +
  guides(colour = guide_legend(title = "Forecast"))

accuracy(fit )

As shown in the table above, the Damped model shows a better result based on the RMSE than the Multiplicative model.

8.8 Recall your retail time series data (from Exercise 7 in Section 2.10).

set.seed(811)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

Why is multiplicative seasonality necessary for this series?

It accounts for the variance over time.

Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.

fit <- myseries %>%
  model(
    `Holt-Winters’ Multiplicative` = ETS(Turnover ~ error("M") + trend("A") +
                                                season("M")),
    `Holt-Winters’ Damped Multiplicative` = ETS(Turnover ~ error("M") + trend("Ad") +
                                                season("M"))
  )
fc <- fit %>% forecast(h = "5 years")
fc %>%
  autoplot(myseries, level = NULL) +
  labs(title="Australian Department Stores",
       y="Turnover") +
  guides(colour = guide_legend(title = "Forecast"))

Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?

accuracy(fit)

The RMSE of the model with least RMSE value is the Holt-Winters’ Damped Multiplicative method.

Check that the residuals from the best method look like white noise.

fit |>
  select("Holt-Winters’ Damped Multiplicative") |>
  gg_tsresiduals()

There is no clear pattern in the residuals plot. However, the ACF plot shows lag values which are outside of the dashed lines. Additionally the re

Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naĂŻve approach from Exercise 7 in Section 5.11?

myseries_train <- myseries |>
  filter(year(Month) < 2011)

autoplot(myseries, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red")

Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.

# seasonal naĂŻve
fit <- myseries_train %>%
  model(
    "Holt-Winters' Damped" = ETS(Turnover ~ error("M") + trend("Ad") +
                                            season("M")),
    "Holt-Winters' Multiplicative" = ETS(Turnover ~ error("M") + trend("A") +
                                                    season("M"))
    )

Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?

accuracy(fit)

The multiplicative method had the lower RMSE value, so I would prefer to use the multiplicatice model.

Check that the residuals from the best method look like white noise.

fit |>
  select("Holt-Winters' Multiplicative") |>
  gg_tsresiduals()

The model shows residuls with two values outside of the dashed lines, but it is less than 5% so the residuals are white noise. Additionally, the residuals do appear to have a normal distribution.

8.9 For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?

Again, to get the optimal lambda value, I used the guerrero feature.

lambda <- myseries_train |>
  features(Turnover, features = guerrero) |>
  pull(lambda_guerrero)

Box_cox_train <- myseries_train |>
  mutate(
    Turnover_Box_Cox = box_cox(Turnover, lambda)
  )
fit <- Box_cox_train |>
  model(
    'STL' = STL(Turnover_Box_Cox ~ season(window = "periodic"),
             robust = T),
    'ETS' = ETS(Turnover_Box_Cox)
  )

The accuracy of the above fit is shown below.

accuracy(fit)

The previous best fit (from the Holt-Winters’ Multiplicative model) had a RMSE value of 1.069499. These models both performed better than the Multiplicative model I produced in the previous exercise.