Load libraries

library(tidyverse)
library(fpp3)

8.1 Exercises

Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.

Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of α and ℓ0, and generate forecasts for the next four months.

The optimal value for alpha is .3221247 and l is 100646.6.

# filter for State Victoria and animal as Pigs
pigs <- aus_livestock %>%
  filter(Animal == "Pigs" & State == "Victoria")

autoplot(pigs) +
  labs(title = "Pigs slaughtered in Victoria")

## Plot variable not specified, automatically selected `.vars = Count`

fit <- pigs %>%
  model(ANN = ETS(Count ~ error("A") + trend("N") + season("N")))

report(fit)

## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3221247 
## 
##   Initial states:
##      l[0]
##  100646.6
## 
##   sigma^2:  87480760
## 
##      AIC     AICc      BIC 
## 13737.10 13737.14 13750.07

Compute a 95% prediction interval for the first forecast using y ± 1.96s where s is the standard deviation of the residuals. Compare your interval with the interval produced by R.

The interval for 95% prediction in r and the manual calculation differs slightly.

pigs_fc <- pigs %>%
 model(ANN = ETS(Count ~ error("A") + trend("N") + season("N"))) %>%
  forecast( h = 5) %>%
  hilo(level = 95) %>%
  mutate(lower = `95%`$lower, upper=`95%`$upper) %>%
  pull(lower, upper) %>%
  head(1)

pigs_fc

## 113518.325972343 
##         76854.79

pigs_fc <- pigs %>%
 model(ANN = ETS(Count ~ error("A") + trend("N") + season("N"))) %>%
  forecast( h = 5) 
pigs_fc

## # A fable: 5 x 6 [1M]
## # Key:     Animal, State, .model [1]
##   Animal State    .model    Month             Count  .mean
##   <fct>  <fct>    <chr>     <mth>            <dist>  <dbl>
## 1 Pigs   Victoria ANN    2019 Jan N(95187, 8.7e+07) 95187.
## 2 Pigs   Victoria ANN    2019 Feb N(95187, 9.7e+07) 95187.
## 3 Pigs   Victoria ANN    2019 Mar N(95187, 1.1e+08) 95187.
## 4 Pigs   Victoria ANN    2019 Apr N(95187, 1.1e+08) 95187.
## 5 Pigs   Victoria ANN    2019 May N(95187, 1.2e+08) 95187.

# mean +- 1.96 * s
mean = 95186.56
s = sqrt(87480760)

lower = mean - (1.96 * s)
upper = mean + (1.96 * s)

lower

## [1] 76854.45

upper

## [1] 113518.7

8.5 Exercises

Data set global_economy contains the annual Exports from many countries. Select one country to analyse.

Plot the Exports series and discuss the main features of the data.

There was a decline in exports in Peru from 1960 to 1978 and then in had an increase in exports for a few years before you saw a decline from 1988 to 2000 before it increased again.

 global_economy %>%
  filter(Country == "Peru") %>%
  autoplot(Exports) +
  labs(title = "Peru Exports")

Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.

peru_fit <- global_economy %>%
  filter(Country == "Peru") %>%
  model(ETS(Exports ~ error("A") + trend("N") + season("N"))) 

peru_fc <- peru_fit %>%
  forecast(h = 10)

peru_fc %>%
  autoplot(global_economy)

Compute the RMSE values for the training data.

accuracy(peru_fit)$RMSE

## [1] 2.862248

Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.

Comparing the two ETS model the ETS(A,N,N) has more of a flat straight line compared to ETS(A,A,N) which is slightly pivoted up for the forecast.

peru_fit2 <- global_economy %>%
  filter(Country == "Peru") %>%
  model(ETS(Exports ~ error("A") + trend("A") + season("N"))) 

peru_fc2 <- peru_fit2 %>%
  forecast(h = 10)

peru_fc2 %>%
  autoplot(global_economy)

accuracy(peru_fit2)$RMSE

## [1] 2.862429

Compare the forecasts from both methods. Which do you think is best?

The RMSE is better in the ETS(A,N, N) than in the ETS(A,A,N)

Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.

THe RMSE values in the peru_fit model was slightly better than the peru_fit2 model.

peru_fc %>%
  hilo(level = 95) %>%
  mutate(lower = `95%`$lower, upper=`95%`$upper) %>%
  pull(lower, upper) %>%
  head(1)

## 29.9725591202677 
##         18.55416

report(peru_fit)

## Series: Exports 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.9999 
## 
##   Initial states:
##    l[0]
##  20.199
## 
##   sigma^2:  8.4851
## 
##      AIC     AICc      BIC 
## 363.4922 363.9366 369.6735

peru_fc <- peru_fit %>%
  forecast(h = 10)
peru_fc

## # A fable: 10 x 5 [1Y]
## # Key:     Country, .model [1]
##    Country .model                                          Year    Exports .mean
##    <fct>   <chr>                                          <dbl>     <dist> <dbl>
##  1 Peru    "ETS(Exports ~ error(\"A\") + trend(\"N\") + …  2018 N(24, 8.5)  24.3
##  2 Peru    "ETS(Exports ~ error(\"A\") + trend(\"N\") + …  2019  N(24, 17)  24.3
##  3 Peru    "ETS(Exports ~ error(\"A\") + trend(\"N\") + …  2020  N(24, 25)  24.3
##  4 Peru    "ETS(Exports ~ error(\"A\") + trend(\"N\") + …  2021  N(24, 34)  24.3
##  5 Peru    "ETS(Exports ~ error(\"A\") + trend(\"N\") + …  2022  N(24, 42)  24.3
##  6 Peru    "ETS(Exports ~ error(\"A\") + trend(\"N\") + …  2023  N(24, 51)  24.3
##  7 Peru    "ETS(Exports ~ error(\"A\") + trend(\"N\") + …  2024  N(24, 59)  24.3
##  8 Peru    "ETS(Exports ~ error(\"A\") + trend(\"N\") + …  2025  N(24, 68)  24.3
##  9 Peru    "ETS(Exports ~ error(\"A\") + trend(\"N\") + …  2026  N(24, 76)  24.3
## 10 Peru    "ETS(Exports ~ error(\"A\") + trend(\"N\") + …  2027  N(24, 85)  24.3

# mean +- 1.96 * s
mean = 24.3
s = sqrt(8.84851)

lower2 = mean - (1.96 * s)
upper2 = mean + (1.96 * s)

lower2

## [1] 18.4697

upper2

## [1] 30.1303

peru_fc2 %>%
  hilo(level = 95) %>%
  mutate(lower = `95%`$lower, upper=`95%`$upper) %>%
  pull(lower, upper) %>%
  head(1)

## 30.137769072594 
##         18.5091

report(peru_fit2)

## Series: Exports 
## Model: ETS(A,A,N) 
##   Smoothing parameters:
##     alpha = 0.9998999 
##     beta  = 0.0001002129 
## 
##   Initial states:
##      l[0]       b[0]
##  20.69266 0.06006188
## 
##   sigma^2:  8.8004
## 
##      AIC     AICc      BIC 
## 367.4995 368.6533 377.8017

peru_fc2 <- peru_fit2 %>%
  forecast(h = 10)
peru_fc2

## # A fable: 10 x 5 [1Y]
## # Key:     Country, .model [1]
##    Country .model                                          Year    Exports .mean
##    <fct>   <chr>                                          <dbl>     <dist> <dbl>
##  1 Peru    "ETS(Exports ~ error(\"A\") + trend(\"A\") + …  2018 N(24, 8.8)  24.3
##  2 Peru    "ETS(Exports ~ error(\"A\") + trend(\"A\") + …  2019  N(24, 18)  24.4
##  3 Peru    "ETS(Exports ~ error(\"A\") + trend(\"A\") + …  2020  N(24, 26)  24.4
##  4 Peru    "ETS(Exports ~ error(\"A\") + trend(\"A\") + …  2021  N(25, 35)  24.5
##  5 Peru    "ETS(Exports ~ error(\"A\") + trend(\"A\") + …  2022  N(25, 44)  24.6
##  6 Peru    "ETS(Exports ~ error(\"A\") + trend(\"A\") + …  2023  N(25, 53)  24.6
##  7 Peru    "ETS(Exports ~ error(\"A\") + trend(\"A\") + …  2024  N(25, 62)  24.7
##  8 Peru    "ETS(Exports ~ error(\"A\") + trend(\"A\") + …  2025  N(25, 70)  24.7
##  9 Peru    "ETS(Exports ~ error(\"A\") + trend(\"A\") + …  2026  N(25, 79)  24.8
## 10 Peru    "ETS(Exports ~ error(\"A\") + trend(\"A\") + …  2027  N(25, 88)  24.9

# mean +- 1.96 * s
mean = 24.3
s = sqrt(8.8004)

lower2 = mean - (1.96 * s)
upper2 = mean + (1.96 * s)

lower2

## [1] 18.48557

upper2

## [1] 30.11443

8.6 Exercises

Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.

All methods were conducted using an additive model due to the linear trend in the data. With the simple exponential smoothing the line was more flat as it takes into account for more recent data. In the holt linear there was a slight slope in the data and the additive damped model it had a bigger slope. In the box cox method the data increased and pivot up and with the additive damped it was similar but with less pivot. The box cox forecast lines are more smooth and consistent in intervals.

#getting lambda for China - GDP
chn_lambda <- global_economy %>%
  filter(Code =="CHN") %>%
  features(GDP, features = guerrero) %>%
  pull(lambda_guerrero)

#different methods
fit <- global_economy %>%
  filter(Code == "CHN") %>%
  model(
    simple = ETS(GDP ~ error("A") + trend("N") + season("N")),
    holt_linear = ETS(GDP ~ error("A") + trend("A") + season("N")),
    additive_damped = ETS(GDP ~ error("A") + trend("Ad") + season("N")),
    box_cox = ETS(box_cox(GDP, chn_lambda) ~ error("A") + trend("Ad") + season("N")),
    box_cox_damped = ETS(box_cox(GDP, chn_lambda) ~ error("A") + trend("Ad", phi = 0.8) + season("N"))
  )  
#forecast
fc <- fit %>%
  forecast(h = 30)
#chart
fc %>%
  autoplot(global_economy, level = NULL) + 
  labs(title = "China GDP") +
  guides(colour = guide_legend(title = "Methods"))

8.7 Exercises

Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?

A multiplicative seasonality will be necessary because the data has increasing trend and the intervals are growing. With the trend damped it will increase the range on the bottom end of the data. The multiplicative damped model was better as their RSME was lower than the multiplicative model.

gas_fit <- aus_production %>%
  model(
    multi = ETS(Gas ~ error("M") + trend("A") + season("M")),
    multi_damped = ETS(Gas ~ error("M") + trend("Ad", phi = 0.8) + season("M"))
  )  

gas_fc <- gas_fit %>%
  forecast(h = 30)

gas_fc %>%
  autoplot(aus_production, level = NULL) + 
  labs(title = "Gas Production") +
  guides(colour = guide_legend(title = "Methods"))

accuracy(gas_fit)

## # A tibble: 2 × 10
##   .model       .type        ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>        <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 multi        Training -0.115  4.60  3.02 0.199  4.08 0.542 0.606 -0.0131
## 2 multi_damped Training  0.435  4.56  3.04 0.892  4.18 0.545 0.601 -0.0387

Exercises 8.8

Recall your retail time series data (from Exercise 7 in Section 2.10).

set.seed(12345678)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

myseries %>%
  autoplot()

## Plot variable not specified, automatically selected `.vars = Turnover`

a. Why is multiplicative seasonality necessary for this series?

A multiplicative seasonality is necessary for this series is because the data has an increasing trend and the intervals are increasing larger and larger over time.

Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.

myseries_fit <- myseries %>%
  model(
    multi = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    multi_damped = ETS(Turnover ~ error("M") + trend("Ad", phi = .8) + season("M"))
  )  

myseries_fc <- myseries_fit %>%
  forecast(h = 30)

myseries_fc %>%
  autoplot(myseries, level = NULL) + 
  labs(title = "Australia Turnover Retail") +
  guides(colour = guide_legend(title = "Methods"))

accuracy(myseries_fit)

## # A tibble: 2 × 12
##   State       Industry .model .type      ME  RMSE   MAE    MPE  MAPE  MASE RMSSE
##   <chr>       <chr>    <chr>  <chr>   <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 Northern T… Clothin… multi  Trai… -0.0128 0.613 0.450 -0.469  5.15 0.513 0.529
## 2 Northern T… Clothin… multi… Trai…  0.0495 0.619 0.452  0.303  5.18 0.516 0.534
## # ℹ 1 more variable: ACF1 <dbl>

Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?

I prefer the multiplicative model as the RMSE was slightly lower than the damped trend.

Check that the residuals from the best method look like white noise.

With the p-value greater than .05 there is white noise in the data.

myseries %>%
  model(
    multi = ETS(Turnover ~ error("M") + trend("A") + season("M"))
  )  %>%
  gg_tsresiduals()

#Ljung-box test
myseries %>%
  model(multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))) %>%
  augment() %>% 
  features(.innov, ljung_box, lag = 24)

## # A tibble: 1 × 5
##   State              Industry                           .model lb_stat lb_pvalue
##   <chr>              <chr>                              <chr>    <dbl>     <dbl>
## 1 Northern Territory Clothing, footwear and personal a… multi…    34.5    0.0765

Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?

The seasonal naive approach has a lower RMSE than the multiplicative model.

#training data
train <- myseries %>%
  filter(year(Month) <= 2010)

# test data
test_data <- myseries %>%
  filter(year(Month) > 2010)

# fit the model
fit_train <- train %>%
  model(
    multi = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    snaive = SNAIVE(Turnover))  

#forecast
train_fc <- fit_train %>%
  forecast(new_data = test_data)

#calculate RMSE
accuracy(train_fc, test_data) %>%
  select(.model,.type, RMSE)

## # A tibble: 2 × 3
##   .model .type  RMSE
##   <chr>  <chr> <dbl>
## 1 multi  Test   1.78
## 2 snaive Test   1.55

# Plot the actual data and forecasts
train_fc %>%
  autoplot(myseries, level = NULL) +
  labs(title = "Forecasts for Turnover",
       x = "Month",
       y = "Turnover") +
  guides(colour = guide_legend(title = "Model")) +
  theme_minimal()

Exercises 8.9

For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?

The RMSE in this one is better than the previous one.

lambda <- myseries %>%
  features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero)

stl_comp <- myseries %>%
  model(STL = STL(box_cox(Turnover,lambda) ~ season(window = "periodic"), robust = TRUE)) %>%
  components()

myseries$season_adj <- stl_comp$season_adjust

train2 <- myseries %>%
  filter(year(Month) <= 2010)

fit2 <- train2 %>%
  model(ETS(season_adj ~ error("M") + trend("A") + season("M")))

fc2 <- fit2 %>%
  forecast(new_data = anti_join(myseries, train2))

## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover,
## season_adj)`

fit2 %>% accuracy() %>%
  select(.model, .type, RMSE)

## # A tibble: 1 × 3
##   .model                                                          .type     RMSE
##   <chr>                                                           <chr>    <dbl>
## 1 "ETS(season_adj ~ error(\"M\") + trend(\"A\") + season(\"M\"))" Traini… 0.0789

fc2 %>% accuracy(myseries) %>%
  select(.model, .type, RMSE)

## # A tibble: 1 × 3
##   .model                                                          .type  RMSE
##   <chr>                                                           <chr> <dbl>
## 1 "ETS(season_adj ~ error(\"M\") + trend(\"A\") + season(\"M\"))" Test  0.325

Homework 5

Anna Moy

2024-10-06