library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.3
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.0 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.5
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.3.2
## ✔ lubridate   1.9.3     ✔ fable       0.3.4
## ✔ ggplot2     3.5.1     ✔ fabletools  0.4.2
## Warning: package 'tsibble' was built under R version 4.3.3
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ readr   2.1.5
## ✔ purrr   1.0.2     ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(tsibble)
library(fable)
library(ggplot2)

Data 624 HW-5

  1. Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.

Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of # Use this in place of in your code or text and 0, and generate forecasts for the next four months.

# Load the dataset and filter for Victoria's pigs data
Vic_pigs <- aus_livestock %>%
  filter(State == "Victoria", Animal == "Pigs")

# Fit an ETS model equivalent to simple exponential smoothing (SES)
fit_ses <- Vic_pigs %>%
  model(ETS(Count ~ error("A") + trend("N") + season("N")))

# Forecast for the next 4 months
fc_ses <- fit_ses %>%
  forecast(h = "4 months")

# Plot the forecast
autoplot(fc_ses, Vic_pigs) +
  labs(title = "Number of Pigs Slaughtered in Victoria with 4 Month Forecast", y = "Number of Pigs")

Compute a 95% prediction interval for the first forecast using ^y±1.96s where s is the standard deviation of the residuals. Compare your interval with the interval produced by R.

# After fitting the model
fit_ses <- Vic_pigs %>%
  model(ETS(Count ~ error("A") + trend("N") + season("N")))

# Get the report which contains the sigma^2
fit_report <- report(fit_ses)
## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3221247 
## 
##   Initial states:
##      l[0]
##  100646.6
## 
##   sigma^2:  87480760
## 
##      AIC     AICc      BIC 
## 13737.10 13737.14 13750.07
# Extract sigma^2 from the report
sigma_squared <- fit_report$sigma^2 
## Warning: Unknown or uninitialised column: `sigma`.
# Calculate the first forecast value
first_forecast <- fc_ses$.mean[1]

# Calculate the 95% prediction interval using y_hat ± 1.96 * s
lower_bound <- first_forecast - 1.96 * sqrt(sigma_squared)
upper_bound <- first_forecast + 1.96 * sqrt(sigma_squared)

# Print the results
cat("95% prediction interval for the first forecast: (", lower_bound, ",", upper_bound, ")\n")
## 95% prediction interval for the first forecast: (  ,  )
alpha = 0.3221247 
  ℓ0 = 100646.6

These are the optimal values for the alpha and ℓ0.

  1. Data set global_economy contains the annual Exports from many countries. Select one country to analyse.

Plot the Exports series and discuss the main features of the data. Use an ETS(A,N,N) model to forecast the series, and plot the forecasts. Compute the RMSE values for the training data. Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set. Compare the forecasts from both methods. Which do you think is best? Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.

# Select data for Australia
exports_data <- global_economy %>% filter(Country == "Australia")

# Plot the exports data
autoplot(exports_data, Exports) +
  labs(title = "Australian Exports",
       y = "Exports")

# ETS(A,N,N) model and forecast
fit_ets_ann <- exports_data %>%
  model(ETS(Exports ~ error("A") + trend("N") + season("N")))

fc_ann <- forecast(fit_ets_ann)

# Plot forecast
autoplot(fc_ann)

# Compute RMSE
accuracy(fit_ets_ann)
## # A tibble: 1 × 11
##   Country   .model        .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <fct>     <chr>         <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 Australia "ETS(Exports… Trai… 0.232  1.15 0.914  1.09  5.41 0.928 0.928 0.0125
# Compare with ETS(A,A,N) model
fit_ets_aan <- exports_data %>%
  model(ETS(Exports ~ error("A") + trend("A") + season("N")))

accuracy(fit_ets_aan)
## # A tibble: 1 × 11
##   Country   .model     .type       ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <fct>     <chr>      <chr>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Australia "ETS(Expo… Trai… -7.46e-7  1.12 0.893 -0.387  5.39 0.907 0.904 0.109
  1. Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.

[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]

# Filter for China GDP data
china_gdp <- global_economy %>% filter(Country == "China")

# Visualize the subset
china_gdp %>% autoplot(GDP) +
  labs(y = "GDP", title = "Chinese Gross Domestic Product")

# Fit an ETS model
fit_china_ets <- china_gdp %>%
  model(ETS(GDP))

# Forecast and plot
fc_china_ets <- forecast(fit_china_ets, h = 20)
autoplot(fc_china_ets)

# Experiment with damped trend and Box-Cox
fit_china_ets_damped <- china_gdp %>%
  model(ETS(GDP ~ error("A") + trend("Ad", phi = 0.9)))

fc_china_damped <- forecast(fit_china_ets_damped, h = 20)
autoplot(fc_china_damped)

  1. Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?
# Glimpse at the dataset
aus_production %>%
    autoplot(Gas)

# Filter Gas data
gas_data <- aus_production %>%
  select(Quarter, Gas)  

# Fit ETS model with multiplicative seasonality
fit_gas_ets <- gas_data %>%
  model(ETS(Gas ~ error("M") + trend("A") + season("M")))

# Forecast and plot for 3 years ahead
fc_gas <- forecast(fit_gas_ets, h = "3 years")
autoplot(fc_gas)

# Fit ETS model with damped trend
fit_gas_damped <- gas_data %>%
  model(ETS(Gas ~ error("M") + trend("Ad") + season("M")))

# Forecast and plot for 3 years ahead
fc_gas_damped <- forecast(fit_gas_damped, h = "3 years")
autoplot(fc_gas_damped)

  1. Recall your retail time series data (from Exercise 7 in Section 2.10).

Why is multiplicative seasonality necessary for this series? Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped. Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer? Check that the residuals from the best method look like white noise. Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?

set.seed(1234)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1)) 

fit_my <- myseries %>%
  model(
    multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    `damped multiplicative` = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
  ) 

fit_my %>%
  forecast(h = 36) %>%
  autoplot(myseries, level = NULL) +
  labs(title = "Australian Retail Turnover") +
  guides(colour = guide_legend(title = "Forecast"))

# Compare RMSE
accuracy(fit_my) %>%
  select(.model, RMSE)
## # A tibble: 2 × 2
##   .model                 RMSE
##   <chr>                 <dbl>
## 1 multiplicative         1.34
## 2 damped multiplicative  1.36
# Check residuals for the best method
myseries %>%
  model(multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))) %>%
  gg_tsresiduals() +
  ggtitle("Multiplicative Method")

# Box-Pierce test
myseries %>%
  model(multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))) %>%
  augment() %>% 
  features(.innov, box_pierce, lag = 24, dof = 0)
## # A tibble: 1 × 5
##   State    Industry                                     .model bp_stat bp_pvalue
##   <chr>    <chr>                                        <chr>    <dbl>     <dbl>
## 1 Tasmania Cafes, restaurants and takeaway food servic… multi…    27.2     0.296
# Ljung-Box test
myseries %>%
  model(multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))) %>%
  augment() %>% 
  features(.innov, ljung_box, lag = 24, dof = 0)
## # A tibble: 1 × 5
##   State    Industry                                     .model lb_stat lb_pvalue
##   <chr>    <chr>                                        <chr>    <dbl>     <dbl>
## 1 Tasmania Cafes, restaurants and takeaway food servic… multi…    28.0     0.260
# Training and test RMSE
myseries_train <- myseries %>%
  filter(year(Month) < 2011)

fit_train <- myseries_train %>%
  model(
    multi = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    snaive = SNAIVE(Turnover)
  )

fc <- fit_train %>%
  forecast(new_data = anti_join(myseries, myseries_train))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
fc %>% autoplot(myseries, level = NULL)

fc %>% accuracy(myseries) %>%
  select(.type, .model, RMSE)
## # A tibble: 2 × 3
##   .type .model  RMSE
##   <chr> <chr>  <dbl>
## 1 Test  multi   3.99
## 2 Test  snaive  9.13
  1. For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?
# Finding the Box-Cox lambda
lambda <- myseries_train %>%
  features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero)

# STL decomposition applied to the Box-Cox transformed data
myseries_train %>%
  model(STL(box_cox(Turnover, lambda) ~ season(window = "periodic"), robust = TRUE)) %>%
  components() %>%
  autoplot() +
  ggtitle("STL with Box-Cox")

# Get seasonally adjusted data
dcmp <- myseries_train %>%
  model(STL_box = STL(box_cox(Turnover, lambda) ~ season(window = "periodic"), robust = TRUE)) %>%
  components()

# Create new variable for seasonally adjusted turnover
myseries_train <- myseries_train %>%
  mutate(Turnover_sa = dcmp$season_adjust)

# Modeling on the seasonally adjusted data
fit <- myseries_train %>%
  model(ETS(Turnover_sa ~ error("M") + trend("A") + season("M")))

# Checking residuals
fit %>% gg_tsresiduals() +
  ggtitle("Residual Plots for Australian Retail Turnover")

# Produce forecasts for test data
fc <- fit %>%
  forecast(new_data = anti_join(myseries, myseries_train))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
# Accuracy on training data
fit %>% accuracy() %>%
  select(.model, .type, RMSE)
## # A tibble: 1 × 3
##   .model                                                           .type    RMSE
##   <chr>                                                            <chr>   <dbl>
## 1 "ETS(Turnover_sa ~ error(\"M\") + trend(\"A\") + season(\"M\"))" Traini… 0.141

Accuracy on test data

fc %>% accuracy(myseries) %>% select(.model, .type, RMSE)