Homework 5

Load Libraries

library(fpp3)

## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr

## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──

## ✔ tibble      3.2.1     ✔ tsibble     1.1.6
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.4.1
## ✔ lubridate   1.9.3     ✔ fable       0.4.1
## ✔ ggplot2     3.5.1

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

library(forecast)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(dplyr)
library(distributional)
library(purrr)
library(tsibble)

8.1 - Simple Exponential Smoothing (SES)

(a) Fit SES Model for Pig Population in Victoria

This section loads and processes data on pig populations in Victoria. The data is converted into a time series (tsibble) format and fitted with a Simple Exponential Smoothing (SES) model using an ETS(A,N,N) approach. The model extracts key parameters and generates forecasts for the next four months.

library(fpp3)

data <- aus_livestock %>%
  filter(Animal == "Pigs", State == "Victoria")

data_ts <- data %>% select(Month, Count) %>% as_tsibble(index = Month)

model <- data_ts %>%
  model(SES = ETS(Count ~ error("A") + trend("N") + season("N")))

alpha <- coef(model) %>% filter(term == "alpha") %>% pull(estimate)
l0 <- coef(model) %>% filter(term == "l[0]") %>% pull(estimate)

forecasts <- model %>%
  forecast(h = 4)

(b) Compute Prediction Intervals

This section computes the standard deviation of residuals to construct a 95% manual prediction interval for the first forecasted value. It then compares the manually computed interval with R’s built-in prediction interval.

residuals <- model %>% augment() %>% pull(.resid)
s <- sd(residuals, na.rm = TRUE)

first_forecast <- forecasts %>% slice(1) %>% pull(.mean)
lower_bound <- first_forecast - 1.96 * s
upper_bound <- first_forecast + 1.96 * s

cat("Optimal α:", alpha, "\n")

## Optimal α: 0.3221247

cat("Optimal ℼ0:", l0, "\n")

## Optimal ℼ0: 100646.6

cat("Manual 95% Prediction Interval for first forecast:", lower_bound, "-", upper_bound, "\n")

## Manual 95% Prediction Interval for first forecast: 76871.01 - 113502.1

forecasts %>%
  filter(row_number() == 1) %>%
  hilo(level = 95) %>%
  as_tibble()

## # A tibble: 1 × 5
##   .model    Month
##   <chr>     <mth>
## 1 SES    2019 Jan
## # ℹ 3 more variables: Count <dist>, .mean <dbl>, `95%` <hilo>

8.5 - Forecasting U.S. Exports

Data Preparation

This section loads and processes export data for the United States. Missing values are checked and interpolated where necessary.

data <- global_economy %>%
  filter(Country == "United States") %>%
  select(Year, Exports) %>%
  as_tsibble(index = Year)

sum(is.na(data$Exports))

## [1] 1

data <- data %>%
  mutate(Exports = na.interp(Exports))

(a) Visualizing the Exports Data

This section generates a time series plot of U.S. exports.

autoplot(data, Exports) +
  labs(title = "Exports of the United States",
       y = "Exports (USD Billions)",
       x = "Year") +
  theme_minimal()

## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.

(b) Forecasting Exports with ETS Models

This section fits two different ETS models: a simple exponential smoothing model (ETS(A,N,N)) and a Holt’s linear trend model (ETS(A,A,N)).

model_ANN <- data %>%
  model(ETS(Exports ~ error("A") + trend("N") + season("N")))

forecast_ANN <- model_ANN %>%
  forecast(h = 10)

autoplot(data, Exports) +
  autolayer(forecast_ANN, level = 95, alpha = 0.3) +
  labs(title = "ETS(A,N,N) Forecast for Exports",
       y = "Exports (USD Billions)",
       x = "Year") +
  theme_minimal()

## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.

model_AAN <- data %>%
  model(ETS(Exports ~ error("A") + trend("A") + season("N")))

forecast_AAN <- model_AAN %>%
  forecast(h = 10)

autoplot(data, Exports) +
  autolayer(forecast_AAN, level = 95, alpha = 0.3) +
  labs(title = "ETS(A,A,N) Forecast for Exports",
       y = "Exports (USD Billions)",
       x = "Year") +
  theme_minimal()

## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.

(c) Computing RMSE Values

Root Mean Squared Error (RMSE) is calculated for both models to assess their accuracy.

residuals_ANN <- model_ANN %>% augment() %>% pull(.resid)
rmse_ANN <- sqrt(mean(residuals_ANN^2, na.rm = TRUE))

residuals_AAN <- model_AAN %>% augment() %>% pull(.resid)
rmse_AAN <- sqrt(mean(residuals_AAN^2, na.rm = TRUE))

print(paste("RMSE for ETS(A,N,N):", rmse_ANN))

## [1] "RMSE for ETS(A,N,N): 0.621637990745711"

print(paste("RMSE for ETS(A,A,N):", rmse_AAN))

## [1] "RMSE for ETS(A,A,N): 0.609914374286451"

(d) Comparing the Forecasts

A visualization comparing both ETS models’ forecasts.

forecast_comparison <- forecast_ANN %>%
  mutate(Model = "ETS(A,N,N)") %>%
  bind_rows(
    forecast_AAN %>%
      mutate(Model = "ETS(A,A,N)")
  )

ggplot(forecast_comparison, aes(x = Year, y = .mean, color = Model)) +
  geom_line(size = 1) +
  labs(title = "Comparison of ETS(A,N,N) vs ETS(A,A,N) Forecasts",
       y = "Exports (USD Billions)",
       x = "Year") +
  theme_minimal()

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

(e) Interpretation of Forecasts

ETS(A,N,N) produces a flat forecast, suitable for stable exports.
ETS(A,A,N) suggests an increasing trend, making it preferable if exports have shown consistent growth.

(f) Computing 95% Prediction Intervals

A 95% confidence interval is calculated for the first forecasted value in both models.

first_forecast_ANN <- forecast_ANN %>% slice(1) %>% pull(.mean)
first_forecast_AAN <- forecast_AAN %>% slice(1) %>% pull(.mean)

lower_ANN <- first_forecast_ANN - 1.96 * rmse_ANN
upper_ANN <- first_forecast_ANN + 1.96 * rmse_ANN

lower_AAN <- first_forecast_AAN - 1.96 * rmse_AAN
upper_AAN <- first_forecast_AAN + 1.96 * rmse_AAN

print(paste("95% Prediction Interval for ETS(A,N,N):", lower_ANN, "-", upper_ANN))

## [1] "95% Prediction Interval for ETS(A,N,N): 10.6722119645154 - 13.1090328882386"

print(paste("95% Prediction Interval for ETS(A,A,N):", lower_AAN, "-", upper_AAN))

## [1] "95% Prediction Interval for ETS(A,A,N): 10.8177686545507 - 13.2086330017536"

8.6 Forecasting China’s GDP Using ETS Models

Visualizing Historical GDP Data

This section filters the global economy dataset to extract GDP data for China and visualizes its historical trend.

china_gdp <- global_economy %>% filter(Country == "China") %>% select(Year, GDP)

autoplot(china_gdp, GDP) + 
  ggtitle("Historical Chinese GDP") + 
  ylab("GDP in USD Billions")

Building ETS Models

Fit three different ETS models: an automatic selection, a damped trend model, and a model with a Box-Cox transformation to stabilize variance.

library(fpp3)

china_gdp <- global_economy %>% filter(Country == "China") %>% select(Year, GDP)

ets_auto <- china_gdp %>% model(ETS(GDP))
forecast_ets_auto <- ets_auto %>% forecast(h = 50)

ets_damped <- china_gdp %>% model(ETS(GDP ~ trend("Ad")))
forecast_ets_damped <- ets_damped %>% forecast(h = 50)

ets_boxcox <- china_gdp %>% model(ETS(box_cox(GDP, 0.3)))
forecast_ets_boxcox <- ets_boxcox %>% forecast(h = 50)

Comparing Forecasts

This visualization compares the forecasts generated by the three models, allowing for an assessment of their differences and reliability.

autoplot(china_gdp, GDP) +
  autolayer(forecast_ets_auto, GDP, color = "blue", alpha = 0.7) +
  autolayer(forecast_ets_damped, GDP, color = "red", alpha = 0.7) +
  autolayer(forecast_ets_boxcox, GDP, color = "green", alpha = 0.7) +
  ggtitle("Comparison of ETS Forecasts for Chinese GDP (h = 50)") +
  ylab("GDP in USD Billions") +
  labs(color = "Model") +
  scale_color_manual(values = c("blue" = "ETS Auto", "red" = "ETS Damped", "green" = "ETS Box-Cox"))

## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.
## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.

## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's colour values.

Model Insights

Standard ETS models may overestimate long-term GDP growth, leading to overly optimistic projections.
Damped ETS models provide more conservative and likely more realistic forecasts by slowing down excessive growth.
Box-Cox transformation helps stabilize variance, making predictions more reliable when dealing with large-scale economic data.

8.7 - Forecasting Australian Gas Production

Load and Visualize Gas Production Data

This section loads the Australian gas production data and visualizes the historical trends.

gas_data <- aus_production %>% select(Quarter, Gas)

autoplot(gas_data, Gas) +
  ggtitle("Australian Gas Production Over Time") +
  ylab("Gas Production (Petajoules)")

ETS Model with Multiplicative Seasonality

This model accounts for seasonal variations that increase proportionally with the level of the series.

ets_multiplicative <- gas_data %>% model(ETS(Gas ~ season("M")))

forecast_ets_multiplicative <- ets_multiplicative %>% forecast(h = "10 years")

Multiplicative seasonality is necessary when seasonal fluctuations grow as the overall level of production increases.

ETS Model with a Damped Trend

This model includes a damped trend component to avoid over-predictions of future growth.

ets_damped <- gas_data %>% model(ETS(Gas ~ trend("Ad") + season("M")))

forecast_ets_damped <- ets_damped %>% forecast(h = "10 years")

Damping prevents the model from over-extrapolating trends into the future.

Comparison of Forecasts

This plot compares the two ETS models to assess which provides a more realistic projection.

autoplot(gas_data, Gas) +
  autolayer(forecast_ets_damped, series = "ETS Damped", color = "red", alpha = 0.5) +  
  autolayer(forecast_ets_multiplicative, series = "ETS Multiplicative", color = "blue", alpha = 0.5) +  
  ggtitle("Comparison of ETS Forecasts for Gas Production") +
  ylab("Gas Production (Petajoules)") +
  theme_minimal()

## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `series`

## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters:
## `series`

## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `series`

## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters:
## `series`

## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.

8.8 Retail Time Series Analysis

(a) Select and Visualize a Random Retail Time Series

This section selects a random retail time series from the aus_retail dataset, converts it to a tsibble, and plots the original and log-transformed series.

set.seed(12345678)

myseries <- aus_retail |> 
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1)) |> 
  as_tsibble(index = Month)  

autoplot(myseries, Turnover) +
  ggtitle("Randomly Selected Retail Time Series") +
  ylab("Turnover (Millions of AUD)") +
  theme_minimal()

autoplot(myseries, log(Turnover)) +
  ggtitle("Log-Transformed Retail Time Series") +
  ylab("Log Turnover (Millions of AUD)") +
  theme_minimal()

Multiplicative seasonality is necessary when seasonal variations increase as the overall level of the series increases. The log-transformed series appears more stable, confirming the need for multiplicative seasonality.

(b) Apply Holt-Winters’ Multiplicative Method

This section applies the Holt-Winters’ multiplicative method to forecast the data and includes a damped trend version for comparison.

library(fpp3)

hw_multiplicative <- myseries %>% 
  model(ETS(Turnover ~ error("A") + trend("N") + season("M")))

hw_damped <- myseries %>% 
  model(ETS(Turnover ~ error("A") + trend("Ad") + season("M")))

fc_multiplicative <- hw_multiplicative %>% forecast(h = "3 years")
fc_damped <- hw_damped %>% forecast(h = "3 years")

fc_combined <- bind_rows(
  fc_multiplicative %>% mutate(Model = "Multiplicative"),
  fc_damped %>% mutate(Model = "Damped Trend")
)

autoplot(myseries, Turnover) +
  autolayer(fc_multiplicative, series = "Holt-Winters Multiplicative", color = "blue", alpha = 0.6) +
  autolayer(fc_damped, series = "Holt-Winters Damped", color = "red", alpha = 0.6) +
  ggtitle("Comparison of Holt-Winters' Multiplicative vs. Damped Trend Forecasts") +
  ylab("Turnover (Millions of AUD)") +
  theme_minimal()

## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `series`

## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters:
## `series`

## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `series`

## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters:
## `series`

## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.

(c) Compare the RMSE of One-Step Forecasts

The RMSE values for both methods are computed to determine which model produces smaller forecasting errors.

accuracy_multiplicative <- accuracy(hw_multiplicative)
accuracy_damped <- accuracy(hw_damped)

rmse_multiplicative <- accuracy_multiplicative %>% filter(.type == "Training") %>% pull(RMSE)
rmse_damped <- accuracy_damped %>% filter(.type == "Training") %>% pull(RMSE)

print(paste("RMSE for Holt-Winters' Multiplicative Model:", rmse_multiplicative))

## [1] "RMSE for Holt-Winters' Multiplicative Model: 0.603112648151485"

print(paste("RMSE for Holt-Winters' Damped Trend Model:", rmse_damped))

## [1] "RMSE for Holt-Winters' Damped Trend Model: 0.602472390488493"

A lower RMSE indicates better forecasting accuracy. If the damped model has a lower RMSE, it suggests that dampening the trend prevents overestimation of future values.

(d) Check for White Noise in Residuals

best_model_residuals <- augment(hw_damped) %>% pull(.resid)

best_model_residuals_ts <- ts(best_model_residuals, start = c(1988, 4), frequency = 12)  

autoplot(best_model_residuals_ts) +
  ggtitle("Residuals from the Best Model") +
  ylab("Residuals") +
  theme_minimal()

ljung_test <- Box.test(best_model_residuals, lag = 10, type = "Ljung-Box")
print(ljung_test)

## 
##  Box-Ljung test
## 
## data:  best_model_residuals
## X-squared = 19.452, df = 10, p-value = 0.03489

resid_df <- data.frame(residuals = best_model_residuals)

ggplot(resid_df, aes(x = residuals)) +
  geom_histogram(bins = 30, fill = "blue", alpha = 0.5) +
  ggtitle("Histogram of Residuals") +
  theme_minimal()

ggplot(resid_df, aes(sample = residuals)) +
  stat_qq() +
  stat_qq_line() +
  ggtitle("QQ-Plot of Residuals") +
  theme_minimal()

Since the p-value is less than 0.05, the residuals don’t resemble white noise. A bell-shaped histogram and QQ-plot alignment along the diagonal suggest normal residuals.

(e) Test Set RMSE and Seasonal Naïve Model Comparison

The test set RMSE is calculated to compare the Holt-Winters models against a seasonal naïve approach.

train_data <- myseries %>% filter(year(Month) <= 2010)
test_data <- myseries %>% filter(year(Month) > 2010)

hw_multiplicative_train <- train_data %>% 
  model(ETS(Turnover ~ error("A") + trend("N") + season("M")))

hw_damped_train <- train_data %>% 
  model(ETS(Turnover ~ error("A") + trend("Ad") + season("M")))

seasonal_naive_train <- train_data %>% 
  model(SNAIVE(Turnover ~ lag("year")))

fc_hw_multiplicative <- forecast(hw_multiplicative_train, new_data = test_data)
fc_hw_damped <- forecast(hw_damped_train, new_data = test_data)
fc_seasonal_naive <- forecast(seasonal_naive_train, new_data = test_data)

rmse_hw_multiplicative <- accuracy(fc_hw_multiplicative, test_data) %>% filter(.type == "Test") %>% pull(RMSE)
rmse_hw_damped <- accuracy(fc_hw_damped, test_data) %>% filter(.type == "Test") %>% pull(RMSE)
rmse_seasonal_naive <- accuracy(fc_seasonal_naive, test_data) %>% filter(.type == "Test") %>% pull(RMSE)

print(paste("Test RMSE for Holt-Winters Multiplicative Model:", rmse_hw_multiplicative))

## [1] "Test RMSE for Holt-Winters Multiplicative Model: 1.42470901456108"

print(paste("Test RMSE for Holt-Winters Damped Trend Model:", rmse_hw_damped))

## [1] "Test RMSE for Holt-Winters Damped Trend Model: 1.33444427208816"

print(paste("Test RMSE for Seasonal Naïve Model:", rmse_seasonal_naive))

## [1] "Test RMSE for Seasonal Naïve Model: 1.5519812606257"

8.9 STL Decomposition and ETS Forecasting

Train-Test Split

library(fpp3)

train_data <- myseries %>% filter(year(Month) <= 2010)
test_data <- myseries %>% filter(year(Month) > 2010)

Box-Cox Transformation and STL Decomposition

lambda <- features(train_data, Turnover, features = guerrero) %>% pull(lambda_guerrero)
print(paste("Optimal Box-Cox Lambda:", lambda))

## [1] "Optimal Box-Cox Lambda: 0.197143080203977"

train_stl <- train_data %>%
  model(STL(box_cox(Turnover, lambda) ~ season(window = "periodic")))

train_stl_components <- components(train_stl)

train_adjusted <- train_stl_components %>%
  mutate(Turnover_adjusted = `box_cox(Turnover, lambda)` - season_year) %>%
  select(Month, Turnover_adjusted)

Training an ETS Model on Seasonally Adjusted Data

ets_adjusted <- train_adjusted %>%
  model(ETS(Turnover_adjusted ~ error("A") + trend("Ad") + season("N")))

Forecasting Using the ETS Model

fc_adjusted <- forecast(ets_adjusted, h = nrow(test_data)) %>%
  as_tibble() %>%
  select(Month, .mean)

Extracting Seasonality from Test Data

test_stl <- test_data %>%
  model(STL(box_cox(Turnover, lambda) ~ season(window = "periodic"))) %>%
  components()

seasonality_test <- test_stl %>%
  as_tibble() %>%
  select(Month, season_year)

Restoring Seasonality to Forecasts

fc_final <- fc_adjusted %>%
  left_join(seasonality_test, by = "Month") %>%
  mutate(Final_Prediction = inv_box_cox(.mean + season_year, lambda)) %>%
  select(Month, Final_Prediction)

Computing RMSE for STL + ETS Model

rmse_stl_ets <- accuracy(fc_final$Final_Prediction, test_data$Turnover)[, "RMSE"]
print(paste("Test RMSE for STL + ETS Model:", rmse_stl_ets))

## [1] "Test RMSE for STL + ETS Model: 1.01141694085381"

Comparison with Previous Models

print(paste("Test RMSE for Holt-Winters Multiplicative:", rmse_hw_multiplicative))

## [1] "Test RMSE for Holt-Winters Multiplicative: 1.42470901456108"

print(paste("Test RMSE for Holt-Winters Damped:", rmse_hw_damped))

## [1] "Test RMSE for Holt-Winters Damped: 1.33444427208816"

print(paste("Test RMSE for Seasonal Naïve:", rmse_seasonal_naive))

## [1] "Test RMSE for Seasonal Naïve: 1.5519812606257"

Plotting the Forecast

ggplot() +
  geom_line(data = test_data, aes(x = Month, y = Turnover), color = "black", size = 1) +
  geom_line(data = fc_final, aes(x = Month, y = Final_Prediction), color = "blue", size = 1) +
  ggtitle("STL + ETS Forecast vs. Test Data") +
  ylab("Turnover (Millions of AUD)") +
  theme_minimal()