8.1 - Consider the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.

library(fpp3) 
library(forecast) 
library(plotly)
library(ggplot2)


data <- aus_livestock |>
  filter(State == "Victoria", Animal == "Pigs")

model <- data |>
  model(ETS(Count ~ error("A") + trend("N") + season("N")))

forecasted_values <- model |>
  forecast(h = 4)

forecasted_values |>
  autoplot(data)
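
The estimated smoothing parameter (alpha) and the initial level can be read straight from the fitted model; a quick check using report():

# Inspect the estimated alpha and initial level
model |> report()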

Part (b): Compute a 95% prediction interval for the first forecast using the mean plus or minus 1.96 times the residual standard deviation, and compare it with the interval produced by R.

first_forecast <- forecasted_values |>
  slice(1) |>
  pull(.mean)

residuals_sd <- model |>
  augment() |>
  as_tibble() |>  # drop the tsibble index so sd() is taken over all residuals
  summarise(sd_resid = sd(.resid)) |>
  pull(sd_resid)

lower_bound <- first_forecast - 1.96 * residuals_sd
upper_bound <- first_forecast + 1.96 * residuals_sd

forecasted_values |>
  hilo(level = 95)
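
For comparison, the manually computed bounds can be printed next to the hilo() output; the two intervals should be close but not identical, because the model-based interval uses the estimated ETS variance rather than the raw residual standard deviation.

# Manual 95% interval for the first forecast (mean +/- 1.96 * residual SD)
c(lower = lower_bound, mean = first_forecast, upper = upper_bound)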

8.5a - Plot the Exports series and discuss the main features of the data.

Canada_data <- global_economy |>
  filter(Country == "Canada")

Canada_plot <- Canada_data |>
  autoplot(Exports) +
  geom_smooth(method = 'loess', se = FALSE, color = 'blue') +
  labs(y = 'Exports (% of GDP)', title = 'Exports: Canada')

ggplotly(Canada_plot)

8.5b - Use an ETS(A,N,N) model to forecast the series and plot the forecasts.

ETS_ANN <- Canada_data |>
  model(ANN = ETS(Exports ~ error('A') + trend('N') + season('N')))

ETS_ANN |>
  forecast(h = 5) |>
  autoplot(Canada_data, level = NULL) +
  labs(y = 'Exports (% of GDP)',
       title = 'Exports: Canada') +
  guides(color = guide_legend(title = 'Forecast'))

8.5c - Compute the RMSE values for the training data.

ETS_ANN |>
  accuracy() |>
  select(Country, .model, .type, RMSE)

tidy(ETS_ANN)

8.5d - Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.

ETS_AAN <- Canada_data |>
  model(AAN = ETS(Exports ~ error("A") + trend("A") + season("N")))

ETS_AAN_forecast <- ETS_AAN |>
  forecast(h = 5)

autoplot(ETS_AAN_forecast) +
  labs(title = "ETS(A,A,N) Model Forecast",
       y = "Exports (% of GDP)")

8.5e - Compare the forecasts from both methods. Which do you think is best?

accuracy(ETS_ANN)
accuracy(ETS_AAN)

Canada_data |>
  model(
    ANN = ETS(Exports ~ error("A") + trend("N") + season("N")),
    AAN = ETS(Exports ~ error("A") + trend("A") + season("N"))
  ) |>
  forecast(h = 5) |>
  autoplot(Canada_data, level = NULL) +
  labs(title = "Comparison of ETS Forecasts",
       y = "Exports (% of GDP)") +
  guides(colour = guide_legend(title = "Model"))

8.5f - Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors.

ETS_ANN |>
  forecast(h = 1) |>
  hilo(level = 95)

ETS_AAN |>
  forecast(h = 1) |>
  hilo(level = 95)

RMSE_ANN <- ETS_ANN |>
  accuracy() |>
  pull(RMSE)

ETS_ANN |>
  forecast(h = 1) |>
  mutate(conf_lo = .mean - 1.96 * RMSE_ANN,
         conf_hi = .mean + 1.96 * RMSE_ANN)

RMSE_AAN <- ETS_AAN |>
  accuracy() |>
  pull(RMSE)

ETS_AAN |>
  forecast(h = 1) |>
  mutate(conf_lo = .mean - 1.96 * RMSE_AAN,
         conf_hi = .mean + 1.96 * RMSE_AAN)

8.6 - Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.

china_gdp <- global_economy |>
  filter(Country == "China") |>
  select(Year, GDP) |>
  as_tsibble(index = Year)

autoplot(china_gdp, GDP) +
  labs(title = "China's GDP Over Time", y = "GDP (USD)")

ets_ann <- china_gdp |>
  model(ETS(GDP ~ error("A") + trend("N") + season("N")))

ets_ann_forecast <- ets_ann |>
  forecast(h = 20)  # Forecasting 20 years ahead

autoplot(ets_ann_forecast) +
  labs(title = "ETS(A,N,N) Model Forecast - No Trend",
       y = "GDP (USD)")

# ETS(A,A,N) – Additive Trend
ets_aan <- china_gdp |>
  model(ETS(GDP ~ error("A") + trend("A") + season("N")))

ets_aan_forecast <- ets_aan |>
  forecast(h = 20)

autoplot(ets_aan_forecast) +
  labs(title = "ETS(A,A,N) Model Forecast - Additive Trend",
       y = "GDP (USD)")

ets_aad <- china_gdp |>
  model(ETS(GDP ~ error("A") + trend("Ad") + season("N")))

ets_aad_forecast <- ets_aad |>
  forecast(h = 20)

autoplot(ets_aad_forecast) +
  labs(title = "ETS(A,Ad,N) Model Forecast - Damped Trend",
       y = "GDP (USD)")

# A Box-Cox transformation is applied to the response inside ETS()
ets_boxcox <- china_gdp |>
  model(ETS(box_cox(GDP, 0.3) ~ error("A") + trend("A") + season("N")))

ets_boxcox_forecast <- ets_boxcox |>
  forecast(h = 20)

autoplot(ets_boxcox_forecast) +
  labs(title = "ETS Model Forecast with Box-Cox Transformation",
       y = "GDP (USD)")

china_gdp |>
  model(
    ANN = ETS(GDP ~ error("A") + trend("N") + season("N")),
    AAN = ETS(GDP ~ error("A") + trend("A") + season("N")),
    AAdN = ETS(GDP ~ error("A") + trend("Ad") + season("N")),
    AAN_boxcox = ETS(box_cox(GDP, 0.3) ~ error("A") + trend("A") + season("N"))
  ) |>
  forecast(h = 20) |>
  autoplot(china_gdp, level = NULL) +
  labs(title = "Comparison of ETS Model Forecasts for China's GDP",
       y = "GDP (USD)") +
  guides(colour = guide_legend(title = "Model"))
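
To quantify how each option changes the fit, the training accuracy of the four individual fits above can be tabulated; fable reports these errors on the GDP scale even for the transformed model, so they are directly comparable. A quick sketch:

# Training-set accuracy of the four individual fits defined above
bind_rows(
  accuracy(ets_ann),
  accuracy(ets_aan),
  accuracy(ets_aad),
  accuracy(ets_boxcox)
) |>
  select(.model, RMSE, MAE)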

8.7 - Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?

gas_data <- aus_production %>%
  select(Quarter, Gas) %>%
  as_tsibble(index = Quarter)

autoplot(gas_data, Gas) +
  labs(title = "Australian Quarterly Gas Production",
       y = "Gas (petajoules)")

# Additive errors with multiplicative seasonality (A,*,M) are numerically unstable,
# so multiplicative errors are paired with the multiplicative seasonal component
ets_multiplicative <- gas_data %>%
  model(ETS(Gas ~ error("M") + trend("A") + season("M")))

ets_multiplicative_forecast <- ets_multiplicative %>%
  forecast(h = 40)

autoplot(ets_multiplicative_forecast) +
  labs(title = "ETS Model Forecast with Multiplicative Seasonality",
       y = "Gas (petajoules)")

ets_damped <- gas_data %>%
  model(ETS(Gas ~ error("M") + trend("Ad") + season("M")))

ets_damped_forecast <- ets_damped %>%
  forecast(h = 40)

autoplot(ets_damped_forecast) +
  labs(title = "ETS Model Forecast with Damped Trend",
       y = "Gas (petajoules)")

gas_data %>%
  model(
    multiplicative = ETS(Gas ~ error("M") + trend("A") + season("M")),
    damped = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
  ) %>%
  forecast(h = 40) %>%
  autoplot(gas_data, level = NULL) +
  labs(title = "Comparison of ETS Forecasts for Australian Gas Production",
       y = "Gas (petajoules)") +
  guides(colour = guide_legend(title = "Model"))
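
Whether damping actually improves the forecasts can be checked by comparing the two individual fits above on AICc and training RMSE; a minimal sketch:

# Damped vs non-damped: lower AICc / RMSE indicates the better in-sample fit
bind_rows(glance(ets_multiplicative), glance(ets_damped)) %>%
  select(.model, AICc)
bind_rows(accuracy(ets_multiplicative), accuracy(ets_damped)) %>%
  select(.model, RMSE)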

8.8a - Why is multiplicative seasonality necessary for this series? Multiplicative seasonality is needed because the size of the seasonal swings grows with the level of the series: as turnover increases, the seasonal peaks and troughs become larger in absolute terms. A multiplicative seasonal component lets the seasonal effect scale with the level, whereas an additive component would assume swings of constant size. (A quick AICc check follows the plot below.)

set.seed(12345678)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1)) |>
  select(Turnover)

myseries |>
  autoplot(Turnover)
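
As a rough numerical check on the claim in 8.8a, additive and multiplicative seasonal ETS specifications can be compared on AICc (both model Turnover directly, so the criteria are comparable); a minimal sketch:

# Compare additive vs multiplicative seasonality by AICc
myseries |>
  model(
    additive = ETS(Turnover ~ error("A") + trend("A") + season("A")),
    multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))
  ) |>
  glance() |>
  select(.model, AICc)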

8.8b - Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.

myseries_ts <- ts(myseries$Turnover, frequency = 12)  

holt_win_model <- hw(myseries_ts, seasonal = "multiplicative", damped = TRUE)

summary(holt_win_model)
## 
## Forecast method: Damped Holt-Winters' multiplicative method
## 
## Model Information:
## Damped Holt-Winters' multiplicative method 
## 
## Call:
## hw(y = myseries_ts, seasonal = "multiplicative", damped = TRUE)
## 
##   Smoothing parameters:
##     alpha = 0.5562 
##     beta  = 1e-04 
##     gamma = 0.2073 
##     phi   = 0.8441 
## 
##   Initial states:
##     l = 2.6391 
##     b = -0.0516 
##     s = 0.8028 0.8056 0.8196 1.3074 0.9719 1.0633
##            1.0268 1.0957 1.2165 1.0508 0.9988 0.8408
## 
##   sigma:  0.0707
## 
##      AIC     AICc      BIC 
## 1809.599 1811.553 1879.993 
## 
## Error measures:
##                      ME      RMSE       MAE       MPE     MAPE      MASE
## Training set 0.05823794 0.6024724 0.4486334 0.4987534 5.230405 0.5121909
##                     ACF1
## Training set -0.04902864
## 
## Forecasts:
##        Point Forecast     Lo 80    Hi 80     Lo 95    Hi 95
## Oct 31       11.55410 10.507376 12.60083  9.953272 13.15494
## Nov 31       10.32985  9.258337 11.40136  8.691112 11.96859
## Dec 31       11.43668 10.116881 12.75648  9.418220 13.45514
## Jan 32       11.71500 10.238747 13.19125  9.457267 13.97273
## Feb 32       13.30040 11.493986 15.10682 10.537728 16.06307
## Mar 32       13.88918 11.875618 15.90274 10.809702 16.96866
## Apr 32       15.87410 13.435846 18.31236 12.145110 19.60310
## May 32       15.69368 13.154752 18.23260 11.810727 19.57663
## Jun 32       14.46264 12.010077 16.91519 10.711772 18.21350
## Jul 32       14.41158 11.860061 16.96310 10.509368 18.31379
## Aug 32       14.41007 11.755399 17.06473 10.350103 18.47003
## Sep 32       20.44144 16.534232 24.34864 14.465883 26.41699
## Oct 32       11.56046  9.207452 13.91348  7.961843 15.15909
## Nov 32       10.33558  8.166655 12.50450  7.018497 13.65266
## Dec 32       11.44306  8.971521 13.91461  7.663165 15.22296
## Jan 33       11.72157  9.119828 14.32331  7.742550 15.70059
## Feb 33       13.30789 10.276492 16.33930  8.671765 17.94402
## Mar 33       13.89703 10.652339 17.14173  8.934700 18.85937
## Apr 33       15.88311 12.086319 19.67990 10.076420 21.68980
## May 33       15.70260 11.863439 19.54176  9.831109 21.57409
## Jun 33       14.47088 10.855647 18.08611  8.941860 19.99990
## Jul 33       14.41981 10.741888 18.09773  8.794913 20.04471
## Aug 33       14.41831 10.666729 18.16989  8.680763 20.15585
## Sep 33       20.45314 15.028173 25.87811 12.156367 28.74992
autoplot(holt_win_model)

8.8c - Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?

# Reuse the monthly ts object created above and fit the non-damped multiplicative method
hw_model_nondamped <- hw(myseries_ts, seasonal = "multiplicative", damped = FALSE)

autoplot(hw_model_nondamped)
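
To answer 8.8c directly, the one-step (training) RMSE of each fit can be extracted and compared; a small sketch using forecast::accuracy():

# One-step training RMSE: damped vs non-damped Holt-Winters
accuracy(holt_win_model)["Training set", "RMSE"]
accuracy(hw_model_nondamped)["Training set", "RMSE"]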

8.8d - Check that the residuals from the best method look like white noise.

checkresiduals(holt_win_model)

## 
##  Ljung-Box test
## 
## data:  Residuals from Damped Holt-Winters' multiplicative method
## Q* = 20.877, df = 24, p-value = 0.646
## 
## Model df: 0.   Total lags used: 24

8.8e - Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?

# myseries already has a monthly (yearmonth) index, so no date conversion is needed
myseries_train <- myseries |>
  filter(year(Month) < 2011)

myseries_train_fit <- myseries_train |>
  model(
    multi = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    snaive = SNAIVE(Turnover)
  )

# Test set: the complement of the training set (2011 onwards)
myseries_test <- myseries |>
  filter(year(Month) >= 2011)

myseries_train_forecast <- myseries_train_fit |>
  forecast(new_data = myseries_test)

myseries_train_forecast |>
  autoplot(myseries, level = NULL)
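
The test-set RMSE asked for here can then be read from accuracy(); the ETS model beats the seasonal naive benchmark if its test-set RMSE is lower. A minimal sketch:

# Test-set RMSE: ETS(M,A,M) versus the seasonal naive benchmark
myseries_train_forecast |>
  accuracy(myseries) |>
  select(.model, .type, RMSE)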


8.9 - For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?

I am not sure this is fully correct: the STL + ETS forecasts flatline, which would lead to systematic underestimation. The likely cause is that the ETS model below is fitted to the STL remainder only, so the trend and seasonal components are never forecast or added back (a corrected sketch is given at the end of this section).

training_set <- myseries |>
  filter(year(Month) <= 2010)

testing_set <- myseries |>
  filter(year(Month) >= 2011)

lambda <- BoxCox.lambda(training_set$Turnover)
boxcox_train <- BoxCox(training_set$Turnover, lambda)

stl_fit <- stl(ts(boxcox_train, frequency = 12), s.window = "periodic")

# ETS fitted to the STL remainder only; this drops the trend and seasonal
# components, which is why the resulting forecasts flatline
remainder_ts <- stl_fit$time.series[, "remainder"]
ets_model <- ets(remainder_ts)

h_test <- nrow(testing_set)
forecast1 <- forecast(ets_model, h = h_test)

forecasted_values <- InvBoxCox(forecast1$mean, lambda)

test_actuals <- testing_set$Turnover
dates <- testing_set$Month
forecast_df <- data.frame(Date = dates, Forecast = forecasted_values, Actual = test_actuals)

new_rmse <- sqrt(mean((forecast_df$Actual - forecast_df$Forecast)^2))

ggplot(forecast_df, aes(x = Date)) +
  geom_line(aes(y = Actual, colour = "Actual"), linewidth = 1.2) +
  geom_line(aes(y = Forecast, colour = "Forecasted with STL+ETS"), linewidth = 1.2) +
  labs(title = "Comparison of Actual vs. Forecasted Turnover",
       x = "", y = "Turnover") +
  scale_colour_manual("", values = c("Actual" = "blue", "Forecasted with STL+ETS" = "red")) +
  theme_minimal()

print(paste("RMSE of new method:", new_rmse))
## [1] "RMSE of new method: 12.5419168833704"