{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
# Function to install a package if not already installed
install_if_needed <- function(pkg) {
if (!requireNamespace(pkg, quietly = TRUE)) {
install.packages(pkg)
}
}
# List of packages to check and install if necessary
required_packages <- c("fpp3", "dplyr", "ggplot2", "lubridate", "tsibble",
"tsibbledata", "feasts", "fable", "fabletools",
"curl", "USgas", "readxl", "readr", "tidyr")
# Loop through the list and install packages only if needed
for (pkg in required_packages) {
install_if_needed(pkg)
}
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
# Load the required libraries, suppressing package startup messages
suppressPackageStartupMessages({
library(fpp3)
library(dplyr)
library(ggplot2)
library(lubridate)
library(tsibble)
library(tsibbledata)
library(feasts)
library(fable)
library(fabletools)
library(readr)
library(readxl)
library(tidyr)
})
Consider the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.
# Load necessary libraries
library(tsibble)
library(fable)
library(dplyr)
# Filter for pigs in Victoria
pigs_victoria <- aus_livestock |>
filter(Animal == "Pigs", State == "Victoria")
# Estimate the simple exponential smoothing (ETS(A,N,N)) model for the number of pigs slaughtered
fit <- pigs_victoria |>
model(ETS(Count ~ error("A") + trend("N") + season("N")))
# Extract the model components
report(fit)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
# Extract the smoothing parameter (α) and initial level (ℓ₀)
model_params <- fit |>
tidy()
alpha <- model_params |>
filter(term == "alpha") |>
pull(estimate)
l0 <- model_params |>
filter(term == "l[0]") |>
pull(estimate)
# Print alpha and initial level
print(paste("Optimal smoothing parameter (alpha):", alpha))
## [1] "Optimal smoothing parameter (alpha): 0.322124702874923"
print(paste("Optimal initial level (l0):", l0))
## [1] "Optimal initial level (l0): 100646.603176363"
# Generate forecasts for the next 4 months
fc <- fit |>
forecast(h = 4)
# Extract residuals to calculate the standard deviation
residuals_fit <- residuals(fit)
s <- sd(residuals_fit$.resid)
# Compute 95% prediction interval for the first forecast
first_forecast <- fc |> slice(1) |> pull(.mean)
lower_bound <- first_forecast - 1.96 * s
upper_bound <- first_forecast + 1.96 * s
# Print the first forecast and the 95% prediction interval
print(paste("First forecast:", first_forecast))
## [1] "First forecast: 95186.5574309915"
print(paste("95% Prediction Interval: [", lower_bound, ",", upper_bound, "]"))
## [1] "95% Prediction Interval: [ 76871.0124775157 , 113502.102384467 ]"
Load Data and Select a Country
# Load necessary libraries
library(tsibble)
library(fable)
library(dplyr)
library(tidyr)
# global_economy is provided by the tsibbledata package (loaded with fpp3)
# Select a country (here, the United States) for analysis
country_exports <- global_economy |>
filter(Country == "United States") |>
select(Year, Exports) |>
as_tsibble(index = Year)
country_exports <- country_exports |>
fill(Exports, .direction = "down")
head(country_exports, n=3)
## # A tsibble: 3 x 2 [1Y]
## Year Exports
## <dbl> <dbl>
## 1 1960 4.97
## 2 1961 4.90
## 3 1962 4.81
# Plot the exports series
library(ggplot2)
# Plot the exports series with x-axis ticks every 5 years
autoplot(country_exports, Exports) +
scale_x_continuous(breaks = seq(1960, 2020, by = 5)) + # Adjust x-axis to have ticks every 5 years
ggtitle("Annual Exports for United States") +
ylab("Exports") +
xlab("Year")
Looking at the plot of annual exports for the United States: the series trends upward overall, from about 5% of GDP in 1960 to roughly 12% by the end of the series, with several temporary declines along the way (for example in the early 2000s and around 2009). Because the data are annual, there is no seasonality to model.
# Fit the ETS(A,N,N) model (Simple Exponential Smoothing)
fit_ANN <- country_exports |>
model(ETS(Exports ~ error("A") + trend("N") + season("N")))
# Generate forecasts for the next 8 years
fc_ANN <- fit_ANN |>
forecast(h = 8)
# Plot the forecasts with extended x-axis
autoplot(fc_ANN) +
autolayer(country_exports, Exports) + # Add the actual exports series
scale_x_continuous(breaks = seq(1960, 2025, by = 5), limits = c(1960, 2025)) + # Extend x-axis to show forecasts
ggtitle("ETS(A,N,N) Model Forecast for Exports") +
xlab("Year") +
ylab("Exports")
c. Compute the RMSE values for the training data.
# Calculate RMSE for the ETS(A,N,N) model
accuracy_ANN <- accuracy(fit_ANN)
rmse_ANN <- accuracy_ANN$RMSE
print(paste("RMSE for ETS(A,N,N):", rmse_ANN))
## [1] "RMSE for ETS(A,N,N): 0.621637990745711"
Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.
# Load necessary libraries
library(tsibble)
library(fable)
library(dplyr)
# This stretches the time series for model training and accuracy testing
results <- country_exports |>
stretch_tsibble(.init = 10) |> # Use initial window size of 10
model(
SES = ETS(Exports ~ error("A") + trend("N") + season("N")), # Simple Exponential Smoothing (ETS(A,N,N))
Holt = ETS(Exports ~ error("A") + trend("A") + season("N")) # Holt's linear trend (ETS(A,A,N))
) |>
forecast(h = 1) |> # Forecast 1 period ahead
accuracy(country_exports) # Calculate accuracy metrics against actual data
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2018
# Display the results
print(results)
## # A tibble: 2 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Holt Test -0.0123 0.739 0.593 -0.0949 6.39 1.28 1.18 0.210
## 2 SES Test 0.144 0.682 0.533 1.54 5.67 1.16 1.09 0.246
Comparison metrics: in the one-step cross-validation above, the simpler SES model has slightly lower RMSE (0.682 vs 0.739), MAE and MASE than Holt's method, so the extra trend parameter is not clearly rewarded in short-horizon accuracy.
Discussion of the two models:
In the forecast plot below, the red line (ETS(A,A,N)) extrapolates the upward trend, while the blue line (ETS(A,N,N)) produces flat forecasts. Because the exports series has a clear long-run upward trend, ETS(A,A,N) gives the more plausible multi-year forecasts, but the cross-validation results show that ETS(A,N,N) is at least as accurate one step ahead, so the simpler model remains defensible for short horizons.
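Because Holt's method uses one extra parameter, an information-criterion comparison on the full series complements the cross-validation above; a minimal sketch (the model names are my own labels, and a lower AICc favours that specification after penalising the extra parameter):
# Fit both models on the full exports series and compare penalised fit measures
country_exports |>
  model(
    SES  = ETS(Exports ~ error("A") + trend("N") + season("N")),
    Holt = ETS(Exports ~ error("A") + trend("A") + season("N"))
  ) |>
  glance() |>
  select(.model, AIC, AICc, BIC)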
# Fit the ETS(A,N,N) model (Simple Exponential Smoothing)
fit_ANN <- country_exports |>
model(ETS_A_N_N = ETS(Exports ~ error("A") + trend("N") + season("N")))
# Fit the ETS(A,A,N) model (Holt's linear trend)
fit_AAN <- country_exports |>
model(ETS_A_A_N = ETS(Exports ~ error("A") + trend("A") + season("N")))
# Generate forecasts for both models (e.g., for the next 5 periods)
fc_ANN <- fit_ANN |>
forecast(h = 5)
fc_AAN <- fit_AAN |>
forecast(h = 5)
# Overlay both forecasts and the actual data (colours are set directly rather
# than mapped, so ggplot does not draw a legend)
autoplot(fc_ANN, level = NULL) + # ETS(A,N,N) forecast (plotted in blue by default), no prediction intervals
autolayer(fc_AAN, level = NULL, color = "red") + # ETS(A,A,N) forecast in red
autolayer(country_exports, Exports, color = "black") + # Actual data in black
ggtitle("Comparison of Forecasts: ETS(A,N,N) vs ETS(A,A,N)") +
xlab("Year") +
ylab("Exports")
# Calculate RMSE for both models
accuracy_ANN <- accuracy(fit_ANN) # Accuracy for ETS(A,N,N)
rmse_ANN <- accuracy_ANN$RMSE # Extract RMSE
accuracy_AAN <- accuracy(fit_AAN) # Accuracy for ETS(A,A,N)
rmse_AAN <- accuracy_AAN$RMSE # Extract RMSE
# For ETS(A,N,N)
first_forecast_ANN <- fc_ANN |> slice(1) |> pull(.mean)
lower_bound_ANN <- first_forecast_ANN - 1.96 * rmse_ANN
upper_bound_ANN <- first_forecast_ANN + 1.96 * rmse_ANN
print(paste("95% Prediction Interval for ETS(A,N,N): [", lower_bound_ANN, ",", upper_bound_ANN, "]"))
## [1] "95% Prediction Interval for ETS(A,N,N): [ 10.6722119645154 , 13.1090328882386 ]"
# For ETS(A,A,N)
first_forecast_AAN <- fc_AAN |> slice(1) |> pull(.mean)
lower_bound_AAN <- first_forecast_AAN - 1.96 * rmse_AAN
upper_bound_AAN <- first_forecast_AAN + 1.96 * rmse_AAN
print(paste("95% Prediction Interval for ETS(A,A,N): [", lower_bound_AAN, ",", upper_bound_AAN, "]"))
## [1] "95% Prediction Interval for ETS(A,A,N): [ 10.8177686545507 , 13.2086330017536 ]"
Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.
# Load necessary libraries
library(fable)
library(tsibble)
library(dplyr)
library(ggplot2)
# Filter the Chinese GDP data from the global_economy dataset
china_gdp <- global_economy |>
filter(Country == "China") |>
select(Year, GDP)
# Box-Cox transform the GDP series for inspection (lambda = 0.3, chosen by hand)
china_gdp_boxcox <- china_gdp |>
mutate(GDP_transformed = box_cox(GDP, lambda = 0.3))
# Fit different ETS models
# 1. Basic ETS(A,A,N) model (additive error, additive trend, no seasonality)
fit_basic <- china_gdp |>
model(Basic = ETS(GDP ~ error("A") + trend("A") + season("N")))
# 2. ETS(A,A,N) model with damped trend
fit_damped <- china_gdp |>
model(Damped = ETS(GDP ~ error("A") + trend("Ad") + season("N")))
# 3. ETS model with a Box-Cox transformation specified inside the model formula,
#    so that fable automatically back-transforms the forecasts to the GDP scale
fit_boxcox <- china_gdp |>
model(BoxCox = ETS(box_cox(GDP, 0.3) ~ error("A") + trend("A") + season("N")))
# Generate forecasts for 20 years ahead (to see long-term trends)
fc_basic <- fit_basic |> forecast(h = "20 years")
fc_damped <- fit_damped |> forecast(h = "20 years")
fc_boxcox <- fit_boxcox |> forecast(h = "20 years") # already back-transformed to the GDP scale
# Visualize the forecasts for comparison
autoplot(china_gdp, GDP) +
autolayer(fc_basic, color = "blue") + # Basic ETS(A,A,N) forecast
autolayer(fc_damped, color = "red") + # Damped-trend forecast
autolayer(fc_boxcox, color = "green") + # Box-Cox forecast (back-transformed)
ggtitle("Forecast Comparison: China's GDP") +
xlab("Year") +
ylab("GDP (in trillions)") +
guides(colour = guide_legend(title = "Model")) +
theme_minimal()
The plot shows forecasts for China's GDP from three ETS variants: the basic trended model, the damped-trend model, and the model fitted to Box-Cox transformed data. The basic ETS(A,A,N) extrapolates the recent, very steep trend indefinitely, so its long-run forecasts keep growing at roughly the latest growth rate. The damped-trend model gradually flattens that growth over the 20-year horizon, giving noticeably more conservative forecasts. The Box-Cox model (λ = 0.3) estimates the trend on a compressed scale; when its forecasts are back-transformed they grow faster than linearly, so this variant produces the most aggressive long-run trajectory, with prediction intervals that widen sharply at high GDP levels.
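The λ = 0.3 used above was picked by hand; if preferred, a data-driven value can be obtained with the Guerrero method and substituted into the Box-Cox model, as in this minimal sketch:
# Data-driven Box-Cox parameter for the GDP series (Guerrero method);
# the resulting value could replace the hand-picked lambda = 0.3 above
china_gdp |>
  features(GDP, features = guerrero) |>
  pull(lambda_guerrero)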
Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?
Steps:
1. Fit a basic ETS model with multiplicative seasonality.
2. Experiment with a damped trend to see if it improves the forecast.
3. Compare forecasts and examine their accuracy.
# Load necessary libraries
library(fable)
library(tsibble)
library(ggplot2)
library(dplyr)
# Load the Gas data from aus_production
gas_data <- aus_production |>
filter(!is.na(Gas)) |>
select(Quarter, Gas)
# 1. Basic ETS model with multiplicative seasonality (ETS(A,A,M))
fit_multiplicative <- gas_data |>
model(Multiplicative = ETS(Gas ~ error("A") + trend("A") + season("M")))
# 2. ETS model with multiplicative seasonality and a damped trend (ETS(A,Ad,M))
fit_damped <- gas_data |>
model(Damped = ETS(Gas ~ error("A") + trend("Ad") + season("M")))
# Forecast the next few years (8 quarters = 2 years)
fc_multiplicative <- fit_multiplicative |> forecast(h = 8)
fc_damped <- fit_damped |> forecast(h = 8)
# Plot the forecasts for comparison
autoplot(gas_data, Gas) +
autolayer(fc_multiplicative, color = "blue") + # Multiplicative seasonality, no damping
autolayer(fc_damped, color = "red") + # Multiplicative seasonality with damped trend
ggtitle("Gas Production Forecast: Multiplicative Seasonality vs Damped Trend") +
xlab("Year") +
ylab("Gas Production") +
guides(colour = guide_legend(title = "Model")) +
theme_minimal()
In the plot, the difference between the damped-trend model (red) and the undamped model (blue) is not very pronounced over the two-year horizon: the two forecasts track each other closely and their uncertainty bands overlap substantially, so damping does not clearly improve these short-term forecasts.
From the plot, multiplicative seasonality is necessary because the seasonal fluctuations grow roughly in proportion to the level of gas production: as the trend rises, the seasonal peaks and troughs become larger, which an additive seasonal model would not capture accurately.
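To put numbers on whether damping helps, the training accuracy and information criteria of the two gas models can be compared directly; a minimal sketch reusing the mables fitted above:
# Training (one-step) accuracy for both gas models
bind_rows(accuracy(fit_multiplicative), accuracy(fit_damped)) |>
  select(.model, RMSE, MAE, MAPE)
# Penalised fit measures (AICc accounts for the extra damping parameter)
bind_rows(glance(fit_multiplicative), glance(fit_damped)) |>
  select(.model, AICc, BIC)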
# Load necessary libraries
library(fable)
library(tsibble)
library(ggplot2)
library(dplyr)
# Filter the aus_retail dataset for one series: Food retailing in Victoria
retail_data <- aus_retail |>
filter(State == "Victoria", Industry == "Food retailing") |>
select(Month, Turnover)
# Plot the retail data to visualize trends and seasonality
autoplot(retail_data, Turnover) +
ggtitle("Retail Turnover: Food Retailing in Victoria") +
xlab("Year") +
ylab("Turnover (in millions)") +
theme_minimal()
# Load necessary libraries
library(fable)
library(tsibble)
library(ggplot2)
library(dplyr)
library(purrr)
# Filter aus_retail dataset
retail_data <- aus_retail |>
filter(State == "Victoria", Industry == "Food retailing") |>
select(Month, Turnover)
# 1. Holt-Winters' Multiplicative Method (ETS(A,A,M))
fit_multiplicative <- retail_data |>
model(Multiplicative = ETS(Turnover ~ error("A") + trend("A") + season("M")))
# 2. Holt-Winters' Multiplicative Method with Damped Trend (ETS(A,Ad,M))
fit_damped <- retail_data |>
model(Damped = ETS(Turnover ~ error("A") + trend("Ad") + season("M")))
# Forecast one step beyond the sample for each model (the RMSE comparison below uses the one-step training errors from accuracy())
fc_multiplicative <- fit_multiplicative |> forecast(h = 1)
fc_damped <- fit_damped |> forecast(h = 1)
I prefer the multiplicative (undamped) model because of its slightly lower one-step RMSE (25.88 vs. 26.12). The difference is small, however, so either model is defensible depending on the context: the undamped model edges ahead on short-term accuracy, while the damped model may be safer over longer horizons because it does not extrapolate the trend indefinitely.
# Compare RMSE of one-step-ahead forecasts
accuracy_multiplicative <- accuracy(fit_multiplicative)
accuracy_damped <- accuracy(fit_damped)
# Print RMSE values
rmse_multiplicative <- accuracy_multiplicative$RMSE
rmse_damped <- accuracy_damped$RMSE
print(paste("RMSE for Multiplicative Model:", rmse_multiplicative))
## [1] "RMSE for Multiplicative Model: 25.8789239175654"
print(paste("RMSE for Damped Model:", rmse_damped))
## [1] "RMSE for Damped Model: 26.1188703443491"
# Keep whichever specification has the lower one-step training RMSE
best_model <- if (rmse_damped < rmse_multiplicative) fit_damped else fit_multiplicative
# best_model is a mable with a single model column, so it can be passed directly
# to report(), augment(), forecast(), etc.
The residuals do not fully resemble white noise. Some significant autocorrelations in the ACF plot suggest the model isn’t capturing all patterns. This indicates room for improvement in the model.
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# Extract residuals directly from the fit_multiplicative model
residuals_multiplicative <- augment(fit_multiplicative)
# Check for white noise by plotting the residuals
autoplot(residuals_multiplicative, .resid) +
ggtitle("Residuals from the Multiplicative Model") +
xlab("Time") +
ylab("Residuals")
# ACF plot for residuals
ggAcf(residuals_multiplicative$.resid) +
ggtitle("ACF of Residuals from the Multiplicative Model")
e. Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?
Yes, the multiplicative model (test RMSE: 59.22) clearly outperforms the seasonal naïve model (test RMSE: 479.76).
# Load necessary libraries
library(fable)
library(tsibble)
library(ggplot2)
library(dplyr)
# Split the data into training (up to the end of 2010) and test sets
train_data <- retail_data |> filter(Month <= yearmonth("2010 Dec"))
test_data <- retail_data |> filter(Month > yearmonth("2010 Dec"))
# 1. Train the multiplicative model on the training data
fit_multiplicative_train <- train_data |>
model(Multiplicative = ETS(Turnover ~ error("A") + trend("A") + season("M")))
# 2. Generate forecasts for the test period (from 2011 onward)
forecasts_multiplicative <- fit_multiplicative_train |>
forecast(new_data = test_data)
# 3. Calculate RMSE for the test set
accuracy_multiplicative_test <- accuracy(forecasts_multiplicative, test_data)
rmse_multiplicative_test <- accuracy_multiplicative_test$RMSE
print(paste("Test RMSE for Multiplicative Model:", rmse_multiplicative_test))
## [1] "Test RMSE for Multiplicative Model: 59.2242845353677"
# 4. Seasonal naïve approach for comparison
fit_naive <- train_data |>
model(SeasonalNaive = SNAIVE(Turnover))
# Generate seasonal naive forecasts
forecasts_naive <- fit_naive |>
forecast(new_data = test_data)
# Calculate RMSE for seasonal naive model
accuracy_naive_test <- accuracy(forecasts_naive, test_data)
rmse_naive_test <- accuracy_naive_test$RMSE
print(paste("Test RMSE for Seasonal Naive Model:", rmse_naive_test))
## [1] "Test RMSE for Seasonal Naive Model: 479.761827194842"
# Compare RMSEs
if (rmse_multiplicative_test < rmse_naive_test) {
print("Multiplicative model beats the seasonal naive approach.")
} else {
print("Seasonal naive approach is better.")
}
## [1] "Multiplicative model beats the seasonal naive approach."
For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?
# Load necessary libraries
library(fable)
library(tsibble)
library(dplyr)
library(ggplot2)
# STL decomposition followed by ETS on the seasonally adjusted data
# (note: the Box-Cox step is omitted here; a variant that includes it is sketched after this chunk's output)
fit_stl_ets <- retail_data |>
filter(Month <= yearmonth("2010 Dec")) |> # Use training data up to 2010
model(
STL_ETS = decomposition_model(
STL(Turnover ~ season(window = "periodic")),
ETS(season_adjust ~ error("A") + trend("A") + season("N"))
)
)
# Forecast on the test data (from 2011 onwards)
forecasts_stl_ets <- fit_stl_ets |>
forecast(new_data = retail_data |> filter(Month > yearmonth("2010 Dec")))
# Compute the RMSE for the STL-ETS model on the test data using the original Turnover values
accuracy_stl_ets <- accuracy(forecasts_stl_ets, retail_data |> filter(Month > yearmonth("2010 Dec")))
rmse_stl_ets <- accuracy_stl_ets$RMSE
print(paste("Test RMSE for STL-ETS model:", rmse_stl_ets))
## [1] "Test RMSE for STL-ETS model: 112.860739361934"
# RMSE for the earlier Holt-Winters multiplicative model (from part e), shown for comparison
print(paste("Test RMSE for Multiplicative Model:", rmse_multiplicative_test))
## [1] "Test RMSE for Multiplicative Model: 59.2242845353677"
# Compare the RMSEs
if (rmse_stl_ets < rmse_multiplicative_test) {
print("STL-ETS model beats the Multiplicative Model.")
} else {
print("Multiplicative Model is better.")
}
## [1] "Multiplicative Model is better."