library(fpp3)
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.6
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.4.1
## ✔ lubridate 1.9.3 ✔ fable 0.4.1
## ✔ ggplot2 3.5.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(dplyr)
library(distributional)
library(purrr)
library(tsibble)
This section loads and processes data on pig populations in Victoria.
The data is converted into a time series (tsibble
) format
and fitted with a Simple Exponential Smoothing (SES) model using an
ETS(A,N,N) approach. The model extracts key parameters and generates
forecasts for the next four months.
library(fpp3)
data <- aus_livestock %>%
filter(Animal == "Pigs", State == "Victoria")
data_ts <- data %>% select(Month, Count) %>% as_tsibble(index = Month)
model <- data_ts %>%
model(SES = ETS(Count ~ error("A") + trend("N") + season("N")))
alpha <- coef(model) %>% filter(term == "alpha") %>% pull(estimate)
l0 <- coef(model) %>% filter(term == "l[0]") %>% pull(estimate)
forecasts <- model %>%
forecast(h = 4)
This section computes the standard deviation of residuals to construct a 95% manual prediction interval for the first forecasted value. It then compares the manually computed interval with R’s built-in prediction interval.
residuals <- model %>% augment() %>% pull(.resid)
s <- sd(residuals, na.rm = TRUE)
first_forecast <- forecasts %>% slice(1) %>% pull(.mean)
lower_bound <- first_forecast - 1.96 * s
upper_bound <- first_forecast + 1.96 * s
cat("Optimal α:", alpha, "\n")
## Optimal α: 0.3221247
cat("Optimal ℼ0:", l0, "\n")
## Optimal ℼ0: 100646.6
cat("Manual 95% Prediction Interval for first forecast:", lower_bound, "-", upper_bound, "\n")
## Manual 95% Prediction Interval for first forecast: 76871.01 - 113502.1
forecasts %>%
filter(row_number() == 1) %>%
hilo(level = 95) %>%
as_tibble()
## # A tibble: 1 × 5
## .model Month
## <chr> <mth>
## 1 SES 2019 Jan
## # ℹ 3 more variables: Count <dist>, .mean <dbl>, `95%` <hilo>
This section loads and processes export data for the United States. Missing values are checked and interpolated where necessary.
data <- global_economy %>%
filter(Country == "United States") %>%
select(Year, Exports) %>%
as_tsibble(index = Year)
sum(is.na(data$Exports))
## [1] 1
data <- data %>%
mutate(Exports = na.interp(Exports))
This section generates a time series plot of U.S. exports.
autoplot(data, Exports) +
labs(title = "Exports of the United States",
y = "Exports (USD Billions)",
x = "Year") +
theme_minimal()
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
This section fits two different ETS models: a simple exponential smoothing model (ETS(A,N,N)) and a Holt’s linear trend model (ETS(A,A,N)).
model_ANN <- data %>%
model(ETS(Exports ~ error("A") + trend("N") + season("N")))
forecast_ANN <- model_ANN %>%
forecast(h = 10)
autoplot(data, Exports) +
autolayer(forecast_ANN, level = 95, alpha = 0.3) +
labs(title = "ETS(A,N,N) Forecast for Exports",
y = "Exports (USD Billions)",
x = "Year") +
theme_minimal()
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
model_AAN <- data %>%
model(ETS(Exports ~ error("A") + trend("A") + season("N")))
forecast_AAN <- model_AAN %>%
forecast(h = 10)
autoplot(data, Exports) +
autolayer(forecast_AAN, level = 95, alpha = 0.3) +
labs(title = "ETS(A,A,N) Forecast for Exports",
y = "Exports (USD Billions)",
x = "Year") +
theme_minimal()
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
Root Mean Squared Error (RMSE) is calculated for both models to assess their accuracy.
residuals_ANN <- model_ANN %>% augment() %>% pull(.resid)
rmse_ANN <- sqrt(mean(residuals_ANN^2, na.rm = TRUE))
residuals_AAN <- model_AAN %>% augment() %>% pull(.resid)
rmse_AAN <- sqrt(mean(residuals_AAN^2, na.rm = TRUE))
print(paste("RMSE for ETS(A,N,N):", rmse_ANN))
## [1] "RMSE for ETS(A,N,N): 0.621637990745711"
print(paste("RMSE for ETS(A,A,N):", rmse_AAN))
## [1] "RMSE for ETS(A,A,N): 0.609914374286451"
A visualization comparing both ETS models’ forecasts.
forecast_comparison <- forecast_ANN %>%
mutate(Model = "ETS(A,N,N)") %>%
bind_rows(
forecast_AAN %>%
mutate(Model = "ETS(A,A,N)")
)
ggplot(forecast_comparison, aes(x = Year, y = .mean, color = Model)) +
geom_line(size = 1) +
labs(title = "Comparison of ETS(A,N,N) vs ETS(A,A,N) Forecasts",
y = "Exports (USD Billions)",
x = "Year") +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
A 95% confidence interval is calculated for the first forecasted value in both models.
first_forecast_ANN <- forecast_ANN %>% slice(1) %>% pull(.mean)
first_forecast_AAN <- forecast_AAN %>% slice(1) %>% pull(.mean)
lower_ANN <- first_forecast_ANN - 1.96 * rmse_ANN
upper_ANN <- first_forecast_ANN + 1.96 * rmse_ANN
lower_AAN <- first_forecast_AAN - 1.96 * rmse_AAN
upper_AAN <- first_forecast_AAN + 1.96 * rmse_AAN
print(paste("95% Prediction Interval for ETS(A,N,N):", lower_ANN, "-", upper_ANN))
## [1] "95% Prediction Interval for ETS(A,N,N): 10.6722119645154 - 13.1090328882386"
print(paste("95% Prediction Interval for ETS(A,A,N):", lower_AAN, "-", upper_AAN))
## [1] "95% Prediction Interval for ETS(A,A,N): 10.8177686545507 - 13.2086330017536"
This section filters the global economy dataset to extract GDP data for China and visualizes its historical trend.
china_gdp <- global_economy %>% filter(Country == "China") %>% select(Year, GDP)
autoplot(china_gdp, GDP) +
ggtitle("Historical Chinese GDP") +
ylab("GDP in USD Billions")
Fit three different ETS models: an automatic selection, a damped trend model, and a model with a Box-Cox transformation to stabilize variance.
library(fpp3)
china_gdp <- global_economy %>% filter(Country == "China") %>% select(Year, GDP)
ets_auto <- china_gdp %>% model(ETS(GDP))
forecast_ets_auto <- ets_auto %>% forecast(h = 50)
ets_damped <- china_gdp %>% model(ETS(GDP ~ trend("Ad")))
forecast_ets_damped <- ets_damped %>% forecast(h = 50)
ets_boxcox <- china_gdp %>% model(ETS(box_cox(GDP, 0.3)))
forecast_ets_boxcox <- ets_boxcox %>% forecast(h = 50)
This visualization compares the forecasts generated by the three models, allowing for an assessment of their differences and reliability.
autoplot(china_gdp, GDP) +
autolayer(forecast_ets_auto, GDP, color = "blue", alpha = 0.7) +
autolayer(forecast_ets_damped, GDP, color = "red", alpha = 0.7) +
autolayer(forecast_ets_boxcox, GDP, color = "green", alpha = 0.7) +
ggtitle("Comparison of ETS Forecasts for Chinese GDP (h = 50)") +
ylab("GDP in USD Billions") +
labs(color = "Model") +
scale_color_manual(values = c("blue" = "ETS Auto", "red" = "ETS Damped", "green" = "ETS Box-Cox"))
## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.
## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's colour values.
This section loads the Australian gas production data and visualizes the historical trends.
gas_data <- aus_production %>% select(Quarter, Gas)
autoplot(gas_data, Gas) +
ggtitle("Australian Gas Production Over Time") +
ylab("Gas Production (Petajoules)")
This model accounts for seasonal variations that increase proportionally with the level of the series.
ets_multiplicative <- gas_data %>% model(ETS(Gas ~ season("M")))
forecast_ets_multiplicative <- ets_multiplicative %>% forecast(h = "10 years")
Multiplicative seasonality is necessary when seasonal fluctuations grow as the overall level of production increases.
This model includes a damped trend component to avoid over-predictions of future growth.
ets_damped <- gas_data %>% model(ETS(Gas ~ trend("Ad") + season("M")))
forecast_ets_damped <- ets_damped %>% forecast(h = "10 years")
Damping prevents the model from over-extrapolating trends into the future.
This plot compares the two ETS models to assess which provides a more realistic projection.
autoplot(gas_data, Gas) +
autolayer(forecast_ets_damped, series = "ETS Damped", color = "red", alpha = 0.5) +
autolayer(forecast_ets_multiplicative, series = "ETS Multiplicative", color = "blue", alpha = 0.5) +
ggtitle("Comparison of ETS Forecasts for Gas Production") +
ylab("Gas Production (Petajoules)") +
theme_minimal()
## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `series`
## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters:
## `series`
## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `series`
## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters:
## `series`
## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.
This section selects a random retail time series from the
aus_retail
dataset, converts it to a tsibble
,
and plots the original and log-transformed series.
set.seed(12345678)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`, 1)) |>
as_tsibble(index = Month)
autoplot(myseries, Turnover) +
ggtitle("Randomly Selected Retail Time Series") +
ylab("Turnover (Millions of AUD)") +
theme_minimal()
autoplot(myseries, log(Turnover)) +
ggtitle("Log-Transformed Retail Time Series") +
ylab("Log Turnover (Millions of AUD)") +
theme_minimal()
Multiplicative seasonality is necessary when seasonal variations
increase as the overall level of the series increases. The
log-transformed series appears more stable, confirming the need for
multiplicative seasonality.
This section applies the Holt-Winters’ multiplicative method to forecast the data and includes a damped trend version for comparison.
library(fpp3)
hw_multiplicative <- myseries %>%
model(ETS(Turnover ~ error("A") + trend("N") + season("M")))
hw_damped <- myseries %>%
model(ETS(Turnover ~ error("A") + trend("Ad") + season("M")))
fc_multiplicative <- hw_multiplicative %>% forecast(h = "3 years")
fc_damped <- hw_damped %>% forecast(h = "3 years")
fc_combined <- bind_rows(
fc_multiplicative %>% mutate(Model = "Multiplicative"),
fc_damped %>% mutate(Model = "Damped Trend")
)
autoplot(myseries, Turnover) +
autolayer(fc_multiplicative, series = "Holt-Winters Multiplicative", color = "blue", alpha = 0.6) +
autolayer(fc_damped, series = "Holt-Winters Damped", color = "red", alpha = 0.6) +
ggtitle("Comparison of Holt-Winters' Multiplicative vs. Damped Trend Forecasts") +
ylab("Turnover (Millions of AUD)") +
theme_minimal()
## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `series`
## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters:
## `series`
## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `series`
## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters:
## `series`
## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.
The RMSE values for both methods are computed to determine which model produces smaller forecasting errors.
accuracy_multiplicative <- accuracy(hw_multiplicative)
accuracy_damped <- accuracy(hw_damped)
rmse_multiplicative <- accuracy_multiplicative %>% filter(.type == "Training") %>% pull(RMSE)
rmse_damped <- accuracy_damped %>% filter(.type == "Training") %>% pull(RMSE)
print(paste("RMSE for Holt-Winters' Multiplicative Model:", rmse_multiplicative))
## [1] "RMSE for Holt-Winters' Multiplicative Model: 0.603112648151485"
print(paste("RMSE for Holt-Winters' Damped Trend Model:", rmse_damped))
## [1] "RMSE for Holt-Winters' Damped Trend Model: 0.602472390488493"
A lower RMSE indicates better forecasting accuracy. If the damped model has a lower RMSE, it suggests that dampening the trend prevents overestimation of future values.
best_model_residuals <- augment(hw_damped) %>% pull(.resid)
best_model_residuals_ts <- ts(best_model_residuals, start = c(1988, 4), frequency = 12)
autoplot(best_model_residuals_ts) +
ggtitle("Residuals from the Best Model") +
ylab("Residuals") +
theme_minimal()
ljung_test <- Box.test(best_model_residuals, lag = 10, type = "Ljung-Box")
print(ljung_test)
##
## Box-Ljung test
##
## data: best_model_residuals
## X-squared = 19.452, df = 10, p-value = 0.03489
resid_df <- data.frame(residuals = best_model_residuals)
ggplot(resid_df, aes(x = residuals)) +
geom_histogram(bins = 30, fill = "blue", alpha = 0.5) +
ggtitle("Histogram of Residuals") +
theme_minimal()
ggplot(resid_df, aes(sample = residuals)) +
stat_qq() +
stat_qq_line() +
ggtitle("QQ-Plot of Residuals") +
theme_minimal()
Since the p-value is less than 0.05, the residuals don’t resemble white
noise. A bell-shaped histogram and QQ-plot alignment along the diagonal
suggest normal residuals.
The test set RMSE is calculated to compare the Holt-Winters models against a seasonal naïve approach.
train_data <- myseries %>% filter(year(Month) <= 2010)
test_data <- myseries %>% filter(year(Month) > 2010)
hw_multiplicative_train <- train_data %>%
model(ETS(Turnover ~ error("A") + trend("N") + season("M")))
hw_damped_train <- train_data %>%
model(ETS(Turnover ~ error("A") + trend("Ad") + season("M")))
seasonal_naive_train <- train_data %>%
model(SNAIVE(Turnover ~ lag("year")))
fc_hw_multiplicative <- forecast(hw_multiplicative_train, new_data = test_data)
fc_hw_damped <- forecast(hw_damped_train, new_data = test_data)
fc_seasonal_naive <- forecast(seasonal_naive_train, new_data = test_data)
rmse_hw_multiplicative <- accuracy(fc_hw_multiplicative, test_data) %>% filter(.type == "Test") %>% pull(RMSE)
rmse_hw_damped <- accuracy(fc_hw_damped, test_data) %>% filter(.type == "Test") %>% pull(RMSE)
rmse_seasonal_naive <- accuracy(fc_seasonal_naive, test_data) %>% filter(.type == "Test") %>% pull(RMSE)
print(paste("Test RMSE for Holt-Winters Multiplicative Model:", rmse_hw_multiplicative))
## [1] "Test RMSE for Holt-Winters Multiplicative Model: 1.42470901456108"
print(paste("Test RMSE for Holt-Winters Damped Trend Model:", rmse_hw_damped))
## [1] "Test RMSE for Holt-Winters Damped Trend Model: 1.33444427208816"
print(paste("Test RMSE for Seasonal Naïve Model:", rmse_seasonal_naive))
## [1] "Test RMSE for Seasonal Naïve Model: 1.5519812606257"
library(fpp3)
train_data <- myseries %>% filter(year(Month) <= 2010)
test_data <- myseries %>% filter(year(Month) > 2010)
lambda <- features(train_data, Turnover, features = guerrero) %>% pull(lambda_guerrero)
print(paste("Optimal Box-Cox Lambda:", lambda))
## [1] "Optimal Box-Cox Lambda: 0.197143080203977"
train_stl <- train_data %>%
model(STL(box_cox(Turnover, lambda) ~ season(window = "periodic")))
train_stl_components <- components(train_stl)
train_adjusted <- train_stl_components %>%
mutate(Turnover_adjusted = `box_cox(Turnover, lambda)` - season_year) %>%
select(Month, Turnover_adjusted)
ets_adjusted <- train_adjusted %>%
model(ETS(Turnover_adjusted ~ error("A") + trend("Ad") + season("N")))
fc_adjusted <- forecast(ets_adjusted, h = nrow(test_data)) %>%
as_tibble() %>%
select(Month, .mean)
test_stl <- test_data %>%
model(STL(box_cox(Turnover, lambda) ~ season(window = "periodic"))) %>%
components()
seasonality_test <- test_stl %>%
as_tibble() %>%
select(Month, season_year)
fc_final <- fc_adjusted %>%
left_join(seasonality_test, by = "Month") %>%
mutate(Final_Prediction = inv_box_cox(.mean + season_year, lambda)) %>%
select(Month, Final_Prediction)
rmse_stl_ets <- accuracy(fc_final$Final_Prediction, test_data$Turnover)[, "RMSE"]
print(paste("Test RMSE for STL + ETS Model:", rmse_stl_ets))
## [1] "Test RMSE for STL + ETS Model: 1.01141694085381"
print(paste("Test RMSE for Holt-Winters Multiplicative:", rmse_hw_multiplicative))
## [1] "Test RMSE for Holt-Winters Multiplicative: 1.42470901456108"
print(paste("Test RMSE for Holt-Winters Damped:", rmse_hw_damped))
## [1] "Test RMSE for Holt-Winters Damped: 1.33444427208816"
print(paste("Test RMSE for Seasonal Naïve:", rmse_seasonal_naive))
## [1] "Test RMSE for Seasonal Naïve: 1.5519812606257"
ggplot() +
geom_line(data = test_data, aes(x = Month, y = Turnover), color = "black", size = 1) +
geom_line(data = fc_final, aes(x = Month, y = Final_Prediction), color = "blue", size = 1) +
ggtitle("STL + ETS Forecast vs. Test Data") +
ylab("Turnover (Millions of AUD)") +
theme_minimal()