library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.3
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.0 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.5
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.3.2
## ✔ lubridate 1.9.3 ✔ fable 0.3.4
## ✔ ggplot2 3.5.1 ✔ fabletools 0.4.2
## Warning: package 'tsibble' was built under R version 4.3.3
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ purrr 1.0.2 ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(tsibble)
library(fable)
library(ggplot2)
Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of # Use this in place of in your code or text and 0, and generate forecasts for the next four months.
# Load the dataset and filter for Victoria's pigs data
Vic_pigs <- aus_livestock %>%
filter(State == "Victoria", Animal == "Pigs")
# Fit an ETS model equivalent to simple exponential smoothing (SES)
fit_ses <- Vic_pigs %>%
model(ETS(Count ~ error("A") + trend("N") + season("N")))
# Forecast for the next 4 months
fc_ses <- fit_ses %>%
forecast(h = "4 months")
# Plot the forecast
autoplot(fc_ses, Vic_pigs) +
labs(title = "Number of Pigs Slaughtered in Victoria with 4 Month Forecast", y = "Number of Pigs")
Compute a 95% prediction interval for the first forecast using ^y±1.96s where s is the standard deviation of the residuals. Compare your interval with the interval produced by R.
# After fitting the model
fit_ses <- Vic_pigs %>%
model(ETS(Count ~ error("A") + trend("N") + season("N")))
# Get the report which contains the sigma^2
fit_report <- report(fit_ses)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
# Extract sigma^2 from the report
sigma_squared <- fit_report$sigma^2
## Warning: Unknown or uninitialised column: `sigma`.
# Calculate the first forecast value
first_forecast <- fc_ses$.mean[1]
# Calculate the 95% prediction interval using y_hat ± 1.96 * s
lower_bound <- first_forecast - 1.96 * sqrt(sigma_squared)
upper_bound <- first_forecast + 1.96 * sqrt(sigma_squared)
# Print the results
cat("95% prediction interval for the first forecast: (", lower_bound, ",", upper_bound, ")\n")
## 95% prediction interval for the first forecast: ( , )
alpha = 0.3221247
ℓ0 = 100646.6
These are the optimal values for the alpha and ℓ0.
Plot the Exports series and discuss the main features of the data. Use an ETS(A,N,N) model to forecast the series, and plot the forecasts. Compute the RMSE values for the training data. Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set. Compare the forecasts from both methods. Which do you think is best? Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.
# Select data for Australia
exports_data <- global_economy %>% filter(Country == "Australia")
# Plot the exports data
autoplot(exports_data, Exports) +
labs(title = "Australian Exports",
y = "Exports")
# ETS(A,N,N) model and forecast
fit_ets_ann <- exports_data %>%
model(ETS(Exports ~ error("A") + trend("N") + season("N")))
fc_ann <- forecast(fit_ets_ann)
# Plot forecast
autoplot(fc_ann)
# Compute RMSE
accuracy(fit_ets_ann)
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Australia "ETS(Exports… Trai… 0.232 1.15 0.914 1.09 5.41 0.928 0.928 0.0125
# Compare with ETS(A,A,N) model
fit_ets_aan <- exports_data %>%
model(ETS(Exports ~ error("A") + trend("A") + season("N")))
accuracy(fit_ets_aan)
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Australia "ETS(Expo… Trai… -7.46e-7 1.12 0.893 -0.387 5.39 0.907 0.904 0.109
[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]
# Filter for China GDP data
china_gdp <- global_economy %>% filter(Country == "China")
# Visualize the subset
china_gdp %>% autoplot(GDP) +
labs(y = "GDP", title = "Chinese Gross Domestic Product")
# Fit an ETS model
fit_china_ets <- china_gdp %>%
model(ETS(GDP))
# Forecast and plot
fc_china_ets <- forecast(fit_china_ets, h = 20)
autoplot(fc_china_ets)
# Experiment with damped trend and Box-Cox
fit_china_ets_damped <- china_gdp %>%
model(ETS(GDP ~ error("A") + trend("Ad", phi = 0.9)))
fc_china_damped <- forecast(fit_china_ets_damped, h = 20)
autoplot(fc_china_damped)
# Glimpse at the dataset
aus_production %>%
autoplot(Gas)
# Filter Gas data
gas_data <- aus_production %>%
select(Quarter, Gas)
# Fit ETS model with multiplicative seasonality
fit_gas_ets <- gas_data %>%
model(ETS(Gas ~ error("M") + trend("A") + season("M")))
# Forecast and plot for 3 years ahead
fc_gas <- forecast(fit_gas_ets, h = "3 years")
autoplot(fc_gas)
# Fit ETS model with damped trend
fit_gas_damped <- gas_data %>%
model(ETS(Gas ~ error("M") + trend("Ad") + season("M")))
# Forecast and plot for 3 years ahead
fc_gas_damped <- forecast(fit_gas_damped, h = "3 years")
autoplot(fc_gas_damped)
Why is multiplicative seasonality necessary for this series? Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped. Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer? Check that the residuals from the best method look like white noise. Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?
set.seed(1234)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
fit_my <- myseries %>%
model(
multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M")),
`damped multiplicative` = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
)
fit_my %>%
forecast(h = 36) %>%
autoplot(myseries, level = NULL) +
labs(title = "Australian Retail Turnover") +
guides(colour = guide_legend(title = "Forecast"))
# Compare RMSE
accuracy(fit_my) %>%
select(.model, RMSE)
## # A tibble: 2 × 2
## .model RMSE
## <chr> <dbl>
## 1 multiplicative 1.34
## 2 damped multiplicative 1.36
# Check residuals for the best method
myseries %>%
model(multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))) %>%
gg_tsresiduals() +
ggtitle("Multiplicative Method")
# Box-Pierce test
myseries %>%
model(multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))) %>%
augment() %>%
features(.innov, box_pierce, lag = 24, dof = 0)
## # A tibble: 1 × 5
## State Industry .model bp_stat bp_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Tasmania Cafes, restaurants and takeaway food servic… multi… 27.2 0.296
# Ljung-Box test
myseries %>%
model(multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))) %>%
augment() %>%
features(.innov, ljung_box, lag = 24, dof = 0)
## # A tibble: 1 × 5
## State Industry .model lb_stat lb_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Tasmania Cafes, restaurants and takeaway food servic… multi… 28.0 0.260
# Training and test RMSE
myseries_train <- myseries %>%
filter(year(Month) < 2011)
fit_train <- myseries_train %>%
model(
multi = ETS(Turnover ~ error("M") + trend("A") + season("M")),
snaive = SNAIVE(Turnover)
)
fc <- fit_train %>%
forecast(new_data = anti_join(myseries, myseries_train))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
fc %>% autoplot(myseries, level = NULL)
fc %>% accuracy(myseries) %>%
select(.type, .model, RMSE)
## # A tibble: 2 × 3
## .type .model RMSE
## <chr> <chr> <dbl>
## 1 Test multi 3.99
## 2 Test snaive 9.13
# Finding the Box-Cox lambda
lambda <- myseries_train %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
# STL decomposition applied to the Box-Cox transformed data
myseries_train %>%
model(STL(box_cox(Turnover, lambda) ~ season(window = "periodic"), robust = TRUE)) %>%
components() %>%
autoplot() +
ggtitle("STL with Box-Cox")
# Get seasonally adjusted data
dcmp <- myseries_train %>%
model(STL_box = STL(box_cox(Turnover, lambda) ~ season(window = "periodic"), robust = TRUE)) %>%
components()
# Create new variable for seasonally adjusted turnover
myseries_train <- myseries_train %>%
mutate(Turnover_sa = dcmp$season_adjust)
# Modeling on the seasonally adjusted data
fit <- myseries_train %>%
model(ETS(Turnover_sa ~ error("M") + trend("A") + season("M")))
# Checking residuals
fit %>% gg_tsresiduals() +
ggtitle("Residual Plots for Australian Retail Turnover")
# Produce forecasts for test data
fc <- fit %>%
forecast(new_data = anti_join(myseries, myseries_train))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
# Accuracy on training data
fit %>% accuracy() %>%
select(.model, .type, RMSE)
## # A tibble: 1 × 3
## .model .type RMSE
## <chr> <chr> <dbl>
## 1 "ETS(Turnover_sa ~ error(\"M\") + trend(\"A\") + season(\"M\"))" Traini… 0.141
fc %>% accuracy(myseries) %>% select(.model, .type, RMSE)