Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.
library(fpp3)
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.0 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.5
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.3.2
## ✔ lubridate 1.9.3 ✔ fable 0.3.4
## ✔ ggplot2 3.5.1 ✔ fabletools 0.4.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
data(aus_livestock)
library(dplyr)
pigs <- aus_livestock %>% filter(Animal == "Pigs" & State=="Victoria")
str(pigs)
## tbl_ts [558 × 4] (S3: tbl_ts/tbl_df/tbl/data.frame)
## $ Month : mth [1:558] 1972 Jul, 1972 Aug, 1972 Sep, 1972 Oct, 1972 Nov, 1972 Dec...
## $ Animal: Factor w/ 7 levels "Bulls, bullocks and steers",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ State : Factor w/ 8 levels "Australian Capital Territory",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ Count : num [1:558] 94200 105700 96500 117100 104600 ...
## - attr(*, "key")= tibble [1 × 3] (S3: tbl_df/tbl/data.frame)
## ..$ Animal: Factor w/ 7 levels "Bulls, bullocks and steers",..: 6
## ..$ State : Factor w/ 8 levels "Australian Capital Territory",..: 7
## ..$ .rows : list<int> [1:1]
## .. ..$ : int [1:558] 1 2 3 4 5 6 7 8 9 10 ...
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
## - attr(*, "index")= chr "Month"
## ..- attr(*, "ordered")= logi TRUE
## - attr(*, "index2")= chr "Month"
## - attr(*, "interval")= interval [1:1] 1M
## ..@ .regular: logi TRUE
** The optimal alpha = 0.3221247 and initial state 100646.6 **
pigs_fit <- pigs %>%
model(ETS(Count ~ error("A") + trend("N") + season("N")))
pigs_forecast <- pigs_fit %>%
forecast(h = 4)
pigs_forecast %>%
autoplot(pigs) +
labs(y = "Count", title = "Victoria Pigs Slaughtered") +
guides(colour = "none")
report(pigs_fit)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
components(pigs_fit) |>
autoplot() +
labs(title = "ETS(A,N,N)")
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
b. Compute a 95% prediction interval for the first forecast using 𝑦̂
±1.96𝑠 where 𝑠 is the standard deviation of the residuals. Compare your
interval with the interval produced by R.
** the point forecast is 95186.56; the 95% interval is [76871, 113502], 95186 falls within the interval and at the midpoint. **
pigs_forecast <- pigs_fit %>%
forecast(h = 4)
pigs_forecast
## # A fable: 4 x 6 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month Count .mean
## <fct> <fct> <chr> <mth> <dist> <dbl>
## 1 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Jan N(95187, 8.7e+07) 95187.
## 2 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Feb N(95187, 9.7e+07) 95187.
## 3 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Mar N(95187, 1.1e+08) 95187.
## 4 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Apr N(95187, 1.1e+08) 95187.
resid_std <- sd(augment(pigs_fit)$.resid)
sprintf(
"confidence interval: [%f, %f]",
pigs_forecast$.mean[1] - (resid_std * 1.96),
pigs_forecast$.mean[1] + (resid_std * 1.96)
)
## [1] "confidence interval: [76871.012478, 113502.102384]"
Data set global_economy contains the annual Exports from many countries. Select one country to analyse.
The data is has a smaller time range, starting at 1990 rather than 1960 like some other countries. There is a upward trend, for the most part with a few troughs - in 2015 and 2008. It doesnt seem to have a cyclical or seasonal pattern.
data("global_economy")
albania <- global_economy %>% filter(Country=="Albania")
alb <- na.omit(albania)
autoplot(alb, Exports)
b. Use an ETS(A,N,N) model to forecast the series, and plot the
forecasts.
alb_fit <- alb %>%
model(ETS(Exports ~ error("A") + trend("N") + season("N")))
alb_forecast <- alb_fit %>%
forecast(h = 5)
alb_forecast %>%
autoplot(alb) +
labs(y = "Exports", title = "Albania's Exports") +
guides(colour = "none")
c. Compute the RMSE values for the training data.
RMSE is 2.316658
alb %>%
model(ANN = ETS(Exports~ error("A") + trend("N") + season("N") )
) %>%
accuracy()
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Albania ANN Training 0.891 2.32 1.77 4.15 9.88 0.963 0.981 0.121
AAN model is used when there is no trend and no seasonality; AAN also is used when there is no seasonality, but is preferred whent here is a trend, this specific mode has a additive trend.
alb %>%
model(AAN = ETS(Exports~ error("A") + trend("A") + season("N") )
) %>%
accuracy()
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Albania AAN Training -0.144 2.20 1.77 -2.07 10.8 0.961 0.932 0.0562
The ETS(A,A,N) model’s RMSE is 2.199 and is lower than the previous model ANN, indicating the AAN model is better performing. This makes sense since AAN model is used when there is no trend; however, since there is a trend the AAN model would be ideal (comparatively) to forecast this data set.
The intervals are exact in value, [28.17952 36.80009], I tried a few different methods to confirm if there was a mistake but they produced the same results, this could be due to the data size being small. The RMSE still guides us as to AAN model performing a bit better.
#model 1 is already fitted
fitted_values <- alb_fit %>% augment()
rmse <- sqrt(mean((fitted_values$.resid) ^ 2))
y_hat <- alb_forecast$.mean[1]
lower_alb_fc <- y_hat - (rmse * 1.96)
upper_alb_fc <- y_hat + (rmse * 1.96)
ets_ann_interval <- c(lower_alb_fc, upper_alb_fc)
# Fit 2nd model
alb_fit1 <- alb %>%
model(ETS(Exports ~ error("A") + trend("A") + season("N")))
alb_forecast1 <- alb_fit1 %>%
forecast(h = 5)
# RMSE
fitted_values1 <- alb_fit1 %>% augment()
rmse1 <- sqrt(mean((fitted_values1$.resid) ^ 2))
y_hat1 <- alb_forecast1$.mean[1]
lower_alb_fc1 <- y_hat1 - (rmse1 * 1.96)
upper_alb_fc1 <- y_hat1 + (rmse1 * 1.96)
ets_aan_interval <- c(lower_alb_fc1, upper_alb_fc1)
ets_ann_interval
## [1] 26.98336 36.06466
ets_aan_interval
## [1] 28.17952 36.80009
resid_std <- sd(augment(alb_fit)$.resid)
sprintf(
"ANN confidence interval: [%f, %f]",
alb_forecast$.mean[1] - (resid_std * 1.96),
alb_forecast$.mean[1] + (resid_std * 1.96)
)
## [1] "ANN confidence interval: [27.252399, 35.795623]"
alb_fit1 <- alb %>%
model(ETS(Exports ~ error("A") + trend("A") + season("N")))
alb_forecast1 <- alb_fit1 %>%
forecast(h = 5)
resid_std <- sd(augment(alb_fit)$.resid)
sprintf(
"AAN confidence interval: [%f, %f]",
alb_forecast1$.mean[1] - (resid_std * 1.96),
alb_forecast1$.mean[1] + (resid_std * 1.96)
)
## [1] "AAN confidence interval: [28.218192, 36.761416]"
## Manual RMSE intervals generally provide a more reliable estimate of the uncertainty around predictions, especially in the presence of outliers or non-constant variance.
#Standard deviation intervals can be useful when the residuals are homoscedastic and normally distributed, but they may not capture prediction error variability as effectively.
Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.
[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]
As expected the slope is more controlled for damped trend (red); the regular holt method has a slope forecast that is a bit high, which might not be realistic. When experimenting with box-cox, I wasnt sure how to add the best transformation based on the data to the autoplot. So the box cox on the first(top) visual is using a fixed transformation(lambda = 0), which might not be an ideal forecast method for this data. The Bottom visual of box cox is more realistic using optimal lambda hence the scale is different.
original <- china_gdp <- global_economy %>% filter(Country=="China")
china_gdp |>
model(
`Holt's method` = ETS(GDP ~ error("A") +
trend("A") + season("N")),
`Damped Holt's method` = ETS(GDP ~ error("A") +
trend("Ad", phi = 0.9) + season("N")),
BoxCox = ETS(box_cox(GDP,0))
) |>
forecast(h = 20) |>
autoplot(china_gdp, level = NULL) +
labs(title = "GDP",
y = "USD") +
guides(colour = guide_legend(title = "Forecast"))
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
lambda_value <- BoxCox.lambda(china_gdp$GDP)
china_gdp <- china_gdp %>%
mutate(GDP_transformed = BoxCox(GDP, lambda_value))
boxcox_forecasts <- china_gdp |>
model(
`Holt's method` = ETS(GDP_transformed ~ error("A") +
trend("A") + season("N")),
`Damped Holt's method` = ETS(GDP_transformed ~ error("A") +
trend("Ad", phi = 0.9) + season("N"))
) |>
forecast(h = 20)
autoplot(boxcox_forecasts, level = NULL) +
labs(title = "GDP",
y = "USD") +
guides(colour = guide_legend(title = "Forecast"))
Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?
Because the seasonality is not constant through out the series, multiplicative is necessary as seasonal variations are changing proportional to the level of the series. The damped trend looks more controlled. Examining the RMSE might be a objective way to assess if the forecast improved. The RMSE shows that the AAM model performs the best, this is also known as the Additive Holt-Winters method, which also has a damp trend. This means the residuals have a constance variance hence additive error.
aus_gas <- aus_production %>%
select(Quarter, Gas)
aus_gas %>%
autoplot(Gas)
fit <- aus_gas %>%
model(
MAM = ETS(Gas ~ error("M") + trend("A") + season("M")),
AAM = ETS(Gas ~ error("A") + trend("A") + season("M")),
MadM = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
)
fc <- fit %>% forecast(h = 40)
fc %>%
autoplot(aus_gas, level=NULL) +
labs(y="Petajoules", title="Gas Australia") +
guides(colour = guide_legend(title = "Forecast"))
aus_gas %>%
model(
MMA = ETS(Gas~ error("M") + trend("A") + season("A")),
AAM = ETS(Gas ~ error("A") + trend("A") + season("M")),
MadM = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
) %>%
accuracy()
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MMA Training 0.0508 6.39 4.03 2.02 14.5 0.723 0.843 0.226
## 2 AAM Training 0.218 4.19 2.84 -0.920 5.03 0.510 0.553 0.0405
## 3 MadM Training -0.00439 4.59 3.03 0.326 4.10 0.544 0.606 -0.0217
Recall your retail time series data (from Exercise 7 in Section 2.10).
set.seed(12345678)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries %>%
autoplot(Turnover)
a. Why is multiplicative seasonality necessary for this series? seasonal
variations are not constanst and are changing proportional to the level
of the series
fit <- myseries %>%
model(
MAM = ETS(Turnover ~ error("M") + trend("A") + season("M")),
MAdM = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
)
fc <- fit %>% forecast(h = 50)
fc %>%
autoplot(myseries, level=NULL) +
labs(y="turnover", title="Retail Turnover: Australia") +
guides(colour = guide_legend(title = "Forecast"))
c. Compare the RMSE of the one-step forecasts from the two methods.
Which do you prefer? The RMSE is very close (while the visual seems like
there is a larger range). Based on the RMSE value which is lower in MAM
- this would have to be the preference.
myseries %>%
model(
MAM = ETS(Turnover ~ error("M") + trend("A") + season("M")),
MAdM = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
) %>%
accuracy()
## # A tibble: 2 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Northern … Clothin… MAM Trai… -0.0128 0.613 0.450 -0.469 5.15 0.513 0.529
## 2 Northern … Clothin… MAdM Trai… 0.0398 0.616 0.444 -0.0723 5.06 0.507 0.531
## # ℹ 1 more variable: ACF1 <dbl>
fit |>
components() |>
filter(.model == "MAdM") |>
select(remainder) |>
autoplot(.vars = remainder, na.rm = TRUE)
pref_mod <- fit %>% select(MAM)
pref_mod %>%
gg_tsresiduals()
set.seed(45224)
train <- myseries %>%
filter(year(Month) < 2011)
test <- myseries %>%
filter(year(Month) >= 2011)
fit <- train %>%
model(
'MAM' = ETS(Turnover ~ error('M') + trend('A') + season('M')),
'MAdM' = ETS(Turnover ~ error('M') + trend('Ad') + season('M')),
'Seasonal Naive' = SNAIVE(Turnover)
)
comp <- anti_join(myseries, train, by = c('State', 'Industry', 'Series ID', 'Month', 'Turnover'))
fc <- fit %>% forecast(comp)
forecast_values <- fc %>%
as_tibble() %>%
select(Month, .model, .mean)
test_tibble <- test %>%
select(Month, Turnover) %>%
as_tibble()
comparison <- test_tibble %>%
left_join(forecast_values, by = "Month")
comparison <- comparison %>%
mutate(squared_error = (Turnover - .mean)^2)
rmse_manual <- comparison %>%
group_by(.model) %>%
summarise(RMSE = sqrt(mean(squared_error, na.rm = TRUE)))
rmse_manual
## # A tibble: 3 × 2
## .model RMSE
## <chr> <dbl>
## 1 MAM 1.78
## 2 MAdM 1.15
## 3 Seasonal Naive 1.55
autoplot(comp, Turnover) +
autolayer(fc, level = NULL) +
labs(title = 'Forecast Method Comparison')
For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set? Both boxcox transformed data result in models that perform well and have low RMSE: STL Box-Cox has a RMSE of 0.0818 (which is the best performance, comparatively) and ETS box cox has a RMSE of 0.0994. The seasonal Naive had as RMSE of 1.551981; the Holt Winters Multiplicative Method with box cox applies had RMSE of 0.517819.
lambda_train <- train %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
box_cox_myseries <- train %>%
mutate(
bc = box_cox(Turnover, lambda_train)
)
fit2 <- box_cox_myseries %>%
model(
'STL Box-Cox' = STL(bc ~ season(window = 'periodic'), robust = TRUE),
'ETS Box-Cox' = ETS(bc)
)
fit3 <- box_cox_myseries %>%
model(
'Holt Winters Multiplicative Method' = ETS(Turnover ~ error('M') + trend('A') + season('M'))
)
accuracy(fit2)
## # A tibble: 2 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Northern … Clothin… STL B… Trai… 0.00745 0.0819 0.0623 0.193 2.85 0.329 0.326
## 2 Northern … Clothin… ETS B… Trai… 0.0139 0.0994 0.0784 0.516 3.56 0.414 0.396
## # ℹ 1 more variable: ACF1 <dbl>
set.seed(26435)
dcmp <- box_cox_myseries |>
model(STL(bc ~ season(window = "periodic"), robust = TRUE)) |>
components()
# seasonally adjusted
dcmp_sa <- dcmp |>
select(Month, season_adjust) |>
rename(Turnover = season_adjust)
# Merge sa back with the original
myseries_stl <- myseries |>
left_join(dcmp_sa, by = "Month") |>
select(Month, Turnover = Turnover.x)
# Create the test set
myseries_stl_test <- myseries_stl %>%
filter(year(Month) >= 2011)
# Fit models to the training data
fit2 <- box_cox_myseries |>
model(
'STL Box-Cox' = STL(bc ~ season(window = 'periodic'), robust = TRUE),
'ETS Box-Cox' = ETS(bc)
)
fit3 <- box_cox_myseries |>
model(
'Holt Winters Multiplicative Method' = ETS(Turnover ~ error('M') + trend('A') + season('M'))
)
# Fit the model to the test set
fit_stl <- myseries_stl_test |>
model(AAdM = ETS(Turnover ~ error("A") + trend("Ad") + season("M")))
# Forecast
fc_stl <- fit_stl |>
forecast(h = 50)
# forecasts from STL model
fc_stl |>
autoplot(myseries_stl) +
geom_line(aes(y = .fitted),
data = augment(fit_stl)) +
labs(y = "Turnover", title = "STL Decomposition with ETS on Seasonally Adjusted Data")
accuracy_previous <- accuracy(fit3) # For Holt-Winters method
accuracy_stl <- accuracy(fit_stl) # For the ETS model
print(accuracy_previous)
## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Northern T… Clothin… Holt … Trai… -0.0119 0.518 0.384 -0.446 5.21 0.420 0.427
## # ℹ 1 more variable: ACF1 <dbl>
print(accuracy_stl)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAdM Training 0.00112 0.617 0.469 -0.151 3.66 0.585 0.596 0.0251