Load libraries
library(tidyverse)
library(fpp3)
Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.
The optimal value for alpha is .3221247 and l is 100646.6.
# filter for State Victoria and animal as Pigs
pigs <- aus_livestock %>%
filter(Animal == "Pigs" & State == "Victoria")
autoplot(pigs) +
labs(title = "Pigs slaughtered in Victoria")
## Plot variable not specified, automatically selected `.vars = Count`
fit <- pigs %>%
model(ANN = ETS(Count ~ error("A") + trend("N") + season("N")))
report(fit)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
The interval for 95% prediction in r and the manual calculation differs slightly.
pigs_fc <- pigs %>%
model(ANN = ETS(Count ~ error("A") + trend("N") + season("N"))) %>%
forecast( h = 5) %>%
hilo(level = 95) %>%
mutate(lower = `95%`$lower, upper=`95%`$upper) %>%
pull(lower, upper) %>%
head(1)
pigs_fc
## 113518.325972343
## 76854.79
pigs_fc <- pigs %>%
model(ANN = ETS(Count ~ error("A") + trend("N") + season("N"))) %>%
forecast( h = 5)
pigs_fc
## # A fable: 5 x 6 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month Count .mean
## <fct> <fct> <chr> <mth> <dist> <dbl>
## 1 Pigs Victoria ANN 2019 Jan N(95187, 8.7e+07) 95187.
## 2 Pigs Victoria ANN 2019 Feb N(95187, 9.7e+07) 95187.
## 3 Pigs Victoria ANN 2019 Mar N(95187, 1.1e+08) 95187.
## 4 Pigs Victoria ANN 2019 Apr N(95187, 1.1e+08) 95187.
## 5 Pigs Victoria ANN 2019 May N(95187, 1.2e+08) 95187.
# mean +- 1.96 * s
mean = 95186.56
s = sqrt(87480760)
lower = mean - (1.96 * s)
upper = mean + (1.96 * s)
lower
## [1] 76854.45
upper
## [1] 113518.7
Data set global_economy contains the annual Exports from many countries. Select one country to analyse.
There was a decline in exports in Peru from 1960 to 1978 and then in had an increase in exports for a few years before you saw a decline from 1988 to 2000 before it increased again.
global_economy %>%
filter(Country == "Peru") %>%
autoplot(Exports) +
labs(title = "Peru Exports")
peru_fit <- global_economy %>%
filter(Country == "Peru") %>%
model(ETS(Exports ~ error("A") + trend("N") + season("N")))
peru_fc <- peru_fit %>%
forecast(h = 10)
peru_fc %>%
autoplot(global_economy)
accuracy(peru_fit)$RMSE
## [1] 2.862248
Comparing the two ETS model the ETS(A,N,N) has more of a flat straight line compared to ETS(A,A,N) which is slightly pivoted up for the forecast.
peru_fit2 <- global_economy %>%
filter(Country == "Peru") %>%
model(ETS(Exports ~ error("A") + trend("A") + season("N")))
peru_fc2 <- peru_fit2 %>%
forecast(h = 10)
peru_fc2 %>%
autoplot(global_economy)
accuracy(peru_fit2)$RMSE
## [1] 2.862429
The RMSE is better in the ETS(A,N, N) than in the ETS(A,A,N)
THe RMSE values in the peru_fit model was slightly better than the peru_fit2 model.
peru_fc %>%
hilo(level = 95) %>%
mutate(lower = `95%`$lower, upper=`95%`$upper) %>%
pull(lower, upper) %>%
head(1)
## 29.9725591202677
## 18.55416
report(peru_fit)
## Series: Exports
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.9999
##
## Initial states:
## l[0]
## 20.199
##
## sigma^2: 8.4851
##
## AIC AICc BIC
## 363.4922 363.9366 369.6735
peru_fc <- peru_fit %>%
forecast(h = 10)
peru_fc
## # A fable: 10 x 5 [1Y]
## # Key: Country, .model [1]
## Country .model Year Exports .mean
## <fct> <chr> <dbl> <dist> <dbl>
## 1 Peru "ETS(Exports ~ error(\"A\") + trend(\"N\") + … 2018 N(24, 8.5) 24.3
## 2 Peru "ETS(Exports ~ error(\"A\") + trend(\"N\") + … 2019 N(24, 17) 24.3
## 3 Peru "ETS(Exports ~ error(\"A\") + trend(\"N\") + … 2020 N(24, 25) 24.3
## 4 Peru "ETS(Exports ~ error(\"A\") + trend(\"N\") + … 2021 N(24, 34) 24.3
## 5 Peru "ETS(Exports ~ error(\"A\") + trend(\"N\") + … 2022 N(24, 42) 24.3
## 6 Peru "ETS(Exports ~ error(\"A\") + trend(\"N\") + … 2023 N(24, 51) 24.3
## 7 Peru "ETS(Exports ~ error(\"A\") + trend(\"N\") + … 2024 N(24, 59) 24.3
## 8 Peru "ETS(Exports ~ error(\"A\") + trend(\"N\") + … 2025 N(24, 68) 24.3
## 9 Peru "ETS(Exports ~ error(\"A\") + trend(\"N\") + … 2026 N(24, 76) 24.3
## 10 Peru "ETS(Exports ~ error(\"A\") + trend(\"N\") + … 2027 N(24, 85) 24.3
# mean +- 1.96 * s
mean = 24.3
s = sqrt(8.84851)
lower2 = mean - (1.96 * s)
upper2 = mean + (1.96 * s)
lower2
## [1] 18.4697
upper2
## [1] 30.1303
peru_fc2 %>%
hilo(level = 95) %>%
mutate(lower = `95%`$lower, upper=`95%`$upper) %>%
pull(lower, upper) %>%
head(1)
## 30.137769072594
## 18.5091
report(peru_fit2)
## Series: Exports
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.9998999
## beta = 0.0001002129
##
## Initial states:
## l[0] b[0]
## 20.69266 0.06006188
##
## sigma^2: 8.8004
##
## AIC AICc BIC
## 367.4995 368.6533 377.8017
peru_fc2 <- peru_fit2 %>%
forecast(h = 10)
peru_fc2
## # A fable: 10 x 5 [1Y]
## # Key: Country, .model [1]
## Country .model Year Exports .mean
## <fct> <chr> <dbl> <dist> <dbl>
## 1 Peru "ETS(Exports ~ error(\"A\") + trend(\"A\") + … 2018 N(24, 8.8) 24.3
## 2 Peru "ETS(Exports ~ error(\"A\") + trend(\"A\") + … 2019 N(24, 18) 24.4
## 3 Peru "ETS(Exports ~ error(\"A\") + trend(\"A\") + … 2020 N(24, 26) 24.4
## 4 Peru "ETS(Exports ~ error(\"A\") + trend(\"A\") + … 2021 N(25, 35) 24.5
## 5 Peru "ETS(Exports ~ error(\"A\") + trend(\"A\") + … 2022 N(25, 44) 24.6
## 6 Peru "ETS(Exports ~ error(\"A\") + trend(\"A\") + … 2023 N(25, 53) 24.6
## 7 Peru "ETS(Exports ~ error(\"A\") + trend(\"A\") + … 2024 N(25, 62) 24.7
## 8 Peru "ETS(Exports ~ error(\"A\") + trend(\"A\") + … 2025 N(25, 70) 24.7
## 9 Peru "ETS(Exports ~ error(\"A\") + trend(\"A\") + … 2026 N(25, 79) 24.8
## 10 Peru "ETS(Exports ~ error(\"A\") + trend(\"A\") + … 2027 N(25, 88) 24.9
# mean +- 1.96 * s
mean = 24.3
s = sqrt(8.8004)
lower2 = mean - (1.96 * s)
upper2 = mean + (1.96 * s)
lower2
## [1] 18.48557
upper2
## [1] 30.11443
Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.
All methods were conducted using an additive model due to the linear trend in the data. With the simple exponential smoothing the line was more flat as it takes into account for more recent data. In the holt linear there was a slight slope in the data and the additive damped model it had a bigger slope. In the box cox method the data increased and pivot up and with the additive damped it was similar but with less pivot. The box cox forecast lines are more smooth and consistent in intervals.
#getting lambda for China - GDP
chn_lambda <- global_economy %>%
filter(Code =="CHN") %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)
#different methods
fit <- global_economy %>%
filter(Code == "CHN") %>%
model(
simple = ETS(GDP ~ error("A") + trend("N") + season("N")),
holt_linear = ETS(GDP ~ error("A") + trend("A") + season("N")),
additive_damped = ETS(GDP ~ error("A") + trend("Ad") + season("N")),
box_cox = ETS(box_cox(GDP, chn_lambda) ~ error("A") + trend("Ad") + season("N")),
box_cox_damped = ETS(box_cox(GDP, chn_lambda) ~ error("A") + trend("Ad", phi = 0.8) + season("N"))
)
#forecast
fc <- fit %>%
forecast(h = 30)
#chart
fc %>%
autoplot(global_economy, level = NULL) +
labs(title = "China GDP") +
guides(colour = guide_legend(title = "Methods"))
Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?
A multiplicative seasonality will be necessary because the data has increasing trend and the intervals are growing. With the trend damped it will increase the range on the bottom end of the data. The multiplicative damped model was better as their RSME was lower than the multiplicative model.
gas_fit <- aus_production %>%
model(
multi = ETS(Gas ~ error("M") + trend("A") + season("M")),
multi_damped = ETS(Gas ~ error("M") + trend("Ad", phi = 0.8) + season("M"))
)
gas_fc <- gas_fit %>%
forecast(h = 30)
gas_fc %>%
autoplot(aus_production, level = NULL) +
labs(title = "Gas Production") +
guides(colour = guide_legend(title = "Methods"))
accuracy(gas_fit)
## # A tibble: 2 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 multi Training -0.115 4.60 3.02 0.199 4.08 0.542 0.606 -0.0131
## 2 multi_damped Training 0.435 4.56 3.04 0.892 4.18 0.545 0.601 -0.0387
Recall your retail time series data (from Exercise 7 in Section 2.10).
set.seed(12345678)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries %>%
autoplot()
## Plot variable not specified, automatically selected `.vars = Turnover`
a. Why is multiplicative seasonality necessary for this series?
A multiplicative seasonality is necessary for this series is because the data has an increasing trend and the intervals are increasing larger and larger over time.
myseries_fit <- myseries %>%
model(
multi = ETS(Turnover ~ error("M") + trend("A") + season("M")),
multi_damped = ETS(Turnover ~ error("M") + trend("Ad", phi = .8) + season("M"))
)
myseries_fc <- myseries_fit %>%
forecast(h = 30)
myseries_fc %>%
autoplot(myseries, level = NULL) +
labs(title = "Australia Turnover Retail") +
guides(colour = guide_legend(title = "Methods"))
accuracy(myseries_fit)
## # A tibble: 2 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Northern T… Clothin… multi Trai… -0.0128 0.613 0.450 -0.469 5.15 0.513 0.529
## 2 Northern T… Clothin… multi… Trai… 0.0495 0.619 0.452 0.303 5.18 0.516 0.534
## # ℹ 1 more variable: ACF1 <dbl>
I prefer the multiplicative model as the RMSE was slightly lower than the damped trend.
With the p-value greater than .05 there is white noise in the data.
myseries %>%
model(
multi = ETS(Turnover ~ error("M") + trend("A") + season("M"))
) %>%
gg_tsresiduals()
#Ljung-box test
myseries %>%
model(multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))) %>%
augment() %>%
features(.innov, ljung_box, lag = 24)
## # A tibble: 1 × 5
## State Industry .model lb_stat lb_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Northern Territory Clothing, footwear and personal a… multi… 34.5 0.0765
The seasonal naive approach has a lower RMSE than the multiplicative model.
#training data
train <- myseries %>%
filter(year(Month) <= 2010)
# test data
test_data <- myseries %>%
filter(year(Month) > 2010)
# fit the model
fit_train <- train %>%
model(
multi = ETS(Turnover ~ error("M") + trend("A") + season("M")),
snaive = SNAIVE(Turnover))
#forecast
train_fc <- fit_train %>%
forecast(new_data = test_data)
#calculate RMSE
accuracy(train_fc, test_data) %>%
select(.model,.type, RMSE)
## # A tibble: 2 × 3
## .model .type RMSE
## <chr> <chr> <dbl>
## 1 multi Test 1.78
## 2 snaive Test 1.55
# Plot the actual data and forecasts
train_fc %>%
autoplot(myseries, level = NULL) +
labs(title = "Forecasts for Turnover",
x = "Month",
y = "Turnover") +
guides(colour = guide_legend(title = "Model")) +
theme_minimal()
For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?
The RMSE in this one is better than the previous one.
lambda <- myseries %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
stl_comp <- myseries %>%
model(STL = STL(box_cox(Turnover,lambda) ~ season(window = "periodic"), robust = TRUE)) %>%
components()
myseries$season_adj <- stl_comp$season_adjust
train2 <- myseries %>%
filter(year(Month) <= 2010)
fit2 <- train2 %>%
model(ETS(season_adj ~ error("M") + trend("A") + season("M")))
fc2 <- fit2 %>%
forecast(new_data = anti_join(myseries, train2))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover,
## season_adj)`
fit2 %>% accuracy() %>%
select(.model, .type, RMSE)
## # A tibble: 1 × 3
## .model .type RMSE
## <chr> <chr> <dbl>
## 1 "ETS(season_adj ~ error(\"M\") + trend(\"A\") + season(\"M\"))" Traini… 0.0789
fc2 %>% accuracy(myseries) %>%
select(.model, .type, RMSE)
## # A tibble: 1 × 3
## .model .type RMSE
## <chr> <chr> <dbl>
## 1 "ETS(season_adj ~ error(\"M\") + trend(\"A\") + season(\"M\"))" Test 0.325