# Load the packages used throughout the exercises.
library(fpp3)
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.4
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.3.1
## ✔ lubridate 1.9.3 ✔ fable 0.3.3
## ✔ ggplot2 3.5.0 ✔ fabletools 0.4.0
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ purrr 1.0.2 ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Consider the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.
Slaughtered pigs in Victoria:
pigs <- aus_livestock %>%
  filter(Animal == "Pigs", State == "Victoria")
# Plot the time series.
s_pigs <- pigs %>%
  autoplot(Count) +
  labs(title = 'Pigs Slaughtered in Victoria')
s_pigs
Using the ETS() function:
f_value <- pigs %>%
  model(ETS(Count ~ error('A') + trend('N') + season('N')))
report(f_value)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
The optimal values are \(\alpha = 0.3221247\) and \(l_0 = 100646.6\).
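These estimates plug into the simple exponential smoothing recursions underlying ETS(A,N,N) (stated here for reference; this is the standard component form rather than anything read off the output above):
\[
\hat{y}_{t+1\mid t} = \ell_t, \qquad \ell_t = \alpha y_t + (1-\alpha)\,\ell_{t-1},
\]
with the recursion initialised at \(\ell_0\), so a larger \(\alpha\) puts more weight on the most recent observations.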
Now we find forecasts for the next four months:
# Four-month forecast
f_m_fc <- f_value %>%
  forecast(h = 4)
f_m_fc
## # A fable: 4 x 6 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month Count .mean
## <fct> <fct> <chr> <mth> <dist> <dbl>
## 1 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Jan N(95187, 8.7e+07) 95187.
## 2 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Feb N(95187, 9.7e+07) 95187.
## 3 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Mar N(95187, 1.1e+08) 95187.
## 4 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Apr N(95187, 1.1e+08) 95187.
# Plot the forecasts, showing the observed data from January 2018 onwards.
f_m_fc_plot <- f_value %>%
  forecast(h = 4) %>%
  autoplot(filter(pigs, Month >= yearmonth('2018 Jan'))) +
  labs(title = 'Four Month Forecast Data')
f_m_fc_plot
# Get the first forecast.
hat_y <- f_m_fc %>%
  pull(Count) %>%
  head(1)
# Get the standard deviation of the residuals.
std <- augment(f_value) %>%
  pull(.resid) %>%
  sd()
# Calculate the lower and upper bounds of a 95% prediction interval.
lowerCi <- hat_y - 1.96 * std
upperCi <- hat_y + 1.96 * std
results <- c(lowerCi, upperCi)
names(results) <- c('Lower', 'Upper')
results
## <distribution[2]>
## Lower Upper
## N(76871, 8.7e+07) N(113502, 8.7e+07)
hilo(f_m_fc$Count, 95) %>% head(1)
## <hilo[1]>
## [1] [76854.79, 113518.3]95
The 95% prediction interval for the first forecast computed by hand runs from 76871 to 113502. It is slightly narrower than the 95% interval produced by R (76854.79 to 113518.3), because the manual calculation uses the sample standard deviation of the residuals rather than the model's estimated variance \(\hat\sigma^2\).
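As a quick check (this chunk is a sketch added here, not part of the original output), the interval reported by hilo() can be reproduced from the model's estimated variance, which glance() exposes as sigma2 for the fitted ETS model:
# Sketch: rebuild R's 95% interval from the model's estimated sigma^2.
sigma_hat <- sqrt(glance(f_value)$sigma2)
y1 <- f_m_fc$.mean[1]   # first point forecast
c(Lower = y1 - 1.96 * sigma_hat, Upper = y1 + 1.96 * sigma_hat)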
Data set global_economy contains the annual Exports from many countries. Select one country to analyse.
data("global_economy")
ge <- global_economy %>%
  filter(Country == 'Peru')
ge %>%
  autoplot(Exports)
The series fluctuates considerably from year to year, with sharp declines followed by rapid recoveries around 1980 and 2000. Overall, Peru's exports (as a share of GDP) trend upwards, with a particularly sharp rise after 2000, suggesting strong growth in export activity over that period.
ge_mod <- ge %>%
  model(ETS(Exports ~ error("A") + trend("N") + season("N")))
ge_fc <- ge_mod %>%
  forecast(h = 5)
ge_fc %>%
  autoplot(global_economy)
accuracy(ge_mod)$RMSE
## [1] 2.862248
The RMSE value for the training data is 2.862248.
ge_mod2 <- ge %>%
  model(ETS(Exports ~ error("A") + trend("A") + season("N")))
ge_fc2 <- ge_mod2 %>%
  forecast(h = 5)
ge_fc2 %>%
  autoplot(global_economy)
accuracy(ge_mod2)$RMSE
## [1] 2.862429
The training RMSE of the ETS(A,N,N) model (2.862248) is marginally lower than that of the ETS(A,A,N) model (2.862429), so it fits the training data slightly better.
Given that the RMSE difference is negligible, and that the ETS(A,A,N) forecasts continue the upward trend in the data while the ETS(A,N,N) forecasts are flat, the ETS(A,A,N) model seems the more appropriate choice based on its forecasts.
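A further check (added here as a sketch, not part of the original analysis) is to fit the two candidates side by side and compare their AICc values with glance(), which penalises the extra trend parameters of the ETS(A,A,N) model:
# Sketch: compare the two ETS specifications by AICc as well as RMSE.
ge %>%
  model('ANN' = ETS(Exports ~ error("A") + trend("N") + season("N")),
        'AAN' = ETS(Exports ~ error("A") + trend("A") + season("N"))) %>%
  glance() %>%
  select(.model, AICc)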
s1 <- sd(residuals(ge_mod)$.resid)
mean1 <- ge_fc$.mean[1]
c(mean1 - 1.96 * s1, mean1 + 1.96 * s1)
## [1] 18.60605 29.92066
The 95% interval is (18.60605, 29.92066).
ge_fc2$Exports
## <distribution[5]>
## [1] N(24, 8.8) N(24, 18) N(24, 26) N(25, 35) N(25, 44)
s2 <- sd(residuals(ge_mod2)$.resid)
mean2 <- ge_fc2$.mean[1]
c(mean2 - 1.96 * s2, mean2 + 1.96 * s2)
## [1] 18.66408 29.98280
The 95% interval is (18.66408, 29.98280).
ge_fc2$Exports[1]
## <distribution[1]>
## [1] N(24, 8.8)
Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.
[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]
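As background for interpreting the plot below (these are the standard Holt and damped-trend forecast functions, stated here for intuition rather than taken from the original), the \(h\)-step forecasts are
\[
\hat{y}_{t+h\mid t} = \ell_t + h\,b_t \quad\text{(Holt)}, \qquad
\hat{y}_{t+h\mid t} = \ell_t + (\phi + \phi^2 + \cdots + \phi^h)\,b_t \quad\text{(damped, } 0 < \phi < 1\text{)},
\]
so damping makes the trend contribution level off instead of growing without bound, while the Box-Cox transformation compresses the very large recent GDP values before the model is fitted and back-transforms the forecasts afterwards.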
# Use a name other than c to avoid masking base::c().
china <- global_economy %>%
  filter(Country == 'China')
lam <- china %>%
  features(GDP, features = guerrero) %>%
  pull(lambda_guerrero)
c_mod <- china %>%
  model('SES' = ETS(GDP ~ error("A") + trend("N") + season("N")),
        'Holt' = ETS(GDP ~ error("A") + trend("A") + season("N")),
        'Damped Holt' = ETS(GDP ~ error("A") + trend("Ad", phi = 0.75) + season("N")),
        'Box-Cox' = ETS(box_cox(GDP, lam) ~ error("A") + trend("A") + season("N")),
        'Damped Box-Cox' = ETS(box_cox(GDP, lam) ~ error("A") + trend("Ad", phi = 0.75) + season("N")))
c_fc <- c_mod %>%
  forecast(h = 25)
c_fc %>%
  autoplot(global_economy, level = NULL)
Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?
gas_mod <- aus_production %>%
  model('Multiplicative' = ETS(Gas ~ error("M") + trend("A") + season("M")),
        'Damped Multiplicative' = ETS(Gas ~ error("M") + trend("Ad", phi = 0.75) + season("M")))
gas_fc <- gas_mod %>%
  forecast(h = 20)
gas_fc %>%
  autoplot(aus_production, level = NULL)
Multiplicative seasonality is necessary here because the size of the seasonal fluctuations grows in proportion to the level of the series; an additive seasonal component would force the seasonal swings to stay the same size throughout.
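To make that concrete (these are the standard ETS observation equations, added for reference rather than taken from the original), additive and multiplicative seasonality differ as
\[
y_t = (\ell_{t-1} + b_{t-1}) + s_{t-m} + \varepsilon_t
\qquad\text{versus}\qquad
y_t = (\ell_{t-1} + b_{t-1})\, s_{t-m}\,(1 + \varepsilon_t),
\]
so in the multiplicative form the seasonal effect scales with the current level, which is exactly the pattern the Gas series shows.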
accuracy(gas_mod) %>%
  select(.model, RMSE)
## # A tibble: 2 × 2
## .model RMSE
## <chr> <dbl>
## 1 Multiplicative 4.60
## 2 Damped Multiplicative 4.54
The training RMSE of the damped multiplicative model (4.54) is lower than that of the multiplicative model (4.60), so damping the trend does improve the fit slightly, although the difference is small.
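A more direct test of whether damping improves the forecasts (rather than the training fit) is time series cross-validation. The chunk below is a sketch added here, under the assumption that fold settings of .init = 80 and .step = 8 are reasonable; the final folds extend past the sample, which accuracy() treats as missing:
# Sketch: compare the two models on out-of-sample forecast accuracy.
aus_production %>%
  stretch_tsibble(.init = 80, .step = 8) %>%
  model('Multiplicative' = ETS(Gas ~ error("M") + trend("A") + season("M")),
        'Damped Multiplicative' = ETS(Gas ~ error("M") + trend("Ad", phi = 0.75) + season("M"))) %>%
  forecast(h = 4) %>%
  accuracy(aus_production) %>%
  select(.model, RMSE)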
Recall your retail time series data (from Exercise 7 in Section 2.10).
set.seed(246810)
myseries <- aus_retail %>%
  filter(`Series ID` == 'A3349849A')
autoplot(myseries)
## Plot variable not specified, automatically selected `.vars = Turnover`
Multiplicative seasonality is necessary for this series because the seasonal variation grows in proportion to the level of the series.
ms_mod <- myseries %>%
  model('Multiplicative' = ETS(Turnover ~ error("M") + trend("A") + season("M")),
        'Damped Multiplicative' = ETS(Turnover ~ error("M") + trend("Ad", phi = 0.75) + season("M")))
ms_fc <- ms_mod %>%
  forecast(h = 20)
ms_fc %>%
  autoplot(aus_retail, level = NULL)
accuracy(ms_mod) %>% select('.model', 'RMSE')
## # A tibble: 2 × 2
## .model RMSE
## <chr> <dbl>
## 1 Multiplicative 1.78
## 2 Damped Multiplicative 1.76
The damped multiplicative model has a slightly lower training RMSE than the multiplicative model, which suggests its fitted values are marginally closer to the observed data, so the damped multiplicative model is preferred here.
pref_mod <- myseries %>%
  model('Damped Multiplicative' = ETS(Turnover ~ error("M") + trend("Ad", phi = 0.75) + season("M")))
pref_mod %>%
  gg_tsresiduals()
augment(pref_mod) %>% features(.innov, box_pierce, lag = 24)
## # A tibble: 1 × 5
## State Industry .model bp_stat bp_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Australian Capital Territory Cafes, restaurants and … Dampe… 30.7 0.161
augment(pref_mod) %>% features(.innov, ljung_box, lag = 24)
## # A tibble: 1 × 5
## State Industry .model lb_stat lb_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Australian Capital Territory Cafes, restaurants and … Dampe… 31.8 0.131
Both tests give p-values greater than 0.05, so we cannot reject the null hypothesis of white noise: the residuals are indistinguishable from white noise.
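For reference (a standard formula, not taken from the write-up), the Ljung-Box statistic reported above is
\[
Q^* = T\,(T+2)\sum_{k=1}^{\ell}\frac{r_k^2}{T-k},
\]
where \(r_k\) is the lag-\(k\) autocorrelation of the residuals, \(T\) is the number of observations, and \(\ell = 24\) is the number of lags tested (two seasonal periods for monthly data). Large values of \(Q^*\) indicate autocorrelations too big to have come from white noise; the Box-Pierce statistic is the simpler \(T\sum_{k=1}^{\ell} r_k^2\).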
train <- myseries %>%
  filter(year(Month) <= 2010)
mod <- train %>%
  model('Multiplicative' = ETS(Turnover ~ error("M") + trend("A") + season("M")),
        'Seasonal Naive' = SNAIVE(Turnover))
fc <- mod %>%
  forecast(new_data = anti_join(myseries, train))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
fc %>% autoplot(myseries, level = NULL)
accuracy(mod) %>%
  select(.type, .model, RMSE)
## # A tibble: 2 × 3
## .type .model RMSE
## <chr> <chr> <dbl>
## 1 Training Multiplicative 1.38
## 2 Training Seasonal Naive 3.37
fc %>%
  accuracy(myseries) %>%
  select(.type, .model, RMSE)
## # A tibble: 2 × 3
## .type .model RMSE
## <chr> <chr> <dbl>
## 1 Test Multiplicative 6.62
## 2 Test Seasonal Naive 8.42
The multiplicative ETS model has the lower test RMSE (6.62 versus 8.42 for the seasonal naive benchmark), so it produces the better forecasts on the held-out data.
For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?
lam <- myseries %>%
  features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero)
stl_decomp <- myseries %>%
  model(stl_box = STL(box_cox(Turnover, lam) ~ season(window = "periodic"), robust = TRUE)) %>%
  components()
stl_decomp %>%
  autoplot()
# Attach the seasonally adjusted series (on the Box-Cox scale) as a new column.
myseries$Turnover_sa <- stl_decomp$season_adjust
training <- myseries %>%
  filter(year(Month) <= 2010)
model <- training %>%
  model(ETS(Turnover_sa ~ error("M") + trend("A") + season("M")))
forecast <- model %>%
  forecast(new_data = anti_join(myseries, training))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover,
## Turnover_sa)`
model %>%
  accuracy() %>%
  select(.model, .type, RMSE)
## # A tibble: 1 × 3
## .model .type RMSE
## <chr> <chr> <dbl>
## 1 "ETS(Turnover_sa ~ error(\"M\") + trend(\"A\") + season(\"M\"))" Traini… 0.375
model %>%
  gg_tsresiduals()
augment(model) %>% features(.innov, box_pierce, lag = 24)
## # A tibble: 1 × 5
## State Industry .model bp_stat bp_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Australian Capital Territory Cafes, restaurants and … "ETS(… 37.3 0.0406
augment(model) %>% features(.innov, ljung_box, lag = 24)
## # A tibble: 1 × 5
## State Industry .model lb_stat lb_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Australian Capital Territory Cafes, restaurants and … "ETS(… 38.7 0.0291
model %>%
  accuracy() %>%
  select(.model, .type, RMSE)
## # A tibble: 1 × 3
## .model .type RMSE
## <chr> <chr> <dbl>
## 1 "ETS(Turnover_sa ~ error(\"M\") + trend(\"A\") + season(\"M\"))" Traini… 0.375
forecast %>%
  accuracy(myseries) %>%
  select(.model, .type, RMSE)
## # A tibble: 1 × 3
## .model .type RMSE
## <chr> <chr> <dbl>
## 1 "ETS(Turnover_sa ~ error(\"M\") + trend(\"A\") + season(\"M\"))" Test 0.692