library(fpp3)
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.6
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.4.1
## ✔ lubridate 1.9.3 ✔ fable 0.4.1
## ✔ ggplot2 3.5.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
Consider the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.
aus_livestock
## # A tsibble: 29,364 x 4 [1M]
## # Key: Animal, State [54]
## Month Animal State Count
## <mth> <fct> <fct> <dbl>
## 1 1976 Jul Bulls, bullocks and steers Australian Capital Territory 2300
## 2 1976 Aug Bulls, bullocks and steers Australian Capital Territory 2100
## 3 1976 Sep Bulls, bullocks and steers Australian Capital Territory 2100
## 4 1976 Oct Bulls, bullocks and steers Australian Capital Territory 1900
## 5 1976 Nov Bulls, bullocks and steers Australian Capital Territory 2100
## 6 1976 Dec Bulls, bullocks and steers Australian Capital Territory 1800
## 7 1977 Jan Bulls, bullocks and steers Australian Capital Territory 1800
## 8 1977 Feb Bulls, bullocks and steers Australian Capital Territory 1900
## 9 1977 Mar Bulls, bullocks and steers Australian Capital Territory 2700
## 10 1977 Apr Bulls, bullocks and steers Australian Capital Territory 2300
## # ℹ 29,354 more rows
unique(aus_livestock$Animal)
## [1] Bulls, bullocks and steers Calves
## [3] Cattle (excl. calves) Cows and heifers
## [5] Lambs Pigs
## [7] Sheep
## 7 Levels: Bulls, bullocks and steers Calves ... Sheep
pigs_data <- aus_livestock %>%
filter(Animal == "Pigs", State == "Victoria")
#View(pigs_data)
autoplot(pigs_data, Count)
fit_pigs <- pigs_data %>%
model(ETS(Count ~ error("A") + trend("N") + season("N")))
report(fit_pigs)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
pigs_fc <- fit_pigs %>%
forecast(h = 4)
pigs_fc %>%
autoplot(pigs_data)+
labs(title="Forecast: pigs slaughtered in Victoria/Australia",
y="Count")
- Compute a 95% prediction interval for the first forecast using where s
is the standard deviation of the residuals. Compare your interval with
the interval produced by R.
s <- sd(residuals(fit_pigs)$.resid)
s
## [1] 9344.666
pigs_first_forecast <- pigs_fc$.mean[1]
pigs_first_forecast
## [1] 95186.56
lower <- pigs_first_forecast - 1.96 * s
upper <- pigs_first_forecast + 1.96 * s
lower
## [1] 76871.01
upper
## [1] 113502.1
pigs_fc %>%
hilo() %>%
pull('95%') %>%
head(1)
## <hilo[1]>
## [1] [76854.79, 113518.3]95
Computed 95% prediction interval is: from 76871.01 to 113502.1. The interval produced by R is: 76854.79 to 113518.3. The intervals are pretty close, R’s interval is slightly wider.
Data set global_economy contains the annual Exports from many countries. Select one country to analyse.
global_economy
## # A tsibble: 15,150 x 9 [1Y]
## # Key: Country [263]
## Country Code Year GDP Growth CPI Imports Exports Population
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan AFG 1960 537777811. NA NA 7.02 4.13 8996351
## 2 Afghanistan AFG 1961 548888896. NA NA 8.10 4.45 9166764
## 3 Afghanistan AFG 1962 546666678. NA NA 9.35 4.88 9345868
## 4 Afghanistan AFG 1963 751111191. NA NA 16.9 9.17 9533954
## 5 Afghanistan AFG 1964 800000044. NA NA 18.1 8.89 9731361
## 6 Afghanistan AFG 1965 1006666638. NA NA 21.4 11.3 9938414
## 7 Afghanistan AFG 1966 1399999967. NA NA 18.6 8.57 10152331
## 8 Afghanistan AFG 1967 1673333418. NA NA 14.2 6.77 10372630
## 9 Afghanistan AFG 1968 1373333367. NA NA 15.2 8.90 10604346
## 10 Afghanistan AFG 1969 1408888922. NA NA 15.0 10.1 10854428
## # ℹ 15,140 more rows
#unique(global_economy$Country)
belarus_data <- global_economy %>%
filter(Country == "Belarus") # i am from Belarus, so I've decided to look at the data for this country
belarus_data
## # A tsibble: 58 x 9 [1Y]
## # Key: Country [1]
## Country Code Year GDP Growth CPI Imports Exports Population
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Belarus BLR 1960 NA NA NA NA NA 8198000
## 2 Belarus BLR 1961 NA NA NA NA NA 8271216
## 3 Belarus BLR 1962 NA NA NA NA NA 8351928
## 4 Belarus BLR 1963 NA NA NA NA NA 8437232
## 5 Belarus BLR 1964 NA NA NA NA NA 8524224
## 6 Belarus BLR 1965 NA NA NA NA NA 8610000
## 7 Belarus BLR 1966 NA NA NA NA NA 8696496
## 8 Belarus BLR 1967 NA NA NA NA NA 8785648
## 9 Belarus BLR 1968 NA NA NA NA NA 8874552
## 10 Belarus BLR 1969 NA NA NA NA NA 8960304
## # ℹ 48 more rows
summary(belarus_data)
## Country Code Year GDP
## Belarus :58 BLR :58 Min. :1960 Min. :1.214e+10
## Afghanistan : 0 ABW : 0 1st Qu.:1974 1st Qu.:1.489e+10
## Albania : 0 AFG : 0 Median :1988 Median :2.240e+10
## Algeria : 0 AGO : 0 Mean :1988 Mean :3.436e+10
## American Samoa: 0 ALB : 0 3rd Qu.:2003 3rd Qu.:5.496e+10
## Andorra : 0 AND : 0 Max. :2017 Max. :7.881e+10
## (Other) : 0 (Other): 0 NA's :30
## Growth CPI Imports Exports
## Min. :-11.700 Min. : NA Min. :33.33 Min. :33.33
## 1st Qu.: -0.500 1st Qu.: NA 1st Qu.:58.79 1st Qu.:57.24
## Median : 3.400 Median : NA Median :64.07 Median :59.96
## Mean : 2.653 Mean :NaN Mean :63.99 Mean :60.13
## 3rd Qu.: 8.099 3rd Qu.: NA 3rd Qu.:69.31 3rd Qu.:66.82
## Max. : 11.450 Max. : NA Max. :84.11 Max. :78.78
## NA's :31 NA's :58 NA's :30 NA's :30
## Population
## Min. : 8198000
## 1st Qu.: 9329938
## Median : 9544469
## Mean : 9533874
## 3rd Qu.: 9978458
## Max. :10239000
##
autoplot(belarus_data, Exports) +
labs(title = "Belarus Exports") #looks like there is no data before 1990 (which makes sense, because Belarus was part of USSR before that)
## Warning: Removed 30 rows containing missing values or values outside the scale range
## (`geom_line()`).
#need to remove missing values because model is not supporting missing values, so will filter the data to start from 1990
belarus_data_clean <- belarus_data %>%
filter(Year >= 1990)
The plot shows big fluctuations in exports, going up and down significantly, no visible seasonality, perhaps some upward trend, but i think it is hard to conclude from this plot.
#ETS (A,N,N) model
fit_belarus_exports <- belarus_data_clean %>%
model(ETS(Exports ~ error("A") + trend("N") + season("N")))
report(fit_belarus_exports)
## Series: Exports
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.000100039
##
## Initial states:
## l[0]
## 60.1264
##
## sigma^2: 100.9718
##
## AIC AICc BIC
## 226.4423 227.4423 230.4389
fc_belarus_exports <- fit_belarus_exports %>%
forecast(h = 5)
fc_belarus_exports %>% autoplot(belarus_data_clean) +
labs(title = "Forecast for the next 5 years for Belarus Exports")
rmse_1 <- fit_belarus_exports |>
accuracy() |>
select(RMSE)
print(rmse_1)
## # A tibble: 1 × 1
## RMSE
## <dbl>
## 1 9.68
#ETS(A,A,N)
fit2_belarus_exports <- belarus_data_clean %>%
model(ETS(Exports ~ error("A") + trend("A") + season("N")))
report(fit2_belarus_exports)
## Series: Exports
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.0004284379
## beta = 0.000100002
##
## Initial states:
## l[0] b[0]
## 52.65342 0.4817818
##
## sigma^2: 99.3447
##
## AIC AICc BIC
## 227.7462 230.4735 234.4072
fc2_belarus_exports <- fit2_belarus_exports %>%
forecast(h = 5)
fc2_belarus_exports %>% autoplot(belarus_data_clean) +
labs(title = "Second Forecast for the next 5 years for Belarus Exports")
rmse_2 <- fit2_belarus_exports |>
accuracy() |>
select(RMSE)
print(rmse_2)
## # A tibble: 1 × 1
## RMSE
## <dbl>
## 1 9.23
When comparing the two models, ETS(A,A,N) had a lower RMSE of 9.228, while ETS(A,N,N) had an RMSE of 9.683. ETS(A,A,N) includes a trend, which helps it better follow changes in Belarus exports over time. Although the RMSE difference is small, the model with the trend gives slightly better forecasts, making it a better choice.
#95% prediction interval for ETS(A,N,N)
s_ann <- sd(residuals(fit2_belarus_exports)$.resid, na.rm = TRUE)
s_ann
## [1] 9.384795
# First point forecast ANN
first_forecast_ann <- fc_belarus_exports$.mean[1]
first_forecast_ann
## [1] 60.12641
# Calculate 95% prediction interval manually
lower_ann <- first_forecast_ann - 1.96 * s_ann
upper_ann <- first_forecast_ann + 1.96 * s_ann
c(lower_ann, upper_ann)
## [1] 41.73221 78.52061
# Compare to R's automatically generated 95% interval
fc_belarus_exports %>%
hilo() %>%
pull('95%') %>%
head(1)
## <hilo[1]>
## [1] [40.43176, 79.82105]95
#95% prediction interval for ETS(A,A,N)
s_aan <- sd(residuals(fit_belarus_exports)$.resid, na.rm = TRUE)
s_aan
## [1] 9.860636
# First point forecast AAN
first_forecast_aan <- fc2_belarus_exports$.mean[1]
first_forecast_aan
## [1] 66.66845
# Calculate 95% prediction interval manually
lower_aan <- first_forecast_aan - 1.96 * s_ann
upper_aan <- first_forecast_aan + 1.96 * s_ann
c(lower_aan, upper_aan)
## [1] 48.27425 85.06265
# Compare to R's automatically generated 95% interval
fc2_belarus_exports %>%
hilo() %>%
pull('95%') %>%
head(1)
## <hilo[1]>
## [1] [47.13314, 86.20377]95
The 95% prediction intervals from manual calculation and R’s hilo() function were very similar, differing slightly because R adjusts for forecast uncertainty.
Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.
[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]
chinese_gdp_data <- global_economy %>%
filter(Country == "China")
summary(chinese_gdp_data)
## Country Code Year GDP
## China :58 CHN :58 Min. :1960 Min. :4.721e+10
## Afghanistan : 0 ABW : 0 1st Qu.:1974 1st Qu.:1.455e+11
## Albania : 0 AFG : 0 Median :1988 Median :3.301e+11
## Algeria : 0 AGO : 0 Mean :1988 Mean :1.970e+12
## American Samoa: 0 ALB : 0 3rd Qu.:2003 3rd Qu.:1.613e+12
## Andorra : 0 AND : 0 Max. :2017 Max. :1.224e+13
## (Other) : 0 (Other): 0
## Growth CPI Imports Exports
## Min. :-27.270 Min. : 26.05 Min. : 2.133 Min. : 2.492
## 1st Qu.: 7.298 1st Qu.: 60.33 1st Qu.: 4.352 1st Qu.: 4.357
## Median : 9.131 Median : 81.85 Median :12.376 Median :13.048
## Mean : 8.227 Mean : 79.00 Mean :12.625 Mean :14.117
## 3rd Qu.: 11.235 3rd Qu.: 98.22 3rd Qu.:18.442 3rd Qu.:20.748
## Max. : 19.300 Max. :119.09 Max. :28.444 Max. :36.035
## NA's :1 NA's :26
## Population
## Min. :6.603e+08
## 1st Qu.:9.044e+08
## Median :1.110e+09
## Mean :1.077e+09
## 3rd Qu.:1.286e+09
## Max. :1.386e+09
##
autoplot(chinese_gdp_data, GDP) +
labs(title = "China GDP Over Time") #strong exponential growth
#model 1 - chosen automatically by R ETS() function
chinese_model_1 <- chinese_gdp_data %>%
model(ETS(GDP))
report(chinese_model_1) #ETS(M,A,N) - r chose this model
## Series: GDP
## Model: ETS(M,A,N)
## Smoothing parameters:
## alpha = 0.9998998
## beta = 0.3119984
##
## Initial states:
## l[0] b[0]
## 45713434615 3288256682
##
## sigma^2: 0.0108
##
## AIC AICc BIC
## 3102.064 3103.218 3112.366
fc1_chinese <- chinese_model_1 %>%
forecast(h = 30)
fc1_chinese %>%
autoplot(chinese_gdp_data) +
labs(title = "China GDP Forecast Model 1")
#models
lambda_china <- chinese_gdp_data |>
features(GDP, features = guerrero) |>
pull(lambda_guerrero)
china_gdp_models <- chinese_gdp_data |>
model("Auto Model" = ETS(GDP),
"AAN - damped" = ETS(GDP ~ error("A") + trend("Ad") + season("N")),
"Box-Cox" = ETS(box_cox(GDP,lambda_china) ~ error("A") + trend("A") + season("N")),
"Box-Cox - damped" = ETS(box_cox(GDP,lambda_china) ~ error("A") + trend("Ad") + season("N")))
fcs_china_gdp <- china_gdp_models |>
forecast(h = 30)
fcs_china_gdp |>
autoplot(chinese_gdp_data, level=NULL) +
labs(title = "GDP Forecase for China: Next 20 Years")
I forecasted China’s GDP using four different ETS models: one
chosen automatically by a function, one with a damped trend, one with a
Box-Cox transformation, and one comdining box cox and damped trend. The
damped trend model predicted GDP growth that gradually levels off, while
the Box-Cox model without damping showed extensive exponential growth.
The Box-Cox with damping model produced forecasts that grow steadily but
at a slower pace than without damping. These differences highlight how
damping controls long-term trend behavior and how the Box-Cox
transformation helps stabilize variance without necessarily slowing down
forecast growth.
Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?
autoplot(aus_production, Gas)
gas_model <- aus_production %>%
model(ETS(Gas))
report(gas_model)
## Series: Gas
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.6528545
## beta = 0.1441675
## gamma = 0.09784922
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
##
## sigma^2: 0.0032
##
## AIC AICc BIC
## 1680.929 1681.794 1711.389
gas_forecast <- gas_model %>%
forecast(h = 20)
gas_forecast %>%
autoplot(aus_production)
#making trend damped in the model
gas_model_damped <- aus_production %>%
model(ETS(Gas ~ error("M") + trend("Ad") + season("M")))
report(gas_model_damped)
## Series: Gas
## Model: ETS(M,Ad,M)
## Smoothing parameters:
## alpha = 0.6489044
## beta = 0.1551275
## gamma = 0.09369372
## phi = 0.98
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 5.858941 0.09944006 0.9281912 1.177903 1.07678 0.8171255
##
## sigma^2: 0.0033
##
## AIC AICc BIC
## 1684.028 1685.091 1717.873
gas_forecast_damped <- gas_model_damped %>%
forecast(h = 20)
gas_forecast_damped %>%
autoplot(aus_production)
accuracy(gas_model) # RMSE 4.5955
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Gas) Training -0.115 4.60 3.02 0.199 4.08 0.542 0.606 -0.0131
accuracy(gas_model_damped) #RMSE 4.5918
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Gas ~ error(\… Trai… -0.00439 4.59 3.03 0.326 4.10 0.544 0.606 -0.0217
Multiplicative seasonality is necessary because the size of the seasonal fluctuations increases as the overall level of gas production increases. I compared an automatically selected ETS model and a damped ETS model for forecasting Australian gas production. The RMSE for the auto model was 4.5955, while the RMSE for the damped model was slightly lower at 4.5918. Not sure if this is a significant change but the damped trend slightly improved the forecast.
Recall your retail time series data (from Exercise 7 in Section 2.10).
set.seed(12345678)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
autoplot(myseries)
## Plot variable not specified, automatically selected `.vars = Turnover`
Multiplicative seasonality is necessary because the seasonal
variations in the turnover data become larger as the overall level of
turnover increases.
#
myseries_models <- myseries %>%
model("Holt-Winters method" = ETS(Turnover ~ error("M") + trend("A") + season("M")),
"Damped Holt_winters method" = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
)
report(myseries_models)
## Warning in report.mdl_df(myseries_models): Model reporting is only supported
## for individual models, so a glance will be shown. To see the report for a
## specific model, use `select()` and `filter()` to identify a single model.
## # A tibble: 2 × 11
## State Industry .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Northern… Clothin… Holt-… 0.00447 -870. 1775. 1776. 1841. 0.376 0.473 0.0511
## 2 Northern… Clothin… Dampe… 0.00457 -872. 1781. 1783. 1851. 0.379 0.479 0.0505
fc_myseries <- myseries_models %>%
forecast(h = "2 years")
fc_myseries %>%
autoplot(myseries)
accuracy(myseries_models)
## # A tibble: 2 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Northern … Clothin… Holt-… Trai… -0.0128 0.613 0.450 -0.469 5.15 0.513 0.529
## 2 Northern … Clothin… Dampe… Trai… 0.0398 0.616 0.444 -0.0723 5.06 0.507 0.531
## # ℹ 1 more variable: ACF1 <dbl>
The non-damped model achieved a slightly lower RMSE of 0.6130 compared to 0.6155 for the damped model. Thus, damping did not significantly improve the one-step-ahead forecast accuracy for this series.
myseries_models %>%
select("Holt-Winters method") %>%
gg_tsresiduals()
The residuals from the Holt-Winters multiplicative model do not
appear to behave like white noise. Although there are no obvious
patterns over time, the histogram of residuals is slightly skewed and
the peak is not centered exactly at zero. Additionally, there are two
significant spikes in the autocorrelation plot.
train <- myseries %>%
filter(Month <= yearmonth("2010 Dec"))
test <- myseries %>%
filter(Month > yearmonth("2010 Dec"))
train_model <- train |>
model("Damped multipilcative" = ETS(Turnover ~ error("M") + trend("Ad") + season("M")),
"SNAIVE" = SNAIVE(Turnover))
fc_train_model <- train_model |>
forecast(new_data = anti_join(myseries, train))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
fc_train_model |>
autoplot(myseries)
fc_train_model |> accuracy(myseries) |> select(.model, RMSE)
## # A tibble: 2 × 2
## .model RMSE
## <chr> <dbl>
## 1 Damped multipilcative 1.15
## 2 SNAIVE 1.55
On the test set, the Damped Holt-Winters model achieved a RMSE of 1.1513, while the Seasonal Naive model had a RMSE of 1.5520. Damped Holt-Winters model has a lower RMSE, it provided more accurate forecasts.
For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?
lambda_train <- train %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
stl_train <- train %>%
model(
STL(box_cox(Turnover, lambda_train) ~ season(window = "periodic")))
#Extract seasonally adjusted series
sa_train <- components(stl_train) %>%
as_tsibble() %>%
select(Month, season_adjust)
ets_sa_model <- sa_train %>%
model(
ETS(season_adjust))
ets_sa_forecast <- ets_sa_model %>%
forecast(h = nrow(test))
stl_full <- myseries %>%
model(
STL(box_cox(Turnover, lambda_train) ~ season(window = "periodic")))
sa_full <- components(stl_full) %>%
as_tsibble() %>%
select(Month, season_adjust)
sa_test <- sa_full %>%
filter(Month > yearmonth("2010 Dec"))
autoplot(ets_sa_forecast, sa_train) +
labs(title = "Forecast from STL + ETS Model")
accuracy(ets_sa_forecast, sa_test)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(season_adjust) Test -0.373 0.403 0.373 -11.2 11.2 NaN NaN 0.702
The STL + ETS approach achieved a test set RMSE of 0.4032, which is significantly lower than the RMSEs from the Damped Holt-Winters model (1.1513) and the Seasonal Naive model (1.5520). Therefore, the STL decomposition combined with ETS provided the best forecast for myseries dataset.