library(fpp3)
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.2 ──
## ✔ tibble 3.3.0 ✔ tsibble 1.1.6
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.4.2
## ✔ lubridate 1.9.4 ✔ fable 0.4.1
## ✔ ggplot2 3.5.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
#aus_livestock
pig <- aus_livestock %>%
filter (State == "Victoria", Animal == "Pigs")
fit <- pig %>%
model(ETS(Count ~error("A")+ trend("N")+ season("N")))
report(fit)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
fit_fc <- fit %>%
forecast (h= 4)
fit_fc %>%
autoplot(pig)+
ggtitle("Number of Pigs Slaughtered in Victoria")
Overall, the simple exponential smoothing model for the nimber of pigs slaughtered in Victoria produced an optimal smoothing parameter of alpha = 0.3221247 and l[0] = 100646.6
fit_fc %>% hilo(95) %>% pull('95%') %>% head(1)
## <hilo[1]>
## [1] [76854.79, 113518.3]95
fit_fc
## # A fable: 4 x 6 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month
## <fct> <fct> <chr> <mth>
## 1 Pigs Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Jan
## 2 Pigs Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Feb
## 3 Pigs Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Mar
## 4 Pigs Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Apr
## # ℹ 2 more variables: Count <dist>, .mean <dbl>
m <- 95186.56
s <- sqrt(87480760)
lower <- m - 1.96*s
upper <- m + 1.96*s
paste(lower, upper)
## [1] "76854.4546212935 113518.665378707"
As shown in the output above, there are only minor decimal differences. Overall, the results are the same.
jap_exp <- global_economy %>%
filter (Country == "Japan")%>%
select (,Exports)
autoplot(jap_exp)+
labs(y="Export", title ="Japan Exports")
## Plot variable not specified, automatically selected `.vars = Exports`
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
The Japanese economy declined following the bubble period (1986–1991) and, despite a partial recovery, was impacted again by the 2008 global financial crisis.
# removing missing value for year 2017
jap_exp <- jap_exp %>%
filter(!(Year == 2017 & is.na(Exports)))
fit_2 <- jap_exp %>%
model(ETS(Exports ~ error("A") + trend("N") + season("N")))
report(fit_2)
## Series: Exports
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.9414732
##
## Initial states:
## l[0]
## 10.638
##
## sigma^2: 1.6541
##
## AIC AICc BIC
## 263.1026 263.5555 269.2318
fit_2_fc <-fit_2 %>%
forecast(h=4)
fit_2_fc %>%
autoplot(jap_exp)+
labs(y="Exports", title ="Japan Exports", subtitle ="ETS(A,N,N)")
accuracy(fit_2)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Exports ~ error(… Trai… 0.104 1.26 0.883 0.258 7.26 0.981 0.990 0.00173
Per the above outcome, accuracy (RMSE, MAE, MAPE): RMSE = 1.26, MAE = 0.88, MAPE ≈ 7.26% Noted that the model’s typical forecast error is ~7% of actual exports.
fit_3 <- jap_exp %>%
model(ETS(Exports ~ error("A") + trend("A") + season("N")))
report(fit_3)
## Series: Exports
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.9094125
## beta = 0.0001000003
##
## Initial states:
## l[0] b[0]
## 10.50228 0.1047452
##
## sigma^2: 1.7047
##
## AIC AICc BIC
## 266.7104 267.8868 276.9256
fit_3_fc <-fit_3 %>%
forecast(h=4)
fit_3_fc %>%
autoplot(jap_exp)+
labs(y="Exports", title ="Japan Exports", subtitle ="ETS(A,A,N)")
accuracy(fit_3)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Exports ~ err… Trai… -0.00389 1.26 0.880 -0.702 7.30 0.978 0.987 0.0246
The seconde model RSME and MAE is slightly more accurate overall. For MAPE, the first model barely better but only 0.04% difference.
The second model corresponds to ETS(A,A,N), it seems the trend component adds a tiny improvement in accuracy without overfitting. Overall, I think the second one is tiny better than the first one.
fit_2_fc %>% hilo(95) %>% pull('95%') %>% head(1)
## <hilo[1]>
## [1] [13.68394, 18.72539]95
fit_2_fc
## # A fable: 4 x 4 [1Y]
## # Key: .model [1]
## .model Year Exports
## <chr> <dbl> <dist>
## 1 "ETS(… 2017 N(16, 1.7)
## 2 "ETS(… 2018 N(16, 3.1)
## 3 "ETS(… 2019 N(16, 4.6)
## 4 "ETS(… 2020 N(16, 6.1)
## # ℹ 1 more variable: .mean <dbl>
fit_3_fc %>% hilo(95) %>% pull('95%') %>% head(1)
## <hilo[1]>
## [1] [13.80675, 18.92479]95
fit_3_fc
## # A fable: 4 x 4 [1Y]
## # Key: .model [1]
## .model Year Exports
## <chr> <dbl> <dist>
## 1 "ETS(… 2017 N(16, 1.7)
## 2 "ETS(… 2018 N(16, 3.1)
## 3 "ETS(… 2019 N(17, 4.5)
## 4 "ETS(… 2020 N(17, 5.9)
## # ℹ 1 more variable: .mean <dbl>
m2 <- 16.20467
s2 <- sqrt(1.6541)
lower_2 <- m2 - 1.96*s2
upper_2 <- m2 + 1.96*s2
paste(lower_2, upper_2)
## [1] "13.6838783465705 18.7254616534295"
m3 <- 16.36577
s3 <- sqrt(1.7047)
lower_3 <- m3 - 1.96*s3
upper_3 <- m3 + 1.96*s3
paste(lower_3, upper_3)
## [1] "13.8067124547306 18.9248275452694"
Based on our calculation results, the manually calculated ranges differ slightly after the third decimal place compared to those produced by R. Overall, the results are the same
[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]
lambda_cn <- global_economy %>%
filter(Country == "China") %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)
fit_cn <- global_economy %>%
filter(Country == "China") %>%
model(`Simple` = ETS(GDP ~ error("A") + trend("N") + season("N")),
`Holt's method` = ETS(GDP ~ error("A") + trend("A") + season("N")),
`Damped Holt's method` = ETS(GDP ~ error("A") + trend("Ad", phi = 0.85) + season("N")),
`Box-Cox` = ETS(box_cox(GDP,lambda_cn) ~ error("A") + trend("A") + season("N")),
`Box-Cox Damped` = ETS(box_cox(GDP,lambda_cn) ~ error("A") + trend("Ad", phi = 0.85) + season("N")),
`Log` = ETS(log(GDP) ~ error("A") + trend("A") + season("N")),
`Log Damped` = ETS(log(GDP) ~ error("A") + trend("Ad", phi = 0.85) + season("N"))
)
fc_cn <- fit_cn %>%
forecast(h = 20)
fc_cn %>%
autoplot(global_economy, level = NULL) +
labs(title="China GPD") +
guides(colour = guide_legend(title = "Forecast"))
Overall, China’s GDP shows a positive trend with no apparent seasonality. Therefore, an additive trend and no seasonal component were applied.
fit_gas <- aus_production %>%
model(`additive` = ETS(Gas ~ error("A") + trend("A") + season("A")),
`multiplicative` = ETS(Gas ~ error("M") + trend("A") + season("M")),
`damped multiplicative` = ETS(Gas ~ error("M") + trend("Ad", phi = 0.8) + season("M")))
aus_production %>%
model(`additive` = ETS(Gas ~ error("A") + trend("A") + season("A")),
`multiplicative` = ETS(Gas ~ error("M") + trend("A") + season("M")),
`damped multiplicative` = ETS(Gas ~ error("M") + trend("Ad", phi = 0.8) + season("M"))) %>%
forecast(h=10) %>%
autoplot(aus_production, level = NULL) +
labs(title="Australian Gas Production") +
guides(colour = guide_legend(title = "Forecast"),
subtitle="Additive vs. Multiplicative Seasonality vs, Damped Trend")
report(fit_gas)
## Warning in report.mdl_df(fit_gas): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 3 × 9
## .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 additive 23.6 -927. 1872. 1873. 1903. 22.7 29.7 3.35
## 2 multiplicative 0.00324 -831. 1681. 1682. 1711. 21.1 32.2 0.0413
## 3 damped multiplicative 0.00352 -838. 1695. 1696. 1725. 20.8 32.3 0.0430
accuracy(fit_gas)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 additive Trai… 0.00525 4.76 3.35 -4.69 10.9 0.600 0.628 0.0772
## 2 multiplicative Trai… -0.115 4.60 3.02 0.199 4.08 0.542 0.606 -0.0131
## 3 damped multiplica… Trai… 0.435 4.56 3.04 0.892 4.18 0.545 0.601 -0.0387
Multiplicative seasonality is necessary because the seasonal fluctuations grow proportionally with the level of Gas production. A damped trend is useful to avoid overestimating future growth if the series trend is slowing. Comparing forecasts from damped vs. non-damped trends, the RMSE values are similar, with the damped model performing slightly better. For MAPE, the damped model is much better than the additive trend model, while for the multiplicative trend model, the difference is only slight.
set.seed(334)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries %>% autoplot()
## Plot variable not specified, automatically selected `.vars = Turnover`
Multiplicative seasonality is appropriate because both the trend and the amplitude of the seasonal fluctuations increase over time. This indicates that the seasonal variation is proportional to the level of the time series.
fit_my <- myseries %>%
model(`Multiplicative` = ETS(Turnover ~ error("M") + trend("A") + season("M")),
`Damped Multiplicative` = ETS(Turnover ~ error("M") + trend("Ad") + season("M")))
fit_my %>%
forecast(h=24) %>%
autoplot(myseries, level = NULL) +
labs(title="Retail Turnover") +
guides(colour = guide_legend(title = "Forecast"))
accuracy(fit_my)
## # A tibble: 2 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 South… Hardwar… Multi… Trai… 0.0334 4.05 2.90 -0.231 6.10 0.473 0.471 0.282
## 2 South… Hardwar… Dampe… Trai… 0.211 4.01 2.86 0.353 5.95 0.465 0.467 0.190
Based on the output, the preferred method would be Damped Multiplicative model, it has a RMSE of 4.01 compared to regular multiplicative which has a RMSE value of 4.05.
myseries %>%
model(`Damped Multiplicative` = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))) %>%
gg_tsresiduals() +
ggtitle("Multiplicative Method")
## Warning: `gg_tsresiduals()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsresiduals()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
myseries_train <- myseries %>%
filter(year(Month) < 2011)
fit_train <- myseries_train %>%
model(
"SNAIVE" = SNAIVE(Turnover),
"Multi-Damped" = ETS(Turnover ~ error('M') + trend('Ad') + season('M'))
)
fc_train <- fit_train %>%
forecast(h = 15)
fc_train %>% autoplot(myseries_train, level = NULL) +
labs(title = "Retails Turnover") +
guides(colour = guide_legend(title = "Forecasts"))
fc_train %>% accuracy(myseries)
## # A tibble: 2 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Multi-… Sout… Hardwar… Test -0.907 5.02 4.39 -1.54 5.33 0.788 0.620 0.266
## 2 SNAIVE Sout… Hardwar… Test -0.407 6.16 5.05 -0.739 6.06 0.904 0.760 0.234
Based on the above graph and accuracy metrics, the damped method is much better to the naive approach.
lambda_retail <- myseries %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
stl_decomp <- myseries %>%
model(stl_box = STL(box_cox(Turnover,lambda_retail) ~ season(window = "periodic"), robust = TRUE)) %>%
components()
stl_decomp %>% autoplot()
# Box-Cox STL
Decomp <- myseries %>%
model(STL_box = STL(box_cox(Turnover,lambda_retail) ~ season(window = "periodic"), robust = TRUE)) %>%
components()
# Seasonally Adjusted
myseries$Turnover_sa <- Decomp$season_adjust
myseries_train <- myseries %>%
filter(year(Month) < 2011)
fit_myseries <- myseries_train %>%
model(ETS(Turnover_sa ~ error("M") + trend("A") + season("M")))
fit_myseries %>% gg_tsresiduals() +
ggtitle("Retail Turnover Residual")
fc_re <- fit_myseries %>%
forecast(new_data = anti_join(myseries, myseries_train))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover,
## Turnover_sa)`
accuracy(fit_myseries)
## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Sout… Hardwar… "ETS(… Trai… 0.00116 0.426 0.331 -0.145 3.75 0.457 0.455 0.144
fc_re %>% accuracy(myseries)
## # A tibble: 1 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Tur… Sout… Hardwar… Test -1.84 2.08 1.87 -13.3 13.5 2.58 2.22 0.846