Email : brigita.melantika@student.matanauniversity.ac.id
RPubs : https://rpubs.com/brigitatiaraem/
Jurusan : Statistika
Address : ARA Center, Matana University Tower
Jl. CBD Barat Kav, RT.1, Curug Sangereng, Kelapa Dua, Tangerang, Banten 15810.
library(fpp3)## Warning: package 'fpp3' was built under R version 4.1.3
## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --
## v tibble 3.1.6 v tsibble 1.1.1
## v dplyr 1.0.7 v tsibbledata 0.4.0
## v tidyr 1.2.0 v feasts 0.2.2
## v lubridate 1.8.0 v fable 0.3.1
## v ggplot2 3.3.5
## Warning: package 'tsibble' was built under R version 4.1.3
## Warning: package 'tsibbledata' was built under R version 4.1.3
## Warning: package 'feasts' was built under R version 4.1.3
## Warning: package 'fabletools' was built under R version 4.1.3
## Warning: package 'fable' was built under R version 4.1.3
## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x tsibble::intersect() masks base::intersect()
## x tsibble::interval() masks lubridate::interval()
## x dplyr::lag() masks stats::lag()
## x tsibble::setdiff() masks base::setdiff()
## x tsibble::union() masks base::union()
aus_production).aus_production %>% autoplot(Electricity) + labs(title = "Australian Electricity Production") +
theme(plot.title = element_text(hjust = 0.5))aus_elec <- aus_production %>% select("Quarter", "Electricity")
aus_elec$Electricity <- log(aus_elec$Electricity)
aus_elec %>% autoplot(.vars=Electricity) +
labs(title = "Australian Electricity Production with Log Transformation") +
theme(plot.title = element_text(hjust = 0.5))aus_elec %>%
features(Electricity, unitroot_kpss)## # A tibble: 1 x 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 4.26 0.01
aus_elec %>%
features(Electricity, unitroot_ndiffs)## # A tibble: 1 x 1
## ndiffs
## <int>
## 1 2
aus_elec %>%
mutate(log_elec = difference(log(Electricity), 12)) %>%
features(log_elec, unitroot_ndiffs)## # A tibble: 1 x 1
## ndiffs
## <int>
## 1 1
aus_elec %>%
gg_tsdisplay(difference(Electricity), plot_type='partial')## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
elec_fit <- aus_elec %>%
model(arima310 = ARIMA(Electricity ~ pdq(3,1,0)),
arima210 = ARIMA(Electricity ~ pdq(2,1,0)),
stepwise = ARIMA(Electricity),
auto = ARIMA(Electricity, stepwise=FALSE))
report (elec_fit)## Warning in report.mdl_df(elec_fit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 4 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima310 0.000357 545. -1076. -1076. -1053. <cpl [11]> <cpl [4]>
## 2 arima210 0.000350 546. -1080. -1080. -1060. <cpl [6]> <cpl [8]>
## 3 stepwise 0.000359 543. -1079. -1078. -1065. <cpl [1]> <cpl [5]>
## 4 auto 0.000347 547. -1083. -1082. -1063. <cpl [8]> <cpl [9]>
fit11d <- aus_elec %>%
model(ARIMA(Electricity)) %>%
report(fit11d)## Series: Electricity
## Model: ARIMA(1,1,1)(0,1,1)[4]
##
## Coefficients:
## ar1 ma1 sma1
## 0.4537 -0.8002 -0.6075
## s.e. 0.1320 0.0969 0.0527
##
## sigma^2 estimated as 0.0003586: log likelihood=543.29
## AIC=-1078.59 AICc=-1078.4 BIC=-1065.14
fit11d %>% gg_tsresiduals()augment(fit11d) %>% features(.innov, ljung_box, lag=10, dof=2)## # A tibble: 1 x 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 ARIMA(Electricity) 12.8 0.119
elec_arima <- aus_elec %>%
model(ARIMA(Electricity))
elec_arima %>%
forecast(h = "2 years") %>%
autoplot(aus_elec, level = NULL) +
labs(title = "24 Month Australian Electricity Production Forecast") +
theme(plot.title = element_text(hjust = 0.5))ets_compare11 <- aus_elec %>% model(ETS(Electricity))
report(ets_compare11)## Series: Electricity
## Model: ETS(M,A,A)
## Smoothing parameters:
## alpha = 0.5160242
## beta = 0.02948315
## gamma = 0.3091674
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 8.328679 0.02053009 -0.03579961 0.08180292 0.02721019 -0.0732135
##
## sigma^2: 0
##
## AIC AICc BIC
## -546.2358 -545.3705 -515.7754
ets_compare11 %>% forecast(h=24) %>%
autoplot(aus_elec) + labs(title = "Forecasted Australian Electricity Production using ETS") +
theme(plot.title = element_text(hjust = 0.5))library(tsibble)
library(dplyr)
library(fpp3)
library(magrittr)##
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
##
## extract
library(forecast)## Warning: package 'forecast' was built under R version 4.1.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(zoo)## Warning: package 'zoo' was built under R version 4.1.3
##
## Attaching package: 'zoo'
## The following object is masked from 'package:tsibble':
##
## index
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
portland_cement_crossval <- aus_production %>%
slice(1:(n()-4)) %>%
stretch_tsibble(.init = 20, .step = 1)
port_cement_crossval_f <- portland_cement_crossval %>%
model(ETS(Cement),
SNAIVE(Cement)
) %>%
forecast(h = "1 year")port_cement_crossval_f %>%
group_by(.id, .model) %>%
mutate(h = row_number()) %>%
ungroup() %>%
accuracy(aus_production, by = c(".model", "h"))## # A tibble: 8 x 11
## .model h .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <int> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Cement) 1 Test -0.0902 82.9 60.1 -0.227 3.95 0.597 0.625 -0.00185
## 2 ETS(Cement) 2 Test -0.653 101. 72.0 -0.325 4.74 0.708 0.756 0.495
## 3 ETS(Cement) 3 Test -1.71 119. 87.0 -0.492 5.80 0.856 0.894 0.616
## 4 ETS(Cement) 4 Test -0.729 137. 102. -0.543 6.65 1.01 1.03 0.699
## 5 SNAIVE(Ceme~ 1 Test 30.9 138. 107. 1.97 6.99 1.06 1.04 0.640
## 6 SNAIVE(Ceme~ 2 Test 30.0 139. 107. 1.90 6.96 1.05 1.04 0.649
## 7 SNAIVE(Ceme~ 3 Test 29.5 139. 107. 1.85 6.95 1.05 1.04 0.651
## 8 SNAIVE(Ceme~ 4 Test 30.8 140. 108 1.91 6.99 1.06 1.05 0.637
In the output results above, the first to third horizons show much better ETS results, it’s just that in the fourth horizon the results are much closer.
library(fpp3)aus_production %>% autoplot(Beer) fc <- aus_production %>%
slice(1:(nrow(aus_production)-12)) %>%
model(
'ETS' = ETS(Beer),
'Seasonal Naïve' = SNAIVE(Beer),
'STL Decomposition' = decomposition_model(STL(log(Beer)),
ETS(season_adjust))
) %>%
forecast(h = "3 years")
fc %>%
autoplot(filter_index(aus_production, "2000 Q1" ~ .),level=NULL) +
guides(colour=guide_legend(title="Forecast")) +
ggtitle("Beer production from aus_production")fc %>% accuracy(aus_production)## # A tibble: 3 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS Test 0.127 9.62 8.92 0.00998 2.13 0.563 0.489 0.376
## 2 Seasonal Naïve Test -2.92 10.8 9.75 -0.651 2.34 0.616 0.549 0.325
## 3 STL Decomposition Test -2.85 9.87 8.95 -0.719 2.16 0.565 0.502 0.283
tidy_bricks <- aus_production %>%
filter(!is.na(Bricks))
tidy_bricks %>% autoplot(Bricks)fc <- tidy_bricks %>%
slice(1:(nrow(tidy_bricks)-12)) %>%
model(
'ETS' = ETS(Bricks),
'Seasonal Naïve' = SNAIVE(Bricks),
'STL Decomposition' = decomposition_model(STL(log(Bricks)),
ETS(season_adjust))
) %>%
forecast(h = "3 years")
fc %>%
autoplot(filter_index(tidy_bricks, "1980 Q1" ~ .),level=NULL) +
guides(colour=guide_legend(title="Forecast")) +
ggtitle("Beer production from aus_production")fc %>% accuracy(tidy_bricks)## # A tibble: 3 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS Test 2.27 17.5 13.2 0.474 3.31 0.365 0.354 0.339
## 2 Seasonal Naïve Test 32.6 36.5 32.6 7.85 7.85 0.898 0.739 -0.322
## 3 STL Decomposition Test 9.71 18.7 14.9 2.29 3.65 0.411 0.378 0.0933
Based on the test set results, ETS and STLM are the best for this dataset.
subsidies <- PBS %>%
filter(ATC2 %in% c("A10", "H02")) %>%
group_by(ATC2) %>%
summarise(Cost = sum(Cost))
subsidies %>%
autoplot(Cost) +
facet_grid(vars(ATC2), scales = "free_y") +
ggtitle("Cost of drug subsidies for diabetes and corticosteroids")In the output above, it can be seen that there is a variance problem, so a box-cox transformation is needed for STL by dividing the data because the lambda for each data is different.
# Diabetes
diabet <- subsidies %>%
filter(ATC2 %in% "A10")
lambda1 <- diabet %>%
features(Cost, features = guerrero) %>%
pull(lambda_guerrero)
fc1 <- diabet %>%
filter(Month < max(Month) - 35) %>%
model(
'ETS' = ETS(Cost),
'Seasonal Naïve' = SNAIVE(Cost),
'STL Decomposition' = decomposition_model(STL(box_cox(log(Cost), lambda1)),
ETS(season_adjust))
) %>%
forecast(h = "3 years")
# Corticosteroids
corti <- subsidies %>%
filter(ATC2 %in% "H02")
lambda2 <- corti %>%
features(Cost, features = guerrero) %>%
pull(lambda_guerrero)
fc2 <- corti %>%
filter(Month < max(Month) - 35) %>%
model(
'ETS' = ETS(Cost),
'Seasonal Naïve' = SNAIVE(Cost),
'STL Decomposition' = decomposition_model(STL(box_cox(log(Cost), lambda2)),
ETS(season_adjust))
) %>%
forecast(h = "3 years")
# Combine the data again
fc <- bind_rows(fc1,fc2)
fc %>% autoplot(subsidies, level=NULL) +
guides(colour=guide_legend(title="Forecast")) +
ggtitle("Forecasting for 3 years")fc %>%
accuracy(subsidies) %>%
arrange(ATC2)## # A tibble: 6 x 11
## .model ATC2 .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS A10 Test 1382519. 2.36e6 1.85e6 5.83 8.74 1.89 2.00 0.177
## 2 Seasonal ~ A10 Test 4318158. 5.18e6 4.33e6 19.8 19.9 4.42 4.40 0.638
## 3 STL Decom~ A10 Test 94089. 1.57e6 1.29e6 -0.269 6.63 1.32 1.33 -0.153
## 4 ETS H02 Test 27005. 7.65e4 6.45e4 1.99 7.05 1.09 1.07 -0.0990
## 5 Seasonal ~ H02 Test -14795. 8.55e4 7.16e4 -1.31 7.88 1.21 1.20 0.0226
## 6 STL Decom~ H02 Test 22681. 6.83e4 5.63e4 1.66 6.24 0.952 0.960 -0.219
For the A10 series, the STLM method appears to be the best, while for the H02 series, SNAIVE/STLM appears to be the best.
food <- aus_retail %>%
filter(Industry == "Food retailing") %>%
summarise(Turnover = sum(Turnover))
autoplot(food, Turnover) +
ggtitle('Total food retailing turnover for Australia')From the output above, it can be seen that there is a variance problem, so a box-cox transformation is needed for STL.
lambda <- food %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
fc <- food %>%
filter(Month < max(Month) - 35) %>%
model(
'ETS' = ETS(Turnover),
'Seasonal Naïve' = SNAIVE(Turnover),
'STL Decomposition' = decomposition_model(STL(box_cox(log(Turnover), lambda)),
ETS(season_adjust))
) %>%
forecast(h = "3 years")
fc %>%
autoplot(filter_index(food, "2005 Jan"~ .), level=NULL)+
guides(colour=guide_legend(title="Forecast")) +
ggtitle("Forecasting for 3 years")fc %>% accuracy(food)## # A tibble: 3 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS Test -151. 194. 170. -1.47 1.65 0.639 0.634 0.109
## 2 Seasonal Naïve Test 625. 699. 625. 5.86 5.86 2.35 2.29 0.736
## 3 STL Decomposition Test -8.34 155. 132. -0.135 1.24 0.496 0.506 0.256
The STL Decomposition with log and box-cox transformation has the best result as we can see from the RMSE value.
Total number of trips across Australia using
aus_trips <- tourism %>%
summarise(Trips = sum(Trips))
fit <- aus_trips %>%
model(ETS(Trips))
report(fit)## Series: Trips
## Model: ETS(A,A,A)
## Smoothing parameters:
## alpha = 0.4495675
## beta = 0.04450178
## gamma = 0.0001000075
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 21689.64 -58.46946 -125.8548 -816.3416 -324.5553 1266.752
##
## sigma^2: 699901.4
##
## AIC AICc BIC
## 1436.829 1439.400 1458.267
fit %>%
forecast() %>%
autoplot(aus_trips) +
ggtitle("Forecasting of total number of trips across Australia")The closing prices for the four stocks in gafa_stock
gafa_stock %>%
autoplot(Close) +
facet_grid(vars(Symbol), scales = "free_y")gafa_regular <- gafa_stock %>%
group_by(Symbol) %>%
mutate(trading_day = row_number()) %>%
ungroup() %>%
as_tsibble(index = trading_day, regular = TRUE)
fit <- gafa_regular %>%
model(
ETS(Close)
)
report(fit)## Warning in report.mdl_df(fit): Model reporting is only supported for individual
## models, so a glance will be shown. To see the report for a specific model, use
## `select()` and `filter()` to identify a single model.
## # A tibble: 4 x 10
## Symbol .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL ETS(Close) 0.000228 -5299. 10604. 10604. 10620. 4.39 8.96 0.0106
## 2 AMZN ETS(Close) 0.000383 -7778. 15561. 15561. 15577. 359. 700. 0.0129
## 3 FB ETS(Close) 0.000356 -5442. 10890. 10890. 10906. 5.82 11.3 0.0126
## 4 GOOG ETS(Close) 0.000219 -7529. 15064. 15064. 15079. 144. 291. 0.0102
fit %>%
forecast(h=50) %>%
autoplot(gafa_regular %>% group_by_key() %>% slice((n() - 100):n()))## `mutate_if()` ignored the following grouping variables:
## Column `Symbol`
Pelt trading records
pelt %>%
model(
ETS(Lynx)
) %>%
report()## Series: Lynx
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.9998998
##
## Initial states:
## l[0]
## 25737.12
##
## sigma^2: 162584145
##
## AIC AICc BIC
## 2134.976 2135.252 2142.509
pelt %>%
model(
ETS(Lynx)
) %>%
forecast(h=10) %>%
autoplot(pelt) +
ggtitle("Forecasting of PELT trading records.")The lynx data’s cyclic behavior is completely lost here. There is nothing that can be done to improve this because ETS models are not designed to handle cyclic data.
As seen in the pelt data set above, ETS does not work well with cyclic data.
library(forecast)
?ets## starting httpd help server ... done
library(forecast)
library(fpp2)## Warning: package 'fpp2' was built under R version 4.1.3
## -- Attaching packages ---------------------------------------------- fpp2 2.4 --
## v fma 2.4 v expsmooth 2.3
## Warning: package 'fma' was built under R version 4.1.3
## Warning: package 'expsmooth' was built under R version 4.1.3
## -- Conflicts ------------------------------------------------- fpp2_conflicts --
## x magrittr::extract() masks tidyr::extract()
##
## Attaching package: 'fpp2'
## The following object is masked from 'package:fpp3':
##
## insurance
library(seasonal)## Warning: package 'seasonal' was built under R version 4.1.3
##
## Attaching package: 'seasonal'
## The following object is masked from 'package:tibble':
##
## view
#ETS(M,A,M) model
usp = ets(usdeaths, model = "MAM")
#hw model
usphw = hw(usdeaths, seasonal = 'multiplicative', h=1)
accuracy(usp)## ME RMSE MAE MPE MAPE MASE
## Training set 22.42166 247.1924 193.8055 0.2077024 2.225981 0.4435586
## ACF1
## Training set 0.005044861
usdeaths.ets2 <- ets(usdeaths,model = "MAM")
usdeaths.ets2F <- forecast(usdeaths.ets2, h=1)
usdeaths.hwM <- hw(usdeaths, seasonal = 'multiplicative', h=1)
usdeaths.ets2F$mean## Jan
## 1979 8233.107
usdeaths.hwM$mean## Jan
## 1979 8217.64