library(fpp3)
###Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.
vic_p <- aus_livestock %>% filter(Animal=='Pigs', State=='Victoria')
vic_p %>% autoplot()
vic_p_mod <- vic_p %>% model(ETS(Count ~ error("A") + trend("N") + season("N")))
vic_p_fc <- vic_p_mod %>% forecast(h = 4)
autoplot(vic_p) + autolayer(vic_p_fc)
report(vic_p_mod)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
The optimal values for α and ℓ0 respectively are 0.32 and 100646.6.
vic_p_fc$Count
## <distribution[4]>
## [1] N(95187, 8.7e+07) N(95187, 9.7e+07) N(95187, 1.1e+08) N(95187, 1.1e+08)
s <- sd(residuals(vic_p_mod)$.resid)
mean <- vic_p_fc$.mean[1]
c(mean - (1.96*s),mean + (1.96*s))
## [1] 76871.01 113502.10
The 95% interval is (76871.01, 113502.10). This is smaller than the interval produced by R.
###Data set global_economy contains the annual Exports from many countries. Select one country to analyse.
ge <- global_economy %>% filter(Country == 'Peru')
ge %>% autoplot(Exports)
ge_mod <- ge %>% model(ETS(Exports ~ error("A") + trend("N") + season("N")))
ge_fc <- ge_mod %>% forecast(h = 5)
ge_fc %>% autoplot(global_economy)
accuracy(ge_mod)$RMSE
## [1] 2.862248
ge_mod2 <- ge %>% model(ETS(Exports ~ error("A") + trend("A") + season("N")))
ge_fc2 <- ge_mod2 %>% forecast(h = 5)
ge_fc2 %>% autoplot(global_economy)
accuracy(ge_mod2)$RMSE
## [1] 2.862429
The RMSE for the ETS(A,N,N) model is slightly lower than that of the ETS(A,A,N) model, meaning it forecasts a little better.
The ETS(A,A,N) model forecasts an upwards trend while the ETS(A,N,N) models shows a flat line trend. The difference in RMSE between both models is very small. Taking that into account the ETS(A,A,N) model would be better suited based on its forecasts.
s1 <- sd(residuals(ge_mod)$.resid)
mean1 <- ge_fc$.mean[1]
c(mean1 - (1.96*s1),mean1 + (1.96*s1))
## [1] 18.60605 29.92066
The 95% interval is (18.60605, 29.92066).
ge_fc2$Exports
## <distribution[5]>
## [1] N(24, 8.8) N(24, 18) N(24, 26) N(25, 35) N(25, 44)
s2 <- sd(residuals(ge_mod2)$.resid)
mean2 <- ge_fc2$.mean[1]
c(mean2 - (1.96*s2),mean2 + (1.96*s2))
## [1] 18.66408 29.98280
The 95% interval is (18.66408, 29.98280)
ge_fc2$Exports[1]
## <distribution[1]>
## [1] N(24, 8.8)
Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.
[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]
c <- global_economy %>% filter(Country == 'China')
lam <- c %>% features(GDP,features=guerrero) %>% pull(lambda_guerrero)
c_mod <- c %>% model('SES' = ETS(GDP ~ error("A") + trend("N") + season("N")),
'Holt' = ETS(GDP ~ error("A") + trend("A") + season("N")),
'Damped Holt' = ETS(GDP ~ error("A") + trend("Ad", phi = 0.75) + season("N")),
'Box-Cox' = ETS(box_cox(GDP, lam) ~ error("A") + trend("A") + season("N")),
'Damped Box-Cox' = ETS(box_cox(GDP, lam) ~ error("A") + trend("Ad", phi = 0.75) + season("N")))
c_fc <- c_mod %>% forecast(h = 25)
c_fc %>% autoplot(global_economy, level = NULL)
###Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?
gas_mod <- aus_production %>% model('Multiplicative' = ETS(Gas ~ error("M") + trend("A") + season("M")),
'Damped Multiplicative' = ETS(Gas ~ error("M") + trend("Ad", phi = 0.75) + season("M")))
gas_fc <- gas_mod %>% forecast(h=20)
gas_fc %>% autoplot(aus_production, level = NULL)
Multiplicative seasonality is necessary due to the proportional increase
of the size of the seasonal cycle with the time series.
accuracy(gas_mod) %>% select('.model', 'RMSE')
## # A tibble: 2 x 2
## .model RMSE
## <chr> <dbl>
## 1 Multiplicative 4.60
## 2 Damped Multiplicative 4.54
The RMSE for the damped multiplicative model is lower than the RMSE for the multiplicative model. Making the trend does improve the forecasts.
set.seed(246810)
myseries <- aus_retail %>%
filter(`Series ID` == 'A3349849A')
autoplot(myseries)
Multiplicative seasonality is necessary for this series because the seasonal variations are changing protional to the time series.
ms_mod <- myseries %>% model('Multiplicative' = ETS(Turnover ~ error("M") + trend("A") + season("M")),
'Damped Multiplicative' = ETS(Turnover ~ error("M") + trend("Ad", phi = 0.75) + season("M")))
ms_fc <- ms_mod %>% forecast(h=20)
ms_fc %>% autoplot(aus_retail, level = NULL)
accuracy(ms_mod) %>% select('.model', 'RMSE')
## # A tibble: 2 x 2
## .model RMSE
## <chr> <dbl>
## 1 Multiplicative 1.78
## 2 Damped Multiplicative 1.76
The RMSE for the damped multiplicative model is lower so it would be preferred.
pref_mod <- myseries %>%
model('Damped Multiplicative' = ETS(Turnover ~ error("M") + trend("Ad", phi = 0.75) + season("M")))
pref_mod %>%
gg_tsresiduals()
augment(pref_mod) %>% features(.innov, box_pierce, lag = 24)
## # A tibble: 1 x 5
## State Industry .model bp_stat bp_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Australian Capital Territory Cafes, restaurants and ~ Dampe~ 30.7 0.161
augment(pref_mod) %>% features(.innov, ljung_box, lag = 24)
## # A tibble: 1 x 5
## State Industry .model lb_stat lb_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Australian Capital Territory Cafes, restaurants and ~ Dampe~ 31.8 0.131
Both tests show the p value to be greater 0.05, which shows that the residuals are not distinguishable from white noise.
train <- myseries %>%
filter(year(Month) <= 2010)
mod <- train %>%
model('Multiplicative' = ETS(Turnover ~ error("M") + trend("A") + season("M")),
'Seasonal Naive' = SNAIVE(Turnover))
fc <- mod %>%
forecast(new_data = anti_join(myseries, train))
fc %>% autoplot(myseries, level = NULL)
accuracy(mod) %>%
select(.type, .model, RMSE)
## # A tibble: 2 x 3
## .type .model RMSE
## <chr> <chr> <dbl>
## 1 Training Multiplicative 1.38
## 2 Training Seasonal Naive 3.37
fc %>% accuracy(myseries) %>%
select(.type, .model, RMSE)
## # A tibble: 2 x 3
## .type .model RMSE
## <chr> <chr> <dbl>
## 1 Test Multiplicative 6.62
## 2 Test Seasonal Naive 8.42
The Multiplicative model has the lower RMSE and forecasts better.
For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?
lam <- myseries %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
stl_decomp <- myseries %>%
model(stl_box = STL(box_cox(Turnover,lam) ~ season(window = "periodic"), robust = TRUE)) %>%
components()
stl_decomp %>% autoplot()
myseries$Turnover_sa <- stl_decomp$season_adjust
training <- myseries %>%
filter(year(Month) <= 2010)
model <- training %>%
model(ETS(Turnover_sa ~ error("M") + trend("A") + season("M")))
forecast <- model %>% forecast(new_data = anti_join(myseries, training))
model %>% accuracy() %>%
select(.model, .type, RMSE)
## # A tibble: 1 x 3
## .model .type RMSE
## <chr> <chr> <dbl>
## 1 "ETS(Turnover_sa ~ error(\"M\") + trend(\"A\") + season(\"M\"))" Traini~ 0.375
model %>% gg_tsresiduals()
augment(model) %>% features(.innov, box_pierce, lag = 24)
## # A tibble: 1 x 5
## State Industry .model bp_stat bp_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Australian Capital Territory Cafes, restaurants and ~ "ETS(~ 37.3 0.0406
augment(model) %>% features(.innov, ljung_box, lag = 24)
## # A tibble: 1 x 5
## State Industry .model lb_stat lb_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Australian Capital Territory Cafes, restaurants and ~ "ETS(~ 38.7 0.0291
model %>% accuracy() %>%
select(.model, .type, RMSE)
## # A tibble: 1 x 3
## .model .type RMSE
## <chr> <chr> <dbl>
## 1 "ETS(Turnover_sa ~ error(\"M\") + trend(\"A\") + season(\"M\"))" Traini~ 0.375
forecast %>% accuracy(myseries) %>%
select(.model, .type, RMSE)
## # A tibble: 1 x 3
## .model .type RMSE
## <chr> <chr> <dbl>
## 1 "ETS(Turnover_sa ~ error(\"M\") + trend(\"A\") + season(\"M\"))" Test 0.692