library(fpp3)
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.4
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.3.1
## ✔ lubridate 1.9.3 ✔ fable 0.3.3
## ✔ ggplot2 3.5.0 ✔ fabletools 0.4.0
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.
ETS()
function to estimate the equivalent model
for simple exponential smoothing. Find the optimal values of α and \(l_0\), and generate forecasts for the next
four months.pigs_vic <- aus_livestock %>% filter(Animal=='Pigs', State=='Victoria')
pigs_vic|>autoplot(Count, color='darkgreen')+
labs(title = "Pigs slaughtered in Victoria",
x= "Month", y="Count")
fit <- pigs_vic |>
model(sim_exp <- ETS(Count ~ error("A") + trend("N") + season("N")))
fc <- fit |>
forecast(h = 4)
fc|>
autoplot(pigs_vic)
report(fit)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
Here, the optimal value of \(\alpha\) is 0.3221 and \(l_0\) is 100646.6
Std <- sd(residuals(fit)$.resid)
Mean <- fc$.mean[1]
LL <- Mean - (1.96*Std)
UL <- Mean + (1.96*Std)
cat("Lower limt: ",LL, "Upper limit: ", UL, "And 95% Confidence interval:","(",c(LL, UL), ")")
## Lower limt: 76871.01 Upper limit: 113502.1 And 95% Confidence interval: ( 76871.01 113502.1 )
fc$Count[1]
## <distribution[1]>
## [1] N(95187, 8.7e+07)
This is the confidence interval created by R for the first month forecast. Confidence interval created by R is narrow while 95% confidence interval obtained statisticallly is wide.
Data set global_economy
contains the annual Exports from
many countries. Select one country to analyse.
US_exports <- global_economy|>
filter(Country=="United States")|>
na.omit()
autoplot(US_exports, Exports)+
labs(title = "US Exports:",
x= "Year")
The US Exports series is a time series. The plot shows that the exports of the US increases with respect to time. It has a positve trend with obvious seasonality and noise.
fit <- US_exports|>
model(
ets_model= ETS(Exports ~ error("A") + trend("N") + season("N")) # ANN model
)
fc<- fit |> forecast(h = 10)
fc|> autoplot(US_exports)
US_exports|>
stretch_tsibble(.init = 10)|>
model(
SES = ETS(Exports ~ error("A") + trend("N") + season("N"))
) |>
forecast(h = 15) |>
accuracy(US_exports)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 15 observations are missing between 2017 and 2031
## # A tibble: 1 × 11
## .model Country .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SES United States Test 1.07 1.88 1.51 9.65 14.8 3.16 2.94 0.857
It can be seen that the root mean squared error (RMSE) is 1.8779.
fit <- US_exports|>
model(
aan = ETS(Exports ~ error("A") + trend("A")+ season("N"))
)
fc <- fit|>
forecast(h = 15)
fc|> autoplot(US_exports)
It can be seen that the trend of forcast of ETS(AAN) model has positive slope while the trend of ETS(ANN) model is horizontal.
US_exports|>
stretch_tsibble(.init = 10)|>
model(
aan = ETS(Exports ~ error("A") + trend("A") + season("N"))
) |>
forecast(h = 15) |>
accuracy(US_exports)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 15 observations are missing between 2017 and 2031
## # A tibble: 1 × 11
## .model Country .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 aan United States Test -0.140 2.64 1.85 -3.43 19.8 3.87 4.14 0.938
It can be seen that the RMSE for aan is 2.6411 while RMSE for ann is 1.8779. Since RMSE for ETS(A,N,N) is less than ETS(A, A, N) therefore, ann is better for forecasting than aan.
Std <- sd(residuals(fit)$.resid)
Mean = mean(fc$.mean)
LL <- Mean - (1.96*Std)
UL <- Mean + (1.96*Std)
cat("Lower limt: ",LL, "Upper limit: ", UL, "And 95% Confidence interval:","(",c(LL, UL), ")")
## Lower limt: 11.71352 Upper limit: 14.1696 And 95% Confidence interval: ( 11.71352 14.1696 )
Forecast the Chinese GDP from the global_economy
data
set using an ETS model. Experiment with the various options in the
ETS()
function to see how much the forecasts change with
damped trend, or with a Box-Cox transformation. Try to develop an
intuition of what each is doing to the forecasts.
[Hint: use a relatively large value of h
when
forecasting, so you can clearly see the differences between the various
options when plotting the forecasts.]
chinese_economy<-global_economy|>
filter(Country=="China")
chinese_economy|>
autoplot(GDP)+
labs(title = "Chinese GDP", x="Year")
lambda <- chinese_economy |>
features(GDP,features=guerrero)|>
pull(lambda_guerrero)
mod <- chinese_economy |>
model('SES' = ETS(GDP ~ error("A") + trend("N") + season("N")),
'Holt' = ETS(GDP ~ error("A") + trend("A") + season("N")),
'Damped Holt' = ETS(GDP ~ error("A") + trend("Ad", phi = 0.75) + season("N")),
'Box-Cox' = ETS(box_cox(GDP, lambda) ~ error("A") + trend("A") + season("N")),
'Damped Box-Cox' = ETS(box_cox(GDP, lambda) ~ error("A") + trend("Ad", phi = 0.75) + season("N")))
fc <- mod |> forecast(h = 25)
fc |> autoplot(chinese_economy, level = NULL)
Simple exponential smoothing produces a nearly horizontal line since trend is horizontal. Box-Cox produces exponential curve and other damped Box-Cox, Damped Holt and Holt are constant slope trends forecasts. However, different forecast methods are suiatbel for different time series.
Find an ETS model for the Gas data from aus_production
and forecast the next few years. Why is multiplicative seasonality
necessary here? Experiment with making the trend damped. Does it improve
the forecasts?
aus_gas <- aus_production|>
select(Gas)
aus_gas|>autoplot(Gas)
It can be seen that the amplitude of seasonality is proportional to the trend. Therefore, for this data, the seasonality must be multiplicative not additive.
fit <- aus_gas|>model('Multiplicative' = ETS(Gas ~ error("A") + trend("A") + season("M")))
fc <- fit|> forecast(h=25)
fc|>autoplot(aus_gas)
In this experiment, the trend is additive while the seasonality is multiplicative. The seasonality must be multiplicative because of the fact that the amplitude of seasonality is not constant but it depends on the trend.
Damped Trend:
fit <- aus_gas|>model('Damped Multiplicative' = ETS(Gas ~ error("M") + trend("Ad", phi = 0.7) + season("M")))
fc <- fit|> forecast(h=25)
fc|>autoplot(aus_gas, level=NULL)
Comparing both the charts, it can be concluded that the additive trend with additive errors and multiplicative seasonality is better than the one with damped trend with multiplicative error and multiplicative seasonality.
Recall your retail time series data (from Exercise 7 in Section 2.10).
set.seed(1000)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries|>autoplot(Turnover)+labs(title = "Autralian Retail Turn Over", x="Month")
Answer. The plot shown above suggests that the seasonality is not independent of trend but it depends on the trend. Hence, the multiplicative seasonality is a necessory to account the change in the amplitude of the seasonality.
fit<-myseries |>
model(
`Holt's method` = ETS(Turnover ~ error("M") + trend("A") + season("M")),
`Damped Holt's method` = ETS(Turnover~ error("M") +trend("Ad", phi = 0.9) + season("M"))
)
fc<-fit|>forecast(h = 50)
fc|>autoplot(myseries, level = NULL) +
labs(title = "Australian Retail Turnover") +
guides(colour = guide_legend(title = "Forecast"))
Damped trend does not cover the trend but Holt-Winter’s method catches the trend and thus just extrapolates the series.
accuracy(fit)
## # A tibble: 2 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Victo… Food re… Holt'… Trai… 1.88 27.3 20.3 0.0787 1.81 0.301 0.337 0.0952
## 2 Victo… Food re… Dampe… Trai… 5.39 27.7 20.6 0.384 1.89 0.307 0.341 0.0455
There is only a slight difference between RMSE of Holt-Winter and Damped Holt-Winters Methods.
resid<-residuals(fit)|>
filter(`.model`=="Holt's method")|>
select(.resid)
ggplot(resid, aes(x=Month, y= .resid))+
geom_point(col = 'blue')+
geom_line()+
labs(title = "Plot of Residuals")
There appears no trend in the residuals therefore residuals are just white noise.
train <- myseries %>%
filter(year(Month) <= 2010)
mod <- train %>%
model('Multiplicative' = ETS(Turnover ~ error("M") + trend("A") + season("M")),
'Seasonal Naive' = SNAIVE(Turnover))
fc <- mod %>%
forecast(new_data = anti_join(myseries, train))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
fc %>% autoplot(myseries, level = NULL)
accuracy(mod) %>%
select(.type, .model, RMSE)
## # A tibble: 2 × 3
## .type .model RMSE
## <chr> <chr> <dbl>
## 1 Training Multiplicative 23.8
## 2 Training Seasonal Naive 72.7
fc %>% accuracy(myseries) %>%
select(.type, .model, RMSE)
## # A tibble: 2 × 3
## .type .model RMSE
## <chr> <chr> <dbl>
## 1 Test Multiplicative 66.6
## 2 Test Seasonal Naive 480.
It can be seen that the RMSE is better for multiplicative model than that for Seasonal Naive. Hence, Multiplicative model is better for the forecast for this series.
For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?
lambda <- myseries|>
features(Turnover, features = guerrero)|>
pull(lambda_guerrero)
stl_decomp <- myseries|>
model(stl_box = STL(box_cox(Turnover,lambda) ~ season(window = "periodic"), robust = TRUE)) |>
components()
stl_decomp |> autoplot()
myseries$Turnover_sa <- stl_decomp$season_adjust
training <- myseries |>
filter(year(Month) <= 2010)
model <- training |>
model(ETS(Turnover_sa ~ error("M") + trend("A") + season("M")))
forecast <- model |> forecast(new_data = anti_join(myseries, training))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover,
## Turnover_sa)`
model |> accuracy()|>
select(.model, .type, RMSE)
## # A tibble: 1 × 3
## .model .type RMSE
## <chr> <chr> <dbl>
## 1 "ETS(Turnover_sa ~ error(\"M\") + trend(\"A\") + season(\"M\"))" Train… 0.0355
model |> gg_tsresiduals()
augment(model) |> features(.innov, box_pierce, lag = 24)
## # A tibble: 1 × 5
## State Industry .model bp_stat bp_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Victoria Food retailing "ETS(Turnover_sa ~ error(\"M\") + t… 477. 0
augment(model) %>% features(.innov, ljung_box, lag = 24)
## # A tibble: 1 × 5
## State Industry .model lb_stat lb_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Victoria Food retailing "ETS(Turnover_sa ~ error(\"M\") + t… 499. 0
model |> accuracy() |>
select(.model, .type, RMSE)
## # A tibble: 1 × 3
## .model .type RMSE
## <chr> <chr> <dbl>
## 1 "ETS(Turnover_sa ~ error(\"M\") + trend(\"A\") + season(\"M\"))" Train… 0.0355
forecast |> accuracy(myseries) |>
select(.model, .type, RMSE)
## # A tibble: 1 × 3
## .model .type RMSE
## <chr> <chr> <dbl>
## 1 "ETS(Turnover_sa ~ error(\"M\") + trend(\"A\") + season(\"M\"))" Test 0.0870
The RMSE is 0.087 which is much less than the RMSE obtained for the Holt-Winters method with multiplicative seasonality. It might be due to the constant amplitude of seasonality. Thus, this model seems the best one.