Do exercises 8.1, 8.5, 8.6, 8.7, 8.8, 8.9 in Hyndman
library(mlbench)
library(ggplot2)
library(reshape2)
library(Amelia)
library(inspectdf)
library(naniar)
library(fpp3)
Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.
Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of α and ℓ0 , and generate forecasts for the next four months.
Compute a 95% prediction interval for the first forecast using y ± 1.96 s where s is the standard deviation of the residuals. Compare your interval with the interval produced by R.
pigs_slaughtered = aus_livestock %>% filter(Animal=='Pigs',State=='Victoria')
pigs_fit = pigs_slaughtered %>% model(ETS(Count~error("A") + trend("N") + season("N")))
pig_fc = pigs_fit %>% forecast(h=4)
report(pigs_fit)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
autoplot(pig_fc)
Computed 95% prediction interval
y_hat <- pig_fc$.mean[1]
aug_fit <- augment(pigs_fit)
s <- sd(aug_fit$.resid)
# Calculate the 95% prediction intervals
upper_95 <- y_hat + (s * 1.96)
lower_95 <- y_hat - (s * 1.96)
paste0("upper bound (95% C.I): ",upper_95,';'," lower bound (95% C.I)): ",lower_95)
## [1] "upper bound (95% C.I): 113502.102384467; lower bound (95% C.I)): 76871.0124775157"
Data set global_economy contains the annual Exports from many countries. Select one country to analyse.
gejapan = global_economy
gejapan %>% select(Exports) %>% filter(Country == 'Japan') %>% autoplot(.vars = Exports)
japanex = gejapan %>% select(Exports) %>% filter(Country == 'Japan')
japan_fit = japanex %>% model(ETS(Exports~error("A") + trend("N") + season ("N")))
japan_fc = japan_fit %>% forecast(h=5)
japan_fc %>% autoplot(japanex) + geom_line(aes(y = .fitted), data = augment(japan_fit))
japan_fit%>% accuracy()
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Japan "ETS(Exports ~ … Trai… NaN NaN NaN NaN NaN NaN NaN NA
fit_compare <- japanex %>%
model(
ANN = ETS (Exports ~ error("A") + trend("N") + season("N")),
AAN = ETS (Exports ~ error("A") + trend("A") + season("N"))
)
accuracy(fit_compare)
## # A tibble: 2 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Japan ANN Training NaN NaN NaN NaN NaN NaN NaN NA
## 2 Japan AAN Training NaN NaN NaN NaN NaN NaN NaN NA
Forecast the Chinese GDP from the global_economy data
set using an ETS model. Experiment with the various options in the
ETS() function to see how much the forecasts change with
damped trend, or with a Box-Cox transformation. Try to develop an
intuition of what each is doing to the forecasts.
china_export <- global_economy %>%
filter(Country == "China")
china_export %>% autoplot(GDP) +
labs(title="china_export")
The Data shows a strong upward trend.
china_fit = china_export %>% model(ETS(Exports~error("A") + trend("A") + season("N")))
china_fc = china_fit %>% forecast(h=12)
china_fc %>% autoplot(china_export) +
geom_line(aes(y = .fitted), col="blue",
data = augment(china_fit)) +
labs(y="Exports", title="China: Exports")
report(china_fit)
## Series: Exports
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.9998999
## beta = 0.0992376
##
## Initial states:
## l[0] b[0]
## 4.0399 0.09995689
##
## sigma^2: 3.9037
##
## AIC AICc BIC
## 320.3530 321.5069 330.6552
Model: ETS(A,A,N)
Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?
aus_gas_production = aus_production %>% select(Gas)
aus_gas_production %>%
model(
simple_additive = ETS(Gas ~ error('A')+trend("A")+season("N")),
simple_mult = ETS(Gas ~ error("M")+trend("A")+season("N")),
seasonality_add = ETS(Gas ~ error('A')+trend("A")+season("A")),
seasonality_mult = ETS(Gas ~ error("M")+trend("A")+season("M")),
mult_season_damp = ETS(Gas ~ error("M")+trend("Ad")+season("M"))
) %>% forecast(h=25) %>%autoplot(aus_gas_production,level=NULL) +
labs(y="Amount", title="Gas: Aus Production ")
Multiplicative models would be the most appropriate solution
set.seed(6543)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries
## # A tsibble: 441 x 5 [1M]
## # Key: State, Industry [1]
## State Industry Serie…¹ Month Turno…²
## <chr> <chr> <chr> <mth> <dbl>
## 1 South Australia Hardware, building and garden suppl… A33495… 1982 Apr 8.6
## 2 South Australia Hardware, building and garden suppl… A33495… 1982 May 9.5
## 3 South Australia Hardware, building and garden suppl… A33495… 1982 Jun 8.4
## 4 South Australia Hardware, building and garden suppl… A33495… 1982 Jul 10.3
## 5 South Australia Hardware, building and garden suppl… A33495… 1982 Aug 10.6
## 6 South Australia Hardware, building and garden suppl… A33495… 1982 Sep 11.5
## 7 South Australia Hardware, building and garden suppl… A33495… 1982 Oct 10.8
## 8 South Australia Hardware, building and garden suppl… A33495… 1982 Nov 13.1
## 9 South Australia Hardware, building and garden suppl… A33495… 1982 Dec 25.4
## 10 South Australia Hardware, building and garden suppl… A33495… 1983 Jan 9.2
## # … with 431 more rows, and abbreviated variable names ¹​`Series ID`, ²​Turnover
autoplot(myseries)
a. Why is multiplicative seasonality necessary for this series?
The seasonality of the data with peaks observed after regular interval.
fit = myseries %>% model(
Multi_Season = ETS(Turnover ~ error("M") + trend("A") + season("M")),
Multi_Season_Damp = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
)
fc = fit %>% forecast(h = 40)
fc %>%
autoplot(myseries, level=NULL) +
labs(y="AUD", title="Retail")
C. Compare the RMSE of the one-step forecasts from the two methods.
Which do you prefer?
accuracy(fit)
## # A tibble: 2 × 12
## State Indus…¹ .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 South … Hardwa… Multi… Trai… 0.0334 4.05 2.90 -0.231 6.10 0.473 0.471 0.282
## 2 South … Hardwa… Multi… Trai… 0.211 4.01 2.86 0.353 5.95 0.465 0.467 0.190
## # … with abbreviated variable name ¹​Industry
Multi_Season_Damp
myseries %>% model( Multi_Season_Damp = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
)%>% gg_tsresiduals()