Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.
Use the ETS() function to estimate the equivalent model for simple
exponential smoothing. Find the optimal values of α and
ℓ 0 , and generate forecasts for the next four months. Compute a 95%
prediction interval for the first forecast using y-hat ± 1.96s where s
is the standard deviation of the residuals. Compare your interval with
the interval produced by R.
victoria_pigs <- aus_livestock %>%
filter(Animal == 'Pigs' & State == 'Victoria')
victoria_pigs
## # A tsibble: 558 x 4 [1M]
## # Key: Animal, State [1]
## Month Animal State Count
## <mth> <fct> <fct> <dbl>
## 1 1972 Jul Pigs Victoria 94200
## 2 1972 Aug Pigs Victoria 105700
## 3 1972 Sep Pigs Victoria 96500
## 4 1972 Oct Pigs Victoria 117100
## 5 1972 Nov Pigs Victoria 104600
## 6 1972 Dec Pigs Victoria 100500
## 7 1973 Jan Pigs Victoria 94700
## 8 1973 Feb Pigs Victoria 93900
## 9 1973 Mar Pigs Victoria 93200
## 10 1973 Apr Pigs Victoria 78000
## # … with 548 more rows
victoria_pigs %>% autoplot()
## Plot variable not specified, automatically selected `.vars = Count`
dcmp <- victoria_pigs %>%
model(stl = STL(Count))
components(dcmp) %>% autoplot()
fit <- victoria_pigs %>%
model(ETS(Count ~ error("A") + trend("A") + season("A")))
report(fit)
## Series: Count
## Model: ETS(A,A,A)
## Smoothing parameters:
## alpha = 0.351401
## beta = 0.008168924
## gamma = 0.000100095
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5]
## 108342.3 -938.1185 2065.39 6717.424 -2809.359 -1341.236 -7750.408 -8483.266
## s[-6] s[-7] s[-8] s[-9] s[-10] s[-11]
## 5610.021 -579.1321 1148.82 -2826.693 1739.669 6508.77
##
## sigma^2: 61920982
##
## AIC AICc BIC
## 13558.04 13559.18 13631.56
fc <- fit %>%
forecast(h=4)
print(fc)
## # A fable: 4 x 6 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month Count .mean
## <fct> <fct> <chr> <mth> <dist> <dbl>
## 1 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Jan N(84776, 6.2e+07) 84776.
## 2 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Feb N(85581, 7e+07) 85581.
## 3 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Mar N(92062, 7.8e+07) 92062.
## 4 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Apr N(90665, 8.7e+07) 90665.
fc %>% autoplot(victoria_pigs)
## Problem 8.5
Data set global_economy contains the annual Exports from many countries. Select one country to analyse.
brazil_data <- global_economy %>%
filter(Country == 'Brazil')
brazil_data %>%
autoplot(Exports)
There appears to be a general upward trend in the data. There is
evidence of some black-swan events that have resulted in the data
declining and breaking the trend. However, once the data recovers, it
appears that the trend is continuing.
fit <- brazil_data %>%
model(ETS(Exports ~ error("A") + trend("N") + season("N")))
fcst <- fit %>% forecast(h=5)
fcst %>%
autoplot(brazil_data)
training <- brazil_data %>% filter(Year < 2007)
fit <- training %>%
model(ets = ETS(Exports ~ error("A")+trend("N")+season("N")))
accuracy(fit)
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Brazil ets Training 0.201 1.64 1.24 -0.500 14.4 0.962 0.973 -0.00508
fit <- training %>%
model(ets1 = ETS(Exports ~ error("A") + trend("N") + season("N")),
ets2 = ETS(Exports ~ error("A") + trend("A") + season("N")))
accuracy(fit)
## # A tibble: 2 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Brazil ets1 Training 2.01e-1 1.64 1.24 -0.500 14.4 0.962 0.973 -0.00508
## 2 Brazil ets2 Training -1.31e-4 1.63 1.25 -2.96 14.8 0.973 0.965 0.0200
fcst <- fit %>% forecast(h="11 years")
fcst %>%
autoplot(brazil_data)
ffcst <- fcst %>%
filter(Year == '2007')
ffcst
## # A fable: 2 x 5 [1Y]
## # Key: Country, .model [2]
## Country .model Year Exports .mean
## <fct> <chr> <dbl> <dist> <dbl>
## 1 Brazil ets1 2007 N(15, 2.8) 14.6
## 2 Brazil ets2 2007 N(15, 2.9) 14.8
model1_mean <- ffcst[[1,5]]
model2_mean <- ffcst[[2,5]]
model1_rmse <- accuracy(fit)[[1,5]]
model2_rmse <- accuracy(fit)[[2,5]]
model1_lower <- model1_mean - 1.96*model1_rmse
model1_upper <- model1_mean + 1.96*model1_rmse
model2_lower <- model2_mean - 1.96*model2_rmse
model2_upper <- model2_mean + 1.96*model2_rmse
print(model1_lower)
## [1] 11.36546
print(model1_upper)
## [1] 17.80013
print(model2_lower)
## [1] 11.64341
print(model2_upper)
## [1] 18.02648
Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.
[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]
china_data <- global_economy %>%
filter(Country == 'China')
china_data <- china_data %>%
mutate(GDP_thousands = GDP/1e6)
fit <- china_data %>%
model(model1 = ETS(GDP ~ error("A") + trend("A") + season("N")),
model2 = ETS(GDP ~ error("A") + trend("Ad",phi=0.9) + season("N")),
model3 = ETS((GDP)^1/2 ~ error("A") + trend("Ad") + season("N")))
fcst <- fit %>% forecast(h=25)
fcst %>%
autoplot(china_data)
Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?
aus_production %>%
autoplot(Gas)
fit <- aus_production %>%
model(model1 = ETS(Gas ~ error("A") + trend("A") + season("N")),
model2 = ETS(Gas ~ error("A") + trend("A") + season("A")),
model3 = ETS(Gas ~ error("A") + trend("A") + season("M")),
model4 = ETS(Gas ~ error("A") + trend("Ad") + season("A")),
model5 = ETS(Gas ~ error("A") + trend("Ad") + season("M")))
fcst <- fit %>% forecast(h=12)
accuracy(fit)
## # A tibble: 5 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 model1 Training 0.442 15.7 11.7 1.05 14.3 2.11 2.07 0.118
## 2 model2 Training 0.00525 4.76 3.35 -4.69 10.9 0.600 0.628 0.0772
## 3 model3 Training 0.218 4.19 2.84 -0.920 5.03 0.510 0.553 0.0405
## 4 model4 Training 0.600 4.58 3.16 0.460 7.39 0.566 0.604 0.0660
## 5 model5 Training 0.548 4.22 2.81 1.32 4.11 0.505 0.556 0.0265
fcst %>%
autoplot(aus_production)
Recall your retail time series data (from Exercise 7 in Section 2.10).
set.seed(12345678)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries %>% autoplot()
## Plot variable not specified, automatically selected `.vars = Turnover`
Multiplicative seasonality is appropriate for this series because we can
see that over time, the amplitude of the seasonal trend grows
myseries
## # A tsibble: 369 x 5 [1M]
## # Key: State, Industry [1]
## State Industry Serie…¹ Month Turno…²
## <chr> <chr> <chr> <mth> <dbl>
## 1 Northern Territory Clothing, footwear and personal … A33497… 1988 Apr 2.3
## 2 Northern Territory Clothing, footwear and personal … A33497… 1988 May 2.9
## 3 Northern Territory Clothing, footwear and personal … A33497… 1988 Jun 2.6
## 4 Northern Territory Clothing, footwear and personal … A33497… 1988 Jul 2.8
## 5 Northern Territory Clothing, footwear and personal … A33497… 1988 Aug 2.9
## 6 Northern Territory Clothing, footwear and personal … A33497… 1988 Sep 3
## 7 Northern Territory Clothing, footwear and personal … A33497… 1988 Oct 3.1
## 8 Northern Territory Clothing, footwear and personal … A33497… 1988 Nov 3
## 9 Northern Territory Clothing, footwear and personal … A33497… 1988 Dec 4.2
## 10 Northern Territory Clothing, footwear and personal … A33497… 1989 Jan 2.7
## # … with 359 more rows, and abbreviated variable names ¹`Series ID`, ²Turnover
fit <- myseries %>%
model(model1 = ETS(Turnover ~ error("A") + trend("A") + season("M")),
model2 = ETS(Turnover ~ error("A") + trend("Ad",phi=0.9) + season("M")))
fcst <- fit %>%
forecast(h=10)
accuracy(fit)
## # A tibble: 2 × 12
## State Indus…¹ .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Northern T… Clothi… model1 Trai… -0.00119 0.600 0.443 -0.265 5.21 0.506 0.517
## 2 Northern T… Clothi… model2 Trai… 0.0477 0.603 0.450 0.264 5.28 0.514 0.520
## # … with 1 more variable: ACF1 <dbl>, and abbreviated variable name ¹Industry
aug <- augment(fit)
mod1_aug <- aug %>% filter(.model == 'model1')
autoplot(mod1_aug, .innov)
mod1_aug %>%
ggplot(aes(x=.innov)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mod1_aug %>%
ACF(.innov) %>%
autoplot()
training_set <- myseries %>% filter(year(Month) <= 2010 )
test_set <- myseries %>% filter(year(Month) > 2010 )
train_fit <- training_set %>%
model(ETS(Turnover ~ error("A") + trend("A") + season("M")))
fcst <- train_fit %>%
forecast(new_data=test_set)
fcst %>%
autoplot(training_set, level=NULL) +
autolayer(myseries, Turnover, color="black")
accuracy(fcst, test_set)
## # A tibble: 1 × 12
## .model State Indus…¹ .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Turn… Nort… Clothi… Test -1.54 1.80 1.61 -12.2 12.6 NaN NaN 0.557
## # … with abbreviated variable name ¹Industry
For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?
myseries %>%
autoplot(Turnover)
dcmp <- myseries %>%
model(STL(log(Turnover)))
components(dcmp)
## # A dable: 369 x 9 [1M]
## # Key: State, Industry, .model [1]
## # : log(Turnover) = trend + season_year + remainder
## State Indus…¹ .model Month log(T…² trend season…³ remai…⁴ seaso…⁵
## <chr> <chr> <chr> <mth> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Northern Terr… Clothi… STL(l… 1988 Apr 0.833 0.952 -0.131 0.0115 0.964
## 2 Northern Terr… Clothi… STL(l… 1988 May 1.06 0.967 0.00568 0.0924 1.06
## 3 Northern Terr… Clothi… STL(l… 1988 Jun 0.956 0.981 0.0443 -0.0698 0.911
## 4 Northern Terr… Clothi… STL(l… 1988 Jul 1.03 0.995 0.177 -0.143 0.853
## 5 Northern Terr… Clothi… STL(l… 1988 Aug 1.06 1.01 0.103 -0.0461 0.962
## 6 Northern Terr… Clothi… STL(l… 1988 Sep 1.10 1.02 0.0602 0.0171 1.04
## 7 Northern Terr… Clothi… STL(l… 1988 Oct 1.13 1.03 0.0525 0.0447 1.08
## 8 Northern Terr… Clothi… STL(l… 1988 Nov 1.10 1.05 0.00579 0.0460 1.09
## 9 Northern Terr… Clothi… STL(l… 1988 Dec 1.44 1.06 0.322 0.0536 1.11
## 10 Northern Terr… Clothi… STL(l… 1989 Jan 0.993 1.07 -0.173 0.0942 1.17
## # … with 359 more rows, and abbreviated variable names ¹Industry,
## # ²`log(Turnover)`, ³season_year, ⁴remainder, ⁵season_adjust
fit <- components(dcmp) %>%
dplyr::select(season_adjust) %>%
model(ETS(season_adjust ~ error("A") + trend("N") + season("N")))
fcst <- fit %>% forecast(h=12)
fcst
## # A fable: 12 x 4 [1M]
## # Key: .model [1]
## .model Month season_adjust .mean
## <chr> <mth> <dist> <dbl>
## 1 "ETS(season_adjust ~ error(\"A\") + trend(\"N\… 2019 Jan N(2.6, 0.0033) 2.62
## 2 "ETS(season_adjust ~ error(\"A\") + trend(\"N\… 2019 Feb N(2.6, 0.0047) 2.62
## 3 "ETS(season_adjust ~ error(\"A\") + trend(\"N\… 2019 Mar N(2.6, 0.006) 2.62
## 4 "ETS(season_adjust ~ error(\"A\") + trend(\"N\… 2019 Apr N(2.6, 0.0074) 2.62
## 5 "ETS(season_adjust ~ error(\"A\") + trend(\"N\… 2019 May N(2.6, 0.0088) 2.62
## 6 "ETS(season_adjust ~ error(\"A\") + trend(\"N\… 2019 Jun N(2.6, 0.01) 2.62
## 7 "ETS(season_adjust ~ error(\"A\") + trend(\"N\… 2019 Jul N(2.6, 0.012) 2.62
## 8 "ETS(season_adjust ~ error(\"A\") + trend(\"N\… 2019 Aug N(2.6, 0.013) 2.62
## 9 "ETS(season_adjust ~ error(\"A\") + trend(\"N\… 2019 Sep N(2.6, 0.014) 2.62
## 10 "ETS(season_adjust ~ error(\"A\") + trend(\"N\… 2019 Oct N(2.6, 0.016) 2.62
## 11 "ETS(season_adjust ~ error(\"A\") + trend(\"N\… 2019 Nov N(2.6, 0.017) 2.62
## 12 "ETS(season_adjust ~ error(\"A\") + trend(\"N\… 2019 Dec N(2.6, 0.018) 2.62