Consider the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.
# First, let's get the pigs in Victoria
pigs <- aus_livestock %>%
  filter(State == 'Victoria',
         Animal == 'Pigs')
# Now let's fit an automatic ETS model and have it tell us the best specification
fit <- pigs %>%
model(ETS(Count))
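# Added check (not in the original write-up): report() shows which specification
# was selected automatically; its output is not reproduced here.
report(fit)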
# We can see that it is an ETS(A,N,A) model, so now let's fit that model explicitly and forecast four months ahead (no extra work needed since pigs is already at a monthly level)
fit_pig <- pigs %>%
model(ETS(Count ~ error("A") + trend("N") + season("A")))
fc_pig <- fit_pig %>%
fabletools::forecast(h = 4)
fc_pig
## # A fable: 4 x 6 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month Count .mean
## <fct> <fct> <chr> <mth> <dist> <dbl>
## 1 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Jan N(84425, 6.1e+07) 84425.
## 2 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Feb N(85158, 6.9e+07) 85158.
## 3 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Mar N(91567, 7.6e+07) 91567.
## 4 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Apr N(90098, 8.4e+07) 90098.
# Great, now we have our model; let's take a look at the forecast. To make it easier to see, we'll only show data since 2010, since a cycle seemed to be beginning there.
pigs_2010 <- pigs %>% filter(year(Month) >= 2010)
fc_pig %>%
autoplot(pigs_2010) +
labs(y = "Count", title = "Victorian Pigs")
y_hat <- fc_pig$.mean[1]  # point forecast for the first month
pig_acc <- fabletools::accuracy(fit_pig)
# Build an approximate 95% interval from the training RMSE, assuming normal errors
u_95 <- y_hat + (pig_acc$RMSE * 1.96)
l_95 <- y_hat - (pig_acc$RMSE * 1.96)
paste0("The lower limit is ",round(l_95,2),". The upper limit is: ",round(u_95,2),".")
## [1] "The lower limit is 69341.76. The upper limit is: 99507.65."
# Get the R calculated values
pig_interval <- fc_pig %>% hilo()
pig_interval$`95%`[1]
## <hilo[1]>
## [1] [69149.19, 99700.22]95
My manually computed interval is slightly narrower than the one calculated by hilo(), but overall they are not drastically different.
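As a cross-check (a sketch added here, not part of the original workflow), the same interval can be rebuilt from the forecast distribution itself, which should reproduce the hilo() values:

# Sketch: the forecast distribution's own standard deviation gives the same 95%
# interval that hilo() reports (uses the distributional package that fable builds on).
fc_sd <- sqrt(distributional::variance(fc_pig$Count[1]))
c(lower = y_hat - 1.96 * fc_sd, upper = y_hat + 1.96 * fc_sd)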
Data set global_economy contains the annual Exports from many countries. Select one country to analyse.
set.seed(1)
# A- Plot the Exports series and discuss the main features of the data.
ind_exports <- global_economy %>%
filter(Country == 'India')
summary(ind_exports)
## Country Code Year GDP
## India :58 IND :58 Min. :1960 Min. :3.654e+10
## Afghanistan : 0 ABW : 0 1st Qu.:1974 1st Qu.:9.742e+10
## Albania : 0 AFG : 0 Median :1988 Median :2.754e+11
## Algeria : 0 AGO : 0 Mean :1988 Mean :5.459e+11
## American Samoa: 0 ALB : 0 3rd Qu.:2003 3rd Qu.:5.767e+11
## Andorra : 0 AND : 0 Max. :2017 Max. :2.601e+12
## (Other) : 0 (Other): 0
## Growth CPI Imports Exports
## Min. :-5.238 Min. : 2.527 Min. : 3.750 Min. : 3.342
## 1st Qu.: 3.821 1st Qu.: 7.455 1st Qu.: 6.416 1st Qu.: 4.974
## Median : 5.947 Median : 20.364 Median : 8.568 Median : 6.955
## Mean : 5.370 Mean : 40.693 Mean :12.408 Mean :10.550
## 3rd Qu.: 7.453 3rd Qu.: 60.494 3rd Qu.:15.718 3rd Qu.:14.932
## Max. :10.260 Max. :159.829 Max. :31.259 Max. :25.431
## NA's :1
## Population
## Min. :4.495e+08
## 1st Qu.:6.106e+08
## Median :8.434e+08
## Mean :8.616e+08
## 3rd Qu.:1.103e+09
## Max. :1.339e+09
##
ind_exports %>%
autoplot(Exports) +
labs(y = "Exports as a % of GDP", title = "Indian Exports")
# I chose India because it is a fascinating country whose insular economy rapidly joined the international economy in the 2000s, plus I own a Royal Enfield motorcycle. The series is complete, with no missing years. Exports show an upward trend with little to no seasonality or cyclic behaviour, although the most recent years appear to be trending downward.
# B - Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.
fit_ind <- ind_exports %>%
model(ETS(Exports ~ error("A") + trend("N") + season("N")))
report(fit_ind)
## Series: Exports
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.9998965
##
## Initial states:
## l[0]
## 4.507901
##
## sigma^2: 1.4709
##
## AIC AICc BIC
## 261.8526 262.2971 268.0339
# Forecast the next 5 years (h = 5 with annual data)
fc_ind <- fit_ind %>%
fabletools::forecast(h = 5)
fc_ind %>%
autoplot(ind_exports) +
labs(y="Exports as a % of GDP", title="Indian Exports for the Next 5 years")
paste0("The mean estimate is: ", round(fc_ind$.mean[1],2),"%")
## [1] "The mean estimate is: 19.05%"
# C - Compute the RMSE values for the training data.
fabletools::accuracy(fit_ind)
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 India "ETS(Exports … Trai… 0.251 1.19 0.798 2.05 7.25 0.983 0.991 -0.0331
# The training RMSE is about 1.19 percentage points, which is fairly high given that Exports ranges from roughly 3% to 25% of GDP
# D - Compare the results to those from an ETS(A,A,N) model. Discuss the merits of the two forecasting methods for this data set.
fit_ind_2 <- ind_exports %>%
model(ETS(Exports ~ error("A") + trend("A") + season("N")))
fabletools::accuracy(fit_ind_2)
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 India "ETS(Export… Trai… -0.0501 1.18 0.736 0.239 6.98 0.906 0.978 -0.0294
# We can see that the RMSE is slightly lower at 1.18.
# The difference is that the AAN model includes a trend component, which matches the upward trend we noted at the start.
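# Because training RMSE can flatter the more flexible model, a time-series
# cross-validation gives a fairer comparison. A sketch (added here, not in the
# original answer; the 10-year initial window is an assumption):
ind_cv <- ind_exports %>%
  stretch_tsibble(.init = 10, .step = 1)
ind_cv %>%
  model(
    ANN = ETS(Exports ~ error("A") + trend("N") + season("N")),
    AAN = ETS(Exports ~ error("A") + trend("A") + season("N"))
  ) %>%
  fabletools::forecast(h = 1) %>%
  fabletools::accuracy(ind_exports)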
# E - Compare the forecasts from both methods. Which do you think is best?
fc_ind_2 <- fit_ind_2 %>%
fabletools::forecast(h = 5)
fc_ind_2 %>%
autoplot(ind_exports) +
labs(y="Exports as a % of GDP", title="Indian Exports for the Next 5 years")
# I find the AAN forecasts more believable, since the series appears to be entering a negative trend.
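# A side-by-side plot of the two sets of forecasts makes this comparison easier
# to see (a sketch added here; prediction intervals are suppressed with
# level = NULL for readability):
ind_exports %>%
  model(
    ANN = ETS(Exports ~ error("A") + trend("N") + season("N")),
    AAN = ETS(Exports ~ error("A") + trend("A") + season("N"))
  ) %>%
  fabletools::forecast(h = 5) %>%
  autoplot(ind_exports, level = NULL) +
  labs(y = "Exports as a % of GDP", title = "ANN vs AAN forecasts")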
# F - Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.
# First forecast from the ANN model
y_hat_i <- fc_ind$.mean[1]
ind_acc <- fabletools::accuracy(fit_ind)
l_95_i <- y_hat_i - (ind_acc$RMSE * 1.96)
u_95_i <- y_hat_i + (ind_acc$RMSE * 1.96)
paste0("The lower limit is ",round(l_95_i,2),". The upper limit is: ",round(u_95_i,2),".")
## [1] "The lower limit is 16.71. The upper limit is: 21.38."
i_hilo <- fc_ind %>% hilo()
i_hilo$`95%`[1]
## <hilo[1]>
## [1] [16.66831, 21.42248]95
# First forecast from the AAN model
y_hat_i_2 <- fc_ind_2$.mean[1]
ind_2_acc <- fabletools::accuracy(fit_ind_2)
l_95_i_2 <- y_hat_i_2 - (ind_2_acc$RMSE * 1.96)
u_95_i_2 <- y_hat_i_2 + (ind_2_acc$RMSE * 1.96)
paste0("The lower limit is ",round(l_95_i_2,2),". The upper limit is: ",round(u_95_i_2,2),".")
## [1] "The lower limit is 15.88. The upper limit is: 20.49."
i_hilo_2 <- fc_ind_2 %>% hilo()
i_hilo_2$`95%`[1]
## <hilo[1]>
## [1] [15.79928, 20.57683]95
Again, the intervals are similar, but the manually calculated ones are slightly narrower. This is expected: hilo() works from the model's estimated residual standard deviation, which includes a degrees-of-freedom correction, whereas the manual intervals use the slightly smaller training RMSE.
Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.
chn_gdp <- global_economy %>%
filter(Country == 'China') %>%
mutate(GDP = GDP/1e12) %>%
select(c(Country, Code, Year, GDP))
chn_gdp %>%
autoplot(GDP) +
labs(y="Trillions USD", title="China's GDP")
fit_c_t <- chn_gdp %>%
model(ETS(GDP))
report(fit_c_t)
## Series: GDP
## Model: ETS(M,A,N)
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.3589511
##
## Initial states:
## l[0] b[0]
## 0.05793671 0.001243994
##
## sigma^2: 0.0098
##
## AIC AICc BIC
## -108.97922 -107.82538 -98.67701
fit_chn <- chn_gdp %>%
  model(
    "Holt's Linear Method (AAN)" = ETS(GDP ~ error("A") + trend("A") + season("N")),
    "Damped Holt's Method (AAdN)" = ETS(GDP ~ error("A") + trend("Ad") + season("N")),
    "MAdN Method" = ETS(GDP ~ error("M") + trend("Ad") + season("N"))
  )
# Forecast for 15 years
fc_ch <- fit_chn %>% fabletools::forecast(h = 15)
fc_ch %>%
autoplot(chn_gdp) +
labs(y="Trillions USD", title="China's GDP")
We can see that all three methods produce broadly similar forecasts, although the two
damped methods (AAdN and MAdN) show growth slowing down. Now let's see what happens
with a Box-Cox transformation. Note to the reader: once the forecast library is
loaded it masks forecast() and accuracy() from fabletools, so those calls must be
prefixed with fabletools:: when forecasting time series.
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'forecast'
## The following objects are masked from 'package:fabletools':
##
## accuracy, forecast
lambda <- BoxCox.lambda(chn_gdp$GDP)
chn_gdp$BCGDP = BoxCox(chn_gdp$GDP, lambda)
fit_b <- chn_gdp %>%
  model(
    "Holt's Linear Method (AAN)" = ETS(BCGDP ~ error("A") + trend("A") + season("N")),
    "Damped Holt's Method (AAdN)" = ETS(BCGDP ~ error("A") + trend("Ad") + season("N")),
    "MAdN Method" = ETS(BCGDP ~ error("M") + trend("Ad") + season("N"))
  )
# Forecast for 15 years
fc_chb <- fit_b %>% fabletools::forecast(h = 15)
fc_chb %>%
  autoplot(chn_gdp) +
  labs(y = "Box-Cox transformed GDP", title = "China's GDP (Box-Cox scale)")
With the Box-Cox transformation, the forecasts from the two damped methods (AAdN
and MAdN) are dampened even further and eventually start to decrease, while regular
Holt's linear method still shows continued growth.
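One wrinkle with the approach above is that the forecasts stay on the transformed scale. A sketch (assuming fabletools' transformation interface and reusing the lambda estimated earlier) of declaring the Box-Cox transformation inside the model formula, so that fable back-transforms the forecasts onto the original GDP scale automatically:

fit_bc <- chn_gdp %>%
  model(
    "Damped Holt's, Box-Cox" = ETS(box_cox(GDP, lambda) ~ error("A") + trend("Ad") + season("N"))
  )
fit_bc %>%
  fabletools::forecast(h = 15) %>%
  autoplot(chn_gdp) +
  labs(y = "Trillions USD", title = "China's GDP, damped trend fitted on the Box-Cox scale")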
Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped.
aus_gas <- aus_production %>%
select(Quarter, Gas)
aus_gas %>%
autoplot(Gas)
fit_gt <- aus_gas %>%
model(ETS(Gas))
report(fit_gt)
## Series: Gas
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.6528545
## beta = 0.1441675
## gamma = 0.09784922
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
##
## sigma^2: 0.0032
##
## AIC AICc BIC
## 1680.929 1681.794 1711.389
fit_g <- aus_gas %>%
model(ETS(Gas ~ error("M") + trend("A") + season("M")))
fabletools::accuracy(fit_g)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Gas ~ error(\"M… Trai… -0.115 4.60 3.02 0.199 4.08 0.542 0.606 -0.0131
fc_g <- fit_g %>% fabletools::forecast(h = 8)
fc_g %>%
autoplot(aus_gas) +
labs(y="Gas in Petajoules", title="Australian Gas Production")
Looking at the report() output, we can see that an ETS(M,A,M) model is selected,
i.e. multiplicative seasonality is needed. As the textbook puts it, the
multiplicative method "is preferred when the seasonal variations are changing
proportional to the level of the series": the peaks and troughs occur at the same
points in each year, but their size grows as the series trends upward, which we can
see in the original plot as well as in the forecast above.
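To make that visual argument concrete, an STL decomposition (a sketch added here, not part of the original answer) shows the seasonal component growing in amplitude along with the level of the series:

# Sketch: with the default seasonal window, the estimated seasonal component is
# allowed to evolve, and its swings visibly widen over time.
aus_gas %>%
  model(STL(Gas)) %>%
  components() %>%
  autoplot()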
fit_g2 <- aus_gas %>%
model(ETS(Gas ~ error("M") + trend("Ad") + season("M")))
fabletools::accuracy(fit_g2)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Gas ~ error(\… Trai… -0.00439 4.59 3.03 0.326 4.10 0.544 0.606 -0.0217
fc_g2 <- fit_g2 %>% fabletools::forecast(h = 8)
fc_g2 %>%
autoplot(aus_gas) +
labs(y="Gas in Petajoules", title="Australian Gas Production")
Does damping improve the forecasts? Not meaningfully: the training RMSE barely
changes (4.59 with damping versus 4.60 without) and the other error measures are
essentially unchanged, so damping makes little practical difference here.
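Damping mostly matters further out. A sketch (added here; the 10-year horizon is an arbitrary choice) plotting both models over a longer horizon makes the effect of the damped trend visible:

aus_gas %>%
  model(
    MAM = ETS(Gas ~ error("M") + trend("A") + season("M")),
    `MAM damped` = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
  ) %>%
  fabletools::forecast(h = 40) %>%
  autoplot(aus_gas, level = NULL) +
  labs(y = "Gas in Petajoules", title = "Damped vs undamped trend over 10 years")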
Recall your retail time series data (from Exercise 8 in Section 2.10)
set.seed(43)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
autoplot(myseries)
## Plot variable not specified, automatically selected `.vars = Turnover`
A - Why is multiplicative seasonality necessary for this series?
Same reason as in 8.7: the seasonal variation grows with the level of the series, so an additive seasonal component would not capture it.
B - Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.
fit_ms <- myseries %>%
model(
M_S = ETS(Turnover ~ error("M") + trend("A") + season("M")),
M_S_D = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
)
# Forecast for 2 years
fc_ms <- fit_ms %>% fabletools::forecast(h = 24)
fc_ms %>%
autoplot(myseries) +
labs(y="AUD", title="Australian Turnover")
C -Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?
fabletools::accuracy(fit_ms)
## # A tibble: 2 × 12
## State Indus…¹ .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Western Au… Hardwa… M_S Trai… -0.290 5.51 3.83 -0.881 5.44 0.384 0.427
## 2 Western Au… Hardwa… M_S_D Trai… -0.0302 5.54 3.85 -0.0428 5.41 0.386 0.429
## # … with 1 more variable: ACF1 <dbl>, and abbreviated variable name ¹Industry
I prefer the MAM model without damping, since its training RMSE (5.51) is slightly lower than that of the damped version (5.54).
D -Check that the residuals from the best method look like white noise.
fit_ms %>% select(M_S) %>% gg_tsresiduals()
The innovation residuals fluctuate around a constant mean of zero with
near-constant variance, which is consistent with white noise.
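A Ljung-Box test gives a more formal check (a sketch added here; the choice of 24 lags is an assumption, not from the original answer). A large p-value would support the white-noise conclusion:

fit_ms %>%
  select(M_S) %>%
  augment() %>%
  features(.innov, ljung_box, lag = 24)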
E - Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?
fit_mst <- myseries %>%
model(ETS(Turnover))
report(fit_mst)
## Series: Turnover
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.6600442
## beta = 0.0001105009
## gamma = 0.2810096
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5]
## 9.961231 0.4646899 0.8616126 0.8958572 0.9259468 2.025057 1.164239 0.9498344
## s[-6] s[-7] s[-8] s[-9] s[-10] s[-11]
## 0.9152002 0.8486757 0.845662 0.8271247 0.8864711 0.8543195
##
## sigma^2: 0.006
##
## AIC AICc BIC
## 4172.225 4173.672 4241.739
myseries_train <- myseries |>
filter(year(Month) < 2011)
myseries_test <- myseries |>
filter(year(Month) > 2010)
fit_mss <- myseries_train |>
model(
M_S = ETS(Turnover ~ error("M") + trend("A") + season("M")),
"SNAIVE" = SNAIVE(Turnover ~ lag("year"))
)
fcmss <- fit_mss |> fabletools::forecast(h = 96)
fcmss %>% fabletools::accuracy(myseries_test)
## # A tibble: 2 × 12
## .model State Indus…¹ .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 M_S Western … Hardwa… Test 37.7 41.8 37.7 21.9 21.9 NaN NaN 0.887
## 2 SNAIVE Western … Hardwa… Test 59.2 64.2 59.2 34.7 34.7 NaN NaN 0.923
## # … with abbreviated variable name ¹Industry
fcmss %>%
autoplot(myseries_test) +
labs(y = "AUD", title = "Retail Turnover")
Yes, in fact we did beat it. The test-set RMSE for the MAM model (41.8) is well below that of SNAIVE (64.2), and the forecast plot shows the MAM forecasts tracking the actual values much more closely than SNAIVE does.
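As an aside, the MASE and RMSSE columns above are NaN because accuracy() was only given the test window and therefore has no training data to scale against. A sketch (added here) of the same call against the full series, which fills in the scaled measures:

fcmss %>% fabletools::accuracy(myseries)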
For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?
# Box-Cox transform the series, estimating lambda with the forecast package
lambda <- BoxCox.lambda(myseries$Turnover)
myseries$TurnoverBC <- BoxCox(myseries$Turnover, lambda)
myseries_stl_ets <- myseries %>%
  model(
    "STL BC" = STL(TurnoverBC ~ season(window = "periodic"), robust = TRUE),
    "ETS BC" = ETS(TurnoverBC ~ error("M") + trend("A") + season("M"))
  )
myseries_stl_ets %>% fabletools::accuracy()
## # A tibble: 2 × 12
## State Indus…¹ .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Western Au… Hardwa… STL BC Trai… 0.0354 0.621 0.465 -0.0782 3.50 0.376 0.409
## 2 Western Au… Hardwa… ETS BC Trai… -0.0628 0.676 0.490 -0.457 3.41 0.396 0.446
## # … with 1 more variable: ACF1 <dbl>, and abbreviated variable name ¹Industry
We can see that the STL decomposition has a better training fit (lower RMSE) than the ETS(M,A,M) fitted to the same Box-Cox transformed data; the MAM model was our best performer without the transformation. Note, though, that these error measures are on the Box-Cox scale, so they are not directly comparable with the earlier RMSE values on the original scale.
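The exercise also asks for STL followed by ETS on the seasonally adjusted data, which can be expressed directly with fabletools' decomposition_model(). A sketch of that approach under the same train/test split as before (the ETS specification for the seasonally adjusted series, the reuse of lambda, and the expectation that the Box-Cox transformation declared on the STL response is back-transformed automatically are all assumptions; its output is not reproduced here):

fit_dcmp <- myseries_train %>%
  model(
    stl_ets = decomposition_model(
      STL(box_cox(Turnover, lambda) ~ season(window = "periodic"), robust = TRUE),
      ETS(season_adjust ~ error("A") + trend("A") + season("N"))
    )
  )
fit_dcmp %>%
  fabletools::forecast(h = 96) %>%
  fabletools::accuracy(myseries)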