p <- aus_livestock %>%
filter(Animal == 'Pigs' & State == 'Victoria')
pigs <- p %>%
autoplot(Count) +
labs(title = 'Timeseries')
pigs
a. Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of α and ℓ0 , and generate forecasts for the next four months.
fit <- p%>%
model(ses = ETS(Count ~ error('A') + trend('N') + season('N')))
opt_val <- fit %>% report()
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
pigsforecast <- fit %>%
forecast(h = 4)
pigsforecast
## # A fable: 4 x 6 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month Count .mean
## <fct> <fct> <chr> <mth> <dist> <dbl>
## 1 Pigs Victoria ses 2019 Jan N(95187, 8.7e+07) 95187.
## 2 Pigs Victoria ses 2019 Feb N(95187, 9.7e+07) 95187.
## 3 Pigs Victoria ses 2019 Mar N(95187, 1.1e+08) 95187.
## 4 Pigs Victoria ses 2019 Apr N(95187, 1.1e+08) 95187.
plot <- fit %>%
forecast(h = 4) %>%
autoplot(filter(p, Month >= yearmonth('2016 Jan'))) +
labs(title = 'Four Month Forecast')
plot
y<- pigsforecast %>%
pull(Count) %>%
head(1)
sD <- augment(fit) %>%
pull(.resid) %>%
sd()
# Calculate the lower and upper confidence intervals.
lowerCi <- y - 1.96 * sD
upperCi <- y + 1.96 * sD
z <- c(lowerCi, upperCi)
names(z) <- c('Lower', 'Upper')
z
## <distribution[2]>
## Lower Upper
## N(76871, 8.7e+07) N(113502, 8.7e+07)
The 95% prediction interval for the first forecast is from 76871 to 113502.
hilo(pigsforecast$Count, 95)
## <hilo[4]>
## [1] [76854.79, 113518.3]95 [75927.17, 114445.9]95 [75042.22, 115330.9]95
## [4] [74194.54, 116178.6]95
The intervals calculated by R function are slightly different.
a.Plot the Exports series and discuss the main features of the data.
pol <- global_economy %>%
filter(Country == 'Poland')
head(pol,15)
## # A tsibble: 15 x 9 [1Y]
## # Key: Country [1]
## Country Code Year GDP Growth CPI Imports Exports Population
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Poland POL 1960 NA NA NA NA NA 29637450
## 2 Poland POL 1961 NA NA NA NA NA 29964000
## 3 Poland POL 1962 NA NA NA NA NA 30308500
## 4 Poland POL 1963 NA NA NA NA NA 30712000
## 5 Poland POL 1964 NA NA NA NA NA 31139450
## 6 Poland POL 1965 NA NA NA NA NA 31444950
## 7 Poland POL 1966 NA NA NA NA NA 31681000
## 8 Poland POL 1967 NA NA NA NA NA 31987155
## 9 Poland POL 1968 NA NA NA NA NA 32294655
## 10 Poland POL 1969 NA NA NA NA NA 32548300
## 11 Poland POL 1970 NA NA 0.0210 NA NA 32664300
## 12 Poland POL 1971 NA NA 0.0213 NA NA 32783500
## 13 Poland POL 1972 NA NA 0.0213 NA NA 33055650
## 14 Poland POL 1973 NA NA 0.0218 NA NA 33357200
## 15 Poland POL 1974 NA NA 0.0233 NA NA 33678899
pol %>%
autoplot(Exports) +
labs(y="% of GDP", title="Exports: Poland") +
guides(colour = "none")
## Warning: Removed 35 row(s) containing missing values (geom_path).
It looks like the export in Poland started after 1989 after the economic reform by Balcerowicz, where the reform had a temporary dropped but it boomed around 2000.
b.Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.
pol1 <- na.omit(pol)
fit <- pol1 %>%
model(ETS(Exports ~ error("A") + trend("N") + season("N")))
report(fit)
## Series: Exports
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.9998999
##
## Initial states:
## l[0]
## 22.96177
##
## sigma^2: 4.6429
##
## AIC AICc BIC
## 111.3369 112.6000 114.7434
fc <- fit %>%
forecast(h = 5)
fc%>%
autoplot(pol) +
labs(y="% of GDP", title="Exports: Poland") +
guides(colour = "none")
## Warning: Removed 35 row(s) containing missing values (geom_path).
fc
## # A fable: 5 x 5 [1Y]
## # Key: Country, .model [1]
## Country .model Year Exports .mean
## <fct> <chr> <dbl> <dist> <dbl>
## 1 Poland "ETS(Exports ~ error(\"A\") + trend(\"N\") + s~ 2018 N(54, 4.6) 54.3
## 2 Poland "ETS(Exports ~ error(\"A\") + trend(\"N\") + s~ 2019 N(54, 9.3) 54.3
## 3 Poland "ETS(Exports ~ error(\"A\") + trend(\"N\") + s~ 2020 N(54, 14) 54.3
## 4 Poland "ETS(Exports ~ error(\"A\") + trend(\"N\") + s~ 2021 N(54, 19) 54.3
## 5 Poland "ETS(Exports ~ error(\"A\") + trend(\"N\") + s~ 2022 N(54, 23) 54.3
Above plot shows the forecast for the next 5 years. The forecast value is 54.3(%).
acc <- accuracy(fit)
acc
## # A tibble: 1 x 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Poland "ETS(Exports ~~ Trai~ 1.36 2.06 1.72 3.55 4.88 0.957 0.978 -0.112
The RMSE of the training data for the ETS(A,N,N) model is 2.05.
fit2 <- pol1 %>%
model(ETS(Exports ~ error("A") + trend("A") + season("N")))
acc2 <- accuracy(fit2)
acc2
## # A tibble: 1 x 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Poland "ETS(Export~ Trai~ 6.74e-4 1.47 1.19 -0.462 3.70 0.663 0.700 0.0291
The RMSE of the training data for the ETS(A,A,N) model is 1.47. A bit better performance by the ETS(A,A,N) model over the ETS(A,N,N) model. The ETS(A,A,N) model takes into account trending, which the initial plot of Exports clearly indicated, thats why it performed better.
fc2 <- fit2 %>%
forecast(h = 5)
fc2 %>%
autoplot(pol1) +
labs(y="% of GDP", title="Exports: Poland") +
guides(colour = "none")
fc2
## # A fable: 5 x 5 [1Y]
## # Key: Country, .model [1]
## Country .model Year Exports .mean
## <fct> <chr> <dbl> <dist> <dbl>
## 1 Poland "ETS(Exports ~ error(\"A\") + trend(\"A\") + s~ 2018 N(55, 2.6) 55.4
## 2 Poland "ETS(Exports ~ error(\"A\") + trend(\"A\") + s~ 2019 N(57, 3.9) 56.9
## 3 Poland "ETS(Exports ~ error(\"A\") + trend(\"A\") + s~ 2020 N(58, 5.2) 58.3
## 4 Poland "ETS(Exports ~ error(\"A\") + trend(\"A\") + s~ 2021 N(60, 6.5) 59.7
## 5 Poland "ETS(Exports ~ error(\"A\") + trend(\"A\") + s~ 2022 N(61, 7.8) 61.2
The forecast values for the ETS(A,N,N) model is the same value 54.29 for every year forward. In the above plot and forecast values for the ETS(A,A,N) model shows, the predicted values for ETS(A,A,N) increases due to the additive trend parameter. The constant increase for this model of 1.4 visually appear more in line with the increasing trend of the Exports data.
y <- fc$.mean[1]
lower <- y - (acc$RMSE * 1.96)
upper <- y + (acc$RMSE * 1.96)
z <- c(lower, upper)
z
## [1] 50.2544 58.3254
y2 <- fc2$.mean[1]
lower2 <- y2 - (acc2$RMSE * 1.96)
upper2 <- y2 + (acc2$RMSE * 1.96)
z2 <- c(lower2, upper2)
z2
## [1] 52.52691 58.29927
hilo1 <- fc %>% hilo()
hilo1$`95%`[1]
## <hilo[1]>
## [1] [50.06668, 58.51312]95
Above is the 95% interval for the ETS(A,N,N) model produced by R. The R produced values are 2.46 lower for the lower limit and 0.19 lower for the higher limit.
hilo2 <- fc2 %>% hilo()
hilo2$`95%`[1]
## <hilo[1]>
## [1] [52.23766, 58.58852]95
Above is the 95% interval for the ETS(A,A,N) model produced by R. The R produced values are 0.29 lower for the lower limit and 0.07 higher for the higher limit.
As noted in Exercise 8.1 the R produced confidence intervals appear a bit broader than the calculated values, but given the small differences, the calculated and R produced confidence intervals do appear in line with each other.
6.Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.
[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]
options(scipen = 999)
# Extract Chinese GDP data from the global_economy dataset.
chGDP <- global_economy %>%
filter(Country == 'China')
# Create a plot of the data.
chGDP %>% autoplot(GDP) +
labs(title = 'Chinese GDP')
fit <-chGDP%>%
model(
SES = ETS(GDP ~ error("A") + trend("N") + season("N")),
Holt = ETS(GDP ~ error("A") + trend("A") + season("N")),
Damped = ETS(GDP ~ error("A") + trend("Ad") + season("N"))
)
fc <- fit %>% forecast(h = 20)
fc %>%
autoplot(chGDP, level=NULL) +
labs(y="USD", title="GDP: China") +
guides(colour = guide_legend(title = "Forecast"))
Above plot shows trends for the next 20 years of the Chinese GDP. The exponential smoothing forecast produces a horizontal line which doesn’t appear to follow the increasing trend. The Holt forecast shows an increasing forecast with constant growth. And the Holt dampened forecast shows increasing trend that does slow over time. The Holt dampened forecast appears the most predictable values as show some slow increases in future.
lambda <- chGDP %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)
fit2 <- chGDP %>%
model(
SES = ETS(box_cox(GDP, lambda) ~ error("A") + trend("N") + season("N")),
Holt = ETS(box_cox(GDP, lambda) ~ error("A") + trend("A") + season("N")),
Damped = ETS(box_cox(GDP, lambda) ~ error("A") + trend("Ad") + season("N"))
)
fc2 <- fit2 %>% forecast(h = 20)
fc2 %>%
autoplot(chGDP, level=NULL) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed China GDP with $\\lambda$ = ",
round(lambda,2)))) +
guides(colour = guide_legend(title = "Forecast"))
Applying the Box-Cox transformation along with the ETS() function shows some interesting results. The simple exponential smoothing model produced a horizontal line. The Holt approach produced a forecast that follows an exponential growth that show straight up trend after 5 years. The dampened method also starts to increase more steady not quite as the trend of the Holt method. Given above methods I would pick the dampened method as it is the most realistic looking.
7.Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?
aus_production %>%
autoplot(Gas)
fit <- aus_production %>%
model(fit = ETS(Gas))
report(fit)
## Series: Gas
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.6528545
## beta = 0.1441675
## gamma = 0.09784922
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
##
## sigma^2: 0.0032
##
## AIC AICc BIC
## 1680.929 1681.794 1711.389
fit %>%
forecast(h =10 ) %>%
autoplot(aus_production)
Multiplicative seasonality is necessary due to the fact that the seasonal variation trends upwards over time.
fit2 <- aus_production %>%
model(fit = ETS(Gas ~ trend('Ad', phi = 0.9)))
fit2 %>%
forecast(h = 10) %>%
autoplot(aus_production)
Comparing the damped and non damped trend plots above, making the trend damped does not appear to improve the forecast.
8.Recall your retail time series data (from Exercise 8 in Section 2.10).
set.seed(666)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
myseries %>% autoplot(Turnover)
a. Why is multiplicative seasonality necessary for this series?
Because the retail time series displays increasing proportional seasonality as the level of the series increases.
fit <- myseries %>%
model(
'Holt Winters Multiplicative Method' = ETS(Turnover ~ error('M') + trend('A') + season('M')),
'Holt Winters Damped Method' = ETS(Turnover ~ error('M') + trend('Ad') + season('M'))
)
HoltWinters <- fit %>% forecast(h = 10)
HoltWinters %>% autoplot(myseries, level = NULL)
c. Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?
accuracy(fit) %>% select('.model', 'RMSE')
## # A tibble: 2 x 2
## .model RMSE
## <chr> <dbl>
## 1 Holt Winters Multiplicative Method 15.0
## 2 Holt Winters Damped Method 14.6
The multiplicative method has a slightly lower RMSE than that of the damped method suggesting that this may be the more accurate choice for the time series.
fit %>%
select('Holt Winters Multiplicative Method') %>%
gg_tsresiduals()
tThe risiduals plot and historgram above show that the residuals for the multiplicative method look like white noise with the exception of a few outliers. The ACF confirms that most of the risiduals are within bounds.
train <- myseries %>%
filter(year(Month) < 2011)
fit <- train %>%
model(
'Holt Winters Multiplicative Method' = ETS(Turnover ~ error('M') + trend('A') + season('M')),
'Holt Winters Damped Method' = ETS(Turnover ~ error('M') + trend('Ad') + season('M')),
'Seasonal Naive' = SNAIVE(Turnover)
)
comp <- anti_join(myseries, train, by = c('State', 'Industry', 'Series ID', 'Month', 'Turnover'))
fc <- fit %>% forecast(comp)
autoplot(comp, Turnover) +
autolayer(fc, level = NULL) +
labs(title = 'Forecast Method Comparison')
The above method comparison plot shows that both the Holt Winters Damped and Multiplicative methods overthrown the Seasonal Niave method, and that the Multiplicative method is the most accurate.
9.For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?
lambda <- train %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
mybc <- train %>%
mutate(
bc = box_cox(Turnover, lambda)
)
fit <- mybc %>%
model(
'STL Box-Cox' = STL(bc ~ season(window = 'periodic'), robust = TRUE),
'ETS Box-Cox' = ETS(bc)
)
fit2 <- mybc %>%
model(
'Holt Winters Multiplicative Method' = ETS(Turnover ~ error('M') + trend('A') + season('M'))
)
accuracy(fit)
## # A tibble: 2 x 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 New S~ Hardwar~ STL B~ Trai~ 0.320 12.9 8.67 -0.399 5.40 0.421 0.507 0.355
## 2 New S~ Hardwar~ ETS B~ Trai~ -1.74 13.2 9.37 -1.47 5.85 0.455 0.519 0.221
accuracy(fit2)
## # A tibble: 1 x 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 New S~ Hardware~ Holt ~ Trai~ -1.14 13.5 9.55 -0.912 5.79 0.450 0.515 0.247
Looking at the RMSE values of the STL and ETS Box-Cox methods, we can see that both these methods are more accurate than our previous most accurate ‘Holt Winters Multiplicative’ method that has an RMSE.