Exponential Smoothing Homework
Question 1
A.)
Look at the initial plot of pigs in victoria:
## Look at the data
vic_pigs <- aus_livestock %>%
filter(Animal == "Pigs" & State == "Victoria")
autoplot(vic_pigs) + ggtitle("The Count of Pigs in Victoria, Australia")
## Plot variable not specified, automatically selected `.vars = Count`

## Fit an ANN model to our data
fit <- vic_pigs %>%
model(ETS(Count ~ error("A") + trend("N") + season("N")))
report(fit)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
The model’s optimal alpha value is 0.322 and optimal l(0) value is
100646
Here is the forecast that is produced for 4 months out
fc <- fit %>%
forecast(h = "4 months")
fc
## # A fable: 4 x 6 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month Count .mean
## <fct> <fct> <chr> <mth> <dist> <dbl>
## 1 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Jan N(95187, 8.7e+07) 95187.
## 2 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Feb N(95187, 9.7e+07) 95187.
## 3 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Mar N(95187, 1.1e+08) 95187.
## 4 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Apr N(95187, 1.1e+08) 95187.
The fitted values for the model:
augment_fit <-augment(fit)
augment_fit
## # A tsibble: 558 x 8 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month Count .fitted .resid .innov
## <fct> <fct> <chr> <mth> <dbl> <dbl> <dbl> <dbl>
## 1 Pigs Victoria "ETS(Count ~ error(\… 1972 Jul 94200 100647. -6447. -6447.
## 2 Pigs Victoria "ETS(Count ~ error(\… 1972 Aug 105700 98570. 7130. 7130.
## 3 Pigs Victoria "ETS(Count ~ error(\… 1972 Sep 96500 100867. -4367. -4367.
## 4 Pigs Victoria "ETS(Count ~ error(\… 1972 Oct 117100 99460. 17640. 17640.
## 5 Pigs Victoria "ETS(Count ~ error(\… 1972 Nov 104600 105142. -542. -542.
## 6 Pigs Victoria "ETS(Count ~ error(\… 1972 Dec 100500 104968. -4468. -4468.
## 7 Pigs Victoria "ETS(Count ~ error(\… 1973 Jan 94700 103529. -8829. -8829.
## 8 Pigs Victoria "ETS(Count ~ error(\… 1973 Feb 93900 100685. -6785. -6785.
## 9 Pigs Victoria "ETS(Count ~ error(\… 1973 Mar 93200 98499. -5299. -5299.
## 10 Pigs Victoria "ETS(Count ~ error(\… 1973 Apr 78000 96792. -18792. -18792.
## # ℹ 548 more rows
Compute the 95% confidence interval
## Get mean
mean <- fc$.mean[1]
## Get standard deviation
std <- sd(augment_fit$.resid)
##
top_95 <- mean + (std * 1.96)
bot_95 <- mean - (std * 1.96)
paste("The top 95% confidence interval: ", round(top_95, 0))
## [1] "The top 95% confidence interval: 113502"
paste("The bottom 95% confidence interval: ",round(bot_95, 0))
## [1] "The bottom 95% confidence interval: 76871"
Find the interval produced by R:
r_confidence <- vic_pigs %>%
filter(!is.na(Count)) %>%
model(ETS(Count ~ error("A") + trend("N") + season("N"))) %>%
forecast(h = "4 months") %>%
hilo(level = 95) %>%
mutate(lower = `95%`$lower,upper =`95%`$upper)
r_confidence
## # A tsibble: 4 x 9 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month Count .mean `95%`
## <fct> <fct> <chr> <mth> <dist> <dbl> <hilo>
## 1 Pigs Victor… "ETS(… 2019 Jan N(95187, 8.7e+07) 95187. [76854.79, 113518.3]95
## 2 Pigs Victor… "ETS(… 2019 Feb N(95187, 9.7e+07) 95187. [75927.17, 114445.9]95
## 3 Pigs Victor… "ETS(… 2019 Mar N(95187, 1.1e+08) 95187. [75042.22, 115330.9]95
## 4 Pigs Victor… "ETS(… 2019 Apr N(95187, 1.1e+08) 95187. [74194.54, 116178.6]95
## # ℹ 2 more variables: lower <dbl>, upper <dbl>
paste("The top 95% confidence inverval produced by R:", round(r_confidence$upper[1], 0))
## [1] "The top 95% confidence inverval produced by R: 113518"
paste("The bottom 95% confidence inverval produced by R:", round(r_confidence$lower[1], 0))
## [1] "The bottom 95% confidence inverval produced by R: 76855"
paste("The difference between the top confidence interval: ", round(r_confidence$upper[1] - top_95, 0))
## [1] "The difference between the top confidence interval: 16"
paste("The difference between the bottom confidence interval: ", round(r_confidence$lower[1] - bot_95, 0))
## [1] "The difference between the bottom confidence interval: -16"
There is a difference of 16 between the intervals
Exercise 5
A.)
## Only Use german data
german_econ <- global_economy %>%
filter(Country == "Germany" & !is.na(Exports))
german_econ %>%
autoplot(Exports)

The main feature of the timeseries is the upward trend that is
occuring overtime. There may be a seasonal component or cyclical
component but it is not as clear as the trend.
B.)
fit1 <- german_econ %>%
model(ETS(Exports ~ error("A") + trend("N") + season("N")))
report(fit1)
## Series: Exports
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.9999
##
## Initial states:
## l[0]
## 14.97361
##
## sigma^2: 3.2299
##
## AIC AICc BIC
## 246.0527 246.5982 251.6663
## Forecast the data 6 years out
fc1 <- fit1 %>%
forecast(h = 6)
fitted <- augment(fit1)
fitted
## # A tsibble: 48 x 7 [1Y]
## # Key: Country, .model [1]
## Country .model Year Exports .fitted .resid .innov
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Germany "ETS(Exports ~ error(\"A\") … 1970 15.2 15.0 0.196 0.196
## 2 Germany "ETS(Exports ~ error(\"A\") … 1971 14.6 15.2 -0.570 -0.570
## 3 Germany "ETS(Exports ~ error(\"A\") … 1972 14.6 14.6 -0.00718 -0.00718
## 4 Germany "ETS(Exports ~ error(\"A\") … 1973 15.4 14.6 0.804 0.804
## 5 Germany "ETS(Exports ~ error(\"A\") … 1974 18.3 15.4 2.89 2.89
## 6 Germany "ETS(Exports ~ error(\"A\") … 1975 17.2 18.3 -1.13 -1.13
## 7 Germany "ETS(Exports ~ error(\"A\") … 1976 18.1 17.2 0.964 0.964
## 8 Germany "ETS(Exports ~ error(\"A\") … 1977 18.0 18.1 -0.0919 -0.0919
## 9 Germany "ETS(Exports ~ error(\"A\") … 1978 17.7 18.0 -0.344 -0.344
## 10 Germany "ETS(Exports ~ error(\"A\") … 1979 17.9 17.7 0.188 0.188
## # ℹ 38 more rows
## Plot the forecast
fc1 %>% autoplot(german_econ) +
geom_line(aes(y = .fitted),linetype = "dotted", col = "red", data = augment(fit1)) +
guides(colour = "colorbar") + labs(title = "ANN forecast of German Exports")

The red dotted line is the fitted data. We can see that without the
seasonal trend being accounted for, a simple naive method is applied to
predict the data.
C.)
fit1 %>% accuracy()
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Germany "ETS(Exports… Trai… 0.672 1.76 1.31 2.21 4.73 0.982 0.990 -0.00124
The RMSE of the ANN model is 1.76
D.)
## Fit the AAN model
fit2 <- german_econ %>%
model(ETS(Exports ~ error("A") + trend("A") + season("N")))
fc2 <- fit2 %>%
forecast(h = 6)
Plot the AAN Forecast:
fc2 %>% autoplot(german_econ) +
geom_line(aes(y = .fitted),linetype = "dotted", col = "red", data = augment(fit2)) +
guides(colour = "colorbar") + labs(title = "AAN forecast of German Exports")

fit2 %>% accuracy()
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Germany "ETS(Export… Trai… -0.0121 1.62 1.11 -0.571 4.14 0.832 0.914 0.0220
The benefit of ANN model is that it is simpler. If the models
contained the same RMSE, then the ANN model would be the optimal choice.
The merits of the AAN model is that it captures trend. Overall, I think
that the AAN model is a better approximation than the ANN model. It has
an RMSE of 1.62, while the ANN model is 1.76.
E.)
Overall the AAN model is a better predictor. It has a lower RMSE,
while also accounting for the trend in the data that is clearly
occuring.
F.)
Using RMSE Values for the ANN Model:
mean1 <- fc1$.mean[1]
sd1 <- sd(augment(fit1)$.resid)
top_95 <- mean1 + (sd1 * 1.96)
bot_95 <- mean1 - (sd1 * 1.96)
paste("The top 95% confidence interval: ", round(top_95, 1))
## [1] "The top 95% confidence interval: 50.5"
paste("The bottom 95% confidence interval: ",round(bot_95, 1))
## [1] "The bottom 95% confidence interval: 44"
Using R to calculate confidence interval for ANN Model:
r_confidence_ann <- german_econ %>%
filter(!is.na(Exports)) %>%
model(ETS(Exports ~ error("A") + trend("N") + season("N"))) %>%
forecast(h = "6 years") %>%
hilo(level = 95) %>%
mutate(lower = `95%`$lower,upper =`95%`$upper)
r_confidence_ann
## # A tsibble: 6 x 8 [1Y]
## # Key: Country, .model [1]
## Country .model Year Exports .mean `95%` lower upper
## <fct> <chr> <dbl> <dist> <dbl> <hilo> <dbl> <dbl>
## 1 Germany "ETS(Export… 2018 N(47, 3.2) 47.2 [43.71329, 50.75818]95 43.7 50.8
## 2 Germany "ETS(Export… 2019 N(47, 6.5) 47.2 [42.25450, 52.21697]95 42.3 52.2
## 3 Germany "ETS(Export… 2020 N(47, 9.7) 47.2 [41.13509, 53.33638]95 41.1 53.3
## 4 Germany "ETS(Export… 2021 N(47, 13) 47.2 [40.19138, 54.28009]95 40.2 54.3
## 5 Germany "ETS(Export… 2022 N(47, 16) 47.2 [39.35994, 55.11153]95 39.4 55.1
## 6 Germany "ETS(Export… 2023 N(47, 19) 47.2 [38.60827, 55.86320]95 38.6 55.9
paste("The top 95% confidence inverval produced by R:", round(r_confidence_ann$upper[1], 1))
## [1] "The top 95% confidence inverval produced by R: 50.8"
paste("The bottom 95% confidence inverval produced by R:", round(r_confidence_ann$lower[1], 1))
## [1] "The bottom 95% confidence inverval produced by R: 43.7"
paste("The difference between the top confidence interval: ", round(r_confidence_ann$upper[1] - top_95, 1))
## [1] "The difference between the top confidence interval: 0.3"
paste("The difference between the bottom confidence interval: ", round(r_confidence_ann$lower[1] - bot_95, 1))
## [1] "The difference between the bottom confidence interval: -0.3"
There is only about a 0.3 difference in the values for the
confidence intervals for the ANN model
Now calculate the confidence intervals for the AAN models:
mean2 <- fc2$.mean[1]
sd2 <- sd(augment(fit2)$.resid)
top_95 <- mean2 + (sd2 * 1.96)
bot_95 <- mean2 - (sd2 * 1.96)
paste("The top 95% confidence interval: ", round(top_95, 1))
## [1] "The top 95% confidence interval: 51.1"
paste("The bottom 95% confidence interval: ",round(bot_95, 1))
## [1] "The bottom 95% confidence interval: 44.7"
r_confidence_ann2 <- german_econ %>%
filter(!is.na(Exports)) %>%
model(ETS(Exports ~ error("A") + trend("A") + season("N"))) %>%
forecast(h = "6 years") %>%
hilo(level = 95) %>%
mutate(lower = `95%`$lower,upper =`95%`$upper)
r_confidence_ann2
## # A tsibble: 6 x 8 [1Y]
## # Key: Country, .model [1]
## Country .model Year Exports .mean `95%` lower upper
## <fct> <chr> <dbl> <dist> <dbl> <hilo> <dbl> <dbl>
## 1 Germany "ETS(Export… 2018 N(48, 2.9) 47.9 [44.59370, 51.24660]95 44.6 51.2
## 2 Germany "ETS(Export… 2019 N(49, 5.6) 48.6 [43.99421, 53.24048]95 44.0 53.2
## 3 Germany "ETS(Export… 2020 N(49, 8.2) 49.3 [43.68572, 54.94336]95 43.7 54.9
## 4 Germany "ETS(Export… 2021 N(50, 11) 50.0 [43.53130, 56.49217]95 43.5 56.5
## 5 Germany "ETS(Export… 2022 N(51, 14) 50.7 [43.47633, 57.94153]95 43.5 57.9
## 6 Germany "ETS(Export… 2023 N(51, 16) 51.4 [43.49240, 59.31986]95 43.5 59.3
paste("The top 95% confidence inverval produced by R:", round(r_confidence_ann2$upper[1], 1))
## [1] "The top 95% confidence inverval produced by R: 51.2"
paste("The bottom 95% confidence inverval produced by R:", round(r_confidence_ann2$lower[1], 1))
## [1] "The bottom 95% confidence inverval produced by R: 44.6"
paste("The difference between the top confidence interval: ", round(r_confidence_ann2$upper[1] - top_95, 1))
## [1] "The difference between the top confidence interval: 0.1"
paste("The difference between the bottom confidence interval: ", round(r_confidence_ann2$lower[1] - bot_95, 1))
## [1] "The difference between the bottom confidence interval: -0.1"
There is only about a 0.1 difference in the values for the
confidence intervals for the AAN model
Exercise 6
## Create Dataset
chinese_gdp <- global_economy %>%
filter(Country == "China" & !is.na(GDP))
chinese_gdp %>% autoplot()
## Plot variable not specified, automatically selected `.vars = GDP`

Create Models
lambda <- chinese_gdp %>%
features(GDP,features = guerrero) %>% pull(lambda_guerrero)
gdp_mod <- chinese_gdp %>%
model("ANN" = ETS(GDP ~ error("A") + trend("N") + season("N")),
"AAN"= ETS(GDP ~ error("A") + trend("A") + season("N")),
"AAN Damped" = ETS(GDP ~ error("A") + trend("Ad", phi = 0.90) + season("N")),
"Box-Cox AAN" = ETS(box_cox(GDP, lambda) ~ error("A") + trend("A") + season("N")))
gdp_fc <- gdp_mod %>%
forecast(h = 20)
gdp_fc %>% autoplot(chinese_gdp, level = NULL)

Of all the models, the AAN appears to be the best one, purely based
on visuals.
Exercise 7
gas <- aus_production %>%
filter(!is.na(Gas))
The gas plot shows an upward trend with a seasonality effect that
changes over time.
gas %>% autoplot(Gas)

gas_mod <- gas %>% model("MAM" = ETS(Gas ~ error("M") + trend("A") + season("M")),
"MAM damped"= ETS(Gas ~ error("M") + trend("Ad", phi = 0.9) + season("M")))
gas_fc <- gas_mod %>%
forecast(h = "8 years")
gas_fc %>% autoplot(gas, level = NULL) +
labs(title = "Gas Trends Forecasted with Multiplicative Models")

Multiplicative seasonality is necessary as there is variance in the
seasonality effects for the data. Non homogenous seasonality fits better
to a multiplicative model.
gas_mod %>% accuracy()
## # A tibble: 2 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MAM Training -0.115 4.60 3.02 0.199 4.08 0.542 0.606 -0.0131
## 2 MAM damped Training 0.255 4.58 3.04 0.655 4.15 0.545 0.604 -0.00451
based on the RMSE, the MAM model that is damped is slightly better,
but the difference is small.
Exercise 8
set.seed(123458)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries %>% autoplot(Turnover)

A.)
The plot of turnover shows that there is a seasonal component to the
data but that the seasonality effect on the data is changing over time.
It would be more suitable to use multiplicative seasonality because this
better accounts for changing seasonality effects.
retail_model <- myseries %>% model("Multiplicative Holt-Winters" = ETS(Turnover ~ error("M") + trend("A") + season("M")),
"Multiplicative Holt-Winters Damped" = ETS(Turnover ~ error("M") + trend("Ad", phi = 0.9) + season("M")))
fc <- retail_model %>% forecast(h = 20)
fc %>% autoplot(myseries, level = NULL)

C.)
retail_model %>% accuracy()
## # A tibble: 2 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Northe… Food re… Multi… Trai… 0.114 2.00 1.51 0.0223 2.27 0.362 0.371 0.217
## 2 Northe… Food re… Multi… Trai… 0.318 2.03 1.52 0.397 2.29 0.366 0.377 0.157
The model that is not damped has a slightly lower RMSE, so this
would be the optimal model to choose
D.)
retail_model[,1:3] %>% gg_tsresiduals() + labs(title = "Multiplicative Holt-Winters")

retail_model[,c(1,2,4)] %>% gg_tsresiduals() + labs(title = "Multiplicative Holt-Winters Damped")

Based on the residuals plot generated for both models, the residuals
look like white noise. They both have normal looking residuals and the
distribution of the innovation residuals seems random about 0, with a
mean sum of 0
E.)
train <- myseries %>% filter(year(Month) <= 2010)
model_pred <- train %>%
model("Multiplicative Holt-Winters" = ETS(Turnover ~ error("M") + trend("A") + season("M")),
"Naive Model" = SNAIVE(Turnover))
forecast_retail <- model_pred %>%
forecast(new_data = anti_join(myseries, train))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
forecast_retail %>% autoplot(myseries, level = NULL)

model_pred %>% accuracy()
## # A tibble: 2 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Nort… Food re… Multi… Trai… 0.0390 1.78 1.37 -0.0933 2.47 0.342 0.356 0.199
## 2 Nort… Food re… Naive… Trai… 3.10 4.99 3.99 5.28 6.99 1 1 0.823
The multiplicative Holt-winters model outperforms the naive model.
The RMSE of the Holt-Winters model is 1.37 while its 4.99 for the Naive
Model.
Exercise 9
lambda <- myseries %>%
features(Turnover,features = guerrero) %>%
pull(lambda_guerrero)
stl <- myseries %>%
model(STL(box_cox(Turnover, lambda) ~ season(window = "periodic"),robust = TRUE)) %>%
components()
stl %>%
autoplot()

myseries$sa <- stl$season_adjust
training <- myseries %>%
filter(year(Month)<= 2010)
model <- training %>%
model(ETS(sa ~ error("M") + trend("A") + season("M")))
forecast_sa <- model %>% forecast(new_data = anti_join(myseries, training))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover, sa)`
model %>% accuracy()
## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 North… Food re… "ETS(… Trai… -1.18e-4 0.0243 0.0194 -0.00261 0.558 0.340 0.362
## # ℹ 1 more variable: ACF1 <dbl>
This model that is transformed has an RMSE of 0.024 while our
previous model has an RMSE of 1.78. This is an improvement over our
previous model.