Tanzil_DATAa_624_ Homework5

library(fpp3)
library(dplyr)
library(ggplot2)

8.1 Pigs in Victoria

pigs_victoria <- aus_livestock %>%
  filter(Animal == "Pigs", State == "Victoria")

autoplot(pigs_victoria, Count) +
  labs(title = "Pigs Slaughtered in Victoria")

(a)Use the ETS() function to estimate the equivalent model for simple exponential smoothing.

fit <- pigs_victoria %>% model(ets = ETS(Count))

report(fit)

## Series: Count 
## Model: ETS(A,N,A) 
##   Smoothing parameters:
##     alpha = 0.3579401 
##     gamma = 0.0001000139 
## 
##   Initial states:
##     l[0]     s[0]    s[-1]     s[-2]     s[-3]     s[-4]     s[-5]   s[-6]
##  95487.5 2066.235 6717.311 -2809.562 -1341.299 -7750.173 -8483.418 5610.89
##      s[-7]    s[-8]     s[-9]   s[-10]   s[-11]
##  -579.8107 1215.464 -2827.091 1739.465 6441.989
## 
##   sigma^2:  60742898
## 
##      AIC     AICc      BIC 
## 13545.38 13546.26 13610.24

The ETS(A,N,N) model is the same as simple exponential smoothing.

fit %>% forecast(h = 4) %>% autoplot(pigs_victoria) + labs(title = "Forcast of the Number of Pigs Slaughtered: A,N,A")

The output above gives the estimated values of alpha and l[0], and the forecasts for the next four months.

(c) Manual 95% interval

variance = 60742898

upper_bound = 95487.5 +  1.96 * sqrt(variance)
lower_bound = 95487.5 - 1.96 * sqrt(variance)

print(paste("The 95% prediction interval is", round(lower_bound, 2), " - ", round(upper_bound, 2)))

## [1] "The 95% prediction interval is 80211.7  -  110763.3"

“The 95% prediction interval is 80211.7 - 110763.3”

#Using R by piping in the fable or forecast object into the hilo() function

# Forecast the next 4 periods 
forecast_pigs <- fit %>% forecast(h = 4)

# Pull CI from the forecast using the hilo function
pigs_hilo <- forecast_pigs %>% hilo(level = 95)

pigs_hilo %>% slice(1) %>% select("95%")

Comparing the intervals for the first forecast (January 2019), the interval calculated using y±1.96s is slightly higher. There is significant overlap between each interval.

# Create a tibble with the intervals
lb <- c(69149.19, 80211.7)
ub <- c(99700.22, 110763.3)

intervals <- tibble(calculation = c("R", "Manual"), lower = lb, 
       upper = ub)

intervals

# Create line segment plot to visualize the 
intervals %>% ggplot() + geom_segment(aes(x = lower, y = calculation, xend = upper, yend = calculation, color = calculation)) + 
  geom_point(aes(x = c(lower[2],upper[2]), y = 1, color = calculation[2])) +
   geom_point(aes(x = c(lower[1],upper[1]), y = 2, color = calculation[1])) + 
  labs(title = "95% Interval Prediction")

8.5 Exports from global_economy

canada_econ <- global_economy %>%
  filter(Country == "Canada")

autoplot(canada_econ, Exports) +
  labs(title = "Canadian Exports", y = "Exports (% GDP)")

The series has no seasonality because it is annual data. It moves up and down over time, so both a simple model and a trend model can be compared.

(a) ETS(A,N,N)

fit_ann <- canada_econ %>%
  model(ANN = ETS(Exports ~ error("A") + trend("N") + season("N")))

report(fit_ann)

## Series: Exports 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.9998999 
## 
##   Initial states:
##      l[0]
##  16.96593
## 
##   sigma^2:  2.7105
## 
##      AIC     AICc      BIC 
## 297.3032 297.7477 303.4846

fc_ann <- fit_ann %>% forecast(h = 5)

autoplot(fc_ann, canada_econ) +
  labs(title = "Canadian Exports Forecast: ANN")

(b) RMSE

accuracy(fit_ann) %>% select(RMSE)

(c) Compare with ETS(A,A,N)

fit_models <- canada_econ %>%
  model(
    ANN = ETS(Exports ~ error("A") + trend("N") + season("N")),
    AAN = ETS(Exports ~ error("A") + trend("A") + season("N"))
  )

accuracy(fit_models) %>% select(.model, RMSE)

fit_models %>%
  forecast(h = 5) %>%
  autoplot(canada_econ, level = NULL) +
  labs(title = "Canadian Exports: ANN vs AAN", y = "% GDP")

The ANN and AAN models are similar, but ANN may be preferred if it has slightly lower RMSE and is simpler.

(d) Manual 95% interval

fc_exports <- fit_models %>% forecast(h = 5)
rmse_table <- accuracy(fit_models) %>% select(.model, RMSE)

fc_first <- fc_exports %>% as_tibble() %>% group_by(.model) %>% slice(1)

left_join(fc_first, rmse_table, by = ".model") %>%
  mutate(
    lower = .mean - 1.96 * RMSE,
    upper = .mean + 1.96 * RMSE
  ) %>%
  select(.model, .mean, lower, upper)

fc_exports %>% hilo(level = 95)

The manual intervals are close to the intervals from R.

8.6 Chinese GDP

china_gdp <- global_economy %>%
  filter(Country == "China")

autoplot(china_gdp, GDP) +
  labs(title = "China GDP")

fit_china <- china_gdp %>%
  model(
    ETS_auto = ETS(GDP),
    ETS_damped = ETS(GDP ~ trend("Ad")),
    ETS_boxcox = ETS(box_cox(GDP, 0.3))
  )

fc_china <- fit_china %>% forecast(h = 15)

autoplot(fc_china, china_gdp) +
  labs(title = "China GDP Forecasts")

The standard ETS model keeps strong growth, the damped model slows the growth, and the Box-Cox version smooths the series.

8.7 Gas data

gas_data <- aus_production %>%
  select(Quarter, Gas)

autoplot(gas_data, Gas) +
  labs(title = "Australian Gas Production")

Multiplicative seasonality is needed because the seasonal pattern gets bigger when the level gets bigger.

fit_gas <- gas_data %>%
  model(
    MAM = ETS(Gas ~ error("M") + trend("A") + season("M")),
    MAdM = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
  )

accuracy(fit_gas) %>% select(.model, RMSE)

fit_gas %>%
  forecast(h = 12) %>%
  autoplot(gas_data) +
  labs(title = "Gas Forecasts")

If the damped model does not lower RMSE, then it does not improve the forecasts.

8.8 Retail data

aus_household_goods <- aus_retail %>%
  filter(State == "Victoria", Industry == "Household goods retailing")

autoplot(aus_household_goods, Turnover) +
  labs(title = "Victoria Household Goods Retailing")

Multiplicative seasonality is needed because the seasonal changes become larger as turnover increases.

(a) Holt-Winters models

fit_hw <- aus_household_goods %>%
  model(
    HW = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    HW_damped = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
  )

accuracy(fit_hw) %>% select(.model, RMSE)

The model with the lower RMSE is preferred.

(b) Residuals of best model

fit_hw %>%
  select(HW_damped) %>%
  gg_tsresiduals()

## Warning: `gg_tsresiduals()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsresiduals()` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

If the residuals show no strong pattern, then they are close to white noise.

(c) Test set RMSE

train <- aus_household_goods %>%
  filter(year(Month) <= 2010)

test <- aus_household_goods %>%
  filter(year(Month) > 2010)

fit_train <- train %>%
  model(HW_damped = ETS(Turnover ~ error("M") + trend("Ad") + season("M")))

fc_train <- fit_train %>% forecast(h = "8 years")

accuracy(fc_train, aus_household_goods)

(d) Compare with seasonal naive

fit_snaive <- train %>% model(SNAIVE(Turnover))
fc_snaive <- fit_snaive %>% forecast(h = "8 years")

accuracy(fc_snaive, aus_household_goods)

If the Holt-Winters RMSE is smaller than the seasonal naive RMSE, then Holt-Winters is better.

##(e)

# Train set up to end of 2010
train <- aus_household_goods %>%
  filter(year(Month) <= 2010)

# Fit damped Holt-Winters multiplicative model
fit_train <- train %>%
  model(
    HW_damped = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
  )

# Forecast next 8 years
fc_train <- fit_train %>%
  forecast(h = "8 years")

# Test set RMSE
accuracy(fc_train, aus_household_goods)

# Seasonal naive comparison
fit_snaive <- train %>%
  model(SNAIVE(Turnover))

fc_snaive <- fit_snaive %>%
  forecast(h = "8 years")

accuracy(fc_snaive, aus_household_goods)

The Holt-Winters multiplicative damped model has a lower test RMSE than the seasonal naïve model, so it gives better forecasts.

8.9 STL + Box-Cox + ETS

fit_stl <- train %>%
  model(
    STL_ETS = decomposition_model(
      STL(box_cox(Turnover, 0.3)),
      ETS(season_adjust)
    )
  )

fc_stl <- fit_stl %>% forecast(h = nrow(test))

accuracy(fc_stl, test)

If STL + ETS gives a smaller test RMSE than the best Holt-Winters model, then STL + ETS is better.