DATA 624 HW5

suppressPackageStartupMessages(library(fpp2))
suppressPackageStartupMessages(library(forecast))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(fpp3))
suppressPackageStartupMessages(library(purrr))
suppressPackageStartupMessages(library(seasonal))

Home Work #5

Question 8.1

Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.

Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of α and ℓ0, and generate forecasts for the next four months.
Compute a 95% prediction interval for the first forecast using ^y ± 1.96s where s is the standard deviation of the residuals. Compare your interval with the interval produced by R.

victoria_pigs <-
    aus_livestock %>% 
    filter(Animal == 'Pigs' & State == 'Victoria')

victoria_pigs_model <-
    victoria_pigs %>% 
    model(ann = ETS(Count ~ error('A') + season('N') + trend('N'), opt_crit = 'mse'))

victoria_pigs$Count %>%
  ets(model = "ANN") %>%
    forecast(h = 4) -> victoria_pigs_model
    
victoria_pigs_model %>%  
autoplot()

pigs_ses <- ses(pigs, h=4)

s <- sd(pigs_ses$residuals)
mean_pigs_ses <- pigs_ses$mean[1]
c(mean_pigs_ses - (1.96*s),mean_pigs_ses + (1.96*s))

## [1]  78679.97 118952.84

Question 8.5

Data set global_economy contains the annual Exports from many countries. Select one country to analyse.

Plot the Exports series and discuss the main features of the data.
Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.
Compute the RMSE values for the training data.
Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.
Compare the forecasts from both methods. Which do you think is best?
Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.

global_economy %>% 
    filter(Country == 'Jamaica') -> jamaica_exports

jamaica_exports %>%
  autoplot(Exports)

jmfit <- global_economy %>%
  filter(Country == "Jamaica") %>%
  model(ETS(Exports ~ error("A") + trend("N") + season("N"))) 

fc <- jmfit %>%
  forecast(h = 5)

fc %>%
  autoplot(global_economy) +
  labs(y="% of GDP", title="Exports: Jamaica", subtitle = "ETS(A,N,N)")

(cdef)

jmfit2 <- global_economy %>%
  filter(Country == "Jamaica") %>%
  model(ETS(Exports ~ error("A") + trend("A") + season("N")))

fc2 <- jmfit2 %>%
  forecast(h = 5)

fc2 %>%
  autoplot(global_economy) +
  labs(y="% of GDP", title="Exports: Jamaica", subtitle = "ETS(A,A,N)")

jamaica_exports %>%
  filter(Year <= year(as.Date("2000-01-01"))) -> jamaica_train

jamaica_train$Exports %>%
    ets(model = "ANN") %>% 
    forecast(h = 2) -> jamaica_ann

jamaica_train$Exports %>%
    ets(model = "AAN")  %>% 
    forecast(h = 2) -> jamaica_aan

#RMSE
sqrt(mean(jamaica_ann$residuals^2))

## [1] 6.042543

sqrt(mean(jamaica_aan$residuals^2))

## [1] 6.039002

jamaica_ann

##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 42       39.04492 31.10502 46.98483 26.90188 51.18796
## 43       39.04492 29.66663 48.42322 24.70206 53.38779

jamaica_aan

##    Point Forecast   Lo 80    Hi 80    Lo 95   Hi 95
## 42       39.29540 31.1485 47.44229 26.83579 51.7550
## 43       39.43227 29.8600 49.00455 24.79275 54.0718

Both methods yield very similiar results. They both seem to suggest exports will remain at the same level

Question 8.6

Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.

[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]

china_economy <-
    global_economy %>% 
    filter(Country == 'China') %>% 
    filter_index('1990' ~ .)

autoplot(china_economy, .vars = GDP)

china_aan <-
    china_economy %>%
    model(
        aan = ETS(GDP ~ error('A') + trend('A') + season('N')),
        aan_.9_phi = ETS(GDP ~ error('A') + trend('Ad', phi = .9) + season('N')),
        aan_.8_phi = ETS(GDP ~ error('A') + trend('Ad', phi = .8) + season('N')),
    )

china_aan %>% 
    forecast(h = 10) %>% 
    autoplot(china_economy, level = NULL)

gdp_lambda <-
    china_economy %>% 
    features(GDP, features = guerrero) %>% 
    pull(lambda_guerrero)

china_box_cox_ets <-
    china_economy %>% 
    model(
        ets_bc = ETS(box_cox(GDP, gdp_lambda)),
        ets_log = ETS(log(GDP)),
        aan = ETS(GDP ~ error('A') + trend('A') + season('N')),
        aan_.9_phi = ETS(GDP ~ error('A') + trend('Ad', phi = .9) + season('N')),
        aan_.8_phi = ETS(GDP ~ error('A') + trend('Ad', phi = .8) + season('N')),
    )

china_box_cox_ets %>% 
    forecast(h = 15) %>% 
    autoplot(china_economy, level = NULL)

china_box_cox_ets %>% 
    augment() %>% 
    ggplot() +
    geom_line(aes(Year, .fitted, colour = .model)) +
    geom_line(aes(Year, GDP))

Question 8.7

Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?

#EXPLORATION
summary(aus_production$Gas)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.00   11.25  105.00   99.21  163.75  252.00

aus_production %>%
    autoplot(Gas)

#MODEL
aus_gas_mdl <-
    aus_production %>% 
    model(
        aaa = ETS(Gas ~ error('A') + trend('A') + season('A')),
        aam = ETS(Gas ~ error('A') + trend('A') + season('M')),
        aan_damp = ETS(Gas ~ error('A') + trend('Ad') + season('M'))
    )

aus_gas_mdl %>%
    forecast(h = 10) %>% 
    autoplot(filter_index(aus_production, '2009' ~ .), level = NULL)

aus_gas_mdl %>% 
    accuracy() %>%
    select(.model, RMSE)

## # A tibble: 3 × 2
##   .model    RMSE
##   <chr>    <dbl>
## 1 aaa       4.76
## 2 aam       4.19
## 3 aan_damp  4.22

Question 8.8

Recall your retail time series data (from Exercise 8 in Section 2.10).

Why is multiplicative seasonality necessary for this series?
Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.
Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?
Check that the residuals from the best method look like white noise.
Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?

q8_retail <-
    aus_retail %>%
    filter(`Series ID` == sample(aus_retail$`Series ID`,1))

autoplot(q8_retail, .vars = Turnover)

Seasonal peaks grow higher. (Multiplicative seasonality is required)

hsales_model <-
    q8_retail %>%
    model(
        ets_mam = ETS(Turnover ~ error('M') + trend('A') + season('M')),
        ets_mam_damp_.9 = ETS(Turnover ~ error('M') + trend('Ad', phi = .9) + season('M')),
        ets_mam_damp_.8 = ETS(Turnover ~ error('M') + trend('Ad', phi = .8) + season('M'))
    )

hsales_model %>% 
    accuracy() %>% 
    select(.model, RMSE)

## # A tibble: 3 × 2
##   .model           RMSE
##   <chr>           <dbl>
## 1 ets_mam          3.05
## 2 ets_mam_damp_.9  3.07
## 3 ets_mam_damp_.8  3.07

Dampening to improve the accuracy

hsales_model %>% 
    select(ets_mam_damp_.8) %>% 
    gg_tsresiduals()

q8_test_set <- 
    q8_retail %>% 
    filter_index('2011-1' ~ '2017-12')

q8_train_set <-
    q8_retail %>% 
    filter_index(. ~ '2010-12')

q8_train_forecast <-
    q8_train_set %>% 
    model(
        snaive = SNAIVE(Turnover),
        ets_mam = ETS(Turnover ~ error('M') + trend('A') + season('M')),
        ets_mam_damp_.8 = ETS(Turnover ~ error('M') + trend('Ad', phi = .8) + season('M')),
        ets_mam_damp_.9 = ETS(Turnover ~ error('M') + trend('Ad', phi = .9) + season('M'))
    ) %>% 
    forecast(h = 84)

q8_train_forecast %>% 
    autoplot(q8_test_set, level = NULL)

accuracy(q8_train_forecast, q8_test_set) %>% select(.model, RMSE)

## # A tibble: 4 × 2
##   .model           RMSE
##   <chr>           <dbl>
## 1 ets_mam         12.9 
## 2 ets_mam_damp_.8  9.84
## 3 ets_mam_damp_.9  9.78
## 4 snaive           6.43

Question 8.9

For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?

q9_lambda <-
    q8_train_set %>% 
    features(Turnover, features = guerrero) %>% 
    pull(lambda_guerrero) %>%
    print()

## [1] -0.01563213

# Transform
q9_retail_train <-
    q8_train_set %>% 
    mutate(Turnover_BoxCox = box_cox(Turnover, q9_lambda))

q9_retail_test <-
    q8_test_set %>% 
    mutate(Turnover_BoxCox = box_cox(Turnover, q9_lambda))

q9_retail_train %>%
    pivot_longer(c(Turnover, Turnover_BoxCox)) %>% 
    ggplot() +
    geom_line(aes(Month, value, colour = name))

q9_model_stl <-
    q9_retail_train %>% 
    model(stl = STL(Turnover_BoxCox))

q9_model_stl %>%
    components() %>% 
    pivot_longer(c(Turnover_BoxCox, season_adjust)) %>% 
    ggplot() +
    geom_line(aes(Month, value, colour = name))

q9_forecast <-
    q8_train_set %>%
    model(
        decomposition_model(
            STL(box_cox(Turnover, lambda = q9_lambda)),
            ETS(season_adjust)
        )
    ) %>% 
    forecast(h = 84)

q9_forecast %>% 
    autoplot(q9_retail_test)

q9_forecast %>% 
    accuracy(q9_retail_test) %>% 
    select(RMSE)

## # A tibble: 1 × 1
##    RMSE
##   <dbl>
## 1  44.9