data624 assignment5

8.1 a

library(fpp3)

## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──

## ✔ tibble      3.1.6     ✔ tsibble     1.1.3
## ✔ dplyr       1.0.7     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.1.4     ✔ feasts      0.3.0
## ✔ lubridate   1.8.0     ✔ fable       0.3.2
## ✔ ggplot2     3.3.5     ✔ fabletools  0.3.2

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

# Rows of aus_livestock for pigs in Victoria.
victpigs <- filter(aus_livestock, Animal == "Pigs", State == "Victoria")

# ETS with additive error, no trend and no seasonality, fitted to Count.
fit <- model(
  victpigs,
  ses = ETS(Count ~ error("A") + trend("N") + season("N"))
)

# Print the fitted model summary.
fit %>% report()

## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3221247 
## 
##   Initial states:
##      l[0]
##  100646.6
## 
##   sigma^2:  87480760
## 
##      AIC     AICc      BIC 
## 13737.10 13737.14 13750.07

# Forecast four steps ahead from the fitted model and show the result.
fc4m <- forecast(fit, h = 4)
print(fc4m)

## # A fable: 4 x 6 [1M]
## # Key:     Animal, State, .model [1]
##   Animal State    .model    Month             Count  .mean
##   <fct>  <fct>    <chr>     <mth>            <dist>  <dbl>
## 1 Pigs   Victoria ses    2019 Jan N(95187, 8.7e+07) 95187.
## 2 Pigs   Victoria ses    2019 Feb N(95187, 9.7e+07) 95187.
## 3 Pigs   Victoria ses    2019 Mar N(95187, 1.1e+08) 95187.
## 4 Pigs   Victoria ses    2019 Apr N(95187, 1.1e+08) 95187.

8.1 b

# Point forecast for the first horizon. Use the numeric .mean column rather
# than the Count distribution column, so the bounds below are plain numbers
# instead of shifted distributions.
yHat <- fc4m$.mean[1]

# Standard deviation of the residuals. Stored as resid_sd so the variable does
# not share a name with the sd() function it is computed with.
resid_sd <- augment(fit) %>%
  pull(.resid) %>%
  sd()

# 95% prediction interval assuming normal errors: yHat +/- 1.96 * s.
lower <- yHat - 1.96 * resid_sd
upper <- yHat + 1.96 * resid_sd
results <- c(lower, upper)
results

## <distribution[2]>
## [1] N(76871, 8.7e+07)  N(113502, 8.7e+07)

8.5 Data set global_economy contains the annual Exports from many countries. Select one country to analyse.

Plot the Exports series and discuss the main features of the data.

# Show a preview of the global_economy data.
print(global_economy)

## # A tsibble: 15,150 x 9 [1Y]
## # Key:       Country [263]
##    Country     Code   Year         GDP Growth   CPI Imports Exports Population
##    <fct>       <fct> <dbl>       <dbl>  <dbl> <dbl>   <dbl>   <dbl>      <dbl>
##  1 Afghanistan AFG    1960  537777811.     NA    NA    7.02    4.13    8996351
##  2 Afghanistan AFG    1961  548888896.     NA    NA    8.10    4.45    9166764
##  3 Afghanistan AFG    1962  546666678.     NA    NA    9.35    4.88    9345868
##  4 Afghanistan AFG    1963  751111191.     NA    NA   16.9     9.17    9533954
##  5 Afghanistan AFG    1964  800000044.     NA    NA   18.1     8.89    9731361
##  6 Afghanistan AFG    1965 1006666638.     NA    NA   21.4    11.3     9938414
##  7 Afghanistan AFG    1966 1399999967.     NA    NA   18.6     8.57   10152331
##  8 Afghanistan AFG    1967 1673333418.     NA    NA   14.2     6.77   10372630
##  9 Afghanistan AFG    1968 1373333367.     NA    NA   15.2     8.90   10604346
## 10 Afghanistan AFG    1969 1408888922.     NA    NA   15.0    10.1    10854428
## # … with 15,140 more rows

# Keep only the rows of global_economy for Australia.
afExports <- filter(global_economy, Country == "Australia")

# Time plot of the Exports column.
autoplot(afExports, Exports) +
  labs(title = "Australia Exports")

Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.

# ETS(A,N,N): additive error, no trend, no seasonality.
fit <- model(
  afExports,
  ANN = ETS(Exports ~ error("A") + trend("N") + season("N"))
)

# Forecast four steps ahead.
afExportsfc <- forecast(fit, h = 4)

# Draw the forecasts together with the observed series.
autoplot(afExportsfc, afExports) +
  labs(title = "Australia Exports Forecast")

Compute the RMSE values for the training data.

# Accuracy measures (including RMSE) for the ANN fit.
fit %>% accuracy()

## # A tibble: 1 × 11
##   Country   .model .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <fct>     <chr>  <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 Australia ANN    Training 0.232  1.15 0.914  1.09  5.41 0.928 0.928 0.0125

Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.

# ETS(A,A,N): additive error, additive trend, no seasonality.
fit2 <- model(
  afExports,
  AAN = ETS(Exports ~ error("A") + trend("A") + season("N"))
)

# Accuracy measures for the trended model, kept for later use.
f2acc <- fit2 %>% accuracy()

print(f2acc)

## # A tibble: 1 × 11
##   Country   .model .type           ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <fct>     <chr>  <chr>        <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Australia AAN    Training  -7.46e-7  1.12 0.893 -0.387  5.39 0.907 0.904 0.109

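Ans: AAN fits the training data slightly better (RMSE 1.12 vs 1.15, MAE 0.893 vs 0.914), but the improvement is small given the extra trend parameter, and neither model fits the data very well.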

Compare the forecasts from both methods. Which do you think is best?

The ANN forecasts are flat, while the AAN forecasts are rising.

# Four-step forecasts from the ANN fit, drawn without prediction intervals.
autoplot(forecast(fit, h = 4), afExports, level = NULL)

# The same plot for the AAN fit.
autoplot(forecast(fit2, h = 4), afExports, level = NULL)

Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.

# 95% prediction interval for the first forecast of each model, assuming
# normal errors: point forecast +/- 1.96 * training RMSE.
# Each model's point forecast is paired with that same model's RMSE.
z95 <- 1.96

# ETS(A,N,N): afExportsfc and fit both come from the ANN model.
y_hat <- afExportsfc$.mean[1]
ann_rmse <- accuracy(fit)$RMSE

lower_limit_fc <- y_hat - (ann_rmse * z95)
upper_limit_fc <- y_hat + (ann_rmse * z95)

c(lower_limit_fc, upper_limit_fc)

# ETS(A,A,N): first forecast from fit2, with the RMSE stored in f2acc.
y_hat2 <- forecast(fit2, h = 4)$.mean[1]

lower_limit_fc2 <- y_hat2 - (f2acc$RMSE * z95)
upper_limit_fc2 <- y_hat2 + (f2acc$RMSE * z95)

c(lower_limit_fc2, upper_limit_fc2)

## [1] 18.41837 22.79594

8.6 Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.

# Keep only the rows of global_economy for China.
cnGDP <- filter(global_economy, Country == "China")

# Time plot of the GDP column.
autoplot(cnGDP, GDP) +
  labs(title = "Chinese GDP")

# Three additive-error, non-seasonal ETS variants that differ only in trend:
# none (SES), additive (Holt) and damped additive (Damped).
fit <- model(
  cnGDP,
  SES = ETS(GDP ~ error("A") + trend("N") + season("N")),
  Holt = ETS(GDP ~ error("A") + trend("A") + season("N")),
  Damped = ETS(GDP ~ error("A") + trend("Ad") + season("N"))
)

# Forecast 20 steps ahead with every model.
fc <- forecast(fit, h = 20)

# Point forecasts only (no intervals), drawn over the observed series.
autoplot(fc, cnGDP, level = NULL)

8.7 Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?

# Time plot of the Gas column of aus_production.
autoplot(aus_production, Gas)

# The exercise is about multiplicative seasonality and a damped trend, so both
# candidates carry a multiplicative seasonal component and differ only in
# whether the additive trend is damped.
fit <- aus_production %>%
  model(
    MAM = ETS(Gas ~ error("M") + trend("A") + season("M")),
    MAdM = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
  )

# Training-set accuracy of both fits, to judge whether damping helps.
accuracy(fit)

# Forecast 20 steps ahead with both models.
fc <- fit %>% forecast(h = 20)

# Point forecasts only (no intervals), drawn over the observed series.
fc %>%
  autoplot(aus_production, level = NULL)

8.8

Recall your retail time series data (from Exercise 8 in Section 2.10).

# Fix the RNG seed so the same series is drawn on every run.
set.seed(1111111)

# Draw one Series ID at random and keep only the rows of that series.
picked_id <- sample(aus_retail$`Series ID`, 1)
myseries <- filter(aus_retail, `Series ID` == picked_id)

# Plot the selected series; the plotted variable is left for autoplot to pick.
myseries %>% autoplot()

## Plot variable not specified, automatically selected `.vars = Turnover`

Why is multiplicative seasonality necessary for this series?

Ans: Multiplicative seasonality is necessary because the seasonal variation in the data is not constant; it changes in proportion to the level of the series.

Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.

# Holt-Winters multiplicative models: MAM uses an additive trend, MADM uses a
# damped additive trend.
fitHW <- model(
  myseries,
  MAM = ETS(Turnover ~ error("M") + trend("A") + season("M")),
  MADM = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
)

# Forecast 10 steps ahead with both models.
fcHW <- forecast(fitHW, h = 10)

# Point forecasts only (no intervals), drawn over the observed series.
autoplot(fcHW, myseries, level = NULL)

Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?

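Ans: MADM is slightly better. The two RMSE values are equal to two decimals (2.54), but MADM has marginally lower MAPE (6.80 vs 6.83) and MASE (0.420 vs 0.421).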

# Training-set accuracy measures for both Holt-Winters fits.
accfitHW <- fitHW %>% accuracy()
print(accfitHW)

## # A tibble: 2 × 12
##   State    Industry    .model .type      ME  RMSE   MAE    MPE  MAPE  MASE RMSSE
##   <chr>    <chr>       <chr>  <chr>   <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 Western… Newspaper … MAM    Trai… -0.0918  2.54  1.75 -0.926  6.83 0.421 0.449
## 2 Western… Newspaper … MADM   Trai…  0.0633  2.54  1.75 -0.322  6.80 0.420 0.449
## # … with 1 more variable: ACF1 <dbl>

Check that the residuals from the best method look like white noise. Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?

# Residual diagnostics for the damped Holt-Winters multiplicative model fitted
# to the whole series.
fitHW2 <- myseries %>%
  model(
    'MADM' = ETS(Turnover ~ error('M') + trend('Ad') + season('M'))
  )
fitHW2 %>% gg_tsresiduals()

# Test-set comparison: train on the data up to the end of 2010, forecast the
# remaining observations, and compare RMSE with the seasonal naive benchmark.
hw_train <- myseries %>%
  filter(year(Month) < 2011)
hw_test <- myseries %>%
  filter(year(Month) >= 2011)

hw_train %>%
  model(
    MADM = ETS(Turnover ~ error("M") + trend("Ad") + season("M")),
    SNAIVE = SNAIVE(Turnover)
  ) %>%
  forecast(new_data = hw_test) %>%
  accuracy(myseries)

8.9

For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?

# Training set: observations whose Month falls before 2011.
myseries_train <- filter(myseries, year(Month) < 2011)

# Box-Cox parameter for the training series, chosen with the Guerrero feature.
lambda <- myseries_train %>%
  features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero)

# STL decomposition of the Box-Cox transformed training series.
myseriesstl <- myseries_train %>%
  model(stl = STL(box_cox(Turnover, lambda)))

components(myseriesstl) %>% autoplot()

# STL on the transformed series, followed by a non-seasonal ETS on the
# seasonally adjusted component.
myseriesstl2 <- myseries_train %>%
  model(sts = decomposition_model(
    STL(box_cox(Turnover, lambda)),
    ETS(season_adjust ~ season("N"))
  ))

# Test-set accuracy on the observations from 2011 onwards, for comparison
# with the earlier forecasts.
myseriesstl2 %>%
  forecast(new_data = filter(myseries, year(Month) >= 2011)) %>%
  accuracy(myseries)

## Warning: Removed 12 row(s) containing missing values (geom_path).

data624 assignment5

Jay Lee

3/5/2023