DATA 624 HW5

Do exercises 8.1, 8.5, 8.6, 8.7, 8.8, 8.9 in Hyndman

load libraries

library(mlbench)
library(ggplot2)
library(reshape2)
library(Amelia)
library(inspectdf)
library(naniar)
library(fpp3)

8.1

Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.

Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of α and ℓ0 , and generate forecasts for the next four months.

Compute a 95% prediction interval for the first forecast using y ± 1.96 s where s is the standard deviation of the residuals. Compare your interval with the interval produced by R.

pigs_slaughtered = aus_livestock %>% filter(Animal=='Pigs',State=='Victoria')

pigs_fit = pigs_slaughtered %>% model(ETS(Count~error("A") + trend("N") + season("N")))

pig_fc = pigs_fit %>% forecast(h=4)

report(pigs_fit)

## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3221247 
## 
##   Initial states:
##      l[0]
##  100646.6
## 
##   sigma^2:  87480760
## 
##      AIC     AICc      BIC 
## 13737.10 13737.14 13750.07

autoplot(pig_fc)

Computed 95% prediction interval

y_hat <- pig_fc$.mean[1]


aug_fit <- augment(pigs_fit)
s <- sd(aug_fit$.resid)

# Calculate the 95% prediction intervals
upper_95 <- y_hat + (s * 1.96)
lower_95 <- y_hat - (s * 1.96)

paste0("upper bound (95% C.I): ",upper_95,';'," lower bound (95% C.I)): ",lower_95)

## [1] "upper bound (95% C.I): 113502.102384467; lower bound (95% C.I)): 76871.0124775157"

8.5

Data set global_economy contains the annual Exports from many countries. Select one country to analyse.

Plot the Exports series and discuss the main features of the data.

gejapan = global_economy
gejapan %>% select(Exports) %>% filter(Country == 'Japan') %>% autoplot(.vars = Exports)

Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.

japanex = gejapan %>% select(Exports) %>% filter(Country == 'Japan')

japan_fit = japanex %>% model(ETS(Exports~error("A") + trend("N") + season ("N")))

japan_fc = japan_fit %>% forecast(h=5)

japan_fc %>% autoplot(japanex) + geom_line(aes(y = .fitted), data = augment(japan_fit))

Compute the RMSE values for the training data.

japan_fit%>% accuracy()

## # A tibble: 1 × 11
##   Country .model           .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <fct>   <chr>            <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Japan   "ETS(Exports ~ … Trai…   NaN   NaN   NaN   NaN   NaN   NaN   NaN    NA

Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.

fit_compare <- japanex %>%
  model(
    ANN = ETS (Exports ~ error("A") + trend("N") + season("N")),
    AAN = ETS (Exports ~ error("A") + trend("A") + season("N"))
    )
accuracy(fit_compare)

## # A tibble: 2 × 11
##   Country .model .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <fct>   <chr>  <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Japan   ANN    Training   NaN   NaN   NaN   NaN   NaN   NaN   NaN    NA
## 2 Japan   AAN    Training   NaN   NaN   NaN   NaN   NaN   NaN   NaN    NA

8.6

Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.

china_export <- global_economy %>%
  filter(Country == "China")
china_export %>% autoplot(GDP) +
  labs(title="china_export")

The Data shows a strong upward trend.

china_fit = china_export %>% model(ETS(Exports~error("A") + trend("A") + season("N")))

china_fc = china_fit %>% forecast(h=12)


china_fc %>% autoplot(china_export) +
  geom_line(aes(y = .fitted), col="blue",
            data = augment(china_fit)) +
  labs(y="Exports", title="China: Exports")

report(china_fit)

## Series: Exports 
## Model: ETS(A,A,N) 
##   Smoothing parameters:
##     alpha = 0.9998999 
##     beta  = 0.0992376 
## 
##   Initial states:
##    l[0]       b[0]
##  4.0399 0.09995689
## 
##   sigma^2:  3.9037
## 
##      AIC     AICc      BIC 
## 320.3530 321.5069 330.6552

Model: ETS(A,A,N)

8.7

Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?

aus_gas_production  = aus_production  %>% select(Gas)
aus_gas_production %>%
 model(
    simple_additive = ETS(Gas ~ error('A')+trend("A")+season("N")),
    simple_mult = ETS(Gas ~ error("M")+trend("A")+season("N")),
    seasonality_add = ETS(Gas ~ error('A')+trend("A")+season("A")),
    seasonality_mult = ETS(Gas ~ error("M")+trend("A")+season("M")),
    mult_season_damp = ETS(Gas ~ error("M")+trend("Ad")+season("M"))
  ) %>% forecast(h=25) %>%autoplot(aus_gas_production,level=NULL) + 
  labs(y="Amount", title="Gas: Aus Production ")

Multiplicative models would be the most appropriate solution

8.8

set.seed(6543)

myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries

## # A tsibble: 441 x 5 [1M]
## # Key:       State, Industry [1]
##    State           Industry                             Serie…¹    Month Turno…²
##    <chr>           <chr>                                <chr>      <mth>   <dbl>
##  1 South Australia Hardware, building and garden suppl… A33495… 1982 Apr     8.6
##  2 South Australia Hardware, building and garden suppl… A33495… 1982 May     9.5
##  3 South Australia Hardware, building and garden suppl… A33495… 1982 Jun     8.4
##  4 South Australia Hardware, building and garden suppl… A33495… 1982 Jul    10.3
##  5 South Australia Hardware, building and garden suppl… A33495… 1982 Aug    10.6
##  6 South Australia Hardware, building and garden suppl… A33495… 1982 Sep    11.5
##  7 South Australia Hardware, building and garden suppl… A33495… 1982 Oct    10.8
##  8 South Australia Hardware, building and garden suppl… A33495… 1982 Nov    13.1
##  9 South Australia Hardware, building and garden suppl… A33495… 1982 Dec    25.4
## 10 South Australia Hardware, building and garden suppl… A33495… 1983 Jan     9.2
## # … with 431 more rows, and abbreviated variable names ¹`Series ID`, ²Turnover

autoplot(myseries)

a. Why is multiplicative seasonality necessary for this series?

The seasonality of the data with peaks observed after regular interval.

Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.

fit = myseries %>% model(
  Multi_Season = ETS(Turnover ~ error("M") + trend("A") + season("M")),
  Multi_Season_Damp = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
)

fc = fit %>% forecast(h = 40)

fc %>%
  autoplot(myseries, level=NULL) +
  labs(y="AUD", title="Retail")

C. Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?

accuracy(fit)

## # A tibble: 2 × 12
##   State   Indus…¹ .model .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>   <chr>   <chr>  <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 South … Hardwa… Multi… Trai… 0.0334  4.05  2.90 -0.231  6.10 0.473 0.471 0.282
## 2 South … Hardwa… Multi… Trai… 0.211   4.01  2.86  0.353  5.95 0.465 0.467 0.190
## # … with abbreviated variable name ¹Industry

Multi_Season_Damp

Check that the residuals from the best method look like white noise.

myseries %>% model( Multi_Season_Damp = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
)%>% gg_tsresiduals()

DATA 624 HW5

Anjal Hussan

March 2023

load libraries

8.1

8.5

8.6

8.7

8.8