DATA 624 Homework5

Running Code

knitr::opts_chunk$set(echo = TRUE, warning = FALSE, error = FALSE)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Registered S3 method overwritten by 'GGally':
  method from   
  +.gg   ggplot2

Registered S3 method overwritten by 'tsibble':
  method               from 
  as_tibble.grouped_df dplyr


Attaching package: 'tsibble'


The following object is masked from 'package:lubridate':

    interval


The following objects are masked from 'package:base':

    intersect, setdiff, union


Loading required package: fabletools

── Attaching packages ──────────────────────────────────────────── fpp3 1.0.0 ──

✔ fable 0.3.4     

── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
✖ lubridate::date()    masks base::date()
✖ dplyr::filter()      masks stats::filter()
✖ tsibble::intersect() masks base::intersect()
✖ tsibble::interval()  masks lubridate::interval()
✖ dplyr::lag()         masks stats::lag()
✖ tsibble::setdiff()   masks base::setdiff()
✖ tsibble::union()     masks base::union()


Attaching package: 'MASS'


The following object is masked from 'package:dplyr':

    select


Registered S3 method overwritten by 'quantmod':
  method            from
  as.zoo.data.frame zoo 

Registered S3 methods overwritten by 'forecast':
  method                 from     
  autoplot.Arima         ggfortify
  autoplot.acf           ggfortify
  autoplot.ar            ggfortify
  autoplot.bats          ggfortify
  autoplot.decomposed.ts ggfortify
  autoplot.ets           ggfortify
  autoplot.forecast      ggfortify
  autoplot.stl           ggfortify
  autoplot.ts            ggfortify
  fitted.ar              ggfortify
  fortify.ts             ggfortify
  residuals.ar           ggfortify


Attaching package: 'naniar'


The following object is masked from 'package:tsibble':

    pedestrian



Attaching package: 'kableExtra'


The following object is masked from 'package:dplyr':

    group_rows



Attaching package: 'mice'


The following object is masked from 'package:stats':

    filter


The following objects are masked from 'package:base':

    cbind, rbind

Do exercises 8.1, 8.5, 8.6, 8.7, 8.8, 8.9 in Hyndman.

(8.1)Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.
1. Use the ETS() function to estimate the equivalent model for simple exponential smoothing.
ANSWER: From the book, This SES method is suitable for forecasting data with no clear trend or seasonal pattern.Forecasts are calculated using weighted averages, where the weights decrease exponentially as observations come from further in the past — the smallest weights are associated with the oldest observations.

#Additive error, no trend , no seasonality
pig_sl <- aus_livestock %>%
  filter(State == "Victoria") %>%
  filter(Animal == "Pigs") %>%
  model(ETS(Count ~ error("A") + trend("N") + season("N"))) 

pig_fore <- pig_sl %>%
  forecast(h = 4)

pig_fore %>%
  autoplot(aus_livestock) +
  geom_line(aes(y = .fitted), col="#D55E00",
            data = augment(pig_sl)) +
  labs(y="Number of Pigs", title="Victorian Pigs Slaughtered") +
  guides(colour = "none")

report(pig_sl )

Series: Count 
Model: ETS(A,N,N) 
  Smoothing parameters:
    alpha = 0.3221247 

  Initial states:
     l[0]
 100646.6

  sigma^2:  87480760

     AIC     AICc      BIC 
13737.10 13737.14 13750.07

tidy(pig_sl)

# A tibble: 2 × 5
  Animal State    .model                                          term  estimate
  <fct>  <fct>    <chr>                                           <chr>    <dbl>
1 Pigs   Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + sea… alpha  3.22e-1
2 Pigs   Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + sea… l[0]   1.01e+5

accuracy(pig_sl)

# A tibble: 1 × 12
  Animal State    .model  .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
  <fct>  <fct>    <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
1 Pigs   Victoria "ETS(C… Trai… -30.4 9336. 7190. -1.06  8.35 0.776 0.751 0.0103

Find the optimal values of αα and ℓ0ℓ0, and generate forecasts for the next four months.
- ANSWER: For any alpha between 0 and 1, the weights attached to the observations decrease exponentially as we go back in time, hence the name “exponential smoothing”. Our alpha for our SES model is 0.3221247. Our l0 is initial level at t =0 is 100646.6

#print forecast
pig_fore

# A fable: 4 x 6 [1M]
# Key:     Animal, State, .model [1]
  Animal State    .model                          Month             Count  .mean
  <fct>  <fct>    <chr>                           <mth>            <dist>  <dbl>
1 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Jan N(95187, 8.7e+07) 95187.
2 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Feb N(95187, 9.7e+07) 95187.
3 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Mar N(95187, 1.1e+08) 95187.
4 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Apr N(95187, 1.1e+08) 95187.

#find alpha
pig_sl %>% report()

Series: Count 
Model: ETS(A,N,N) 
  Smoothing parameters:
    alpha = 0.3221247 

  Initial states:
     l[0]
 100646.6

  sigma^2:  87480760

     AIC     AICc      BIC 
13737.10 13737.14 13750.07

#print accuracy metrics
accuracy(pig_sl)

# A tibble: 1 × 12
  Animal State    .model  .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
  <fct>  <fct>    <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
1 Pigs   Victoria "ETS(C… Trai… -30.4 9336. 7190. -1.06  8.35 0.776 0.751 0.0103

Compute a 95% prediction interval for the first forecast using ^y±1.96sy±1.96s where ss is the standard deviation of the residuals. Compare your interval with the interval produced by R
ANSWER: The interval produced by calculating by the std residual and the mean is close to the interval calculated by R but a little narrorwer on the lower bound value. Calculated confidence interval: [76871.012478, 113502.102384]

#calculate through R
pig_fore %>% hilo(95) %>% pull('95%') %>% head(1)

<hilo[1]>
[1] [76854.79, 113518.3]95

# 95% interval from a standard normal distribution
interval <- hilo(pig_fore, 95)
interval

# A tsibble: 4 x 7 [1M]
# Key:       Animal, State, .model [1]
  Animal State   .model    Month             Count  .mean                  `95%`
  <fct>  <fct>   <chr>     <mth>            <dist>  <dbl>                 <hilo>
1 Pigs   Victor… "ETS(… 2019 Jan N(95187, 8.7e+07) 95187. [76854.79, 113518.3]95
2 Pigs   Victor… "ETS(… 2019 Feb N(95187, 9.7e+07) 95187. [75927.17, 114445.9]95
3 Pigs   Victor… "ETS(… 2019 Mar N(95187, 1.1e+08) 95187. [75042.22, 115330.9]95
4 Pigs   Victor… "ETS(… 2019 Apr N(95187, 1.1e+08) 95187. [74194.54, 116178.6]95

resid_std <- sd(augment(pig_sl)$.resid)

sprintf(
  "Calculated confidence interval: [%f, %f]", 
  pig_fore$.mean[1] - (resid_std * 1.96),
  pig_fore$.mean[1] + (resid_std * 1.96)
  )

[1] "Calculated confidence interval: [76871.012478, 113502.102384]"

(8.5)Data set global_economy contains the annual Exports from many countries. Select one country to analyse.

Plot the Exports series and discuss the main features of the data.

ANSWER: The plot shows a declining trend on exports for singapore.

   Singapore <- global_economy %>% filter(Country == "Singapore") %>% fill_gaps()
head(Singapore,20)

# A tsibble: 20 x 9 [1Y]
# Key:       Country [1]
   Country   Code   Year         GDP Growth   CPI Imports Exports Population
   <fct>     <fct> <dbl>       <dbl>  <dbl> <dbl>   <dbl>   <dbl>      <dbl>
 1 Singapore SGP    1960  704462302.  NA     27.5    177.    163.    1646400
 2 Singapore SGP    1961  764308114.   8.14  27.6    157.    142.    1702400
 3 Singapore SGP    1962  825885274.   7.12  27.7    149.    138.    1750200
 4 Singapore SGP    1963  917222004.   9.98  28.4    156.    141.    1795000
 5 Singapore SGP    1964  893734483.  -3.68  28.8    133.    122.    1841600
 6 Singapore SGP    1965  974193127.   7.60  28.9    134.    123.    1886900
 7 Singapore SGP    1966 1095910101.  10.9   29.5    131.    123.    1934400
 8 Singapore SGP    1967 1237423233.  12.3   30.5    122.    114.    1977600
 9 Singapore SGP    1968 1425029400.  13.6   30.7    131.    126.    2012000
10 Singapore SGP    1969 1659055272.  13.7   30.6    142.    132.    2042500
11 Singapore SGP    1970 1919508689.  13.9   30.7    145.    126.    2074500
12 Singapore SGP    1971 2262544100.  12.1   31.3    140.    120.    2112900
13 Singapore SGP    1972 2719900351.  13.5   31.9    122.    107.    2152400
14 Singapore SGP    1973 3693760000   11.1   38.2    127.    118.    2193000
15 Singapore SGP    1974 5216773826.   6.47  46.7    165.    150.    2229800
16 Singapore SGP    1975 5633386680.   4.61  47.9    146.    137.    2262600
17 Singapore SGP    1976 6326445410.   7.44  47.0    156.    149.    2293300
18 Singapore SGP    1977 6617532783.   7.49  48.5    164.    163.    2325300
19 Singapore SGP    1978 7515823563.   8.70  50.9    169.    165.    2353600
20 Singapore SGP    1979 9294635004.   9.42  52.9    190.    185.    2383500

Singapore %>% autoplot(Exports) + 
  labs(y="% of GDP", title="Singapore Annual Exports")

Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.

ANSWER: We have plotted an simple exponential smooting model with “Additive Error”, “No Trend” and “No Seasonality” to this data.

chft <- global_economy %>%
filter(Country == "Singapore") %>%
  model(ETS(Exports ~ error("A") + trend("N") + season("N"))) 

chfc <- chft %>%
  forecast(h = 15)

chfc %>%
  autoplot( global_economy)  +
  geom_line(aes(y = .fitted), col="#D55E00",
            data = augment(chft)) +
  labs(y="Exp", title="Singapore ANN") +
  guides(colour = "none")

  #labs(y="% of GDP", title="Singapore Annual Exports", subtitle = "ETS(A,N,N)")

Compute the RMSE values for the training data.

ANSWER: The RMSE below for the fit of the model is 12.24821

accuracy(chft)

# A tibble: 1 × 11
  Country   .model       .type    ME  RMSE   MAE    MPE  MAPE  MASE RMSSE   ACF1
  <fct>     <chr>        <chr> <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>  <dbl>
1 Singapore "ETS(Export… Trai… 0.181  12.2  9.66 -0.171  5.87 0.983 0.991 0.0437

 accuracy(chft)$RMSE

[1] 12.24821

 report(chft)

Series: Exports 
Model: ETS(A,N,N) 
  Smoothing parameters:
    alpha = 0.9998995 

  Initial states:
     l[0]
 162.8702

  sigma^2:  155.3764

     AIC     AICc      BIC 
532.1297 532.5742 538.3111

Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.

ANSWER: The AAN is the Holt linear trend model , whereas the ANN is the simple exponential smoothing model (for data with no trend and no seasonality). The smoothing parameters alpha and beta and the intial l0 and b0 are estimted by minimizing the SSE for the one-step training errors for the Holt.

For instance below for the Holt method , the estimated smoothing coefficient for the level is apha = 0.9999. The very high value shows that the level changes rapidly in order to capture the highly trended series. The estimated smoothing coefficient for the slope is 0.0001000994. This is relatively low suggesting that the trend doesn’t change as often as mentioned in the book.

chft2 <- global_economy %>%
filter(Country == "Singapore") %>%
  model(
  holt = ETS(Exports ~ error("A") + trend("A") + season("N"))) 

chfc2 <- chft2 %>%
  forecast(h = 10)

chfc2 %>%
  autoplot( global_economy)  +
  geom_line(aes(y = .fitted), col="#D55E00",
        data = augment(chft2)) +
  labs(y="Exp", title="Singapore AAN")

   report(chft2)

Series: Exports 
Model: ETS(A,A,N) 
  Smoothing parameters:
    alpha = 0.9998993 
    beta  = 0.0001000994 

  Initial states:
     l[0]       b[0]
 150.8137 -0.2087273

  sigma^2:  164.0676

     AIC     AICc      BIC 
537.1772 538.3311 547.4795

Compare the forecasts from both methods. Which do you think is best?

ANSWER: Given that both the RMSE and MAE are lower (MINIMIZING to the mean) for the ETS or SES method with no trend, no seasonality, it seems tht this would be a better option in the case of this dataset.

 accuracy(chft2)

# A tibble: 1 × 11
  Country   .model .type       ME  RMSE   MAE    MPE  MAPE  MASE RMSSE   ACF1
  <fct>     <chr>  <chr>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>  <dbl>
1 Singapore holt   Training 0.595  12.4  9.87 0.0839  5.99  1.00  1.00 0.0151

         accuracy(chft2)$RMSE

[1] 12.35931

         rbind(accuracy(chft), accuracy(chft2))

# A tibble: 2 × 11
  Country   .model      .type    ME  RMSE   MAE     MPE  MAPE  MASE RMSSE   ACF1
  <fct>     <chr>       <chr> <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl>  <dbl>
1 Singapore "ETS(Expor… Trai… 0.181  12.2  9.66 -0.171   5.87 0.983 0.991 0.0437
2 Singapore "holt"      Trai… 0.595  12.4  9.87  0.0839  5.99 1.00  1.00  0.0151

Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.

ANSWER:

The calculated CI interval is for ANN method : [149.131166, 197.558209].

CI calculated by R for ANN is [148.9137, 197.7757]95.

For this method our hand calculated is narrower on the upper bound.

Calculated confidence interval for AAN (HolT method): [148.731895, 197.546889].

CI calculated by R for AAN is [148.0344, 198.2444]95.

The one calculated by hand is wider on the lower bound and narrower on the upper bound.

sing_model <- global_economy %>% filter(Country == "Singapore") %>%
  model(
    SES = ETS(Exports ~ error("A") + trend("N") + season("N")),
    Holt = ETS(Exports ~ error("A") + trend("A") + season("N"))
  )

sing_model_forecasts <- sing_model %>%
  forecast(h = 15)

sing_model_forecasts %>%
  group_by(.model) %>%
  mutate(interval = hilo(Exports, 95)) %>%
  filter(row_number() == 1)

# A tsibble: 2 x 6 [1Y]
# Key:       Country, .model [2]
# Groups:    .model [2]
  Country   .model  Year     Exports .mean               interval
  <fct>     <chr>  <dbl>      <dist> <dbl>                 <hilo>
1 Singapore SES     2018 N(173, 155)  173. [148.9137, 197.7757]95
2 Singapore Holt    2018 N(173, 164)  173. [148.0344, 198.2444]95

# Forecast ANN

chft %>% report()

Series: Exports 
Model: ETS(A,N,N) 
  Smoothing parameters:
    alpha = 0.9998995 

  Initial states:
     l[0]
 162.8702

  sigma^2:  155.3764

     AIC     AICc      BIC 
532.1297 532.5742 538.3111

resid_std1 <- sd(augment(chft )$.resid)

sprintf(
  "Calculated confidence interval: [%f, %f]", 
  chfc$.mean[1] - (resid_std1 * 1.96),
  chfc$.mean[1] + (resid_std1 * 1.96)
  )

[1] "Calculated confidence interval: [149.131166, 197.558209]"

# 
# # 95% interval from a standard normal distribution
# interval1 <- hilo(chfc, 95)
# interval1
# 
# #calculate interval
# mean3 <- 173.3447
# s3 <- sqrt(155.3764)
# z3 <- 1.96
# margin <- z3 * s3
# lower3 <- mean - margin
# upper3 <- mean + margin
# 
# paste(lower3, upper3)

# Forecast AAN

chft2 %>% report()

Series: Exports 
Model: ETS(A,A,N) 
  Smoothing parameters:
    alpha = 0.9998993 
    beta  = 0.0001000994 

  Initial states:
     l[0]       b[0]
 150.8137 -0.2087273

  sigma^2:  164.0676

     AIC     AICc      BIC 
537.1772 538.3311 547.4795

resid_std2 <- sd(augment(chft2 )$.resid)

sprintf(
  "Calculated confidence interval: [%f, %f]", 
  chfc2$.mean[1] - (resid_std2 * 1.96),
  chfc2$.mean[1] + (resid_std2 * 1.96)
  )

[1] "Calculated confidence interval: [148.731895, 197.546889]"

# # 95% interval from a standard normal distribution
# interval2 <- hilo(chfc2, 95)
# interval2

# #calculate interval
# mean4 <- 173.1394 
# s4 <- sqrt(164.0676)
# z4 <- 1.96
# margin <- z4 * s4
# lower4 <- mean - margin
# upper4 <- mean + margin
# 
# paste(lower4, upper4)

(8.6)Forecast the Chinese GDP from the global_economy data set using an ETS model.

Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation.

Try to develop an intuition of what each is doing to the forecasts.

ANSWER:

In practice, phi value is rarely less than 0.8 as the damping has a very strong effect for smaller values. Values of phi close to 1 will mean that a damped model is not able to be distinguished from a non-damped model. For these reasons, we usually restrict phi to a minimum of 0.8 and a maximum of 0.98. We have prented out the chinese GDP using damping values and box cox methods. Holt’s method has the lowest RMSE and damped Box cox has the lowest MAE. So these 2 methods maybe the best fit for the data.

\[Hint: use a relatively large value of `h` when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.\]

Lambdachina <- global_economy %>%
  filter(Country == "China") %>%
  features(GDP, features = guerrero) %>%
  pull(lambda_guerrero)

china_fit <- global_economy %>%
  filter(Country == "China") %>%
  model(`Standard` = ETS(GDP ~ error("A") + trend("N") + season("N")),
        `Holt's method` = ETS(GDP ~ error("A") + trend("A") + season("N")),
        `Damped Holt's method` = ETS(GDP ~ error("A") + trend("Ad", phi = 0.8) + season("N")),
        `Box-Cox` = ETS(box_cox(GDP,Lambdachina) ~ error("A") + trend("Ad") + season("N")),
        `Damped Box-Cox` = ETS(box_cox(GDP,Lambdachina) ~ error("A") + trend("Ad", phi = 0.8) + season("N"))) 

Ch_fore <- china_fit %>%
  forecast(h = 50)

Ch_fore %>%
  autoplot(global_economy, level = NULL) +
  labs(title="Chinese GDP") +
  guides(colour = guide_legend(title = "Forecast china GDP"))

global_economy %>%
  filter(Country == "China") %>%
  model(
    `SES` = ETS(GDP ~ error("A") + trend("N") + season("N")),
    `Damped Holt's method (phi = 0.8)` = ETS(GDP ~ error("A") + trend("Ad", phi = 0.8) + season("N")),
    `Damped Holt's method (phi = 0.85)` = ETS(GDP ~ error("A") + trend("Ad", phi = 0.85) + season("N")),
    `Damped Holt's method (phi = 0.9)` = ETS(GDP ~ error("A") + trend("Ad", phi = 0.9) + season("N")),
    `Damped Holt's method (phi = 0.98)` = ETS(GDP ~ error("A") + trend("Ad", phi = 0.98) + season("N")),
  ) %>%
  forecast(h = 50) %>%
  autoplot(global_economy, level = NULL) +
  labs(title = "China GDP ") +
  guides(colour = guide_legend(title = "Forecast"))

report(china_fit)

# A tibble: 5 × 10
  Country .model     sigma2 log_lik   AIC  AICc   BIC      MSE     AMSE      MAE
  <fct>   <chr>       <dbl>   <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>    <dbl>
1 China   Standard 1.79e+23 -1669.  3345. 3345. 3351. 1.73e+23 7.22e+23 2.13e+11
2 China   Holt's … 3.88e+22 -1624.  3258. 3259. 3268. 3.61e+22 1.31e+23 9.59e+10
3 China   Damped … 4.70e+22 -1629.  3268. 3269. 3278. 4.29e+22 1.42e+23 9.83e+10
4 China   Box-Cox  1.50e- 3    73.4 -135. -133. -123. 1.37e- 3 4.56e- 3 2.95e- 2
5 China   Damped … 1.52e- 3    73.1 -136. -135. -126. 1.38e- 3 5.16e- 3 2.96e- 2

accuracy(china_fit)

# A tibble: 5 × 11
  Country .model  .type      ME    RMSE     MAE   MPE  MAPE  MASE RMSSE     ACF1
  <fct>   <chr>   <chr>   <dbl>   <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
1 China   Standa… Trai… 2.10e11 4.16e11 2.13e11  8.14 11.0  0.983 0.991  0.789  
2 China   Holt's… Trai… 2.36e10 1.90e11 9.59e10  1.41  7.62 0.442 0.453  0.00905
3 China   Damped… Trai… 5.94e10 2.07e11 9.83e10  2.55  8.03 0.454 0.494 -0.0283 
4 China   Box-Cox Trai… 6.35e 9 1.96e11 1.02e11  1.77  7.26 0.472 0.468  0.135  
5 China   Damped… Trai… 4.21e10 1.94e11 9.43e10  2.61  7.23 0.435 0.463 -0.0964

(8.7)Find an ETS model for the Gas data from aus_production and forecast the next few years.

Why is multiplicative seasonality necessary here?

ANSWER:

Here you can see that seasonal variations are changing propotional to the level of the series. Additionally, when we look at the RMSE and MAE, the values on the fit of the multiplicative model has the lowest RMSE and MAE.

From the book

“The additive method is preferred when the seasonal variations are roughly constant through the series, while the multiplicative method is preferred when the seasonal variations are changing proportional to the level of the series.

With the multiplicative method, the seasonal component is expressed in relative terms (percentages), and the series is seasonally adjusted by dividing through by the seasonal component. Within each year, the seasonal component will sum up to approximately”
m .

Experiment with making the trend damped. Does it improve the forecasts?

ANSWER:

Yes, when we look at the RMSE and MAE values of the damped multiplicative values , the forecast is closer to the values of the multiplicative but slightly lower on the RMSE therefore it is closer to the mean and therefore improvement on the forecast.

fit_gasaus <- aus_production %>%
  model(`Additive` = ETS(Gas ~ error("A") + trend("A") + season("A")),
    `Multiplicative` = ETS(Gas ~ error("M") + trend("A") + season("M")),
    `Damped Multiplicative` = ETS(Gas ~ error("M") + trend("Ad", phi = 0.9) + season("M"))) 

aus_production %>%
  model(`Additive` = ETS(Gas ~ error("A") + trend("A") + season("A")),
    `Multiplicative` = ETS(Gas ~ error("M") + trend("A") + season("M")),
    `Damped Multiplicative` = ETS(Gas ~ error("M") + trend("Ad", phi = 0.9))) %>%
  forecast(h=30) %>%
  autoplot(aus_production, level = NULL) +
  labs(title="Australian Gas ") +
  guides(colour = guide_legend(title = "Forecast"),
     subtitle="Additive vs. Multiplicative Seasonality")

report(fit_gasaus)

# A tibble: 3 × 9
  .model                  sigma2 log_lik   AIC  AICc   BIC   MSE  AMSE    MAE
  <chr>                    <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
1 Additive              23.6       -927. 1872. 1873. 1903.  22.7  29.7 3.35  
2 Multiplicative         0.00324   -831. 1681. 1682. 1711.  21.1  32.2 0.0413
3 Damped Multiplicative  0.00340   -835. 1688. 1689. 1719.  21.0  32.4 0.0424

accuracy(fit_gasaus)

# A tibble: 3 × 10
  .model            .type       ME  RMSE   MAE    MPE  MAPE  MASE RMSSE     ACF1
  <chr>             <chr>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>    <dbl>
1 Additive          Trai…  0.00525  4.76  3.35 -4.69  10.9  0.600 0.628  0.0772 
2 Multiplicative    Trai… -0.115    4.60  3.02  0.199  4.08 0.542 0.606 -0.0131 
3 Damped Multiplic… Trai…  0.255    4.58  3.04  0.655  4.15 0.545 0.604 -0.00451

(8.8)Recall your retail time series data (from Exercise 7 in Section 2.10).
1. Why is multiplicative seasonality necessary for this series?
ANSWER:Here you can see that seasonal variations are changing propotional to the level of the series.Therefore we can adopt a multiplicative seasonality method for this series.

set.seed(334)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries %>% autoplot()

Plot variable not specified, automatically selected `.vars = Turnover`

Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.

fitRET <- myseries %>%
  model(`Multiplicative` = ETS(Turnover ~ error("M") + trend("A") + season("M")),
        `Damped Multiplicative` = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))) 

fc<- fitRET  %>%
  forecast(h=36) 

fc%>%
  autoplot(myseries, level = NULL) +
  labs(title="Australian Retail Turnover") +
  guides(colour = guide_legend(title = "Forecast"))

Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?

ANSWER:Here you can see that seasonal variations are changing propotional to the level of the series.When we compare the damping parameter for the multiplicative it has performed better with an RMSE and MAE value lower than the just the hold multiplicative method without the damping parameter. Therefore we can proceed the damping trend with multiplicative as the better option. We can also see the plot of the components of the fit.

accuracy(fitRET) %>%
  dplyr::select(.model, RMSE)

# A tibble: 2 × 2
  .model                 RMSE
  <chr>                 <dbl>
1 Multiplicative         4.05
2 Damped Multiplicative  4.01

accuracy(fitRET)

# A tibble: 2 × 12
  State  Industry .model .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
  <chr>  <chr>    <chr>  <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
1 South… Hardwar… Multi… Trai… 0.0334  4.05  2.90 -0.231  6.10 0.473 0.471 0.282
2 South… Hardwar… Dampe… Trai… 0.211   4.01  2.86  0.353  5.95 0.465 0.467 0.190

 components(fitRET) %>%
  autoplot()

Check that the residuals from the best method look like white noise.

ANSWER: From the BOOK. For white noise series, we expect each autocorrelation to be close to zero. Of course, they will not be exactly equal to zero as there is some random variation. For a white noise series, we expect 95% of the spikes in the ACF to lie within ±1.96/√T where T is the length of the time series. We can see that the damped has ACF spikes within the blue boundaries whereas on the multiplicative there is 1 very large spike that is outside the blue lines indicating that the autocorrelation is not close to 0 and there can be outliers.

myseries %>%
  model(multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))) %>%
  gg_tsresiduals() +
  ggtitle("Multiplicative Method")

myseries %>%
  model(Damped = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))) %>%
  gg_tsresiduals() +
  ggtitle("damped Multiplicative Method")

Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?

ANSWER:

The RMSE of the SNAIVE on the test data is lower than the ETS - multiplicative model, however, looking at the residuals the multiplicative shows more normal around the histogram of the residuals. Because the RMSE is lower we can’t beat the SNAIVE model.

For example from the book, with monthly data, for the seasonal NAIVE, the forecast for all future February values is equal to the last observed February value. With quarterly data, the forecast of all future Q2 values is equal to the last observed Q2 value (where Q2 means the second quarter). Similar rules apply for other months and quarters, and for other seasonal periods.

myseries_train <- myseries %>%
  filter(year(Month) < 2011)

autoplot(myseries, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red")

myseries_test <- myseries %>%
  filter(year(Month) >= 2011)

autoplot(myseries, Turnover) +
  autolayer(myseries_test, Turnover, colour = "red")

#Fit ETS and SNAIEVE on train
fittrain <- myseries_train %>%
  model(multi = ETS(Turnover ~ error("M") + trend("A") + season("M")),
        snaive = SNAIVE(Turnover))

#forecasts on myseries_test
fore <- fittrain %>%
  forecast(new_data = anti_join(myseries, myseries_train))

Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`

fore %>% autoplot(myseries, level = NULL)

#values on train data
accuracy(fittrain) %>%
  dplyr::select(.type, .model, RMSE)

# A tibble: 2 × 3
  .type    .model  RMSE
  <chr>    <chr>  <dbl>
1 Training multi   3.72
2 Training snaive  8.10

#values on test data
fore %>% accuracy(myseries)  %>%
  dplyr::select(.type, .model, RMSE)

# A tibble: 2 × 3
  .type .model  RMSE
  <chr> <chr>  <dbl>
1 Test  multi   16.6
2 Test  snaive  13.8

myseries_test  %>%
  model(snaive = SNAIVE(Turnover)) %>%
  gg_tsresiduals() +
  ggtitle("SNAIVE")

myseries_test  %>%
  model(multi = ETS(Turnover ~ error("M") + trend("A") + season("M"))) %>%
  gg_tsresiduals() +
  ggtitle("ETS")

(8.9)For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data.

lambda_retail <- myseries %>%
  features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero)

# STL Box Cox 
myseries %>%
  model(STL(box_cox(Turnover,lambda_retail) ~ season(window = "periodic"), robust = TRUE)) %>%
  components() %>%
  autoplot() +
  ggtitle("STL with Box-Cox Transformation")

How does that compare with your best previous forecasts on the test set?

ANSWER: The RMSE value is the lowest out of all the models we have applied so far. The RMSE value here is 2.078057. So, this model is better compared to other applied so far.

# Box-Cox STL
Decmp <- myseries %>%
  model(STL_box = STL(box_cox(Turnover,lambda_retail) ~ season(window = "periodic"), robust = TRUE)) %>%
  components()

# Seasonally Adjusted
myseries$Turnover_sa <- Decmp$season_adjust

myseries_train <- myseries %>%
  filter(year(Month) < 2011)

fit <- myseries_train %>%
  model(ETS(Turnover_sa ~ error("M") + trend("A") + season("M")))

fit %>% gg_tsresiduals()  +
  ggtitle("Australian Retail Turnover Residual FOR ADJ")

# Forecast Test Data
fc <- fit %>%
  forecast(new_data = anti_join(myseries, myseries_train))

Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover,
Turnover_sa)`

fc %>% accuracy(myseries)  %>%
  dplyr::select(.type, .model, RMSE)

# A tibble: 1 × 3
  .type .model                                                            RMSE
  <chr> <chr>                                                            <dbl>
1 Test  "ETS(Turnover_sa ~ error(\"M\") + trend(\"A\") + season(\"M\"))"  2.08

report(fit)

Series: Turnover_sa 
Model: ETS(M,A,M) 
  Smoothing parameters:
    alpha = 0.5172058 
    beta  = 0.0001015057 
    gamma = 0.2369751 

  Initial states:
     l[0]     b[0]     s[0]    s[-1]     s[-2]    s[-3]     s[-4]   s[-5] s[-6]
 4.272319 0.027774 1.017124 1.056054 0.9843349 1.085979 0.9695858 0.86624 1.025
    s[-7]    s[-8]    s[-9]  s[-10]    s[-11]
 1.035566 1.030692 0.983274 1.01681 0.9293412

  sigma^2:  0.0025

     AIC     AICc      BIC 
1472.475 1474.347 1537.815

components(fit) |>
  autoplot() +
  labs(title = "ETS(M,A,M) components")