library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.2
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.3
## ✔ dplyr 1.1.2 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.0 ✔ feasts 0.3.1
## ✔ lubridate 1.9.2 ✔ fable 0.3.3
## ✔ ggplot2 3.4.2 ✔ fabletools 0.3.4
## Warning: package 'tsibble' was built under R version 4.3.2
## Warning: package 'tsibbledata' was built under R version 4.3.2
## Warning: package 'feasts' was built under R version 4.3.2
## Warning: package 'fabletools' was built under R version 4.3.2
## Warning: package 'fable' was built under R version 4.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
pigs <- aus_livestock %>%
filter(Animal == "Pigs",State == "Victoria")
pigs
## # A tsibble: 558 x 4 [1M]
## # Key: Animal, State [1]
## Month Animal State Count
## <mth> <fct> <fct> <dbl>
## 1 1972 Jul Pigs Victoria 94200
## 2 1972 Aug Pigs Victoria 105700
## 3 1972 Sep Pigs Victoria 96500
## 4 1972 Oct Pigs Victoria 117100
## 5 1972 Nov Pigs Victoria 104600
## 6 1972 Dec Pigs Victoria 100500
## 7 1973 Jan Pigs Victoria 94700
## 8 1973 Feb Pigs Victoria 93900
## 9 1973 Mar Pigs Victoria 93200
## 10 1973 Apr Pigs Victoria 78000
## # ℹ 548 more rows
fit <- pigs %>%
model(ANN = ETS(Count ~ error("A") + trend("N") + season("N")))
report(fit)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
The alpha parameter is 0.32 and the initial state l[0] is 100646.6.
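To make the smoothing explicit, here is a minimal sketch of one step of the SES level recursion, l[t] = alpha * y[t] + (1 - alpha) * l[t-1], using the values reported above (the names alpha, l0 and l1 are just illustrative):
alpha <- 0.3221247 ## estimated smoothing parameter from the report
l0 <- 100646.6 ## estimated initial level l[0]
l1 <- alpha * pigs$Count[1] + (1 - alpha) * l0 ## updated level after 1972 Jul
l1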
## Look at the components of the fit..
components(fit) %>%
autoplot()
## Warning: Removed 1 row containing missing values (`geom_line()`).
## Now we forecast the next 4 months
fc <-
fit %>%
forecast(h = "4 months")
fc
## # A fable: 4 x 6 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month Count .mean
## <fct> <fct> <chr> <mth> <dist> <dbl>
## 1 Pigs Victoria ANN 2019 Jan N(95187, 8.7e+07) 95187.
## 2 Pigs Victoria ANN 2019 Feb N(95187, 9.7e+07) 95187.
## 3 Pigs Victoria ANN 2019 Mar N(95187, 1.1e+08) 95187.
## 4 Pigs Victoria ANN 2019 Apr N(95187, 1.1e+08) 95187.
## Taken from the textbook...
fc |>
autoplot(pigs) +
geom_line(aes(y = .fitted), col="#D55E00",
data = augment(fit)) +
labs(y="Count", title="Pigs Slaughter in Victoria") +
guides(colour = "none")
## Use the hilo function to extract a specified prediction interval
hilo(fc$Count,95)
## <hilo[4]>
## [1] [76854.79, 113518.3]95 [75927.17, 114445.9]95 [75042.22, 115330.9]95
## [4] [74194.54, 116178.6]95
## The formula for an approximate 95% prediction interval is yhat +/- 1.96*s,
## where s is the standard deviation of the residuals, extracted with residuals()
s <- sd(residuals(fit)$.resid)
upper <- 95186.56 + 1.96*s
lower <- 95186.56 - 1.96*s
lower
## [1] 76871.02
upper
## [1] 113502.1
The manually calculated prediction interval is very close to the one returned by hilo() for the first forecast; the small difference comes from using the residual standard deviation rather than the model's estimated sigma.
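The interval can also be built from the model's own variance estimate; a minimal sketch using glance() (this essentially reproduces the first hilo() interval above, since the fable's normal distribution uses sigma^2 = 87480760):
sigma <- sqrt(glance(fit)$sigma2) ## model's estimated residual standard deviation
c(lower = fc$.mean[1] - 1.96 * sigma,
upper = fc$.mean[1] + 1.96 * sigma)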
Bd <- global_economy %>%
filter(Country == "Bangladesh")
Bd %>%
autoplot(Exports) +
labs(title = "Bangladesh Exports")
The data seem to show a decreasing trend up to about 1985, followed by an increasing trend. There is no apparent seasonality, though there is some cyclic behaviour.
## Create a simple exponential smoothing model, ETS(A,N,N)
Bdfit <- Bd %>%
model(ANN = ETS(Exports ~ error("A") + trend("N") + season("N")))
report(Bdfit)
## Series: Exports
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.9999
##
## Initial states:
## l[0]
## 9.997555
##
## sigma^2: 1.6265
##
## AIC AICc BIC
## 267.6831 268.1275 273.8644
The alpha parameter is very close to 1, which tells me the level adjusts almost entirely to the most recent observation, so the forecasts behave much like a naive forecast lagged by one period.
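To illustrate that last point, a quick sketch (not part of the original output): with alpha this close to 1, the SES point forecasts are essentially the last observed value, so they almost coincide with a naive forecast.
Bd %>%
model(SES = ETS(Exports ~ error("A") + trend("N") + season("N")),
Naive = NAIVE(Exports)) %>%
forecast(h = "5 years") %>%
autoplot(Bd, level = NULL) ## the two sets of point forecasts overlap almost exactly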
Bdfc <- Bdfit %>%
forecast(h = "15 years")
Bdfc %>%
autoplot(Bd) + labs(title = "Bangladesh Exports: SES Forecasts, 15 Years Ahead")
## Use the accuracy() function to get the training-set error measures
accuracy(Bdfit)$RMSE
## [1] 1.253158
The training RMSE for the ETS(A,N,N) model is 1.25.
Bdfit2 <- Bd %>%
model(ANN = ETS(Exports ~ error("A") + trend("A") + season("N")))
report(Bdfit2)
## Series: Exports
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.9998998
## beta = 0.0001001086
##
## Initial states:
## l[0] b[0]
## 10.1597 0.07875098
##
## sigma^2: 1.6798
##
## AIC AICc BIC
## 271.4452 272.5990 281.7474
The alpha value is very high, which tells us the level changes rapidly to track the strongly trended series. The beta parameter is about 0.0001, which is very small and suggests the slope of the trend barely changes.
## Make the same forecast but with the additive-trend model
Bdfc2 <- Bdfit2 %>%
forecast(h = "15 years")
## Plot the forecasts
Bdfc2 %>%
autoplot(Bd) + labs(title = "Bangladesh Exports: Holt (AAN) Forecasts, 15 Years Ahead")
Simple exponential smoothing is useful because it weights recent observations most heavily when producing the forecast, while double exponential smoothing (Holt's method) also allows the forecasts to follow a trend.
Compute the training RMSE for both models, compare them, and prefer the one with the lower value.
## For the first fit, with simple exponential smoothing
accuracy(Bdfit)$RMSE
## [1] 1.253158
accuracy(Bdfit2)$RMSE
## [1] 1.250591
It appears that for Bangladesh exports the RMSE values of the two models are very similar, so it is difficult to say which method is better on that basis alone.
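One way to break the tie (a sketch, not part of the original analysis): fit both models in a single mable and compare information criteria, which penalise the extra trend parameters. The reports above already show an AICc of 268.1 for ETS(A,N,N) versus 272.6 for ETS(A,A,N), favouring the simpler model.
Bd %>%
model(ANN = ETS(Exports ~ error("A") + trend("N") + season("N")),
AAN = ETS(Exports ~ error("A") + trend("A") + season("N"))) %>%
glance() %>%
select(.model, AICc, MSE) ## side-by-side comparison of the two fits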
head(Bdfc)
## # A fable: 6 x 5 [1Y]
## # Key: Country, .model [1]
## Country .model Year Exports .mean
## <fct> <chr> <dbl> <dist> <dbl>
## 1 Bangladesh ANN 2018 N(15, 1.6) 15.0
## 2 Bangladesh ANN 2019 N(15, 3.3) 15.0
## 3 Bangladesh ANN 2020 N(15, 4.9) 15.0
## 4 Bangladesh ANN 2021 N(15, 6.5) 15.0
## 5 Bangladesh ANN 2022 N(15, 8.1) 15.0
## 6 Bangladesh ANN 2023 N(15, 9.8) 15.0
hilo(Bdfc$Exports,95)
## <hilo[15]>
## [1] [12.536649, 17.53589]95 [11.501449, 18.57109]95 [10.707088, 19.36545]95
## [4] [10.037404, 20.03513]95 [ 9.447396, 20.62514]95 [ 8.913985, 21.15855]95
## [7] [ 8.423463, 21.64908]95 [ 7.966894, 22.10564]95 [ 7.538075, 22.53446]95
## [10] [ 7.132487, 22.94005]95 [ 6.746720, 23.32582]95 [ 6.378124, 23.69441]95
## [13] [ 6.024592, 24.04795]95 [ 5.684415, 24.38812]95 [ 5.356185, 24.71635]95
## Repeat the manual 95% interval calculation for the first forecast
ss <- sd(residuals(Bdfit)$.resid)
upper <- 15.03627 + 1.96 * ss
lower <- 15.03627 - 1.96 * ss
print(lower)
## [1] 12.56459
print(upper)
## [1] 17.50795
head(Bdfc2)
## # A fable: 6 x 5 [1Y]
## # Key: Country, .model [1]
## Country .model Year Exports .mean
## <fct> <chr> <dbl> <dist> <dbl>
## 1 Bangladesh ANN 2018 N(15, 1.7) 15.1
## 2 Bangladesh ANN 2019 N(15, 3.4) 15.2
## 3 Bangladesh ANN 2020 N(15, 5) 15.3
## 4 Bangladesh ANN 2021 N(15, 6.7) 15.4
## 5 Bangladesh ANN 2022 N(15, 8.4) 15.4
## 6 Bangladesh ANN 2023 N(16, 10) 15.5
## Use the hilo() function again (as above) for the trend model's forecast
hilo(Bdfc2$Exports,95)
## <hilo[15]>
## [1] [12.574787, 17.65533]95 [11.601355, 18.78633]95 [10.872598, 19.67266]95
## [4] [10.270483, 20.43234]95 [ 9.749288, 21.11110]95 [ 9.285566, 21.73239]95
## [7] [ 8.865388, 22.31013]95 [ 8.479678, 22.85341]95 [ 8.122134, 23.36852]95
## [10] [ 7.788166, 23.86005]95 [ 7.474310, 24.33148]95 [ 7.177875, 24.78548]95
## [13] [ 6.896720, 25.22420]95 [ 6.629110, 25.64938]95 [ 6.373616, 26.06244]95
## Calculate it again, this time using the residuals of the trend model
sss <- sd(residuals(Bdfit2)$.resid)
upper <- 15.11506 + 1.96 * sss
lower <- 15.11506 - 1.96 * sss
print(lower)
## [1] 12.64338
print(upper)
## [1] 17.58674
Once again, the manually calculated prediction interval is very close to the one returned by hilo(); the bounds differ only slightly.
Let us first visualize the data to understand how the series behaves; as it turns out, a log transformation of GDP seems to work best.
Chinese <- global_economy %>%
filter(Country == "China")
## Let's take a look at the plot
Chinese %>%
autoplot(GDP) + labs(title = "China GDP")
Okay, the time series for China shows steady exponential growth after the 1990s. We could transform the data, for example with a square-root or log transformation (the guerrero feature can help choose a Box-Cox lambda; a quick check is sketched just below). Also, since the data trend upward, we can apply double exponential smoothing. This is the thought process I have so far.
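A minimal sketch of that lambda check (output not shown; a value near 0 would support the log transformation tried below):
Chinese %>%
features(GDP, features = guerrero) %>% ## Guerrero's method for choosing a Box-Cox lambda
pull(lambda_guerrero)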
## Let's see if a square-root transformation works; it seems to have helped
Chinese %>%
autoplot(sqrt(GDP)) + labs(title = "China GDP (SqRt Transformed)")
## Wow the log does an even better job!!
Chinese %>%
autoplot(log(GDP)) + labs(title = "China GDP (Log Transformed)")
## Let's create forecasts from an ETS model of logged GDP; since the data aren't seasonal we can set the seasonal component to "N"
Chinese
## # A tsibble: 58 x 9 [1Y]
## # Key: Country [1]
## Country Code Year GDP Growth CPI Imports Exports Population
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 China CHN 1960 59716467625. NA NA 4.43 4.31 667070000
## 2 China CHN 1961 50056868958. -27.3 NA 3.49 3.87 660330000
## 3 China CHN 1962 47209359006. -5.58 NA 2.91 4.05 665770000
## 4 China CHN 1963 50706799903. 10.3 NA 2.86 4.01 682335000
## 5 China CHN 1964 59708343489. 18.2 NA 2.86 3.77 698355000
## 6 China CHN 1965 70436266147. 17.0 NA 3.19 3.64 715185000
## 7 China CHN 1966 76720285970. 10.7 NA 3.24 3.49 735400000
## 8 China CHN 1967 72881631327. -5.77 NA 2.98 3.28 754550000
## 9 China CHN 1968 70846535056. -4.10 NA 2.92 3.30 774510000
## 10 China CHN 1969 79705906247. 16.9 NA 2.41 3.05 796025000
## # ℹ 48 more rows
## Create an ETS model with additive errors and an additive trend
Chinfit <- Chinese %>%
model(ANN = ETS(GDP ~ error("A") + trend("A") + season("N")))
report(Chinfit)
## Series: GDP
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.9998964
## beta = 0.5518569
##
## Initial states:
## l[0] b[0]
## 50284778074 3288256684
##
## sigma^2: 3.87701e+22
##
## AIC AICc BIC
## 3258.053 3259.207 3268.356
The alpha is close to 1, which tells us recent observations are weighted most heavily, and the beta of about 0.55 tells us the slope of the trend changes fairly quickly.
## Create the forecast and visualize it for the next 15 years
Chinfc <- Chinfit %>%
forecast(h = "15 years")
Chinfc %>%
autoplot(Chinese) + labs(title = "Chinese GDP with Additive Trend")
## "Ad" is the damped variant of the additive trend
Chinfit2 <- Chinese %>%
model(ANN = ETS(GDP ~ error("A") + trend("Ad",phi = 0.83) + season("N")))
report(Chinfit2)
## Series: GDP
## Model: ETS(A,Ad,N)
## Smoothing parameters:
## alpha = 0.8406579
## beta = 0.8406564
## phi = 0.83
##
## Initial states:
## l[0] b[0]
## 45713434614 7859600144
##
## sigma^2: 4.524326e+22
##
## AIC AICc BIC
## 3265.925 3267.079 3276.227
With the damped trend, the alpha and beta parameters are both estimated at about 0.84; beta is constrained to be no larger than alpha and has essentially hit that bound here.
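As a side note (a sketch; phi = 0.83 above was fixed by hand), ETS() will estimate the damping parameter itself if phi is left unspecified:
Chinese %>%
model(AAdN = ETS(GDP ~ error("A") + trend("Ad") + season("N"))) %>% ## phi estimated, not fixed
report()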
## Make sure to forecast before plotting; the prediction intervals fan out like a funnel
Chinfit2 %>%
forecast(h = "15 years") %>%
autoplot(Chinese) + labs(title = "Chinese GDP with Additive Dampened Trend")
Chinlogfit <- Chinese %>%
model(ANN = ETS(log(GDP) ~ error("A") + trend("A") + season("N")))
report(Chinlogfit)
## Series: GDP
## Model: ETS(A,A,N)
## Transformation: log(GDP)
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.1079782
##
## Initial states:
## l[0] b[0]
## 24.77007 0.04320226
##
## sigma^2: 0.0088
##
## AIC AICc BIC
## -33.07985 -31.92600 -22.77763
The alpha parameter is very close to 1, telling us that recent observations dominate the level estimate, while the low beta (0.11) suggests the trend does not fluctuate much.
Chinlogfit %>%
forecast(h = "15 years") %>%
autoplot(Chinese) + labs(title = "Transformed GDP with Additive Trend")
Chinlogfit2 <- Chinese %>%
model(ANN = ETS(log(GDP) ~ error("A") + trend("Ad",phi = 0.83) + season("N")))
report(Chinlogfit2)
## Series: GDP
## Model: ETS(A,Ad,N)
## Transformation: log(GDP)
## Smoothing parameters:
## alpha = 0.965097
## beta = 0.5928893
## phi = 0.83
##
## Initial states:
## l[0] b[0]
## 24.96615 -0.1897312
##
## sigma^2: 0.0093
##
## AIC AICc BIC
## -31.32767 -30.17383 -21.02546
With damping, the beta parameter is estimated at 0.59, considerably higher than in the undamped log model (0.11).
Chinlogfit2 %>%
forecast(h = "15 years") %>%
autoplot(Chinese) + labs(title = "Transformed GDP with Damped Additive Trend")
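To wrap up this exercise, here is a sketch (not part of the original output) that refits the four variants in one mable so their training errors can be compared in a single table; accuracy() reports errors on the response (original GDP) scale, so the log models are comparable with the raw-GDP fits.
China_all <- Chinese %>%
model(AAN = ETS(GDP ~ error("A") + trend("A") + season("N")),
AAdN = ETS(GDP ~ error("A") + trend("Ad", phi = 0.83) + season("N")),
log_AAN = ETS(log(GDP) ~ error("A") + trend("A") + season("N")),
log_AAdN = ETS(log(GDP) ~ error("A") + trend("Ad", phi = 0.83) + season("N")))
accuracy(China_all) ## training-set error measures for all four models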
ausGas <- aus_production
ausGas
## # A tsibble: 218 x 7 [1Q]
## Quarter Beer Tobacco Bricks Cement Electricity Gas
## <qtr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1956 Q1 284 5225 189 465 3923 5
## 2 1956 Q2 213 5178 204 532 4436 6
## 3 1956 Q3 227 5297 208 561 4806 7
## 4 1956 Q4 308 5681 197 570 4418 6
## 5 1957 Q1 262 5577 187 529 4339 5
## 6 1957 Q2 228 5651 214 604 4811 7
## 7 1957 Q3 236 5317 227 603 5259 7
## 8 1957 Q4 320 6152 222 582 4735 6
## 9 1958 Q1 272 5758 199 554 4608 5
## 10 1958 Q2 233 5641 229 620 5196 7
## # ℹ 208 more rows
ausGas %>%
autoplot(Gas) + labs(title = "Gas Production in Australia")
The data show an increasing trend and strong seasonality, with seasonal fluctuations that grow with the level of the series.
ausmod <- ausGas %>%
model(additive = ETS(Gas ~ error("A") + trend("A") + season("A")),
multplicative = ETS(Gas ~ error("M") + trend("A") + season("M")))
report(ausmod)
## Warning in report.mdl_df(ausmod): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 2 × 9
## .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 additive 23.6 -927. 1872. 1873. 1903. 22.7 29.7 3.35
## 2 multplicative 0.00324 -831. 1681. 1682. 1711. 21.1 32.2 0.0413
accuracy(ausmod)
## # A tibble: 2 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 additive Training 0.00525 4.76 3.35 -4.69 10.9 0.600 0.628 0.0772
## 2 multplicative Training -0.115 4.60 3.02 0.199 4.08 0.542 0.606 -0.0131
## forecast 20 quarters
ausmod %>%
forecast(h = 20) %>%
autoplot(aus_production, level = NULL) +
labs(title = "Gas Production in Australia",
subtitle = "Additive vs Multiplicative Seasonality") +
guides(color = guide_legend(title = "Forecast"))
ausmod2 <- ausGas %>%
model(additive_dampened = ETS(Gas ~ error("A") + trend("Ad",phi = 0.83) + season("A")),
multplicative_dampened = ETS(Gas ~ error("M") + trend("Ad",phi = 0.83) + season("M")))
report(ausmod2)
## Warning in report.mdl_df(ausmod2): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 2 × 9
## .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 additive_dampened 21.3 -916. 1850. 1851. 1880. 20.5 29.8 3.03
## 2 multplicative_dampened 0.00350 -838. 1693. 1694. 1724. 20.1 31.5 0.0422
accuracy(ausmod2)
## # A tibble: 2 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 additive_dampened Trai… 0.915 4.52 3.03 1.40 4.30 0.543 0.596 0.0336
## 2 multplicative_dampen… Trai… 0.429 4.48 2.97 0.830 4.11 0.532 0.591 -0.00216
## Plot the model..
ausmod2 %>%
forecast(h = 20) %>%
autoplot(aus_production, level = NULL) +
labs(title = "Gas Production in Australia (Damped)",
subtitle = "Additive Damped vs Multiplicative Damped") +
guides(color = guide_legend(title = "Forecast"))
The multiplicative method is preferred because the seasonal variations change in proportion to the level of the series. Looking at the model fits, multiplicative seasonality performed somewhat better, with a lower AIC and a slightly lower training RMSE, though not by much. Adding damping did not noticeably improve the forecasts.
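As a further check (a sketch, not in the original write-up), the residual diagnostics of the preferred multiplicative model can be inspected with the same tools used later in this document:
ausmod %>%
select(multplicative) %>% ## pick the single model by the name used in the mable above
gg_tsresiduals()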
set.seed(12345678)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries %>%
autoplot() + labs(title = "Australian Retail Data")
## Plot variable not specified, automatically selected `.vars = Turnover`
The plot shows an increasing trend and clear seasonality.
Multiplicative seasonality is necessary because the seasonal variation changes in proportion to the level of the series.
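A quick visual check of that claim (a sketch; output not shown): if the seasonal swings really grow in proportion to the level, plotting the log of Turnover should make them roughly constant in size.
myseries %>%
autoplot(log(Turnover)) + labs(title = "Australian Retail Data (Log Turnover)")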
mymod <- myseries %>%
model(multplicative = ETS(Turnover ~ error("M") + trend("A") + season("M")),
multplicative_damp = ETS(Turnover ~ error("M") + trend("Ad",phi = 0.83) + season("M")))
report(mymod)
## Warning in report.mdl_df(mymod): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 2 × 11
## State Industry .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Northern… Clothin… multp… 0.00447 -870. 1775. 1776. 1841. 0.376 0.473 0.0511
## 2 Northern… Clothin… multp… 0.00479 -880. 1794. 1796. 1861. 0.382 0.497 0.0518
accuracy(mymod)
## # A tibble: 2 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Northern T… Clothin… multp… Trai… -0.0128 0.613 0.450 -0.469 5.15 0.513 0.529
## 2 Northern T… Clothin… multp… Trai… 0.0465 0.618 0.451 0.194 5.16 0.515 0.533
## # ℹ 1 more variable: ACF1 <dbl>
mymod %>%
forecast(h = 20) %>%
autoplot(myseries, level = NULL) +
labs(title = "Australian Retail Data",
subtitle = "Multiplicative vs Multiplicative Damped") +
guides(color = guide_legend(title = "Forecast"))
Both models have similar RMSE values, but since the multiplicative model without damping performed slightly better, I would prefer that method.
We can use gg_tsresiduals() and a portmanteau test to reach a conclusion about the residuals.
## Recreate the multiplicative model..
Mymod <- myseries %>%
model(multplicative = ETS(Turnover ~ error("M") + trend("A") + season("M")))
Mymod %>%
gg_tsresiduals()
The residuals display fairly high variance, with some significant correlations in the ACF plot; the histogram is roughly normal but slightly left-skewed. The model may not capture all of the structure in the data.
We can perform a Ljung-Box test to check whether the residuals resemble white noise.
## Recall the arguments: the model's innovation residuals, with lag = 10 for non-seasonal data and lag = 2*m for seasonal data
## Since the seasonal period m is 12, we use lag = 2*12 = 24
augment(Mymod) %>%
features(.innov,ljung_box,lag = 12*2)
## # A tibble: 1 × 5
## State Industry .model lb_stat lb_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Northern Territory Clothing, footwear and personal a… multp… 34.5 0.0765
From the Ljung-Box test, since the p-value (0.077) is larger than 0.05 we fail to reject the null hypothesis; the residuals are consistent with white noise.
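For completeness, the Box-Pierce variant of the portmanteau test can be run the same way (a sketch; output not shown):
augment(Mymod) %>%
features(.innov, box_pierce, lag = 24) ## same lag choice as the Ljung-Box test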
## Set the training data to the end of 2010
Trainseries <- myseries %>%
filter_index("1988 Aug"~ "2010 Dec")
## Fit the models..
Series_fit <- Trainseries %>%
model(Seasonal_Naive = SNAIVE(Turnover),
multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M")))
## Make forecasts for 96 months (8 years) and plot them
Series_fit %>%
forecast(h = 96) %>%
autoplot(Trainseries,level = NULL) +
autolayer(
filter_index(myseries,"2011 Jan" ~ .),
color = "black") +
guides(color = guide_legend(title = "Forecast")
) + labs(title = "Forecasts with Retail Data")
## Plot variable not specified, automatically selected `.vars = Turnover`
## Find the RMSE for training set
accuracy(Series_fit)
## # A tibble: 2 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Northern T… Clothin… Seaso… Trai… 0.440 1.22 0.923 5.15 12.4 1 1
## 2 Northern T… Clothin… multi… Trai… -0.0271 0.520 0.386 -0.710 5.15 0.418 0.426
## # ℹ 1 more variable: ACF1 <dbl>
On the training set, the multiplicative ETS model has a lower RMSE (0.52) than the seasonal naive approach (1.22).
## Make a test_set start from 2011 Jan
Testseries <- myseries %>%
filter_index("2011 Jan"~.)
## We can forecast the testing data..
forcastt <- Series_fit %>%
forecast(new_data = Testseries)
## Review it for the test_set
forcastt %>%
accuracy(myseries)
## # A tibble: 2 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Season… Nort… Clothin… Test 0.836 1.55 1.24 5.94 9.06 1.36 1.28 0.601
## 2 multip… Nort… Clothin… Test -1.69 1.95 1.75 -13.4 13.7 1.91 1.61 0.568
However, the seasonal naive method has a lower RMSE on the test set than the multiplicative ETS model; the large negative ME (-1.69) shows the ETS forecasts sit well above the actual turnover over the test period.
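A single train/test split can be sensitive to where the split falls. A more robust comparison (a sketch, and somewhat slower to run) is time-series cross-validation with stretch_tsibble(), which averages the test error over many forecast origins:
myseries %>%
stretch_tsibble(.init = 120, .step = 12) %>% ## overlapping training sets, each 12 months longer
model(Seasonal_Naive = SNAIVE(Turnover),
multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))) %>%
forecast(h = 12) %>%
accuracy(myseries) ## cross-validated accuracy, averaged over all origins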
## Define the series again just in case..
set.seed(12345678)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
## Select a lambda value.
lambda <- myseries %>%
features(Turnover,features = guerrero) %>%
pull(lambda_guerrero)
## Try an STL decomposition of the Box-Cox-transformed series, leaving the STL settings at their defaults
myseries %>%
model(STL(box_cox(Turnover,lambda),robust = TRUE)) %>%
components() %>%
autoplot() + labs(title = "STL Decomp With Box-Cox Transformation")
## We have to save the components..
Dcmp <- myseries %>%
model(STL(box_cox(Turnover,lambda),robust = TRUE)) %>%
components()
Dcmp
## # A dable: 369 x 9 [1M]
## # Key: State, Industry, .model [1]
## # : box_cox(Turnover, lambda) = trend + season_year + remainder
## State Industry .model Month box_cox(Turnover, la…¹ trend season_year
## <chr> <chr> <chr> <mth> <dbl> <dbl> <dbl>
## 1 Northern T… Clothin… STL(b… 1988 Apr 0.862 0.991 -0.111
## 2 Northern T… Clothin… STL(b… 1988 May 1.11 1.01 0.0515
## 3 Northern T… Clothin… STL(b… 1988 Jun 0.994 1.03 0.0438
## 4 Northern T… Clothin… STL(b… 1988 Jul 1.07 1.05 0.248
## 5 Northern T… Clothin… STL(b… 1988 Aug 1.11 1.07 0.111
## 6 Northern T… Clothin… STL(b… 1988 Sep 1.15 1.09 0.0424
## 7 Northern T… Clothin… STL(b… 1988 Oct 1.19 1.11 0.0265
## 8 Northern T… Clothin… STL(b… 1988 Nov 1.15 1.13 0.000123
## 9 Northern T… Clothin… STL(b… 1988 Dec 1.52 1.15 0.357
## 10 Northern T… Clothin… STL(b… 1989 Jan 1.04 1.17 -0.212
## # ℹ 359 more rows
## # ℹ abbreviated name: ¹`box_cox(Turnover, lambda)`
## # ℹ 2 more variables: remainder <dbl>, season_adjust <dbl>
## Save the season_adjust in myseries
myseries$Season_adj <- Dcmp$season_adjust
head(myseries)
## # A tsibble: 6 x 6 [1M]
## # Key: State, Industry [1]
## State Industry `Series ID` Month Turnover Season_adj
## <chr> <chr> <chr> <mth> <dbl> <dbl>
## 1 Northern Territory Clothing, footwea… A3349767W 1988 Apr 2.3 0.974
## 2 Northern Territory Clothing, footwea… A3349767W 1988 May 2.9 1.06
## 3 Northern Territory Clothing, footwea… A3349767W 1988 Jun 2.6 0.951
## 4 Northern Territory Clothing, footwea… A3349767W 1988 Jul 2.8 0.827
## 5 Northern Territory Clothing, footwea… A3349767W 1988 Aug 2.9 1.00
## 6 Northern Territory Clothing, footwea… A3349767W 1988 Sep 3 1.11
## Let's create training and testing sets again..
## Set the training data to the end of 2010
Trainseries1 <- myseries %>%
filter_index("1988 Aug"~ "2010 Dec")
## Fit the model again, this time on the seasonally adjusted data
Series_fit1 <- Trainseries1 %>%
model(multiplicative = ETS(Season_adj ~ error("M") + trend("A") + season("M")))
Series_fit1 %>%
gg_tsresiduals()
The residuals look roughly normal; there are some correlations in the ACF plot, and the residual variance is high early in the series but settles down after about 2000.
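The same Ljung-Box check used earlier can be applied to this model's innovation residuals (a sketch; output not shown):
augment(Series_fit1) %>%
features(.innov, ljung_box, lag = 24)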
## Make a test_set start from 2011 Jan
Testseries1 <- myseries %>%
filter_index("2011 Jan"~.)
## We can forecast the testing data..
forcast <- Series_fit1 %>%
forecast(new_data = Testseries1)
## Review the accuracy for the training set
Series_fit1 %>%
accuracy()
## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Norther… Clothin… multi… Trai… -0.00408 0.0732 0.0560 -0.314 2.84 0.368 0.359
## # ℹ 1 more variable: ACF1 <dbl>
## Review it for the test_set
forcast %>%
accuracy(myseries)
## # A tibble: 1 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 multiplica… Nort… Clothin… Test -0.0559 0.0827 0.0681 -2.00 2.41 0.448 0.406
## # ℹ 1 more variable: ACF1 <dbl>
The RMSE is 0.0827 for the test set and 0.0732 for the training set. Note that these errors are on the Box-Cox-transformed, seasonally adjusted scale, so they are not directly comparable with the RMSE values obtained earlier on the raw Turnover series.