Homework 5

library(fpp3)

## Warning: package 'fpp3' was built under R version 4.3.3

## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr

## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──

## ✔ tibble      3.2.1     ✔ tsibble     1.1.5
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.4.1
## ✔ lubridate   1.9.4     ✔ fable       0.4.1
## ✔ ggplot2     3.5.1

## Warning: package 'lubridate' was built under R version 4.3.3

## Warning: package 'tsibble' was built under R version 4.3.3

## Warning: package 'feasts' was built under R version 4.3.3

## Warning: package 'fabletools' was built under R version 4.3.3

## Warning: package 'fable' was built under R version 4.3.3

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

Question 8.1

Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.

Part A

Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of and l_0 , and generate forecasts for the next four months.

head(aus_livestock)

## # A tsibble: 6 x 4 [1M]
## # Key:       Animal, State [1]
##      Month Animal                     State                        Count
##      <mth> <fct>                      <fct>                        <dbl>
## 1 1976 Jul Bulls, bullocks and steers Australian Capital Territory  2300
## 2 1976 Aug Bulls, bullocks and steers Australian Capital Territory  2100
## 3 1976 Sep Bulls, bullocks and steers Australian Capital Territory  2100
## 4 1976 Oct Bulls, bullocks and steers Australian Capital Territory  1900
## 5 1976 Nov Bulls, bullocks and steers Australian Capital Territory  2100
## 6 1976 Dec Bulls, bullocks and steers Australian Capital Territory  1800

pig_data <- aus_livestock %>%
  filter(Animal == "Pigs",State == "Victoria")

pig_fit <- pig_data %>%
  model(ETS(Count ~ error("A") + trend("N") + season("N")))
pig_fc <- pig_fit %>%
  forecast(h = 4, level = 95)
pig_fit %>% report()

## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3221247 
## 
##   Initial states:
##      l[0]
##  100646.6
## 
##   sigma^2:  87480760
## 
##      AIC     AICc      BIC 
## 13737.10 13737.14 13750.07

pig_fc

## # A fable: 4 x 6 [1M]
## # Key:     Animal, State, .model [1]
##   Animal State    .model                                                   Month
##   <fct>  <fct>    <chr>                                                    <mth>
## 1 Pigs   Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Jan
## 2 Pigs   Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Feb
## 3 Pigs   Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Mar
## 4 Pigs   Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Apr
## # ℹ 2 more variables: Count <dist>, .mean <dbl>

From the output above we can see that the optimal value for alpha was calculated to be about .322 and the optimal value for l0 is about 100646.

pig_data %>%
  autoplot(Count) + 
  autolayer(pig_fc, series = "Forecast", PI = TRUE) + 
  ggtitle("Forecast of Pigs Slaughtered in Victoria") + 
  ylab("Number of Pigs") + 
  xlab("Year Month")

## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `series` and `PI`

## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters:
## `series` and `PI`

Part B

Compute a 95% prediction interval for the first forecast using ^y ±1.96s where s is the standard deviation of the residuals. Compare your interval with the interval produced by R.

# Compute standard deviation of residuals
residuals <- augment(pig_fit)$.resid
s <- sd(residuals, na.rm = TRUE)

# Compute first forecast and its manual 95% prediction interval
first_forecast <- pig_fc$.mean[1]
manual_lower <- first_forecast - 1.96 * s
manual_upper <- first_forecast + 1.96 * s

# Print manual interval
print(paste("Manual 95% Prediction Interval:", manual_lower, "to", manual_upper))

## [1] "Manual 95% Prediction Interval: 76871.0124775157 to 113502.102384467"

# Extract first forecast's mean and R's computed 95% prediction interval
r_forecast <- pig_fc %>% slice(1)  # Get first forecasted value

# Compute 95% prediction interval (2.5% and 97.5% quantiles)
r_lower <- quantile(r_forecast$Count, p = 0.025)
r_upper <- quantile(r_forecast$Count, p = 0.975)


# Print R's computed prediction interval
print(paste("R Computed 95% Prediction Interval:", r_lower, "to", r_upper))

## [1] "R Computed 95% Prediction Interval: 76854.7888896402 to 113518.325972343"

From the output above we can see that the R computed are slighlty larger but overall they are very similar.

Question 8.5

Data set global_economy contains the annual Exports from many countries. Select one country to analyse.

Part A

Plot the Exports series and discuss the main features of the data.

head(global_economy)

## # A tsibble: 6 x 9 [1Y]
## # Key:       Country [1]
##   Country     Code   Year         GDP Growth   CPI Imports Exports Population
##   <fct>       <fct> <dbl>       <dbl>  <dbl> <dbl>   <dbl>   <dbl>      <dbl>
## 1 Afghanistan AFG    1960  537777811.     NA    NA    7.02    4.13    8996351
## 2 Afghanistan AFG    1961  548888896.     NA    NA    8.10    4.45    9166764
## 3 Afghanistan AFG    1962  546666678.     NA    NA    9.35    4.88    9345868
## 4 Afghanistan AFG    1963  751111191.     NA    NA   16.9     9.17    9533954
## 5 Afghanistan AFG    1964  800000044.     NA    NA   18.1     8.89    9731361
## 6 Afghanistan AFG    1965 1006666638.     NA    NA   21.4    11.3     9938414

us_econ <- global_economy %>%
  filter(Country == "United States")
head(us_econ)

## # A tsibble: 6 x 9 [1Y]
## # Key:       Country [1]
##   Country       Code   Year          GDP Growth   CPI Imports Exports Population
##   <fct>         <fct> <dbl>        <dbl>  <dbl> <dbl>   <dbl>   <dbl>      <dbl>
## 1 United States USA    1960 543300000000  NA     13.6    4.20    4.97  180671000
## 2 United States USA    1961 563300000000   2.30  13.7    4.03    4.90  183691000
## 3 United States USA    1962 605100000000   6.10  13.9    4.13    4.81  186538000
## 4 United States USA    1963 638600000000   4.40  14.0    4.09    4.87  189242000
## 5 United States USA    1964 685800000000   5.80  14.2    4.10    5.10  191889000
## 6 United States USA    1965 743700000000   6.40  14.4    4.24    4.99  194303000

us_econ %>%
  autoplot(Exports) + 
  labs(title = "US Exports", x="Year", y="Exports of goods and services (% of GDP)")

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

The plot above shows United States Exports of goods and services as a % of GDP. We can see that there is a clear overall rise in exports as a percent of GDP over the years with some cyclic patterns like a big drop off in the 1980s and then another in the early 2000s and one more around 2020 which of course was when covid was.

Part B

Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.

us_econ <- us_econ %>% filter(!is.na(Exports))

us_fit <- us_econ %>%
  model(ANN = ETS(Exports ~ error("A") + trend("N") + season("N")))

us_forecast <- us_fit %>%
  forecast(h=6)

us_forecast %>%
  autoplot(us_econ) +
  labs(title="US Exports Forecast", x="Year", y="Exports of goods and services (% of GDP)")

Part C

Compute the RMSE values for the training data.

accuracy(us_fit)

## # A tibble: 1 × 11
##   Country       .model .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <fct>         <chr>  <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 United States ANN    Training 0.125 0.627 0.465  1.37  5.10 0.990 0.992 0.239

From the output above we can see the RMSE is 0.63.

Part D

Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.

us_fit_comp <- us_econ %>%
  model(
    ANN = ETS(Exports ~ error("A") + trend("N") + season("N")),
    AAN = ETS(Exports ~ error("A") + trend("A") + season("N"))
    )

us_forecast_comp <- us_fit_comp %>%
  forecast(h=6)

us_forecast_comp %>%
  autoplot(us_econ) + 
    labs(title="US Exports Forecast", x="Year", y="Exports of goods and services (% of GDP)")

accuracy(us_fit_comp)

## # A tibble: 2 × 11
##   Country       .model .type      ME  RMSE   MAE     MPE  MAPE  MASE RMSSE  ACF1
##   <fct>         <chr>  <chr>   <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 United States ANN    Train… 0.125  0.627 0.465  1.37    5.10 0.990 0.992 0.239
## 2 United States AAN    Train… 0.0108 0.615 0.466 -0.0570  5.19 0.992 0.973 0.238

From the output above we can see that the AAN model has a slightly lower RMSE which would suggest that it is the better fit to the data. Also, since there is a obvious overall positive trend to the data I think that the AAN model also has more merit for this data set because it has that trend component.

Part E

Compare the forecasts from both methods. Which do you think is best?

us_forecast_comp %>%
  autoplot(us_econ, level = NULL) + 
    labs(title="US Exports Forecast", x="Year", y="Exports of goods and services (% of GDP)")

Comparing the results from both methods above I think that the AAN model is better. The ANN models predictions seem to be a constant last observation prediction while the AAN model is actually accounting for the positive overall trend in the data and giving a more meaningful forecast.

Part F

Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.

acc_fit_comp <- accuracy(us_fit_comp)
ann_rmse <- acc_fit_comp$RMSE[1]
aan_rmse <- acc_fit_comp$RMSE[2]

ann_first_forecast <- us_forecast_comp %>%
  filter(.model == "ANN") %>%
  slice(1) 

aan_first_forecast <- us_forecast_comp %>%
  filter(.model == "AAN") %>%
  slice(1)

ann_mean <- ann_first_forecast$.mean
aan_mean <- aan_first_forecast$.mean

lower_ann <- ann_mean - 1.96 * ann_rmse
upper_ann <- ann_mean + 1.96 * ann_rmse

lower_aan <- aan_mean - 1.96 * aan_rmse
upper_aan <- aan_mean + 1.96 * aan_rmse

# Print results
print(paste("Manual 95% PI for ETS(A,N,N):", lower_ann, "to", upper_ann))

## [1] "Manual 95% PI for ETS(A,N,N): 10.6616317815007 to 13.1197353879122"

print(paste("Manual 95% PI for ETS(A,A,N):", lower_aan, "to", upper_aan))

## [1] "Manual 95% PI for ETS(A,A,N): 10.8012985927343 to 13.2119283075914"

ann_r_lower <- quantile(ann_first_forecast$Exports, p = 0.025)
ann_r_upper <- quantile(ann_first_forecast$Exports, p = 0.975)

aan_r_lower <- quantile(aan_first_forecast$Exports, p = 0.025)
aan_r_upper <- quantile(aan_first_forecast$Exports, p = 0.975)

print(paste("R 95% PI for ETS(A,N,N):", ann_r_lower, "to", ann_r_upper))

## [1] "R 95% PI for ETS(A,N,N): 10.6395079134527 to 13.1418592559602"

print(paste("R 95% PI for ETS(A,A,N):", aan_r_lower, "to", aan_r_upper))

## [1] "R 95% PI for ETS(A,A,N): 10.7566652294566 to 13.2565616708691"

We can see from the output above that the prediction intervals that were manually computed are tighter than the ones R produced. The ones that R produced are a bit wider for both models with 10.66 to 13.12 for ANN manual and 10.64 to 13.14 for what R produced and we see similar behavior in the AAN model with a 10.80 to 13.21 for manual and 10.75 to 13.25 for Rs.

Question 6

Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.

chinese_gdp <- global_economy %>%
  filter(Country == "China") %>%
  select(Year, GDP)
chinese_gdp %>%
  autoplot(GDP) +
  labs(title = "Chinese GDP", x="Year", y="Gross domestic product (in $USD February 2019)")

chinese_lambda <- chinese_gdp %>%
  features(GDP, features = guerrero) %>%
  pull(lambda_guerrero)

china_fit_comp <- chinese_gdp %>%
  model(
    auto = ETS(),
    ETS_Box = ETS(box_cox(GDP, chinese_lambda)),
    AAN = ETS(GDP ~ error("A") + trend("A") + season("N")),
    AADN = ETS(GDP ~ error("A")+ trend("Ad") + season("N"))
    )

## Model not specified, defaulting to automatic modelling of the `GDP` variable.
## Override this using the model formula.

china_forecast_comp <- china_fit_comp %>%
  forecast(h=30) %>%
  mutate(.mean = if_else(.model == "ETS_Box", inv_box_cox(.mean, chinese_lambda), .mean))

china_forecast_comp %>%
  autoplot(chinese_gdp, level = NULL) + 
    labs(title="Chinese GDP Forecast", x="Year", y="Gross domestic product (in $USD February 2019)")

accuracy(china_fit_comp)

## # A tibble: 4 × 10
##   .model  .type              ME    RMSE     MAE   MPE  MAPE  MASE RMSSE     ACF1
##   <chr>   <chr>           <dbl>   <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 auto    Training      4.13e10 2.00e11 9.66e10 2.11   7.72 0.446 0.477  0.268  
## 2 ETS_Box Training     -2.75e10 2.88e11 1.25e11 0.607  7.17 0.578 0.688  0.665  
## 3 AAN     Training      2.36e10 1.90e11 9.59e10 1.41   7.62 0.442 0.453  0.00905
## 4 AADN    Training      2.95e10 1.90e11 9.49e10 1.62   7.62 0.438 0.454 -0.00187

From the output above we can see that the Box-cox transformed model is really taking an exponential trend while the other models give more reasonable more linear forecast.

Question 7

Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?

aus_gas <- aus_production %>%
  select(Quarter, Gas)
head(aus_gas)

## # A tsibble: 6 x 2 [1Q]
##   Quarter   Gas
##     <qtr> <dbl>
## 1 1956 Q1     5
## 2 1956 Q2     6
## 3 1956 Q3     7
## 4 1956 Q4     6
## 5 1957 Q1     5
## 6 1957 Q2     7

aus_gas %>%
  autoplot(Gas) +
  labs(title = "Austrialian Gas production in petajoules" , x="Year Quarter", y="Gas Production in petajoules")

aus_gas_fit_comp <- aus_gas %>%
  model(
    auto = ETS(),
    additive = ETS(Gas ~ error("A") + trend("A") + season("A")),
    multiplicative = ETS(Gas ~ error("M") + trend("A") + season("M")),
    aDamped = ETS(Gas ~ error("A") + trend("Ad") + season("A")),
    mDamped = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
    )

## Model not specified, defaulting to automatic modelling of the `Gas` variable.
## Override this using the model formula.

aus_gas_forecast_comp <- aus_gas_fit_comp %>%
  forecast(h=20)

aus_gas_forecast_comp %>%
  autoplot(aus_gas, level = NULL) + 
    labs(title="Austrialian Gas production in petajoules", x="Year Quarter", y="Gas Production in petajoules")

accuracy(aus_gas_fit_comp)

## # A tibble: 5 × 10
##   .model         .type          ME  RMSE   MAE    MPE  MAPE  MASE RMSSE    ACF1
##   <chr>          <chr>       <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 auto           Training -0.115    4.60  3.02  0.199  4.08 0.542 0.606 -0.0131
## 2 additive       Training  0.00525  4.76  3.35 -4.69  10.9  0.600 0.628  0.0772
## 3 multiplicative Training -0.115    4.60  3.02  0.199  4.08 0.542 0.606 -0.0131
## 4 aDamped        Training  0.600    4.58  3.16  0.460  7.39 0.566 0.604  0.0660
## 5 mDamped        Training -0.00439  4.59  3.03  0.326  4.10 0.544 0.606 -0.0217

report(aus_gas_fit_comp)

## Warning in report.mdl_df(aus_gas_fit_comp): Model reporting is only supported
## for individual models, so a glance will be shown. To see the report for a
## specific model, use `select()` and `filter()` to identify a single model.

## # A tibble: 5 × 9
##   .model           sigma2 log_lik   AIC  AICc   BIC   MSE  AMSE    MAE
##   <chr>             <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 auto            0.00324   -831. 1681. 1682. 1711.  21.1  32.2 0.0413
## 2 additive       23.6       -927. 1872. 1873. 1903.  22.7  29.7 3.35  
## 3 multiplicative  0.00324   -831. 1681. 1682. 1711.  21.1  32.2 0.0413
## 4 aDamped        21.9       -919. 1857. 1858. 1891.  21.0  29.6 3.16  
## 5 mDamped         0.00329   -832. 1684. 1685. 1718.  21.1  32.0 0.0417

I decided to fit 5 different models to the Australian gas data. I fitted a auto ETS model, Additive, Multiplicative, Additive Damped and Multiplicative Damped model. Looking at the RMSE we can see that they all give similar RMSE with the Additive Damped giving the lowest. Then, looking at AIC/BICs we can see that the auto and Multiplicative models gave the lowest AICs and BICs they actually gave the exact same results for both hinting that they may be the exact same models. So, if going by AIC/BIC take the Multiplicative model and if you want to go off of RMSE go for the Additive damped model. Multiplicative seasonality is needed in this case because we can see in the graph that as time goes on the seasonal variance also increases.

Question 8.8

Recall your retail time series data (from Exercise 7 in Section 2.10).

Part A

Why is multiplicative seasonality necessary for this series?

set.seed(12345678)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

myseries %>% 
  autoplot()

## Plot variable not specified, automatically selected `.vars = Turnover`

Multiplicative seasonality is necessary for this series because the variance changes over time, it increases as time increases with the seasonality. At the start we see small seasonal variance and then as time goes on the season variance increases.

Part B

Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.

holts_models <- myseries %>%
  model(
    hw = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    hw_damped = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
  )

holts_forecasts <- holts_models %>%
  forecast(h=20)

holts_forecasts %>%
  autoplot(myseries, level = NULL) + 
    labs(title="Austrialian Retail Turnover Forecasts", x="Year Month", y="Retail turnover in $Million AUD")

accuracy(holts_models)

## # A tibble: 2 × 12
##   State      Industry .model .type      ME  RMSE   MAE     MPE  MAPE  MASE RMSSE
##   <chr>      <chr>    <chr>  <chr>   <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1 Northern … Clothin… hw     Trai… -0.0128 0.613 0.450 -0.469   5.15 0.513 0.529
## 2 Northern … Clothin… hw_da… Trai…  0.0398 0.616 0.444 -0.0723  5.06 0.507 0.531
## # ℹ 1 more variable: ACF1 <dbl>

report(holts_models)

## Warning in report.mdl_df(holts_models): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.

## # A tibble: 2 × 11
##   State     Industry .model  sigma2 log_lik   AIC  AICc   BIC   MSE  AMSE    MAE
##   <chr>     <chr>    <chr>    <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 Northern… Clothin… hw     0.00447   -870. 1775. 1776. 1841. 0.376 0.473 0.0511
## 2 Northern… Clothin… hw_da… 0.00457   -872. 1781. 1783. 1851. 0.379 0.479 0.0505

The regular Holts Winters method appears to have a little more variation and seems to have higher peaks and lower drops than the dampened model.

Part C

Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?

accuracy(holts_models) %>%
  select(.model, RMSE)

## # A tibble: 2 × 2
##   .model     RMSE
##   <chr>     <dbl>
## 1 hw        0.613
## 2 hw_damped 0.616

From the output above we can see that the RMSE is slightly lower for the Non damped model so I would prefer this model as opposed to the damped one in this case.

Part D

Check that the residuals from the best method look like white noise.

resids_best <- myseries %>%
  model(hw = ETS(Turnover ~ error("M") + trend("A") + season("M")))
resids_best %>%
  augment() %>%
  autoplot(.resid) +
  labs(title = "Residuals from the Best Model",
       y = "Residuals",
       x = "Time")

resids_best %>% augment() %>%
  features(.innov, ljung_box, lag = 10)

## # A tibble: 1 × 5
##   State              Industry                           .model lb_stat lb_pvalue
##   <chr>              <chr>                              <chr>    <dbl>     <dbl>
## 1 Northern Territory Clothing, footwear and personal a… hw        11.0     0.359

Looking at the above plot and results from the lijung box test the residuals do appear to be white noise. In the plot we can see that they are centered about zero and there doesnt seem to be any trend or seasonality apparent in the residuals. Then, when looking at the box test we can see that the p value shows that the residuals are white noise.

Part E

Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?

myseries_train <- myseries %>%
  filter(year(Month) < 2011)

fit_train <- myseries_train %>%
  model(
    best_model = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    snaive = SNAIVE(Turnover)
    )

forecasts <- fit_train %>%
  forecast(new_data = anti_join(myseries, myseries_train))

## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`

forecasts %>% autoplot(myseries_train, level = NULL)

forecasts %>% accuracy(myseries)  %>%
  select(.type, .model, RMSE)

## # A tibble: 2 × 3
##   .type .model      RMSE
##   <chr> <chr>      <dbl>
## 1 Test  best_model  1.78
## 2 Test  snaive      1.55

Based on the output above we can see that the seasonal naive method was able to fit to the data better giving the smaller RMSE of 1.55 compared to 1.78 so I would say that I was not able to beat the seasonal naive model in this case it is the better model.

Question 9

For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?

train_data <- myseries %>%
  filter(Month < yearmonth("2011 Jan"))

test_data <- myseries %>%
  filter(Month >= yearmonth("2011 Jan"))

lambda <- train_data %>%
  features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero)

train_data %>%
  model(STL(box_cox(Turnover,lambda) ~ season(window = "periodic"), robust = TRUE)) %>%
  components() %>%
  autoplot() +
  ggtitle("STL with Box-Cox Transformation")

train_decomp <- train_data %>%
  model(STL_box = STL(box_cox(Turnover,lambda) ~ season(window = "periodic"), robust = TRUE)) %>%
  components()

# Seasonally Adjusted
train_data$Turnover <- train_decomp$season_adjust

fit <- train_data %>%
  model(ETS(Turnover ~ error("M") + trend("A") + season("M")))
fit %>% gg_tsresiduals()  +
  ggtitle("Australian Retail Turnover Residual")

test_forecast <- fit %>%
  forecast(new_data = anti_join(myseries, train_data))

## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`

test_forecast %>% accuracy(myseries)  %>%
  select(.type, .model, RMSE)

## # A tibble: 1 × 3
##   .type .model                                                         RMSE
##   <chr> <chr>                                                         <dbl>
## 1 Test  "ETS(Turnover ~ error(\"M\") + trend(\"A\") + season(\"M\"))"  5.21

From the output above we can see that this model performed much worse than the original model giving a RSME of 5.21 compared to 1.78 and 1.55 from the previous question. Doing the ETS on the seasonally adjusted data seems to have decreased the model greatly.

Homework 5

Joey Bochnik

2025-03-04

Question 8.1

Part A

Part B

Question 8.5

Part A

Part B

Part C

Part D

Part E

Part F

Question 6

Question 7

Question 8.8

Part A

Part B

Part C

Part D

Part E

Question 9