homework 5

8.1 Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset. Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of
α and ℓ0 , and generate forecasts for the next four months.

#this is called sheeps because I accidentally first did it with sheep, and realized my error later on
#rather than change the code, I left it.
sheeps <- aus_livestock |> 
  filter(Animal == "Pigs") |> 
  summarise(Count = sum(Count))

fit <- sheeps |>
  model(ETS(Count ~ error("A") + trend("N") + season("N")))
fc <- fit |>
  forecast(h = 4)

report(fit)

## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.2229786 
## 
##   Initial states:
##      l[0]
##  388521.4
## 
##   sigma^2:  871548918
## 
##      AIC     AICc      BIC 
## 15019.86 15019.90 15032.83

# alpha = 0.2229786    l[0] =  388521.4

Compute a 95% prediction interval for the first forecast using
^y±1.96s where s is the standard deviation of the residuals. Compare your interval with the interval produced by R.

fc |>
    autoplot(sheeps) +
    geom_line(aes(y = .fitted), col="#D55E00",
              data = augment(fit)) +
    labs(title="Sheep Slaughter: Australia") +
    guides(colour = "none")

#calculate the prediction interval

#forecast
fc <- fit |> forecast(h = 1)

#mean of the forecast
y <- fc$.mean

print(y)

## [1] 448200.4

#standard deviation of the residuals
s <- sqrt(glance(fit)$sigma2)

print(s)

## [1] 29522.01

#calculating 
lower <- y - 1.96 * s
upper <- y + 1.96 * s

print(lower)

## [1] 390337.3

print(upper)

## [1] 506063.5

#lower - 390337.3, upper - 506063.5

#R's interval
# hilo function converts the forecast distributions into intervals
sheep_interval <- fc |> hilo(level = 95)

print(sheep_interval$`95%`)

## <hilo[1]>
## [1] [390338.3, 506062.5]95

sheep_interval |> 
  unpack_hilo("95%")

## # A tsibble: 1 x 6 [?]
## # Key:       .model [1]
##   .model                                                        Month
##   <chr>                                                         <mth>
## 1 "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"N\"))" 2019 Jan
## # ℹ 4 more variables: Count <dist>, .mean <dbl>, `95%_lower` <dbl>,
## #   `95%_upper` <dbl>

#they' incredibly similar for the first month - lower 390338.3, upper - 506062.5

8.5

Data set global_economy contains the annual Exports from many countries. Select one country to analyse.

Plot the Exports series and discuss the main features of the data.

italy <- global_economy |> filter(Country == "Italy") |> mutate (exports_by_pop = Exports/Population)
autoplot(italy, (Exports))

#adjusting for population
autoplot(italy, (exports_by_pop))

#their population is pretty stable (there isn't a ton of difference between these graphs)
#so I will use the exports variable.

#decompose
italy |>
  model(
    STL(Exports ~ trend(window = 7) +
                   season(window = "periodic"),
    robust = TRUE)) |>
  components() |>
  autoplot()

#generally trends upward with a drop in the 80s/90s and mid-2000s
#otherwise, trend is slightly inconsistent from year to year (frequent drops after spikes,
#though the trend is generally upward)
#data is yearly, so there's no way to see seasonality

Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.

fit2 <- italy |>
  model(ETS(Exports ~ error("A") + trend("N") + season("N")))
fc2 <- fit2 |>
  forecast(h = 10)

fc2 |>
  autoplot(italy)

report(fit2)

## Series: Exports 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.9998995 
## 
##   Initial states:
##      l[0]
##  12.49719
## 
##   sigma^2:  1.8461
## 
##      AIC     AICc      BIC 
## 275.0274 275.4719 281.2087

Compute the RMSE values for the training data.

fit2 |> accuracy()

## # A tibble: 1 × 11
##   Country .model        .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE     ACF1
##   <fct>   <chr>         <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 Italy   "ETS(Exports… Trai… 0.324  1.34  1.00  1.39  4.75 0.983 0.991 -0.00701

#RMSE = 1.34

Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.)

fit3 <- italy |>
  model(ETS(Exports ~ error("A") + trend("A") + season("N")))
fc3 <- fit3 |>
  forecast(h = 10)

fc3 |>
  autoplot(italy)

fit3 |> accuracy()

## # A tibble: 1 × 11
##   Country .model      .type       ME  RMSE   MAE    MPE  MAPE  MASE RMSSE   ACF1
##   <fct>   <chr>       <chr>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 Italy   "ETS(Expor… Trai… -0.00841  1.30 0.934 -0.291  4.47 0.916 0.962 0.0375

#RMSE value is 1.30

Discuss the merits of the two forecasting methods for this data set.

Both models don’t account for seasonality, which is appropriate for yearly data. AAN accounts for trend, and this data has trended upward over the years, so, on one hand, it feels appropriate to predict increasing exports. ANN does not account for trend, which can be a safer approach, as the trend is not the steadiest–there have been some big dips over the years, and exports can go up one year, down the next.

Compare the forecasts from both methods. Which do you think is best?

The AAN model seems like a better predictor, given that it continues the general upward trend of Italy’s exports. The ANN model assumes the same amount each year, which doesn’t reflect the trend and doesn’t seem appropriate given the amount of data.

Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R

#ETS(A,N,N)

fc_2 <- fit2 |> forecast(h = 1)

#Get the RMSE
accuracy_ann <- fit2 |> accuracy()
rmse_ann <- accuracy_ann$RMSE

#mean
y_ann <- fc_2$.mean

print(y_ann)

## [1] 31.29638

# interval with RMSE
lower_ann <- y_ann - 1.96 * rmse_ann
upper_ann <- y_ann + 1.96 * rmse_ann

print(lower_ann)

## [1] 28.67965

print(upper_ann)

## [1] 33.91311

#>lower_ann
#[1] 28.67965
#upper_ann
#[1] 33.91311

#R's interval

ann_interval <- fc_2 |> hilo(level = 95)

print(ann_interval$`95%`)

## <hilo[1]>
## [1] [28.63338, 33.95938]95

ann_interval |> 
  unpack_hilo("95%")

## # A tsibble: 1 x 7 [?]
## # Key:       Country, .model [1]
##   Country .model                                                        Year
##   <fct>   <chr>                                                        <dbl>
## 1 Italy   "ETS(Exports ~ error(\"A\") + trend(\"N\") + season(\"N\"))"  2018
## # ℹ 4 more variables: Exports <dist>, .mean <dbl>, `95%_lower` <dbl>,
## #   `95%_upper` <dbl>

#lower - 28.63338
#upper - 33.95938
#they are pretty close again. The manual interval has a slightly smaller difference 
#between upper and lower

# ETS(A,A,N)

fc_3 <- fit3 |> forecast(h = 1)

#Get the RMSE
accuracy_aan <- fit3 |> accuracy()
rmse_aan <- accuracy_aan$RMSE

#mean
y_aan <- fc_3$.mean

print(y_aan)

## [1] 31.57435

# interval with RMSE
lower_aan <- y_aan - 1.96 * rmse_aan
upper_aan <- y_aan + 1.96 * rmse_aan


print(lower_aan)

## [1] 29.03553

print(upper_aan)

## [1] 34.11317

#lower - 29.03553, upper  34.11317
# difference - 5.07764

#R's interval

aan_interval <- fc_3 |> hilo(level = 95)

print(aan_interval$`95%`)

## <hilo[1]>
## [1] [28.94323, 34.20548]95

aan_interval |> 
  unpack_hilo("95%")

## # A tsibble: 1 x 7 [?]
## # Key:       Country, .model [1]
##   Country .model                                                        Year
##   <fct>   <chr>                                                        <dbl>
## 1 Italy   "ETS(Exports ~ error(\"A\") + trend(\"A\") + season(\"N\"))"  2018
## # ℹ 4 more variables: Exports <dist>, .mean <dbl>, `95%_lower` <dbl>,
## #   `95%_upper` <dbl>

# lower 28.94323, #upper 34.20548
#difference - 5.26225

#Again, the values are similar, with the manual calculation having a slightly narrower interval

8.6 Forecast the Chinese GDP from the global_economy data set using an ETS model.

Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.

[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]

cgdp <- global_economy |> filter(Country == "China") |> mutate (gdp_by_pop = GDP/Population)

autoplot(cgdp, (gdp_by_pop))

#error and trend, but no seasonality (we can't have seasonality because the data is annual)
cgdpfit <- cgdp |>
  model(ETS(gdp_by_pop ~ error("A") + trend("A") + season("N")))

fc4 <- cgdpfit |>
  forecast(h = 20)

fc4 |>
  autoplot(cgdp)

#let's dampen the trend
cgdpfit2 <- cgdp |>
  model(ETS(gdp_by_pop ~ error("A") + trend("Ad") + season("N")))

fc5 <- cgdpfit2 |>
  forecast(h = 20)

fc5 |>
  autoplot(cgdp)

#no trend
cgdpfit3 <- cgdp |>
  model(ETS(gdp_by_pop ~ error("A") + trend("N") + season("N")))

fc6 <- cgdpfit3 |>
  forecast(h = 20)

fc6 |>
  autoplot(cgdp)

#with a Box-Cox transformation

lambda <- cgdp |>
  features(gdp_by_pop, features = guerrero) |>
  pull(lambda_guerrero)

cgdpfit4 <- cgdp |>
  model(ETS(box_cox(gdp_by_pop, lambda) ~ error("A") + trend("A") + season("N")))

fc7 <- cgdpfit4 |>
  forecast(h = 20)

fc7 |>
  autoplot(cgdp)

#Box-Cox and trend damped

cgdpfit4 <- cgdp |>
  model(ETS(box_cox(gdp_by_pop, lambda) ~ error("A") + trend("Ad") + season("N")))

fc7 <- cgdpfit4 |>
  forecast(h = 20)

fc7 |>
  autoplot(cgdp)

No trend, only error (ANN) is probably the worst model, since this data has a strong trend. A Box-Cox transformation makes the overall trend harder to see. It also makes the data look steady. This makes the fact that the prediction goes up visually confusing.

This data is all trend – going up consistently since around 2005 – so AAN or AAdN seem like the best options.

8.7 Find an ETS model for the Gas data from aus_production and forecast the next few years.

aus_production |> autoplot(Gas)

fit <- aus_production |>
  model(ETS(Gas))
report(fit)

## Series: Gas 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.6528545 
##     beta  = 0.1441675 
##     gamma = 0.09784922 
## 
##   Initial states:
##      l[0]       b[0]      s[0]    s[-1]    s[-2]     s[-3]
##  5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
## 
##   sigma^2:  0.0032
## 
##      AIC     AICc      BIC 
## 1680.929 1681.794 1711.389

#MAM

#forecast

gasfit <- aus_production |>
  model(ETS(Gas ~ error("M") + trend("A") + season("M")))

#10 years

fit_gas <- gasfit |>
  forecast(h = 40)

fit_gas |>
  autoplot(aus_production)

#5 years
fit_gas <- gasfit |>
  forecast(h = 20)

fit_gas |>
  autoplot(aus_production)

Why is multiplicative seasonality necessary here?

Gas production has grown significantly since the 1970s, so the seasonal gaps grew with it (the seasonality increases as time goes by, with smaller peaks and troughs in the 70s and larger ones in the 80s, 90s, and 200s). Multiplicative seasonality treats seasonality as a percentage rather than a flat value or number of units.

Experiment with making the trend damped. Does it improve the forecasts?

gasfit <- aus_production |>
  model(ETS(Gas ~ error("M") + trend("Ad") + season("M")))

#10 years

fit_gas <- gasfit |>
  forecast(h = 40)

fit_gas |>
  autoplot(aus_production)

#5 years

fit_gas <- gasfit |>
  forecast(h = 20)

fit_gas |>
  autoplot(aus_production)

#MAM

I don’t think damping the trend improves the forecast (though it doesn’t absolutely ruin it). It lowers the trend slightly, and this variable has an extremely strong trend component.

8.8

Recall your retail time series data (from Exercise 7 in Section 2.10).

aus_retail_3 <- aus_retail |> 
  filter (Industry == "Cafes, restaurants and catering services") |> 
  summarize(Turnover = sum(Turnover))


fit_aus_retail_3 <- aus_retail_3 |>
  model(ETS(Turnover))
report(fit_aus_retail_3)

## Series: Turnover 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.5740803 
##     beta  = 0.00360335 
##     gamma = 0.1409495 
## 
##   Initial states:
##      l[0]     b[0]      s[0]     s[-1]     s[-2]    s[-3]    s[-4]     s[-5]
##  142.3423 2.172155 0.9719912 0.9228726 0.9844832 1.251359 1.029051 0.9996936
##      s[-6]     s[-7]     s[-8]     s[-9]    s[-10]    s[-11]
##  0.9710056 0.9827915 0.9867263 0.9371142 0.9967852 0.9661259
## 
##   sigma^2:  0.0012
## 
##      AIC     AICc      BIC 
## 5547.043 5548.490 5616.557

#MAM


#forecast

retailfit <- aus_retail_3 |>
  model(ETS(Turnover ~ error("M") + trend("A") + season("M")))

#10 years

fit_retail <- retailfit |>
  forecast(h = 40)

fit_retail |>
  autoplot(aus_retail_3)

Why is multiplicative seasonality necessary for this series?

Like gas production, retail turnover has grown since the 70s/80s. The seasonality has also grown, with higher peaks, and as the data trend upward, the highs seasonal get farther from the seasonal lows.

Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.

#holt-Winters' mult method is applied above 

#damped trend
retailfit2 <- aus_retail_3 |>
  model(ETS(Turnover ~ error("M") + trend("Ad") + season("M")))

# Forecast 10 years
fit_retail2 <- retailfit2 |>
  forecast(h = 40)

fit_retail2 |>
  autoplot(aus_retail_3)

Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?

#non-dampened Holt-Winter's multiplicative 
retailfit |> accuracy()

## # A tibble: 1 × 10
##   .model                   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>                    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Turnover ~ error(\… Trai…  2.06  28.9  20.6 0.131  2.57 0.302 0.322 0.141

#28.9

#damped
retailfit2 |> accuracy()

## # A tibble: 1 × 10
##   .model                   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>                    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Turnover ~ error(\… Trai…  2.45  29.2  20.9 0.276  2.62 0.305 0.325 0.120

#29.2

The original, undamped model has a slightly lower RMSE, indicating a predicted smaller deviation from actuals (it fit the historical data better). The seasonal peaks are strong, so I prefer the undamped (Holt-Winter’s multiplicative) model.

Check that the residuals from the best method look like white noise.

retailfit |> gg_tsresiduals()

## Warning: `gg_tsresiduals()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsresiduals()` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

The histogram looks normally distributed, with one outlier on the right. The ACF plot, on the other hand, shows some small spikes outside the dotted line, particularly at lag 12 (which can indicate an unaccounted-for seasonal component). The time plot is on a very small scale, but there are some big swings that might not be white noise, particularly early on (late 80s). The intervals get tighter later on, which suggests that the model gets better later.

Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?

myseries_train <- aus_retail_3 |>
  filter(year(Month) < 2011)

fit <- myseries_train |>
  model(hw_mult = ETS(Turnover ~ error("M") + trend("A") + season("M")))

fit |> 
  forecast(h = "8 years") |> 
  autoplot(aus_retail_3)

#to ind RMSE
fit |> accuracy()

## # A tibble: 1 × 10
##   .model  .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>   <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 hw_mult Training  1.68  26.9  18.3 0.140  2.86 0.305 0.342 0.145

#RMSE 26.9 

#seasonal naive approach from chapter 5
myseries_train <- aus_retail_3 |>
  filter(year(Month) < 2011)

autoplot(aus_retail_3, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red")

fit <- myseries_train |>
  model(SNAIVE())

## Model not specified, defaulting to automatic modelling of the `Turnover`
## variable. Override this using the model formula.

fit |> accuracy()

## # A tibble: 1 × 10
##   .model   .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE() Training  47.7  78.7  60.0  7.64  9.13     1     1 0.838

#RMSE 78.7

#The lower RMSE on Holt-Winter's multiplicative beats the seasonal naive approach.

8.9 For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?

#get lambda
lambda <- aus_retail_3 |>
  features(Turnover, features = guerrero) |>
  pull(lambda_guerrero)


#decompose Box-Cox transformed data


stldata <- aus_retail_3 |>
  model(st = STL(box_cox(Turnover, lambda))) |>
  components() |>
  select(Month, season_adjust)

#get the best model
decomposed_aus_retail <- aus_retail_3 |>
  model(ETS(Turnover))
report(decomposed_aus_retail)

## Series: Turnover 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.5740803 
##     beta  = 0.00360335 
##     gamma = 0.1409495 
## 
##   Initial states:
##      l[0]     b[0]      s[0]     s[-1]     s[-2]    s[-3]    s[-4]     s[-5]
##  142.3423 2.172155 0.9719912 0.9228726 0.9844832 1.251359 1.029051 0.9996936
##      s[-6]     s[-7]     s[-8]     s[-9]    s[-10]    s[-11]
##  0.9710056 0.9827915 0.9867263 0.9371142 0.9967852 0.9661259
## 
##   sigma^2:  0.0012
## 
##      AIC     AICc      BIC 
## 5547.043 5548.490 5616.557

#still MAM

#ETS seasonally adjusted
fit_stl_ets <- stldata |>
  model(ets_sa = ETS(season_adjust ~ error("M") + trend("A") + season("M")))

#plot to visualize
final_fit <- fit_stl_ets |>
  forecast(h = 40)

final_fit |>
    autoplot(stldata)

#Visually, this looks like a good prediction

#check RMSE
 fit_stl_ets|> accuracy()

## # A tibble: 1 × 10
##   .model .type          ME  RMSE   MAE     MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>       <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ets_sa Training -0.00244 0.181 0.139 -0.0235 0.765 0.258 0.281 0.212

 #the RMSE is very low at .181, but this is adjusted for a box-cox transformation (different units), so they shouldn't be compared
 

 fit_adjust <- aus_retail_3 |>
 model(stlf = decomposition_model(
    STL(box_cox(Turnover, lambda)),
      ETS(season_adjust ~ error("M") + trend("A") + season("M"))
  ))
 
fit_adjust |> accuracy()

## # A tibble: 1 × 10
##   .model .type        ME  RMSE   MAE     MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>     <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stlf   Training -0.107  26.1  18.4 -0.0532  2.25 0.269 0.290 0.267

#26.1 is slightly better than MAM's earlier value of 28.9

homework 5

Samantha Barbaro

2026-03-03