624 hw5

library(tsibble)

## Warning: package 'tsibble' was built under R version 4.4.3

## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr

## 
## Attaching package: 'tsibble'

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union

library(fpp3)

## Warning: package 'fpp3' was built under R version 4.4.3

## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.2 ──

## ✔ tibble      3.2.1     ✔ ggplot2     3.5.1
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.4.2
## ✔ lubridate   1.9.3     ✔ fable       0.5.0

## Warning: package 'dplyr' was built under R version 4.4.2

## Warning: package 'ggplot2' was built under R version 4.4.2

## Warning: package 'tsibbledata' was built under R version 4.4.3

## Warning: package 'feasts' was built under R version 4.4.3

## Warning: package 'fabletools' was built under R version 4.4.3

## Warning: package 'fable' was built under R version 4.4.3

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()     masks base::date()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ tsibble::intersect()  masks base::intersect()
## ✖ lubridate::interval() masks tsibble::interval()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ tsibble::setdiff()    masks base::setdiff()
## ✖ tsibble::union()      masks base::union()

8.1

Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.

Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of alpha and l0, and generate forecasts for the next four months.

# Plot data 
pigs <- aus_livestock |>
  filter(Animal == "Pigs" ) |>
  filter(State == "Victoria")|>
  select(Month, Count)

pigs |> 
  autoplot(Count)

There is no clear trend or seasonality in this dataset so we should use simple exponential smoothing.

# Find ideal parameters 
pigs_fit <- pigs |>
  model(ETS(Count ~ error("A") + trend("N") + season("N")))
coef(pigs_fit)

The optimal alpha is 0.322 and l0 is 100,647.

# Forecast next 4 months 
pigs_fit |> 
  forecast(h=4)

Compute a 95% prediction interval for the first forecast using ^y±1.96s where s is the standard deviation of the residuals. Compare your interval with the interval produced by R.

# First forecast 
print(pigs_fit |>
        forecast( h=1) |>
        select(Month, Count))

## # A fable: 1 x 2 [1M]
##      Month             Count
##      <mth>            <dist>
## 1 2019 Jan N(95187, 8.7e+07)

# Define variables 
p_hat <- 95187  # p_hat value is from the first forecast
sd_pigs <- sd(augment(pigs_fit)$.resid)

# Caclulate upper and lower limit of interval 
lower_lim <- p_hat - (1.96 * sd_pigs)
upper_lim <- p_hat + (1.96 * sd_pigs)
pigs_ci <- c(lower_lim, upper_lim)
print (pigs_ci)

## [1]  76871.46 113502.54

# Find R CI 
print(pigs_fit |>
        forecast(h=1)|>
        hilo(level =95) |>
        select(Month, ".mean", "95%"))

## # A tsibble: 1 x 3 [1M]
##      Month  .mean                  `95%`
##      <mth>  <dbl>                 <hilo>
## 1 2019 Jan 95187. [76854.79, 113518.3]95

The calculated prediction interval is similar to the CI predicted in R. The CI range calculated by R is more narrow.

8.5

Data set global_economy contains the annual Exports from many countries. Select one country to analyse.

a

Plot the Exports series and discuss the main features of the data.

# Plot France data 
fr_exports <- global_economy |>
  filter(Country == "France") |>
  select(Year, Exports) 

fr_exports |>
  autoplot(Exports)

There an upward trend but no seasonality.

b

Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.

# Forecast 
fr_exports |>
  model(ETS(Exports ~ error("A") + trend("N") + season("N"))) |>
  forecast(h=25) |> 
  autoplot(fr_exports, level=NULL)

c

Compute the RMSE values for the training data.

# Model using 80% of data 
exports_training <- fr_exports[1:round(nrow(fr_exports)*.8),]
exports_forecast <- exports_training |>
  model(ETS(Exports ~ error("A") + trend("N") + season("N"))) |>
  forecast(new_data = anti_join(fr_exports, exports_training))

## Joining with `by = join_by(Year, Exports)`

# Find RMSE of forecast 
accuracy(exports_forecast, fr_exports) |>
  select(.model, RMSE)

The RMSE of this data is 2.306.

d

Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.

fr_exports |>
  model(ETS(Exports ~ error("A") + trend("A") + season("N"))) |>
  forecast(h=25) |> 
  autoplot(fr_exports, level =NULL )

The additional parameter used in the the AAN model accounts fo the upward trend which the ANN model excludes.

e

Compare the forecasts from both methods. Which do you think is best?

fr_exports |>
  model(
  ANN = ETS(Exports ~ error("A") + trend("N") + season("N")),
  AAN = ETS(Exports ~ error("A") + trend("A") + season("N"))) |>
  forecast(h=25) |>
  autoplot(fr_exports, level = NULL)

The AAN model seems like a better fit for this data since it considers the positive trend in this data. I can also find the RMSE to confirm my thoughts.

# Create model
fr_exports_RMSE <- fr_exports |>
  model(
    ANN = ETS(Exports~ error("A") + trend("N") + season("N")),
    AAN = ETS(Exports~ error("A") + trend("A") + season("N"))) |>
  forecast(anti_join(fr_exports, exports_training))

## Joining with `by = join_by(Year, Exports)`

# RMSE 
accuracy(fr_exports_RMSE, fr_exports) |>
  select(.model, RMSE)

Interestingly, the RMSE is lower in the ANN model vs the AAN. I think the AAN model may be overshooting the upward trend. In this case, a damped model may be helpful in a more accurate model.

# Damped model plot 
fr_exports |> 
  model(
    ETS(Exports~ error("A") + trend("Ad") + season("N"))) |>
  forecast(h = 25) |>
  autoplot(fr_exports, level = NULL)

f

Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.

# Define models
fr_exports_a <- fr_exports |>
  model(ETS(Exports ~ error("A") + trend("N") + season("N")))
fr_exports_b <- fr_exports |>
  model(ETS(Exports ~ error("A") + trend("A") + season("N")))

# s values 
fr_exports_as <- sd(augment(fr_exports_a)$.resid)
fr_exports_bs <- sd(augment(fr_exports_b)$.resid)

# p_hat
fr_exports_a |>
  forecast(h=1) |>
  select(Year, Exports)

fr_exports_b |>
  forecast(h=1) |> 
  select(Year, Exports)

## 95% CI for model A (ANN)
lower_lim_a <- 31 - (1.96 * fr_exports_as)
upper_lim_a <- 31 + (1.96 * fr_exports_as)
ann_ci <- c(lower_lim_a, upper_lim_a)
print(ann_ci)

## [1] 28.79276 33.20724

##  95% CI using R 
 fr_exports_a |>
  forecast(h=1) |>
  hilo(level =95) |>
  select(Year, ".mean", "95%")

For model A (ANN), the CI is very similar for both the calculated and R versions. The R version has a slightly lower range compared to the calculated value.

# 95% CI of model B (AAN)

lower_lim_b <- 31 - (1.96 * fr_exports_bs)
upper_lim_b <- 31 + (1.96 * fr_exports_bs)
aan_ci <- c(lower_lim_b, upper_lim_b)
print(aan_ci)

## [1] 28.78581 33.21419

# 95% CI of model B using R 
fr_exports_b |> 
  forecast(h=1) |> 
  hilo(level=95) |>
  select(Year, ".mean", "95%")

In the AAN model, the CIs are again very similar. The R version of the CI is slightly higher in this case.

8.6

Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.

[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]

# Filter data
china_gdp <- global_economy |>
  filter(Country == "China") |>
  select(Year, GDP)

# ANN and AAN forecasts 
china_gdp |>
  model(
    ANN = ETS(GDP ~ error("A")+ trend("N")+ season("N")),
    AAN = ETS(GDP ~ error("A")+ trend("A") + season("N"))) |> 
  forecast(h=25) |>
  autoplot(china_gdp, level = NULL)

The AAN model accounts for the upward trend while ANN does not.

For the damped model, I will try the minimum suggested value of 0.8 for phi, the maximum of 0.98 and a middle value of 0.9.

# Damped AAN 
china_gdp |> 
  model(
    "phi = 0.8" = ETS(GDP~ error("A") + trend("Ad", phi = 0.8) + season("N")),
    "phi = 0.9" = ETS(GDP~ error("A") + trend("Ad", phi = 0.9) + season("N")),
    "phi = 0.98" = ETS(GDP~ error("A") + trend("Ad", phi = 0.98) + season("N"))) |>
  forecast(h = 25) |>
  autoplot(china_gdp, level = NULL)

The damped trend model uses the existing trend to forecast the positive trend in this data that eventually levels off. Phi values that are larger (close to 1) result in a forecast that take longer time to level off. Phi values that are lower result in a forecast that has an immediate increase and more rapidly levels off.

# Lambda value
lambda_china_gdp <- china_gdp |>
  features(GDP, features = guerrero) |>
  pull(lambda_guerrero)

# Box Cox transformation 
china_box <- china_gdp |>
  mutate(china_bc = box_cox(GDP, lambda_china_gdp)) |>
  select(Year, china_bc)

# Forecast 
china_box |> 
  model(
    "ANN" = ETS(china_bc ~ error("A") + trend("N") + season("N")),
    "AAN" = ETS(china_bc ~ error("A") + trend("A") + season("N")),
    "phi = 0.8" = ETS(china_bc ~ error("A") + trend("Ad", phi = 0.8) + season("N")),
    "phi = 0.9" = ETS(china_bc ~ error("A") + trend("Ad", phi = 0.9) + season("N")),
    "phi = 0.98" = ETS(china_bc ~ error("A") + trend("Ad", phi = 0.98) + season("N"))) |>
  forecast(h = 25) |>
  autoplot(china_box, level = NULL)

The box-cox transformation shows that the data has been straightened.

8.7

Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?

# Plot data
aus_gas <- aus_production |>
  select(Quarter, Gas)

aus_gas |> autoplot(Gas)

There is an upward trend and seasonality so the MAM model may be useful here.

# Forecast using MAM 
aus_gas |>
  model(ETS(Gas ~ error("A")+ trend("A")+ season("A"))) |>
  forecast(h=25) |>
  autoplot(aus_gas, level = NULL)

Multiplicative seasonality is necessary here to account for the proportional growth of seasonal variation with the level of the plot. At the beginning, the variation of values is much smaller than later in the plot. Additive seasonality does not account for this change so the forecast would not show the increasing variation.

# Multiplicative seasonality 

aus_gas |>
  model(
    additive = ETS(Gas ~ error("A") + trend("A") + season("A")),
    multiplicative = ETS(Gas ~ error("M") + trend("A") + season("M"))) |> 
  forecast(h=50) |>
  autoplot(aus_gas, level =NULL)

# Damped forecast 
aus_gas |> 
  model(ETS(Gas ~ error("A") + trend("Ad") + season("A"))) |>
  forecast(h=50) |>
  autoplot(aus_gas, level=NULL)

The damped forecast levels off the growth unlike the previous models. This isn’t as consistent to the overall trend of the data which shows a clear upward trend so I do not believe the damped model is an improvement.

8.8

Recall your retail time series data (from Exercise 7 in Section 2.10).

# code from book 
set.seed(31415)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
print(head(myseries))

## # A tsibble: 6 x 5 [1M]
## # Key:       State, Industry [1]
##   State              Industry                      `Series ID`    Month Turnover
##   <chr>              <chr>                         <chr>          <mth>    <dbl>
## 1 Northern Territory Other recreational goods ret… A3349843L   1988 Apr      0.6
## 2 Northern Territory Other recreational goods ret… A3349843L   1988 May      1.1
## 3 Northern Territory Other recreational goods ret… A3349843L   1988 Jun      0.9
## 4 Northern Territory Other recreational goods ret… A3349843L   1988 Jul      0.9
## 5 Northern Territory Other recreational goods ret… A3349843L   1988 Aug      1.1
## 6 Northern Territory Other recreational goods ret… A3349843L   1988 Sep      1.2

myseries <- myseries |>
  select(Month, Turnover)

Why is multiplicative seasonality necessary for this series?

# Plot data
myseries |>
  autoplot(Turnover)

Multiplicative seasonality is necessary because of the variation which increases over time.

b.Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.

myseries |> 
  model(
    "Non-damped" = ETS(Turnover ~ error("M") + trend("A") + season("M")), 
     "Damped" = ETS(Turnover ~ error ("M") + trend("Ad") + season("M"))) |> 
  forecast(h=50) |>
  autoplot(myseries, level = NULL )

The damped model shows a slight downward trend comapred to the non-damped model.

Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?

# Training data 

train_myseries <- myseries[1:round(nrow(myseries)*.8),]
myseries_forecast <- train_myseries |>
  model(
    "Non-damped" = ETS(Turnover ~ error("M") + trend("A") + season("M")), 
    "Damped" = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))) |> 
  forecast(anti_join(myseries, train_myseries))

## Joining with `by = join_by(Month, Turnover)`

accuracy(myseries_forecast, myseries) |>
  select(.model, RMSE)

The damped model has a lower RMSE so I would prefer that model ov er the non-damped.

Check that the residuals from the best method look like white noise.

myseries |> 
  model(ETS(Turnover ~ error ("M") + trend("Ad") + season("M"))) |>
  gg_tsresiduals()

## Warning: `gg_tsresiduals()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsresiduals()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

There are no clear patterns or significant outliers in the residuals. They are likely white noise.

Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?

# Filter  data
train_2010 <- myseries |>
  filter_index("1988 Apr" ~ "2010 Dec" )

# Forecast remaining data
myseries_forecast_b <- train_2010 |>
  model(ETS(Turnover~ error("M") + trend("Ad") + season("M"))) |>
  forecast(anti_join(myseries, train_2010))

## Joining with `by = join_by(Month, Turnover)`

# RMSE
accuracy(myseries_forecast_b, myseries) |> select(.model, RMSE)

# Accuracy of Naive method
myseries_train <- myseries |>
  filter(year(Month) < 2011)
myseries_fit <-myseries_train |>
  model(SNAIVE())

## Model not specified, defaulting to automatic modelling of the `Turnover`
## variable. Override this using the model formula.

myseries_fit <- myseries_fit |>
  forecast(new_data = anti_join(myseries, myseries_train))

## Joining with `by = join_by(Month, Turnover)`

myseries_fit |> 
  accuracy(myseries) |>
  select(.model, RMSE)

8.9

For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?

# Find lambda value 
lambda <- myseries |> 
  features(Turnover, features = guerrero) |>
  pull(lambda_guerrero)

# Box-cox transformation 

myseries_box <- myseries |>
  mutate(box = box_cox(Turnover, lambda)) |>
  select(Month, box)

# STL decomposition
stl <- myseries_box |>
  model(STL(box)) |>
  components() |>
  as_tsibble() |>
  select(Month, season_adjust) 

# Training set and fit 
stl_train <- stl |>
  filter(year(Month) <2011) 
ets_fit <- stl_train |>
  model(ETS(season_adjust ~ error("A") + trend("Ad") + season("N")))

# Forecast 
forecast <- ets_fit |>
  forecast(anti_join(stl, stl_train))

## Joining with `by = join_by(Month, season_adjust)`

# accuracy of forecast 

forecast |> 
  accuracy(stl) |>
  select(.model, RMSE)

The RMSE is 0.133 which shows that this is the best forecast of the ones I tried.

624 hw5

Jane Song

2026-03-06

8.1

8.5

a

b

c

d

e

f

8.6

8.7

8.8

8.9