Data624_homework5

(1)Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of αand ℓ0, and generate forecasts for the next four months. Compute a 95% prediction interval for the first forecast usings where sis the standard deviation of the residuals. Compare your interval with the interval produced by R.

library(forecast)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(fpp3)

## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──

## ✔ tibble      3.2.1     ✔ tsibble     1.1.4
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.0     ✔ feasts      0.3.1
## ✔ lubridate   1.9.3     ✔ fable       0.3.3
## ✔ ggplot2     3.4.4     ✔ fabletools  0.4.0

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

library(ggplot2)

library(dplyr)
library(fable)
library(fabletools)
library(ggplot2)

# Filter data for pigs in Victoria
aus_pig <- aus_livestock %>%
  filter(State == "Victoria" & Animal == "Pigs")

# Model fitting for pigs slaughtered in Victoria
aus_pig_fit <- aus_pig %>%
  model(
    ETS(Count ~ error("A") + trend("N") + season("N"))
  )

# Forecasting pig slaughter count for the next 4 periods
aus_pig_forecast <- aus_pig_fit %>%
  forecast(h = 4)

# Plotting forecasted pig slaughter count
aus_pig_forecast %>%
  autoplot(data = aus_pig %>%
             filter(Month >= yearmonth("2010 Jan"))) +
  labs(y = "Count", title = "Victoria Pigs Slaughtered") +
  guides(colour = "none")

report(aus_pig_fit)

## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3221247 
## 
##   Initial states:
##      l[0]
##  100646.6
## 
##   sigma^2:  87480760
## 
##      AIC     AICc      BIC 
## 13737.10 13737.14 13750.07

# Calculate standard deviation of residuals
resid_std <- sd(augment(aus_pig_fit)$`.resid`)

# Compute confidence interval
lower_bound <- aus_pig_forecast$.mean[1] - (resid_std * 1.96)
upper_bound <- aus_pig_forecast$.mean[1] + (resid_std * 1.96)

# Display calculated confidence interval
sprintf(
  "Calculated confidence interval: [%f, %f]", 
  lower_bound,
  upper_bound
)

## [1] "Calculated confidence interval: [76871.012478, 113502.102384]"

# Calculate 95% prediction interval for forecasted pig slaughter count
aus_pig_prediction_interval <- hilo(aus_pig_forecast$Count, level = 95)

print(aus_pig_prediction_interval)

## <hilo[4]>
## [1] [76854.79, 113518.3]95 [75927.17, 114445.9]95 [75042.22, 115330.9]95
## [4] [74194.54, 116178.6]95

Data set global_economy contains the annual Exports from many countries. Select one country to analyse.

Plot the Exports series and discuss the main features of the data. Use an ETS(A,N,N) model to forecast the series, and plot the forecasts. Compute the RMSE values for the training data. Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set. Compare the forecasts from both methods. Which do you think is best? Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.

# Filter data for Peru
aus_economy <- global_economy %>%
  filter(Country == "Peru")

# Plot Exports for Peru
aus_economy %>%
  autoplot(Exports) +
  labs(title = "Peru Exports")

# Fit ETS(A,N,N) model
aus_economy_fit <- aus_economy %>%
  model(ETS(Exports ~ error("A") + trend("N") + season("N")))

# Forecast using the fitted model
aus_economy_forecast <- aus_economy_fit %>%
  forecast(h = 12)  # Forecast for the next 12 periods

# Plot forecasts
aus_economy_forecast %>%
  autoplot(aus_economy) +
  labs(title = "Peru Exports Forecast (ETS(A,N,N))")

# Compute accuracy metrics and extract RMSE
rmse <- accuracy(aus_economy_fit) %>%
  pull(RMSE)

# Display RMSE
cat("RMSE for the training data is the RMSE of the ETS(A,N,N):", rmse, "\n")

## RMSE for the training data is the RMSE of the ETS(A,N,N): 2.862248

RMSE for ETS(A,A,N): 2.862429 ETS(A,N,N) has lower RMSE, suggesting better performance for this dataset.

# Forecast using ETS(A,N,N) model
forecast_ENN <- forecast(aus_economy_fit, h = 12)

# Forecast using ETS(A,A,N) model
#forecast_EAN <- forecast(aus_economy_fit_AA, h = 12)

# Plot forecasts
#autoplot(aus_economy) +
#  autolayer(forecast_ENN, series = "ETS(A,N,N) Forecast") +
#  autolayer(forecast_EAN, series = "ETS(A,A,N) Forecast") +
 # labs(title = "Comparison of Forecasts from ETS Models") +
 # guides(colour = guide_legend(title = "Forecast Method")) +
 # theme_minimal()

Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.

# Filter data for China
china_economy <- global_economy %>%
  filter(Country == "China")

# Calculate GDP per capita
china_economy <- china_economy %>%
  mutate(GDP_per_capita = GDP / Population)

# Plot GDP per capita for China
china_economy %>%
  autoplot(GDP_per_capita) +
  labs(title = "GDP per Capita in China")

# Fit ETS models with different options
fit_no_damped <- china_economy %>%
  model(ETS(GDP_per_capita ~ error("A") + trend("A") + season("N")))
fit_damped <- china_economy %>%
  model(ETS(GDP_per_capita ~ error("A") + trend("Ad") + season("N")))
fit_boxcox <- china_economy %>%
  model(ETS(GDP_per_capita ~ error("A") + trend("A") + season("N")))

# Forecast using the fitted models
forecast_no_damped <- forecast(fit_no_damped, h = 12)
forecast_damped <- forecast(fit_damped, h = 12)
forecast_boxcox <- forecast(fit_boxcox, h = 12)

# Plot forecasts
autoplot(china_economy, GDP_per_capita) +
  autolayer(forecast_no_damped, series = "ETS(A,A,N)") +
  autolayer(forecast_damped, series = "ETS(A,Ad,N)") +
  autolayer(forecast_boxcox, series = "ETS(A,A,N) with Box-Cox") +
  labs(title = "Comparison of ETS Forecasts with Different Options",
       subtitle = "No Damped Trend vs Damped Trend vs Box-Cox Transformation") +
  guides(colour = guide_legend(title = "Forecast Method"))

## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `series`

## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters:
## `series`

## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `series`

## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters:
## `series`

## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.

## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), : Ignoring unknown parameters: `series`
## Ignoring unknown parameters: `series`

## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.

(7) Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?

aus_production %>%
  autoplot(Gas)

fit <- aus_production %>%
  select(Gas) %>%
  model(ETS(Gas))

# Report summary of the fitted model
report(fit)

## Series: Gas 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.6528545 
##     beta  = 0.1441675 
##     gamma = 0.09784922 
## 
##   Initial states:
##      l[0]       b[0]      s[0]    s[-1]    s[-2]     s[-3]
##  5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
## 
##   sigma^2:  0.0032
## 
##      AIC     AICc      BIC 
## 1680.929 1681.794 1711.389

# Generate forecasts and plot
fit %>% 
  forecast(h = 15) %>%
  autoplot() +
  labs(title = "Forecast for Gas Production")

(8) Recall your retail time series data (from Exercise 7 in Section 2.10).

Why is multiplicative seasonality necessary for this series? Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped. Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer? Check that the residuals from the best method look like white noise. Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?

# Set seed for reproducibility
set.seed(1234567)

# Sample a series from aus_retail
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))

# Filter training data
myseries_train <- myseries %>%
  filter(year(Month) < 2011)

autoplot(myseries_train)

## Plot variable not specified, automatically selected `.vars = Turnover`

hw_model <- myseries_train %>%
  model(ETS(Turnover ~ error("M") + trend("M") + season("M")))

hw<- forecast(hw_model)
autoplot(hw,myseries_train)

Data624_homework5

2024-03-23