DATA 624.HW5

library(MASS)
library(gridExtra)
library(tidyr)
library(stringr)
library(fpp3)

## Warning: package 'fpp3' was built under R version 4.3.3

## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr

## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.0 ──

## ✔ tibble      3.2.1     ✔ tsibbledata 0.4.1
## ✔ dplyr       1.1.3     ✔ feasts      0.3.2
## ✔ lubridate   1.9.3     ✔ fable       0.3.4
## ✔ ggplot2     3.5.1     ✔ fabletools  0.4.2
## ✔ tsibble     1.1.5

## Warning: package 'ggplot2' was built under R version 4.3.3

## Warning: package 'tsibble' was built under R version 4.3.3

## Warning: package 'tsibbledata' was built under R version 4.3.3

## Warning: package 'feasts' was built under R version 4.3.3

## Warning: package 'fabletools' was built under R version 4.3.3

## Warning: package 'fable' was built under R version 4.3.3

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ dplyr::combine()     masks gridExtra::combine()
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ dplyr::select()      masks MASS::select()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

library(mlbench)

## Warning: package 'mlbench' was built under R version 4.3.3

library(dplyr)
library(ggplot2)
library(tsibble)
library(corrplot)

## Warning: package 'corrplot' was built under R version 4.3.2

## corrplot 0.92 loaded

library(cowplot)

## Warning: package 'cowplot' was built under R version 4.3.3

## 
## Attaching package: 'cowplot'

## The following object is masked from 'package:lubridate':
## 
##     stamp

library(psych)

## Warning: package 'psych' was built under R version 4.3.3

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

library(tidyr)
library(forecast)

## Warning: package 'forecast' was built under R version 4.3.3

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(fable)

Do exercises 8.1, 8.5, 8.6, 8.7, 8.8, 8.9 in Hyndman. Please submit both the link to your Rpubs and the .pdf file with your run code

#8.1 Consider the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.

#Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of α and ℓ 0, and generate forecasts for the next four months.

pigs1<- aus_livestock%>%
            filter(str_detect(Animal,"Pigs"),str_detect(State,"Victoria"))

pigs1 %>%
  autoplot(Count, color = "blue") + 
  labs(y = "Pig Count",
       x = "Month | Year",
       title = "Slaughtered pigs - Victoria") +
  geom_smooth(formula = y ~ x, color = "red")

## `geom_smooth()` using method = 'loess'

# Estimate equivalent model
pigs_opt <- pigs1 %>%
  model(ETS(Count ~ error("A") + trend("N") + season("N")))

report(pigs_opt)

## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3221247 
## 
##   Initial states:
##      l[0]
##  100646.6
## 
##   sigma^2:  87480760
## 
##      AIC     AICc      BIC 
## 13737.10 13737.14 13750.07

α optimal = 0.3221247 ℓ optimal = 100646.6

# generate forecasts
(pigs_forecast <- pigs_opt%>%
  forecast(h = 4))

## # A fable: 4 x 6 [1M]
## # Key:     Animal, State, .model [1]
##   Animal State    .model                          Month             Count  .mean
##   <fct>  <fct>    <chr>                           <mth>            <dist>  <dbl>
## 1 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Jan N(95187, 8.7e+07) 95187.
## 2 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Feb N(95187, 9.7e+07) 95187.
## 3 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Mar N(95187, 1.1e+08) 95187.
## 4 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Apr N(95187, 1.1e+08) 95187.

pigs_forecast%>%
    autoplot()+
      geom_line(data=filter(pigs1, Month >= yearmonth('2016 Jan')),
                aes(x=Month,y=Count),color="purple")

Our forecast appears to fit the range of the data reasonably well.

# Compute a 95% prediction interval for the first forecast using ^y±1.96s where s is the standard deviation of the residuals. Compare your interval with the interval produced by R.
pigs_forecast

## # A fable: 4 x 6 [1M]
## # Key:     Animal, State, .model [1]
##   Animal State    .model                          Month             Count  .mean
##   <fct>  <fct>    <chr>                           <mth>            <dist>  <dbl>
## 1 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Jan N(95187, 8.7e+07) 95187.
## 2 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Feb N(95187, 9.7e+07) 95187.
## 3 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Mar N(95187, 1.1e+08) 95187.
## 4 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Apr N(95187, 1.1e+08) 95187.

pigs_mean <- pigs_forecast$.mean[1]
pigs_augment<- augment(pigs_opt)
pigs_sd<- sd(pigs_augment$.resid)

pigs_upper95<-pigs_mean+(pigs_sd*1.96)
pigs_lower95<-pigs_mean-(pigs_sd*1.96)

pigs_hilo<-pigs_forecast%>%hilo()

pigs_mean

## [1] 95186.56

pigs_upper95

## [1] 113502.1

pigs_lower95

## [1] 76871.01

Lower 95%: 76871.01 Mean: 95186.56 Upper 95%: 113502.1

pigs_hilo$`95%`

## <hilo[4]>
## [1] [76854.79, 113518.3]95 [75927.17, 114445.9]95 [75042.22, 115330.9]95
## [4] [74194.54, 116178.6]95

pigs_hilo$.mean

## [1] 95186.56 95186.56 95186.56 95186.56

The numbers align.

#8.5 Data set global_economy contains the annual Exports from many countries. Select one country to analyse.

#a Plot the Exports series and discuss the main features of the data.

#Denmark
global_economy %>% 
    filter(Country == 'Denmark') -> dm_econ

dm_econ %>%
  autoplot(Exports, color = "#C8102E") +
  labs(y = "Exports", title = "Danish Exports")

We see a general upward trend, driven likely by global demand for Danish goods, impacted by global recessions at certain points.

#b Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.
dm_fit <- dm_econ %>%
  model(ETS(Exports ~ error("A") + trend("N") + season("N")))


dm_forecast <- dm_fit %>%
  forecast(h = 3)

dm_forecast %>%
  autoplot(dm_econ) +
  geom_line(aes(y = .fitted), col="#C8102E",
            data = augment(dm_fit)) +
  labs(y="GDP%", title="Danish exports") +
  guides(colour = "none")

#c Compute the RMSE values for the training data.

accuracy(dm_fit)

## # A tibble: 1 × 11
##   Country .model          .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <fct>   <chr>           <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 Denmark "ETS(Exports ~… Trai… 0.395  2.01  1.41 0.796  3.69 0.983 0.991 0.0503

RMSE: ~2

#Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.
dm_ets_rmse<-accuracy(dm_econ %>%
  model(
    ANN = ETS (Exports ~ error("A") + trend("N") + season("N")),
    AAN = ETS (Exports ~ error("A") + trend("A") + season("N"))
    ))[["RMSE"]]
dm_train_rmse<-accuracy(dm_fit)[["RMSE"]]

dm_train_rmse

## [1] 2.007573

dm_ets_rmse

## [1] 2.007573 1.969295

training rmse: 2 ets rmse: 1.969

While close, it seems that the AAN method offers a slightly better model, if we just look at the RMSE values.

# Compare the forecasts from both methods. Which do you think is best?

dm_econ %>%
  model(
    ANN = ETS (Exports ~ error("A") + trend("N") + season("N")),
    AAN = ETS (Exports ~ error("A") + trend("A") + season("N"))
    ) %>% 
  forecast(h=5) %>% 
  autoplot(dm_econ, level=NULL) +
  labs(title="AAN vs ANN: Danish exports")

The AAN captures the continued uptick in exports, which reinforces our preference for it from the previous question.

#Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.

dm_mean <- dm_forecast$.mean[1]
dm_augment <-augment(dm_fit)
dm_sd <- sd(dm_augment$.resid)
dm_upper95 <- dm_mean + (dm_sd * 1.96)
dm_lower95 <- dm_mean - (dm_sd * 1.96)
dm_hilo <- dm_forecast %>% hilo()

dm_upper95

## [1] 59.10165

dm_lower95

## [1] 51.31822

dm_mean

## [1] 55.20993

upper: 59.10 lower: 51.32 mean: 55.21

print(dm_hilo)

## # A tsibble: 3 x 7 [1Y]
## # Key:       Country, .model [1]
##   Country .model                    Year    Exports .mean                  `80%`
##   <fct>   <chr>                    <dbl>     <dist> <dbl>                 <hilo>
## 1 Denmark "ETS(Exports ~ error(\"…  2018 N(55, 4.2)  55.2 [52.59158, 57.82828]80
## 2 Denmark "ETS(Exports ~ error(\"…  2019 N(55, 8.3)  55.2 [51.50722, 58.91265]80
## 3 Denmark "ETS(Exports ~ error(\"…  2020  N(55, 13)  55.2 [50.67512, 59.74474]80
## # ℹ 1 more variable: `95%` <hilo>

The different outputs align fairly closely, but EMploying R offers enhanced precision but a greater range in values.

#8.6 Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.

[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]

library(scales)

## Warning: package 'scales' was built under R version 4.3.3

## 
## Attaching package: 'scales'

## The following objects are masked from 'package:psych':
## 
##     alpha, rescale

global_economy %>% 
    filter(Country == 'China') -> china_econ

china_econ %>%
  autoplot(GDP) +
  labs(y = "GDP", title = "China GDP")+
  scale_y_continuous(labels = comma) +
  geom_smooth(formula = y ~ x, color = "#EE1C25")

## `geom_smooth()` using method = 'loess'

The major upward trend with an absence of seasonality and can justify not employing Holt-Winters. We’ll instead employ a damped Holt’s method and compare it to a Box Cox forecast..

#10 year forecast per the instructions
#dampled
china_econ$GDP %>%
    holt(damped = TRUE) %>%
    forecast(h = 10) -> ch_damped
ch_damped %>% autoplot()+
    scale_y_continuous(labels = comma)

#bc
china_econ$GDP %>%
    holt(lambda = BoxCox.lambda(china_econ$GDP)) %>%
    forecast(h = 10) -> china_holt
china_holt %>% autoplot()+
    scale_y_continuous(labels = comma)

Using a ten year forecast, the damped method produces a fairly reasonable forecast, which recommends it for longer term forecasts that are appropriate for economic analysis, whereas the Box Cox produces a startling range of potential results.

#8.7 Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?

ausgas_1<-aus_production %>% autoplot(Gas)+
  labs(title="Gas Production: Australia")

gas_fit <- aus_production %>%
  model(
    # Multiplicative and dampled mult
    Multiplicative = ETS(Gas ~ error("M") + trend("A") + season("M")),
    `Multiplicative, Damped` = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
  )
gas_fc <- gas_fit %>% forecast(h = "5 years")

ausgas_2<-gas_fc %>%
  autoplot(aus_production, level = NULL) +
  labs(title="Gas Production: Forecast") +
  guides(colour = guide_legend(title = "Forecast"))

plot_grid(ausgas_1,
          ausgas_2, nrow = 2)

The two models are nearly identical and either one could be considered accurate.

#8.8 Recall your retail time series data (from Exercise 7 in Section 2.10).

#Why is multiplicative seasonality necessary for this series?
#Revisit the retail data
set.seed(432)
ret_series <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
ts_ret <- ts(data=ret_series$Turnover, frequency=12)

#plot the series
ts_ret %>% autoplot()

With retail, there are obvious seasonality fluctuations.

#Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.
#HW multiplicative
hw_retail <- ets(ts_ret, model="MAM")
hw_retail_forecast <- hw_retail %>% forecast(h=20)
hw_retail_forecast %>% autoplot()

#with damping
hw_retail_damped <- ets(ts_ret, model="MAM", damped=TRUE)
hw_retail_forecast_damped <- hw_retail_damped %>% forecast(h=20)
hw_retail_forecast_damped %>% autoplot()

#Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?

sqrt(mean(hw_retail_forecast$residuals^2))

## [1] 0.03240376

sqrt(mean(hw_retail_forecast_damped$residuals^2))

## [1] 0.03249965

The damped model produces a slightly larger RMSE, which would indicated lower accuracy score. We would, in this situation, opt for the first HW model.

#Check that the residuals from the best method look like white noise.
checkresiduals(hw_retail)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(M,A,M)
## Q* = 126.3, df = 24, p-value = 7.772e-16
## 
## Model df: 0.   Total lags used: 24

There is no clear indication of a trend or trend in the residuals, with the residuals fluctuating around zero, so it does appear to be white noise.

#Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?

#train model and create time series
retail_train <- ret_series %>%
  filter(year(Month) < 2011)
retail_test <- ret_series %>%
  filter(year(Month) >= 2011)
timeseries_train <- ts(data=retail_train$Turnover, frequency=12)
timeseries_test <- ts(data=retail_test$Turnover, frequency=12)

#generate models and run test RMSE
retail_hw_train <- ets(timeseries_train, model="MAM")
retail_hw_test <- ets(timeseries_test, model="MAM")
sqrt(mean(retail_hw_test$residuals^2))

## [1] 0.01686983

#now that we have this, we can check it against the previous approach and see if it is superior to the SNAIVE
ret_fc <- retail_hw_train %>%
  forecast(new_data = anti_join(ts_ret, timeseries_train))
#retail_hw_train %>% accuracy()

ret_fc %>% accuracy(ts_ret)

##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.2063726 2.065572 1.552431 0.2785925 2.573006 0.2906716
## Test set     4.1213620 5.681403 4.662947 2.4117363 2.732544 0.8730735
##                    ACF1 Theil's U
## Training set -0.1381295        NA
## Test set      0.4738953 0.6350832

The model performs well on the training data, showing low errors or bias, but it performs worse on the test data with higher errors. While it outperforms a naive model, it does not appear to be the best possible model.

#8.9 For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?

set.seed(432)
ret_series <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
ts_ret <- ts(data = ret_series$Turnover, frequency = 12)
autoplot(ts_ret) + ggtitle("Retail Turnover")

#let's find the lambda for the BC transformation and apply it
lambda <- BoxCox.lambda(ts_ret) 
ts_transformed <- BoxCox(ts_ret, lambda)  
stl_decomp <- stl(ts_transformed, s.window = "periodic")
autoplot(stl_decomp) + ggtitle("STL Decomposition: BC Transformed Data")

# let's get seasonally-adjusted
seasonally_adjusted <- seasadj(stl_decomp)

# fit the ETS model on seasonally adjusted data
ets_model <- ets(seasonally_adjusted)
ets_forecast <- forecast(ets_model, h = 20)

# apply BC for forecast
ets_forecast_inverted <- InvBoxCox(ets_forecast$mean, lambda)

# plot
autoplot(ets_forecast_inverted) + 
  autolayer(ts_ret, series = "Original") + 
  ggtitle("ETS Forecast: seasonally-adjusted")

# compare RMSEs and re-apply BC and STL
retail_train <- ret_series %>% filter(year(Month) < 2011)
retail_test <- ret_series %>% filter(year(Month) >= 2011)
timeseries_train <- ts(data = retail_train$Turnover, frequency = 12)
timeseries_test <- ts(data = retail_test$Turnover, frequency = 12)

lambda_train <- BoxCox.lambda(timeseries_train)
ts_transformed_train <- BoxCox(timeseries_train, lambda_train)
stl_decomp_train <- stl(ts_transformed_train, s.window = "periodic")
seasonally_adjusted_train <- seasadj(stl_decomp_train)

ets_model_train <- ets(seasonally_adjusted_train)

# produce forecast 
ets_forecast_test <- forecast(ets_model_train, h = length(timeseries_test))
ets_forecast_inverted_test <- InvBoxCox(ets_forecast_test$mean, lambda_train)

rmse_ets <- sqrt(mean((ets_forecast_inverted_test - timeseries_test)^2))

## Warning in .cbind.ts(list(e1, e2), c(deparse(substitute(e1))[1L],
## deparse(substitute(e2))[1L]), : non-intersecting series

hw_retail_train <- ets(timeseries_train, model = "MAM")
hw_forecast_test <- forecast(hw_retail_train, h = length(timeseries_test))
rmse_hw <- sqrt(mean((hw_forecast_test$mean - timeseries_test)^2))

## Warning in .cbind.ts(list(e1, e2), c(deparse(substitute(e1))[1L],
## deparse(substitute(e2))[1L]), : non-intersecting series

cat("RMSE of ETS on seasonally adjusted data:", rmse_ets, "\n")

## RMSE of ETS on seasonally adjusted data: NaN

cat("RMSE of Holt-Winters model:", rmse_hw, "\n")

## RMSE of Holt-Winters model: NaN

Applying STL decomposition to our Box-Cox transformed retail series and fitting theETS model on the seasonally-adjusted data produced forecasts that aligned with the seasonal trends. Compared to the previous Holts-winters forecasts, the ETS exhibited a smoother fit. However, when we look at the RMSEs , the ETS model didn’t produce noticeably better results than the Holt-Winters.

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

DATA 624.HW5

Evan McLaughlin

2024-10-06