Homework 5

All libraries needed for the Homework

library(fpp3)

## Warning: package 'fpp3' was built under R version 4.3.3

## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr

## -- Attaching packages -------------------------------------------- fpp3 1.0.0 --

## v tibble      3.2.1     v tsibble     1.1.5
## v dplyr       1.1.2     v tsibbledata 0.4.1
## v tidyr       1.3.0     v feasts      0.3.2
## v lubridate   1.9.2     v fable       0.3.4
## v ggplot2     3.5.1     v fabletools  0.4.2

## Warning: package 'ggplot2' was built under R version 4.3.3

## Warning: package 'tsibble' was built under R version 4.3.3

## Warning: package 'tsibbledata' was built under R version 4.3.3

## Warning: package 'feasts' was built under R version 4.3.3

## Warning: package 'fabletools' was built under R version 4.3.3

## Warning: package 'fable' was built under R version 4.3.3

## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date()    masks base::date()
## x dplyr::filter()      masks stats::filter()
## x tsibble::intersect() masks base::intersect()
## x tsibble::interval()  masks lubridate::interval()
## x dplyr::lag()         masks stats::lag()
## x tsibble::setdiff()   masks base::setdiff()
## x tsibble::union()     masks base::union()

library(forecast)

## Warning: package 'forecast' was built under R version 4.3.3

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(tidyverse)

## -- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
## v forcats 1.0.0     v readr   2.1.4
## v purrr   1.0.1     v stringr 1.5.0

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter()     masks stats::filter()
## x tsibble::interval() masks lubridate::interval()
## x dplyr::lag()        masks stats::lag()
## i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)
library(lubridate)
library(tsibble)
library(pracma)

## Warning: package 'pracma' was built under R version 4.3.3

## 
## Attaching package: 'pracma'
## 
## The following object is masked from 'package:purrr':
## 
##     cross

library(mlbench)

## Warning: package 'mlbench' was built under R version 4.3.3

library(corrplot)

## corrplot 0.92 loaded

library(e1071)

## Warning: package 'e1071' was built under R version 4.3.1

## 
## Attaching package: 'e1071'
## 
## The following object is masked from 'package:pracma':
## 
##     sigmoid
## 
## The following object is masked from 'package:fabletools':
## 
##     interpolate

library(psych)

## Warning: package 'psych' was built under R version 4.3.1

## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:pracma':
## 
##     logit, polar
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

library(imputeTS)

## Warning: package 'imputeTS' was built under R version 4.3.3

8.1 - Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.

Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of α and ℓ0, and generate forecasts for the next four months.
Compute a 95% prediction interval for the first forecast using ^y±1.96s where s is the standard deviation of the residuals. Compare your interval with the interval produced by R.
Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of α and ℓ0, and generate forecasts for the next four months.

#I will filter for aus_livestock (pigs) slaughtered in Victoria and implement the ETS() function
aus_pigs <- aus_livestock %>%
  filter(State == "Victoria",
         Animal == "Pigs") %>%
  model(ETS(Count ~ error("A") + trend("N") + season("N"))) 
#I will forecast for the next 6 values
aus_pigs_forecast <- aus_pigs %>%
  forecast(h = 6)
#I will now plot the time series
aus_pigs_forecast %>%
  autoplot(aus_livestock) +
  ggtitle("Amount of Pigs Slaughtered in Victoria")

# I will now find the optimal values of α and ℓ0
report(aus_pigs)

## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3221247 
## 
##   Initial states:
##      l[0]
##  100646.6
## 
##   sigma^2:  87480760
## 
##      AIC     AICc      BIC 
## 13737.10 13737.14 13750.07

#I will now generate forecasts for the next four months
aus_pigs_forecast

## # A fable: 6 x 6 [1M]
## # Key:     Animal, State, .model [1]
##   Animal State    .model                          Month             Count  .mean
##   <fct>  <fct>    <chr>                           <mth>            <dist>  <dbl>
## 1 Pigs   Victoria "ETS(Count ~ error(\"A\") +~ 2019 Jan N(95187, 8.7e+07) 95187.
## 2 Pigs   Victoria "ETS(Count ~ error(\"A\") +~ 2019 Feb N(95187, 9.7e+07) 95187.
## 3 Pigs   Victoria "ETS(Count ~ error(\"A\") +~ 2019 Mar N(95187, 1.1e+08) 95187.
## 4 Pigs   Victoria "ETS(Count ~ error(\"A\") +~ 2019 Apr N(95187, 1.1e+08) 95187.
## 5 Pigs   Victoria "ETS(Count ~ error(\"A\") +~ 2019 May N(95187, 1.2e+08) 95187.
## 6 Pigs   Victoria "ETS(Count ~ error(\"A\") +~ 2019 Jun N(95187, 1.3e+08) 95187.

The optimal values are: α = 0.3221247 and ℓ0 = 100646.6

Compute a 95% prediction interval for the first forecast using ^y±1.96s where s is the standard deviation of the residuals. Compare your interval with the interval produced by R.

# I will now compute a 95% prediction interval for the first forecast using ^y±1.96s where s is the standard deviation of the residuals
#These values are taken from the first row of the forecast table created in (a).
mean <- 95186.56
s <- sqrt(87480760)
z <- 1.96
margin <- z * s
lower_mean <- mean - margin
upper_mean <- mean + margin
paste(lower_mean, upper_mean)

## [1] "76854.4546212935 113518.665378707"

#I will now use R for this
aus_pigs_forecast %>% hilo(95) %>% pull('95%') %>% head(1)

## <hilo[1]>
## [1] [76854.79, 113518.3]95

Based on the results by hand and those calculated by R they are very similar becasue of the negligent error rate. (i,e 76854.79 - 76854.45 = 0.34 )

8.5 - Data set global_economy contains the annual Exports from many countries. Select one country to analyse.

Plot the Exports series and discuss the main features of the data.
Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.
Compute the RMSE values for the training data.
Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.
Compare the forecasts from both methods. Which do you think is best?
Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.
Plot the Exports series and discuss the main features of the data.

global_economy

## # A tsibble: 15,150 x 9 [1Y]
## # Key:       Country [263]
##    Country     Code   Year         GDP Growth   CPI Imports Exports Population
##    <fct>       <fct> <dbl>       <dbl>  <dbl> <dbl>   <dbl>   <dbl>      <dbl>
##  1 Afghanistan AFG    1960  537777811.     NA    NA    7.02    4.13    8996351
##  2 Afghanistan AFG    1961  548888896.     NA    NA    8.10    4.45    9166764
##  3 Afghanistan AFG    1962  546666678.     NA    NA    9.35    4.88    9345868
##  4 Afghanistan AFG    1963  751111191.     NA    NA   16.9     9.17    9533954
##  5 Afghanistan AFG    1964  800000044.     NA    NA   18.1     8.89    9731361
##  6 Afghanistan AFG    1965 1006666638.     NA    NA   21.4    11.3     9938414
##  7 Afghanistan AFG    1966 1399999967.     NA    NA   18.6     8.57   10152331
##  8 Afghanistan AFG    1967 1673333418.     NA    NA   14.2     6.77   10372630
##  9 Afghanistan AFG    1968 1373333367.     NA    NA   15.2     8.90   10604346
## 10 Afghanistan AFG    1969 1408888922.     NA    NA   15.0    10.1    10854428
## # i 15,140 more rows

#I will plot the exports series below for the country of my selection: Aruba
global_economy %>%
  filter(Country == "Australia") %>%
  autoplot(Exports) +
  labs(y="% of GDP", title="Australia Exports")

Looking at the series below: There is mostly an upward trend from the end of the 1970’s to the early 2000’s. I did notice a couple of sharp downward trends (dips), notably in the mid 1960’s, early 1980’s, early 2000’s and in the 2010’s. I observed no seasonality in the time series.

Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.

#I will now utilize the ETS(A,N,N) model 
Australia_ETS_ANN <- global_economy %>%
  filter(Country == "Australia") %>%
  model(ETS(Exports ~ error("A") + trend("N") + season("N"))) 

#I will now predict for the next 8 values
Australia_ETS_ANN_forecast <- Australia_ETS_ANN %>%
  forecast(h = 8)

#I will plot the forecasts
Australia_ETS_ANN_forecast%>%
  autoplot(global_economy) +
  labs(y="% of GDP", title="Australia Exports - ETS(A,N,N)")

Compute the RMSE values for the training data of ETS(A,N,N).

Australia_ETS_ANN_RMSE<-accuracy(Australia_ETS_ANN)$RMSE
Australia_ETS_ANN_RMSE

## [1] 1.146794

Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.

#I will now utilize the ETS(A,A,N) model 
Australia_ETS_AAN <- global_economy %>%
  filter(Country == "Australia") %>%
  model(ETS(Exports ~ error("A") + trend("A") + season("N"))) 

#I will now predict for the next 8 values
Australia_ETS_AAN_forecast <- Australia_ETS_AAN %>%
  forecast(h = 8)

#I will plot the forecasts
Australia_ETS_AAN_forecast%>%
  autoplot(global_economy) +
  labs(y="% of GDP", title="Australia Exports - ETS(A,A,N)")

# I will now calculate the RMSE for the ETS(A,A,N)
Australia_ETS_AAN_RMSE<-accuracy(Australia_ETS_AAN)$RMSE
Australia_ETS_AAN_RMSE

## [1] 1.116727

Compare the forecasts from both methods. Which do you think is best?
Since the ETS(A,A,N) method has a lower RMSE: 1.117 than ETS(A,N,N) RMSE: 1.147, the ETS(A,A,N) looks to be the better model

(f)Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.

# I will now calculate the confidence interval of the ETS(A,N,N) method
r_Australia_ETS_ANN <- residuals(Australia_ETS_ANN)$.resid %>% sd()
m_Australia_ETS_ANN  <- Australia_ETS_ANN_forecast$.mean[1]
Australia_ETS_ANN_forecast %>% hilo(95) %>% pull('95%') %>% head(1)

## <hilo[1]>
## [1] [18.3197, 22.89462]95

# I will now calculate the confidence interval of the ETS(A,A,N) method
r_Australia_ETS_AAN <- residuals(Australia_ETS_AAN)$.resid %>% sd()
m_Australia_ETS_AAN  <- Australia_ETS_AAN_forecast$.mean[1]
Australia_ETS_AAN_forecast %>% hilo(95) %>% pull('95%') %>% head(1)

## <hilo[1]>
## [1] [18.57028, 23.107]95

The ETS(A,N,N) has a 95% confidence interval of:[18.3197, 22.89462]. The ETS(A,A,N) has a confidence interval of [18.57028, 23.107].

8.6 - Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.

# I will calculate the lambda
lam_china <- global_economy %>%
  filter(Country == "China") %>%
  features(GDP, features = guerrero) %>%
  pull(lambda_guerrero)

# I will now utilize the Holt's method, Damped Holt's method, Box-Cox, Damped Box-Cox and ETS(A,A,N) methods
china_models <- global_economy %>%
  filter(Country == "China") %>%
  model(`ETS(A,A,N)` = ETS(GDP ~ error("A") + trend("A") + season("N")),
        `Holt's method` = ETS(GDP ~ error("A") + trend("A") + season("N")),
        `Damped Holt's method` = ETS(GDP ~ error("A") + trend("Ad", phi = 0.8) + season("N")),
        `Box-Cox` = ETS(box_cox(GDP,lam_china) ~ error("A") + trend("Ad") + season("N")),
        `Damped Box-Cox` = ETS(box_cox(GDP,lam_china) ~ error("A") + trend("Ad", phi = 0.8) + season("N"))) 

#I will make a prediction for the next 28 values
china_models_forecast <- china_models %>%
  forecast(h = 28)

#plot all of the models' forecasting for China's GDP
china_models_forecast %>%
  autoplot(global_economy, level = NULL) +
  labs(title="Forecasting Models for China's GDP") +
  guides(colour = guide_legend(title = "Forecast"))

8.7 - Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?

#I will now utilize the ETS(A,A,A) model for both multiplicative and damped multiplicative seasonality
ETS_gas <- aus_production %>%
  model(additive = ETS(Gas ~ error("A") + trend("A") + season("A")),
        multiplicative = ETS(Gas ~ error("M") + trend("A") + season("M")),
        `damped multiplicative` = ETS(Gas ~ error("M") + trend("Ad", phi = 0.9) + season("M"))) 

#I will now forecast and plot for both additive and multiplicative seasonality for Australian gas production for the
#next 30 values
aus_production %>%
   model(additive = ETS(Gas ~ error("A") + trend("A") + season("A")),
        multiplicative = ETS(Gas ~ error("M") + trend("A") + season("M"))) %>%
   forecast(h=30) %>%
  autoplot(aus_production, level = NULL) +
  labs(title="Australian Gas Production") +
  guides(colour = guide_legend(title = "Forecast"))

#I will now forecast and plot for both multiplicative and damped multiplicative seasonality for Australian gas production for the next 30 values

aus_production %>%
  model(multiplicative = ETS(Gas ~ error("M") + trend("A") + season("M")),
        `damped multiplicative` = ETS(Gas ~ error("M") + trend("Ad", phi = 0.9) + season("M"))
        ) %>%
  forecast(h=30) %>%
  autoplot(aus_production, level= NULL) +
  labs(title="Australian Gas Production") +
  guides(colour = guide_legend(title = "Forecast"))

#I will now get the RMSE for each model
accuracy(ETS_gas)

## # A tibble: 3 x 10
##   .model            .type       ME  RMSE   MAE    MPE  MAPE  MASE RMSSE     ACF1
##   <chr>             <chr>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 additive          Trai~  0.00525  4.76  3.35 -4.69  10.9  0.600 0.628  0.0772 
## 2 multiplicative    Trai~ -0.115    4.60  3.02  0.199  4.08 0.542 0.606 -0.0131 
## 3 damped multiplic~ Trai~  0.255    4.58  3.04  0.655  4.15 0.545 0.604 -0.00451

Multiplicative seasonality is necessary because the amplitude of the seasonality is increasing.Another words every season - January we see an increase as the years progress. It looks like making the trend damped improves the forecast because out of the three models the damped multiplicative model has the lowest RMSE.

8.8 - Recall your retail time series data (from Exercise 7 in Section 2.10).

Why is multiplicative seasonality necessary for this series?
Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.
Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?
Check that the residuals from the best method look like white noise.
Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?
Why is multiplicative seasonality necessary for this series?
Multiplicative seasonality is necessary in the cases where variation of the seasonal pattern becomes proportional to the level of the series. Another words the trend is increasing through all of the seasons.
Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.

#This is taken from exercise 7 in Section 2.10 and plotting the seris
set.seed(2456)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries %>% autoplot()

## Plot variable not specified, automatically selected `.vars = Turnover`

#I will now apply the multiplicative method to the myseries method
ETS<- myseries %>%
  model(`Multiplicative` = ETS(Turnover ~ error("M") + trend("A") + season("M")),
        `Damped Multiplicative` = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))) 
ETS %>%
  forecast(h=50) %>%
  autoplot(myseries, level = NULL) +
  labs(title="Australian Turnover in Retail") +
  guides(colour = guide_legend(title = "Forecast"))

Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?

#I will now calculate the RMSE for both Multiplicative and Damped Multiplicative method for comparison
accuracy(ETS) %>%
  select(.model, RMSE)

## # A tibble: 2 x 2
##   .model                 RMSE
##   <chr>                 <dbl>
## 1 Multiplicative         22.9
## 2 Damped Multiplicative  23.2

In this particular case I would select the Multiplicative model because it has a RMSE of 22.86 which is lower than the 23.22 damped multiplicative model.

Check that the residuals from the best method look like white noise.

#ploting the residuals
myseries %>%
  model(multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))) %>%
  gg_tsresiduals() +
  ggtitle("Multiplicative Method")

e).Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?

#training the model to the end of 2010
myseries_train <- myseries %>%
  filter(year(Month) < 2011)
#utilizing the naive approach
snaive_train <- myseries_train %>%
  model(multi = ETS(Turnover ~ error("M") + trend("A") + season("M")),
        snaive = SNAIVE(Turnover))

#forecasting for the naive model
snaive_train_forecast <- snaive_train  %>%
  forecast(new_data = anti_join(myseries, myseries_train))

## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`

#plotting the forecast
snaive_train_forecast %>% autoplot(myseries, level = NULL)

#training seasonal naive  RMSE
accuracy(snaive_train) %>%
  select(.type, .model, RMSE)

## # A tibble: 2 x 3
##   .type    .model  RMSE
##   <chr>    <chr>  <dbl>
## 1 Training multi   15.7
## 2 Training snaive  36.9

#myseries RMSE
snaive_train_forecast %>% accuracy(myseries)  %>%
  select(.type, .model, RMSE)

## # A tibble: 2 x 3
##   .type .model  RMSE
##   <chr> <chr>  <dbl>
## 1 Test  multi   165.
## 2 Test  snaive  283.

The seasonal naive approach could not be beaten. It had almost a 10 times less RMSE than the myseries one.

8.9 -For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?

#retrieving the lambda for the retail
l_retail <- myseries %>%
  features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero)

#applying STL composition to Box-Cox
STL <-myseries %>%
  model(STL(box_cox(Turnover,l_retail) ~ season(window = "periodic"), robust = TRUE)) %>%
  components() %>%
  autoplot() +
  ggtitle("Box-Cox Transformation with STL")

#Performing ETS on seasonally adjusted data
adjusted <- myseries %>%
  model(STL_box = STL(box_cox(Turnover,l_retail) ~ season(window = "periodic"), robust = TRUE)) %>%
  components()

myseries$Turnover_sa <- adjusted$season_adjust

myseries_training <- myseries %>%
  filter(year(Month) < 2011)

ETS <- myseries_train %>%
  model(ETS(Turnover ~ error("M") + trend("A") + season("M")))
ETS %>% gg_tsresiduals()  +
  ggtitle("Australian Retail Turnover Residual")

#I will now forecast the training data
ETS_forecast <- ETS %>%
  forecast(new_data = anti_join(myseries, myseries_training))

## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover,
## Turnover_sa)`

ETS %>% accuracy() %>%
  select(.model, .type, RMSE)

## # A tibble: 1 x 3
##   .model                                                        .type     RMSE
##   <chr>                                                         <chr>    <dbl>
## 1 "ETS(Turnover ~ error(\"M\") + trend(\"A\") + season(\"M\"))" Training  15.7

#I will now forecast the testing data
ETS_forecast %>% accuracy(myseries) %>%
  select(.model, .type, RMSE)

## # A tibble: 1 x 3
##   .model                                                        .type  RMSE
##   <chr>                                                         <chr> <dbl>
## 1 "ETS(Turnover ~ error(\"M\") + trend(\"A\") + season(\"M\"))" Test   165.

The RMSE indicates that the Box-Cox transformation works well on the test data in particular.

Homework 5

Vladimir Nimchenko

2024-10-06