Forecast #4

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(fpp3)

## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ lubridate   1.8.0     ✔ feasts      0.2.2
## ✔ tsibble     1.1.2     ✔ fable       0.3.1
## ✔ tsibbledata 0.4.0     
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

library(readxl)

Here I load the data and coerce it to a tsibble.

bacon_data <- read_excel("C:\\Users\\PythonAcct\\Downloads\\APU0000704111.xls")
bd <- bacon_data %>% 
  mutate(Months=yearmonth(Month)) %>% 
  as_tsibble(index=Months) %>% 
  select(Months,Price)

Always visualize the data

bd %>% 
  autoplot(Price)

The data has a clear upward trend, and appears to exhibit heteroskedasticity.

bd %>% 
  features(Price,features=guerrero)

## # A tibble: 1 × 1
##   lambda_guerrero
##             <dbl>
## 1          -0.572

bd %>% 
  autoplot(box_cox(Price,-.5718469))

I am making a new dataset of the transformed data to see if it affects the forecast at all.

trandata <- bd %>% 
  mutate(TPrice=box_cox(Price,-.5718469)) %>% 
  select(Months,TPrice)
trandata %>% 
  autoplot(TPrice)

I will make smaller datasets to improve visualisation

viewbd <- bd %>% 
  filter(year(Months)>2020)
viewtbd <- trandata %>% 
  filter(year(Months)>2020)

Here are all the forecasts on one plot for the untransformed data

bd %>%
  model(
    `Simple Exponential Smoothing`=ETS(Price ~ error("A") + trend("N") + season("N")),
    `Holt's method` = ETS(Price ~ error("A") +  trend("A") + season("N")),
    `Damped Holt's method` = ETS(Price ~ error("A") +
                                   trend("Ad", phi = 0.9) + season("N")),
    `Additive Seasonal` = ETS(Price ~ error("A") + trend("A") + season("A")),
    `Multiplicative Seasonal` = ETS(Price ~ error("M") + trend("A") + season("M"))) %>%
  forecast(h = 4) %>%
  autoplot(viewbd, level = NULL) +
  guides(colour = guide_legend(title = "Forecast"))

Here is the same for the transformed dataset

trandata %>%
  model(
    `Simple Exponential Smoothing`=ETS(TPrice ~ error("A") + trend("N") + season("N")),
    `Holt's method` = ETS(TPrice ~ error("A") +  trend("A") + season("N")),
    `Damped Holt's method` = ETS(TPrice ~ error("A") +
                                   trend("Ad", phi = 0.9) + season("N")),
    `Additive Seasonal` = ETS(TPrice ~ error("A") + trend("A") + season("A")),
    `Multiplicative Seasonal` = ETS(TPrice ~ error("M") + trend("A") + season("M"))) %>%
  forecast(h = 4) %>%
  autoplot(viewtbd, level = NULL) +
  guides(colour = guide_legend(title = "Forecast"))

These look fine, but there isn’t really a way to assess the accuracy. Test and training data would be required for this, so I will use cross-validation.

This is a cross-validation for untransformed data with initial size of 5

bd %>%
  stretch_tsibble(.init = 5) %>%
  model(
    `Simple Exponential Smoothing`=ETS(Price ~ error("A") + trend("N") + season("N")),
    `Holt's method` = ETS(Price ~ error("A") +  trend("A") + season("N")),
    `Damped Holt's method` = ETS(Price ~ error("A") +
                                   trend("Ad", phi = 0.9) + season("N")),
    `Additive Seasonal` = ETS(Price ~ error("A") + trend("A") + season("A")),
    `Multiplicative Seasonal` = ETS(Price ~ error("M") + trend("A") + season("M"))) %>%
  forecast(h = 1) %>%
  accuracy(bd)

## Warning: 1 error encountered for Holt's method
## [1] Not enough data to estimate this ETS model.

## Warning: 1 error encountered for Damped Holt's method
## [1] Not enough data to estimate this ETS model.

## Warning: 13 errors (2 unique) encountered for Additive Seasonal
## [8] A seasonal ETS model cannot be used for this data.
## [5] Not enough data to estimate this ETS model.

## Warning: 13 errors (2 unique) encountered for Multiplicative Seasonal
## [8] A seasonal ETS model cannot be used for this data.
## [5] Not enough data to estimate this ETS model.

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 2022 Oct

## # A tibble: 5 × 10
##   .model             .type       ME  RMSE    MAE     MPE  MAPE  MASE RMSSE  ACF1
##   <chr>              <chr>    <dbl> <dbl>  <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Additive Seasonal  Test   1.48e-3 0.135 0.0997 -0.0440  2.16 0.289 0.272 0.320
## 2 Damped Holt's met… Test   1.06e-2 0.129 0.0948  0.171   2.06 0.275 0.259 0.252
## 3 Holt's method      Test   4.07e-4 0.128 0.0948 -0.0736  2.07 0.275 0.259 0.268
## 4 Multiplicative Se… Test  -1.20e-3 0.142 0.107  -0.118   2.33 0.311 0.286 0.376
## 5 Simple Exponentia… Test   1.66e-2 0.127 0.0936  0.305   2.03 0.272 0.256 0.248

trandata %>%
  stretch_tsibble(.init = 5) %>%
  model(
    `Simple Exponential Smoothing`=ETS(TPrice ~ error("A") + trend("N") + season("N")),
    `Holt's method` = ETS(TPrice ~ error("A") +  trend("A") + season("N")),
    `Damped Holt's method` = ETS(TPrice ~ error("A") +
                                   trend("Ad", phi = 0.9) + season("N")),
    `Additive Seasonal` = ETS(TPrice ~ error("A") + trend("A") + season("A")),
    `Multiplicative Seasonal` = ETS(TPrice ~ error("M") + trend("A") + season("M"))) %>%
  forecast(h = 1) %>%
  accuracy(trandata)

## Warning: 1 error encountered for Holt's method
## [1] Not enough data to estimate this ETS model.

## Warning: 1 error encountered for Damped Holt's method
## [1] Not enough data to estimate this ETS model.

## Warning: 13 errors (2 unique) encountered for Additive Seasonal
## [8] A seasonal ETS model cannot be used for this data.
## [5] Not enough data to estimate this ETS model.

## Warning: 13 errors (2 unique) encountered for Multiplicative Seasonal
## [8] A seasonal ETS model cannot be used for this data.
## [5] Not enough data to estimate this ETS model.

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 2022 Oct

## # A tibble: 5 × 10
##   .model           .type       ME   RMSE     MAE     MPE  MAPE  MASE RMSSE  ACF1
##   <chr>            <chr>    <dbl>  <dbl>   <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Additive Season… Test   2.44e-4 0.0203 0.0155  -0.0198 1.57  0.520 0.516 0.734
## 2 Damped Holt's m… Test   1.09e-3 0.0117 0.00884  0.0965 0.909 0.297 0.298 0.171
## 3 Holt's method    Test  -1.11e-3 0.0122 0.00918 -0.136  0.952 0.308 0.311 0.234
## 4 Multiplicative … Test  -2.68e-4 0.0193 0.0152  -0.0566 1.55  0.511 0.492 0.682
## 5 Simple Exponent… Test   1.43e-3 0.0115 0.00874  0.137  0.898 0.294 0.292 0.117

Simple exponential smoothing has the lowest RMSE for the transformed and untransformed data.

baconfit <- bd %>%
  model(
    `Simple Exponential Smoothing`=ETS(Price ~ error("A") + trend("N") + season("N"))) %>%
  forecast(h = 4)

baconfit%>%
  autoplot(viewbd) +
  guides(colour = guide_legend(title = "Forecast"))

hilo(baconfit)

## # A tsibble: 4 x 6 [1M]
## # Key:       .model [1]
##   .model                       Months         Price .mean                  `80%`
##   <chr>                         <mth>        <dist> <dbl>                 <hilo>
## 1 Simple Exponential Smooth… 2022 Oct N(7.4, 0.016)  7.38 [7.220737, 7.545261]80
## 2 Simple Exponential Smooth… 2022 Nov N(7.4, 0.032)  7.38 [7.153538, 7.612460]80
## 3 Simple Exponential Smooth… 2022 Dec N(7.4, 0.048)  7.38 [7.101972, 7.664026]80
## 4 Simple Exponential Smooth… 2023 Jan N(7.4, 0.064)  7.38 [7.058500, 7.707498]80
## # … with 1 more variable: `95%` <hilo>
## # ℹ Use `colnames()` to see all variable names

Here I use bootstrapping for more believable prediction intervals.

bootexp <- bd %>% 
  model(`Simple Exponential Smoothing`=ETS(Price ~ error("A") + trend("N") + season("N")))
boots <- bootexp %>%
  forecast(h=4,bootstrap=TRUE,times=5000)
hilo(boots,95)

## # A tsibble: 4 x 5 [1M]
## # Key:       .model [1]
##   .model                        Months        Price .mean                  `95%`
##   <chr>                          <mth>       <dist> <dbl>                 <hilo>
## 1 Simple Exponential Smoothi… 2022 Oct sample[5000]  7.38 [7.125476, 7.626021]95
## 2 Simple Exponential Smoothi… 2022 Nov sample[5000]  7.38 [7.030046, 7.750030]95
## 3 Simple Exponential Smoothi… 2022 Dec sample[5000]  7.39 [6.946074, 7.826097]95
## 4 Simple Exponential Smoothi… 2023 Jan sample[5000]  7.39 [6.876181, 7.869092]95

boots %>% 
  autoplot(viewbd)

My point forecast and associated prediction intervals can be seen above.

Forecast #4

Brandon Pascali

2022-11-04