library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ lubridate 1.8.0 ✔ feasts 0.2.2
## ✔ tsibble 1.1.2 ✔ fable 0.3.1
## ✔ tsibbledata 0.4.0
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(readxl)
Here I load the data and coerce it to a tsibble.
bacon_data <- read_excel("C:\\Users\\PythonAcct\\Downloads\\APU0000704111.xls")
bd <- bacon_data %>%
mutate(Months=yearmonth(Month)) %>%
as_tsibble(index=Months) %>%
select(Months,Price)
Always visualize the data
bd %>%
autoplot(Price)
The data has a clear upward trend, and appears to exhibit heteroskedasticity.
bd %>%
features(Price,features=guerrero)
## # A tibble: 1 × 1
## lambda_guerrero
## <dbl>
## 1 -0.572
bd %>%
autoplot(box_cox(Price,-.5718469))
I am making a new dataset of the transformed data to see if it affects the forecast at all.
trandata <- bd %>%
mutate(TPrice=box_cox(Price,-.5718469)) %>%
select(Months,TPrice)
trandata %>%
autoplot(TPrice)
I will make smaller datasets to improve visualisation
viewbd <- bd %>%
filter(year(Months)>2020)
viewtbd <- trandata %>%
filter(year(Months)>2020)
Here are all the forecasts on one plot for the untransformed data
bd %>%
model(
`Simple Exponential Smoothing`=ETS(Price ~ error("A") + trend("N") + season("N")),
`Holt's method` = ETS(Price ~ error("A") + trend("A") + season("N")),
`Damped Holt's method` = ETS(Price ~ error("A") +
trend("Ad", phi = 0.9) + season("N")),
`Additive Seasonal` = ETS(Price ~ error("A") + trend("A") + season("A")),
`Multiplicative Seasonal` = ETS(Price ~ error("M") + trend("A") + season("M"))) %>%
forecast(h = 4) %>%
autoplot(viewbd, level = NULL) +
guides(colour = guide_legend(title = "Forecast"))
Here is the same for the transformed dataset
trandata %>%
model(
`Simple Exponential Smoothing`=ETS(TPrice ~ error("A") + trend("N") + season("N")),
`Holt's method` = ETS(TPrice ~ error("A") + trend("A") + season("N")),
`Damped Holt's method` = ETS(TPrice ~ error("A") +
trend("Ad", phi = 0.9) + season("N")),
`Additive Seasonal` = ETS(TPrice ~ error("A") + trend("A") + season("A")),
`Multiplicative Seasonal` = ETS(TPrice ~ error("M") + trend("A") + season("M"))) %>%
forecast(h = 4) %>%
autoplot(viewtbd, level = NULL) +
guides(colour = guide_legend(title = "Forecast"))
These look fine, but there isn’t really a way to assess the accuracy. Test and training data would be required for this, so I will use cross-validation.
This is a cross-validation for untransformed data with initial size of 5
bd %>%
stretch_tsibble(.init = 5) %>%
model(
`Simple Exponential Smoothing`=ETS(Price ~ error("A") + trend("N") + season("N")),
`Holt's method` = ETS(Price ~ error("A") + trend("A") + season("N")),
`Damped Holt's method` = ETS(Price ~ error("A") +
trend("Ad", phi = 0.9) + season("N")),
`Additive Seasonal` = ETS(Price ~ error("A") + trend("A") + season("A")),
`Multiplicative Seasonal` = ETS(Price ~ error("M") + trend("A") + season("M"))) %>%
forecast(h = 1) %>%
accuracy(bd)
## Warning: 1 error encountered for Holt's method
## [1] Not enough data to estimate this ETS model.
## Warning: 1 error encountered for Damped Holt's method
## [1] Not enough data to estimate this ETS model.
## Warning: 13 errors (2 unique) encountered for Additive Seasonal
## [8] A seasonal ETS model cannot be used for this data.
## [5] Not enough data to estimate this ETS model.
## Warning: 13 errors (2 unique) encountered for Multiplicative Seasonal
## [8] A seasonal ETS model cannot be used for this data.
## [5] Not enough data to estimate this ETS model.
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2022 Oct
## # A tibble: 5 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Additive Seasonal Test 1.48e-3 0.135 0.0997 -0.0440 2.16 0.289 0.272 0.320
## 2 Damped Holt's met… Test 1.06e-2 0.129 0.0948 0.171 2.06 0.275 0.259 0.252
## 3 Holt's method Test 4.07e-4 0.128 0.0948 -0.0736 2.07 0.275 0.259 0.268
## 4 Multiplicative Se… Test -1.20e-3 0.142 0.107 -0.118 2.33 0.311 0.286 0.376
## 5 Simple Exponentia… Test 1.66e-2 0.127 0.0936 0.305 2.03 0.272 0.256 0.248
trandata %>%
stretch_tsibble(.init = 5) %>%
model(
`Simple Exponential Smoothing`=ETS(TPrice ~ error("A") + trend("N") + season("N")),
`Holt's method` = ETS(TPrice ~ error("A") + trend("A") + season("N")),
`Damped Holt's method` = ETS(TPrice ~ error("A") +
trend("Ad", phi = 0.9) + season("N")),
`Additive Seasonal` = ETS(TPrice ~ error("A") + trend("A") + season("A")),
`Multiplicative Seasonal` = ETS(TPrice ~ error("M") + trend("A") + season("M"))) %>%
forecast(h = 1) %>%
accuracy(trandata)
## Warning: 1 error encountered for Holt's method
## [1] Not enough data to estimate this ETS model.
## Warning: 1 error encountered for Damped Holt's method
## [1] Not enough data to estimate this ETS model.
## Warning: 13 errors (2 unique) encountered for Additive Seasonal
## [8] A seasonal ETS model cannot be used for this data.
## [5] Not enough data to estimate this ETS model.
## Warning: 13 errors (2 unique) encountered for Multiplicative Seasonal
## [8] A seasonal ETS model cannot be used for this data.
## [5] Not enough data to estimate this ETS model.
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2022 Oct
## # A tibble: 5 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Additive Season… Test 2.44e-4 0.0203 0.0155 -0.0198 1.57 0.520 0.516 0.734
## 2 Damped Holt's m… Test 1.09e-3 0.0117 0.00884 0.0965 0.909 0.297 0.298 0.171
## 3 Holt's method Test -1.11e-3 0.0122 0.00918 -0.136 0.952 0.308 0.311 0.234
## 4 Multiplicative … Test -2.68e-4 0.0193 0.0152 -0.0566 1.55 0.511 0.492 0.682
## 5 Simple Exponent… Test 1.43e-3 0.0115 0.00874 0.137 0.898 0.294 0.292 0.117
Simple exponential smoothing has the lowest RMSE for the transformed and untransformed data.
baconfit <- bd %>%
model(
`Simple Exponential Smoothing`=ETS(Price ~ error("A") + trend("N") + season("N"))) %>%
forecast(h = 4)
baconfit%>%
autoplot(viewbd) +
guides(colour = guide_legend(title = "Forecast"))
hilo(baconfit)
## # A tsibble: 4 x 6 [1M]
## # Key: .model [1]
## .model Months Price .mean `80%`
## <chr> <mth> <dist> <dbl> <hilo>
## 1 Simple Exponential Smooth… 2022 Oct N(7.4, 0.016) 7.38 [7.220737, 7.545261]80
## 2 Simple Exponential Smooth… 2022 Nov N(7.4, 0.032) 7.38 [7.153538, 7.612460]80
## 3 Simple Exponential Smooth… 2022 Dec N(7.4, 0.048) 7.38 [7.101972, 7.664026]80
## 4 Simple Exponential Smooth… 2023 Jan N(7.4, 0.064) 7.38 [7.058500, 7.707498]80
## # … with 1 more variable: `95%` <hilo>
## # ℹ Use `colnames()` to see all variable names
Here I use bootstrapping for more believable prediction intervals.
bootexp <- bd %>%
model(`Simple Exponential Smoothing`=ETS(Price ~ error("A") + trend("N") + season("N")))
boots <- bootexp %>%
forecast(h=4,bootstrap=TRUE,times=5000)
hilo(boots,95)
## # A tsibble: 4 x 5 [1M]
## # Key: .model [1]
## .model Months Price .mean `95%`
## <chr> <mth> <dist> <dbl> <hilo>
## 1 Simple Exponential Smoothi… 2022 Oct sample[5000] 7.38 [7.125476, 7.626021]95
## 2 Simple Exponential Smoothi… 2022 Nov sample[5000] 7.38 [7.030046, 7.750030]95
## 3 Simple Exponential Smoothi… 2022 Dec sample[5000] 7.39 [6.946074, 7.826097]95
## 4 Simple Exponential Smoothi… 2023 Jan sample[5000] 7.39 [6.876181, 7.869092]95
boots %>%
autoplot(viewbd)
My point forecast and associated prediction intervals can be seen above.