Setup
Load Libraries
Registered S3 method overwritten by 'tsibble':
method from
as_tibble.grouped_df dplyr
── Attaching packages ──────────────────────────────────────────── fpp3 1.0.2 ──
✔ tibble 3.3.0 ✔ tsibble 1.1.6
✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
✔ tidyr 1.3.1 ✔ feasts 0.4.2
✔ lubridate 1.9.4 ✔ fable 0.4.1
✔ ggplot2 4.0.0
── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
✖ lubridate::date() masks base::date()
✖ dplyr::filter() masks stats::filter()
✖ tsibble::intersect() masks base::intersect()
✖ tsibble::interval() masks lubridate::interval()
✖ dplyr::lag() masks stats::lag()
✖ tsibble::setdiff() masks base::setdiff()
✖ tsibble::union() masks base::union()
library (fredr)
library (tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats 1.0.1 ✔ readr 2.1.5
✔ purrr 1.1.0 ✔ stringr 1.5.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ tsibble::interval() masks lubridate::interval()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library (patchwork)
library (knitr)
library (kableExtra)
Attaching package: 'kableExtra'
The following object is masked from 'package:dplyr':
group_rows
Import Data
remove (list= ls ())
fredr_set_key ("523a2b98a1ce120186357fd0c916cc26" )
myts <- fredr (series_id = "OILPRICE" ,
observation_start = as.Date ("1980-01-01" ),
observation_end = as.Date ("2024-12-01" )
) |>
transmute (Month = yearmonth (date), value) |>
as_tsibble (index = Month)
glimpse (myts)
Rows: 403
Columns: 2
$ Month <mth> 1980 Jan, 1980 Feb, 1980 Mar, 1980 Apr, 1980 May, 1980 Jun, 1980…
$ value <dbl> 32.50, 37.00, 38.00, 39.50, 39.50, 39.50, 39.50, 38.00, 36.00, 3…
tbl_ts [403 × 2] (S3: tbl_ts/tbl_df/tbl/data.frame)
$ Month: mth [1:403] 1980 Jan, 1980 Feb, 1980 Mar, 1980 Apr, 1980 May, 1980 Jun...
$ value: num [1:403] 32.5 37 38 39.5 39.5 39.5 39.5 38 36 36 ...
- attr(*, "key")= tibble [1 × 1] (S3: tbl_df/tbl/data.frame)
..$ .rows: list<int> [1:1]
.. ..$ : int [1:403] 1 2 3 4 5 6 7 8 9 10 ...
.. ..@ ptype: int(0)
- attr(*, "index")= chr "Month"
..- attr(*, "ordered")= logi TRUE
- attr(*, "index2")= chr "Month"
- attr(*, "interval")= interval [1:1] 1M
..@ .regular: logi TRUE
Split Data
split_index <- floor (0.8 * nrow (myts))
train <- myts[1 : split_index, ]
test <- myts[(split_index + 1 ): nrow (myts), ]
# A tsibble: 1 x 2 [1M]
Month value
<mth> <dbl>
1 1980 Jan 32.5
# A tsibble: 1 x 2 [1M]
Month value
<mth> <dbl>
1 2006 Oct 58.9
# A tsibble: 1 x 2 [1M]
Month value
<mth> <dbl>
1 2006 Nov 59.4
# A tsibble: 1 x 2 [1M]
Month value
<mth> <dbl>
1 2013 Jul 105.
Train data contains monthly observations from January, 1980 - October, 2006 while the test data contains monthly observations from November, 2006 - July, 2013.
Forecasting
# A tibble: 1 × 1
.gaps
<lgl>
1 FALSE
Decomposition
mods <- train |>
fabletools:: model (
additive = decomposition_model (
STL (formula = value ~ season (window= "periodic" )),
ETS (formula = season_adjust ~ error ("A" ) + trend ("Ad" ) + season ("N" ))
),
multiplicative = decomposition_model (
dcmp = STL (formula = log (value) ~ season (window = "periodic" )),
ETS (formula = season_adjust)
)
)
mods
# A mable: 1 x 2
additive multiplicative
<model> <model>
1 <STL decomposition model> <STL decomposition model>
Rows: 1
Columns: 2
$ additive <model> [STL decomposition model]
$ multiplicative <model> [STL decomposition model]
p1 <- train |>
model (STL (value ~ season (window= "periodic" ))) |>
fabletools:: components () |> autoplot () + ggtitle ("Additive STL decomposition" )
p2 <- train |>
model (STL (log (value) ~ season (window= "periodic" ))) |>
components () |> autoplot () + ggtitle ("Multiplicative STL decomposition (log)" )
p1/ p2
Workflow
models <- train %>% model ( NAIVE = NAIVE (formula = value),
MEAN = MEAN (formula = value),
SNAIVE = SNAIVE (formula = value),
AVG = MEAN (formula = value),
WAVG = TSLM (formula = value ~ trend ())
)
fc <- models %>%
forecast (h = nrow (test)
)
fc %>% autoplot (train) +
labs ( title = "Five Benchmark Forecasts: Naive, Mean, SNaive, Average, Weighted" ,
y = "Value" ,
x = "Month"
) + facet_wrap (~ .model,
ncol = 2
)
preds <- forecast (object = mods,
new_data = test) |>
mutate (.mean = ifelse (test = .model== "multiplicative" ,
yes = exp (.mean),
no = .mean)
)
p3 <- autoplot (myts) +
autolayer (filter (preds, .model== "additive" ), colour= "blue" ) +
autolayer (filter (preds, .model== "multiplicative" ), colour= "red" ) +
ggtitle ("Additive (blue) vs Multiplicative (red) forecasts" )
Plot variable not specified, automatically selected `.vars = value`
Scale for fill_ramp is already present.
Adding another scale for fill_ramp, which will replace the existing scale.
p4 <- autoplot (myts |> filter_index ("2006 Nov" ~ .)) +
autolayer (filter (preds, .model== "additive" ), colour= "blue" ) +
autolayer (filter (preds, .model== "multiplicative" ), colour= "red" ) +
ggtitle ("Forecasts vs Actuals (2006+)" )
Plot variable not specified, automatically selected `.vars = value`
Scale for fill_ramp is already present.
Adding another scale for fill_ramp, which will replace the existing scale.
Accuracy Metrics
acc <- accuracy (object = fc,
data = test
) |>
select (.model, ME, MPE, RMSE, MAE, MAPE#, MASE, RMSSE
)
acc
# A tibble: 5 × 6
.model ME MPE RMSE MAE MAPE
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AVG 57.0 65.6 60.2 57.0 65.6
2 MEAN 57.0 65.6 60.2 57.0 65.6
3 NAIVE 25.3 25.5 31.9 27.3 30.1
4 SNAIVE 18.4 17.1 26.6 22.3 25.1
5 WAVG 50.0 56.8 53.5 50.0 56.8
kable (acc, caption = "Forecast Accuracy Metrics" ) |>
kable_styling (bootstrap_options = c ("striped" , "hover" , "condensed" ), full_width = FALSE )
Forecast Accuracy Metrics
AVG
57.01006
65.62180
60.21732
57.01006
65.62180
MEAN
57.01006
65.62180
60.21732
57.01006
65.62180
NAIVE
25.31358
25.53619
31.88661
27.30025
30.13719
SNAIVE
18.37642
17.06327
26.55340
22.30037
25.14820
WAVG
49.96245
56.82841
53.49147
49.96245
56.82841
Replicating the values
fc_snaive <- models %>%
select (SNAIVE) %>%
forecast (h = nrow (test))
Join Forecasts to Actuals
snaive_model <- train %>%
model (SNAIVE = SNAIVE (value))
fc_snaive <- snaive_model %>%
forecast (h = nrow (test))
compare <- fc_snaive %>%
left_join (test %>% rename (actual = value), by = "Month" ) %>%
rename (forecast = .mean) %>%
select (Month, actual, forecast)
actual <- compare$ actual
forecast <- compare$ forecast
ME <- mean (actual - forecast)
MPE <- mean ((actual - forecast) / actual) * 100
RMSE <- sqrt (mean ((actual - forecast)^ 2 ))
MAE <- mean (abs (actual - forecast))
MAPE <- mean (abs ((actual - forecast) / actual)) * 100
tibble:: tibble (
Model = "SNAIVE" ,
ME = ME,
MPE = MPE,
RMSE = RMSE,
MAE = MAE,
MAPE = MAPE
)
# A tibble: 1 × 6
Model ME MPE RMSE MAE MAPE
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 SNAIVE 18.4 17.1 26.6 22.3 25.1
Export
models <- train %>% model ( NAIVE = NAIVE (formula = value),
MEAN = MEAN (formula = value),
SNAIVE = SNAIVE (formula = value),
AVG = MEAN (formula = value),
WAVG = TSLM (formula = value ~ trend ())
)
fc_all <- models %>% fabletools:: forecast (h = nrow (test))
fc_wide <- fc_all %>%
as_tibble () %>%
select (Month, .model, .mean) %>%
pivot_wider (names_from = .model, values_from = .mean)
df_forecast_all <- test %>%
rename (actual = value) %>%
left_join (fc_wide, by = "Month" ) %>%
mutate (Month = as.Date (Month))
print (df_forecast_all)
# A tsibble: 81 x 7 [1D]
Month actual NAIVE MEAN SNAIVE AVG WAVG
<date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2006-11-01 59.4 58.9 27.2 58.3 27.2 32.8
2 2006-12-01 62.0 58.9 27.2 59.4 27.2 32.9
3 2007-01-01 54.6 58.9 27.2 65.5 27.2 32.9
4 2007-02-01 59.3 58.9 27.2 61.6 27.2 32.9
5 2007-03-01 60.6 58.9 27.2 62.9 27.2 33.0
6 2007-04-01 64.0 58.9 27.2 69.7 27.2 33.0
7 2007-05-01 63.5 58.9 27.2 70.9 27.2 33.0
8 2007-06-01 67.5 58.9 27.2 71.0 27.2 33.1
9 2007-07-01 74.2 58.9 27.2 74.4 27.2 33.1
10 2007-08-01 72.4 58.9 27.2 73.0 27.2 33.1
# ℹ 71 more rows
write_xlsx (df_forecast_all, "accuracy_metrics.xlsx" )
str (train)
tbl_ts [322 × 2] (S3: tbl_ts/tbl_df/tbl/data.frame)
$ Month: mth [1:322] 1980 Jan, 1980 Feb, 1980 Mar, 1980 Apr, 1980 May, 1980 Jun...
$ value: num [1:322] 32.5 37 38 39.5 39.5 39.5 39.5 38 36 36 ...
- attr(*, "key")= tibble [1 × 1] (S3: tbl_df/tbl/data.frame)
..$ .rows: list<int> [1:1]
.. ..$ : int [1:322] 1 2 3 4 5 6 7 8 9 10 ...
.. ..@ ptype: int(0)
- attr(*, "index")= chr "Month"
..- attr(*, "ordered")= logi TRUE
- attr(*, "index2")= chr "Month"
- attr(*, "interval")= interval [1:1] 1M
..@ .regular: logi TRUE
get_label <- function (x) {
lab <- attr (x, "label" , exact = TRUE )
if (is.null (lab)) NA_character_ else lab
}
labels_df <- data.frame (
variable = names (train),
label = sapply (train, get_label)
)
write_xlsx (train, "accuracy_metrics_raw_data.xlsx" )