Module 2 Discussion

Author

Ryan Bean

Setup

Load Libraries

library(fpp3)
Registered S3 method overwritten by 'tsibble':
  method               from 
  as_tibble.grouped_df dplyr
── Attaching packages ──────────────────────────────────────────── fpp3 1.0.2 ──
✔ tibble      3.3.0     ✔ tsibble     1.1.6
✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
✔ tidyr       1.3.1     ✔ feasts      0.4.2
✔ lubridate   1.9.4     ✔ fable       0.4.1
✔ ggplot2     4.0.0     
── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
✖ lubridate::date()    masks base::date()
✖ dplyr::filter()      masks stats::filter()
✖ tsibble::intersect() masks base::intersect()
✖ tsibble::interval()  masks lubridate::interval()
✖ dplyr::lag()         masks stats::lag()
✖ tsibble::setdiff()   masks base::setdiff()
✖ tsibble::union()     masks base::union()
library(fredr)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats 1.0.1     ✔ readr   2.1.5
✔ purrr   1.1.0     ✔ stringr 1.5.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter()     masks stats::filter()
✖ tsibble::interval() masks lubridate::interval()
✖ dplyr::lag()        masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(patchwork)
library(knitr)
library(kableExtra)

Attaching package: 'kableExtra'

The following object is masked from 'package:dplyr':

    group_rows
library(writexl)

Import Data

remove(list=ls())

fredr_set_key("523a2b98a1ce120186357fd0c916cc26")

myts <- fredr(series_id = "OILPRICE",
              observation_start = as.Date("1980-01-01"),
              observation_end   = as.Date("2024-12-01")
              ) |>
  transmute(Month = yearmonth(date), value) |>
  as_tsibble(index = Month)

glimpse(myts)
Rows: 403
Columns: 2
$ Month <mth> 1980 Jan, 1980 Feb, 1980 Mar, 1980 Apr, 1980 May, 1980 Jun, 1980…
$ value <dbl> 32.50, 37.00, 38.00, 39.50, 39.50, 39.50, 39.50, 38.00, 36.00, 3…
str(myts)
tbl_ts [403 × 2] (S3: tbl_ts/tbl_df/tbl/data.frame)
 $ Month: mth [1:403] 1980 Jan, 1980 Feb, 1980 Mar, 1980 Apr, 1980 May, 1980 Jun...
 $ value: num [1:403] 32.5 37 38 39.5 39.5 39.5 39.5 38 36 36 ...
 - attr(*, "key")= tibble [1 × 1] (S3: tbl_df/tbl/data.frame)
  ..$ .rows: list<int> [1:1] 
  .. ..$ : int [1:403] 1 2 3 4 5 6 7 8 9 10 ...
  .. ..@ ptype: int(0) 
 - attr(*, "index")= chr "Month"
  ..- attr(*, "ordered")= logi TRUE
 - attr(*, "index2")= chr "Month"
 - attr(*, "interval")= interval [1:1] 1M
  ..@ .regular: logi TRUE

Split Data

split_index <- floor(0.8 * nrow(myts)) 

train <- myts[1:split_index, ]
test  <- myts[(split_index + 1):nrow(myts), ]
head(train, 1)
# A tsibble: 1 x 2 [1M]
     Month value
     <mth> <dbl>
1 1980 Jan  32.5
tail(x = train, n = 1)
# A tsibble: 1 x 2 [1M]
     Month value
     <mth> <dbl>
1 2006 Oct  58.9
head(x = test, n = 1) 
# A tsibble: 1 x 2 [1M]
     Month value
     <mth> <dbl>
1 2006 Nov  59.4
tail(test, 1)
# A tsibble: 1 x 2 [1M]
     Month value
     <mth> <dbl>
1 2013 Jul  105.

Train data contains monthly observations from January, 1980 - October, 2006 while the test data contains monthly observations from November, 2006 - July, 2013.

Forecasting

tsibble::has_gaps(myts)
# A tibble: 1 × 1
  .gaps
  <lgl>
1 FALSE
tsibble::interval(myts)
<interval[1]>
[1] 1M

Decomposition

mods <- train |>
  fabletools::model(
    additive = decomposition_model(
      STL(formula = value ~ season(window="periodic")),
      ETS(formula = season_adjust ~ error("A") + trend("Ad") + season("N")) 
    ),
    multiplicative = decomposition_model(
      dcmp = STL(formula = log(value) ~ season(window = "periodic")),
      ETS(formula = season_adjust)
    )
  )

mods
# A mable: 1 x 2
                   additive            multiplicative
                    <model>                   <model>
1 <STL decomposition model> <STL decomposition model>
glimpse(mods)
Rows: 1
Columns: 2
$ additive       <model> [STL decomposition model]
$ multiplicative <model> [STL decomposition model]
p1 <- train |>
  model(STL(value ~ season(window="periodic"))) |>
  fabletools::components() |> autoplot() + ggtitle("Additive STL decomposition")

p2 <- train |>
  model(STL(log(value) ~ season(window="periodic"))) |>
  components() |> autoplot() + ggtitle("Multiplicative STL decomposition (log)")
 
p1/p2


Workflow

models <- train %>% model( NAIVE  = NAIVE(formula = value), 
                           MEAN   = MEAN(formula = value), 
                           SNAIVE = SNAIVE(formula = value), 
                           AVG    = MEAN(formula = value), 
                           WAVG   = TSLM(formula = value ~ trend())
                           ) 
fc <- models %>%
  forecast(h = nrow(test)
           )

fc %>% autoplot(train) + 
  labs( title = "Five Benchmark Forecasts: Naive, Mean, SNaive, Average, Weighted",
        y = "Value",
        x = "Month" 
        ) + facet_wrap(~ .model,
                       ncol = 2
                       ) 

preds <- forecast(object = mods, 
                  new_data = test) |>
  mutate(.mean = ifelse(test = .model=="multiplicative", 
                        yes = exp(.mean),
                        no = .mean)
         )

p3 <- autoplot(myts) +
  autolayer(filter(preds, .model=="additive"), colour="blue") +
  autolayer(filter(preds, .model=="multiplicative"), colour="red") +
  ggtitle("Additive (blue) vs Multiplicative (red) forecasts")
Plot variable not specified, automatically selected `.vars = value`
Scale for fill_ramp is already present.
Adding another scale for fill_ramp, which will replace the existing scale.
p4 <- autoplot(myts |> filter_index("2006 Nov"~.)) +
  autolayer(filter(preds, .model=="additive"), colour="blue") +
  autolayer(filter(preds, .model=="multiplicative"), colour="red") +
  ggtitle("Forecasts vs Actuals (2006+)")
Plot variable not specified, automatically selected `.vars = value`
Scale for fill_ramp is already present.
Adding another scale for fill_ramp, which will replace the existing scale.
(p3 / p4)

Accuracy Metrics


acc <- accuracy(object = fc,  
                data = test   
                ) |>
  select(.model, ME, MPE, RMSE, MAE, MAPE#, MASE, RMSSE
         )


acc
# A tibble: 5 × 6
  .model    ME   MPE  RMSE   MAE  MAPE
  <chr>  <dbl> <dbl> <dbl> <dbl> <dbl>
1 AVG     57.0  65.6  60.2  57.0  65.6
2 MEAN    57.0  65.6  60.2  57.0  65.6
3 NAIVE   25.3  25.5  31.9  27.3  30.1
4 SNAIVE  18.4  17.1  26.6  22.3  25.1
5 WAVG    50.0  56.8  53.5  50.0  56.8
kable(acc, caption = "Forecast Accuracy Metrics") |>
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Forecast Accuracy Metrics
.model ME MPE RMSE MAE MAPE
AVG 57.01006 65.62180 60.21732 57.01006 65.62180
MEAN 57.01006 65.62180 60.21732 57.01006 65.62180
NAIVE 25.31358 25.53619 31.88661 27.30025 30.13719
SNAIVE 18.37642 17.06327 26.55340 22.30037 25.14820
WAVG 49.96245 56.82841 53.49147 49.96245 56.82841

Replicating the values

fc_snaive <- models %>%
  select(SNAIVE) %>%
  forecast(h = nrow(test))

Join Forecasts to Actuals

snaive_model <- train %>%
  model(SNAIVE = SNAIVE(value))

fc_snaive <- snaive_model %>%
  forecast(h = nrow(test))

compare <- fc_snaive %>%
  left_join(test %>% rename(actual = value), by = "Month") %>%
  rename(forecast = .mean) %>%
  select(Month, actual, forecast)

actual <- compare$actual
forecast <- compare$forecast

ME    <- mean(actual - forecast)                        
MPE   <- mean((actual - forecast) / actual) * 100       
RMSE  <- sqrt(mean((actual - forecast)^2))              
MAE   <- mean(abs(actual - forecast))                   
MAPE  <- mean(abs((actual - forecast) / actual)) * 100   

tibble::tibble(
  Model = "SNAIVE",
  ME = ME,
  MPE = MPE,
  RMSE = RMSE,
  MAE = MAE,
  MAPE = MAPE
)
# A tibble: 1 × 6
  Model     ME   MPE  RMSE   MAE  MAPE
  <chr>  <dbl> <dbl> <dbl> <dbl> <dbl>
1 SNAIVE  18.4  17.1  26.6  22.3  25.1

Export

models <- train %>% model( NAIVE  = NAIVE(formula = value), 
                           MEAN   = MEAN(formula = value), 
                           SNAIVE = SNAIVE(formula = value), 
                           AVG    = MEAN(formula = value),  
                           WAVG   = TSLM(formula = value ~ trend()) 
                           ) 

fc_all <- models %>% fabletools::forecast(h = nrow(test))


fc_wide <- fc_all %>%
  as_tibble() %>%  
  select(Month, .model, .mean) %>%
  pivot_wider(names_from = .model, values_from = .mean)


df_forecast_all <- test %>%
  rename(actual = value) %>%
  left_join(fc_wide, by = "Month")  %>%
  mutate(Month = as.Date(Month))


print(df_forecast_all)
# A tsibble: 81 x 7 [1D]
   Month      actual NAIVE  MEAN SNAIVE   AVG  WAVG
   <date>      <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl>
 1 2006-11-01   59.4  58.9  27.2   58.3  27.2  32.8
 2 2006-12-01   62.0  58.9  27.2   59.4  27.2  32.9
 3 2007-01-01   54.6  58.9  27.2   65.5  27.2  32.9
 4 2007-02-01   59.3  58.9  27.2   61.6  27.2  32.9
 5 2007-03-01   60.6  58.9  27.2   62.9  27.2  33.0
 6 2007-04-01   64.0  58.9  27.2   69.7  27.2  33.0
 7 2007-05-01   63.5  58.9  27.2   70.9  27.2  33.0
 8 2007-06-01   67.5  58.9  27.2   71.0  27.2  33.1
 9 2007-07-01   74.2  58.9  27.2   74.4  27.2  33.1
10 2007-08-01   72.4  58.9  27.2   73.0  27.2  33.1
# ℹ 71 more rows
write_xlsx(df_forecast_all, "accuracy_metrics.xlsx")

str(train)
tbl_ts [322 × 2] (S3: tbl_ts/tbl_df/tbl/data.frame)
 $ Month: mth [1:322] 1980 Jan, 1980 Feb, 1980 Mar, 1980 Apr, 1980 May, 1980 Jun...
 $ value: num [1:322] 32.5 37 38 39.5 39.5 39.5 39.5 38 36 36 ...
 - attr(*, "key")= tibble [1 × 1] (S3: tbl_df/tbl/data.frame)
  ..$ .rows: list<int> [1:1] 
  .. ..$ : int [1:322] 1 2 3 4 5 6 7 8 9 10 ...
  .. ..@ ptype: int(0) 
 - attr(*, "index")= chr "Month"
  ..- attr(*, "ordered")= logi TRUE
 - attr(*, "index2")= chr "Month"
 - attr(*, "interval")= interval [1:1] 1M
  ..@ .regular: logi TRUE
get_label <- function(x) {
  lab <- attr(x, "label", exact = TRUE)
  if (is.null(lab)) NA_character_ else lab
}

labels_df <- data.frame(
  variable = names(train),
  label    = sapply(train, get_label)
)


write_xlsx(train, "accuracy_metrics_raw_data.xlsx")