Module 2 Discussion

Author

Ryan Bean

Setup

Load Libraries

library(fpp3)

Registered S3 method overwritten by 'tsibble':
  method               from 
  as_tibble.grouped_df dplyr

── Attaching packages ──────────────────────────────────────────── fpp3 1.0.2 ──

✔ tibble      3.3.0     ✔ tsibble     1.1.6
✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
✔ tidyr       1.3.1     ✔ feasts      0.4.2
✔ lubridate   1.9.4     ✔ fable       0.4.1
✔ ggplot2     4.0.0

── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
✖ lubridate::date()    masks base::date()
✖ dplyr::filter()      masks stats::filter()
✖ tsibble::intersect() masks base::intersect()
✖ tsibble::interval()  masks lubridate::interval()
✖ dplyr::lag()         masks stats::lag()
✖ tsibble::setdiff()   masks base::setdiff()
✖ tsibble::union()     masks base::union()

library(fredr)
library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats 1.0.1     ✔ readr   2.1.5
✔ purrr   1.1.0     ✔ stringr 1.5.1

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter()     masks stats::filter()
✖ tsibble::interval() masks lubridate::interval()
✖ dplyr::lag()        masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(patchwork)
library(knitr)
library(kableExtra)


Attaching package: 'kableExtra'

The following object is masked from 'package:dplyr':

    group_rows

library(writexl)

Import Data

remove(list=ls())

fredr_set_key("523a2b98a1ce120186357fd0c916cc26")

myts <- fredr(series_id = "OILPRICE",
              observation_start = as.Date("1980-01-01"),
              observation_end   = as.Date("2024-12-01")
              ) |>
  transmute(Month = yearmonth(date), value) |>
  as_tsibble(index = Month)

glimpse(myts)

Rows: 403
Columns: 2
$ Month <mth> 1980 Jan, 1980 Feb, 1980 Mar, 1980 Apr, 1980 May, 1980 Jun, 1980…
$ value <dbl> 32.50, 37.00, 38.00, 39.50, 39.50, 39.50, 39.50, 38.00, 36.00, 3…

str(myts)

tbl_ts [403 × 2] (S3: tbl_ts/tbl_df/tbl/data.frame)
 $ Month: mth [1:403] 1980 Jan, 1980 Feb, 1980 Mar, 1980 Apr, 1980 May, 1980 Jun...
 $ value: num [1:403] 32.5 37 38 39.5 39.5 39.5 39.5 38 36 36 ...
 - attr(*, "key")= tibble [1 × 1] (S3: tbl_df/tbl/data.frame)
  ..$ .rows: list<int> [1:1] 
  .. ..$ : int [1:403] 1 2 3 4 5 6 7 8 9 10 ...
  .. ..@ ptype: int(0) 
 - attr(*, "index")= chr "Month"
  ..- attr(*, "ordered")= logi TRUE
 - attr(*, "index2")= chr "Month"
 - attr(*, "interval")= interval [1:1] 1M
  ..@ .regular: logi TRUE

Split Data

split_index <- floor(0.8 * nrow(myts)) 

train <- myts[1:split_index, ]
test  <- myts[(split_index + 1):nrow(myts), ]

head(train, 1)

# A tsibble: 1 x 2 [1M]
     Month value
     <mth> <dbl>
1 1980 Jan  32.5

tail(x = train, n = 1)

# A tsibble: 1 x 2 [1M]
     Month value
     <mth> <dbl>
1 2006 Oct  58.9

head(x = test, n = 1)

# A tsibble: 1 x 2 [1M]
     Month value
     <mth> <dbl>
1 2006 Nov  59.4

tail(test, 1)

# A tsibble: 1 x 2 [1M]
     Month value
     <mth> <dbl>
1 2013 Jul  105.

Train data contains monthly observations from January, 1980 - October, 2006 while the test data contains monthly observations from November, 2006 - July, 2013.

Forecasting

tsibble::has_gaps(myts)

# A tibble: 1 × 1
  .gaps
  <lgl>
1 FALSE

tsibble::interval(myts)

<interval[1]>
[1] 1M

Decomposition

mods <- train |>
  fabletools::model(
    additive = decomposition_model(
      STL(formula = value ~ season(window="periodic")),
      ETS(formula = season_adjust ~ error("A") + trend("Ad") + season("N")) 
    ),
    multiplicative = decomposition_model(
      dcmp = STL(formula = log(value) ~ season(window = "periodic")),
      ETS(formula = season_adjust)
    )
  )

mods

# A mable: 1 x 2
                   additive            multiplicative
                    <model>                   <model>
1 <STL decomposition model> <STL decomposition model>

glimpse(mods)

Rows: 1
Columns: 2
$ additive       <model> [STL decomposition model]
$ multiplicative <model> [STL decomposition model]

p1 <- train |>
  model(STL(value ~ season(window="periodic"))) |>
  fabletools::components() |> autoplot() + ggtitle("Additive STL decomposition")

p2 <- train |>
  model(STL(log(value) ~ season(window="periodic"))) |>
  components() |> autoplot() + ggtitle("Multiplicative STL decomposition (log)")
 
p1/p2

Workflow

models <- train %>% model( NAIVE  = NAIVE(formula = value), 
                           MEAN   = MEAN(formula = value), 
                           SNAIVE = SNAIVE(formula = value), 
                           AVG    = MEAN(formula = value), 
                           WAVG   = TSLM(formula = value ~ trend())
                           ) 
fc <- models %>%
  forecast(h = nrow(test)
           )

fc %>% autoplot(train) + 
  labs( title = "Five Benchmark Forecasts: Naive, Mean, SNaive, Average, Weighted",
        y = "Value",
        x = "Month" 
        ) + facet_wrap(~ .model,
                       ncol = 2
                       )

preds <- forecast(object = mods, 
                  new_data = test) |>
  mutate(.mean = ifelse(test = .model=="multiplicative", 
                        yes = exp(.mean),
                        no = .mean)
         )

p3 <- autoplot(myts) +
  autolayer(filter(preds, .model=="additive"), colour="blue") +
  autolayer(filter(preds, .model=="multiplicative"), colour="red") +
  ggtitle("Additive (blue) vs Multiplicative (red) forecasts")

Plot variable not specified, automatically selected `.vars = value`
Scale for fill_ramp is already present.
Adding another scale for fill_ramp, which will replace the existing scale.

p4 <- autoplot(myts |> filter_index("2006 Nov"~.)) +
  autolayer(filter(preds, .model=="additive"), colour="blue") +
  autolayer(filter(preds, .model=="multiplicative"), colour="red") +
  ggtitle("Forecasts vs Actuals (2006+)")

Plot variable not specified, automatically selected `.vars = value`
Scale for fill_ramp is already present.
Adding another scale for fill_ramp, which will replace the existing scale.

(p3 / p4)

Accuracy Metrics

acc <- accuracy(object = fc,  
                data = test   
                ) |>
  select(.model, ME, MPE, RMSE, MAE, MAPE#, MASE, RMSSE
         )


acc

# A tibble: 5 × 6
  .model    ME   MPE  RMSE   MAE  MAPE
  <chr>  <dbl> <dbl> <dbl> <dbl> <dbl>
1 AVG     57.0  65.6  60.2  57.0  65.6
2 MEAN    57.0  65.6  60.2  57.0  65.6
3 NAIVE   25.3  25.5  31.9  27.3  30.1
4 SNAIVE  18.4  17.1  26.6  22.3  25.1
5 WAVG    50.0  56.8  53.5  50.0  56.8

kable(acc, caption = "Forecast Accuracy Metrics") |>
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)

Forecast Accuracy Metrics
.model	ME	MPE	RMSE	MAE	MAPE
AVG	57.01006	65.62180	60.21732	57.01006	65.62180
MEAN	57.01006	65.62180	60.21732	57.01006	65.62180
NAIVE	25.31358	25.53619	31.88661	27.30025	30.13719
SNAIVE	18.37642	17.06327	26.55340	22.30037	25.14820
WAVG	49.96245	56.82841	53.49147	49.96245	56.82841

Replicating the values

fc_snaive <- models %>%
  select(SNAIVE) %>%
  forecast(h = nrow(test))

Join Forecasts to Actuals

snaive_model <- train %>%
  model(SNAIVE = SNAIVE(value))

fc_snaive <- snaive_model %>%
  forecast(h = nrow(test))

compare <- fc_snaive %>%
  left_join(test %>% rename(actual = value), by = "Month") %>%
  rename(forecast = .mean) %>%
  select(Month, actual, forecast)

actual <- compare$actual
forecast <- compare$forecast

ME    <- mean(actual - forecast)                        
MPE   <- mean((actual - forecast) / actual) * 100       
RMSE  <- sqrt(mean((actual - forecast)^2))              
MAE   <- mean(abs(actual - forecast))                   
MAPE  <- mean(abs((actual - forecast) / actual)) * 100   

tibble::tibble(
  Model = "SNAIVE",
  ME = ME,
  MPE = MPE,
  RMSE = RMSE,
  MAE = MAE,
  MAPE = MAPE
)

# A tibble: 1 × 6
  Model     ME   MPE  RMSE   MAE  MAPE
  <chr>  <dbl> <dbl> <dbl> <dbl> <dbl>
1 SNAIVE  18.4  17.1  26.6  22.3  25.1

Export

models <- train %>% model( NAIVE  = NAIVE(formula = value), 
                           MEAN   = MEAN(formula = value), 
                           SNAIVE = SNAIVE(formula = value), 
                           AVG    = MEAN(formula = value),  
                           WAVG   = TSLM(formula = value ~ trend()) 
                           ) 

fc_all <- models %>% fabletools::forecast(h = nrow(test))


fc_wide <- fc_all %>%
  as_tibble() %>%  
  select(Month, .model, .mean) %>%
  pivot_wider(names_from = .model, values_from = .mean)


df_forecast_all <- test %>%
  rename(actual = value) %>%
  left_join(fc_wide, by = "Month")  %>%
  mutate(Month = as.Date(Month))


print(df_forecast_all)

# A tsibble: 81 x 7 [1D]
   Month      actual NAIVE  MEAN SNAIVE   AVG  WAVG
   <date>      <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl>
 1 2006-11-01   59.4  58.9  27.2   58.3  27.2  32.8
 2 2006-12-01   62.0  58.9  27.2   59.4  27.2  32.9
 3 2007-01-01   54.6  58.9  27.2   65.5  27.2  32.9
 4 2007-02-01   59.3  58.9  27.2   61.6  27.2  32.9
 5 2007-03-01   60.6  58.9  27.2   62.9  27.2  33.0
 6 2007-04-01   64.0  58.9  27.2   69.7  27.2  33.0
 7 2007-05-01   63.5  58.9  27.2   70.9  27.2  33.0
 8 2007-06-01   67.5  58.9  27.2   71.0  27.2  33.1
 9 2007-07-01   74.2  58.9  27.2   74.4  27.2  33.1
10 2007-08-01   72.4  58.9  27.2   73.0  27.2  33.1
# ℹ 71 more rows

write_xlsx(df_forecast_all, "accuracy_metrics.xlsx")

str(train)

tbl_ts [322 × 2] (S3: tbl_ts/tbl_df/tbl/data.frame)
 $ Month: mth [1:322] 1980 Jan, 1980 Feb, 1980 Mar, 1980 Apr, 1980 May, 1980 Jun...
 $ value: num [1:322] 32.5 37 38 39.5 39.5 39.5 39.5 38 36 36 ...
 - attr(*, "key")= tibble [1 × 1] (S3: tbl_df/tbl/data.frame)
  ..$ .rows: list<int> [1:1] 
  .. ..$ : int [1:322] 1 2 3 4 5 6 7 8 9 10 ...
  .. ..@ ptype: int(0) 
 - attr(*, "index")= chr "Month"
  ..- attr(*, "ordered")= logi TRUE
 - attr(*, "index2")= chr "Month"
 - attr(*, "interval")= interval [1:1] 1M
  ..@ .regular: logi TRUE

get_label <- function(x) {
  lab <- attr(x, "label", exact = TRUE)
  if (is.null(lab)) NA_character_ else lab
}

labels_df <- data.frame(
  variable = names(train),
  label    = sapply(train, get_label)
)


write_xlsx(train, "accuracy_metrics_raw_data.xlsx")