HW2: Accuracy Metrics and Decomposition (FRED)

Author

John Guarini

1 HW2: fpp3 pipeline + accuracy + decomposition

1.1 Data: FRED monthly series SPDYNLE00INUSA

2 Load libraries

library(fpp3) # tsibble + fable + feasts + ggplot2 tools for forecasting 
library(fredr) # pull data from FRED using API key library(gt) \# nice tables 
library(webshot2) # helps gt save tables as images (usually needed)
library(gt)

3 Set your FRED API key (yours)

fredr_set_key("236e22b82f488006f4ae1706a07408e4")

4 Pull MONTHLY data from FRED

SERIES_ID <- "CES6562000001" # US life expectancy at birth (World Bank via FRED) 
START_DATE <- as.Date("1960-01-01")

raw <- fredr( series_id = SERIES_ID, observation_start = START_DATE )

5 Convert to tsibble (fpp3 data structure)

myts <- raw |> 
  select(date, value) |> 
  mutate(Month = yearmonth(date)) |>
  select(Month, value) |>
  as_tsibble(index = Month)

6 Quick plot of the series

p_series <- myts |>
  autoplot(value) + 
  labs( title = paste("FRED:", SERIES_ID), subtitle = "Monthly data", y = "Value", x = NULL )

print(p_series)

ggsave("01_series_plot.png", p_series, width = 9, height = 5, dpi = 200)

7 Train/Test split

SPLIT_DATE <- yearmonth("2018 Jan")

train <- myts |> filter(Month < SPLIT_DATE) 
test <- myts |> filter(Month >= SPLIT_DATE)

h <- nrow(test) # forecast horizon equals length of test set

8 Fit models

8.1 MEAN = mean forecast (flat line at historical average)

8.2 NAIVE = random walk (last value)

8.3 SNAIVE = seasonal naive (last year same month)

8.4 AVG (drift) = random walk with drift (trend based on avg change)

fits <- train |>
  model( MEAN = MEAN(value), 
         NAIVE = NAIVE(value), 
         SNAIVE = SNAIVE(value ~ lag("year")), 
         AVG = RW(value ~ drift()) )

9 Forecast h steps ahead + prediction intervals

fc <- fits |> forecast(h = h)

10 Plot forecasts with 80% and 95% intervals + show test data

p_fc <- fc |> autoplot(train, level = c(80, 95)) + autolayer(test, value, alpha = 0.7) + labs( title = paste("Forecasts with CI —", SERIES_ID), subtitle = paste("Train < ", SPLIT_DATE, " | Test ≥ ", SPLIT_DATE), y = "Value", x = NULL )

print(p_fc)

ggsave("02_forecast_plot.png", p_fc, width = 10, height = 6, dpi = 200)
p_fc <- fc |>
  autoplot(train, level = c(80, 95)) +
  autolayer(test, value, alpha = 0.7) +
  facet_wrap(~ .model, scales = "free_y") +
  labs(
    title = paste("Forecasts with CI by Model —", SERIES_ID),
    y = "Value",
    x = NULL
  )
print(p_fc)

11 Accuracy metrics ( 5 required)

acc <- accuracy(fc, test) |> select(.model, ME, MPE, RMSE, MAE, MAPE) |> arrange(RMSE)

print(acc)
# A tibble: 4 × 6
  .model    ME   MPE  RMSE   MAE  MAPE
  <chr>  <dbl> <dbl> <dbl> <dbl> <dbl>
1 AVG    -188. -1.04  689.  545.  2.62
2 NAIVE  1342.  6.05 1848. 1395.  6.33
3 SNAIVE 1503.  6.83 1964. 1541.  7.02
4 MEAN   6657. 31.4  6777. 6657. 31.4 

12 Accuracy table (gt)

acc_gt <- acc |> gt() |> tab_header( title = md(paste0("Accuracy Metrics — **", SERIES_ID, "**")), subtitle = md(paste0("Test starts: **", SPLIT_DATE, "**")) ) |> fmt_number(columns = c(ME, MPE, RMSE, MAE, MAPE), decimals = 3)

acc
# A tibble: 4 × 6
  .model    ME   MPE  RMSE   MAE  MAPE
  <chr>  <dbl> <dbl> <dbl> <dbl> <dbl>
1 AVG    -188. -1.04  689.  545.  2.62
2 NAIVE  1342.  6.05 1848. 1395.  6.33
3 SNAIVE 1503.  6.83 1964. 1541.  7.02
4 MEAN   6657. 31.4  6777. 6657. 31.4 

13 Create an Excel-friendly table of Actuals + forecasts (point forecasts)

excel_tbl <- fc |> as_tibble() |> select(Month, .model, .mean) |> tidyr::pivot_wider(names_from = .model, values_from = .mean) |> left_join(test |> as_tibble(), by = "Month") |> rename(Actual = value)

print(excel_tbl)
# A tibble: 96 × 6
      Month   MEAN  NAIVE SNAIVE    AVG Actual
      <mth>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
 1 2018 Jan 14367. 19683. 19330. 19714. 19732.
 2 2018 Feb 14367. 19683. 19371. 19746. 19785.
 3 2018 Mar 14367. 19683. 19391. 19777. 19816.
 4 2018 Apr 14367. 19683. 19443. 19809. 19839.
 5 2018 May 14367. 19683. 19470. 19840. 19875.
 6 2018 Jun 14367. 19683. 19521. 19872. 19892.
 7 2018 Jul 14367. 19683. 19569. 19903. 19916.
 8 2018 Aug 14367. 19683. 19589  19935. 19962.
 9 2018 Sep 14367. 19683. 19603. 19966. 19990.
10 2018 Oct 14367. 19683. 19623. 19998. 20039.
# ℹ 86 more rows
write.csv(excel_tbl, "04_excel_table.csv", row.names = FALSE)

14 Decomposition — Additive STL

fit_add <- train |> model(STL(value ~ season(window = "periodic")))

comp_add <- components(fit_add)

p_add <- autoplot(comp_add) + labs(title = paste("Additive STL decomposition —", SERIES_ID))

print(p_add) 

ggsave("05_decomposition_additive.png", p_add, width = 10, height = 7, dpi = 200)

15 Decomposition — Multiplicative (log) STL

15.1 Multiplicative decomposition is usually done by decomposing log(value).

fit_mult <- train |> model(STL(log(value) ~ season(window = "periodic")))

comp_mult <- components(fit_mult)

p_mult <- autoplot(comp_mult) + labs(title = paste("Multiplicative (log) STL decomposition —", SERIES_ID))

print(p_mult) 

ggsave("06_decomposition_multiplicative.png", p_mult, width = 10, height = 7, dpi = 200)

16 Quick Numeric Comparison: Remainder “size”

16.1 Smaller remainder SD often indicates a cleaner decomposition.

rem_compare <- tibble( method = c("Additive", "Multiplicative_log"), remainder_sd = c( sd(comp_add$remainder, na.rm = TRUE),
    sd(comp_mult$remainder, na.rm = TRUE) ) )

print(rem_compare)
# A tibble: 2 × 2
  method             remainder_sd
  <chr>                     <dbl>
1 Additive              13.6     
2 Multiplicative_log     0.000961