library(fpp3) # tsibble + fable + feasts + ggplot2 tools for forecasting
library(fredr) # pull data from FRED using API key library(gt) \# nice tables
library(webshot2) # helps gt save tables as images (usually needed)
library(gt)HW2: Accuracy Metrics and Decomposition (FRED)
1 HW2: fpp3 pipeline + accuracy + decomposition
1.1 Data: FRED monthly series SPDYNLE00INUSA
2 Load libraries
3 Set your FRED API key (yours)
fredr_set_key("236e22b82f488006f4ae1706a07408e4")4 Pull MONTHLY data from FRED
SERIES_ID <- "CES6562000001" # US life expectancy at birth (World Bank via FRED)
START_DATE <- as.Date("1960-01-01")
raw <- fredr( series_id = SERIES_ID, observation_start = START_DATE )5 Convert to tsibble (fpp3 data structure)
myts <- raw |>
select(date, value) |>
mutate(Month = yearmonth(date)) |>
select(Month, value) |>
as_tsibble(index = Month)6 Quick plot of the series
p_series <- myts |>
autoplot(value) +
labs( title = paste("FRED:", SERIES_ID), subtitle = "Monthly data", y = "Value", x = NULL )
print(p_series)ggsave("01_series_plot.png", p_series, width = 9, height = 5, dpi = 200)7 Train/Test split
SPLIT_DATE <- yearmonth("2018 Jan")
train <- myts |> filter(Month < SPLIT_DATE)
test <- myts |> filter(Month >= SPLIT_DATE)
h <- nrow(test) # forecast horizon equals length of test set8 Fit models
8.1 MEAN = mean forecast (flat line at historical average)
8.2 NAIVE = random walk (last value)
8.3 SNAIVE = seasonal naive (last year same month)
8.4 AVG (drift) = random walk with drift (trend based on avg change)
fits <- train |>
model( MEAN = MEAN(value),
NAIVE = NAIVE(value),
SNAIVE = SNAIVE(value ~ lag("year")),
AVG = RW(value ~ drift()) )9 Forecast h steps ahead + prediction intervals
fc <- fits |> forecast(h = h)10 Plot forecasts with 80% and 95% intervals + show test data
p_fc <- fc |> autoplot(train, level = c(80, 95)) + autolayer(test, value, alpha = 0.7) + labs( title = paste("Forecasts with CI —", SERIES_ID), subtitle = paste("Train < ", SPLIT_DATE, " | Test ≥ ", SPLIT_DATE), y = "Value", x = NULL )
print(p_fc)ggsave("02_forecast_plot.png", p_fc, width = 10, height = 6, dpi = 200)p_fc <- fc |>
autoplot(train, level = c(80, 95)) +
autolayer(test, value, alpha = 0.7) +
facet_wrap(~ .model, scales = "free_y") +
labs(
title = paste("Forecasts with CI by Model —", SERIES_ID),
y = "Value",
x = NULL
)
print(p_fc)11 Accuracy metrics ( 5 required)
acc <- accuracy(fc, test) |> select(.model, ME, MPE, RMSE, MAE, MAPE) |> arrange(RMSE)
print(acc)# A tibble: 4 × 6
.model ME MPE RMSE MAE MAPE
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AVG -188. -1.04 689. 545. 2.62
2 NAIVE 1342. 6.05 1848. 1395. 6.33
3 SNAIVE 1503. 6.83 1964. 1541. 7.02
4 MEAN 6657. 31.4 6777. 6657. 31.4
12 Accuracy table (gt)
acc_gt <- acc |> gt() |> tab_header( title = md(paste0("Accuracy Metrics — **", SERIES_ID, "**")), subtitle = md(paste0("Test starts: **", SPLIT_DATE, "**")) ) |> fmt_number(columns = c(ME, MPE, RMSE, MAE, MAPE), decimals = 3)
acc# A tibble: 4 × 6
.model ME MPE RMSE MAE MAPE
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AVG -188. -1.04 689. 545. 2.62
2 NAIVE 1342. 6.05 1848. 1395. 6.33
3 SNAIVE 1503. 6.83 1964. 1541. 7.02
4 MEAN 6657. 31.4 6777. 6657. 31.4
13 Create an Excel-friendly table of Actuals + forecasts (point forecasts)
excel_tbl <- fc |> as_tibble() |> select(Month, .model, .mean) |> tidyr::pivot_wider(names_from = .model, values_from = .mean) |> left_join(test |> as_tibble(), by = "Month") |> rename(Actual = value)
print(excel_tbl)# A tibble: 96 × 6
Month MEAN NAIVE SNAIVE AVG Actual
<mth> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2018 Jan 14367. 19683. 19330. 19714. 19732.
2 2018 Feb 14367. 19683. 19371. 19746. 19785.
3 2018 Mar 14367. 19683. 19391. 19777. 19816.
4 2018 Apr 14367. 19683. 19443. 19809. 19839.
5 2018 May 14367. 19683. 19470. 19840. 19875.
6 2018 Jun 14367. 19683. 19521. 19872. 19892.
7 2018 Jul 14367. 19683. 19569. 19903. 19916.
8 2018 Aug 14367. 19683. 19589 19935. 19962.
9 2018 Sep 14367. 19683. 19603. 19966. 19990.
10 2018 Oct 14367. 19683. 19623. 19998. 20039.
# ℹ 86 more rows
write.csv(excel_tbl, "04_excel_table.csv", row.names = FALSE)14 Decomposition — Additive STL
fit_add <- train |> model(STL(value ~ season(window = "periodic")))
comp_add <- components(fit_add)
p_add <- autoplot(comp_add) + labs(title = paste("Additive STL decomposition —", SERIES_ID))
print(p_add) ggsave("05_decomposition_additive.png", p_add, width = 10, height = 7, dpi = 200)15 Decomposition — Multiplicative (log) STL
15.1 Multiplicative decomposition is usually done by decomposing log(value).
fit_mult <- train |> model(STL(log(value) ~ season(window = "periodic")))
comp_mult <- components(fit_mult)
p_mult <- autoplot(comp_mult) + labs(title = paste("Multiplicative (log) STL decomposition —", SERIES_ID))
print(p_mult) ggsave("06_decomposition_multiplicative.png", p_mult, width = 10, height = 7, dpi = 200)16 Quick Numeric Comparison: Remainder “size”
16.1 Smaller remainder SD often indicates a cleaner decomposition.
rem_compare <- tibble( method = c("Additive", "Multiplicative_log"), remainder_sd = c( sd(comp_add$remainder, na.rm = TRUE),
sd(comp_mult$remainder, na.rm = TRUE) ) )
print(rem_compare)# A tibble: 2 × 2
method remainder_sd
<chr> <dbl>
1 Additive 13.6
2 Multiplicative_log 0.000961