library(tidyverse)
library(tsibble)
library(feasts)
library(fable)
library(fabletools)
library(forecast) # Box-Cox + ndiffs/nsdiffs
Homework Assignment 1: Foundational Forecasting with Time Series Regression
0.1 Data source and citation
Monthly data from FRED (Federal Reserve Bank of St. Louis).
- Series: Retail Sales: Retail Trade and Food Services (MRTSSM44X72USN)
- Series page: https://fred.stlouisfed.org/series/MRTSSM44X72USN
- Direct CSV endpoint used below: https://fred.stlouisfed.org/graph/fredgraph.csv?id=MRTSSM44X72USN
Federal Reserve Bank of St. Louis, Retail Sales: Retail Trade and Food Services [MRTSSM44X72USN], retrieved from FRED, Federal Reserve Bank of St. Louis, February 19, 2026.
0.2 Setup
0.2.1 Required libraries
# Optional libraries availability map (named logical vector)
opt_available <- c(
strucchange = requireNamespace("strucchange", quietly = TRUE),
tseries = requireNamespace("tseries", quietly = TRUE),
urca = requireNamespace("urca", quietly = TRUE),
forecast = requireNamespace("forecast", quietly = TRUE),
fable = requireNamespace("fable", quietly = TRUE),
feasts = requireNamespace("feasts", quietly = TRUE),
tsibble = requireNamespace("tsibble", quietly = TRUE),
ggplot2 = requireNamespace("ggplot2", quietly = TRUE)
)
# Safe helper (prevents errors if a name is missing)
has_pkg <- function(pkg) isTRUE(opt_available[[pkg]])
0.3 Data
series_id <- "MRTSSM44X72USN"
fred_url <- paste0("https://fred.stlouisfed.org/graph/fredgraph.csv?id=", series_id)
raw0 <- readr::read_csv(fred_url, show_col_types = FALSE)
date_col <- names(raw0)[1]
value_col <- names(raw0)[2]
raw <- raw0 |>
rename(date = all_of(date_col), value = all_of(value_col)) |>
mutate(
date = as.Date(date),
Month = yearmonth(date),
value = as.numeric(value)
) |>
select(Month, value) |>
as_tsibble(index = Month) |>
fill_gaps()
raw
# A tsibble: 407 x 2 [1M]
Month value
<mth> <dbl>
1 1992 Jan 142051
2 1992 Feb 142498
3 1992 Mar 154439
4 1992 Apr 158430
5 1992 May 164788
6 1992 Jun 163417
7 1992 Jul 164575
8 1992 Aug 165387
9 1992 Sep 159892
10 1992 Oct 168529
# ℹ 397 more rows
# Sanity checks
n_total <- nrow(raw)
# as_tibble() first: summarise() on a tsibble keeps the monthly index and would return one row per month
raw |> as_tibble() |> summarise(start = min(Month), end = max(Month), n = n())
# A tibble: 1 × 3
  start    end          n
  <mth>    <mth>    <int>
1 1992 Jan 2025 Nov   407
if (n_total < 100) stop("Series has fewer than 100 observations; pick another monthly series.")
if (anyNA(raw$value)) stop("Missing values detected after fill_gaps(). Handle missingness (impute or drop) before modeling.")
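If the missing-value check above fires, one minimal remedy is linear interpolation over the row index. This is a sketch only; `impute_linear` is a hypothetical helper, not part of the pipeline above, and it does not extrapolate leading or trailing gaps.
# Hypothetical helper: linearly interpolate internal NA runs (illustrative only)
impute_linear <- function(x) {
  idx <- seq_along(x)
  ok <- !is.na(x)
  x[!ok] <- stats::approx(idx[ok], x[ok], xout = idx[!ok])$y
  x
}
# raw <- raw |> mutate(value = impute_linear(value))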
if (any(raw$value <= 0)) warning("Non-positive values detected; Box-Cox/log transforms may not be valid.")
0.4 Train/test split (80/20)
Let \( T = \lvert \mathcal{D} \rvert \) be the number of observations. The split point is
\[ T_0 = \left\lfloor 0.8\,T \right\rfloor, \]
so that
\[ \mathcal{D}_{\text{train}} = \{\, y_t \mid 1 \le t \le T_0 \,\}, \qquad \mathcal{D}_{\text{test}} = \{\, y_t \mid T_0 < t \le T \,\}, \]
and test-period forecasts are denoted \( \hat{y}_{t \mid T_0} \) for \( t > T_0 \).
train_n <- floor(0.8 * n_total)
train <- raw |> slice(1:train_n)
test <- raw |> slice((train_n + 1):n_total)
train |> as_tibble() |> summarise(start = min(Month), end = max(Month), n = n())
# A tibble: 1 × 3
  start    end          n
  <mth>    <mth>    <int>
1 1992 Jan 2019 Jan   325
test |> as_tibble() |> summarise(start = min(Month), end = max(Month), n = n())
# A tibble: 1 × 3
  start    end          n
  <mth>    <mth>    <int>
1 2019 Feb 2025 Nov    82
# Plot with train/test clearly labeled
bind_rows(
train |> mutate(Set = "Train"),
test |> mutate(Set = "Test")
) |>
ggplot(aes(x = Month, y = value, linetype = Set)) +
geom_line() +
labs(
title = "Monthly series with train/test split",
x = NULL,
y = "Value"
)
0.5 Explore the training set (graphs for trend, seasonality, structural breaks)
0.5.1 Trend (training only)
train_trend <- train |>
as_tibble() |>
mutate(
date = as.Date(Month),
ma12 = as.numeric(stats::filter(value, rep(1/12, 12), sides = 2))
)
ggplot(train_trend, aes(x = date, y = value)) +
geom_line(alpha = 0.6) +
geom_line(aes(y = ma12), linewidth = 1.0, na.rm = TRUE) +
labs(
title = "Training set: level + 12-month moving average (trend proxy)",
subtitle = "Thin = raw training series; thick = centered 12-month moving average",
x = NULL,
y = "Value"
)
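One refinement worth noting: with an even window, `rep(1/12, 12)` is not symmetric around each month. The standard centered estimate for monthly data is the 2×12 moving average, a 13-term filter with half weights on the endpoint months. A sketch:
# 2x12 centered moving average: half weight on the two endpoint months
ma2x12 <- stats::filter(train$value, c(0.5, rep(1, 11), 0.5) / 12, sides = 2)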
0.5.2 Seasonality (training only)
train |>
gg_season(value) +
labs(title = "Training set: seasonal plot")
train |>
gg_subseries(value) +
labs(title = "Training set: seasonal subseries plot")
0.5.3 Structural breaks (training-only candidate detection + graph)
Screening approach: flag months where the 1-step change is unusually large relative to typical changes in the training period. Detection uses the training set only; if a break occurred in the test period instead, it could not be learned from history and would instead call for a judgmental adjustment, preserving temporal causality (no test-period information leaks into the model).
train_breaks <- train |>
mutate(
d1 = difference(value),
d1_z = as.numeric(scale(d1)),
break_flag = abs(d1_z) >= 3
)
train_breaks |> filter(break_flag) |> select(Month, value, d1, d1_z)
# A tsibble: 5 x 4 [1M]
Month value d1 d1_z
<mth> <dbl> <dbl> <dbl>
1 2015 Jan 391738 -98749 -3.07
2 2016 Jan 395277 -110874 -3.45
3 2017 Jan 415443 -111347 -3.46
4 2018 Jan 437073 -107824 -3.35
5 2019 Jan 447787 -98348 -3.06
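All five flagged months are Januaries, so this screen is largely picking up the sharp post-December seasonal drop rather than genuine level shifts. A more targeted check would remove seasonality first; if strucchange is installed, a Bai-Perron search for mean shifts in the seasonally differenced training series is one option (a sketch, not part of the original screen):
# Sketch: Bai-Perron breakpoints in the mean of the seasonally differenced series
if (has_pkg("strucchange")) {
  y_ds <- na.omit(as.numeric(difference(train$value, lag = 12)))
  bp <- strucchange::breakpoints(y_ds ~ 1) # pure level-shift model
  print(summary(bp))
}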
train_breaks_plot <- train_breaks |>
as_tibble() |>
mutate(date = as.Date(Month))
ggplot(train_breaks_plot, aes(x = date, y = value)) +
geom_line(alpha = 0.6) +
geom_point(
data = train_breaks_plot |> filter(break_flag),
aes(x = date, y = value),
size = 2
) +
labs(
title = "Training set: candidate structural breaks (flagged by large 1-step change)",
subtitle = "Points mark months where |z-score of 1-step change| ≥ 3 (training only)",
x = NULL,
y = "Value"
)
0.6 Decompose the training set (classical + STL)
0.6.1 Classical decomposition (additive)
train_dc_add <- train |>
model(classical_add = classical_decomposition(value, type = "additive"))
components(train_dc_add)
# A dable: 325 x 7 [1M]
# Key: .model [1]
# : value = trend + seasonal + random
.model Month value trend seasonal random season_adjust
<chr> <mth> <dbl> <dbl> <dbl> <dbl> <dbl>
1 classical_add 1992 Jan 142051 NA -31985. NA 174036.
2 classical_add 1992 Feb 142498 NA -34101. NA 176599.
3 classical_add 1992 Mar 154439 NA 3875. NA 150564.
4 classical_add 1992 Apr 158430 NA -3539. NA 161969.
5 classical_add 1992 May 164788 NA 14146. NA 150642.
6 classical_add 1992 Jun 163417 NA 4530. NA 158887.
7 classical_add 1992 Jul 164575 163171. 3889. -2484. 160686.
8 classical_add 1992 Aug 165387 163592. 10201. -8406. 155186.
9 classical_add 1992 Sep 159892 164182. -11737. 7448. 171629.
10 classical_add 1992 Oct 168529 165168. -3656. 7017. 172185.
# ℹ 315 more rows
components(train_dc_add) |>
autoplot() +
labs(title = "Classical decomposition (additive) — training only")
Classical decomposition under additive and multiplicative assumptions yields essentially the same interpretation here; the residual ("random") component absorbs most of the visible differences. The only noticeable distinction is marginal: small changes in the residual structure (e.g., slight differences in lag/ACF behavior relative to the stationarity bands). Because these differences do not change the substantive conclusion, I retain the additive classical decomposition and omit the multiplicative one.
0.6.2 Interpret + compare decomposition techniques (quant + overlay)
# Ensure the STL object exists (robust STL on training only)
train_stl_rob <- train |>
model(stl_rob = STL(value, robust = TRUE))
# STL-based feature summaries (strength of trend/season, etc.)
train |>
features(value, feat_stl) |>
# NB: feat_stl names its seasonal feature seasonal_strength_year, so the
# "season_" / "remainder_" prefixes match nothing and only trend_strength prints
select(starts_with("trend_"), starts_with("season_"), starts_with("remainder_"))
# A tibble: 1 × 1
trend_strength
<dbl>
1 0.998
# Overlay: trend estimates from classical-additive vs STL-robust (training only)
# Convert to plain tibbles BEFORE dplyr verbs so bind_rows() does not try to reconstruct a tsibble.
trend_classical <- components(train_dc_add) |>
tibble::as_tibble() |>
dplyr::select(Month, trend) |>
dplyr::mutate(method = "Classical (additive)")
trend_stl <- components(train_stl_rob) |>
tibble::as_tibble() |>
dplyr::select(Month, trend) |>
dplyr::mutate(method = "STL (robust)")
dplyr::bind_rows(trend_classical, trend_stl) |>
ggplot2::ggplot(ggplot2::aes(x = Month, y = trend, linetype = method)) +
ggplot2::geom_line(linewidth = 0.9) +
ggplot2::labs(
title = "Trend component comparison (training only)",
x = NULL,
y = "Trend"
)
The training series exhibits a strong upward trend (STL trend strength ≈ 0.998) and stable monthly seasonality. Seasonal swings increase as the level rises, suggesting multiplicative behavior in the raw series. The STL trend component also shows a noticeable slowdown/dip around the 2008–2009 period, consistent with a macroeconomic shock affecting the underlying trend. Because variability grows with the level, a variance-stabilizing transformation is appropriate before regression-style modeling.
0.7 Transformation diagnostics (training only)
0.7.1 Variance stability + Box–Cox
# Box–Cox lambda estimate on training only (Guerrero method)
lambda_bc <- forecast::BoxCox.lambda(train$value, method = "guerrero")
lambda_bc
[1] 0.2278336
Guerrero’s method estimates λ ≈ 0.228 on the training set. Because 0 < λ < 1, the Box–Cox transform compresses larger values more than smaller values, reducing level-dependent variability (heteroskedasticity). This prevents high-level periods from dominating the fit and makes the residual variance closer to constant, improving compatibility with regression-style assumptions.
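For reference, the Box–Cox transform applied below is
\[ w_t = \begin{cases} \dfrac{y_t^{\lambda} - 1}{\lambda}, & \lambda \neq 0, \\ \log y_t, & \lambda = 0, \end{cases} \]
so \( \lambda = 1 \) leaves the series unchanged up to location and scale, and \( \lambda = 0 \) reduces to the log.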
train_tx <- train |>
mutate(
y_raw = value,
y_log = log(value),
y_bc = forecast::BoxCox(value, lambda_bc)
)
train_tx |>
pivot_longer(cols = c(y_raw, y_log, y_bc), names_to = "transform", values_to = "y") |>
ggplot(aes(x = Month, y = y)) +
geom_line() +
facet_wrap(~ transform, scales = "free_y", ncol = 1) +
labs(
title = "Training set: raw vs log vs Box–Cox",
x = NULL,
y = NULL
)
0.7.2 Differencing guidance + diagnostics (training only)
# Suggested differencing orders (nsdiffs requires a seasonal ts object)
train_ts <- ts(train$value, frequency = 12)
nd <- forecast::ndiffs(train_ts)
nsd <- forecast::nsdiffs(train_ts)
c(ndiffs = nd, nsdiffs = nsd)
 ndiffs nsdiffs
      1       1
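The two differenced variants examined below are the first difference and the seasonal (lag-12) difference:
\[ \Delta y_t = y_t - y_{t-1}, \qquad \Delta_{12}\, y_t = y_t - y_{t-12}. \]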
# ACF/PACF displays for raw + differenced variants (training only)
train |>
gg_tsdisplay(value, plot_type = "partial") +
labs(title = "Training set: raw series diagnostics")train |>
mutate(d1 = difference(value, differences = 1)) |>
gg_tsdisplay(d1, plot_type = "partial") +
labs(title = "Training set: 1st difference diagnostics")train |>
mutate(d12 = difference(value, lag = 12, differences = 1)) |>
gg_tsdisplay(d12, plot_type = "partial") +
labs(title = "Training set: seasonal difference (lag 12) diagnostics")0.7.3 Stationarity tests (training only; run all available)
Null hypotheses (typical):
- ADF: null = unit root (non-stationary). Small p-value ⇒ reject null ⇒ evidence of stationarity.
- KPSS: null = stationary. Small p-value ⇒ reject null ⇒ evidence of non-stationarity.
- PP: null = unit root (non-stationary). Small p-value ⇒ reject null ⇒ evidence of stationarity.
# Include Month explicitly so pivot_longer can exclude it cleanly
series_for_tests <- train_tx |>
transmute(
Month,
raw = y_raw,
log = y_log,
bc = y_bc,
d1 = difference(y_raw, differences = 1),
d12 = difference(y_raw, lag = 12, differences = 1)
)
run_tseries_tests <- function(x) {
x <- na.omit(as.numeric(x))
if (!requireNamespace("tseries", quietly = TRUE)) {
return(tibble(
test = NA_character_,
null = NA_character_,
statistic = NA_real_,
p_value = NA_real_,
note = "Package 'tseries' not installed; install.packages('tseries') to run ADF/KPSS/PP."
))
}
adf <- tseries::adf.test(x)
kpss <- tseries::kpss.test(x)
out <- tibble(
test = c("ADF", "KPSS"),
null = c("unit root", "stationary"),
statistic = c(unname(adf$statistic), unname(kpss$statistic)),
p_value = c(adf$p.value, kpss$p.value)
)
# PP (if available)
if ("pp.test" %in% getNamespaceExports("tseries")) {
pp <- tseries::pp.test(x)
out <- bind_rows(
out,
tibble(test = "PP", null = "unit root", statistic = unname(pp$statistic), p_value = pp$p.value)
)
}
out
}
# Force tibble method (avoid tsibble pivot_longer dispatch)
stationarity_results <- series_for_tests |>
tibble::as_tibble() |>
tidyr::pivot_longer(cols = -Month, names_to = "variant", values_to = "x") |>
dplyr::group_by(variant) |>
dplyr::group_modify(~ run_tseries_tests(.x$x)) |>
dplyr::ungroup() |>
dplyr::mutate(
decision = dplyr::case_when(
test %in% c("ADF", "PP") & !is.na(p_value) ~ if_else(p_value < 0.05, "reject null", "fail to reject"),
test == "KPSS" & !is.na(p_value) ~ if_else(p_value < 0.05, "reject null", "fail to reject"),
TRUE ~ NA_character_
)
)
stationarity_results
# A tibble: 15 × 6
variant test null statistic p_value decision
<chr> <chr> <chr> <dbl> <dbl> <chr>
1 bc ADF unit root -2.75 0.262 fail to reject
2 bc KPSS stationary 5.32 0.01 reject null
3 bc PP unit root -271. 0.01 reject null
4 d1 ADF unit root -13.0 0.01 reject null
5 d1 KPSS stationary 0.0203 0.1 fail to reject
6 d1 PP unit root -347. 0.01 reject null
7 d12 ADF unit root -4.18 0.01 reject null
8 d12 KPSS stationary 0.154 0.1 fail to reject
9 d12 PP unit root -59.9 0.01 reject null
10 log ADF unit root -2.68 0.290 fail to reject
11 log KPSS stationary 5.28 0.01 reject null
12 log PP unit root -238. 0.01 reject null
13 raw ADF unit root -2.58 0.332 fail to reject
14 raw KPSS stationary 5.35 0.01 reject null
15 raw PP unit root -317. 0.01 reject null
Interpretation: the level series (raw, log, Box–Cox) fails to reject the ADF unit-root null and rejects the KPSS stationarity null, consistent with non-stationarity in the level, while both the first difference (d1) and the seasonal difference (d12) reject ADF/PP and fail to reject KPSS. This agrees with the ndiffs = 1 and nsdiffs = 1 guidance above. (The PP rejections on the level series conflict with ADF/KPSS; one plausible reason is the strong seasonality, which none of these tests model.)
# Optional: urca tests (only if installed)
# Fix: has_pkg() above indexes opt_available[[pkg]], which errors if pkg is not
# a name in the vector; single-bracket indexing returns NA instead, and unname()
# keeps isTRUE() robust on older R versions.
has_pkg <- function(pkg) isTRUE(unname(opt_available[pkg]))
if (has_pkg("urca")) {
x <- na.omit(as.numeric(train$value))
ur_adf <- urca::ur.df(x, type = "trend", selectlags = "AIC")
ur_kpss <- urca::ur.kpss(x, type = "tau")
print(summary(ur_adf)) # explicit print() so both summaries display inside the if block
print(summary(ur_kpss))
} else {
cat("urca not installed; skipping ur.df / ur.kpss.\n")
}
0.8 Model fitting (training only) — focused model set
Primary modeling target is Box–Cox transformed y (lambda estimated on training only).
To keep the argument tight (baseline vs regression), this notebook centers on:
- Baseline: seasonal naïve (SNAIVE)
- Regression: time series linear regression with trend + seasonal dummies (TSLM); see the model equation below
- Optional comparator: ETS (ETS) to show a standard exponential-smoothing alternative (not required by the prompt)
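With \( w_t \) denoting the Box–Cox transformed series, the TSLM specification is
\[ w_t = \beta_0 + \beta_1 t + \sum_{j=2}^{12} \gamma_j\, d_{j,t} + \varepsilon_t, \]
where \( t \) is a linear trend index and \( d_{j,t} \) are month-of-year dummies (January is the omitted reference month, matching the season()year2 through season()year12 terms reported below).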
lambda <- lambda_bc
train_m <- train |>
mutate(y = forecast::BoxCox(value, lambda))
test_m <- test |>
mutate(y = forecast::BoxCox(value, lambda))
h <- nrow(test_m)
# Focused model set (training only)
fits <- train_m |>
model(
snaive = SNAIVE(y),
tslm_trend_season = TSLM(y ~ trend() + season()),
ets = ETS(y)
)
fits
# A mable: 1 x 3
snaive tslm_trend_season ets
<model> <model> <model>
1 <SNAIVE> <TSLM> <ETS(M,A,A)>
0.9 Model summaries + regression coefficients
# Compact summaries for the focused set
fabletools::report(fits)
# A tibble: 3 × 18
.model sigma2 r_squared adj_r_squared statistic p_value df log_lik
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <dbl>
1 snaive 4.58e-1 NA NA NA NA NA NA
2 tslm_trend… 9.06e-1 0.973 0.972 923. 1.32e-235 13 -439.
3 ets 2.18e-5 NA NA NA NA NA -584.
# ℹ 10 more variables: AIC <dbl>, AICc <dbl>, BIC <dbl>, CV <dbl>,
# deviance <dbl>, df.residual <int>, rank <int>, MSE <dbl>, AMSE <dbl>,
# MAE <dbl>
# Regression coefficients (trend + seasonal dummies)
fit_tslm <- fits |> select(tslm_trend_season)
fabletools::tidy(fit_tslm)
# A tibble: 13 × 6
.model term estimate std.error statistic p.value
<chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 tslm_trend_season (Intercept) 62.4 0.202 309. 0
2 tslm_trend_season trend() 0.0577 0.000563 103. 2.49e-242
3 tslm_trend_season season()year2 -0.0982 0.257 -0.382 7.02e- 1
4 tslm_trend_season season()year3 2.06 0.257 8.03 1.97e- 14
5 tslm_trend_season season()year4 1.71 0.257 6.68 1.11e- 10
6 tslm_trend_season season()year5 2.65 0.257 10.3 1.14e- 21
7 tslm_trend_season season()year6 2.18 0.257 8.49 8.76e- 16
8 tslm_trend_season season()year7 2.14 0.257 8.32 2.73e- 15
9 tslm_trend_season season()year8 2.50 0.257 9.73 1.03e- 19
10 tslm_trend_season season()year9 1.25 0.257 4.88 1.67e- 6
11 tslm_trend_season season()year10 1.76 0.257 6.86 3.78e- 11
12 tslm_trend_season season()year11 1.94 0.257 7.55 4.88e- 13
13 tslm_trend_season season()year12 4.57 0.257 17.8 1.91e- 49
The trend coefficient is positive and highly significant, indicating sustained growth over time (on the transformed scale). Seasonal dummy coefficients capture systematic month-of-year effects relative to the omitted reference month, with December showing the largest positive deviation, consistent with holiday-driven demand. Because the model is fit on the Box–Cox transformed response, coefficients describe changes on the transformed scale rather than raw dollars.
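To make the transformed-scale caveat concrete, the sketch below converts the December dummy (+4.57 on the Box–Cox scale) into an approximate original-scale effect at an assumed reference level; the 400,000 reference is illustrative, and the implied lift varies with the level because the transform is nonlinear.
# Sketch: implied original-scale December lift at an assumed reference level
w_ref <- forecast::BoxCox(400000, lambda) # assumed reference level (illustrative)
dec_effect <- forecast::InvBoxCox(w_ref + 4.57, lambda) - 400000
dec_effect # approximate December effect in original units, at that level only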
1 Forecast the test period (models fit on training only)
1.0.1 Forecast + back-transform to original scale
# Hard guardrails
if (!exists("train_m")) stop("train_m not found (Box–Cox training data).")
if (!exists("test")) stop("test not found.")
if (!exists("train")) stop("train not found.")
if (!exists("lambda")) stop("lambda not found.")
if (!exists("h")) h <- nrow(test)
# Fit a minimal-but-sufficient model set here (fast + meets rubric)
fits <- train_m |>
model(
snaive = SNAIVE(y),
tslm_trend_season = TSLM(y ~ trend() + season()),
ets = ETS(y)
)
fc <- fabletools::forecast(fits, h = h)
# ---- back-transform mean forecasts ----
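# Note: inverting the point forecast with InvBoxCox() and no bias adjustment
# yields (approximately) the median on the original scale, not the mean;
# forecast::InvBoxCox() supports biasadj = TRUE if a mean forecast is required.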
fc_tbl <- fc |> tibble::as_tibble()
idx_name <- if (".index" %in% names(fc_tbl)) ".index" else if ("Month" %in% names(fc_tbl)) "Month" else names(fc_tbl)[1]
fc_mean_bt <- fc_tbl |>
dplyr::rename(Month = dplyr::all_of(idx_name)) |>
dplyr::transmute(
Month,
.model,
mean_value = forecast::InvBoxCox(.mean, lambda)
)
# ---- back-transform 80% and 95% intervals ----
fc_int_tbl <- fc |>
fabletools::hilo(level = c(80, 95)) |>
tibble::as_tibble()
idx_name2 <- if (".index" %in% names(fc_int_tbl)) ".index" else if ("Month" %in% names(fc_int_tbl)) "Month" else names(fc_int_tbl)[1]
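# Box-Cox is monotone increasing, so interval endpoints can be back-transformed
# endpoint-by-endpoint without changing the nominal coverage.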
fc_int <- fc_int_tbl |>
dplyr::rename(Month = dplyr::all_of(idx_name2)) |>
dplyr::transmute(
Month,
.model,
lo80_y = purrr::map_dbl(`80%`, ~ .x$lower),
hi80_y = purrr::map_dbl(`80%`, ~ .x$upper),
lo95_y = purrr::map_dbl(`95%`, ~ .x$lower),
hi95_y = purrr::map_dbl(`95%`, ~ .x$upper)
) |>
dplyr::mutate(
lo80 = forecast::InvBoxCox(lo80_y, lambda),
hi80 = forecast::InvBoxCox(hi80_y, lambda),
lo95 = forecast::InvBoxCox(lo95_y, lambda),
hi95 = forecast::InvBoxCox(hi95_y, lambda)
)
train_df <- train |> tibble::as_tibble()
test_df <- test |> tibble::as_tibble()
plot_data <- test_df |>
dplyr::select(Month, actual = value) |>
dplyr::left_join(fc_mean_bt, by = "Month") |>
dplyr::left_join(fc_int, by = c("Month", ".model"))
ggplot2::ggplot() +
ggplot2::geom_line(data = train_df, ggplot2::aes(Month, value), alpha = 0.5) +
ggplot2::geom_line(data = test_df, ggplot2::aes(Month, value), linetype = "dashed") +
ggplot2::geom_ribbon(data = plot_data, ggplot2::aes(Month, ymin = lo95, ymax = hi95), alpha = 0.15) +
ggplot2::geom_ribbon(data = plot_data, ggplot2::aes(Month, ymin = lo80, ymax = hi80), alpha = 0.25) +
ggplot2::geom_line(data = plot_data, ggplot2::aes(Month, y = mean_value), linewidth = 0.8) +
ggplot2::facet_wrap(~ .model, scales = "free_y") +
ggplot2::labs(
title = "Forecasts vs actuals (test period) — one panel per model",
subtitle = "Solid = forecast mean; shaded = 80% and 95% intervals (back-transformed); dashed = actual test",
x = NULL, y = "Value"
)
# 9.2 Test-set accuracy (original scale)
# Build a long table: actuals + predictions per model
acc_df <- plot_data |>
dplyr::select(Month, .model, actual, mean_value) |>
dplyr::rename(.pred = mean_value)
# Helper metrics (no extra packages)
rmse <- function(a, f) sqrt(mean((a - f)^2, na.rm = TRUE))
mae <- function(a, f) mean(abs(a - f), na.rm = TRUE)
mape <- function(a, f) mean(abs((a - f) / a), na.rm = TRUE) * 100
acc_tbl <- acc_df |>
dplyr::group_by(.model) |>
dplyr::summarise(
RMSE = rmse(actual, .pred),
MAE = mae(actual, .pred),
MAPE = mape(actual, .pred),
.groups = "drop"
) |>
dplyr::arrange(MAPE)
acc_tbl
# A tibble: 3 × 4
.model RMSE MAE MAPE
<chr> <dbl> <dbl> <dbl>
1 tslm_trend_season 74029. 64251. 9.82
2 ets 77185. 66974. 10.2
3 snaive 158086. 137204. 20.6
On the test set, tslm_trend_season achieves the lowest RMSE, MAE, and MAPE, indicating the best out-of-sample performance. ETS is a close second, while seasonal naïve performs substantially worse. Relative to the baseline, the regression roughly halves MAPE (20.6% → 9.82%), showing that explicitly modeling trend and month-of-year effects materially improves forecast accuracy.
1.1 Scenario-based judgmental adjustments
The statistical models are trained only through January 2019 and therefore cannot anticipate shocks occurring after the training window. The test period includes the COVID-19 pandemic, which represents an exogenous structural break not encoded in the historical pattern. A purely data-driven forecast would extrapolate pre-2019 trend and seasonality and therefore miss the abrupt contraction.
To incorporate domain knowledge, I consider three structured scenarios:
Baseline case (model-driven): Forecasts are taken directly from the fitted regression/ETS models without adjustment. This assumes no extraordinary shock and that pre-2019 dynamics continue. This represents the disciplined statistical benchmark.
Pessimistic case (negative demand shock): Beginning early 2020, forecasts are shifted downward to reflect mandated shutdowns, supply-chain disruption, and consumer contraction. Prediction intervals are widened to reflect elevated volatility and structural uncertainty. This adjustment acknowledges a temporary but severe level shock inconsistent with historical seasonal fluctuations.
Optimistic case (rapid rebound): After the initial contraction, forecasts are adjusted upward relative to baseline to reflect stimulus effects, reopening momentum, and pent-up demand. This scenario assumes a faster reversion toward trend and potentially overshooting in high-consumption months.
These scenarios illustrate the role of judgment in forecasting practice. Statistical models provide internally consistent baselines grounded in historical structure. Scenario-based forecasting overlays qualitative information about policy changes, macroeconomic shocks, and behavioral shifts that lie outside the training distribution. Judgment is therefore not a replacement for modeling, but a structured extension of it when external conditions materially alter the data-generating process.
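As a concrete illustration, a judgmental overlay can be expressed as explicit multipliers applied to the baseline mean forecasts. The sketch below reuses the fc_mean_bt table from above; the date windows and the 0.80/1.05 multipliers are assumed for illustration, not estimated from data.
# Sketch: scenario overlays on the back-transformed baseline means
# (hypothetical multipliers and windows; tune these with domain knowledge)
scenario_fc <- fc_mean_bt |>
  dplyr::mutate(
    pessimistic = dplyr::if_else(
      Month >= tsibble::yearmonth("2020 Mar") & Month <= tsibble::yearmonth("2020 Dec"),
      mean_value * 0.80, # assumed 20% contraction during shutdown months
      mean_value
    ),
    optimistic = dplyr::if_else(
      Month >= tsibble::yearmonth("2021 Jan"),
      mean_value * 1.05, # assumed 5% rebound overshoot thereafter
      mean_value
    )
  )
Keeping the overlay as explicit, documented multipliers makes the judgmental component auditable and easy to revise as new information arrives.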
1.2 Interpretive summary
- Modeling choices: I used a seasonal naïve benchmark as a baseline and a time-series regression with trend + seasonal dummies as the primary model (with ETS as an optional comparator).
- Key diagnostics: training plots and decomposition show strong trend and stable monthly seasonality; seasonal amplitude increases with level, motivating Box–Cox variance stabilization (λ ≈ 0.228).
- Accuracy: the trend+season regression outperforms seasonal naïve on the test set (lower RMSE/MAE/MAPE), indicating meaningful value added from modeling structure explicitly.
- Judgment: scenario-based adjustments are appropriate when an external shock occurs outside the training window (e.g., COVID in the test period), where qualitative information is necessary to modify baseline forecasts.