set.seed(123)
build_modulated_tbl <- function(s, m) {
idx_name <- tsibble::index_var(s) # e.g., "Month" or "Quarter"
fit <- s |> model(stl = STL(value))
comps <- fit |> components()
n <- nrow(comps)
tr <- comps$trend
re <- comps$remainder
# Seasonal component may be named differently (e.g., "season_year")
season_cols <- grep("^season", names(comps), value = TRUE)
if (length(season_cols) == 0) {
se <- rep(0, n)
} else if (length(season_cols) == 1) {
se <- comps[[season_cols[1]]]
} else {
se <- Reduce(`+`, lapply(season_cols, function(cc) comps[[cc]]))
}
# Noise scale from remainder
noise_sd <- sd(re, na.rm = TRUE)
if (!is.finite(noise_sd) || noise_sd == 0) noise_sd <- sd(comps$value, na.rm = TRUE)
eps <- rnorm(n, 0, noise_sd)
# Estimate AR(1) phi from remainder (fallback if it fails)
phi <- 0.5
ar_sd <- noise_sd
re_clean <- re[is.finite(re)]
if (length(re_clean) > 20) {
ar_fit <- try(stats::arima(re_clean, order = c(1,0,0)), silent = TRUE)
if (!inherits(ar_fit, "try-error")) {
phi_hat <- as.numeric(ar_fit$coef[1])
if (is.finite(phi_hat)) phi <- max(min(phi_hat, 0.99), -0.99)
sigma_hat <- sqrt(ar_fit$sigma2)
if (is.finite(sigma_hat) && sigma_hat > 0) ar_sd <- sigma_hat
}
}
ar_series <- as.numeric(stats::arima.sim(n = n, list(ar = phi), sd = ar_sd))
wide_tbl <- tibble(
idx = comps[[idx_name]],
`Trend only` = tr + eps,
`Season only` = se + eps,
`Trend + season` = tr + se + eps,
`AR(1) only (stationary)` = ar_series
) |>
rename(!!idx_name := idx)
wide_tbl |>
pivot_longer(-all_of(idx_name), names_to = "Series", values_to = "value") |>
as_tsibble(index = !!idx_name, key = Series)
}
for (dataset_nm in names(series_list)) {
cat("\n\n# BONUS dataset:", dataset_nm, "\n")
s <- series_list[[dataset_nm]]
m <- seasonal_lags[[dataset_nm]]
mod_tbl <- build_modulated_tbl(s, m)
# 1) Time plot
print(
autoplot(mod_tbl, value) +
facet_wrap(~Series, scales = "free_y") +
labs(title = paste("BONUS:", dataset_nm, "- Modulated series (original)"))
)
# 2) Stationarity tests on each modulated series
for (ser_nm in unique(mod_tbl$Series)) {
cat("\n\n###", ser_nm, "\n")
x <- mod_tbl |> filter(Series == ser_nm) |> pull(value)
adf <- tseries::adf.test(x)
kpss <- tseries::kpss.test(x)
cat("ADF (H0: unit root / non-stationary): p =", adf$p.value, "\n")
cat("KPSS (H0: stationary): p =", kpss$p.value, "\n")
}
# 3) ACF/PACF on originals
for (ser_nm in unique(mod_tbl$Series)) {
s_sub <- mod_tbl |> filter(Series == ser_nm)
print(
ggtime::gg_tsdisplay(s_sub, value, plot_type = "partial") +
labs(title = paste("BONUS:", dataset_nm, "- ACF/PACF (original):", ser_nm))
)
}
# 4) Suggest differencing per modulated series
diff_params <- tibble(Series = unique(mod_tbl$Series)) |>
mutate(
D = map_dbl(Series, ~{
tmp <- mod_tbl |> filter(Series == .x)
tmp %>% features(value, unitroot_nsdiffs) %>% pull(nsdiffs)
}),
d = map_dbl(Series, ~{
tmp <- mod_tbl |> filter(Series == .x)
tmp %>% features(value, unitroot_ndiffs) %>% pull(ndiffs)
})
)
print(diff_params)
# Apply differencing separately per modulated series
mod_stat_list <- map2(diff_params$Series, seq_len(nrow(diff_params)), ~{
ser_nm <- .x
i <- .y
D_val <- diff_params$D[i]
d_val <- diff_params$d[i]
tmp <- mod_tbl |> filter(Series == ser_nm)
if (D_val > 0) tmp <- tmp |> mutate(value = difference(value, lag = m))
if (d_val > 0) tmp <- tmp |> mutate(value = difference(value, differences = d_val))
tmp |> filter(!is.na(value))
})
mod_stat2 <- bind_rows(mod_stat_list)
print(
autoplot(mod_stat2, value) +
facet_wrap(~Series, scales = "free_y") +
labs(title = paste("BONUS:", dataset_nm, "- Modulated series after suggested differencing"))
)
for (ser_nm in unique(mod_stat2$Series)) {
s_sub <- mod_stat2 |> filter(Series == ser_nm)
print(
ggtime::gg_tsdisplay(s_sub, value, plot_type = "partial") +
labs(title = paste("BONUS:", dataset_nm, "- ACF/PACF (after differencing):", ser_nm))
)
}
}