A hierarchical time series (HTS) follows a tree structure, where lower-level series aggregate uniquely into higher-level series. In contrast, a grouped time series (GTS) allows the same observations to be grouped across multiple dimensions, so aggregation is not restricted to one single tree. A forecasting method is the statistical model used to generate base forecasts, while a reconciliation method adjusts those forecasts so they are coherent across the hierarchy. In this discussion, I use a hierarchical retail turnover example and compare reconciliation approaches using a test set. :contentReferenceoaicite:0 :contentReferenceoaicite:1
library(fpp3)
## Warning: package 'fpp3' was built under R version 4.5.2
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.2 ──
## ✔ tibble 3.3.0 ✔ tsibble 1.1.6
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.4.2
## ✔ lubridate 1.9.4 ✔ fable 0.5.0
## ✔ ggplot2 4.0.2
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'tsibble' was built under R version 4.5.2
## Warning: package 'tsibbledata' was built under R version 4.5.2
## Warning: package 'feasts' was built under R version 4.5.2
## Warning: package 'fabletools' was built under R version 4.5.2
## Warning: package 'fable' was built under R version 4.5.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(dplyr)
library(ggplot2)
library(tidyr)
data("aus_retail")
retail_small <- aus_retail %>%
filter(
State %in% c("New South Wales", "Victoria", "Queensland"),
Industry %in% c("Food retailing", "Department stores")
) %>%
filter(!is.na(Turnover)) %>%
aggregate_key(State / Industry, Turnover = sum(Turnover))
retail_small
## # A tsibble: 4,410 x 4 [1M]
## # Key: State, Industry [10]
## Month State Industry Turnover
## <mth> <chr*> <chr*> <dbl>
## 1 1982 4月 <aggregated> <aggregated> 1231.
## 2 1982 5月 <aggregated> <aggregated> 1256.
## 3 1982 6月 <aggregated> <aggregated> 1215.
## 4 1982 7月 <aggregated> <aggregated> 1253.
## 5 1982 8月 <aggregated> <aggregated> 1198.
## 6 1982 9月 <aggregated> <aggregated> 1251.
## 7 1982 10月 <aggregated> <aggregated> 1286.
## 8 1982 11月 <aggregated> <aggregated> 1369.
## 9 1982 12月 <aggregated> <aggregated> 1810.
## 10 1983 1月 <aggregated> <aggregated> 1200
## # ℹ 4,400 more rows
train <- retail_small %>%
filter(Month < yearmonth("2018 Jan"))
test <- retail_small %>%
filter(Month >= yearmonth("2018 Jan"))
train %>%
filter(is_aggregated(State), is_aggregated(Industry)) %>%
autoplot(Turnover) +
labs(
title = "Training Data: Total Retail Turnover",
x = "Month",
y = "Turnover"
)
fit <- train %>%
model(
ets = ETS(Turnover)
)
fit
## # A mable: 10 x 3
## # Key: State, Industry [10]
## State Industry ets
## <chr*> <chr*> <model>
## 1 New South Wales Department stores <ETS(M,A,M)>
## 2 New South Wales Food retailing <ETS(M,A,M)>
## 3 New South Wales <aggregated> <ETS(M,A,M)>
## 4 Queensland Department stores <ETS(M,A,M)>
## 5 Queensland Food retailing <ETS(M,A,M)>
## 6 Queensland <aggregated> <ETS(M,A,M)>
## 7 Victoria Department stores <ETS(M,A,M)>
## 8 Victoria Food retailing <ETS(M,A,M)>
## 9 Victoria <aggregated> <ETS(M,A,M)>
## 10 <aggregated> <aggregated> <ETS(M,A,M)>
rec_fit <- fit %>%
reconcile(
td = top_down(ets),
mo = middle_out(ets, split = "State"),
mint = min_trace(ets, method = "mint_shrink")
)
rec_fit
## # A mable: 10 x 6
## # Key: State, Industry [10]
## State Industry ets td mo mint
## <chr*> <chr*> <model> <model> <model> <model>
## 1 New South Wal… Departmen… <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)>
## 2 New South Wal… Food reta… <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)>
## 3 New South Wal… <aggregat… <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)>
## 4 Queensland … Departmen… <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)>
## 5 Queensland … Food reta… <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)>
## 6 Queensland … <aggregat… <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)>
## 7 Victoria … Departmen… <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)>
## 8 Victoria … Food reta… <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)>
## 9 Victoria … <aggregat… <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)>
## 10 <aggregated> <aggregat… <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)>
fc <- rec_fit %>%
forecast(h = "12 months")
fc
## # A fable: 480 x 6 [1M]
## # Key: State, Industry, .model [40]
## State Industry .model Month
## <chr*> <chr*> <chr> <mth>
## 1 New South Wales Department stores ets 2018 1月
## 2 New South Wales Department stores ets 2018 2月
## 3 New South Wales Department stores ets 2018 3月
## 4 New South Wales Department stores ets 2018 4月
## 5 New South Wales Department stores ets 2018 5月
## 6 New South Wales Department stores ets 2018 6月
## 7 New South Wales Department stores ets 2018 7月
## 8 New South Wales Department stores ets 2018 8月
## 9 New South Wales Department stores ets 2018 9月
## 10 New South Wales Department stores ets 2018 10月
## # ℹ 470 more rows
## # ℹ 2 more variables: Turnover <dist>, .mean <dbl>
fc %>%
filter(is_aggregated(State), is_aggregated(Industry)) %>%
autoplot(train, level = NULL) +
autolayer(
test %>% filter(is_aggregated(State), is_aggregated(Industry)),
Turnover,
colour = "black"
) +
labs(
title = "Train + Test + Reconciled Forecasts (Total Series)",
x = "Month",
y = "Turnover"
)
selected_test <- test %>%
filter(!is_aggregated(State), !is_aggregated(Industry)) %>%
filter(
State %in% c("New South Wales", "Victoria"),
Industry %in% c("Food retailing", "Department stores")
)
selected_fc <- fc %>%
filter(!is_aggregated(State), !is_aggregated(Industry)) %>%
filter(
State %in% c("New South Wales", "Victoria"),
Industry %in% c("Food retailing", "Department stores")
)
autoplot(selected_fc, train, level = NULL) +
autolayer(selected_test, Turnover, colour = "black") +
labs(
title = "Bottom-Level Forecasts vs Actuals",
x = "Month",
y = "Turnover"
)
acc <- fc %>%
accuracy(test) %>%
arrange(.model)
acc
## # A tibble: 40 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE
## <chr> <chr*> <chr*> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ets New South Wa… Departmen… Test -4.53 18.5 15.5 -0.989 3.15 NaN
## 2 ets New South Wa… Food reta… Test -27.9 55.2 45.2 -0.872 1.37 NaN
## 3 ets New South Wa… <aggregat… Test -3.35 53.7 46.3 -0.0791 1.20 NaN
## 4 ets Queensland … Departmen… Test 6.63 10.3 8.94 2.20 3.04 NaN
## 5 ets Queensland … Food reta… Test -2.03 39.5 27.5 -0.186 1.19 NaN
## 6 ets Queensland … <aggregat… Test 37.9 50.4 44.2 1.41 1.67 NaN
## 7 ets Victoria … Departmen… Test -6.21 12.0 9.21 -1.57 2.35 NaN
## 8 ets Victoria … Food reta… Test 24.4 45.4 35.0 0.846 1.26 NaN
## 9 ets Victoria … <aggregat… Test 31.9 52.2 42.8 1.02 1.39 NaN
## 10 ets <aggregated> <aggregat… Test 58.3 127. 108. 0.596 1.14 NaN
## # ℹ 30 more rows
## # ℹ 2 more variables: RMSSE <dbl>, ACF1 <dbl>
best_model <- acc %>%
group_by(.model) %>%
summarise(
avg_RMSE = mean(RMSE, na.rm = TRUE),
avg_MAE = mean(MAE, na.rm = TRUE),
avg_MAPE = mean(MAPE, na.rm = TRUE)
) %>%
arrange(avg_RMSE)
best_model
## # A tibble: 4 × 4
## .model avg_RMSE avg_MAE avg_MAPE
## <chr> <dbl> <dbl> <dbl>
## 1 mint 43.7 36.0 1.73
## 2 td 45.7 38.4 1.87
## 3 ets 46.4 38.3 1.78
## 4 mo 46.7 39.1 1.89
best_method <- best_model %>%
slice(1)
best_method
## # A tibble: 1 × 4
## .model avg_RMSE avg_MAE avg_MAPE
## <chr> <dbl> <dbl> <dbl>
## 1 mint 43.7 36.0 1.73