Part I

A hierarchical time series (HTS) follows a tree structure, where lower-level series aggregate uniquely into higher-level series. In contrast, a grouped time series (GTS) allows the same observations to be grouped across multiple dimensions, so aggregation is not restricted to one single tree. A forecasting method is the statistical model used to generate base forecasts, while a reconciliation method adjusts those forecasts so they are coherent across the hierarchy. In this discussion, I use a hierarchical retail turnover example and compare reconciliation approaches using a test set. :contentReferenceoaicite:0 :contentReferenceoaicite:1

Load packages

library(fpp3)
## Warning: package 'fpp3' was built under R version 4.5.2
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.2 ──
## ✔ tibble      3.3.0     ✔ tsibble     1.1.6
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.4.2
## ✔ lubridate   1.9.4     ✔ fable       0.5.0
## ✔ ggplot2     4.0.2
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'tsibble' was built under R version 4.5.2
## Warning: package 'tsibbledata' was built under R version 4.5.2
## Warning: package 'feasts' was built under R version 4.5.2
## Warning: package 'fabletools' was built under R version 4.5.2
## Warning: package 'fable' was built under R version 4.5.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(dplyr)
library(ggplot2)
library(tidyr)
data("aus_retail")

retail_small <- aus_retail %>%
  filter(
    State %in% c("New South Wales", "Victoria", "Queensland"),
    Industry %in% c("Food retailing", "Department stores")
  ) %>%
  filter(!is.na(Turnover)) %>%
  aggregate_key(State / Industry, Turnover = sum(Turnover))

retail_small
## # A tsibble: 4,410 x 4 [1M]
## # Key:       State, Industry [10]
##        Month State        Industry     Turnover
##        <mth> <chr*>       <chr*>          <dbl>
##  1  1982 4月 <aggregated> <aggregated>    1231.
##  2  1982 5月 <aggregated> <aggregated>    1256.
##  3  1982 6月 <aggregated> <aggregated>    1215.
##  4  1982 7月 <aggregated> <aggregated>    1253.
##  5  1982 8月 <aggregated> <aggregated>    1198.
##  6  1982 9月 <aggregated> <aggregated>    1251.
##  7 1982 10月 <aggregated> <aggregated>    1286.
##  8 1982 11月 <aggregated> <aggregated>    1369.
##  9 1982 12月 <aggregated> <aggregated>    1810.
## 10  1983 1月 <aggregated> <aggregated>    1200 
## # ℹ 4,400 more rows
train <- retail_small %>%
  filter(Month < yearmonth("2018 Jan"))

test <- retail_small %>%
  filter(Month >= yearmonth("2018 Jan"))
train %>%
  filter(is_aggregated(State), is_aggregated(Industry)) %>%
  autoplot(Turnover) +
  labs(
    title = "Training Data: Total Retail Turnover",
    x = "Month",
    y = "Turnover"
  )

fit <- train %>%
  model(
    ets = ETS(Turnover)
  )

fit
## # A mable: 10 x 3
## # Key:     State, Industry [10]
##    State           Industry                   ets
##    <chr*>          <chr*>                 <model>
##  1 New South Wales Department stores <ETS(M,A,M)>
##  2 New South Wales Food retailing    <ETS(M,A,M)>
##  3 New South Wales <aggregated>      <ETS(M,A,M)>
##  4 Queensland      Department stores <ETS(M,A,M)>
##  5 Queensland      Food retailing    <ETS(M,A,M)>
##  6 Queensland      <aggregated>      <ETS(M,A,M)>
##  7 Victoria        Department stores <ETS(M,A,M)>
##  8 Victoria        Food retailing    <ETS(M,A,M)>
##  9 Victoria        <aggregated>      <ETS(M,A,M)>
## 10 <aggregated>    <aggregated>      <ETS(M,A,M)>
rec_fit <- fit %>%
  reconcile(
    td   = top_down(ets),
    mo   = middle_out(ets, split = "State"),
    mint = min_trace(ets, method = "mint_shrink")
  )

rec_fit
## # A mable: 10 x 6
## # Key:     State, Industry [10]
##    State          Industry            ets td           mo           mint        
##    <chr*>         <chr*>          <model> <model>      <model>      <model>     
##  1 New South Wal… Departmen… <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)>
##  2 New South Wal… Food reta… <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)>
##  3 New South Wal… <aggregat… <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)>
##  4 Queensland   … Departmen… <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)>
##  5 Queensland   … Food reta… <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)>
##  6 Queensland   … <aggregat… <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)>
##  7 Victoria     … Departmen… <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)>
##  8 Victoria     … Food reta… <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)>
##  9 Victoria     … <aggregat… <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)>
## 10 <aggregated>   <aggregat… <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)> <ETS(M,A,M)>
fc <- rec_fit %>%
  forecast(h = "12 months")

fc
## # A fable: 480 x 6 [1M]
## # Key:     State, Industry, .model [40]
##    State           Industry          .model     Month
##    <chr*>          <chr*>            <chr>      <mth>
##  1 New South Wales Department stores ets     2018 1月
##  2 New South Wales Department stores ets     2018 2月
##  3 New South Wales Department stores ets     2018 3月
##  4 New South Wales Department stores ets     2018 4月
##  5 New South Wales Department stores ets     2018 5月
##  6 New South Wales Department stores ets     2018 6月
##  7 New South Wales Department stores ets     2018 7月
##  8 New South Wales Department stores ets     2018 8月
##  9 New South Wales Department stores ets     2018 9月
## 10 New South Wales Department stores ets    2018 10月
## # ℹ 470 more rows
## # ℹ 2 more variables: Turnover <dist>, .mean <dbl>
fc %>%
  filter(is_aggregated(State), is_aggregated(Industry)) %>%
  autoplot(train, level = NULL) +
  autolayer(
    test %>% filter(is_aggregated(State), is_aggregated(Industry)),
    Turnover,
    colour = "black"
  ) +
  labs(
    title = "Train + Test + Reconciled Forecasts (Total Series)",
    x = "Month",
    y = "Turnover"
  )

selected_test <- test %>%
  filter(!is_aggregated(State), !is_aggregated(Industry)) %>%
  filter(
    State %in% c("New South Wales", "Victoria"),
    Industry %in% c("Food retailing", "Department stores")
  )

selected_fc <- fc %>%
  filter(!is_aggregated(State), !is_aggregated(Industry)) %>%
  filter(
    State %in% c("New South Wales", "Victoria"),
    Industry %in% c("Food retailing", "Department stores")
  )

autoplot(selected_fc, train, level = NULL) +
  autolayer(selected_test, Turnover, colour = "black") +
  labs(
    title = "Bottom-Level Forecasts vs Actuals",
    x = "Month",
    y = "Turnover"
  )

acc <- fc %>%
  accuracy(test) %>%
  arrange(.model)

acc
## # A tibble: 40 × 12
##    .model State         Industry   .type     ME  RMSE    MAE     MPE  MAPE  MASE
##    <chr>  <chr*>        <chr*>     <chr>  <dbl> <dbl>  <dbl>   <dbl> <dbl> <dbl>
##  1 ets    New South Wa… Departmen… Test   -4.53  18.5  15.5  -0.989   3.15   NaN
##  2 ets    New South Wa… Food reta… Test  -27.9   55.2  45.2  -0.872   1.37   NaN
##  3 ets    New South Wa… <aggregat… Test   -3.35  53.7  46.3  -0.0791  1.20   NaN
##  4 ets    Queensland  … Departmen… Test    6.63  10.3   8.94  2.20    3.04   NaN
##  5 ets    Queensland  … Food reta… Test   -2.03  39.5  27.5  -0.186   1.19   NaN
##  6 ets    Queensland  … <aggregat… Test   37.9   50.4  44.2   1.41    1.67   NaN
##  7 ets    Victoria    … Departmen… Test   -6.21  12.0   9.21 -1.57    2.35   NaN
##  8 ets    Victoria    … Food reta… Test   24.4   45.4  35.0   0.846   1.26   NaN
##  9 ets    Victoria    … <aggregat… Test   31.9   52.2  42.8   1.02    1.39   NaN
## 10 ets    <aggregated>  <aggregat… Test   58.3  127.  108.    0.596   1.14   NaN
## # ℹ 30 more rows
## # ℹ 2 more variables: RMSSE <dbl>, ACF1 <dbl>
best_model <- acc %>%
  group_by(.model) %>%
  summarise(
    avg_RMSE = mean(RMSE, na.rm = TRUE),
    avg_MAE  = mean(MAE, na.rm = TRUE),
    avg_MAPE = mean(MAPE, na.rm = TRUE)
  ) %>%
  arrange(avg_RMSE)

best_model
## # A tibble: 4 × 4
##   .model avg_RMSE avg_MAE avg_MAPE
##   <chr>     <dbl>   <dbl>    <dbl>
## 1 mint       43.7    36.0     1.73
## 2 td         45.7    38.4     1.87
## 3 ets        46.4    38.3     1.78
## 4 mo         46.7    39.1     1.89
best_method <- best_model %>%
  slice(1)

best_method
## # A tibble: 1 × 4
##   .model avg_RMSE avg_MAE avg_MAPE
##   <chr>     <dbl>   <dbl>    <dbl>
## 1 mint       43.7    36.0     1.73