# I picked four large companies from two sectors.# This gives me a simple portfolio structure:# Total portfolio -> Sector -> Individual stockstocks <-c("AAPL", "MSFT", "XOM", "CVX")stock_info <-tibble(symbol =c("AAPL", "MSFT", "XOM", "CVX"),sector =c("Technology", "Technology", "Energy", "Energy"),company =c("Apple", "Microsoft", "Exxon Mobil", "Chevron"))stock_info
# A tibble: 4 × 3
symbol sector company
<chr> <chr> <chr>
1 AAPL Technology Apple
2 MSFT Technology Microsoft
3 XOM Energy Exxon Mobil
4 CVX Energy Chevron
## Pull daily adjusted closing prices# I used Yahoo Finance data from 2020 through 2024.# The first four years will be training data and 2024 will be the test year.getSymbols(c("AAPL", "MSFT", "XOM", "CVX"),src ="yahoo",from ="2020-01-01",to ="2024-12-31")
# The assignment calls for monthly adjusted closing data.# I kept the last adjusted closing price from each month.monthly_prices <- stock_prices |>mutate(month =floor_date(date, "month")) |>group_by(month) |>summarise(AAPL =last(AAPL),MSFT =last(MSFT),XOM =last(XOM),CVX =last(CVX))head(monthly_prices)
## Create equal-dollar portfolio values# I treated this like I invested $1,000 in each stock at the beginning.# Each stock starts at 1,000 and then moves based on its adjusted closing price.portfolio_values <- monthly_prices |>mutate(AAPL_value =1000* AAPL /first(AAPL),MSFT_value =1000* MSFT /first(MSFT),XOM_value =1000* XOM /first(XOM),CVX_value =1000* CVX /first(CVX)) |>select(month, AAPL_value, MSFT_value, XOM_value, CVX_value)head(portfolio_values)
# Technology is Apple plus Microsoft.# Energy is Exxon Mobil plus Chevron.# Total is the full four-stock portfolio.portfolio_summary <- portfolio_values |>mutate(Technology = AAPL_value + MSFT_value,Energy = XOM_value + CVX_value,Total = Technology + Energy)head(portfolio_summary)
# This version is useful for viewing the individual series and plotting the base ARIMA forecasts.ts_data <- portfolio_summary |>select(month, AAPL_value, MSFT_value, XOM_value, CVX_value, Technology, Energy, Total) |>pivot_longer(-month, names_to ="series", values_to ="value") |>mutate(month =yearmonth(month)) |>as_tsibble(key = series, index = month)ts_data
# A tsibble: 420 x 3 [1M]
# Key: series [7]
month series value
<mth> <chr> <dbl>
1 2020 Jan AAPL_value 1000
2 2020 Feb AAPL_value 885.
3 2020 Mar AAPL_value 824.
4 2020 Apr AAPL_value 951.
5 2020 May AAPL_value 1032.
6 2020 Jun AAPL_value 1185.
7 2020 Jul AAPL_value 1380.
8 2020 Aug AAPL_value 1679.
9 2020 Sep AAPL_value 1507.
10 2020 Oct AAPL_value 1417.
# ℹ 410 more rows
Train-test split
# Training period: 2020 through 2023# Test period: 2024train_data <- ts_data |>filter(month <=yearmonth("2023 Dec"))test_data <- ts_data |>filter(month >=yearmonth("2024 Jan"))train_data
# A tsibble: 336 x 3 [1M]
# Key: series [7]
month series value
<mth> <chr> <dbl>
1 2020 Jan AAPL_value 1000
2 2020 Feb AAPL_value 885.
3 2020 Mar AAPL_value 824.
4 2020 Apr AAPL_value 951.
5 2020 May AAPL_value 1032.
6 2020 Jun AAPL_value 1185.
7 2020 Jul AAPL_value 1380.
8 2020 Aug AAPL_value 1679.
9 2020 Sep AAPL_value 1507.
10 2020 Oct AAPL_value 1417.
# ℹ 326 more rows
test_data
# A tsibble: 84 x 3 [1M]
# Key: series [7]
month series value
<mth> <chr> <dbl>
1 2024 Jan AAPL_value 2446.
2 2024 Feb AAPL_value 2401.
3 2024 Mar AAPL_value 2278.
4 2024 Apr AAPL_value 2262.
5 2024 May AAPL_value 2557.
6 2024 Jun AAPL_value 2801.
7 2024 Jul AAPL_value 2954.
8 2024 Aug AAPL_value 3049.
9 2024 Sep AAPL_value 3103.
10 2024 Oct AAPL_value 3008.
# ℹ 74 more rows
Fit base ARIMA models
# This fits an ARIMA model to each series before reconciliation.fit <- train_data |>model(arima =ARIMA(value))fit
# A mable: 7 x 2
# Key: series [7]
series arima
<chr> <model>
1 AAPL_value <ARIMA(0,1,0)>
2 CVX_value <ARIMA(0,1,1)>
3 Energy <ARIMA(0,1,0)(0,0,1)[12] w/ drift>
4 MSFT_value <ARIMA(0,1,0) w/ drift>
5 Technology <ARIMA(0,1,0) w/ drift>
6 Total <ARIMA(0,1,1) w/ drift>
7 XOM_value <ARIMA(0,1,0)(0,0,1)[12] w/ drift>
fc_base <- fit |>forecast(h =12)fc_base
# A fable: 84 x 5 [1M]
# Key: series, .model [7]
series .model month
<chr> <chr> <mth>
1 AAPL_value arima 2024 Jan
2 AAPL_value arima 2024 Feb
3 AAPL_value arima 2024 Mar
4 AAPL_value arima 2024 Apr
5 AAPL_value arima 2024 May
6 AAPL_value arima 2024 Jun
7 AAPL_value arima 2024 Jul
8 AAPL_value arima 2024 Aug
9 AAPL_value arima 2024 Sep
10 AAPL_value arima 2024 Oct
# ℹ 74 more rows
# ℹ 2 more variables: value <dist>, .mean <dbl>
Plot base forecasts against actual test data
# The black line is the actual 2024 data.# This plot is mainly to show the unreconciled ARIMA forecasts before applying HTS methods.fc_base |>autoplot(train_data) +autolayer(test_data, value, color ="black") +labs(title ="Base ARIMA Forecasts vs Actual Test Data",y ="Portfolio Value",x ="Month")
Build the hierarchical structure
# For reconciliation, I need the bottom-level data first.# The structure is:# Total portfolio# Technology# AAPL, MSFT# Energy# XOM, CVXbottom_data <- portfolio_values |>select(month, AAPL_value, MSFT_value, XOM_value, CVX_value) |>pivot_longer(-month, names_to ="stock", values_to ="value") |>mutate(month =yearmonth(month),sector =case_when( stock %in%c("AAPL_value", "MSFT_value") ~"Technology", stock %in%c("XOM_value", "CVX_value") ~"Energy" ) ) |>as_tsibble(key =c(sector, stock), index = month)bottom_data
# A tsibble: 240 x 4 [1M]
# Key: sector, stock [4]
month stock value sector
<mth> <chr> <dbl> <chr>
1 2020 Jan CVX_value 1000 Energy
2 2020 Feb CVX_value 881. Energy
3 2020 Mar CVX_value 684. Energy
4 2020 Apr CVX_value 869. Energy
5 2020 May CVX_value 879. Energy
6 2020 Jun CVX_value 855. Energy
7 2020 Jul CVX_value 804. Energy
8 2020 Aug CVX_value 816. Energy
9 2020 Sep CVX_value 700. Energy
10 2020 Oct CVX_value 676. Energy
# ℹ 230 more rows
Aggregate the hierarchy
# aggregate_key() creates the sector totals and overall portfolio total from the bottom-level stock data.hier_data <- bottom_data |>aggregate_key(sector / stock, value =sum(value))hier_data
# A tsibble: 420 x 4 [1M]
# Key: sector, stock [7]
month sector stock value
<mth> <chr*> <chr*> <dbl>
1 2020 Jan <aggregated> <aggregated> 4000
2 2020 Feb <aggregated> <aggregated> 3561.
3 2020 Mar <aggregated> <aggregated> 3057.
4 2020 Apr <aggregated> <aggregated> 3635.
5 2020 May <aggregated> <aggregated> 3750.
6 2020 Jun <aggregated> <aggregated> 3986.
7 2020 Jul <aggregated> <aggregated> 4096.
8 2020 Aug <aggregated> <aggregated> 4508.
9 2020 Sep <aggregated> <aggregated> 4035.
10 2020 Oct <aggregated> <aggregated> 3845.
# ℹ 410 more rows
# A fable: 84 x 6 [1M]
# Key: sector, stock, .model [7]
sector stock .model month
<chr*> <chr*> <chr> <mth>
1 Energy CVX_value arima 2024 Jan
2 Energy CVX_value arima 2024 Feb
3 Energy CVX_value arima 2024 Mar
4 Energy CVX_value arima 2024 Apr
5 Energy CVX_value arima 2024 May
6 Energy CVX_value arima 2024 Jun
7 Energy CVX_value arima 2024 Jul
8 Energy CVX_value arima 2024 Aug
9 Energy CVX_value arima 2024 Sep
10 Energy CVX_value arima 2024 Oct
# ℹ 74 more rows
# ℹ 2 more variables: value <dist>, .mean <dbl>
Bottom-up reconciliation
# Bottom-up forecasts the individual stocks and then rolls them up to sector and total levels.hier_fc_bu <- hier_fit |>reconcile(bottom_up =bottom_up(arima) ) |>forecast(h =12)hier_fc_bu
# A fable: 168 x 6 [1M]
# Key: sector, stock, .model [14]
sector stock .model month
<chr*> <chr*> <chr> <mth>
1 Energy CVX_value arima 2024 Jan
2 Energy CVX_value arima 2024 Feb
3 Energy CVX_value arima 2024 Mar
4 Energy CVX_value arima 2024 Apr
5 Energy CVX_value arima 2024 May
6 Energy CVX_value arima 2024 Jun
7 Energy CVX_value arima 2024 Jul
8 Energy CVX_value arima 2024 Aug
9 Energy CVX_value arima 2024 Sep
10 Energy CVX_value arima 2024 Oct
# ℹ 158 more rows
# ℹ 2 more variables: value <dist>, .mean <dbl>
Top-down reconciliation
# Top-down starts with the total portfolio forecast and pushes it down into the lower levels.hier_fc_td <- hier_fit |>reconcile(top_down =top_down(arima) ) |>forecast(h =12)hier_fc_td
# A fable: 168 x 6 [1M]
# Key: sector, stock, .model [14]
sector stock .model month
<chr*> <chr*> <chr> <mth>
1 Energy CVX_value arima 2024 Jan
2 Energy CVX_value arima 2024 Feb
3 Energy CVX_value arima 2024 Mar
4 Energy CVX_value arima 2024 Apr
5 Energy CVX_value arima 2024 May
6 Energy CVX_value arima 2024 Jun
7 Energy CVX_value arima 2024 Jul
8 Energy CVX_value arima 2024 Aug
9 Energy CVX_value arima 2024 Sep
10 Energy CVX_value arima 2024 Oct
# ℹ 158 more rows
# ℹ 2 more variables: value <dist>, .mean <dbl>
MinT reconciliation
# MinT uses forecast error relationships across the hierarchy to adjust the forecasts.hier_fc_mint <- hier_fit |>reconcile(mint =min_trace(arima) ) |>forecast(h =12)hier_fc_mint
# A fable: 168 x 6 [1M]
# Key: sector, stock, .model [14]
sector stock .model month
<chr*> <chr*> <chr> <mth>
1 Energy CVX_value arima 2024 Jan
2 Energy CVX_value arima 2024 Feb
3 Energy CVX_value arima 2024 Mar
4 Energy CVX_value arima 2024 Apr
5 Energy CVX_value arima 2024 May
6 Energy CVX_value arima 2024 Jun
7 Energy CVX_value arima 2024 Jul
8 Energy CVX_value arima 2024 Aug
9 Energy CVX_value arima 2024 Sep
10 Energy CVX_value arima 2024 Oct
# ℹ 158 more rows
# ℹ 2 more variables: value <dist>, .mean <dbl>
Middle-out reconciliation
# Middle-out uses the sector level as the middle point.# It reconciles upward to the total and downward to the individual stocks.hier_fc_mo <- hier_fit |>reconcile(middle_out =middle_out(arima, split =1) ) |>forecast(h =12)hier_fc_mo
# A fable: 168 x 6 [1M]
# Key: sector, stock, .model [14]
sector stock .model month
<chr*> <chr*> <chr> <mth>
1 Energy CVX_value arima 2024 Jan
2 Energy CVX_value arima 2024 Feb
3 Energy CVX_value arima 2024 Mar
4 Energy CVX_value arima 2024 Apr
5 Energy CVX_value arima 2024 May
6 Energy CVX_value arima 2024 Jun
7 Energy CVX_value arima 2024 Jul
8 Energy CVX_value arima 2024 Aug
9 Energy CVX_value arima 2024 Sep
10 Energy CVX_value arima 2024 Oct
# ℹ 158 more rows
# ℹ 2 more variables: value <dist>, .mean <dbl>
Combine the reconciled forecasts
# Each forecast object also keeps the original ARIMA model, so I combine them first and then filter to only the reconciliation methods.all_forecasts <-as_tibble(hier_fc_bu) |>bind_rows(as_tibble(hier_fc_td)) |>bind_rows(as_tibble(hier_fc_mint)) |>bind_rows(as_tibble(hier_fc_mo))all_forecasts
# A tibble: 672 × 6
sector stock .model month
<chr*> <chr*> <chr> <mth>
1 Energy CVX_value arima 2024 Jan
2 Energy CVX_value arima 2024 Feb
3 Energy CVX_value arima 2024 Mar
4 Energy CVX_value arima 2024 Apr
5 Energy CVX_value arima 2024 May
6 Energy CVX_value arima 2024 Jun
7 Energy CVX_value arima 2024 Jul
8 Energy CVX_value arima 2024 Aug
9 Energy CVX_value arima 2024 Sep
10 Energy CVX_value arima 2024 Oct
# ℹ 662 more rows
# ℹ 2 more variables: value <dist>, .mean <dbl>
# A tibble: 336 × 6
sector stock .model month
<chr*> <chr*> <chr> <mth>
1 Energy CVX_value bottom_up 2024 Jan
2 Energy CVX_value bottom_up 2024 Feb
3 Energy CVX_value bottom_up 2024 Mar
4 Energy CVX_value bottom_up 2024 Apr
5 Energy CVX_value bottom_up 2024 May
6 Energy CVX_value bottom_up 2024 Jun
7 Energy CVX_value bottom_up 2024 Jul
8 Energy CVX_value bottom_up 2024 Aug
9 Energy CVX_value bottom_up 2024 Sep
10 Energy CVX_value bottom_up 2024 Oct
# ℹ 326 more rows
# ℹ 2 more variables: value <dist>, .mean <dbl>
Accuracy comparison
#This compares each reconciled forecast against the 2024 test data.accuracy_results <- all_forecasts_clean |>as_tsibble(key =c(sector, stock, .model),index = month ) |>as_fable(response ="value",distribution = value ) |>accuracy(hier_test)accuracy_results
# A tibble: 28 × 12
.model sector stock .type ME RMSE MAE MPE MAPE MASE RMSSE
<chr> <chr*> <chr*> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 bottom… Energy CVX_value Test 104. 128. 105. 5.74 5.83 NaN NaN
2 bottom… Energy XOM_value Test 166. 213. 198. 7.05 8.52 NaN NaN
3 bottom… Energy <aggregat… Test 270. 326. 300. 6.50 7.28 NaN NaN
4 bottom… Technology AAPL_value Test 228. 428. 366. 6.58 12.6 NaN NaN
5 bottom… Technology MSFT_value Test 81.1 127. 100. 3.12 3.88 NaN NaN
6 bottom… Technology <aggregat… Test 309. 455. 376. 5.33 6.76 NaN NaN
7 bottom… <aggregat… <aggregat… Test 579. 651. 579. 6.02 6.02 NaN NaN
8 middle… Energy CVX_value Test 19.8 87.5 78.4 0.951 4.43 NaN NaN
9 middle… Energy XOM_value Test 54.1 183. 143. 2.24 6.24 NaN NaN
10 middle… Energy <aggregat… Test 73.9 255. 188. 1.70 4.63 NaN NaN
# ℹ 18 more rows
# ℹ 1 more variable: ACF1 <dbl>
Summary by reconciliation method
# I averaged the accuracy measures across the hierarchy.# The lowest average RMSE is the best-performing method.accuracy_summary <- accuracy_results |>group_by(.model) |>summarise(Avg_RMSE =mean(RMSE, na.rm =TRUE),Avg_MAE =mean(MAE, na.rm =TRUE),Avg_MAPE =mean(MAPE, na.rm =TRUE) ) |>arrange(Avg_RMSE)accuracy_summary