1. Load Packages

2. Import Data

#Load Data for Healthcare Sector
MRNA <- read.csv("C:/Users/schic/OneDrive/Documents/Predictive Analytics and Forecasting/MRNA.csv")
PFE <- read.csv("C:/Users/schic/OneDrive/Documents/Predictive Analytics and Forecasting/PFE.csv")

#Load Data for Industrials Sector
GE <- read.csv("C:/Users/schic/OneDrive/Documents/Predictive Analytics and Forecasting/GE.csv")
CAT <- read.csv("C:/Users/schic/OneDrive/Documents/Predictive Analytics and Forecasting/CAT.csv")

#drop last observation from each set
MRNA <- MRNA[-c(61),]
PFE <- PFE[-c(61),]
GE <- GE[-c(61),]
CAT <- CAT[-c(61),]

3. Data Manipulation

#formatting the Date variable as Date
MRNA$Date <- as.Date(MRNA$Date, format = "%Y-%m-%d")

PFE$Date <- as.Date(PFE$Date, format = "%Y-%m-%d")

GE$Date <- as.Date(GE$Date, format = "%Y-%m-%d")

CAT$Date <- as.Date(CAT$Date, format = "%Y-%m-%d")

#Add Ticker ID column to each dataframe
MRNA$ID <- "MRNA"
PFE$ID <- "PFE"
GE$ID <- "GE"
CAT$ID <- "CAT"

#Add sector column to each dataframe
MRNA$sector <- "Healthcare"
PFE$sector <- "Healthcare"
GE$sector <- "Industrials"
CAT$sector <- "Industrials"

a. Time Series Objects

#Create Portfolio for Top of Hierarchy

portfolio_total <- MRNA$Adj.Close + PFE$Adj.Close + GE$Adj.Close + CAT$Adj.Close

Date = MRNA$Date

portfolio <- data.frame(Date = Date, Adj.Close = portfolio_total)

#create time series objects for portfolio
portfolio_ts <- ts(portfolio, start = c(2019, 05), frequency = 12)

portfolio_train_ts <- ts(portfolio, start = c(2019, 05), end = c(2023, 04), frequency = 12)
portfolio_test_ts <- ts(portfolio, start = c(2023, 05), end = c(2024, 04), frequency = 12)

b. Historical Plots

p1 <- ggplot(CAT, aes(x = Date, y = Adj.Close)) +
  geom_line() +
  labs( x = 'Date', y = "Closing Price Caterpillar (CAT) Stock ", title = "Historical Data for Closing Prices of CAT stock, May 2019 to April 2024") +
  theme_minimal() + 
  scale_x_date(date_breaks = "4 months", date_labels = "%b-%y") + theme(plot.title = element_text(size = 9),  # Change font size of plot title
    axis.title = element_text(size = 7),  # Change font size of axis titles
    axis.text = element_text(size = 7))
p1

p2 <- ggplot(GE, aes(x = Date, y = Adj.Close)) +
  geom_line() +
  labs( x = 'Date', y = "Closing Price General Electreic (GE) Stock ", title = "Historical Data for Closing Prices of GE stock, May 2019 to April 2024") +
  theme_minimal() + 
  scale_x_date(date_breaks = "4 months", date_labels = "%b-%y") + theme(plot.title = element_text(size = 9),  # Change font size of plot title
    axis.title = element_text(size = 7),  # Change font size of axis titles
    axis.text = element_text(size = 7))
p2

p3 <- ggplot(MRNA, aes(x = Date, y = Adj.Close)) +
  geom_line() +
  labs( x = 'Date', y = "Closing Price Moderna (MRNA) Stock ", title = "Historical Data for Closing Prices of MRNA stock, May 2019 to April 2024") +
  theme_minimal() + 
  scale_x_date(date_breaks = "4 months", date_labels = "%b-%y") + theme(plot.title = element_text(size = 9),  # Change font size of plot title
    axis.title = element_text(size = 7),  # Change font size of axis titles
    axis.text = element_text(size = 7))
p3

p4 <- ggplot(PFE, aes(x = Date, y = Adj.Close)) +
  geom_line() +
  labs( x = 'Date', y = "Closing Price Pfizer (PFE) Stock ", title = "Historical Data for Closing Prices of PFE stock, May 2019 to April 2024") +
  theme_minimal() + 
  scale_x_date(date_breaks = "4 months", date_labels = "%b-%y") + theme(plot.title = element_text(size = 9),  # Change font size of plot title
    axis.title = element_text(size = 7),  # Change font size of axis titles
    axis.text = element_text(size = 7))
p4

p5 <- ggplot(portfolio, aes(x = Date, y = Adj.Close)) +
  geom_line() +
  labs( x = 'Date', y = "Closing Price Portfolio Value ", title = "Historical Data for Portfolio, May 2019 to April 2024") +
  theme_minimal() + 
  scale_x_date(date_breaks = "4 months", date_labels = "%b-%y") + theme(plot.title = element_text(size = 9),  # Change font size of plot title
    axis.title = element_text(size = 7),  # Change font size of axis titles
    axis.text = element_text(size = 7))
p5

grid.arrange(p1,p2,p3,p4,p5, nrow = 3)

4. Modeling

#create tsibble object
Portfolio2 <- bind_rows(MRNA, PFE, GE, CAT)
Portfolio_tsibble <- tsibble(Portfolio2, key = c(ID, sector), index = Date)
Portfolio_tsibble <- mutate(Portfolio_tsibble, Date = yearmonth(Date))

#Establishing Hierarchical Structure
Portfolio_hts <- Portfolio_tsibble |>
  aggregate_key(sector/ID , Value = sum(Adj.Close))
Portfolio_hts

## # A tsibble: 420 x 4 [1M]
## # Key:       sector, ID [7]
##        Date sector       ID           Value
##       <mth> <chr*>       <chr*>       <dbl>
##  1 2019 May <aggregated> <aggregated>  206.
##  2 2019 Jun <aggregated> <aggregated>  221.
##  3 2019 Jul <aggregated> <aggregated>  212.
##  4 2019 Aug <aggregated> <aggregated>  191.
##  5 2019 Sep <aggregated> <aggregated>  202.
##  6 2019 Oct <aggregated> <aggregated>  220.
##  7 2019 Nov <aggregated> <aggregated>  237.
##  8 2019 Dec <aggregated> <aggregated>  239.
##  9 2020 Jan <aggregated> <aggregated>  230.
## 10 2020 Feb <aggregated> <aggregated>  220.
## # ℹ 410 more rows

Portfolio_hts |>
  filter(is_aggregated(ID)) |>
  autoplot(Value) +
  labs(y = "Value",
       title = "Portfolio Value: Portfolio and Sectors") +
  facet_wrap(vars(sector), scales = "free_y", ncol = 3) +
  theme(legend.position = "none")

a. Bottom-Up Approach

MRNA_train <- MRNA %>%
  filter(Date >= '2019-05-01' & Date <= '2023-04-01')
PFE_train <- PFE %>%
  filter(Date >= '2019-05-01' & Date <= '2023-04-01')
GE_train <- GE %>%
  filter(Date >= '2019-05-01' & Date <= '2023-04-01')
CAT_train <- CAT %>%
  filter(Date >= '2019-05-01' & Date <= '2023-04-01')

MRNA_test <- MRNA %>%
  filter(Date >= '2023-05-01' & Date <= '2024-04-01')
PFE_test <- PFE %>%
  filter(Date >= '2023-05-01' & Date <= '2024-04-01')
GE_test <- GE %>%
  filter(Date >= '2023-05-01' & Date <= '2024-04-01')
CAT_test <- CAT %>%
  filter(Date >= '2023-05-01' & Date <= '2024-04-01')

Portfolio3 <- bind_rows(MRNA_train, PFE_train, GE_train, CAT_train)
Portfolio_tsibble2 <- tsibble(Portfolio3, key = c(ID, sector), index = Date)
Portfolio_tsibble2 <- mutate(Portfolio_tsibble2, Date = yearmonth(Date))

Portfolio4 <- bind_rows(MRNA_test, PFE_test, GE_test, CAT_test)
Portfolio_tsibble3 <- tsibble(Portfolio4, key = c(ID, sector), index = Date)
Portfolio_tsibble3 <- mutate(Portfolio_tsibble3, Date = yearmonth(Date))

Portfolio_hts_train <- Portfolio_tsibble2 |>
  aggregate_key(sector/ID , Value = sum(Adj.Close))

Portfolio_hts_test <- Portfolio_tsibble3 |>
  aggregate_key(sector/ID , Value = sum(Adj.Close))

bottom_up <- Portfolio_hts_train |>
  model(ets = ETS(Value)) |>
  reconcile(bu = bottom_up(ets))
  #forecast(h = "1 year")

head(bottom_up)

## # A mable: 6 x 4
## # Key:     sector, ID [6]
##   sector      ID                    ets bu          
##   <chr*>      <chr*>            <model> <model>     
## 1 Healthcare  MRNA         <ETS(M,A,N)> <ETS(M,A,N)>
## 2 Healthcare  PFE          <ETS(M,N,N)> <ETS(M,N,N)>
## 3 Healthcare  <aggregated> <ETS(M,N,N)> <ETS(M,N,N)>
## 4 Industrials CAT          <ETS(M,N,N)> <ETS(M,N,N)>
## 5 Industrials GE           <ETS(A,N,N)> <ETS(A,N,N)>
## 6 Industrials <aggregated> <ETS(M,N,N)> <ETS(M,N,N)>

b. Top-Down Approach

top_down <- Portfolio_hts_train |>
  model(ets = ETS(Value)) |>
  reconcile(td = top_down(ets))
 # forecast(h = "1 year")

head(top_down)

## # A mable: 6 x 4
## # Key:     sector, ID [6]
##   sector      ID                    ets td          
##   <chr*>      <chr*>            <model> <model>     
## 1 Healthcare  MRNA         <ETS(M,A,N)> <ETS(M,A,N)>
## 2 Healthcare  PFE          <ETS(M,N,N)> <ETS(M,N,N)>
## 3 Healthcare  <aggregated> <ETS(M,N,N)> <ETS(M,N,N)>
## 4 Industrials CAT          <ETS(M,N,N)> <ETS(M,N,N)>
## 5 Industrials GE           <ETS(A,N,N)> <ETS(A,N,N)>
## 6 Industrials <aggregated> <ETS(M,N,N)> <ETS(M,N,N)>

c. Middle-Out Approach

middle_out <- Portfolio_hts_train |>
  model(ets = ETS(Value)) |>
  reconcile(mo = middle_out(ets)) 

#head(middle_out_forecast)

MO_FC <- forecast(middle_out, h = 12)

HTS_Model <- Portfolio_hts_train |>
  model(ets = ETS(Value)) |>
  reconcile(bu = bottom_up(ets), td = top_down(ets), mo = middle_out(ets))

fc <- HTS_Model |> forecast(h = '1 year')

fc |>
  filter(is_aggregated(ID)) |>
  autoplot(Portfolio_hts) +
  labs(y = "Value") +
  facet_wrap(vars(sector), scales = "free_y")

5. Model Stats/Accuracy

accuracy_stats <- accuracy(fc, Portfolio_hts)
print(accuracy_stats)

## # A tibble: 28 × 12
##    .model sector       ID           .type     ME   RMSE   MAE    MPE  MAPE  MASE
##    <chr>  <chr*>       <chr*>       <chr>  <dbl>  <dbl> <dbl>  <dbl> <dbl> <dbl>
##  1 bu     Healthcare   MRNA         Test   -4.99  16.6  12.4   -6.60  13.4 0.105
##  2 bu     Healthcare   PFE          Test   -6.46   7.35  6.46 -22.6   22.6 0.920
##  3 bu     Healthcare   <aggregated> Test  -11.5   19.0  15.2   -9.75  12.5 0.125
##  4 bu     Industrials  CAT          Test   67.2   84.3  69.3   21.3   22.3 1.79 
##  5 bu     Industrials  GE           Test   25.3   33.9  25.3   21.3   21.3 1.47 
##  6 bu     Industrials  <aggregated> Test   92.5  118.   94.3   21.4   22.0 1.84 
##  7 bu     <aggregated> <aggregated> Test   81.1  116.   88.2   13.8   15.4 0.546
##  8 ets    Healthcare   MRNA         Test   -4.99  16.6  12.4   -6.60  13.4 0.105
##  9 ets    Healthcare   PFE          Test   -6.46   7.35  6.46 -22.6   22.6 0.920
## 10 ets    Healthcare   <aggregated> Test  -37.4   41.4  37.4  -30.1   30.1 0.308
## # ℹ 18 more rows
## # ℹ 2 more variables: RMSSE <dbl>, ACF1 <dbl>

fc |>
  filter(is_aggregated(ID), is_aggregated(sector)) |>
  accuracy(
    data = Portfolio_hts,
    measures = list(rmse = RMSE, mae = MAE)
  ) |>
  group_by(.model) |>
  summarise(rmse = mean(rmse), mae = mean(mae))

## # A tibble: 4 × 3
##   .model  rmse   mae
##   <chr>  <dbl> <dbl>
## 1 bu     116.   88.2
## 2 ets     91.9  70.1
## 3 mo      90.7  69.1
## 4 td      91.9  70.1

Based on the forecasting graphs and accuracy analysis done above, I have come to the conclusion that the middle-out approach performed the best, followed by the top-down approach, and then the bottom-up approach. In this case, it is also interesting to note that the ETS model performed the same as the top-down approach, but slightly worse than the middle-out approach. It is important to address that visually, it does not seem as though the models performed that great.

Discussion Week 5: Hierarchical Time Series

Daniel Schick

2024-04-11