#Load Data for Healthcare Sector
MRNA <- read.csv("C:/Users/schic/OneDrive/Documents/Predictive Analytics and Forecasting/MRNA.csv")
PFE <- read.csv("C:/Users/schic/OneDrive/Documents/Predictive Analytics and Forecasting/PFE.csv")
#Load Data for Industrials Sector
GE <- read.csv("C:/Users/schic/OneDrive/Documents/Predictive Analytics and Forecasting/GE.csv")
CAT <- read.csv("C:/Users/schic/OneDrive/Documents/Predictive Analytics and Forecasting/CAT.csv")
#drop last observation from each set
MRNA <- MRNA[-c(61),]
PFE <- PFE[-c(61),]
GE <- GE[-c(61),]
CAT <- CAT[-c(61),]
#formatting the Date variable as Date
MRNA$Date <- as.Date(MRNA$Date, format = "%Y-%m-%d")
PFE$Date <- as.Date(PFE$Date, format = "%Y-%m-%d")
GE$Date <- as.Date(GE$Date, format = "%Y-%m-%d")
CAT$Date <- as.Date(CAT$Date, format = "%Y-%m-%d")
#Add Ticker ID column to each dataframe
MRNA$ID <- "MRNA"
PFE$ID <- "PFE"
GE$ID <- "GE"
CAT$ID <- "CAT"
#Add sector column to each dataframe
MRNA$sector <- "Healthcare"
PFE$sector <- "Healthcare"
GE$sector <- "Industrials"
CAT$sector <- "Industrials"
#Create Portfolio for Top of Hierarchy
portfolio_total <- MRNA$Adj.Close + PFE$Adj.Close + GE$Adj.Close + CAT$Adj.Close
Date = MRNA$Date
portfolio <- data.frame(Date = Date, Adj.Close = portfolio_total)
#create time series objects for portfolio
portfolio_ts <- ts(portfolio, start = c(2019, 05), frequency = 12)
portfolio_train_ts <- ts(portfolio, start = c(2019, 05), end = c(2023, 04), frequency = 12)
portfolio_test_ts <- ts(portfolio, start = c(2023, 05), end = c(2024, 04), frequency = 12)
p1 <- ggplot(CAT, aes(x = Date, y = Adj.Close)) +
geom_line() +
labs( x = 'Date', y = "Closing Price Caterpillar (CAT) Stock ", title = "Historical Data for Closing Prices of CAT stock, May 2019 to April 2024") +
theme_minimal() +
scale_x_date(date_breaks = "4 months", date_labels = "%b-%y") + theme(plot.title = element_text(size = 9), # Change font size of plot title
axis.title = element_text(size = 7), # Change font size of axis titles
axis.text = element_text(size = 7))
p1
p2 <- ggplot(GE, aes(x = Date, y = Adj.Close)) +
geom_line() +
labs( x = 'Date', y = "Closing Price General Electreic (GE) Stock ", title = "Historical Data for Closing Prices of GE stock, May 2019 to April 2024") +
theme_minimal() +
scale_x_date(date_breaks = "4 months", date_labels = "%b-%y") + theme(plot.title = element_text(size = 9), # Change font size of plot title
axis.title = element_text(size = 7), # Change font size of axis titles
axis.text = element_text(size = 7))
p2
p3 <- ggplot(MRNA, aes(x = Date, y = Adj.Close)) +
geom_line() +
labs( x = 'Date', y = "Closing Price Moderna (MRNA) Stock ", title = "Historical Data for Closing Prices of MRNA stock, May 2019 to April 2024") +
theme_minimal() +
scale_x_date(date_breaks = "4 months", date_labels = "%b-%y") + theme(plot.title = element_text(size = 9), # Change font size of plot title
axis.title = element_text(size = 7), # Change font size of axis titles
axis.text = element_text(size = 7))
p3
p4 <- ggplot(PFE, aes(x = Date, y = Adj.Close)) +
geom_line() +
labs( x = 'Date', y = "Closing Price Pfizer (PFE) Stock ", title = "Historical Data for Closing Prices of PFE stock, May 2019 to April 2024") +
theme_minimal() +
scale_x_date(date_breaks = "4 months", date_labels = "%b-%y") + theme(plot.title = element_text(size = 9), # Change font size of plot title
axis.title = element_text(size = 7), # Change font size of axis titles
axis.text = element_text(size = 7))
p4
p5 <- ggplot(portfolio, aes(x = Date, y = Adj.Close)) +
geom_line() +
labs( x = 'Date', y = "Closing Price Portfolio Value ", title = "Historical Data for Portfolio, May 2019 to April 2024") +
theme_minimal() +
scale_x_date(date_breaks = "4 months", date_labels = "%b-%y") + theme(plot.title = element_text(size = 9), # Change font size of plot title
axis.title = element_text(size = 7), # Change font size of axis titles
axis.text = element_text(size = 7))
p5
grid.arrange(p1,p2,p3,p4,p5, nrow = 3)
#create tsibble object
Portfolio2 <- bind_rows(MRNA, PFE, GE, CAT)
Portfolio_tsibble <- tsibble(Portfolio2, key = c(ID, sector), index = Date)
Portfolio_tsibble <- mutate(Portfolio_tsibble, Date = yearmonth(Date))
#Establishing Hierarchical Structure
Portfolio_hts <- Portfolio_tsibble |>
aggregate_key(sector/ID , Value = sum(Adj.Close))
Portfolio_hts
## # A tsibble: 420 x 4 [1M]
## # Key: sector, ID [7]
## Date sector ID Value
## <mth> <chr*> <chr*> <dbl>
## 1 2019 May <aggregated> <aggregated> 206.
## 2 2019 Jun <aggregated> <aggregated> 221.
## 3 2019 Jul <aggregated> <aggregated> 212.
## 4 2019 Aug <aggregated> <aggregated> 191.
## 5 2019 Sep <aggregated> <aggregated> 202.
## 6 2019 Oct <aggregated> <aggregated> 220.
## 7 2019 Nov <aggregated> <aggregated> 237.
## 8 2019 Dec <aggregated> <aggregated> 239.
## 9 2020 Jan <aggregated> <aggregated> 230.
## 10 2020 Feb <aggregated> <aggregated> 220.
## # ℹ 410 more rows
Portfolio_hts |>
filter(is_aggregated(ID)) |>
autoplot(Value) +
labs(y = "Value",
title = "Portfolio Value: Portfolio and Sectors") +
facet_wrap(vars(sector), scales = "free_y", ncol = 3) +
theme(legend.position = "none")
MRNA_train <- MRNA %>%
filter(Date >= '2019-05-01' & Date <= '2023-04-01')
PFE_train <- PFE %>%
filter(Date >= '2019-05-01' & Date <= '2023-04-01')
GE_train <- GE %>%
filter(Date >= '2019-05-01' & Date <= '2023-04-01')
CAT_train <- CAT %>%
filter(Date >= '2019-05-01' & Date <= '2023-04-01')
MRNA_test <- MRNA %>%
filter(Date >= '2023-05-01' & Date <= '2024-04-01')
PFE_test <- PFE %>%
filter(Date >= '2023-05-01' & Date <= '2024-04-01')
GE_test <- GE %>%
filter(Date >= '2023-05-01' & Date <= '2024-04-01')
CAT_test <- CAT %>%
filter(Date >= '2023-05-01' & Date <= '2024-04-01')
Portfolio3 <- bind_rows(MRNA_train, PFE_train, GE_train, CAT_train)
Portfolio_tsibble2 <- tsibble(Portfolio3, key = c(ID, sector), index = Date)
Portfolio_tsibble2 <- mutate(Portfolio_tsibble2, Date = yearmonth(Date))
Portfolio4 <- bind_rows(MRNA_test, PFE_test, GE_test, CAT_test)
Portfolio_tsibble3 <- tsibble(Portfolio4, key = c(ID, sector), index = Date)
Portfolio_tsibble3 <- mutate(Portfolio_tsibble3, Date = yearmonth(Date))
Portfolio_hts_train <- Portfolio_tsibble2 |>
aggregate_key(sector/ID , Value = sum(Adj.Close))
Portfolio_hts_test <- Portfolio_tsibble3 |>
aggregate_key(sector/ID , Value = sum(Adj.Close))
bottom_up <- Portfolio_hts_train |>
model(ets = ETS(Value)) |>
reconcile(bu = bottom_up(ets))
#forecast(h = "1 year")
head(bottom_up)
## # A mable: 6 x 4
## # Key: sector, ID [6]
## sector ID ets bu
## <chr*> <chr*> <model> <model>
## 1 Healthcare MRNA <ETS(M,A,N)> <ETS(M,A,N)>
## 2 Healthcare PFE <ETS(M,N,N)> <ETS(M,N,N)>
## 3 Healthcare <aggregated> <ETS(M,N,N)> <ETS(M,N,N)>
## 4 Industrials CAT <ETS(M,N,N)> <ETS(M,N,N)>
## 5 Industrials GE <ETS(A,N,N)> <ETS(A,N,N)>
## 6 Industrials <aggregated> <ETS(M,N,N)> <ETS(M,N,N)>
top_down <- Portfolio_hts_train |>
model(ets = ETS(Value)) |>
reconcile(td = top_down(ets))
# forecast(h = "1 year")
head(top_down)
## # A mable: 6 x 4
## # Key: sector, ID [6]
## sector ID ets td
## <chr*> <chr*> <model> <model>
## 1 Healthcare MRNA <ETS(M,A,N)> <ETS(M,A,N)>
## 2 Healthcare PFE <ETS(M,N,N)> <ETS(M,N,N)>
## 3 Healthcare <aggregated> <ETS(M,N,N)> <ETS(M,N,N)>
## 4 Industrials CAT <ETS(M,N,N)> <ETS(M,N,N)>
## 5 Industrials GE <ETS(A,N,N)> <ETS(A,N,N)>
## 6 Industrials <aggregated> <ETS(M,N,N)> <ETS(M,N,N)>
middle_out <- Portfolio_hts_train |>
model(ets = ETS(Value)) |>
reconcile(mo = middle_out(ets))
#head(middle_out_forecast)
MO_FC <- forecast(middle_out, h = 12)
HTS_Model <- Portfolio_hts_train |>
model(ets = ETS(Value)) |>
reconcile(bu = bottom_up(ets), td = top_down(ets), mo = middle_out(ets))
fc <- HTS_Model |> forecast(h = '1 year')
fc |>
filter(is_aggregated(ID)) |>
autoplot(Portfolio_hts) +
labs(y = "Value") +
facet_wrap(vars(sector), scales = "free_y")
accuracy_stats <- accuracy(fc, Portfolio_hts)
print(accuracy_stats)
## # A tibble: 28 × 12
## .model sector ID .type ME RMSE MAE MPE MAPE MASE
## <chr> <chr*> <chr*> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 bu Healthcare MRNA Test -4.99 16.6 12.4 -6.60 13.4 0.105
## 2 bu Healthcare PFE Test -6.46 7.35 6.46 -22.6 22.6 0.920
## 3 bu Healthcare <aggregated> Test -11.5 19.0 15.2 -9.75 12.5 0.125
## 4 bu Industrials CAT Test 67.2 84.3 69.3 21.3 22.3 1.79
## 5 bu Industrials GE Test 25.3 33.9 25.3 21.3 21.3 1.47
## 6 bu Industrials <aggregated> Test 92.5 118. 94.3 21.4 22.0 1.84
## 7 bu <aggregated> <aggregated> Test 81.1 116. 88.2 13.8 15.4 0.546
## 8 ets Healthcare MRNA Test -4.99 16.6 12.4 -6.60 13.4 0.105
## 9 ets Healthcare PFE Test -6.46 7.35 6.46 -22.6 22.6 0.920
## 10 ets Healthcare <aggregated> Test -37.4 41.4 37.4 -30.1 30.1 0.308
## # ℹ 18 more rows
## # ℹ 2 more variables: RMSSE <dbl>, ACF1 <dbl>
fc |>
filter(is_aggregated(ID), is_aggregated(sector)) |>
accuracy(
data = Portfolio_hts,
measures = list(rmse = RMSE, mae = MAE)
) |>
group_by(.model) |>
summarise(rmse = mean(rmse), mae = mean(mae))
## # A tibble: 4 × 3
## .model rmse mae
## <chr> <dbl> <dbl>
## 1 bu 116. 88.2
## 2 ets 91.9 70.1
## 3 mo 90.7 69.1
## 4 td 91.9 70.1
Based on the forecasting graphs and accuracy analysis done above, I have come to the conclusion that the middle-out approach performed the best, followed by the top-down approach, and then the bottom-up approach. In this case, it is also interesting to note that the ETS model performed the same as the top-down approach, but slightly worse than the middle-out approach. It is important to address that visually, it does not seem as though the models performed that great.