# Load data

## Semiconductor firms
tsm <- read.csv("/Users/pin.lyu/Desktop/BC_Class_Folder/Predictive_Analytics/Data_Sets/TSM.csv")
nvda <- read.csv("/Users/pin.lyu/Desktop/BC_Class_Folder/Predictive_Analytics/Data_Sets/NVDA.csv")

## EV firms
tsla <- read.csv("/Users/pin.lyu/Desktop/BC_Class_Folder/Predictive_Analytics/Data_Sets/TSLA.csv")
byd <- read.csv("/Users/pin.lyu/Desktop/BC_Class_Folder/Predictive_Analytics/Data_Sets/BYDDY.csv")

Data Manipulation

# Date all "Date" variables

## Semiconductor
tsm$Date <- as.Date(tsm$Date)
nvda$Date <- as.Date(nvda$Date)

##EV
tsla$Date <- as.Date(tsla$Date)
byd$Date <- as.Date(byd$Date)
# Select variables

## Semiconductor
tsm_ts <- tsm[,c(1,5)]
nvda_ts <- nvda[,c(1,5)]

##EV
tsla_ts <- tsla[,c(1,5)]
byd_ts <- byd[,c(1,5)]
# Make time series object

tsm_ts <- ts(tsm_ts, start = c(2019,05), end = c(2024,04), frequency = 12)
nvda_ts <- ts(nvda_ts, start = c(2019,05), end = c(2024,04), frequency = 12)

tsla_ts <- ts(tsla_ts, start = c(2019,05), end = c(2024,04), frequency = 12)
byd_ts <- ts(byd_ts, start = c(2019,05), end = c(2024,04), frequency = 12)
# Split the xts object into training and testing sets

tsm_train <- ts(tsm_ts[1:48,], start = c(2019,05), end = c(2023,04), frequency = 12)
nvda_train <- ts(nvda_ts[1:48,], start = c(2019,05), end = c(2023,04), frequency = 12)

tsla_train <- ts(tsla_ts[1:48,], start = c(2019,05), end = c(2023,04), frequency = 12)
byd_train  <- ts(byd_ts[1:48,], start = c(2019,05), end = c(2023,04), frequency = 12)
# Aggregate portfolio value

portfolio_value <- tsm_ts[,2] + nvda_ts[,2] + tsla_ts[,2] + byd_ts[,2]

# Extract & add date variable 

Date <- tsm[,1]

portfolio <- ts(cbind(portfolio_value, Date), start = c(2019,05), end = c(2024,04), frequency = 12)

Data Visualization

Modeling

Top down approach

# ETS model (M,A,N)

base <- portfolio[1:48,1] |>
  ets()
# Top-Down Approach

top_down_forecast <- forecast(base, h = 12)  # Forecast for Year 5 (12 months)
# Convert time series forecast to a data frame

top_down_df <- as.data.frame(top_down_forecast)

# Add a "Date" column containing the forecasted dates

top_down_df$Date <- as.Date(c("2023-05-01", "2023-06-01", "2023-07-01", "2023-08-01",
                       "2023-09-01", "2023-10-01", "2023-11-01", "2023-12-01",
                       "2024-01-01", "2024-02-01", "2024-03-01", "2024-04-01"))
# Select variables

top_down_df <- top_down_df[,c(1,6)]

# rename variables

names <- c("value","time")
colnames(top_down_df) <- names

Bottom up Approach

# Bottom-up Approach

## Semiconductor firms

semic_tms <- forecast(ets(tsm_train[,2]), h = 12)    # (A,N,N)
semic_nvda <- forecast(ets(nvda_train[,2]), h = 12)  # (M,A,N)

## EV firms 

ev_tsla <- forecast(ets(tsla_train[,2]), h = 12)     # (M,A,N)
ev_byd <- forecast(ets(byd_train[,2]), h = 12)       # (M,N,N)

# Sum 

bottom_up_forecast <- semic_tms$mean + semic_nvda$mean + ev_tsla$mean + ev_byd$mean
# Show sum 

print(bottom_up_forecast)
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 2023                                     596.3820 603.9572 611.5325 619.1077
## 2024 656.9840 664.5592 672.1345 679.7097                                    
##           Sep      Oct      Nov      Dec
## 2023 626.6830 634.2582 641.8335 649.4087
## 2024
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.

Middle Out Approach

  • Each industry as a whole is chosen as the middle section
# Separate by industry

industry_semic <- tsm_ts[,2] + nvda_ts[,2] 

industry_ev <- + tsla_ts[,2] + byd_ts[,2]

# Add date

Date <- tsm[,1]

industry_semic_ts <- ts(cbind(industry_semic, Date), start = c(2019,05), end = c(2024,04), frequency = 12)

industry_ev_ts <- ts(cbind(industry_ev, Date), start = c(2019,05), end = c(2024,04), frequency = 12)
## Semiconductor industry

semic_ind <- forecast(ets(industry_semic_ts[1:48, 2]), h = 12)    # (M,A,N)

## EV industry

ev_ind <- forecast(ets(industry_ev_ts[1:48, 2]), h = 12)     # (M,A,N)

# Sum 

middle_out_forecast <- semic_ind$mean + ev_ind$mean
# time object result
middle_out_forecast <- ts(middle_out_forecast, start = c(2023,05), end = c(2024,04), frequency = 12)

# Convert time series forecast to a data frame

middle_out_df <- as.data.frame(bottom_up_forecast)

# Add a "Date" column containing the forecasted dates

middle_out_df$Date <- as.Date(c("2023-05-01", "2023-06-01", "2023-07-01", "2023-08-01",
                       "2023-09-01", "2023-10-01", "2023-11-01", "2023-12-01",
                       "2024-01-01", "2024-02-01", "2024-03-01", "2024-04-01"))

# rename variables

names <- c("value","time")
colnames(middle_out_df) <- names
# Plot the forecast

ggplot() +
  geom_line(data = middle_out_df, aes(x = time, y = value), color = "blue") +
  geom_line(data = portfolio_df, aes(x = Date, y = portfolio_value), color = "black") +
  # Customize x-axis labels for monthly dates
  scale_x_date(date_breaks = "6 month", date_labels = "%Y-%m") +
  # Add other plot formatting options as needed
  labs(x = "Date", y = "Forecasted Value", title = "Bottom-Up Forecast vs. Actual Portfolio Value") +
  theme_classic()
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.

Summary

  • In this discussion, I employed ETS models across the board for forecasting. Based on the graphs above, it is evident that all three models—top-down, bottom-up, and middle-out—produced the same forecast. However, this result appears inconsistent with the textbook, as all these models should typically exhibit slight variations in their outcomes. To address this discrepancy, further investigation into the model implementation and data preprocessing may be necessary to identify any potential errors or factors contributing to the uniform forecasts. Alternatively, it’s possible that the data characteristics or specific circumstances of the analysis could lead to such results, though this would require a thorough examination to confirm.