Mixed Hierarchical Forecasting of Post-Covid MBTA Subway Ridership Code

Author

Teddy Kelly

Loading In the MBTA Subway Time Series

# Clearing existing environment and loading in the necessary packages
rm(list=ls())
library(fpp3)
library(kableExtra)
library(patchwork)
library(zoo)
library(webshot2)
library(knitr)
library(stringr)
library(scales)
library(readr)

# The file path specified below will work on any system as long as the MBTA dataset 
# is inside the 'Data' subfolder within the 'MBTA_project_files' main folder
# The code should take around 4 minutes to compile
mbta <- read.csv('../Data/MBTA_Monthly_Ridership_By_Mode_and_Line.csv')

# Converting the data into a tsibble
mbta <- mbta |>
  mutate(month = yearmonth(month_of_service)) |>
  select(-month_of_service, -ObjectId) |>
  as_tsibble(index = month,
             key = c('daytype', 'route_or_line'))

Cleaning up Time Series to only include the Subway System Data

# CREATING SUBWAY WEEKDAY TSIBBLE
# Heavy Rail weekday
heavy_rail_weekday <- mbta |> filter(daytype == 'Weekday', 
                                     route_or_line == 'Heavy Rail') |>
  select(month, daytype, route_or_line, ridership_average)

# Light Rail weekday
light_rail_weekday <- mbta |> filter(daytype == 'Weekday', 
                                     route_or_line == 'Light Rail') |>
  select(month, daytype, route_or_line, ridership_average)

# Adding light rail and heavy rail to get total subway weekday ridership
subway_weekday <- light_rail_weekday |> 
  mutate(ridership_average = heavy_rail_weekday[[4]] + light_rail_weekday[[4]],
         route_or_line = "All Subway Lines")

# CREATING SUBWAY WEEKEND TSIBBLE
# Heavy Rail Weekend
heavy_rail_sat <- mbta |> filter(daytype == 'Saturday', 
                                 route_or_line == 'Heavy Rail') |>
  select(month, daytype, route_or_line, ridership_average)
heavy_rail_sun <- mbta |> filter(daytype == 'Sunday', 
                                 route_or_line == 'Heavy Rail') |>
  select(month, daytype, route_or_line, ridership_average)

heavy_rail_weekend <- heavy_rail_sat |> 
  mutate(ridership_average = heavy_rail_sat[[4]] + heavy_rail_sun[[4]],
         daytype = 'Weekend') 

# Heavy Rail Time Series
heavy_rail_ts <- bind_rows(heavy_rail_weekday, heavy_rail_weekend)


# Light Rail Weekend
light_rail_sat <- mbta |> filter(daytype == 'Saturday', 
                                 route_or_line == 'Light Rail') |>
  select(month, daytype, route_or_line, ridership_average)
light_rail_sun <- mbta |> filter(daytype == 'Sunday', 
                                 route_or_line == 'Light Rail') |>
  select(month, daytype, route_or_line, ridership_average)

light_rail_weekend <- light_rail_sat |> 
  mutate(ridership_average = light_rail_sat[[4]] + light_rail_sun[[4]],
         daytype = 'Weekend') 

# Light Rail Time Series
light_rail_ts <- bind_rows(light_rail_weekday, light_rail_weekend)

# Subway Weekend
subway_weekend <- light_rail_weekend |> 
  mutate(ridership_average = heavy_rail_weekend[[4]] + light_rail_weekend[[4]],
        route_or_line = "All Subway Lines")

# CREATING SUBWAY ALL DAYS TSIBBLE COMBINNING WEEKDAYS AND WEEKENDS
subway_alldays <- subway_weekday |>
  mutate(ridership_average = subway_weekday[[4]] + subway_weekend[[4]],
         daytype = 'Total')

# Combining subway alldays, weekday, and weekend into one time series
subway_ts <- bind_rows(subway_alldays, subway_weekday, subway_weekend)


# CREATING TIME SERIES WITH INDIVIDUAL SUBWAY LINES

# Creating subway line data for Saturday
rgbo_sat <- mbta |> 
  filter(daytype == 'Saturday', 
         route_or_line %in% c('Blue Line', 'Green Line', 'Orange Line', 'Red Line'))

# Creating subway line data for Sunday
rgbo_sun <- mbta |> 
  filter(daytype == 'Sunday', 
         route_or_line %in% c('Blue Line', 'Green Line', 'Orange Line', 'Red Line'))

# Combining subway line data for saturday and sunday to make weekend data
rgbo_weekend <- rgbo_sat |>
  select(-daytype) |>
  mutate(ridership_total = rgbo_sat[[4]] + rgbo_sun[[4]], 
         ridership_average = rgbo_sat[[5]] + rgbo_sun[[5]],
         daytype = 'Weekend') |>
  select(month, daytype, route_or_line, ridership_average)


# Filtering original mbta time series to create subway line data for weekdays
rgbo_weekday <- mbta |> 
  filter(daytype == 'Weekday', 
         route_or_line %in% c('Blue Line', 'Green Line', 'Orange Line', 'Red Line')) |> 
  select(month, daytype, route_or_line, ridership_average)

# Combining subway line weekday and weekend time series to get total
rgbo_ts <- bind_rows(rgbo_weekday, rgbo_weekend)

# COMBINING LINES AND TOTAL SUBWAY INTO ONE TIME SERIES
mbta_ts <- bind_rows(subway_ts, rgbo_ts, heavy_rail_ts, light_rail_ts)

Initial Graph Showing Monthly Average Daily Ridership for the entire Subway System for Both Weekdays and Weekends

subway_alldays_plot <- mbta_ts |> 
  filter(route_or_line == 'All Subway Lines', daytype == 'Total') |>
  autoplot(ridership_average) +
  labs(title = 'MBTA Monthly Average Daily Subway Ridership',
       subtitle = 'Jun 2018 - Dec 2025',
       x = 'Time (Months)',
       y = 'Average Ridership') +
  scale_y_continuous(labels = scales::comma) +
  theme(legend.position = 'bottom')

# Saving the graph to the "Plots" subfolder
ggsave('../Plots/initial_graphs/initial_subway_ridership_alldays.pdf',
       plot = subway_alldays_plot,
       height = 6,
       width = 8)

Initial Graph Comparing Average Weekday Ridership Vs Average Weekend Ridership Before and After Covid

# PLOTTING INTIAL COMPARISON OF SUBWAY WEEKEND AND WEEKDAY RIDERSHIP
weekend_vs_weekday_plot <- mbta_ts |> 
  filter(route_or_line == 'All Subway Lines', 
         daytype != 'Total') |>
  autoplot(ridership_average, aes(color = daytype)) +
  labs(title = 'MBTA Monthly Average Subway Ridership By Daytype',
       subtitle = 'Jun 2018 - Dec 2025',
       x = 'Time (Months)',
       y = 'Average Ridership') +
  scale_y_continuous(labels = scales::comma) +
  theme(legend.position = 'bottom') 
# Saving the graph to the "Plots" subfolder
ggsave('../Plots/initial_graphs/weekday_vs_weekend_ridership.pdf',
       plot = weekend_vs_weekday_plot,
       height = 6,
       width = 8)

Converting `mbta_ts` into a Mixed Hierarchical and Group Structure

# Making hierarchical time series

# BEGIN WITH MOST DISAGGREGATED LEVEL BY DELETING AGGREGATE VALUES FOR HEAVY/LIGTH RAIL AND ALL SUBWAY
mbta_clean <- mbta_ts |>
  filter(!route_or_line %in% c('All Subway Lines', 'Heavy Rail', 'Light Rail'))

#CREATE A RAIL MODE VARIABLE
mbta_clean <- mbta_clean |>
  mutate(
    rail_type = case_when(
      route_or_line %in% c('Blue Line', 'Orange Line', 'Red Line') ~ 'Heavy Rail',
      route_or_line %in% c('Green Line') ~ 'Light Rail'
    )
  )

# UPDATE mbta_clean to have rail_type as another key
mbta_clean <- mbta_clean |>
  update_tsibble(key = c(daytype, rail_type, route_or_line),
                 index = month)

# BUILDING MIXED HIERARCHICAL AND GROUPED STRUCTURE USING aggregate_key()
# daytype is the group component and rail_type and route_or_line is the hierarchical component

mbta_hts <- mbta_clean |>
  aggregate_key(
    daytype / (rail_type / route_or_line),
    ridership_average = sum(ridership_average)
  )

Graphing the Hierarchical Structure of the Time Series for all day types, weekdays, and weekends

# WEEKDAYS
weekday_hierarchy <- mbta_hts |> 
  filter(route_or_line == '<aggregated>', 
         rail_type == '<aggregated>',
         daytype == 'Weekday') |>
    autoplot(ridership_average, aes(color = 'All Subway')) + 
  geom_line(data = mbta_hts |> 
              filter(rail_type %in% c('Heavy Rail'),                        
                     route_or_line == '<aggregated>',
                     daytype == 'Weekday'), 
            aes(y = ridership_average, color = 'Heavy Rail')) +
    geom_line(data = mbta_hts |> filter(route_or_line %in% c('Blue Line'),
                                        daytype == 'Weekday'),
              aes(y = ridership_average, color = 'Blue Line')) +
    geom_line(data = mbta_hts |> filter(route_or_line %in% c('Green Line'),
                                        daytype == 'Weekday'),
              aes(y = ridership_average, color = 'Green Line/Light Rail')) +
    geom_line(data = mbta_hts |> filter(route_or_line %in% c('Orange Line'),
                                        daytype == 'Weekday'),
              aes(y = ridership_average, color = 'Orange Line')) +
    geom_line(data = mbta_hts |> filter(route_or_line %in% c('Red Line'),
                                        daytype == 'Weekday'),
              aes(y = ridership_average, color = 'Red Line')) +
    scale_color_manual(
        values = c('black', 'magenta','blue', '#00843D', 'orange', 'red'),
        breaks = c('All Subway','Heavy Rail','Blue Line', 'Green Line/Light Rail', 'Orange Line', 'Red Line')
    ) +
    labs(title = 'MBTA Monthly Weekday Ridership Average by Mode',
         subtitle = 'Jul 2018 - Dec 2025',
         x = 'Time (Months)',
         y = 'Average Daily Ridership') +
    theme(legend.position = 'bottom') +
    scale_y_continuous(labels = scales::comma)
# Saving the graph to the "Plots" subfolder
ggsave('../Plots/hierarchy_graphs/weekday_hierarchy_plot.pdf',
       plot = weekday_hierarchy,
       height = 6,
       width = 8)

# WEEKENDS
weekend_hierarchy <- mbta_hts |> 
  filter(route_or_line == '<aggregated>', 
         rail_type == '<aggregated>', 
         daytype == 'Weekend') |>
    autoplot(ridership_average, aes(color = 'All Subway')) + 
  geom_line(data = mbta_hts |> 
              filter(rail_type %in% c('Heavy Rail'), 
                     route_or_line == '<aggregated>', 
                     daytype == 'Weekend'), 
            aes(y = ridership_average, color = 'Heavy Rail')) +
    geom_line(data = mbta_hts |> filter(route_or_line %in% c('Blue Line'),
                                        daytype == 'Weekend'),
              aes(y = ridership_average, color = 'Blue Line')) +
    geom_line(data = mbta_hts |> filter(route_or_line %in% c('Green Line'),
                                        daytype == 'Weekend'),
              aes(y = ridership_average, color = 'Green Line/Light Rail')) +
    geom_line(data = mbta_hts |> filter(route_or_line %in% c('Orange Line'),
                                        daytype == 'Weekend'),
              aes(y = ridership_average, color = 'Orange Line')) +
    geom_line(data = mbta_hts |> filter(route_or_line %in% c('Red Line'),
                                        daytype == 'Weekend'),
              aes(y = ridership_average, color = 'Red Line')) +
    scale_color_manual(
        values = c('black', 'magenta','blue', '#00843D', 'orange', 'red'),
        breaks = c('All Subway','Heavy Rail','Blue Line', 'Green Line/Light Rail', 'Orange Line', 'Red Line')
    ) + 
    labs(title = 'MBTA Monthly Weekend Ridership Average by Mode',
         subtitle = 'Jul 2018 - Dec 2025',
         x = 'Time (Months)',
         y = 'Average Daily Ridership') +
    theme(legend.position = 'bottom') +
    scale_y_continuous(labels = scales::comma)
# Saving the graph to the "Plots" subfolder
ggsave('../Plots/hierarchy_graphs/weekend_hierarchy_plot.pdf',
       plot = weekend_hierarchy,
       height = 6,
       width = 8)

Creating Train and Test Sets for the time series with the Hierarchical Structure

# TRAIN AND TEST SETS FOR JUST POST COVID DATA
train_post_hts <- mbta_hts |> filter_index('2020 Apr' ~ '2024 Oct')
test_post_hts <- mbta_hts |> filter_index('2024 Nov'~.)

Graphing the training and testing sets for the post covid data

train_test_plot <- mbta_hts |> filter(daytype == '<aggregated>') |> 
  autoplot(ridership_average, aes(color = 'Pre-Covid Data')) +
  geom_line(data = train_post_hts |> 
              filter(daytype == '<aggregated>'), 
            aes(y = ridership_average, color = 'Training Set')) +
  geom_line(data = test_post_hts |> 
              filter(daytype == '<aggregated>'), 
            aes(y = ridership_average, color = 'Testing Set')) +
  scale_color_manual(
    values = c('black', 'blue', 'red'),
    breaks = c('Pre-Covid Data', 'Training Set', 'Testing Set')
  ) +
  labs(title = 'MBTA Monthly Average Daily Subway Ridership', 
       subtitle = 'Train-Test Split', 
       x = 'Time (Months)', 
       y = 'Average Daily Ridership') +
  theme(legend.position = 'bottom')
# Saving the graph to the "Plots" subfolder
ggsave('../Plots/training_test_graphs/train_test_split.pdf',
       plot = train_test_plot,
       height = 6,
       width = 8)

Fitting ETS, ARIMA, NAIVE, SNAIVE, TSLM, and Ensemble Models on the Training Set and Forecasting Against the Test set

# FITTING models on post train set
fit_post_hts <- train_post_hts |>
  model(ets = ETS(ridership_average),
        arima = ARIMA(ridership_average),
        naive = NAIVE(ridership_average),
        snaive = SNAIVE(ridership_average),
        tslm = TSLM(ridership_average ~ trend() + season()))

# Ensemble Model
fit_post_hts <- fit_post_hts |>
  mutate(ensemble = (ets + arima + naive + snaive + tslm) / 5)

# Save the fitted values into a csv in the forecast_tables subfoler inside the Tables Folder
write_csv(fit_post_hts, '../Tables/forecast_tables/fit_post_hts.csv')

# Forecasting against the testing set. Reconciling the forecasts using the bottom up and minimum trace approach
fc_post_hts <- fit_post_hts |>
  reconcile(
    bu_ets = bottom_up(ets),
    bu_arima = bottom_up(arima),
    bu_naive = bottom_up(naive),
    bu_snaive = bottom_up(snaive),
    bu_tslm = bottom_up(tslm),
    bu_ensemble = bottom_up(ensemble),
    mint_ets = min_trace(ets, method = 'mint_shrink'),
    mint_arima = min_trace(arima, method = 'mint_shrink'),
    mint_naive = min_trace(naive, method = 'mint_shrink'),
    mint_snaive = min_trace(snaive, method = 'mint_shrink'),
    mint_tslm = min_trace(tslm, method = 'mint_shrink'),
    mint_ensemble = min_trace(ensemble, method = 'mint_shrink')
  ) |>
  forecast(h = 14)
# Save forecast table as a csv in the forecast_tables subfolder inside the Tables folder
write_csv(fc_post_hts, '../Tables/forecast_tables/fc_post_hts.csv')

# COMPUTING THE ERROR METRICS FOR ALL FORECASTING METHODS AT EACH LEVEL OF AGGREGATION FOR POST COVID DATA
accuracy_metrics_post <- accuracy(fc_post_hts, test_post_hts) |>
  select(.model, daytype, rail_type, route_or_line, ME, RMSE, MAE, MAPE)
# Save entire accuracy metrics table as a csv inside the forecast_tables subfolder in the Tables Folder
write_csv(accuracy_metrics_post, '../Tables/forecast_tables/accuracy_metrics_post.csv')

Accuracy Metric Comparison of Bottom Up, Original, and mint forecasting methods for red line weekday, all data, and post-covid data

# ETS COMPARISON between original, bottom up, and mint approach for post covid red line
red_ets_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('ets', 'bu_ets', 'mint_ets'),
                           daytype == 'Weekday',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Red Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(red_ets_weekday, '../Tables/red_line_tables/red_ets_weekday.png')

# # ARIMA COMPARISON between original and bottom up approach
red_arima_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('arima', 'bu_arima', 'mint_arima'),
                           daytype == 'Weekday',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Red Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(red_arima_weekday, '../Tables/red_line_tables/red_arima_weekday.png')

# NAIVE COMPARISON between original and bottom up approach
red_naive_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('naive', 'bu_naive', 'mint_naive'),
                           daytype == 'Weekday',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Red Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(red_naive_weekday, '../Tables/red_line_tables/red_naive_weekday.png')


# SNAIVE COMPARISON between original and bottom up approach
red_snaive_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('snaive', 'bu_snaive', 'mint_snaive'),
                           daytype == 'Weekday',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Red Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)
# Save the table
save_kable(red_snaive_weekday, '../Tables/red_line_tables/red_snaive_weekday.png')

# TSLM COMPARISON between original and bottom up approach
red_tslm_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('tslm', 'bu_tslm', 'mint_tslm'),
                           daytype == 'Weekday',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Red Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(red_tslm_weekday, '../Tables/red_line_tables/red_tslm_weekday.png')

# Ensemble Comparison between original, bottom up, and mint approach
red_ensemble_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('ensemble', 'bu_ensemble', 'mint_ensemble'),
                           daytype == 'Weekday',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Red Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(red_ensemble_weekday, '../Tables/red_line_tables/red_ensemble_weekday.png')

Accuracy Metric Comparison of Bottom Up vs Original vs mint forecasting methods for blue line weekday, all data and post-covid data

# ETS COMPARISON between original, bottom up, and mint approach for post covid blue line
blue_ets_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('ets', 'bu_ets', 'mint_ets'),
                           daytype == 'Weekday',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Blue Line') |>
   kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(blue_ets_weekday, '../Tables/blue_line_tables/blue_ets_weekday.png')

# # ARIMA COMPARISON between original and bottom up approach
blue_arima_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('arima', 'bu_arima', 'mint_arima'),
                           daytype == 'Weekday',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Blue Line') |>
    kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(blue_arima_weekday, '../Tables/blue_line_tables/blue_arima_weekday.png')

# NAIVE COMPARISON between original and bottom up approach
blue_naive_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('naive', 'bu_naive', 'mint_naive'),
                           daytype == 'Weekday',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Blue Line') |>
    kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(blue_naive_weekday, '../Tables/blue_line_tables/blue_naive_weekday.png') 

# SNAIVE COMPARISON between original and bottom up approach
blue_snaive_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('snaive', 'bu_snaive', 'mint_snaive'),
                           daytype == 'Weekday',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Blue Line') |>
    kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(blue_snaive_weekday, '../Tables/blue_line_tables/blue_snaive_weekday.png')

# TSLM COMPARISON between original and bottom up approach
blue_tslm_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('tslm', 'bu_tslm', 'mint_tslm'),
                           daytype == 'Weekday',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Blue Line') |>
    kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(blue_tslm_weekday, '../Tables/blue_line_tables/blue_tslm_weekday.png')

# Ensemble Comparison between original, bottom up, and mint approach
blue_ensemble_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('ensemble', 'bu_ensemble', 'mint_ensemble'),
                           daytype == 'Weekday',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Blue Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(blue_ensemble_weekday, 
           '../Tables/blue_line_tables/blue_ensemble_weekday.png')

Accuracy Metric Comparison of Bottom Up vs Original vs mint forecasting methods for orange line weekday, all data and post-covid data

# ETS COMPARISON between original, bottom up, and mint approach for post covid Orange line
orange_ets_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('ets', 'bu_ets', 'mint_ets'),
                           daytype == 'Weekday',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Orange Line') |>
   kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(orange_ets_weekday, '../Tables/orange_line_tables/orange_ets_weekday.png')

# # ARIMA COMPARISON between original and bottom up approach
orange_arima_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('arima', 'bu_arima', 'mint_arima'),
                           daytype == 'Weekday',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Orange Line') |>
    kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(orange_arima_weekday, 
           '../Tables/orange_line_tables/orange_arima_weekday.png')

# NAIVE COMPARISON between original and bottom up approach
orange_naive_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('naive', 'bu_naive', 'mint_naive'),
                           daytype == 'Weekday',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Orange Line') |>
    kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(orange_naive_weekday, '../Tables/orange_line_tables/orange_naive_weekday.png')

# SNAIVE COMPARISON between original and bottom up approach
orange_snaive_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('snaive', 'bu_snaive', 'mint_snaive'),
                           daytype == 'Weekday',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Orange Line') |>
    kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(orange_snaive_weekday, '../Tables/orange_line_tables/orange_snaive_weekday.png')

# TSLM COMPARISON between original and bottom up approach
orange_tslm_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('tslm', 'bu_tslm', 'mint_tslm'),
                           daytype == 'Weekday',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Orange Line') |>
    kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(orange_tslm_weekday, '../Tables/orange_line_tables/orange_tslm_weekday.png')

# Ensemble Comparison between original, bottom up, and mint approach
orange_ensemble_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('ensemble', 'bu_ensemble', 'mint_ensemble'),
                           daytype == 'Weekday',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Orange Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(orange_ensemble_weekday, 
           '../Tables/orange_line_tables/orange_ensemble_weekday.png')

Accuracy Metric Comparison of Bottom Up vs Original vs mint forecasting methods for heavy rail weekday aggregates, all data and post-covid data

# ETS COMPARISON between original, bottom up, and mint approach for post covid red line
aggregated_ets_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('ets', 'bu_ets', 'mint_ets'),
                                   daytype == 'Weekday',
                                   rail_type == 'Heavy Rail',
                                   route_or_line == '<aggregated>') |>
    bind_rows(
        accuracy_metrics_post |> filter(.model %in% c('ets', 'bu_ets', 'mint_ets'),
                                   daytype == 'Weekday',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |>
    bind_rows(
        accuracy_metrics_post |> filter(.model %in% c('ets', 'bu_ets', 'mint_ets'),
                                   daytype == '<aggregated>',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |> kable(digits = 2, format = 'html', booktabs = T) |> 
  kable_styling(full_width = F)

# Save the table
save_kable(aggregated_ets_weekday, 
           '../Tables/aggregated_levels_tables/aggregated_ets_weekday.png')

# # ARIMA COMPARISON between original and bottom up approach
aggregated_arima_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('arima', 'bu_arima', 'mint_arima'),
                                   daytype == 'Weekday',
                                   rail_type == 'Heavy Rail',
                                   route_or_line == '<aggregated>') |>
    bind_rows(
        accuracy_metrics_post |> filter(.model %in% c('arima', 'bu_arima', 'mint_arima'),
                                   daytype == 'Weekday',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |>
    bind_rows(
        accuracy_metrics_post |> filter(.model %in% c('arima', 'bu_arima', 'mint_arima'),
                                   daytype == '<aggregated>',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |> kable(digits = 2, format = 'html', booktabs = T) |> 
  kable_styling(full_width = F)

# Save the table
save_kable(aggregated_arima_weekday, 
           '../Tables/aggregated_levels_tables/aggregated_arima_weekday.png')

# NAIVE COMPARISON between original and bottom up approach
aggregated_naive_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('naive', 'bu_naive', 'mint_naive'),
                                   daytype == 'Weekday',
                                   rail_type == 'Heavy Rail',
                                   route_or_line == '<aggregated>') |>
    bind_rows(
        accuracy_metrics_post |> filter(.model %in% c('naive', 'bu_naive', 'mint_naive'),
                                   daytype == 'Weekday',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |>
    bind_rows(
        accuracy_metrics_post |> filter(.model %in% c('naive', 'bu_naive', 'mint_naive'),
                                   daytype == '<aggregated>',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |> kable(digits = 2, format = 'html', booktabs = T) |> 
  kable_styling(full_width = F)

# Save the table
save_kable(aggregated_naive_weekday, 
           '../Tables/aggregated_levels_tables/aggregated_naive_weekday.png')

# SNAIVE COMPARISON between original and bottom up approach
aggregated_snaive_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('snaive', 'bu_snaive', 'mint_snaive'),
                                   daytype == 'Weekday',
                                   rail_type == 'Heavy Rail',
                                   route_or_line == '<aggregated>') |>
    bind_rows(
        accuracy_metrics_post |> filter(.model %in% c('snaive', 'bu_snaive', 'mint_snaive'),
                                   daytype == 'Weekday',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |>
    bind_rows(
        accuracy_metrics_post |> filter(.model %in% c('snaive', 'bu_snaive', 'mint_snaive'),
                                   daytype == '<aggregated>',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |> kable(digits = 2, format = 'html', booktabs = T) |> 
  kable_styling(full_width = F)

# Save the table
save_kable(aggregated_snaive_weekday, 
           '../Tables/aggregated_levels_tables/aggregated_snaive_weekday.png')

# TSLM COMPARISON between original and bottom up approach
aggregated_tslm_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('tslm', 'bu_tslm', 'mint_tslm'),
                                   daytype == 'Weekday',
                                   rail_type == 'Heavy Rail',
                                   route_or_line == '<aggregated>') |>
    bind_rows(
        accuracy_metrics_post |> filter(.model %in% c('tslm', 'bu_tslm', 'mint_tslm'),
                                   daytype == 'Weekday',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |>
    bind_rows(
        accuracy_metrics_post |> filter(.model %in% c('tslm', 'bu_tslm', 'mint_tslm'),
                                   daytype == '<aggregated>',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |> kable(digits = 2, format = 'html', booktabs = T) |> 
  kable_styling(full_width = F)

# Save the table
save_kable(aggregated_tslm_weekday, 
           '../Tables/aggregated_levels_tables/aggregated_tslm_weekday.png')

# Ensemble comparison between the original, bottom up, and mint approach
aggregated_ensemble_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('ensemble', 'bu_ensemble', 'mint_ensemble'),
                                   daytype == 'Weekday',
                                   rail_type == 'Heavy Rail',
                                   route_or_line == '<aggregated>') |>
    bind_rows(
        accuracy_metrics_post |> 
          filter(.model %in% c('ensemble', 'bu_ensemble', 'mint_ensemble'),
                                   daytype == 'Weekday',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |>
    bind_rows(
        accuracy_metrics_post |> 
          filter(.model %in% c('ensemble', 'bu_ensemble', 'mint_ensemble'),
                                   daytype == '<aggregated>',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |> kable(digits = 2, format = 'html', booktabs = T) |> 
  kable_styling(full_width = F)

# Save the table
save_kable(aggregated_ensemble_weekday, 
           '../Tables/aggregated_levels_tables/aggregated_ensemble_weekday.png')

Accuracy Metric Comparison of Bottom Up vs Original vs mint forecasting methods for green line/light rail weekday, all data and post-covid data

# ETS COMPARISON between original, bottom up, and mint approach for post covid green line
green_ets_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('ets', 'bu_ets', 'mint_ets'),
                           daytype == 'Weekday',
                           rail_type == 'Light Rail',
                           route_or_line == 'Green Line') |>
   kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(green_ets_weekday, '../Tables/green_line_tables/green_ets_weekday.png')

# # ARIMA COMPARISON between original and bottom up approach
green_arima_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('arima', 'bu_arima', 'mint_arima'),
                           daytype == 'Weekday',
                           rail_type == 'Light Rail',
                           route_or_line == 'Green Line') |>
    kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(green_arima_weekday, '../Tables/green_line_tables/green_arima_weekday.png')

# NAIVE COMPARISON between original and bottom up approach
green_naive_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('naive', 'bu_naive', 'mint_naive'),
                           daytype == 'Weekday',
                           rail_type == 'Light Rail',
                           route_or_line == 'Green Line') |>
    kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(green_naive_weekday, '../Tables/green_line_tables/green_naive_weekday.png')

# SNAIVE COMPARISON between original and bottom up approach
green_snaive_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('snaive', 'bu_snaive', 'mint_snaive'),
                           daytype == 'Weekday',
                           rail_type == 'Light Rail',
                           route_or_line == 'Green Line') |>
    kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(green_snaive_weekday, '../Tables/green_line_tables/green_snaive_weekday.png')

# TSLM COMPARISON between original and bottom up approach
green_tslm_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('tslm', 'bu_tslm', 'mint_tslm'),
                           daytype == 'Weekday',
                           rail_type == 'Light Rail',
                           route_or_line == 'Green Line') |>
    kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(green_tslm_weekday, '../Tables/green_line_tables/green_tslm_weekday.png')

# Ensemble COMPARISON between original and bottom up approach and mint
green_ensemble_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('ensemble', 'bu_ensemble', 'mint_ensemble'),
                           daytype == 'Weekday',
                           rail_type == 'Light Rail',
                           route_or_line == 'Green Line') |>
    kable(digits = 2, format = 'html', booktabs = T) |> 
  kable_styling(full_width = F)

# Save the table
save_kable(green_ensemble_weekday, 
           '../Tables/green_line_tables/green_ensemble_weekday.png')

Accuracy Metric Comparison of Bottom Up vs Original vs mint forecasting methods for red line Weekend, all data and post-covid data

# ETS COMPARISON between original, bottom up, and mint approach for post covid red line
red_ets_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('ets', 'bu_ets', 'mint_ets'),
                           daytype == 'Weekend',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Red Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(red_ets_weekend, '../Tables/red_line_tables/red_ets_weekend.png')

# ARIMA COMPARISON between original and bottom up approach
red_arima_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('arima', 'bu_arima', 'mint_arima'),
                           daytype == 'Weekend',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Red Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(red_arima_weekend, '../Tables/red_line_tables/red_arima_weekend.png')

# NAIVE COMPARISON between original and bottom up approach
red_naive_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('naive', 'bu_naive', 'mint_naive'),
                           daytype == 'Weekend',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Red Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(red_naive_weekend, '../Tables/red_line_tables/red_naive_weekend.png')

# SNAIVE COMPARISON between original and bottom up approach
red_snaive_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('snaive', 'bu_snaive', 'mint_snaive'),
                           daytype == 'Weekend',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Red Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(red_snaive_weekend, '../Tables/red_line_tables/red_snaive_weekend.png')

# TSLM COMPARISON between original and bottom up approach
red_tslm_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('tslm', 'bu_tslm', 'mint_tslm'),
                           daytype == 'Weekend',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Red Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(red_tslm_weekend, '../Tables/red_line_tables/red_tslm_weekend.png')

# Ensemble COMPARISON between original and bottom up approach
red_ensemble_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('ensemble', 'bu_ensemble', 'mint_ensemble'),
                           daytype == 'Weekend',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Red Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(red_ensemble_weekend, '../Tables/red_line_tables/red_ensemble_weekend.png')

Accuracy Metric Comparison of Bottom Up vs Original vs mint forecasting methods for blue line Weekend, all data and post-covid data

# ETS COMPARISON between original, bottom up, and mint approach for post covid red line
blue_ets_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('ets', 'bu_ets', 'mint_ets'),
                           daytype == 'Weekend',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Blue Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(blue_ets_weekend, '../Tables/blue_line_tables/blue_ets_weekend.png')

# # ARIMA COMPARISON between original and bottom up approach
blue_arima_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('arima', 'bu_arima', 'mint_arima'),
                           daytype == 'Weekend',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Blue Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(blue_arima_weekend, '../Tables/blue_line_tables/blue_arima_weekend.png')

# NAIVE COMPARISON between original and bottom up approach
blue_naive_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('naive', 'bu_naive', 'mint_naive'),
                           daytype == 'Weekend',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Blue Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(blue_naive_weekend, '../Tables/blue_line_tables/blue_naive_weekend.png')

# SNAIVE COMPARISON between original and bottom up approach
blue_snaive_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('snaive', 'bu_snaive', 'mint_snaive'),
                           daytype == 'Weekend',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Blue Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(blue_snaive_weekend, '../Tables/blue_line_tables/blue_snaive_weekend.png')

# TSLM COMPARISON between original and bottom up approach
blue_tslm_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('tslm', 'bu_tslm', 'mint_tslm'),
                           daytype == 'Weekend',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Blue Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(blue_tslm_weekend, '../Tables/blue_line_tables/blue_tslm_weekend.png')

# Ensemble COMPARISON between original and bottom up approach
blue_ensemble_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('ensemble', 'bu_ensemble', 'mint_ensemble'),
                           daytype == 'Weekend',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Blue Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(blue_ensemble_weekend, 
           '../Tables/blue_line_tables/blue_ensemble_weekend.png')

Accuracy Metric Comparison of Bottom Up vs Original vs mint forecasting methods for orange line Weekend, all data and post-covid data

# ETS COMPARISON between original, bottom up, and mint approach for post covid red line
orange_ets_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('ets', 'bu_ets', 'mint_ets'),
                           daytype == 'Weekend',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Orange Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(orange_ets_weekend, '../Tables/orange_line_tables/orange_ets_weekend.png')

# # ARIMA COMPARISON between original and bottom up approach
orange_arima_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('arima', 'bu_arima', 'mint_arima'),
                           daytype == 'Weekend',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Orange Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(orange_arima_weekend, '../Tables/orange_line_tables/orange_arima_weekend.png')

# NAIVE COMPARISON between original and bottom up approach
orange_naive_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('naive', 'bu_naive', 'mint_naive'),
                           daytype == 'Weekend',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Orange Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(orange_naive_weekend, '../Tables/orange_line_tables/orange_naive_weekend.png')

# SNAIVE COMPARISON between original and bottom up approach
orange_snaive_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('snaive', 'bu_snaive', 'mint_snaive'),
                           daytype == 'Weekend',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Orange Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(orange_snaive_weekend, '../Tables/orange_line_tables/orange_snaive_weekend.png')

# TSLM COMPARISON between original and bottom up approach
orange_tslm_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('tslm', 'bu_tslm', 'mint_tslm'),
                           daytype == 'Weekend',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Orange Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(orange_tslm_weekend, '../Tables/orange_line_tables/orange_tslm_weekend.png')

# Ensemble COMPARISON between original and bottom up approach
orange_ensemble_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('ensemble', 'bu_ensemble', 'mint_ensemble'),
                           daytype == 'Weekend',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Orange Line') |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(orange_ensemble_weekend, 
           '../Tables/orange_line_tables/orange_ensemble_weekend.png')

Accuracy Metric Comparison of Bottom Up vs Original vs mint forecasting methods for heavy rail weekend aggregates, all data and post-covid data

# ETS COMPARISON between original, bottom up, and mint approach for post covid red line
aggregated_ets_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('ets', 'bu_ets', 'mint_ets'),
                                   daytype == 'Weekend',
                                   rail_type == 'Heavy Rail',
                                   route_or_line == '<aggregated>') |>
    bind_rows(
        accuracy_metrics_post |> filter(.model %in% c('ets', 'bu_ets', 'mint_ets'),
                                   daytype == 'Weekend',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |>
    bind_rows(
        accuracy_metrics_post |> filter(.model %in% c('ets', 'bu_ets', 'mint_ets'),
                                   daytype == '<aggregated>',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |> kable(digits = 2, format = 'html', booktabs = T) |> 
  kable_styling(full_width = F)

# Save the table
save_kable(aggregated_ets_weekend, 
           '../Tables/aggregated_levels_tables/aggregated_ets_weekend.png')

# # ARIMA COMPARISON between original and bottom up approach
aggregated_arima_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('arima', 'bu_arima', 'mint_arima'),
                                   daytype == 'Weekend',
                                   rail_type == 'Heavy Rail',
                                   route_or_line == '<aggregated>') |>
    bind_rows(
        accuracy_metrics_post |> filter(.model %in% c('arima', 'bu_arima', 'mint_arima'),
                                   daytype == 'Weekend',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |>
    bind_rows(
        accuracy_metrics_post |> filter(.model %in% c('arima', 'bu_arima', 'mint_arima'),
                                   daytype == '<aggregated>',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |> kable(digits = 2, format = 'html', booktabs = T) |> 
  kable_styling(full_width = F)

# Save the table
save_kable(aggregated_arima_weekend, 
           '../Tables/aggregated_levels_tables/aggregated_arima_weekend.png')

# NAIVE COMPARISON between original and bottom up approach
aggregated_naive_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('naive', 'bu_naive', 'mint_naive'),
                                   daytype == 'Weekend',
                                   rail_type == 'Heavy Rail',
                                   route_or_line == '<aggregated>') |>
    bind_rows(
        accuracy_metrics_post |> filter(.model %in% c('naive', 'bu_naive', 'mint_naive'),
                                   daytype == 'Weekend',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |>
    bind_rows(
        accuracy_metrics_post |> filter(.model %in% c('naive', 'bu_naive', 'mint_naive'),
                                   daytype == '<aggregated>',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |> kable(digits = 2, format = 'html', booktabs = T) |> 
  kable_styling(full_width = F)

# Save the table
save_kable(aggregated_naive_weekend, 
           '../Tables/aggregated_levels_tables/aggregated_naive_weekend.png')

# SNAIVE COMPARISON between original and bottom up approach
aggregated_snaive_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('snaive', 'bu_snaive', 'mint_snaive'),
                                   daytype == 'Weekend',
                                   rail_type == 'Heavy Rail',
                                   route_or_line == '<aggregated>') |>
    bind_rows(
        accuracy_metrics_post |> filter(.model %in% c('snaive', 'bu_snaive', 'mint_snaive'),
                                   daytype == 'Weekend',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |>
    bind_rows(
        accuracy_metrics_post |> filter(.model %in% c('snaive', 'bu_snaive', 'mint_snaive'),
                                   daytype == '<aggregated>',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |> kable(digits = 2, format = 'html', booktabs = T) |> 
  kable_styling(full_width = F)

# Save the table
save_kable(aggregated_snaive_weekend, 
           '../Tables/aggregated_levels_tables/aggregated_snaive_weekend.png')

# TSLM COMPARISON between original and bottom up approach
aggregated_tslm_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('tslm', 'bu_tslm', 'mint_tslm'),
                                   daytype == 'Weekend',
                                   rail_type == 'Heavy Rail',
                                   route_or_line == '<aggregated>') |>
    bind_rows(
        accuracy_metrics_post |> filter(.model %in% c('tslm', 'bu_tslm', 'mint_tslm'),
                                   daytype == 'Weekend',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |>
    bind_rows(
        accuracy_metrics_post |> filter(.model %in% c('tslm', 'bu_tslm', 'mint_tslm'),
                                   daytype == '<aggregated>',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |> kable(digits = 2, format = 'html', booktabs = T) |> 
  kable_styling(full_width = F)

# Save the table
save_kable(aggregated_tslm_weekend, 
           '../Tables/aggregated_levels_tables/aggregated_tslm_weekend.png')

# Ensemble comparison between the original, bottom up, and mint approach
aggregated_ensemble_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('ensemble', 'bu_ensemble', 'mint_ensemble'),
                                   daytype == 'Weekend',
                                   rail_type == 'Heavy Rail',
                                   route_or_line == '<aggregated>') |>
    bind_rows(
        accuracy_metrics_post |> 
          filter(.model %in% c('ensemble', 'bu_ensemble', 'mint_ensemble'),
                                   daytype == 'Weekend',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |>
    bind_rows(
        accuracy_metrics_post |> 
          filter(.model %in% c('ensemble', 'bu_ensemble', 'mint_ensemble'),
                                   daytype == '<aggregated>',
                                   rail_type == '<aggregated>',
                                   route_or_line == '<aggregated>')
    ) |> kable(digits = 2, format = 'html', booktabs = T) |> 
  kable_styling(full_width = F)

# Save the table
save_kable(aggregated_ensemble_weekend, 
           '../Tables/aggregated_levels_tables/aggregated_ensemble_weekend.png')

Accuracy Metric Comparison of Bottom Up vs Original vs mint forecasting methods for green line/light rail weekend, all data and post-covid data

# ETS COMPARISON between original, bottom up, and mint approach for post covid green line
green_ets_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('ets', 'bu_ets', 'mint_ets'),
                           daytype == 'Weekend',
                           rail_type == 'Light Rail',
                           route_or_line == 'Green Line') |>
   kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(green_ets_weekend, '../Tables/green_line_tables/green_ets_weekend.png')

# # ARIMA COMPARISON between original and bottom up approach
green_arima_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('arima', 'bu_arima', 'mint_arima'),
                           daytype == 'Weekend',
                           rail_type == 'Light Rail',
                           route_or_line == 'Green Line') |>
    kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(green_arima_weekend, '../Tables/green_line_tables/green_arima_weekend.png')

# NAIVE COMPARISON between original and bottom up approach
green_naive_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('naive', 'bu_naive', 'mint_naive'),
                           daytype == 'Weekend',
                           rail_type == 'Light Rail',
                           route_or_line == 'Green Line') |>
    kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(green_naive_weekend, '../Tables/green_line_tables/green_naive_weekend.png')

# SNAIVE COMPARISON between original and bottom up approach
green_snaive_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('snaive', 'bu_snaive', 'mint_snaive'),
                           daytype == 'Weekend',
                           rail_type == 'Light Rail',
                           route_or_line == 'Green Line') |>
    kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(green_snaive_weekend, '../Tables/green_line_tables/green_snaive_weekend.png')

# TSLM COMPARISON between original and bottom up approach
green_tslm_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('tslm', 'bu_tslm', 'mint_tslm'),
                           daytype == 'Weekend',
                           rail_type == 'Light Rail',
                           route_or_line == 'Green Line') |>
    kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(green_tslm_weekend, '../Tables/green_line_tables/green_tslm_weekend.png')

# Ensemble COMPARISON between original and bottom up approach
green_ensemble_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('ensemble', 'bu_ensemble', 'mint_ensemble'),
                           daytype == 'Weekend',
                           rail_type == 'Light Rail',
                           route_or_line == 'Green Line') |>
    kable(digits = 2, format = 'html', booktabs = T) |> 
  kable_styling(full_width = F)

# Save the table
save_kable(green_ensemble_weekend, 
           '../Tables/green_line_tables/green_ensemble_weekend.png')

Accuracy Metric Comparison of each forecasting method nomination for each individual color line and daytype

# Red Weekday
red_best_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('mint_ets', 'mint_arima', 'naive', 'snaive', 'tslm', 'mint_ensemble'),
                           daytype == 'Weekday',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Red Line') |>
  arrange(MAPE) |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(red_best_weekday, '../Tables/red_line_tables/red_best_weekday.png')

# Red Weekend
red_best_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('mint_ets', 'mint_arima', 'naive', 'snaive', 'tslm', 'ensemble'),
                           daytype == 'Weekend',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Red Line') |>
  arrange(MAPE) |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(red_best_weekend, '../Tables/red_line_tables/red_best_weekend.png')

# Blue Weekday
blue_best_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('mint_ets', 'bu_arima', 'naive', 'snaive', 'tslm', 'mint_ensemble'),
                           daytype == 'Weekday',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Blue Line') |>
  arrange(MAPE) |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(blue_best_weekday, '../Tables/blue_line_tables/blue_best_weekday.png')

# Blue Weekend
blue_best_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('mint_ets', 'mint_arima', 'naive', 'snaive', 'tslm', 'mint_ensemble'),
                           daytype == 'Weekend',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Blue Line') |>
  arrange(MAPE) |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(blue_best_weekend, '../Tables/blue_line_tables/blue_best_weekend.png')

# Orange Weekday
orange_best_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('bu_ets', 'bu_arima', 'naive', 'snaive', 'tslm', 'ensemble'),
                           daytype == 'Weekday',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Orange Line') |>
  arrange(MAPE) |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(orange_best_weekday, '../Tables/orange_line_tables/orange_best_weekday.png')

# Orange Weekend
orange_best_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('bu_ets', 'bu_arima', 'naive', 'snaive', 'tslm', 'ensemble'),
                           daytype == 'Weekend',
                           rail_type == 'Heavy Rail',
                           route_or_line == 'Orange Line') |>
  arrange(MAPE) |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(orange_best_weekend, '../Tables/orange_line_tables/orange_best_weekend.png')

# Green Weekday
green_best_weekday <- accuracy_metrics_post |> 
  filter(.model %in% c('bu_ets', 'mint_arima', 'naive', 'snaive', 'tslm', 'mint_ensemble'),
                           daytype == 'Weekday',
                           rail_type == 'Light Rail',
                           route_or_line == 'Green Line') |>
  arrange(MAPE) |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(green_best_weekday, '../Tables/green_line_tables/green_best_weekday.png')

# Green Weekend
green_best_weekend <- accuracy_metrics_post |> 
  filter(.model %in% c('bu_ets', 'bu_arima', 'naive', 'snaive', 'tslm', 'mint_ensemble'),
                           daytype == 'Weekend',
                           rail_type == 'Light Rail',
                           route_or_line == 'Green Line') |>
  arrange(MAPE) |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

# Save the table
save_kable(green_best_weekend, '../Tables/green_line_tables/green_best_weekend.png')

Table Displaying the Best Forecasting Method for Each Subway Line for Weekdays and Weekends

# BEST MODEL FOR EACH SUBWAY LINE FOR WEEKDAYS  
rgbo_weekday_mape_table <- accuracy_metrics_post |>
  filter(.model == 'tslm', 
         daytype == 'Weekday',
         route_or_line == 'Orange Line') |>
    bind_rows(accuracy_metrics_post |> 
                filter(.model == 'mint_arima', 
                       daytype == 'Weekday',
                       route_or_line == 'Red Line')) |>
    bind_rows(accuracy_metrics_post |> 
                filter(.model == 'mint_ets', 
                       daytype == 'Weekday',
                       route_or_line == 'Blue Line')) |>
    bind_rows(accuracy_metrics_post |> 
                filter(.model == 'tslm', 
                       daytype == 'Weekday',
                       route_or_line == 'Green Line')) |>
  select(route_or_line, daytype, .model, MAPE)

rgbo_weekday_mape_table <- rgbo_weekday_mape_table |>
  rename('Route or Line' = 'route_or_line',
         'Day Type' = 'daytype',
         'Model' = '.model')
rgbo_weekday_mape_table[[1, 3]] <- 'TSLM'
rgbo_weekday_mape_table[[2, 3]] <- 'Mint Arima'
rgbo_weekday_mape_table[[3, 3]] <- 'Mint ETS'
rgbo_weekday_mape_table[[4, 3]] <- 'TSLM'

  
rgbo_weekday_mape_kable <- rgbo_weekday_mape_table |> kable(digits = 2, format = 'html', booktabs = T) |> 
  kable_styling(full_width = F)

# Save the table
save_kable(rgbo_weekday_mape_kable, 
           '../Tables/subway_lines_best_methods/rgbo_best_fc_weekday.png')


# BEST MODEL FOR EACH SUBWAY LINE FOR WEEKDENDS
rgbo_weekend_mape_table <- accuracy_metrics_post |>
    filter(.model == 'tslm', 
           daytype == 'Weekend',
           route_or_line == 'Red Line') |>
    bind_rows(accuracy_metrics_post |> 
                  filter(.model == 'ensemble', 
                         daytype == 'Weekend',
                         route_or_line == 'Orange Line')) |>
    bind_rows(accuracy_metrics_post |> 
                  filter(.model == 'mint_ets', 
                         daytype == 'Weekend',
                         route_or_line == 'Blue Line')) |>
    bind_rows(accuracy_metrics_post |> 
                  filter(.model == 'bu_arima', 
                         daytype == 'Weekend',
                         route_or_line == 'Green Line')) |>
    select(route_or_line, daytype, .model, MAPE) 

rgbo_weekend_mape_table <- rgbo_weekend_mape_table |>
  rename('Route or Line' = 'route_or_line',
         'Day Type' = 'daytype',
         'Model' = '.model')
rgbo_weekend_mape_table[[1, 3]] <- 'TSLM'
rgbo_weekend_mape_table[[2, 3]] <- 'Ensemble'
rgbo_weekend_mape_table[[3, 3]] <- 'Mint ETS'
rgbo_weekend_mape_table[[4, 3]] <- 'Bottom Up Arima'

  
rgbo_weekend_mape_kable <- rgbo_weekend_mape_table |> 
  kable(digits = 2, format = 'html', booktabs = T) |> 
  kable_styling(full_width = F)

# Save the table
save_kable(rgbo_weekend_mape_kable, 
           '../Tables/subway_lines_best_methods/rgbo_best_fc_weekend.png')

# DISPLAYING BEST FORECASTING METHOD FOR EACH AGGREGATED SERIES ALL IN ONE TABLE
aggregated_mape_table <- accuracy_metrics_post |>
    filter(.model == 'bu_ensemble', 
           daytype == 'Weekday',
           rail_type == 'Heavy Rail',
           route_or_line == '<aggregated>') |>
    bind_rows(accuracy_metrics_post |> 
                  filter(.model == 'bu_ensemble', 
                         daytype == 'Weekend',
                         rail_type == 'Heavy Rail',
                         route_or_line == '<aggregated>')) |>
    bind_rows(accuracy_metrics_post |> 
                  filter(.model == 'arima', 
                         daytype == 'Weekday',
                         rail_type == '<aggregated>',
                         route_or_line == '<aggregated>')) |>
    bind_rows(accuracy_metrics_post |> 
                  filter(.model == 'mint_ensemble', 
                         daytype == 'Weekend',
                         rail_type == '<aggregated>',
                         route_or_line == '<aggregated>')) |>
    select(daytype, rail_type, route_or_line, .model, MAPE)
# Changing the aggregated value so they appear in the table
aggregated_mape_table[[1, 3]] <- 'All Lines'
aggregated_mape_table[[2, 3]] <- 'All Lines'
aggregated_mape_table[[3, 2]] <- 'All Rails'
aggregated_mape_table[[3, 3]] <- 'All Lines'
aggregated_mape_table[[4, 2]] <- 'All Rails'
aggregated_mape_table[[4, 3]] <- 'All Lines'
aggregated_mape_table[[1, 4]] <- 'Bottom Up Ensemble'
aggregated_mape_table[[2, 4]] <- 'Bottom Up Ensemble'
aggregated_mape_table[[3, 4]] <- 'Arima'
aggregated_mape_table[[4, 4]] <- 'Arima'
aggregated_mape_table <- aggregated_mape_table |> 
  rename('Day Type' = 'daytype',
         'Rail Type' = 'rail_type',
         'Route or Line' = 'route_or_line',
         'Model' = '.model')

aggregated_mape_kable <- aggregated_mape_table |> 
  kable(digits = 2, format = 'html', booktabs = T) |> 
  kable_styling(full_width = F)

# Save the table
save_kable(aggregated_mape_kable, 
           '../Tables/subway_lines_best_methods/aggregated_best_methods.png')

Graphing the fitted values against the training set for each forecasting method for a few of the color line time series

# Fitted vs actual values for total monthly average daily subway ridership
total_subway_fitted_values <- augment(fit_post_hts) |> 
  filter(daytype == '<aggregated>') |>
    ggplot(aes(x = month)) +
    geom_line(aes(y = ridership_average, color = 'Actual Values')) +
    geom_line(aes(y = .fitted, color = 'Fitted Values')) +
    scale_color_manual(
        values = c('black', 'blue'),
        breaks = c('Actual Values', 'Fitted Values')
    ) +
    facet_wrap(~.model, nrow = 2) +
    theme(legend.position = 'bottom') +
    labs(title = "Comparing the Fitted Vs Training Values for each Forecasting Method",
         subtitle = "Total Subway Ridership for All Days",
         x = 'Time (Months)',
         y = 'Ridership Average')
# Saving the graph to the "Plots" subfolder
ggsave('../Plots/training_test_graphs/training_vs_fitted_values_subway.pdf',
       plot = total_subway_fitted_values,
       height = 6,
       width = 8)

Graphing Best Forecasting Approach (bottom up or mint) for each forecasting method (ets, arima, naive, snaive, tslm) for each color line time series (red, blue, orange, green) for both weekdays and weekends

# WEEKDAYS FIRST
# RED LINE WEEKDAY
red_line_fc_weekday <- fc_post_hts |> 
    filter(.model %in% c('bu_arima', 'mint_ets', 'naive', 'snaive', 'tslm', 'ensemble'), 
           daytype == 'Weekday', route_or_line == 'Red Line') |>
    autoplot(mbta_hts |> filter(daytype == 'Weekday', route_or_line == 'Red Line')) +
    facet_wrap(~.model, nrow = 2) +
    theme(legend.position = 'bottom') +
    labs(title = 'Red Line Weekday Ridership Forecast Accuracy Comparison',
         x = 'Time (Months)',
         y = 'Average Weekday Ridership') +
    scale_y_continuous(labels = scales::comma)
# Saving the graph to the "Plots" subfolder
ggsave('../Plots/red_line_graphs/red_line_fc_weekday.pdf',
       plot = red_line_fc_weekday,
       height = 6,
       width = 8)

# BLUE LINE WEEKDAY
blue_line_fc_weekday <- fc_post_hts |> 
    filter(.model %in% c('bu_arima', 'mint_ets', 'naive', 'snaive', 'tslm', 'ensemble'), 
           daytype == 'Weekday', route_or_line == 'Blue Line') |>
    autoplot(mbta_hts |> filter(daytype == 'Weekday', route_or_line == 'Blue Line')) +
    facet_wrap(~.model, nrow = 2) +
    theme(legend.position = 'bottom') +
    labs(title = 'Blue Line Weekday Ridership Forecast Accuracy Comparison',
         x = 'Time (Months)',
         y = 'Average Weekday Ridership') +
    scale_y_continuous(labels = scales::comma)
# Saving the graph to the "Plots" subfolder
ggsave('../Plots/blue_line_graphs/blue_line_fc_weekday.pdf',
       plot = blue_line_fc_weekday,
       height = 6,
       width = 8)

# ORANGE LINE WEEKDAY
orange_line_fc_weekday <- fc_post_hts |> 
    filter(.model %in% c('bu_arima', 'bu_ets', 'naive', 'snaive', 'tslm', 'ensemble'), 
           daytype == 'Weekday', route_or_line == 'Orange Line') |>
    autoplot(mbta_hts |> filter(daytype == 'Weekday', route_or_line == 'Orange Line')) +
    facet_wrap(~.model, nrow = 2) +
    theme(legend.position = 'bottom') +
    labs(title = 'Orange Line Weekday Ridership Forecast Accuracy Comparison',
         x = 'Time (Months)',
         y = 'Average Weekday Ridership') +
    scale_y_continuous(labels = scales::comma)
# Saving the graph to the "Plots" subfolder
ggsave('../Plots/orange_line_graphs/orange_line_fc_weekday.pdf',
       plot = orange_line_fc_weekday,
       height = 6,
       width = 8)

# GREEN LINE WEEKDAY
green_line_fc_weekday <- fc_post_hts |> 
    filter(.model %in% c('mint_arima', 'bu_ets', 'naive', 'snaive', 'tslm', 'ensemble'), 
           daytype == 'Weekday', route_or_line == 'Green Line') |>
    autoplot(mbta_hts |> filter(daytype == 'Weekday', route_or_line == 'Green Line')) +
    facet_wrap(~.model, nrow = 2) +
    theme(legend.position = 'bottom') +
    labs(title = 'Green Line Weekday Ridership Forecast Accuracy Comparison',
         x = 'Time (Months)',
         y = 'Average Weekday Ridership') +
    scale_y_continuous(labels = scales::comma)
# Saving the graph to the "Plots" subfolder
ggsave('../Plots/green_line_graphs/green_line_fc_weekday.pdf',
       plot = green_line_fc_weekday,
       height = 6,
       width = 8)

# NOW WEEKENDS

# RED LINE WEEKEND
red_line_fc_weekend <- fc_post_hts |> 
    filter(.model %in% c('mint_arima', 'mint_ets', 'naive', 'snaive', 'tslm', 'ensemble'), 
           daytype == 'Weekend', route_or_line == 'Red Line') |>
    autoplot(mbta_hts |> filter(daytype == 'Weekend', route_or_line == 'Red Line')) +
    facet_wrap(~.model, nrow = 2) +
    theme(legend.position = 'bottom') +
    labs(title = 'Red Line Weekend Ridership Forecast Accuracy Comparison',
         x = 'Time (Months)',
         y = 'Average Weekday Ridership') +
    scale_y_continuous(labels = scales::comma)
# Saving the graph to the "Plots" subfolder
ggsave('../Plots/red_line_graphs/red_line_fc_weekend.pdf',
       plot = red_line_fc_weekend,
       height = 6,
       width = 8)

# BLUE LINE WEEKEND
blue_line_fc_weekend <- fc_post_hts |> 
    filter(.model %in% c('bu_arima', 'mint_ets', 'naive', 'snaive', 'tslm', 'ensemble'), 
           daytype == 'Weekend', route_or_line == 'Blue Line') |>
    autoplot(mbta_hts |> filter(daytype == 'Weekend', route_or_line == 'Blue Line')) +
    facet_wrap(~.model, nrow = 2) +
    theme(legend.position = 'bottom') +
    labs(title = 'Blue Line Weekend Ridership Forecast Accuracy Comparison',
         x = 'Time (Months)',
         y = 'Average Weekday Ridership') +
    scale_y_continuous(labels = scales::comma)
# Saving the graph to the "Plots" subfolder
ggsave('../Plots/blue_line_graphs/blue_line_fc_weekend.pdf',
       plot = blue_line_fc_weekend,
       height = 6,
       width = 8)

# ORANGE LINE WEEKEND
orange_line_fc_weekend <- fc_post_hts |> 
    filter(.model %in% c('bu_arima', 'bu_ets', 'naive', 'snaive', 'tslm', 'ensemble'), 
           daytype == 'Weekend', route_or_line == 'Orange Line') |>
    autoplot(mbta_hts |> filter(daytype == 'Weekend', route_or_line == 'Orange Line')) +
    facet_wrap(~.model, nrow = 2) +
    theme(legend.position = 'bottom') +
    labs(title = 'Orange Line Weekend Ridership Forecast Accuracy Comparison',
         x = 'Time (Months)',
         y = 'Average Weekday Ridership') +
    scale_y_continuous(labels = scales::comma)
# Saving the graph to the "Plots" subfolder
ggsave('../Plots/orange_line_graphs/orange_line_fc_weekend.pdf',
       plot = orange_line_fc_weekend,
       height = 6,
       width = 8)

# GREEN LINE WEEKEND
green_line_fc_weekend <- fc_post_hts |> 
    filter(.model %in% c('bu_arima', 'bu_ets', 'naive', 'snaive', 'tslm', 'ensemble'), 
           daytype == 'Weekend', route_or_line == 'Green Line') |>
    autoplot(mbta_hts |> filter(daytype == 'Weekend', route_or_line == 'Green Line')) +
    facet_wrap(~.model, nrow = 2) +
    theme(legend.position = 'bottom') +
    labs(title = 'Green Line Weekend Ridership Forecast Accuracy Comparison',
         x = 'Time (Months)',
         y = 'Average Weekday Ridership') +
    scale_y_continuous(labels = scales::comma)
# Saving the graph to the "Plots" subfolder
ggsave('../Plots/green_line_graphs/green_line_fc_weekend.pdf',
       plot = green_line_fc_weekend,
       height = 6,
       width = 8)

Graphing the Best Forecasting Methods For each Color Line for Both Weekends and Weekdays and all aggregated series

# All FORECAST AGGREGATES
# Heavy rail weekdays
heavy_best_fc_weekday <- fc_post_hts |> 
  filter(daytype == 'Weekday', 
         route_or_line == '<aggregated>', 
         rail_type == 'Heavy Rail', .model == 'bu_ensemble') |>
    autoplot(mbta_hts |> filter(daytype == 'Weekday', 
                                route_or_line == '<aggregated>',
                                rail_type == 'Heavy Rail')) +
    labs(title = 'Heavy Rail Weekday Best Forecast',
         subtitle = 'Bottom Up Ensemble Method',
         x = 'Time (Months)',
         y = 'Ridership Average') +
  theme(legend.position = 'bottom') +
  scale_y_continuous(labels = scales::comma)

# Heavy Rail Weekends
heavy_best_fc_weekend <- fc_post_hts |> 
  filter(daytype == 'Weekend', 
         route_or_line == '<aggregated>', 
         rail_type == 'Heavy Rail', .model == 'bu_ensemble') |>
    autoplot(mbta_hts |> filter(daytype == 'Weekend', 
                                route_or_line == '<aggregated>',
                                rail_type == 'Heavy Rail')) +
    labs(title = 'Heavy Rail Weekend Best Forecast',
         subtitle = 'Bottom Up Ensemble Method',
         x = 'Time (Months)',
         y = 'Ridership Average') +
  theme(legend.position = 'bottom') +
  scale_y_continuous(labels = scales::comma)

# Subway Weekday
subway_best_fc_weekday <- fc_post_hts |> 
  filter(daytype == 'Weekday', 
         route_or_line == '<aggregated>', 
         rail_type == '<aggregated>', .model == 'arima') |>
    autoplot(mbta_hts |> filter(daytype == 'Weekday', 
                                route_or_line == '<aggregated>',
                                rail_type == '<aggregated>')) +
    labs(title = 'All Subway Weekday Best Forecast',
         subtitle = 'Original Arima Method',
         x = 'Time (Months)',
         y = 'Ridership Average') +
  theme(legend.position = 'bottom') +
  scale_y_continuous(labels = scales::comma)

# Subway Weekend
subway_best_fc_weekend <- fc_post_hts |> 
  filter(daytype == 'Weekend', 
         route_or_line == '<aggregated>', 
         rail_type == '<aggregated>', .model == 'mint_ensemble') |>
    autoplot(mbta_hts |> filter(daytype == 'Weekend', 
                                route_or_line == '<aggregated>',
                                rail_type == '<aggregated>')) +
    labs(title = 'All Subway Weekend Best Forecast',
         subtitle = 'Mint Ensemble Method',
         x = 'Time (Months)',
         y = 'Ridership Average') +
  theme(legend.position = 'bottom') +
  scale_y_continuous(labels = scales::comma)


# Graphing the heavy rail weekday and weekend with total subway weekday and weekends
subway_heavy_rail_best_fc <- (heavy_best_fc_weekday + heavy_best_fc_weekend) / 
  (subway_best_fc_weekday + subway_best_fc_weekend)
# Save the graph to the Plots folder
ggsave('../Plots/best_forecasting_methods_graphs/subway_heavy_rail_best_fc.pdf',
       plot = subway_heavy_rail_best_fc,
       height = 6,
       width = 8)

Cleaning Up Table Names with a Function

# Function that cleans up table names for the poster
clean_for_display <- function(df) {
  df |>
    rename_with(~ stringr::str_remove(.x, "^\\.")) |>
    rename_with(~ stringr::str_replace_all(.x, "_", " ")) |>
    rename_with(~ stringr::str_to_title(.x)) |>
    rename(`Line Type` = `Route Or Line`) |>
    rename(`MAPE` = `Mape`) |>
    mutate(
      Model = stringr::str_replace_all(Model, "_", " "),
      Model = stringr::str_to_title(Model),
      Model = dplyr::recode(Model, "Bu Ensemble" = "Bottom Up Ensemble")
    )
}

# BEST MODEL FOR EACH SUBWAY LINE FOR WEEKDAYS
rgbo_weekday_mape_table1 <- accuracy_metrics_post |>
  filter(.model == "tslm", daytype == "Weekday", route_or_line == "Orange Line") |>
  bind_rows(
    accuracy_metrics_post |>
      filter(.model == "mint_arima", daytype == "Weekday", route_or_line == "Red Line")
  ) |>
  bind_rows(
    accuracy_metrics_post |>
      filter(.model == "mint_ets", daytype == "Weekday", route_or_line == "Blue Line")
  ) |>
  bind_rows(
    accuracy_metrics_post |>
      filter(.model == "tslm", daytype == "Weekday", route_or_line == "Green Line")
  ) |>
  select(route_or_line, daytype, .model, MAPE) |>
  clean_for_display() |>
  kable(digits = 2)


# BEST MODEL FOR EACH SUBWAY LINE FOR WEEKENDS
rgbo_weekend_mape_table1 <- accuracy_metrics_post |>
  filter(.model == "tslm", daytype == "Weekend", route_or_line == "Red Line") |>
  bind_rows(
    accuracy_metrics_post |>
      filter(.model == "ensemble", daytype == "Weekend", route_or_line == "Orange Line")
  ) |>
  bind_rows(
    accuracy_metrics_post |>
      filter(.model == "mint_ets", daytype == "Weekend", route_or_line == "Blue Line")
  ) |>
  bind_rows(
    accuracy_metrics_post |>
      filter(.model == "arima", daytype == "Weekend", route_or_line == "Green Line")
  ) |>
  select(route_or_line, daytype, .model, MAPE) |>
  clean_for_display() |>
  kable(digits = 2)


# BEST FORECASTING METHOD FOR EACH AGGREGATED SERIES
aggregated_mape_table1 <- accuracy_metrics_post |>
  filter(.model == "bu_ensemble", daytype == "Weekday", 
         rail_type == "Heavy Rail", route_or_line == "<aggregated>") |>
  bind_rows(
    accuracy_metrics_post |>
      filter(.model == "bu_ensemble", daytype == "Weekend", 
             rail_type == "Heavy Rail", route_or_line == "<aggregated>")
  ) |>
  bind_rows(
    accuracy_metrics_post |>
      filter(.model == "arima", daytype == "Weekday", 
             rail_type == "<aggregated>", route_or_line == "<aggregated>")
  ) |>
  bind_rows(
    accuracy_metrics_post |>
      filter(.model == "mint_ensemble", daytype == "Weekend", 
             rail_type == "<aggregated>", route_or_line == "<aggregated>")
  ) |>
  bind_rows(
    accuracy_metrics_post |>
      filter(.model == "arima", daytype == "<aggregated>", 
             rail_type == "<aggregated>", route_or_line == "<aggregated>")
  ) |>
  select(daytype, rail_type, route_or_line, .model, MAPE) |>
  mutate(
    daytype = if_else(daytype == "<aggregated>", "All Days", daytype),
    rail_type = if_else(rail_type == "<aggregated>", "All Rails", rail_type),
    route_or_line = if_else(route_or_line == "<aggregated>", "All Lines", route_or_line)
  ) |>
  clean_for_display() |>
  kable(digits = 2)

Creating graphs for best forecasting method for each individual line with prediction interval colors corresponding to the line color

# A function that graphs the best forecasting method for each subway line for weekday ridership
plot_best_weekday <- function(route, model_name, subtitle_txt, line_col) {
  hist_data <- mbta_hts |>
    filter(daytype == "Weekday", route_or_line == route)
  
  fc_data <- fc_post_hts |>
    filter(daytype == "Weekday", route_or_line == route, .model == model_name)
  
  autoplot(hist_data, colour = "black", size = 0.5) +
    # Outer interval first
    autolayer(fc_data, level = 95, fill = line_col, colour = NA, alpha = 0.25) +
    # Inner interval second (darker)
    autolayer(fc_data, level = 80, fill = line_col, colour = NA, alpha = 0.40) +
    # Forecast mean
    autolayer(fc_data, colour = line_col, size = 0.5) +
    # Re-draw historical line on top so it stays visible
    autolayer(hist_data, colour = "black", size = 0.5) +
    labs(
      title = paste(route, "Weekday Best Forecast"),
      subtitle = subtitle_txt,
      x = "Time (Months)",
      y = "Ridership Average"
    ) +
    theme(legend.position = "bottom") +
    scale_y_continuous(labels = comma)
}

red_best_fc_weekday <- plot_best_weekday(
  route = "Red Line",
  model_name = "mint_arima",
  subtitle_txt = "Mint ARIMA Method",
  line_col = "#DA291C"
)

blue_best_fc_weekday <- plot_best_weekday(
  route = "Blue Line",
  model_name = "mint_ets",
  subtitle_txt = "Mint ETS Method",
  line_col = "#4F83CC"
)

orange_best_fc_weekday <- plot_best_weekday(
  route = "Orange Line",
  model_name = "tslm",
  subtitle_txt = "TSLM Method",
  line_col = "#ED8B00"
)

green_best_fc_weekday <- plot_best_weekday(
  route = "Green Line",
  model_name = "tslm",
  subtitle_txt = "TSLM Method",
  line_col = "#00843D"
)

rgbo_best_fc_weekday <- (red_best_fc_weekday + blue_best_fc_weekday) /
  (orange_best_fc_weekday + green_best_fc_weekday)
# Saving the graph to the "Plots" subfolder
ggsave('../Plots/best_forecasting_methods_graphs/rgbo_best_fc_weekday.pdf',
       plot = rgbo_best_fc_weekday,
       height = 6,
       width = 8)

# A function that graphs the best forecasting method for each subway line for weekend ridership
plot_best_weekend <- function(route, model_name, subtitle_txt, line_col) {
  hist_data <- mbta_hts |>
    filter(daytype == "Weekend", route_or_line == route)
  
  fc_data <- fc_post_hts |>
    filter(daytype == "Weekend", route_or_line == route, .model == model_name)
  
  autoplot(hist_data, colour = "black", size = 0.5) +
    # Outer interval first
    autolayer(fc_data, level = 95, fill = line_col, colour = NA, alpha = 0.25) +
    # Inner interval second (darker)
    autolayer(fc_data, level = 80, fill = line_col, colour = NA, alpha = 0.40) +
    # Forecast mean
    autolayer(fc_data, colour = line_col, size = 0.5) +
    # Re-draw historical line on top so it stays visible
    autolayer(hist_data, colour = "black", size = 0.5) +
    labs(
      title = paste(route, "Weekend Best Forecast"),
      subtitle = subtitle_txt,
      x = "Time (Months)",
      y = "Ridership Average"
    ) +
    theme(legend.position = "bottom") +
    scale_y_continuous(labels = comma)
}

red_best_fc_weekend <- plot_best_weekend(
  route = "Red Line",
  model_name = "tslm",
  subtitle_txt = "TSLM Method",
  line_col = "#DA291C"
)

blue_best_fc_weekend <- plot_best_weekend(
  route = "Blue Line",
  model_name = "mint_ets",
  subtitle_txt = "Mint ETS Method",
  line_col = "#4F83CC"
)

orange_best_fc_weekend <- plot_best_weekend(
  route = "Orange Line",
  model_name = "ensemble",
  subtitle_txt = "Ensemble Method",
  line_col = "#ED8B00"
)

green_best_fc_weekend <- plot_best_weekend(
  route = "Green Line",
  model_name = "arima",
  subtitle_txt = "ARIMA Method",
  line_col = "#00843D"
)

rgbo_best_fc_weekend <- (red_best_fc_weekend + blue_best_fc_weekend) /
  (orange_best_fc_weekend + green_best_fc_weekend)
# Saving the graph to the "Plots" subfolder
ggsave('../Plots/best_forecasting_methods_graphs/rgbo_best_fc_weekend.pdf',
       plot = rgbo_best_fc_weekend,
       height = 6,
       width = 8)

Residual Plots for base fitted models on the total ridership time series for all days

# Creating residuals
residuals_fit <- residuals(fit_post_hts |> filter(daytype == '<aggregated>'))

# Graphing the residuals
subway_res_train <- ggplot(residuals_fit, aes(x = month, y = .resid)) +
  geom_line(color = "blue") +
  facet_grid(rows = vars(.model), scales = "free_y") +
  labs(title = "Residuals on Training Data for MBTA Fitted Models",
       x = "Date", y = "Residuals") +
  theme_minimal()
# Saving the graph to the "Plots" subfolder
ggsave('../Plots/residual_graphs/subway_training_residuals_graph.pdf',
       plot = subway_res_train,
       height = 6,
       width = 8)

Residual Plots for base forecasting models on total ridership time series for all days

# Creating test residuals
residuals_fc <- fc_post_hts |>
  filter(daytype == '<aggregated>', 
         .model %in% c('ets', 'arima', 'naive', 'snaive', 'tslm', 'ensemble')) |>
    as_tibble() |>
    select(month, .model, .mean) |>
    inner_join(test_post_hts |> filter(daytype == '<aggregated>'), by = "month") |>
    mutate(
        .resid = ridership_average - .mean,
    )

# Graphing the test residuals
subway_res_test <- ggplot(residuals_fc, aes(x = month, y = .resid)) +
  geom_line(color = "blue") +
  facet_grid(rows = vars(.model), scales = "free_y") +
  labs(title = "Residuals on the Testing Data for MBTA Forecast Models",
       x = "Date", y = "Residuals") +
  theme_minimal()
# Saving the graph to the "Plots" subfolder
ggsave('../Plots/residual_graphs/subway_test_residuals_graph.pdf',
       plot = subway_res_test,
       height = 6,
       width = 8)

Best Forecasting models for total subway ridership all days

all_subway_forecasts <- fc_post_hts |> 
  filter(.model %in% c('ets', 'arima', 'naive', 'snaive', 'tslm', 'mint_ensemble'),
                      daytype == '<aggregated>') |>
    autoplot(train_post_hts, level = c(95)) +
    autolayer(test_post_hts |> 
                filter(daytype == '<aggregated>'), 
              ridership_average, color = 'black', linetype = 'dashed') +
  labs(title = 'Monthly Average Daily Ridership Forecasts for Total Subway Ridership for All Days',
       subtitle = 'Best Versions of Each Forecasting Method',
       x = 'Time (Months)',
       y = 'Average Daily Ridership') +
  theme(legend.position = 'bottom')

ggsave('../Plots/best_forecasting_methods_graphs/best_forecasts_all_subway.pdf',
       plot = all_subway_forecasts,
       height = 6,
       width = 8)

Best Forecasting models for total subway ridership all days metric table

all_subway_metrics_tibble <- accuracy_metrics_post |> 
  filter(daytype == '<aggregated>', 
         .model %in% c('ets', 'arima', 'naive', 'snaive', 'tslm', 'mint_ensemble')) |> 
  select(.model, ME, RMSE, MAE, MAPE)

all_subway_metrics_tibble <- all_subway_metrics_tibble |> 
  rename('Model' = '.model') |> arrange(MAPE)

all_subway_metrics_tibble[[1, 1]] <- 'Arima'
all_subway_metrics_tibble[[2, 1]] <- 'Mint Ensemble'
all_subway_metrics_tibble[[3, 1]] <- 'ETS'
all_subway_metrics_tibble[[4, 1]] <- 'TSLM'
all_subway_metrics_tibble[[5, 1]] <- 'Naive'
all_subway_metrics_tibble[[6, 1]] <- 'SNaive'

all_subway_metrics_kable <- all_subway_metrics_tibble |>
  kable(digits = 2, format = 'html', booktabs = T) |> kable_styling(full_width = F)

save_kable(all_subway_metrics_kable, 
           '../Tables/subway_lines_best_methods/all_subway_fc_metrics.png')

Loading In the MBTA Subway Time Series

Cleaning up Time Series to only include the Subway System Data

Initial Graph Showing Monthly Average Daily Ridership for the entire Subway System for Both Weekdays and Weekends

Initial Graph Comparing Average Weekday Ridership Vs Average Weekend Ridership Before and After Covid

Converting mbta_ts into a Mixed Hierarchical and Group Structure

Graphing the Hierarchical Structure of the Time Series for all day types, weekdays, and weekends

Creating Train and Test Sets for the time series with the Hierarchical Structure

Graphing the training and testing sets for the post covid data

Fitting ETS, ARIMA, NAIVE, SNAIVE, TSLM, and Ensemble Models on the Training Set and Forecasting Against the Test set

Accuracy Metric Comparison of Bottom Up, Original, and mint forecasting methods for red line weekday, all data, and post-covid data

Accuracy Metric Comparison of Bottom Up vs Original vs mint forecasting methods for blue line weekday, all data and post-covid data

Accuracy Metric Comparison of Bottom Up vs Original vs mint forecasting methods for orange line weekday, all data and post-covid data

Accuracy Metric Comparison of Bottom Up vs Original vs mint forecasting methods for heavy rail weekday aggregates, all data and post-covid data

Accuracy Metric Comparison of Bottom Up vs Original vs mint forecasting methods for green line/light rail weekday, all data and post-covid data

Accuracy Metric Comparison of Bottom Up vs Original vs mint forecasting methods for red line Weekend, all data and post-covid data

Accuracy Metric Comparison of Bottom Up vs Original vs mint forecasting methods for blue line Weekend, all data and post-covid data

Accuracy Metric Comparison of Bottom Up vs Original vs mint forecasting methods for orange line Weekend, all data and post-covid data

Accuracy Metric Comparison of Bottom Up vs Original vs mint forecasting methods for heavy rail weekend aggregates, all data and post-covid data

Accuracy Metric Comparison of Bottom Up vs Original vs mint forecasting methods for green line/light rail weekend, all data and post-covid data

Accuracy Metric Comparison of each forecasting method nomination for each individual color line and daytype

Table Displaying the Best Forecasting Method for Each Subway Line for Weekdays and Weekends

Graphing the fitted values against the training set for each forecasting method for a few of the color line time series

Graphing Best Forecasting Approach (bottom up or mint) for each forecasting method (ets, arima, naive, snaive, tslm) for each color line time series (red, blue, orange, green) for both weekdays and weekends

Graphing the Best Forecasting Methods For each Color Line for Both Weekends and Weekdays and all aggregated series

Cleaning Up Table Names with a Function

Creating graphs for best forecasting method for each individual line with prediction interval colors corresponding to the line color

Residual Plots for base fitted models on the total ridership time series for all days

Residual Plots for base forecasting models on total ridership time series for all days

Best Forecasting models for total subway ridership all days

Best Forecasting models for total subway ridership all days metric table

Converting `mbta_ts` into a Mixed Hierarchical and Group Structure