# Load libraries
library(tidyverse)
library(lubridate)
library(plotly)
library(scales)
library(kableExtra)

1 Introduction

This project examines Staten Island Ferry ridership trends from January to March in 2024 and 2025. Using monthly ferry reports released by NYC DOT, we assess how the introduction of congestion pricing in early 2025 influenced Staten Island commuters’ behavior.

While the Staten Island Ferry experienced a short-lived rise in ridership immediately after pricing began, this trend was not sustained. The data suggests that ferry service alone does not meet residents’ long-term transportation needs.

2 Data Source

The following six monthly CSV files were downloaded from NYC Open Data and manually cleaned to remove totals and empty rows:

SIFMonthlyRidership_2024_01.csv through SIFMonthlyRidership_2024_03.csv, and SIFMonthlyRidership_2025_01.csv through SIFMonthlyRidership_2025_03.csv

Columns: Day, Date, WHT, STG, Combined, Note

#' Load one monthly ferry ridership CSV and tag it with its year and month
#'
#' Reads the file, renames its leading columns positionally to
#' Day, Date, WHT, STG, Combined, Note, converts `Date` to a Date and
#' `Combined` to numeric, drops rows without a usable date, and keeps only
#' the columns needed downstream.
#'
#' @param path Path to a monthly ridership CSV file.
#' @param year Value written to the `Year` column of every returned row.
#' @param month Value written to the `Month` column of every returned row.
#' @return A tibble with columns `Date`, `Combined`, `Year`, and `Month`.
load_and_tag <- function(path, year, month) {
  expected_names <- c("Day", "Date", "WHT", "STG", "Combined", "Note")

  df <- read_csv(path, show_col_types = FALSE)

  # Rename by position, but only the columns that actually exist and that we
  # have a name for. Indexing the name vector past its end would yield NA
  # names when a file carries extra trailing columns.
  n_named <- min(ncol(df), length(expected_names))
  names(df)[seq_len(n_named)] <- expected_names[seq_len(n_named)]

  if (!all(c("Date", "Combined") %in% names(df))) {
    stop(
      "Expected at least 5 columns (Day, Date, WHT, STG, Combined) in ", path,
      " but found ", ncol(df), ".",
      call. = FALSE
    )
  }

  raw_date <- df$Date
  df$Date <- if (inherits(raw_date, "Date")) {
    # readr already recognised the column as dates; running mdy() on it would
    # fail to parse and the filter below would then drop every row.
    raw_date
  } else if (inherits(raw_date, "POSIXt")) {
    as.Date(raw_date)
  } else if (is.numeric(raw_date)) {
    # Numeric dates are treated as day counts from 1899-12-30, the origin
    # used by Excel serial dates.
    as.Date(raw_date, origin = "1899-12-30")
  } else {
    mdy(raw_date)
  }

  df %>%
    filter(!is.na(Date)) %>%
    mutate(
      # Strip thousands separators and whitespace from text counts first;
      # as.numeric("12,345") is NA and would silently vanish from the sums.
      Combined = if (is.character(Combined)) {
        as.numeric(gsub("[,[:space:]]", "", Combined))
      } else {
        as.numeric(Combined)
      },
      Year = year,
      Month = month
    ) %>%
    select(Date, Combined, Year, Month)
}

# Load all 6 files
# Root folder that holds the two OneDrive export folders. This is the only
# line that needs to change when the project is run from another location.
data_dir <- "D:/Spring2025Hunter/GTECH785_Final_Project"

# One row per monthly report: the 2024 files live in the first export folder,
# the 2025 files in the second. File names follow
# SIFMonthlyRidership_<year>_<two-digit month>.csv.
ferry_files <- tibble(
  year = rep(c(2024, 2025), each = 3),
  month_num = rep(1:3, times = 2),
  month = rep(c("January", "February", "March"), times = 2),
  folder = rep(c("OneDrive_1_4-23-2025", "OneDrive_2_4-23-2025"), each = 3)
) %>%
  mutate(
    path = file.path(
      data_dir,
      folder,
      sprintf("SIFMonthlyRidership_%d_%02d.csv", as.integer(year), month_num)
    )
  )

# Fail early with the full list of missing files rather than stopping at the
# first unreadable path.
missing_files <- ferry_files$path[!file.exists(ferry_files$path)]
if (length(missing_files) > 0) {
  stop(
    "Ridership file(s) not found:\n",
    paste(missing_files, collapse = "\n"),
    call. = FALSE
  )
}

# pmap() passes each row's path, year, and month to load_and_tag() by name;
# the six results are stacked into one table.
ridership_data <- ferry_files %>%
  select(path, year, month) %>%
  pmap(load_and_tag) %>%
  bind_rows()
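## Console output from the load step above: readr found a blank header on the
## sixth column of each file and renamed it `...6`.
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## • `` -> `...6`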
# Summarize monthly totals
# One row per Month/Year pair holding the sum of the daily Combined counts
# (missing daily values are ignored).
summary_df <- summarise(
  group_by(ridership_data, Month, Year),
  Total_Riders = sum(Combined, na.rm = TRUE),
  .groups = "drop"
)

# Store Month as an ordered factor so it sorts and plots in calendar order
# rather than alphabetically.
summary_df$Month <- factor(
  summary_df$Month,
  levels = c("January", "February", "March"),
  ordered = TRUE
)

# Compute percent change
# Spread the totals into one column per year so that each month is a single
# row with `2024` and `2025` side by side.
summary_pct_change <- pivot_wider(
  summary_df,
  names_from = Year,
  values_from = Total_Riders
)

# Year-over-year change per month, plus the text labels used by the charts.
# sign_prefix and pct_text are scratch columns and are removed at the end.
summary_pct_change <- mutate(
  summary_pct_change,
  Pct_Change = (`2025` - `2024`) / `2024` * 100,
  Abs_Change = `2025` - `2024`,
  sign_prefix = ifelse(Pct_Change > 0, "+", ""),
  pct_text = paste0(round(Pct_Change, 1), "%"),
  Change_Label = paste0(
    scales::comma(Abs_Change), " (", sign_prefix, pct_text, ")"
  ),
  Pct_Label = ifelse(Pct_Change > 0, paste0("+", pct_text), pct_text),
  Month = factor(
    Month,
    levels = c("January", "February", "March"),
    ordered = TRUE
  ),
  sign_prefix = NULL,
  pct_text = NULL
)

# Merge back into main df for tooltip creation
# Attach each month's change columns to both of its year rows.
summary_df <- left_join(
  summary_df,
  select(summary_pct_change, Month, Abs_Change, Pct_Label, Change_Label),
  by = "Month"
)

# Every bar's tooltip shows its year and total; only the 2025 bars also show
# the change against 2024. base_tip is a scratch column dropped at the end.
summary_df <- mutate(
  summary_df,
  base_tip = paste0(
    "Year: ", Year,
    "<br>Total Riders: ", scales::comma(Total_Riders)
  ),
  Tooltip = ifelse(
    Year == 2025,
    paste0(base_tip, "<br>Change: ", Change_Label),
    base_tip
  ),
  base_tip = NULL
)

# Interactive Chart 1: Total Riders (2024 vs 2025)
# Side-by-side bars per month, one per year, each topped with its total in
# millions. The Tooltip column supplies the hover text.
p_total <- ggplot(
  data = summary_df,
  mapping = aes(
    x = Month,
    y = Total_Riders,
    fill = as.factor(Year),
    text = Tooltip
  )
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_text(
    mapping = aes(
      y = Total_Riders + 30000,
      label = paste0(round(Total_Riders / 1e6, 2), "M")
    ),
    position = position_dodge(width = 0.8),
    size = 4,
    color = "black",
    fontface = "bold",
    vjust = 0
  ) +
  theme_minimal(base_size = 14) +
  labs(
    title = "Ferry Ridership Before & After Congestion Pricing",
    x = "Month",
    y = "Total Riders",
    fill = "Year"
  ) +
  scale_y_continuous(
    labels = comma,
    expand = expansion(mult = c(0, 0.15))
  ) +
  scale_fill_manual(values = c("2024" = "#1f77b4", "2025" = "#ff7f0e"))

# Convert to an interactive widget whose hover box shows only the text
# aesthetic.
ggplotly(p_total, tooltip = "text")
# Interactive Chart 2: Percent Change with Labels Inside Bars
# Classify each month by the sign of its year-over-year change. A month with
# exactly no change gets its own category instead of being coloured as a
# decrease; a missing change stays NA.
summary_pct_change <- summary_pct_change %>%
  mutate(
    Change_Direction = case_when(
      Pct_Change > 0 ~ "Increase",
      Pct_Change < 0 ~ "Decrease",
      Pct_Change == 0 ~ "No change"
    )
  )

# Title and subtitle are kept in variables because they are needed twice:
# once for the ggplot object and once for the plotly layout below.
change_title <- "% Change in Ferry Ridership: Jan–Mar 2024 → 2025"
change_subtitle <- "January up ~5%, followed by a February–March decline"

p_change <- ggplot(summary_pct_change, aes(x = Month, y = Pct_Change,
                                           fill = Change_Direction,
                                           text = paste0("Change: ", Pct_Label))) +
  geom_col(width = 0.6) +
  # Centre each label vertically inside its bar.
  geom_text(aes(label = Pct_Label),
            position = position_stack(vjust = 0.5),
            color = "white", fontface = "bold", size = 5) +
  scale_fill_manual(values = c("Increase" = "#2ca02c",
                               "Decrease" = "#d62728",
                               "No change" = "#7f7f7f"),
                    name = "Change") +
  labs(
    title = change_title,
    subtitle = change_subtitle,
    x = "Month", y = "Percent Change"
  ) +
  theme_minimal(base_size = 14)

# ggplotly() does not carry the ggplot2 subtitle into the widget, so it is
# re-attached as a smaller second line of the plotly title.
ggplotly(p_change, tooltip = "text") %>%
  plotly::layout(
    title = list(
      text = paste0(change_title, "<br><sup>", change_subtitle, "</sup>")
    )
  )

3 Conclusion

While the Staten Island Ferry initially experienced a roughly 5% ridership increase in January 2025 following the launch of NYC’s congestion pricing, that trend was not sustained. Ridership declined by 5.8% in February and 2.5% in March (each compared with the same month in 2024), indicating that Staten Island residents may have tested ferry commuting early on but quickly returned to previous habits or faced limitations with ferry service availability.

This finding suggests that ferry service alone cannot serve as a long-term substitute for more robust transit alternatives. Policy responses should consider enhancing frequency, accessibility, and connections for the Staten Island Ferry to encourage lasting mode shift and equity for residents impacted by congestion pricing.