Project Overview

This project examines how subway access is distributed across NYC census tracts and how it relates to median household income.

Income Data

# Get median household income for NYC census tracts
nyc_income <- get_acs(
  geography = "tract",
  variables = c(median_income = "B19013_001"),
  state = "NY",
  county = c("New York", "Kings", "Queens", "Bronx", "Richmond"),
  year = 2023,
  survey = "acs5",
  geometry = TRUE
)

# Preview data
head(nyc_income)
## Simple feature collection with 6 features and 5 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -73.90606 ymin: 40.82439 xmax: -73.84726 ymax: 40.90264
## Geodetic CRS:  NAD83
##         GEOID                                        NAME      variable
## 1 36005023502 Census Tract 235.02; Bronx County; New York median_income
## 2 36005013500    Census Tract 135; Bronx County; New York median_income
## 3 36005009200     Census Tract 92; Bronx County; New York median_income
## 4 36005005400     Census Tract 54; Bronx County; New York median_income
## 5 36005036501 Census Tract 365.01; Bronx County; New York median_income
## 6 36005044902 Census Tract 449.02; Bronx County; New York median_income
##   estimate   moe                       geometry
## 1    47484  8551 MULTIPOLYGON (((-73.906 40....
## 2    30865 26467 MULTIPOLYGON (((-73.90508 4...
## 3    56913 14039 MULTIPOLYGON (((-73.85773 4...
## 4    58452 19753 MULTIPOLYGON (((-73.88389 4...
## 5    54097 27972 MULTIPOLYGON (((-73.88832 4...
## 6    99250 23563 MULTIPOLYGON (((-73.87136 4...

Income Map

# Create income map
ggplot(nyc_income) +
  geom_sf(aes(fill = estimate), color = NA) +
  scale_fill_viridis_c(
    name = "Median household income",
    labels = scales::dollar,
    na.value = "lightgray"
  ) +
  labs(
    title = "Median Household Income by Census Tract in NYC",
    caption = "Source: 2019–2023 ACS 5-Year Estimates"
  ) +
  theme_void()

Subway Data

# Load subway station dataset
subway <- read_csv(
  "data/mta_subway_stations.csv",
  show_col_types = FALSE
)

# Preview data
head(subway)
## # A tibble: 6 × 19
##   georeference       ada_notes cbd   south_direction_label north_direction_label
##   <chr>              <chr>     <lgl> <chr>                 <chr>                
## 1 POINT (-73.975939… <NA>      FALSE Coney Island          Manhattan            
## 2 POINT (-73.98795 … <NA>      TRUE  Downtown              Uptown               
## 3 POINT (-73.940858… <NA>      FALSE Outbound              Manhattan            
## 4 POINT (-73.975776… <NA>      FALSE Southbound            Northbound           
## 5 POINT (-73.948411… <NA>      FALSE Flatbush              Manhattan            
## 6 POINT (-74.000495… <NA>      TRUE  Downtown              Uptown               
## # ℹ 14 more variables: gtfs_longitude <dbl>, gtfs_latitude <dbl>,
## #   structure <chr>, daytime_routes <chr>, borough <chr>, stop_name <chr>,
## #   line <chr>, division <chr>, gtfs_stop_id <chr>, complex_id <dbl>,
## #   station_id <dbl>, ada_southbound <dbl>, ada_northbound <dbl>, ada <dbl>

Convert Subway Data to Spatial Format

# Convert subway data to sf object using coordinates
subway_sf <- st_as_sf(
  subway,
  coords = c("gtfs_longitude", "gtfs_latitude"),
  crs = 4326
)

# Preview spatial data
head(subway_sf)
## Simple feature collection with 6 features and 17 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -74.0005 ymin: 40.57613 xmax: -73.94086 ymax: 40.74957
## Geodetic CRS:  WGS 84
## # A tibble: 6 × 18
##   georeference       ada_notes cbd   south_direction_label north_direction_label
##   <chr>              <chr>     <lgl> <chr>                 <chr>                
## 1 POINT (-73.975939… <NA>      FALSE Coney Island          Manhattan            
## 2 POINT (-73.98795 … <NA>      TRUE  Downtown              Uptown               
## 3 POINT (-73.940858… <NA>      FALSE Outbound              Manhattan            
## 4 POINT (-73.975776… <NA>      FALSE Southbound            Northbound           
## 5 POINT (-73.948411… <NA>      FALSE Flatbush              Manhattan            
## 6 POINT (-74.000495… <NA>      TRUE  Downtown              Uptown               
## # ℹ 13 more variables: structure <chr>, daytime_routes <chr>, borough <chr>,
## #   stop_name <chr>, line <chr>, division <chr>, gtfs_stop_id <chr>,
## #   complex_id <dbl>, station_id <dbl>, ada_southbound <dbl>,
## #   ada_northbound <dbl>, ada <dbl>, geometry <POINT [°]>
nrow(subway_sf)
## [1] 496

Income and Subway Station Map

# Overlay subway stations on income map
ggplot() +
  geom_sf(data = nyc_income, aes(fill = estimate), color = NA) +
  geom_sf(data = subway_sf, color = "black", size = 0.8, alpha = 0.8) +
  scale_fill_viridis_c(
    name = "Median household income",
    labels = scales::dollar,
    na.value = "lightgray"
  ) +
  labs(
    title = "Median Household Income and Subway Stations in NYC",
    caption = "Sources: 2019–2023 ACS 5-Year Estimates; NYS MTA Subway Stations"
  ) +
  theme_void()

ggsave("maps/income_subway_map.png", width = 8, height = 6, dpi = 300)

Transit Access Calculation

# Make sure both datasets use the same CRS
subway_sf <- st_transform(subway_sf, st_crs(nyc_income))

# Count number of subway stations per tract
station_count <- st_intersects(nyc_income, subway_sf)

stations_per_tract <- nyc_income %>%
  mutate(stations = lengths(station_count))

head(stations_per_tract)
## Simple feature collection with 6 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -73.90606 ymin: 40.82439 xmax: -73.84726 ymax: 40.90264
## Geodetic CRS:  NAD83
##         GEOID                                        NAME      variable
## 1 36005023502 Census Tract 235.02; Bronx County; New York median_income
## 2 36005013500    Census Tract 135; Bronx County; New York median_income
## 3 36005009200     Census Tract 92; Bronx County; New York median_income
## 4 36005005400     Census Tract 54; Bronx County; New York median_income
## 5 36005036501 Census Tract 365.01; Bronx County; New York median_income
## 6 36005044902 Census Tract 449.02; Bronx County; New York median_income
##   estimate   moe                       geometry stations
## 1    47484  8551 MULTIPOLYGON (((-73.906 40....        1
## 2    30865 26467 MULTIPOLYGON (((-73.90508 4...        0
## 3    56913 14039 MULTIPOLYGON (((-73.85773 4...        0
## 4    58452 19753 MULTIPOLYGON (((-73.88389 4...        0
## 5    54097 27972 MULTIPOLYGON (((-73.88832 4...        0
## 6    99250 23563 MULTIPOLYGON (((-73.87136 4...        0
summary(stations_per_tract$stations)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.2132  0.0000  7.0000

Transit Access Map

# Map number of stations per tract
ggplot(stations_per_tract) +
  geom_sf(aes(fill = stations), color = NA) +
  scale_fill_viridis_c(
    name = "Number of subway stations",
    option = "plasma"
  ) +
  labs(
    title = "Subway Station Access by Census Tract in NYC",
    caption = "Source: NYS MTA Subway Stations"
  ) +
  theme_void()

ggsave("maps/transit_access_map.png", width = 8, height = 6, dpi = 300)

Income and Transit Access Comparison

# Compare average income based on access to subway stations
stations_per_tract %>%
  mutate(access = ifelse(stations > 0, "Has station", "No station")) %>%
  group_by(access) %>%
  summarise(avg_income = mean(estimate, na.rm = TRUE)) %>%
  ggplot(aes(x = access, y = avg_income, fill = access)) +
  geom_col() +
  labs(
    title = "Average Income by Subway Access",
    x = "Transit Access",
    y = "Average Median Income"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

ggsave("maps/comparison_graph.png", width = 6, height = 4, dpi = 300)