This project examines how subway access is distributed across NYC census tracts and how it relates to median household income.
# Get median household income for NYC census tracts
nyc_income <- get_acs(
geography = "tract",
variables = c(median_income = "B19013_001"),
state = "NY",
county = c("New York", "Kings", "Queens", "Bronx", "Richmond"),
year = 2023,
survey = "acs5",
geometry = TRUE
)
# Preview data
head(nyc_income)
## Simple feature collection with 6 features and 5 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -73.90606 ymin: 40.82439 xmax: -73.84726 ymax: 40.90264
## Geodetic CRS: NAD83
## GEOID NAME variable
## 1 36005023502 Census Tract 235.02; Bronx County; New York median_income
## 2 36005013500 Census Tract 135; Bronx County; New York median_income
## 3 36005009200 Census Tract 92; Bronx County; New York median_income
## 4 36005005400 Census Tract 54; Bronx County; New York median_income
## 5 36005036501 Census Tract 365.01; Bronx County; New York median_income
## 6 36005044902 Census Tract 449.02; Bronx County; New York median_income
## estimate moe geometry
## 1 47484 8551 MULTIPOLYGON (((-73.906 40....
## 2 30865 26467 MULTIPOLYGON (((-73.90508 4...
## 3 56913 14039 MULTIPOLYGON (((-73.85773 4...
## 4 58452 19753 MULTIPOLYGON (((-73.88389 4...
## 5 54097 27972 MULTIPOLYGON (((-73.88832 4...
## 6 99250 23563 MULTIPOLYGON (((-73.87136 4...
# Create income map
ggplot(nyc_income) +
geom_sf(aes(fill = estimate), color = NA) +
scale_fill_viridis_c(
name = "Median household income",
labels = scales::dollar,
na.value = "lightgray"
) +
labs(
title = "Median Household Income by Census Tract in NYC",
caption = "Source: 2019–2023 ACS 5-Year Estimates"
) +
theme_void()
# Load subway station dataset
subway <- read_csv(
"data/mta_subway_stations.csv",
show_col_types = FALSE
)
# Preview data
head(subway)
## # A tibble: 6 × 19
## georeference ada_notes cbd south_direction_label north_direction_label
## <chr> <chr> <lgl> <chr> <chr>
## 1 POINT (-73.975939… <NA> FALSE Coney Island Manhattan
## 2 POINT (-73.98795 … <NA> TRUE Downtown Uptown
## 3 POINT (-73.940858… <NA> FALSE Outbound Manhattan
## 4 POINT (-73.975776… <NA> FALSE Southbound Northbound
## 5 POINT (-73.948411… <NA> FALSE Flatbush Manhattan
## 6 POINT (-74.000495… <NA> TRUE Downtown Uptown
## # ℹ 14 more variables: gtfs_longitude <dbl>, gtfs_latitude <dbl>,
## # structure <chr>, daytime_routes <chr>, borough <chr>, stop_name <chr>,
## # line <chr>, division <chr>, gtfs_stop_id <chr>, complex_id <dbl>,
## # station_id <dbl>, ada_southbound <dbl>, ada_northbound <dbl>, ada <dbl>
# Convert subway data to sf object using coordinates
subway_sf <- st_as_sf(
subway,
coords = c("gtfs_longitude", "gtfs_latitude"),
crs = 4326
)
# Preview spatial data
head(subway_sf)
## Simple feature collection with 6 features and 17 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -74.0005 ymin: 40.57613 xmax: -73.94086 ymax: 40.74957
## Geodetic CRS: WGS 84
## # A tibble: 6 × 18
## georeference ada_notes cbd south_direction_label north_direction_label
## <chr> <chr> <lgl> <chr> <chr>
## 1 POINT (-73.975939… <NA> FALSE Coney Island Manhattan
## 2 POINT (-73.98795 … <NA> TRUE Downtown Uptown
## 3 POINT (-73.940858… <NA> FALSE Outbound Manhattan
## 4 POINT (-73.975776… <NA> FALSE Southbound Northbound
## 5 POINT (-73.948411… <NA> FALSE Flatbush Manhattan
## 6 POINT (-74.000495… <NA> TRUE Downtown Uptown
## # ℹ 13 more variables: structure <chr>, daytime_routes <chr>, borough <chr>,
## # stop_name <chr>, line <chr>, division <chr>, gtfs_stop_id <chr>,
## # complex_id <dbl>, station_id <dbl>, ada_southbound <dbl>,
## # ada_northbound <dbl>, ada <dbl>, geometry <POINT [°]>
nrow(subway_sf)
## [1] 496
# Overlay subway stations on income map
ggplot() +
geom_sf(data = nyc_income, aes(fill = estimate), color = NA) +
geom_sf(data = subway_sf, color = "black", size = 0.8, alpha = 0.8) +
scale_fill_viridis_c(
name = "Median household income",
labels = scales::dollar,
na.value = "lightgray"
) +
labs(
title = "Median Household Income and Subway Stations in NYC",
caption = "Sources: 2019–2023 ACS 5-Year Estimates; NYS MTA Subway Stations"
) +
theme_void()
ggsave("maps/income_subway_map.png", width = 8, height = 6, dpi = 300)
# Make sure both datasets use the same CRS
subway_sf <- st_transform(subway_sf, st_crs(nyc_income))
# Count number of subway stations per tract
station_count <- st_intersects(nyc_income, subway_sf)
stations_per_tract <- nyc_income %>%
mutate(stations = lengths(station_count))
head(stations_per_tract)
## Simple feature collection with 6 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -73.90606 ymin: 40.82439 xmax: -73.84726 ymax: 40.90264
## Geodetic CRS: NAD83
## GEOID NAME variable
## 1 36005023502 Census Tract 235.02; Bronx County; New York median_income
## 2 36005013500 Census Tract 135; Bronx County; New York median_income
## 3 36005009200 Census Tract 92; Bronx County; New York median_income
## 4 36005005400 Census Tract 54; Bronx County; New York median_income
## 5 36005036501 Census Tract 365.01; Bronx County; New York median_income
## 6 36005044902 Census Tract 449.02; Bronx County; New York median_income
## estimate moe geometry stations
## 1 47484 8551 MULTIPOLYGON (((-73.906 40.... 1
## 2 30865 26467 MULTIPOLYGON (((-73.90508 4... 0
## 3 56913 14039 MULTIPOLYGON (((-73.85773 4... 0
## 4 58452 19753 MULTIPOLYGON (((-73.88389 4... 0
## 5 54097 27972 MULTIPOLYGON (((-73.88832 4... 0
## 6 99250 23563 MULTIPOLYGON (((-73.87136 4... 0
summary(stations_per_tract$stations)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.2132 0.0000 7.0000
# Map number of stations per tract
ggplot(stations_per_tract) +
geom_sf(aes(fill = stations), color = NA) +
scale_fill_viridis_c(
name = "Number of subway stations",
option = "plasma"
) +
labs(
title = "Subway Station Access by Census Tract in NYC",
caption = "Source: NYS MTA Subway Stations"
) +
theme_void()
ggsave("maps/transit_access_map.png", width = 8, height = 6, dpi = 300)
# Compare average income based on access to subway stations
stations_per_tract %>%
mutate(access = ifelse(stations > 0, "Has station", "No station")) %>%
group_by(access) %>%
summarise(avg_income = mean(estimate, na.rm = TRUE)) %>%
ggplot(aes(x = access, y = avg_income, fill = access)) +
geom_col() +
labs(
title = "Average Income by Subway Access",
x = "Transit Access",
y = "Average Median Income"
) +
theme_minimal() +
theme(legend.position = "none")
ggsave("maps/comparison_graph.png", width = 6, height = 4, dpi = 300)