knitr::opts_chunk$set(echo = TRUE)
https://raw.githubusercontent.com/ujhwang/urban-analytics-2025/main/Assignment/mini_3/hospital_11counties.geojson
.name
,
system
, type
, address
) and the
geometry, check validity with sf::st_make_valid()
, and
standardize to WGS84 (EPSG:4326) so it aligns with
Census tract data. The result is a clean, analysis-ready sf
object of hospitals.I select tract-level indicators that proxy need, barriers, and resources for healthcare access across the 11 Metro Atlanta counties (Fulton, DeKalb, Cobb, Gwinnett, Clayton, Cherokee, Douglas, Fayette, Henry, Rockdale, Forsyth).
B17001
): economic
constraint limiting transportation, time flexibility, and ability to
afford care.S2701_C05_001
): direct
barrier to care and a predictor of delayed treatment.S0101_C02_030
): older
adults have higher utilization and are more sensitive to travel
distance.B08201
):
mobility barrier; longer distances are more burdensome without a private
car.B19013_001
):
overall socioeconomic status and community resources.B15003
): health literacy and ability to navigate systems;
often correlates with provider supply.B01001
):
demographic structure relevant to maternal and women’s health
needs.B03002
: NH
White, NH Black, Hispanic): populations that may face structural
barriers and historical disparities.Together, these variables represent different circumstances (demographics, need) and constraints (coverage, wealth, mobility) that shape realized access.
tidycensus
"GA"
)I use tidycensus::get_acs()
to retrieve: - Counts for
ratios (poverty, no-vehicle,
race), converting to proportions. - Subject-table
percent fields (uninsured, age
65+) and convert to proportions. - Detail-table builds for
education (B15003) and gender (B01001; female
cells 026–049). - geometry = TRUE
for tracts so
the ACS table joins directly to spatial features (CRS later matched to
the POI layer).
This produces a single tract-level dataset with harmonized columns that is ready to be merged with the hospital POIs for measurement (e.g., distance to nearest hospital, hospitals within 5 miles) and equity analysis.
# core
library(sf)
library(dplyr)
library(tidyr)
library(stringr)
library(purrr)
# data & mapping
library(tidycensus) # ACS 5-year
library(tigris)
library(tmap)
library(ggplot2)
options(tigris_use_cache = TRUE)
tmap_mode("plot") # static maps for knitting
hosp_url <- "https://raw.githubusercontent.com/ujhwang/urban-analytics-2025/main/Assignment/mini_3/hospital_11counties.geojson"
hosp <- sf::st_read(hosp_url, quiet = TRUE) |>
st_make_valid() |>
st_transform(4326)
# Basic clean — keep a few informative fields if present, else just geometry
poi_cols <- intersect(names(hosp), c("name","hospital","system","type","address"))
hosp <- dplyr::select(hosp, any_of(poi_cols), geometry)
cat("Hospitals loaded:", nrow(hosp), "\n")
## Hospitals loaded: 119
year <- 2022
state <- "GA"
atl_counties <- c("Fulton","DeKalb","Cobb","Gwinnett","Clayton",
"Cherokee","Douglas","Fayette","Henry","Rockdale","Forsyth")
# --- Tract geometries + total pop ---
tracts <- tidycensus::get_acs(
geography = "tract", variables = "B01001_001",
year = year, state = state, county = atl_counties,
geometry = TRUE, output = "wide"
) |>
sf::st_transform(4326) |>
dplyr::rename(pop_total = B01001_001E) |>
dplyr::select(GEOID, NAME, pop_total, geometry)
# --- Poverty (B17001) ---
pov <- tidycensus::get_acs("tract",
variables = c(den="B17001_001", num="B17001_002"),
year=year, state=state, county=atl_counties, output="wide"
) |>
dplyr::transmute(GEOID, pov_rate = ifelse(denE>0, numE/denE, NA_real_))
# --- Uninsured (subject table; percent -> proportion) ---
# If this errors for you, replace with a B27010-based build.
unins <- tidycensus::get_acs("tract",
variables = "S2701_C05_001",
year=year, state=state, county=atl_counties, output="wide"
) |>
dplyr::transmute(GEOID, unins_rate = S2701_C05_001E / 100)
# --- Age 65+ (subject table; percent -> proportion) ---
age <- tidycensus::get_acs("tract",
variables = "S0101_C02_030",
year=year, state=state, county=atl_counties, output="wide"
) |>
dplyr::transmute(GEOID, age65_rate = S0101_C02_030E / 100)
# --- No-vehicle households (B08201) ---
veh <- tidycensus::get_acs("tract",
variables = c(den="B08201_001", nov="B08201_002"),
year=year, state=state, county=atl_counties, output="wide"
) |>
dplyr::transmute(GEOID, no_vehicle_rate = ifelse(denE>0, novE/denE, NA_real_))
# --- Median HH income (B19013) ---
inc <- tidycensus::get_acs("tract",
variables = "B19013_001",
year=year, state=state, county=atl_counties, output="wide"
) |>
dplyr::transmute(GEOID, med_hh_inc = B19013_001E)
# =========================
# NEW: Education, Gender, Race
# =========================
# Education (B15003): Bachelor's or higher = 022+023+024+025 over 001
edu <- tidycensus::get_acs("tract",
variables = c(tot="B15003_001", bach="B15003_022", mast="B15003_023",
prof="B15003_024", doct="B15003_025"),
year=year, state=state, county=atl_counties, output="wide"
) |>
dplyr::transmute(
GEOID,
bach_plus_rate = ifelse(totE>0, (bachE+mastE+profE+doctE)/totE, NA_real_)
)
# Female share from B01001: sum of cells 026–049 (female) over total 001
female_vars <- sprintf("B01001_%03d", 26:49)
# alias ONLY the total; keep female codes as-is so we get B01001_026E, ... columns
var_vec <- c(total = "B01001_001", stats::setNames(female_vars, female_vars))
gender_wide <- tidycensus::get_acs(
geography = "tract",
variables = var_vec,
year = year, state = state, county = atl_counties,
output = "wide"
)
# safely find the female columns that actually exist in the result
female_cols <- intersect(paste0(female_vars, "E"), names(gender_wide))
gender <- gender_wide |>
dplyr::mutate(
female_total = if (length(female_cols)) {
rowSums(dplyr::across(dplyr::all_of(female_cols)), na.rm = TRUE)
} else NA_real_
) |>
dplyr::transmute(
GEOID,
female_rate = dplyr::if_else(totalE > 0, female_total / totalE, NA_real_)
)
# Race/Ethnicity (B03002): NH White, NH Black, Hispanic shares
race <- tidycensus::get_acs("tract",
variables = c(tot="B03002_001", nh_white="B03002_003",
nh_black="B03002_004", hisp="B03002_012"),
year=year, state=state, county=atl_counties, output="wide"
) |>
dplyr::transmute(
GEOID,
share_nh_white = ifelse(totE>0, nh_whiteE/totE, NA_real_),
share_nh_black = ifelse(totE>0, nh_blackE/totE, NA_real_),
share_hispanic = ifelse(totE>0, hispE/totE, NA_real_)
)
# ---- Final ACS join ----
acs <- tracts |>
dplyr::left_join(pov, by="GEOID") |>
dplyr::left_join(unins, by="GEOID") |>
dplyr::left_join(age, by="GEOID") |>
dplyr::left_join(veh, by="GEOID") |>
dplyr::left_join(inc, by="GEOID") |>
dplyr::left_join(edu, by="GEOID") |>
dplyr::left_join(gender, by="GEOID") |>
dplyr::left_join(race, by="GEOID")
# quick peek
dplyr::glimpse(dplyr::select(
acs, GEOID, pop_total, pov_rate, unins_rate, age65_rate,
no_vehicle_rate, med_hh_inc, bach_plus_rate, female_rate,
share_nh_white, share_nh_black, share_hispanic
))
## Rows: 1,248
## Columns: 13
## $ GEOID <chr> "13089023212", "13089020400", "13089020500", "13089022…
## $ pop_total <dbl> 3358, 2709, 3688, 4178, 5080, 4277, 4218, 4843, 4732, …
## $ pov_rate <dbl> 0.13731343, 0.02444444, 0.06914317, 0.01532567, 0.2311…
## $ unins_rate <dbl> 0.213, 0.019, 0.075, 0.031, 0.065, 0.077, 0.192, 0.022…
## $ age65_rate <dbl> 0.167, 0.078, 0.107, 0.106, 0.131, 0.130, 0.062, 0.151…
## $ no_vehicle_rate <dbl> 0.041446873, 0.010797342, 0.038570618, 0.023827825, 0.…
## $ med_hh_inc <dbl> 64698, 176250, 83594, 159821, 68224, 131250, 72000, 17…
## $ bach_plus_rate <dbl> 0.2072699, 0.8554587, 0.6121990, 0.7885425, 0.3549460,…
## $ female_rate <dbl> 1.0506254, 1.0380214, 1.1030369, 1.0646242, 0.9519685,…
## $ share_nh_white <dbl> 0.013103038, 0.826135105, 0.487798265, 0.651986596, 0.…
## $ share_nh_black <dbl> 0.92703990, 0.02657807, 0.39262473, 0.13930110, 0.4139…
## $ share_hispanic <dbl> 0.01727219, 0.03359173, 0.05124729, 0.06701771, 0.1307…
## $ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((-84.18651 3..., MULTIPOLY…
Here are 3 tasks that I did: - Drop tracts with missing key denominators. - Use tract population-weighted centroids for distance (safer than geometric centroids if odd shapes). If pop weights aren’t available, I will use the geometry centroid and note the limitation. - Compute distances in meters.
# Keep reasonable rows
acs_clean <- acs |>
filter(!is.na(pop_total)) |>
st_make_valid()
# Centroids for distance (geometric; documented limitation)
tract_pts <- st_centroid(acs_clean)
# Project to meters for distance math
tract_pts_m <- st_transform(tract_pts, 3857)
hosp_m <- st_transform(hosp, 3857)
In this task, I implemented four tract-level metrics, count within 5 miles, presence within 5 miles (binary), distance to nearest hospital (meters & miles), average distance to 3 nearest hospitals (miles).
mi_to_m <- function(mi) mi*1609.344
radius_m <- mi_to_m(5)
# 1) Count within 5 miles (buffer + join)
buf <- st_buffer(tract_pts_m, radius_m)
join_ct <- lengths(st_intersects(buf, hosp_m))
# 2) Presence (binary)
has_hosp_5mi <- as.integer(join_ct > 0)
# 3) Distance to nearest
nearest_idx <- st_nearest_feature(tract_pts_m, hosp_m)
nearest_dist_m <- st_distance(tract_pts_m, hosp_m[nearest_idx,], by_element = TRUE)
nearest_dist_mi <- as.numeric(nearest_dist_m) / 1609.344
# 4) Average distance to k=3 nearest
k <- 3
# distance matrix can be big; do k-NN efficiently
dmat <- st_distance(tract_pts_m, hosp_m) # [n_tracts x n_hosp]
ord <- apply(dmat, 1, function(v) sort(as.numeric(v))[seq_len(min(k, length(v)))])
avg3_mi <- if (is.matrix(ord)) colMeans(ord)/1609.344 else sapply(ord, mean)/1609.344
# Append to ACS tracts
tract_access <- acs_clean |>
mutate(
hosp_within5 = join_ct,
hosp_presence5 = has_hosp_5mi,
dist_nearest_mi = as.numeric(nearest_dist_mi),
dist_avg3_mi = as.numeric(avg3_mi)
)
###6)Analyze the spatial distribution of hospitals from an equity perspective
ggplot(tract_access, aes(pov_rate, dist_nearest_mi)) +
geom_point(alpha=.5) +
geom_smooth(method="lm", se=FALSE, linetype="dashed") +
scale_x_continuous(labels=scales::percent_format(accuracy = 1)) +
labs(x="Poverty rate", y="Distance to nearest hospital (mi)",
title="Tracts with higher poverty often live farther from hospitals") +
theme_minimal()
library(scales)
# Graph 2: Income vs distance
ggplot(tract_access, aes(med_hh_inc, dist_nearest_mi)) +
geom_point(alpha=.5) +
geom_smooth(method="lm", se=FALSE, linetype="dashed") +
scale_x_continuous(labels = label_dollar()) +
labs(x="Median household income ($)", y="Distance to nearest hospital (mi)",
title="Higher-income tracts tend to be closer to hospitals") +
theme_minimal()
# Graph 3: Education vs count within 5 miles
ggplot(tract_access, aes(bach_plus_rate, hosp_within5)) +
geom_point(alpha=.5) +
geom_smooth(method="lm", se=FALSE, linetype="dashed") +
scale_x_continuous(labels = percent_format(accuracy = 1)) +
labs(x="% Bachelor's or higher", y="# hospitals within 5 miles",
title="Choice density rises with educational attainment") +
theme_minimal()
tm_shape(tract_access) +
tm_polygons("pov_rate", palette="Reds", style="quantile",
title="Poverty rate") +
tm_layout(frame=FALSE)
# Map B: No-vehicle households
tm_shape(tract_access) +
tm_polygons("no_vehicle_rate", palette="Purples", style="quantile",
title="% households with no vehicle") +
tm_layout(frame=FALSE)
Across Metro Atlanta, hospital access is not spatially equitable. Proximity and choice concentrate along major corridors and wealthier, more educated tracts, while many lower-income, higher-poverty, and no-vehicle areas—especially on the periphery—face fewer nearby hospitals and longer travel. This pattern implies compounded barriers where need may be high. Policy responses could include supporting satellite/urgent-care facilities, strengthening transit connections to medical hubs, and incentivizing service provision in underserved tracts.