knitr::opts_chunk$set(echo = TRUE)

Data import & tidying

Equity variables from ACS (5-year) — what & why

I select tract-level indicators that proxy need, barriers, and resources for healthcare access across the 11 Metro Atlanta counties (Fulton, DeKalb, Cobb, Gwinnett, Clayton, Cherokee, Douglas, Fayette, Henry, Rockdale, Forsyth).

Together, these variables represent different circumstances (demographics, need) and constraints (coverage, wealth, mobility) that shape realized access.

Downloading ACS data (tract level) with tidycensus

I use tidycensus::get_acs() to retrieve: - Counts for ratios (poverty, no-vehicle, race), converting to proportions. - Subject-table percent fields (uninsured, age 65+) and convert to proportions. - Detail-table builds for education (B15003) and gender (B01001; female cells 026–049). - geometry = TRUE for tracts so the ACS table joins directly to spatial features (CRS later matched to the POI layer).

This produces a single tract-level dataset with harmonized columns that is ready to be merged with the hospital POIs for measurement (e.g., distance to nearest hospital, hospitals within 5 miles) and equity analysis.

# core
library(sf)
library(dplyr)
library(tidyr)
library(stringr)
library(purrr)

# data & mapping
library(tidycensus)   # ACS 5-year
library(tigris)
library(tmap)
library(ggplot2)

options(tigris_use_cache = TRUE)
tmap_mode("plot")     # static maps for knitting

1) Import hospital POIs (tidy), Do

hosp_url <- "https://raw.githubusercontent.com/ujhwang/urban-analytics-2025/main/Assignment/mini_3/hospital_11counties.geojson"
hosp <- sf::st_read(hosp_url, quiet = TRUE) |>
  st_make_valid() |>
  st_transform(4326)

# Basic clean — keep a few informative fields if present, else just geometry
poi_cols <- intersect(names(hosp), c("name","hospital","system","type","address"))
hosp <- dplyr::select(hosp, any_of(poi_cols), geometry)

cat("Hospitals loaded:", nrow(hosp), "\n")
## Hospitals loaded: 119

2) &3) Explore variables & Download data in the Census ACS

year <- 2022
state <- "GA"
atl_counties <- c("Fulton","DeKalb","Cobb","Gwinnett","Clayton",
                  "Cherokee","Douglas","Fayette","Henry","Rockdale","Forsyth")

# --- Tract geometries + total pop ---
tracts <- tidycensus::get_acs(
  geography = "tract", variables = "B01001_001",
  year = year, state = state, county = atl_counties,
  geometry = TRUE, output = "wide"
) |>
  sf::st_transform(4326) |>
  dplyr::rename(pop_total = B01001_001E) |>
  dplyr::select(GEOID, NAME, pop_total, geometry)

# --- Poverty (B17001) ---
pov <- tidycensus::get_acs("tract",
  variables = c(den="B17001_001", num="B17001_002"),
  year=year, state=state, county=atl_counties, output="wide"
) |>
  dplyr::transmute(GEOID, pov_rate = ifelse(denE>0, numE/denE, NA_real_))

# --- Uninsured (subject table; percent -> proportion) ---
# If this errors for you, replace with a B27010-based build.
unins <- tidycensus::get_acs("tract",
  variables = "S2701_C05_001",
  year=year, state=state, county=atl_counties, output="wide"
) |>
  dplyr::transmute(GEOID, unins_rate = S2701_C05_001E / 100)

# --- Age 65+ (subject table; percent -> proportion) ---
age <- tidycensus::get_acs("tract",
  variables = "S0101_C02_030",
  year=year, state=state, county=atl_counties, output="wide"
) |>
  dplyr::transmute(GEOID, age65_rate = S0101_C02_030E / 100)

# --- No-vehicle households (B08201) ---
veh <- tidycensus::get_acs("tract",
  variables = c(den="B08201_001", nov="B08201_002"),
  year=year, state=state, county=atl_counties, output="wide"
) |>
  dplyr::transmute(GEOID, no_vehicle_rate = ifelse(denE>0, novE/denE, NA_real_))

# --- Median HH income (B19013) ---
inc <- tidycensus::get_acs("tract",
  variables = "B19013_001",
  year=year, state=state, county=atl_counties, output="wide"
) |>
  dplyr::transmute(GEOID, med_hh_inc = B19013_001E)

# =========================
# NEW: Education, Gender, Race
# =========================

# Education (B15003): Bachelor's or higher = 022+023+024+025 over 001
edu <- tidycensus::get_acs("tract",
  variables = c(tot="B15003_001", bach="B15003_022", mast="B15003_023",
                prof="B15003_024", doct="B15003_025"),
  year=year, state=state, county=atl_counties, output="wide"
) |>
  dplyr::transmute(
    GEOID,
    bach_plus_rate = ifelse(totE>0, (bachE+mastE+profE+doctE)/totE, NA_real_)
  )

# Female share from B01001: sum of cells 026–049 (female) over total 001
female_vars <- sprintf("B01001_%03d", 26:49)

# alias ONLY the total; keep female codes as-is so we get B01001_026E, ... columns
var_vec <- c(total = "B01001_001", stats::setNames(female_vars, female_vars))

gender_wide <- tidycensus::get_acs(
  geography = "tract",
  variables = var_vec,
  year = year, state = state, county = atl_counties,
  output = "wide"
)

# safely find the female columns that actually exist in the result
female_cols <- intersect(paste0(female_vars, "E"), names(gender_wide))

gender <- gender_wide |>
  dplyr::mutate(
    female_total = if (length(female_cols)) {
      rowSums(dplyr::across(dplyr::all_of(female_cols)), na.rm = TRUE)
    } else NA_real_
  ) |>
  dplyr::transmute(
    GEOID,
    female_rate = dplyr::if_else(totalE > 0, female_total / totalE, NA_real_)
  )


# Race/Ethnicity (B03002): NH White, NH Black, Hispanic shares
race <- tidycensus::get_acs("tract",
  variables = c(tot="B03002_001", nh_white="B03002_003",
                nh_black="B03002_004", hisp="B03002_012"),
  year=year, state=state, county=atl_counties, output="wide"
) |>
  dplyr::transmute(
    GEOID,
    share_nh_white = ifelse(totE>0, nh_whiteE/totE, NA_real_),
    share_nh_black = ifelse(totE>0, nh_blackE/totE, NA_real_),
    share_hispanic = ifelse(totE>0, hispE/totE, NA_real_)
  )

# ---- Final ACS join ----
acs <- tracts |>
  dplyr::left_join(pov,     by="GEOID") |>
  dplyr::left_join(unins,   by="GEOID") |>
  dplyr::left_join(age,     by="GEOID") |>
  dplyr::left_join(veh,     by="GEOID") |>
  dplyr::left_join(inc,     by="GEOID") |>
  dplyr::left_join(edu,     by="GEOID") |>
  dplyr::left_join(gender,  by="GEOID") |>
  dplyr::left_join(race,    by="GEOID")

# quick peek
dplyr::glimpse(dplyr::select(
  acs, GEOID, pop_total, pov_rate, unins_rate, age65_rate,
  no_vehicle_rate, med_hh_inc, bach_plus_rate, female_rate,
  share_nh_white, share_nh_black, share_hispanic
))
## Rows: 1,248
## Columns: 13
## $ GEOID           <chr> "13089023212", "13089020400", "13089020500", "13089022…
## $ pop_total       <dbl> 3358, 2709, 3688, 4178, 5080, 4277, 4218, 4843, 4732, …
## $ pov_rate        <dbl> 0.13731343, 0.02444444, 0.06914317, 0.01532567, 0.2311…
## $ unins_rate      <dbl> 0.213, 0.019, 0.075, 0.031, 0.065, 0.077, 0.192, 0.022…
## $ age65_rate      <dbl> 0.167, 0.078, 0.107, 0.106, 0.131, 0.130, 0.062, 0.151…
## $ no_vehicle_rate <dbl> 0.041446873, 0.010797342, 0.038570618, 0.023827825, 0.…
## $ med_hh_inc      <dbl> 64698, 176250, 83594, 159821, 68224, 131250, 72000, 17…
## $ bach_plus_rate  <dbl> 0.2072699, 0.8554587, 0.6121990, 0.7885425, 0.3549460,…
## $ female_rate     <dbl> 1.0506254, 1.0380214, 1.1030369, 1.0646242, 0.9519685,…
## $ share_nh_white  <dbl> 0.013103038, 0.826135105, 0.487798265, 0.651986596, 0.…
## $ share_nh_black  <dbl> 0.92703990, 0.02657807, 0.39262473, 0.13930110, 0.4139…
## $ share_hispanic  <dbl> 0.01727219, 0.03359173, 0.05124729, 0.06701771, 0.1307…
## $ geometry        <MULTIPOLYGON [°]> MULTIPOLYGON (((-84.18651 3..., MULTIPOLY…

Prepare for analysis

Here are 3 tasks that I did: - Drop tracts with missing key denominators. - Use tract population-weighted centroids for distance (safer than geometric centroids if odd shapes). If pop weights aren’t available, I will use the geometry centroid and note the limitation. - Compute distances in meters.

# Keep reasonable rows
acs_clean <- acs |>
  filter(!is.na(pop_total)) |>
  st_make_valid()

# Centroids for distance (geometric; documented limitation)
tract_pts <- st_centroid(acs_clean)

# Project to meters for distance math
tract_pts_m <- st_transform(tract_pts, 3857)
hosp_m      <- st_transform(hosp, 3857)

Define and compute my operational measure of healthcare access (the dependent variable)

In this task, I implemented four tract-level metrics, count within 5 miles, presence within 5 miles (binary), distance to nearest hospital (meters & miles), average distance to 3 nearest hospitals (miles).

mi_to_m <- function(mi) mi*1609.344
radius_m <- mi_to_m(5)

# 1) Count within 5 miles (buffer + join)
buf <- st_buffer(tract_pts_m, radius_m)
join_ct <- lengths(st_intersects(buf, hosp_m))

# 2) Presence (binary)
has_hosp_5mi <- as.integer(join_ct > 0)

# 3) Distance to nearest
nearest_idx <- st_nearest_feature(tract_pts_m, hosp_m)
nearest_dist_m <- st_distance(tract_pts_m, hosp_m[nearest_idx,], by_element = TRUE)
nearest_dist_mi <- as.numeric(nearest_dist_m) / 1609.344

# 4) Average distance to k=3 nearest
k <- 3
# distance matrix can be big; do k-NN efficiently
dmat <- st_distance(tract_pts_m, hosp_m)        # [n_tracts x n_hosp]
ord  <- apply(dmat, 1, function(v) sort(as.numeric(v))[seq_len(min(k, length(v)))])
avg3_mi <- if (is.matrix(ord)) colMeans(ord)/1609.344 else sapply(ord, mean)/1609.344

# Append to ACS tracts
tract_access <- acs_clean |>
  mutate(
    hosp_within5 = join_ct,
    hosp_presence5 = has_hosp_5mi,
    dist_nearest_mi = as.numeric(nearest_dist_mi),
    dist_avg3_mi    = as.numeric(avg3_mi)
  )

###6)Analyze the spatial distribution of hospitals from an equity perspective

ggplot(tract_access, aes(pov_rate, dist_nearest_mi)) +
  geom_point(alpha=.5) +
  geom_smooth(method="lm", se=FALSE, linetype="dashed") +
  scale_x_continuous(labels=scales::percent_format(accuracy = 1)) +
  labs(x="Poverty rate", y="Distance to nearest hospital (mi)",
       title="Tracts with higher poverty often live farther from hospitals") +
  theme_minimal()

library(scales)

# Graph 2: Income vs distance
ggplot(tract_access, aes(med_hh_inc, dist_nearest_mi)) +
  geom_point(alpha=.5) +
  geom_smooth(method="lm", se=FALSE, linetype="dashed") +
  scale_x_continuous(labels = label_dollar()) +
  labs(x="Median household income ($)", y="Distance to nearest hospital (mi)",
       title="Higher-income tracts tend to be closer to hospitals") +
  theme_minimal()

# Graph 3: Education vs count within 5 miles
ggplot(tract_access, aes(bach_plus_rate, hosp_within5)) +
  geom_point(alpha=.5) +
  geom_smooth(method="lm", se=FALSE, linetype="dashed") +
  scale_x_continuous(labels = percent_format(accuracy = 1)) +
  labs(x="% Bachelor's or higher", y="# hospitals within 5 miles",
       title="Choice density rises with educational attainment") +
  theme_minimal()

Scatter: Median income vs. distance to nearest hospital

  • The trend line slopes downward: higher-income tracts tend to be closer to a hospital.
  • Dispersion shrinks at higher incomes, distance is not only lower but also more consistent, suggesting more reliable proximity in affluent areas.

Scatter: % Bachelor’s+ vs. hospitals within 5 miles

  • A clear positive association: more educated tracts have more hospitals nearby (greater choice density).
  • At very low education shares, counts cluster at 0–2 the upper tail (6–10+) appears mainly where Bachelor’s+ is high.
tm_shape(tract_access) +
  tm_polygons("pov_rate", palette="Reds", style="quantile",
              title="Poverty rate") +
  tm_layout(frame=FALSE)

# Map B: No-vehicle households
tm_shape(tract_access) +
  tm_polygons("no_vehicle_rate", palette="Purples", style="quantile",
              title="% households with no vehicle") +
  tm_layout(frame=FALSE)

Choropleth: % households with no vehicle

  • Hot spots occur in parts of the urban core and select inner-ring tracts. These areas also face critical dependence on transit, making proximity especially important.

Choropleth: Poverty rate

  • Elevated poverty concentrates in south and southwest tracts and some east/southeast areas. These locations overlap with lower hospital counts and longer distances in the access maps.

Conclusion (equity verdict)

Across Metro Atlanta, hospital access is not spatially equitable. Proximity and choice concentrate along major corridors and wealthier, more educated tracts, while many lower-income, higher-poverty, and no-vehicle areas—especially on the periphery—face fewer nearby hospitals and longer travel. This pattern implies compounded barriers where need may be high. Policy responses could include supporting satellite/urgent-care facilities, strengthening transit connections to medical hubs, and incentivizing service provision in underserved tracts.