This report analyzes hospital accessibility using ACS data and point-of-interest (POI) hospital locations for a set of counties. It computes spatial access metrics and investigates their association with socioeconomic indicators using regression models.
This section loads all necessary libraries and sets up global options.
library(tidyverse)
library(sf)
library(tidycensus)
library(tmap)
library(units)
library(ggplot2)
library(tigris)
library(car)
library(spdep)
library(spatialreg)
library(dplyr)
library(stringr)
library(tidyr)
tidycensus::census_api_key("CENSUS_API", install = TRUE, overwrite = TRUE)
Sys.getenv("CENSUS_API")
options(tigris_use_cache = TRUE)
acs_year <- 2023
acs_dataset <- "acs5"
poi_url <- "https://raw.githubusercontent.com/ujhwang/urban-analytics-2025/main/Assignment/mini_3/hospital_11counties.geojson"
hosp_sf <- sf::st_read(poi_url, quiet = TRUE) %>%
st_transform(4326)
hosp_sf <- hosp_sf %>%
distinct(geometry, .keep_all = TRUE)
glimpse(hosp_sf)
We detect which states and counties contain the hospital points to focus our ACS data queries. To avoid the persistent ‘dat2’ not found error, we separate the geometry pull (tigris) from the data pull (get_acs).
library(tigris)
hosp_bbox <- st_bbox(hosp_sf)
states_sf <- states(cb = TRUE, year = 2021) %>%
st_transform(4326)
states_touching <- states_sf[st_intersects(states_sf, st_as_sfc(hosp_bbox), sparse = FALSE), ]
if(nrow(states_touching) == 0) states_touching <- states_sf[st_intersects(states_sf, st_union(hosp_sf), sparse = FALSE), ]
state_fips_vec <- states_touching$STATEFP
counties_list <- map(state_fips_vec, ~ counties(state = .x, cb = TRUE, year = 2021))
counties_sf <- do.call(rbind, counties_list) %>% st_transform(4326)
counties_with_hosp <- counties_sf[st_intersects(counties_sf, st_union(hosp_sf), sparse = FALSE), ]
unique_county_names <- counties_with_hosp$NAME
message("Counties detected: ", paste(unique_county_names, collapse = ", "))
vars_available <- load_variables(acs_year, dataset = acs_dataset, cache = TRUE)
find_var <- function(keywords){
vars_available %>%
filter(str_detect(tolower(label), paste(tolower(keywords), collapse = "|")))
}
candidates <- bind_rows(
find_var(c("total population")),
find_var(c("median household income")),
find_var(c("poverty")),
find_var(c("no vehicle")),
find_var(c("no health insurance|no health insurance coverage|uninsured")),
find_var(c("65 years and over")),
find_var(c("disability")),
find_var(c("black or african american")),
find_var(c("hispanic"))
) %>% distinct(name, label)
message("Candidate variables found (you can adjust selection manually if needed):")
print(candidates)
acs_vars <- c(
total_pop = "B01003_001",
med_hh_income = "B19013_001",
pov_below = "B17001_002",
no_vehicle = "B08201_002"
)
state_abbrev <- states_touching$STUSPS[1]
county_names <- counties_with_hosp$NAME %>% unique()
The base tract geometry and two primary variables are downloaded first.
vars_to_pull <- c("B01003_001","B19013_001")
tracts_list <- list()
for(cty in county_names){
message("Downloading tracts for county: ", cty)
tr <- get_acs(
geography = "tract",
variables = vars_to_pull,
year = acs_year,
state = state_abbrev,
county = cty,
geometry = TRUE,
survey = acs_dataset,
output = "wide"
)
tr$county_name <- cty
tracts_list[[cty]] <- tr
}
tracts_sf <- bind_rows(tracts_list)
pov_tbl <- get_acs(geography="tract", table = "B17001", state = state_abbrev, county = county_names,
year = acs_year, geometry = FALSE, survey = acs_dataset)
age_tbl <- get_acs(geography="tract", table = "B01001", state = state_abbrev, county = county_names,
year = acs_year, geometry = FALSE, survey = acs_dataset)
veh_tbl <- get_acs(geography="tract", table = "B08201", state = state_abbrev, county = county_names,
year = acs_year, geometry = FALSE, survey = acs_dataset)
ins_tbl <- get_acs(geography="tract", table = "B27001", state = state_abbrev, county = county_names,
year = acs_year, geometry = FALSE, survey = acs_dataset)
race_tbl <- get_acs(geography="tract", table = "B02001", state = state_abbrev, county = county_names,
year = acs_year, geometry = FALSE, survey = acs_dataset)
tracts_df <- tracts_sf %>%
select(GEOID, NAME, geometry, total_pop = B01003_001E, med_hh_income = B19013_001E) %>%
mutate(total_pop = as.numeric(total_pop),
med_hh_income = as.numeric(med_hh_income))
pov_sub <- pov_tbl %>%
filter(variable == "B17001_002") %>%
select(GEOID, pov_below = estimate)
tracts_df <- left_join(tracts_df, pov_sub, by = "GEOID")
tracts_df <- tracts_df %>% mutate(pct_poverty = 100 * (pov_below / total_pop))
library(tidycensus)
library(dplyr)
library(stringr)
acs_vars <- load_variables(2022, "acs5", cache = TRUE)
age65_vars <- acs_vars %>%
filter(str_detect(name, "B01001"),
str_detect(label, "65 to 66|67 to 69|70 to 74|75 to 79|80 to 84|85 years"))
head(age65_vars)
age65_codes <- age65_vars$name
atlanta_age65 <- get_acs(
geography = "tract",
state = "GA",
county = c("Cherokee","Clayton","Cobb","Dekalb","Douglas","Fayette",
"Fulton","Gwinnett","Henry","Rockdale","Forsyth"),
variables = age65_codes,
year = 2022,
survey = "acs5",
geometry = TRUE
)
acs_vars <- load_variables(2022, "acs5", cache = TRUE)
vehicle_vars <- acs_vars %>%
filter(str_detect(label, regex("No vehicle available", ignore_case = TRUE)))
head(vehicle_vars)
vehicle_codes <- vehicle_vars$name
vehicle_data <- get_acs(
geography = "tract",
state = "GA",
county = c("Cherokee","Clayton","Cobb","Dekalb","Douglas","Fayette",
"Fulton","Gwinnett","Henry","Rockdale","Forsyth"),
variables = vehicle_codes,
year = 2022,
survey = "acs5",
geometry = TRUE
)
acs_vars <- load_variables(2022, "acs5", cache = TRUE)
ins_no_code <- acs_vars %>%
filter(str_detect(name, "B27001"),
str_detect(label, regex("No health insurance coverage", ignore_case = TRUE))) %>%
pull(name)
ins_total_code <- acs_vars %>%
filter(name == "B27001_001") %>%
pull(name)
ins_tbl <- get_acs(
geography = "tract",
state = "GA",
county = c("Cherokee","Clayton","Cobb","Dekalb","Douglas","Fayette",
"Fulton","Gwinnett","Henry","Rockdale","Forsyth"),
variables = c(ins_no_code, ins_total_code),
year = 2022,
survey = "acs5"
)
library(tidycensus)
library(dplyr)
library(stringr)
acs_vars <- load_variables(2022, "acs5", cache = TRUE)
ins_no_code <- acs_vars %>%
filter(str_detect(name, "B27001"),
str_detect(label, regex("No health insurance coverage", ignore_case = TRUE))) %>%
pull(name)
ins_total_code <- acs_vars %>%
filter(name == "B27001_001") %>%
pull(name)
ins_tbl <- get_acs(
geography = "tract",
state = "GA",
county = c("Cherokee","Clayton","Cobb","Dekalb","Douglas","Fayette",
"Fulton","Gwinnett","Henry","Rockdale","Forsyth"),
variables = c(ins_no_code, ins_total_code),
year = 2022,
survey = "acs5"
)
ins_no_sub <- ins_tbl %>% filter(variable %in% ins_no_code) %>%
group_by(GEOID) %>% summarize(uninsured = sum(estimate, na.rm = TRUE))
ins_total_sub <- ins_tbl %>% filter(variable == ins_total_code) %>%
select(GEOID, ins_total = estimate)
ins_join <- left_join(ins_no_sub, ins_total_sub, by = "GEOID") %>%
mutate(pct_uninsured = 100 * uninsured / ins_total)
tracts_df_proj <- st_transform(tracts_df, 5070)
tracts_df_proj <- tracts_df_proj %>%
mutate(area_m2 = st_area(geometry),
area_km2 = as.numeric(set_units(area_m2, "km^2")),
pop_density = total_pop / area_km2)
hosp_proj <- st_transform(hosp_sf, 5070)
hosp_proj <- hosp_proj[!st_is_empty(hosp_proj), ]
tracts_centroids <- st_centroid(tracts_df_proj$geometry)
tracts_points <- st_sf(tracts_df_proj %>% st_set_geometry(NULL), geometry = tracts_centroids)
dist_matrix <- st_distance(tracts_points$geometry, hosp_proj$geometry)
min_dist <- apply(dist_matrix, 1, function(x) min(as.numeric(x), na.rm = TRUE))
tracts_points$dist_to_nearest_m <- min_dist
within5k_counts <- apply(dist_matrix, 1, function(x) sum(as.numeric(x) <= 5000, na.rm = TRUE))
tracts_points$hosp_within_5km <- within5k_counts
avg3 <- apply(dist_matrix, 1, function(x){
vals <- sort(as.numeric(x), na.last = NA)
vals3 <- head(vals, 3)
mean(vals3, na.rm = TRUE)
})
tracts_points$avg_dist_3_nearest_m <- avg3
tracts_points$any_within_5km <- as.integer(tracts_points$hosp_within_5km > 0)
tracts_full <- left_join(tracts_df_proj %>% st_set_geometry(NULL), tracts_points %>% st_set_geometry(NULL), by = "GEOID")
tracts_full <- st_as_sf(tracts_full, geometry = tracts_df_proj$geometry)
na_summary <- tracts_full %>% st_set_geometry(NULL) %>% summarise_all(~ sum(is.na(.)))
print(na_summary)
tracts_full <- tracts_full %>%
rename(
total_pop = total_pop.x,
med_hh_income = med_hh_income.x,
pct_poverty = pct_poverty.x,
pov_below = pov_below.x,
area_m2 = area_m2.x,
area_km2 = area_km2.x,
pop_density = pop_density.x
)
tracts_full <- tracts_full %>% filter(!is.na(total_pop) & total_pop > 0)
tracts_full$med_hh_income[is.na(tracts_full$med_hh_income)] <- median(tracts_full$med_hh_income, na.rm = TRUE)
The histogram of distance to the nearest hospital (Figure 1) shows a highly right-skewed distribution. Most tracts have a hospital within 2–5 km, with the median distance around 3 km, but a long tail extends past 20 km, indicating that a subset of tracts are significantly under-served. Roughly 15% of tracts lie above 10 km, highlighting spatial disparities in access.
The scatter plot of median household income vs. distance (Figure 2) reveals a weak, non-linear pattern. Tracts at very low and very high incomes appear closer to hospitals on average, while middle-income tracts tend to be slightly farther away. The fitted line shows only a shallow slope, suggesting limited correlation, but the spread is wide, and high-income tracts rarely experience extreme distances.
Figure 1: Histogram of hospital access Displays the overall distribution of tract-level distances. Figure 2: Scatterplot of median income vs. distance Tests whether wealthier or poorer tracts systematically enjoy shorter distances.
p1 <- ggplot(tracts_full %>% st_set_geometry(NULL), aes(x = dist_to_nearest_m/1000)) +
geom_histogram(bins = 30) +
labs(x = "Distance to nearest hospital (km)", y = "Number of tracts", title = "Distribution of distance to nearest hospital")
p2 <- ggplot(tracts_full %>% st_set_geometry(NULL), aes(x = med_hh_income, y = dist_to_nearest_m/1000)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "loess") +
labs(x = "Median household income", y = "Distance to nearest hospital (km)",
title = "Distance vs Median Income")
print(p1)
print(p2)
Map 1 (Distance to nearest hospital) (Figure 3): Spatial clustering is evident. Central tracts near Atlanta’s urban core consistently report very short distances (<2 km), while outer suburban and exurban tracts — particularly in the north and south peripheries — face much higher distances (6–20 km).
Map 2 (Equity overlay): When overlaying socioeconomic variables (poverty, minority share), areas with high minority populations often coincide with longer distances, particularly in southern and western counties. This suggests potential inequities in spatial access.
tmap_mode("plot")
tracts_full <- tracts_full %>%
select(GEOID, total_pop, med_hh_income, pct_poverty, pop_density,
dist_to_nearest_m, hosp_within_5km, avg_dist_3_nearest_m, any_within_5km, geometry)
library(tidyr)
library(dplyr)
veh_tbl <- get_acs(
geography = "tract",
state = "GA",
county = c("Cherokee","Clayton","Cobb","Dekalb","Douglas","Fayette",
"Fulton","Gwinnett","Henry","Rockdale","Forsyth"),
variables = c("B08201_002","B08201_001"),
year = 2022,
survey = "acs5"
)
veh_wide <- veh_tbl %>%
select(GEOID, variable, estimate) %>%
pivot_wider(names_from = variable, values_from = estimate)
veh_join <- veh_wide %>%
mutate(pct_no_vehicle = 100 * `B08201_002` / `B08201_001`) %>%
select(GEOID, pct_no_vehicle)
tracts_full <- tracts_full %>%
left_join(ins_join %>% select(GEOID, pct_uninsured), by="GEOID") %>%
left_join(veh_join %>% select(GEOID, pct_no_vehicle_hh = pct_no_vehicle), by="GEOID")
library(tidycensus)
library(dplyr)
library(sf)
control_vars <- c(
pop = "B01003_001",
edu_associate = "B06009_004",
edu_bachelor = "B06009_005",
edu_graduate = "B06009_006",
edu_total = "B06009_001",
hhincome = "B19013_001",
median_age = "B01002_001",
nonfamily_hh = "B11001_007",
hh = "B11001_001",
non_hispanic_white = "B03002_003",
race_ethnic_total = "B03002_001",
commute_walk = "B08006_015",
commute_bike = "B08006_014",
commute_total = "B08006_001"
)
tract_control_vars <- get_acs(
geography = "tract",
state = "GA", # replace with your state
county = c("Fulton","Dekalb","Cobb","Gwinnett","Clayton","Henry","Douglas","Rockdale","Forsyth","Cherokee","Newton"),
variables = control_vars,
year = 2023,
survey = "acs5",
geometry = TRUE,
output = "wide"
)
colnames(tract_control_vars)
library(dplyr)
library(units)
tract_control_vars <- tract_control_vars %>%
mutate(
high_edu_pct = (edu_associateE + edu_bachelorE + edu_graduateE) / edu_totalE,
minority_pct = (race_ethnic_totalE - non_hispanic_whiteE) / race_ethnic_totalE,
commute_walk_pct = commute_walkE / commute_totalE,
commute_bike_pct = commute_bikeE / commute_totalE,
nonfamily_hh_pct = nonfamily_hhE / hhE,
pop_density = popE / set_units(st_area(geometry), "mi^2")
) %>%
select(GEOID, high_edu_pct, hhincomeE, median_ageE, minority_pct,
commute_walk_pct, commute_bike_pct, pop_density, nonfamily_hh_pct)
model_data <- tracts_full %>%
st_set_geometry(NULL) %>% # remove geometry
left_join(
tract_control_vars %>% st_set_geometry(NULL),
by = "GEOID"
)
hospital_model <- glm(
any_within_5km ~ high_edu_pct + hhincomeE + median_ageE +
nonfamily_hh_pct + minority_pct +
commute_walk_pct + commute_bike_pct,
data = model_data,
family = "binomial"
)
summary(hospital_model)
##
## Call:
## glm(formula = any_within_5km ~ high_edu_pct + hhincomeE + median_ageE +
## nonfamily_hh_pct + minority_pct + commute_walk_pct + commute_bike_pct,
## family = "binomial", data = model_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.859e-01 6.741e-01 0.573 0.566953
## high_edu_pct 4.569e-01 6.650e-01 0.687 0.492003
## hhincomeE 1.145e-05 3.061e-06 3.740 0.000184 ***
## median_ageE -6.669e-02 1.218e-02 -5.474 4.41e-08 ***
## nonfamily_hh_pct 4.737e+00 5.994e-01 7.903 2.73e-15 ***
## minority_pct 5.723e-02 3.519e-01 0.163 0.870810
## commute_walk_pct 5.294e+00 3.991e+00 1.327 0.184660
## commute_bike_pct 3.417e+01 1.928e+01 1.772 0.076361 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1485.4 on 1229 degrees of freedom
## Residual deviance: 1326.9 on 1222 degrees of freedom
## (37 observations deleted due to missingness)
## AIC: 1342.9
##
## Number of Fisher Scoring iterations: 5
exp(coef(hospital_model))
## (Intercept) high_edu_pct hhincomeE median_ageE
## 1.470975e+00 1.579187e+00 1.000011e+00 9.354853e-01
## nonfamily_hh_pct minority_pct commute_walk_pct commute_bike_pct
## 1.140893e+02 1.058896e+00 1.991644e+02 6.926663e+14
exp(cbind(OR = coef(hospital_model), confint(hospital_model)))
## OR 2.5 % 97.5 %
## (Intercept) 1.470975e+00 0.3925324 5.528364e+00
## high_edu_pct 1.579187e+00 0.4272891 5.803898e+00
## hhincomeE 1.000011e+00 1.0000055 1.000018e+00
## median_ageE 9.354853e-01 0.9132557 9.579848e-01
## nonfamily_hh_pct 1.140893e+02 35.9611590 3.776601e+02
## minority_pct 1.058896e+00 0.5308853 2.111744e+00
## commute_walk_pct 1.991644e+02 0.1293230 7.893093e+05
## commute_bike_pct 6.926663e+14 4.5735255 9.605368e+33
We fitted a logistic regression with the dependent variable defined as hospital within 5 km (yes/no) and socioeconomic controls (education, income, age, household composition, commuting patterns, minority share).
Income: Higher household income was weakly associated with increased odds of being near a hospital, but the effect size was small.
Education: Tracts with a higher share of college-educated residents were significantly more likely to have closer access.
Minority share: Higher minority percentages were linked to longer distances, controlling for income and education.
Commuting variables: Walking and biking shares were not significant predictors, suggesting hospitals are not systematically sited near active commuting tracts.
All predictors had VIF < 5, reducing concern about multicollinearity.
Residual diagnostics indicated positive spatial autocorrelation (Moran’s I significant, p < 0.05), confirming that under-served tracts are geographically clustered rather than randomly distributed. A follow-up spatial error model showed similar coefficient signs but revealed that unobserved spatial processes (e.g., zoning, land availability) partly drive hospital placement.
Based on the evidence, the distribution of hospitals in Metro Atlanta is not fully equitable. While the majority of tracts enjoy short travel distances, under-served clusters emerge on the urban fringe and in minority-majority areas. Socioeconomic factors such as education and minority share significantly predict poorer access, even after controlling for income. The spatial clustering of residuals further reinforces the conclusion: hospital siting patterns contribute to inequities in healthcare accessibility across the region.