library(tidycensus)
library(sf)
library(tmap)
library(jsonlite)
library(tidyverse)
library(httr)
library(jsonlite)
library(reshape2)
library(here)
library(knitr)
library(magrittr)
library(units)
census_api_key(Sys.getenv('CENSUS_API'))
tract <- suppressMessages(
get_acs(geography = "tract",
state = "GA",
county = c("Cherokee", "Clayton", "Cobb", "DeKalb", "Douglas", "Fayette", "Forsyth", "Fulton", "Gwinnett", "Henry", "Rockdale"),
variables = c(median_rent_2br = "B25031_004E"
),
year = 2023,
survey = "acs5",
geometry = TRUE,
output = "wide")) %>%
select(GEOID,
median_rent_2br= median_rent_2br)
Atlanta<- tigris::places("GA", progress_bar = F) %>%
filter(NAME %in% c("Atlanta"))
tmap_mode("view")
tm_shape(tract) + tm_fill(fill = "median_rent_2br") +
tm_shape(Atlanta) + tm_borders(col = "black", lwd = 2)
tract %<>%
drop_na(median_rent_2br) %>%
filter(median_rent_2br > 300)
hist(tract$median_rent_2br, breaks = 50,
main = "Distribution of Median Rent (2-Bedroom Units) in Metro Atlanta",
xlab = "Median Rent (USD)",
ylab = "Number of Census Tracts",
col = "lightblue",
border = "white")
abline(v = mean(tract$median_rent_2br), col = "red", lwd = 3)
q <- quantile(tract$median_rent_2br, probs = c(0.33, 0.67), na.rm = TRUE)
tract <- tract %>%
mutate(rent_category = case_when(
median_rent_2br <= q[1] ~ "Low",
median_rent_2br <= q[2] ~ "Medium",
TRUE ~ "High"
))
tract$rent_category <- factor(tract$rent_category, levels = c("High", "Medium", "Low"))
table(tract$rent_category)
##
## High Medium Low
## 232 237 233
tm_shape(tract) + tm_fill(col = "rent_category", palette = c("blue","lightblue","white")) +
tm_shape(Atlanta) + tm_borders(col = "black", lwd = 0.1) +
tm_view(set.view = c(-84.397671, 33.776624, 11))
hospitals <- st_read("https://raw.githubusercontent.com/ujhwang/urban-analytics-2025/main/Assignment/mini_3/hospital_11counties.geojson")
hospitals %>%
select(-address) %>%
head(5) %>%
kable()
skimr::skim(hospitals)
6.Visualize Data
tm_shape(tract) + tm_borders(fill='white')+
tm_shape(hospitals) + tm_dots(fill="red")
gcs_id <- 4326
pcs_id <- 32616
tract <- st_transform(tract, pcs_id)
hospitals <- st_transform(hospitals, pcs_id)
tract_hospitals <- st_join(tract, hospitals, join = st_intersects)
geoid_tracts_with_hospitals <- tract_hospitals %>%
filter(!is.na(name)) %>%
pull(GEOID) %>%
unique()
tract %<>% mutate(hospitals = GEOID %in% geoid_tracts_with_hospitals)
tm_shape(tract) + tm_fill(col = "hospitals", palette = c("pink", "lightblue")) +
tm_shape(hospitals) + tm_dots(size = 0.25, fill = "red")
tm_shape(tract) + tm_fill(col = "rent_category", palette = c("blue","lightblue","white")) +
tm_shape(hospitals) +tm_dots(size = 0.25, fill = "red")
hosp_summary <- tract %>%
st_drop_geometry() %>%
group_by(hospitals, rent_category) %>%
summarize(n = n()) %>%
ungroup() %>%
mutate(proportion = n / sum(n))
hosp_summary %>%
ggplot(aes(x = rent_category, y = proportion, fill = hospitals)) +
scale_fill_manual(values = c("TRUE" = "darkred", "FALSE" = "lightgrey")) +
geom_bar(stat = 'identity', position = 'fill')
tract_centroids <- st_centroid(tract)
tract_buffers <- st_buffer(tract_centroids, dist = 5000)
hospital_counts <- lengths(st_intersects(tract_buffers, hospitals))
tract <- tract %>%
mutate(
hospitals_within5km = hospital_counts,
hospital_within5km = hospitals_within5km > 0
)
7.Visualization - A choropleth was created using tract data, where tracts are shaded based on the number of hospitals within 5 km and outlined by rent category.
ggplot(tract) +
geom_sf(aes(fill = hospitals_within5km, color = rent_category)) +
scale_fill_gradient(low = "white", high = "darkred", name = "Hospitals within 5 km") +
scale_color_brewer(palette = "Set1", name = "Rent Category") +
labs(title = "Healthcare Access by Tract and Rent Category") +
theme_minimal()
8.Choose and collect data for control variables - Population density was chosen as more urbanized or dense areas usually have higher hospital concentration. Household income was chosen wealthier populations often have greater residential choice and can self-select into areas with more healthcare options. Median age was chosen as older populations may deliberately choose to live nearer to healthcare services, influencing hospital location patterns. Finally percentage of ethnic minority was chosen as health disparities often track along racial or ethnic lines and captures potential inequity.
tract_control_vars <- suppressMessages(
get_acs(geography = "tract",
state = 'GA',
county = c("Cherokee", "Clayton", "Cobb", "DeKalb", "Douglas", "Fayette", "Forsyth", "Fulton", "Gwinnett", "Henry", "Rockdale"),
variables = c(pop = "B01003_001E",
hhincome = "B19013_001E",
median_age = "B01002_001E",
non_hispanic_white = "B03002_003E",
race_ethnic_total = "B03002_001E"),
year = 2023,
survey = "acs5",
geometry = TRUE,
output = "wide")) %>%
mutate(minority_pct = (race_ethnic_total - non_hispanic_white)/race_ethnic_total)
# Calculate population density
tract_control_vars %<>%
mutate(area = st_area(.) %>% set_units(mi^2)) %>%
mutate(pop_density = pop/area)
# Remove unnecessary columns
tract_control_vars %<>%
select(GEOID, hhincome, median_age, minority_pct,
pop_density) %>%
st_drop_geometry()
9.Run regression models A logistic regression model was run with the probability of presence of a hospital within 5km as the response variable. In addition a simple linear regression model was also run with the count of hospitals present within 5km radius as the response variable.
model_data <- tract %>% left_join(tract_control_vars, by = "GEOID")
regression_model1 <- glm(hospital_within5km ~ rent_category + hhincome + median_age +
minority_pct + pop_density,
data=model_data, family = "binomial")
summary(regression_model1)
##
## Call:
## glm(formula = hospital_within5km ~ rent_category + hhincome +
## median_age + minority_pct + pop_density, family = "binomial",
## data = model_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.299e+00 1.124e+00 2.045 0.04088 *
## rent_categoryMedium -9.443e-01 3.274e-01 -2.884 0.00392 **
## rent_categoryLow -1.386e+00 3.558e-01 -3.895 9.81e-05 ***
## hhincome -2.640e-06 4.359e-06 -0.606 0.54480
## median_age -1.264e-02 1.963e-02 -0.644 0.51959
## minority_pct -1.567e-01 6.236e-01 -0.251 0.80163
## pop_density 2.704e-04 6.295e-05 4.295 1.74e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 651.89 on 694 degrees of freedom
## Residual deviance: 583.82 on 688 degrees of freedom
## (7 observations deleted due to missingness)
## AIC: 597.82
##
## Number of Fisher Scoring iterations: 6
regression_model2 <- lm(hospitals_within5km ~ rent_category + hhincome + median_age +
minority_pct + pop_density,
data = model_data)
summary(regression_model2)
##
## Call:
## lm(formula = hospitals_within5km ~ rent_category + hhincome +
## median_age + minority_pct + pop_density, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.5571 -1.4753 -0.2714 1.1324 8.9046
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.674e+00 8.110e-01 5.764 1.24e-08 ***
## rent_categoryMedium -1.154e+00 2.127e-01 -5.427 7.95e-08 ***
## rent_categoryLow -1.172e+00 2.424e-01 -4.837 1.63e-06 ***
## hhincome -5.346e-06 3.250e-06 -1.645 0.100
## median_age -1.033e-02 1.493e-02 -0.692 0.489
## minority_pct -1.978e+00 4.922e-01 -4.019 6.50e-05 ***
## pop_density 2.593e-04 2.093e-05 12.389 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.094 on 688 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.3256, Adjusted R-squared: 0.3197
## F-statistic: 55.35 on 6 and 688 DF, p-value: < 2.2e-16
1. Model 1:Logistic regression model
Probability of at least one hospital within 5 km
- The results indicate that tracts in lower-rent categories are
significantly less likely to have a hospital nearby compared to
high-rent tracts. Both “Medium” and “Low” rent neighborhoods show
negative and statistically significant coefficients.
- Population density has a strong and positive effect, confirming that
denser areas are much more likely to have hospitals.
- Median household income, age, and minority share are not significant
once rent and density are accounted for.
- Taken together, these findings suggest that hospital access is
spatially skewed toward high-rent, high-density neighborhoods,
reinforcing the idea of spatial inequity in the metro Atlanta
region.
2. Model 2: Simple linear regression
Number of hospitals within 5 km
- The linear regression results confirm and deepen this pattern.
Compared to high-rent areas, both medium and low rent tracts have on
average 1.1 fewer hospitals within 5 km, a statistically significant
disadvantage.
- Population density again emerges as a strong positive driver with each
increase in density bringing about a sizable increase in hospital
counts.
- Importantly, the proportion of minority residents has a significant
negative coefficient, implying that areas with higher shares of minority
populations have access to fewer hospitals, even after controlling for
rent, density, and income.