#1. Import the hospital POI dataand make sure it is tidy and ready for analysis.
#load JSON files
json <- fromJSON("https://raw.githubusercontent.com/ujhwang/urban-analytics-2025/main/Assignment/mini_3/hospital_11counties.geojson", simplifyVector = F)
str(json)
not_flat <- fromJSON("https://raw.githubusercontent.com/ujhwang/urban-analytics-2025/main/Assignment/mini_3/hospital_11counties.geojson", flatten = F)
glimpse(not_flat)
flat <- fromJSON("https://raw.githubusercontent.com/ujhwang/urban-analytics-2025/main/Assignment/mini_3/hospital_11counties.geojson", flatten = T)
glimpse(flat)
#bring JSON data in a readable format for data analysis
#put the features list into a tibble
df <- tibble(features = not_flat$features)
#unnest features into columns
unnested_long_wide <- df %>%
unnest_wider(features) %>%
unnest_wider(properties)
kable(unnested_long_wide)
#clean data
poi <- unnested_long_wide %>%
filter(!is.na(name) & !sapply(geometry$coordinates, function(x) any(is.na(x))))
poi_unique <- poi %>%
distinct(name, .keep_all = TRUE)
glue("Before dropping duplicated rows, there were {nrow(poi)} rows. After dropping them, there are {nrow(poi_unique)} rows.")
## Before dropping duplicated rows, there were 119 rows. After dropping them, there are 104 rows.
#2. Explore variables in the Census ACS 5-year estimates, carefully select those that are relevant for an equity analysis, and provide justification for each variable you select.
#Think about how each variable captures different circumstances and needs for healthcare.
### Get Census data
# Define API key
census_api_key(Sys.getenv("MY_API_KEY"))
## To install your API key for use in future sessions, run this function with `install = TRUE`.
#census_api_key("698568bc0852158fccf6044f4cd5ec586b74b2e0", install = TRUE, overwrite = TRUE)
# Pull data from ACS
acs_variables <- load_variables(year = 2023, dataset = "acs5", cache = TRUE)
#3. Download ACS data for the selected variables at the Census Tract level for the 11 counties using the tidycensus.
#Pull variable
race_eth <- get_acs(
geography = "tract",
state = "GA",
county = c("Cherokee", "Clayton", "Cobb", "DeKalb", "Douglas", "Fayette",
"Forsyth", "Fulton", "Gwinnett", "Henry", "Rockdale"),
variables = c(total = "B03002_001",
hisp = "B03002_012",
non_hisp_white = "B03002_003",
non_hisp_black = "B03002_004",
non_hisp_asian = "B03002_006"),
year = 2023,
survey = "acs5",
geometry = TRUE,
output = "wide"
)
## Getting data from the 2019-2023 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
race_eth <- race_eth %>%
mutate(
tot_indv = totalE,
hisp = hispE,
non_hisp_white = non_hisp_whiteE,
non_hisp_black = non_hisp_blackE,
non_hisp_asian = non_hisp_asianE,
non_hisp_other = tot_indv - (hisp + non_hisp_white + non_hisp_black + non_hisp_asian)
) %>%
select(GEOID, NAME, tot_indv, hisp, non_hisp_white, non_hisp_black, non_hisp_asian, non_hisp_other, geometry)
hh_inc <- get_acs(
geography = "tract",
state = "GA",
county = c("Cherokee", "Clayton", "Cobb", "DeKalb", "Douglas", "Fayette",
"Forsyth", "Fulton", "Gwinnett", "Henry", "Rockdale"),
variables = c(median_hhinc = "B19013_001E"),
year = 2023,
survey = "acs5",
geometry = TRUE,
output = "wide"
)
## Getting data from the 2019-2023 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
edu <- get_acs(
geography = "tract",
state = "GA",
county = c("Cherokee", "Clayton", "Cobb", "DeKalb", "Douglas", "Fayette",
"Forsyth", "Fulton", "Gwinnett", "Henry", "Rockdale"),
variables = c(
edu_associate = "B06009_004E",
edu_bachelor = "B06009_005E",
edu_graduate = "B06009_006E",
edu_total = "B06009_001E"
),
year = 2023,
survey = "acs5",
geometry = TRUE,
output = "wide"
)
## Getting data from the 2019-2023 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
edu <- edu %>%
mutate(
high_edu_pct = round((edu_associate + edu_bachelor + edu_graduate) / edu_total * 100)
)
acs_joined <- race_eth %>%
left_join(hh_inc %>% st_drop_geometry(), by = "GEOID") %>%
left_join(edu %>% st_drop_geometry() %>% select(GEOID, high_edu_pct), by = "GEOID")
***I selected race/ethnicity and household income as key measures of equity. Household income is a particularly strong indicator, as it reflects the distribution of economic resources and the resulting purchasing power that influences access to and demand for quality services — in this case, hospitals and general healthcare. Lower-income households often face barriers, such as limited insurance coverage, higher transportation costs, and fewer healthcare options in their neighborhoods.
In the United States, significant disparities persist across racial and ethnic groups, which are often intertwined with income inequality. For instance, Black and Hispanic populations tend to have lower average household incomes and disproportionately higher rates of being uninsured or underinsured compared to White and Asian populations.
Lastly, I picked educational attainment as the highly educated individuals tend to have higher paying jobs and hence can afford to live in areas that have better access to services such as healthcare. ***
#3. Prepare the data for analysis
acs_joined_clean_na <- acs_joined %>%
filter(!is.na(`NAME.x`))
acs_joined_clean <- acs_joined_clean_na %>%
distinct(`NAME.x`, .keep_all = TRUE)
glue("Before dropping duplicated rows, there were {nrow(acs_joined_clean_na)} rows. After dropping them, there are {nrow(acs_joined_clean)} rows.")
## Before dropping duplicated rows, there were 1248 rows. After dropping them, there are 1248 rows.
acs_joined_final <- acs_joined_clean %>%
mutate(
pct_hisp = round(100 * (hisp /tot_indv), 1),
pct_non_hisp_white = round(100 * (non_hisp_white /tot_indv), 1),
pct_non_hisp_black = round(100 * (non_hisp_black/tot_indv), 1),
pct_non_hisp_asian = round(100 * (non_hisp_asian/tot_indv), 1),
pct_non_hisp_other = round(100 * (non_hisp_other/tot_indv), 1)
)
#join poi data with census data
poi_unique_clean <- unnested_long_wide %>%
rowwise() %>%
mutate(
lon = as.numeric(geometry$coordinates[[1]][1]),
lat = as.numeric(geometry$coordinates[[1]][2])
) %>%
ungroup() %>%
st_as_sf(coords = c("lon", "lat"), crs = 4326) %>%
st_transform(2240)
## Make sure the CRS of the two sf objects are in the same PCS
acs_joined_final %<>% st_transform(2240)
# Join hospital poi with tracts
poi_acs_joined_final <- st_join(poi_unique_clean, acs_joined_final)
# Get the presence of hospitals at the tract level
geoid_tracts_with_hospitals <- poi_acs_joined_final %>%
filter(!is.na(name)) %>%
pull(GEOID) %>%
unique()
# Join the presence of hospitals to `tract`
hospital_in_acs_tracts <- acs_joined_final %>% mutate(hospitals = GEOID %in% geoid_tracts_with_hospitals)
#Visualize hospitals in ACS tracts
tmap_mode("view")
tm_shape(hospital_in_acs_tracts) +
tm_fill(
col = "hospitals",
palette = c("lightgrey", "purple"),
title = "Hospital Presence",
alpha = 0.5
) +
tm_borders(col = "darkgray", lwd = 1) +
tm_shape(poi_unique_clean) +
tm_dots(
size = 0.6,
col = "red",
border.col = "white",
popup.vars = c("name")
) +
tm_layout(
title = "Hospitals in Georgia (Census Tracts)",
legend.outside = TRUE,
legend.position = c("left", "bottom")
)
*** I chose to calculate two metrics for this analysis: the presence of a hospital within 5-miles and the distance to the nearest hospital. The first metric provides a simple measure of local access, indicating whether residents have a hospital in close proximity, while the second metric captures finer-grained variation in accessibility by quantifying how far residents must travel to reach the nearest facility. These metrics are particularly useful for examining disparities, as tracts with lower income or lower educational attainment may have fewer hospitals nearby, reflecting inequities in service provision. ***
#5. Define and compute your operational measure of healthcare access (the dependent variable).
#Identify suitable metric(s) and justify your choices. Example metrics include:
#The presence of a hospital within X miles.
#The distance to the nearest hospital.
##Compute your chosen measure(s).
tract_centroids <- st_centroid(hospital_in_acs_tracts)
## Warning: st_centroid assumes attributes are constant over geometries
# Compute distance matrix (in meters)
dist_matrix <- st_distance(tract_centroids, poi_unique_clean)
hospital_in_acs_tracts$dist_to_nearest_hospital_m <- apply(dist_matrix, 1, min)
# Convert to miles
hospital_in_acs_tracts$dist_to_nearest_hospital_mi <- as.numeric(hospital_in_acs_tracts$dist_to_nearest_hospital_m) / 1609.34
tm_shape(hospital_in_acs_tracts) +
tm_fill(
col = "dist_to_nearest_hospital_mi",
palette = "viridis",
style = "quantile",
title = "Distance to Nearest Hospital (miles)"
) +
tm_shape(poi_unique_clean) +
tm_dots(col = "red", size = 0.3, border.col = "white")