# Read the hospital point layer (GeoJSON) straight from the course repository
hospitals <- st_read(
  dsn = paste0(
    "https://raw.githubusercontent.com/ujhwang/urban-analytics-2025/",
    "main/Assignment/mini_3/hospital_11counties.geojson"
  )
)
## Reading layer `hospital_11counties' from data source
## `https://raw.githubusercontent.com/ujhwang/urban-analytics-2025/main/Assignment/mini_3/hospital_11counties.geojson'
## using driver `GeoJSON'
## Simple feature collection with 119 features and 8 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -84.73147 ymin: 33.42719 xmax: -83.92052 ymax: 34.24585
## Geodetic CRS: WGS 84
# ACS variables to download; the vector names are used as column names below
census_var <- c(
  mhincome = "B19019_001E", # Median household income in the past 12 months by household size
  totalpop = "B01003_001E", # Total population
  whitepop = "B02001_002E", # White population
  blackpop = "B02001_003E"  # Black population
)

# Counties that make up the study area (11 counties around Atlanta, GA)
census_county <- c(
  "Cherokee", "Clayton", "Cobb", "DeKalb", "Douglas", "Fayette",
  "Forsyth", "Fulton", "Gwinnett", "Henry", "Rockdale"
)

# Retrieve 2023 ACS data at census tract level, with tract geometries
census <- get_acs(
  geography = "tract", state = "GA", county = census_county,
  output = "wide", geometry = TRUE, year = 2023,
  variables = census_var
) %>%
  mutate(
    # Percentage of white / black residents out of the tract's total
    # population. Tracts with zero population would give 0 / 0 = NaN, so they
    # are set to an explicit NA instead.
    white_pct = if_else(totalpop > 0, whitepop / totalpop * 100, NA_real_),
    black_pct = if_else(totalpop > 0, blackpop / totalpop * 100, NA_real_),
    # 1 when the tract's median household income is below the median across
    # all downloaded tracts, 0 otherwise (NA when income is missing)
    belowmhincome = ifelse(mhincome < median(mhincome, na.rm = TRUE), 1, 0)
  ) %>%
  # Split NAME ("tract; county; state") into three separate columns
  separate(col = NAME, into = c("tract", "county", "state"), sep = "; ")
• Median Household Income:
Median household income is chosen
as a variable because lower-income and lower socioeconomic groups are often
seen as having fewer opportunities, and I want to test that hypothesis.
Lower-income households may also need to live in closer
proximity to hospitals because not all households have cars to travel
far for healthcare.
• Total Population:
The total population
variable is chosen so that the white and black population counts can later
be divided by it, standardizing them across census tracts.
• White Population &
Black Population:
Race is always an important factor in
evaluating equity and equality. I wanted to see if race influences the
number of accessible hospitals, since racial minority neighborhoods tend
to lack decent built infrastructure and services. White population
and black population will later be divided by total population to create
the variables for the percentage of white/black population.
# Reproject both layers to EPSG:32616 so buffer distances are in metres
census <- census %>% st_transform(crs = 32616)
hospitals <- hospitals %>% st_transform(crs = 32616)

# Collapse each tract polygon to its centroid point
census_centroids <- census %>% st_centroid()

# Draw a 10 km (10,000 m) buffer around every tract centroid
census_buffers <- census_centroids %>% st_buffer(dist = 10000)
# Count hospitals per buffer: every hospital carries n = 1, the spatial join
# attaches hospitals to the buffers they fall in, and n is summed per tract.
# na.rm = TRUE turns the NA of a buffer with no matching hospital into 0.
census_hospitalcount <- st_join(census_buffers, mutate(hospitals, n = 1)) %>%
  group_by(GEOID) %>%
  summarise(hosp_count_10km = sum(n, na.rm = TRUE))

# Attach the counts to the tract polygons; the buffer geometry is dropped
# first so only the hosp_count_10km attribute is joined on GEOID
census_final <- left_join(
  census,
  st_drop_geometry(census_hospitalcount),
  by = "GEOID"
)
# County-level summary of the tract data
census_dissolve <- census_final %>%
  group_by(county) %>%
  summarise(
    # Median of the tract-level median household incomes in the county
    med_mhincome = median(mhincome, na.rm = TRUE),
    # Black share of the county's entire population, in percent
    black_pct = (sum(blackpop) / sum(totalpop)) * 100
  ) %>%
  mutate(
    # Strip the word "county" (any case) and surrounding whitespace so that
    # only the bare county name is left for plotting
    county = str_trim(
      str_replace_all(county, regex("county", ignore_case = TRUE), "")
    )
  )
# Bar chart of the county-level median household income.
# scales::comma is namespace-qualified everywhere so the chart does not depend
# on scales being attached, and the label precision is fixed to whole dollars
# instead of being derived from the data.
ggplot(census_dissolve, aes(x = county, y = med_mhincome)) +
  geom_col(fill = "#7dd481") +
  labs(
    title = "Median Household Income by County",
    x = "County",
    y = "Median Household Income ($)"
  ) +
  # NOTE(review): a bar whose value exceeds the upper limit would be dropped
  # by the scale limits; confirm 150,000 stays above every county value
  scale_y_continuous(labels = scales::comma, limits = c(0, 150000)) +
  geom_text(
    aes(label = scales::comma(med_mhincome, accuracy = 1)),
    vjust = -0.5, # position text above bar
    size = 4
  ) +
  theme_minimal()
Among all counties, Forsyth County has the highest median household income, followed by Fayette County and Cherokee County. The lowest median household income is in Clayton County. The average median household income across all counties is 92,982.2. This bar chart shows that Clayton, DeKalb, Douglas, Gwinnett, Henry, and Rockdale Counties fall below this cross-county average of median household income.
# Bar chart of the black population percentage per county.
# Labels are pinned to one decimal place: without an explicit accuracy,
# scales::comma() chooses the number of decimals from the data, so the labels
# could change format between runs. scales::comma is namespace-qualified so
# the chart does not depend on scales being attached.
ggplot(census_dissolve, aes(x = county, y = black_pct)) +
  geom_col(fill = "#828282") +
  labs(
    title = "Black Population Percentage by County",
    x = "County",
    y = "Black Population % (%)"
  ) +
  scale_y_continuous(labels = scales::comma, limits = c(0, 70)) +
  geom_text(
    aes(label = scales::comma(black_pct, accuracy = 0.1)),
    vjust = -0.5, # position text above bar
    size = 4
  ) +
  theme_minimal()
Among all counties, Clayton County has the highest percentage of black population, while Forsyth County has the smallest percentage. Linking this back to the previous median household income bar chart - Forsyth highest, Clayton lowest - reveals some racial and income disparities.
# Interactive choropleth of the tract-level black population percentage
tmap_mode("view")
tm_shape(census_final) +
  tm_polygons(
    "black_pct",
    # ColorBrewer palette name in its canonical spelling, matching the
    # "BuGn" naming used for the income map
    palette = "Purples",
    style = "quantile", # quantile classification method
    n = 5, # 5 bins
    border.col = "grey",
    lwd = 0.5,
    title = "Black Population %"
  ) +
  tm_layout(title = "Black Population Percentage by Census Tract")
Looking only at the spatial distribution across the City of Atlanta, census tracts in the south of the City of Atlanta appear to have a higher black population percentage.
# Choropleth of the tract-level median household income
tm_shape(census_final) +
  tm_polygons(
    "mhincome",
    title = "Median Household Income",
    # Five quantile classes drawn with the green "BuGn" palette
    style = "quantile",
    n = 5,
    palette = "BuGn",
    # Thin grey tract outlines
    border.col = "grey",
    lwd = 0.5
  ) +
  tm_layout(title = "Median Household Income by Census Tract")