Mini-Assignment 3

Jiaqi Gu

2024-10-02

Healthcare Equity Analysis

Data

  • Census ACS “hhincome” data for 2022 in Fulton and Dekalb Counties
  • Hospital POI data from Yelp within the two counties
tract <- suppressMessages(
  get_acs(geography = 'tract',
          state = 'GA',
          county = c('Fulton','Dekalb'),
          variables = c(hhincome = 'B19019_001'),
          year = 2022,
          survey = "acs5", 
          geometry = TRUE,
          output = "wide")) %>% 
  select(GEOID, hhincome = hhincomeE)
## Warning: • You have not set a Census API key. Users without a key are limited to 500
## queries per day and may experience performance limitations.
## ℹ For best results, get a Census API key at
## http://api.census.gov/data/key_signup.html and then supply the key to the
## `census_api_key()` function to use it throughout your tidycensus session.
## This warning is displayed once per session.


Classify Census Tracts by the three categories.

tract <- tract %>% 
  mutate(inc_category = case_when(
    hhincome >= 200000 ~ "200,000-300,000",
    hhincome >= 100000 ~ "100,000-200,000",
    TRUE ~ "0-100,000"))

print(table(tract$inc_category))
## 
##       0-100,000 100,000-200,000 200,000-300,000 
##             345             156              29

Display

## tmap mode set to interactive viewing
tmap_mode("view")

tm_shape(tract) + tm_polygons(col = "hhincome")
tm_shape(tract) + tm_polygons(col = "inc_category")


Yelp data

# Reading the yelp data
yelp_hosi <- st_read("https://raw.githubusercontent.com/ujhwang/urban-analytics-2024/main/Assignment/mini_3/yelp_hospital.geojson")
## Reading layer `yelp_hospital' from data source 
##   `https://raw.githubusercontent.com/ujhwang/urban-analytics-2024/main/Assignment/mini_3/yelp_hospital.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 129 features and 23 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -84.56242 ymin: 33.60009 xmax: -84.08677 ymax: 34.0701
## Geodetic CRS:  WGS 84


skim(yelp_hosi)
Data summary
Name yelp_hosi
Number of rows 129
Number of columns 24
_______________________
Column type frequency:
character 18
logical 1
numeric 5
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
id 0 1.00 22 22 0 129 0
alias 0 1.00 17 73 0 129 0
name 0 1.00 8 64 0 115 0
image_url 0 1.00 0 68 86 44 0
url 0 1.00 174 230 0 129 0
categories 0 1.00 9 62 0 21 0
transactions 0 1.00 0 0 129 1 0
phone 0 1.00 0 12 5 107 0
display_phone 0 1.00 0 14 5 107 0
location.address1 2 0.98 0 34 14 87 0
location.address2 13 0.90 0 7 100 15 0
location.address3 23 0.82 0 52 104 3 0
location.city 0 1.00 6 14 0 13 0
location.zip_code 0 1.00 0 5 1 33 0
location.country 0 1.00 2 2 0 1 0
location.state 0 1.00 2 2 0 1 0
location.display_address 0 1.00 17 93 0 93 0
geometry 0 1.00 21 38 0 103 0

Variable type: logical

skim_variable n_missing complete_rate mean count
is_closed 0 1 0 FAL: 129

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
review_count 0 1 12.90 42.98 0.00 0.00 0.00 2.00 319.00 ▇▁▁▁▁
rating 0 1 1.05 1.47 0.00 0.00 0.00 2.00 5.00 ▇▁▂▁▁
distance 0 1 1188.10 735.72 204.09 564.81 1199.68 1647.70 4098.37 ▇▇▂▁▁
coordinates.latitude 0 1 33.86 0.12 33.60 33.77 33.81 33.92 34.07 ▁▇▅▅▅
coordinates.longitude 0 1 -84.34 0.06 -84.56 -84.39 -84.35 -84.32 -84.09 ▁▆▇▁▁
tm_shape(yelp_hosi) + tm_dots(col="red") + tm_shape(tract) + tm_borders()

Appending Census data

  1. make sure the CRS of the two sf objects are same
  2. Calculate how many hospitals are within each census tract
  3. Join the two files with the count of hospitals
## Check to see that the CRS for both the sf files are the same
head(tract$geometry)
## Geometry set for 6 features 
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -84.47194 ymin: 33.65339 xmax: -84.16538 ymax: 33.77463
## Geodetic CRS:  NAD83
## First 5 geometries:
## MULTIPOLYGON (((-84.18651 33.72737, -84.18381 3...
## MULTIPOLYGON (((-84.34921 33.76496, -84.34915 3...
## MULTIPOLYGON (((-84.34919 33.7481, -84.34919 33...
## MULTIPOLYGON (((-84.2965 33.76991, -84.29264 33...
## MULTIPOLYGON (((-84.4686 33.66347, -84.46678 33...
head(yelp_hosi$geometry)
## Geometry set for 6 features 
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -84.39409 ymin: 33.81016 xmax: -84.26324 ymax: 34.0701
## Geodetic CRS:  WGS 84
## First 5 geometries:
## POINT (-84.26324 34.0701)
## POINT (-84.27857 34.05106)
## POINT (-84.26324 34.0701)
## POINT (-84.28281 34.06029)
## POINT (-84.37021 33.84957)
tract <- tract %>% st_transform(crs=4326) 
yelp_hosi <- yelp_hosi %>% st_transform(crs=4326)

# Calculate the count of hospitals at the tract level
buffer_distance <- 402.336
tract_buffered <- tract %>%
  st_buffer(dist = buffer_distance)

intersections <- st_intersects(tract_buffered, yelp_hosi)

tract$buffered_hospital_count <- sapply(intersections, length)

# Join tract geometry with hospitals
tract_hosi <- st_join(tract, yelp_hosi %>% mutate(hos_count = 1))

tract_hosi_count <- tract_hosi %>%
  group_by(GEOID) %>%
  summarise(hos_count = sum(hos_count, na.rm = TRUE))

tract <- tract %>%
  left_join(tract_hosi_count %>% st_drop_geometry(), 
            by = "GEOID")

tract$hos_count[is.na(tract$hos_count)] <- 0

tract <- tract %>%
  mutate(total_hospital_count = hos_count + buffered_hospital_count)
skim(tract)
## Warning: Couldn't find skimmers for class: sfc_MULTIPOLYGON, sfc; No
## user-defined `sfl` provided. Falling back to `character`.
Data summary
Name tract
Number of rows 530
Number of columns 7
_______________________
Column type frequency:
character 3
numeric 4
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
GEOID 0 1 11 11 0 530 0
inc_category 0 1 9 15 0 3 0
geometry 0 1 270 6019 0 530 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
hhincome 11 0.98 94833.94 52684.71 15625 56193 81096 119728.5 250001 ▆▇▃▂▁
buffered_hospital_count 0 1.00 0.65 2.04 0 0 0 0.0 20 ▇▁▁▁▁
hos_count 0 1.00 0.24 1.38 0 0 0 0.0 20 ▇▁▁▁▁
total_hospital_count 0 1.00 0.90 3.17 0 0 0 0.0 40 ▇▁▁▁▁
# Let's check to see if the points of yoga studios and their counts in the polygon data match
tm_shape(tract) + tm_polygons(col="hos_count") +
  tm_shape(yelp_hosi) + tm_dots()

Some questions

Are hospitals more likely be in neighborhoods with high household income?

ggplot(tract, aes(x=hhincome, y=total_hospital_count)) +
  geom_point() +
  ylab("Number of Hospitals") +
  xlab("Household Income in Tract")
## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_point()`).


Are hospitals in high-household-income areas?

tm_shape(tract) + tm_polygons(col="inc_category") +
  tm_shape(yelp_hosi) +tm_dots()

Most of hospitals are in areas where average household income are above 100,000.


Create a “boxplot”

boxplot(total_hospital_count ~ inc_category, data=tract, 
        main="Boxplot of Hospitals by Household Income Type", 
        xlab="Household Income types", 
        ylab="Hospitals count")


Whether a Tract has a hospital nearby or not

tract <- tract %>% 
  mutate(hosi_binary = total_hospital_count > 0)

hosi_binary_summary <- tract %>% 
  st_drop_geometry() %>% 
  group_by(hosi_binary, inc_category) %>% 
  summarize(n = n()) %>% 
  ungroup() %>% 
  mutate(proportion = n / sum(n))
## `summarise()` has grouped output by 'hosi_binary'. You can override using the
## `.groups` argument.
hosi_binary_summary %>% 
  ggplot(aes(x = inc_category, y = proportion, fill = hosi_binary)) +
  geom_bar(stat = 'identity', position = 'fill')

Is the spatial distribution of hospitals in Fulton and DeKalb counties equitable?

Hospitals are more distributed in areas with higher household income, but there is no huge imbalance between areas.