Mini-Assignment 3

Jiaqi Gu

2024-10-02

Healthcare Equity Analysis

Data

Census ACS “hhincome” data for 2022 in Fulton and Dekalb Counties
Hospital POI data from Yelp within the two counties

tract <- suppressMessages(
  get_acs(geography = 'tract',
          state = 'GA',
          county = c('Fulton','Dekalb'),
          variables = c(hhincome = 'B19019_001'),
          year = 2022,
          survey = "acs5", 
          geometry = TRUE,
          output = "wide")) %>% 
  select(GEOID, hhincome = hhincomeE)

## Warning: • You have not set a Census API key. Users without a key are limited to 500
## queries per day and may experience performance limitations.
## ℹ For best results, get a Census API key at
## http://api.census.gov/data/key_signup.html and then supply the key to the
## `census_api_key()` function to use it throughout your tidycensus session.
## This warning is displayed once per session.

Classify Census Tracts by the three categories.

tract <- tract %>% 
  mutate(inc_category = case_when(
    hhincome >= 200000 ~ "200,000-300,000",
    hhincome >= 100000 ~ "100,000-200,000",
    TRUE ~ "0-100,000"))

print(table(tract$inc_category))

## 
##       0-100,000 100,000-200,000 200,000-300,000 
##             345             156              29

Display

## tmap mode set to interactive viewing
tmap_mode("view")

tm_shape(tract) + tm_polygons(col = "hhincome")

tm_shape(tract) + tm_polygons(col = "inc_category")

Yelp data

# Reading the yelp data
yelp_hosi <- st_read("https://raw.githubusercontent.com/ujhwang/urban-analytics-2024/main/Assignment/mini_3/yelp_hospital.geojson")

## Reading layer `yelp_hospital' from data source 
##   `https://raw.githubusercontent.com/ujhwang/urban-analytics-2024/main/Assignment/mini_3/yelp_hospital.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 129 features and 23 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -84.56242 ymin: 33.60009 xmax: -84.08677 ymax: 34.0701
## Geodetic CRS:  WGS 84

skim(yelp_hosi)

Data summary
Name	yelp_hosi
Number of rows	129
Number of columns	24
_______________________
Column type frequency:
character	18
logical	1
numeric	5
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique
id	0	1.00	22	22	0	129
alias	0	1.00	17	73	0	129
name	0	1.00	8	64	0	115
image_url	0	1.00	0	68	86	44
url	0	1.00	174	230	0	129
categories	0	1.00	9	62	0	21
transactions	0	1.00	0	0	129	1
phone	0	1.00	0	12	5	107
display_phone	0	1.00	0	14	5	107
location.address1	2	0.98	0	34	14	87
location.address2	13	0.90	0	7	100	15
location.address3	23	0.82	0	52	104	3
location.city	0	1.00	6	14	0	13
location.zip_code	0	1.00	0	5	1	33
location.country	0	1.00	2	2	0	1
location.state	0	1.00	2	2	0	1
location.display_address	0	1.00	17	93	0	93
geometry	0	1.00	21	38	0	103

Variable type: logical

skim_variable	n_missing	complete_rate	mean	count
is_closed	0	1	0	FAL: 129

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
review_count	1	12.90	42.98	0.00	0.00	0.00	2.00	319.00	▇▁▁▁▁
rating	1	1.05	1.47	0.00	0.00	0.00	2.00	5.00	▇▁▂▁▁
distance	1	1188.10	735.72	204.09	564.81	1199.68	1647.70	4098.37	▇▇▂▁▁
coordinates.latitude	1	33.86	0.12	33.60	33.77	33.81	33.92	34.07	▁▇▅▅▅
coordinates.longitude	1	-84.34	0.06	-84.56	-84.39	-84.35	-84.32	-84.09	▁▆▇▁▁

tm_shape(yelp_hosi) + tm_dots(col="red") + tm_shape(tract) + tm_borders()

Appending Census data

make sure the CRS of the two sf objects are same
Calculate how many hospitals are within each census tract
Join the two files with the count of hospitals

## Check to see that the CRS for both the sf files are the same
head(tract$geometry)

## Geometry set for 6 features 
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -84.47194 ymin: 33.65339 xmax: -84.16538 ymax: 33.77463
## Geodetic CRS:  NAD83
## First 5 geometries:

## MULTIPOLYGON (((-84.18651 33.72737, -84.18381 3...

## MULTIPOLYGON (((-84.34921 33.76496, -84.34915 3...

## MULTIPOLYGON (((-84.34919 33.7481, -84.34919 33...

## MULTIPOLYGON (((-84.2965 33.76991, -84.29264 33...

## MULTIPOLYGON (((-84.4686 33.66347, -84.46678 33...

head(yelp_hosi$geometry)

## Geometry set for 6 features 
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -84.39409 ymin: 33.81016 xmax: -84.26324 ymax: 34.0701
## Geodetic CRS:  WGS 84
## First 5 geometries:

## POINT (-84.26324 34.0701)

## POINT (-84.27857 34.05106)

## POINT (-84.26324 34.0701)

## POINT (-84.28281 34.06029)

## POINT (-84.37021 33.84957)

tract <- tract %>% st_transform(crs=4326) 
yelp_hosi <- yelp_hosi %>% st_transform(crs=4326)

# Calculate the count of hospitals at the tract level
buffer_distance <- 402.336
tract_buffered <- tract %>%
  st_buffer(dist = buffer_distance)

intersections <- st_intersects(tract_buffered, yelp_hosi)

tract$buffered_hospital_count <- sapply(intersections, length)

# Join tract geometry with hospitals
tract_hosi <- st_join(tract, yelp_hosi %>% mutate(hos_count = 1))

tract_hosi_count <- tract_hosi %>%
  group_by(GEOID) %>%
  summarise(hos_count = sum(hos_count, na.rm = TRUE))

tract <- tract %>%
  left_join(tract_hosi_count %>% st_drop_geometry(), 
            by = "GEOID")

tract$hos_count[is.na(tract$hos_count)] <- 0

tract <- tract %>%
  mutate(total_hospital_count = hos_count + buffered_hospital_count)

skim(tract)

## Warning: Couldn't find skimmers for class: sfc_MULTIPOLYGON, sfc; No
## user-defined `sfl` provided. Falling back to `character`.

Data summary
Name	tract
Number of rows	530
Number of columns	7
_______________________
Column type frequency:
character	3
numeric	4
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
GEOID	1	11	11	530
inc_category	1	9	15	3
geometry	1	270	6019	530

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
hhincome	11	0.98	94833.94	52684.71	15625	56193	81096	119728.5	250001	▆▇▃▂▁
buffered_hospital_count	0	1.00	0.65	2.04	0	0	0	0.0	20	▇▁▁▁▁
hos_count	0	1.00	0.24	1.38	0	0	0	0.0	20	▇▁▁▁▁
total_hospital_count	0	1.00	0.90	3.17	0	0	0	0.0	40	▇▁▁▁▁

# Let's check to see if the points of yoga studios and their counts in the polygon data match
tm_shape(tract) + tm_polygons(col="hos_count") +
  tm_shape(yelp_hosi) + tm_dots()

Some questions

Are hospitals more likely be in neighborhoods with high household income?

ggplot(tract, aes(x=hhincome, y=total_hospital_count)) +
  geom_point() +
  ylab("Number of Hospitals") +
  xlab("Household Income in Tract")

## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_point()`).

Are hospitals in high-household-income areas?

tm_shape(tract) + tm_polygons(col="inc_category") +
  tm_shape(yelp_hosi) +tm_dots()

Most of hospitals are in areas where average household income are above 100,000.

Create a “boxplot”

boxplot(total_hospital_count ~ inc_category, data=tract, 
        main="Boxplot of Hospitals by Household Income Type", 
        xlab="Household Income types", 
        ylab="Hospitals count")

Whether a Tract has a hospital nearby or not

tract <- tract %>% 
  mutate(hosi_binary = total_hospital_count > 0)

hosi_binary_summary <- tract %>% 
  st_drop_geometry() %>% 
  group_by(hosi_binary, inc_category) %>% 
  summarize(n = n()) %>% 
  ungroup() %>% 
  mutate(proportion = n / sum(n))

## `summarise()` has grouped output by 'hosi_binary'. You can override using the
## `.groups` argument.

hosi_binary_summary %>% 
  ggplot(aes(x = inc_category, y = proportion, fill = hosi_binary)) +
  geom_bar(stat = 'identity', position = 'fill')

Is the spatial distribution of hospitals in Fulton and DeKalb counties equitable?

Hospitals are more distributed in areas with higher household income, but there is no huge imbalance between areas.