Hospital distribution and Equity

Benson Bai

2024-10-04

Hospital distribution and equity

# Reading the yelp data
yelp_hospital <- st_read("https://raw.githubusercontent.com/ujhwang/urban-analytics-2024/main/Assignment/mini_3/yelp_hospital.geojson")

## Reading layer `yelp_hospital' from data source 
##   `https://raw.githubusercontent.com/ujhwang/urban-analytics-2024/main/Assignment/mini_3/yelp_hospital.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 129 features and 23 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -84.56242 ymin: 33.60009 xmax: -84.08677 ymax: 34.0701
## Geodetic CRS:  WGS 84

Let’s get the census tract level data of median household income first.

bg <- get_acs(geography = "tract",
              state = "GA",
              county = c("Fulton", "Dekalb"), 
              variables = c(hhincome = 'B19013_001'),
              year = 2022,
              survey = "acs5", # American Community Survey 5-year estimate
              geometry = TRUE, # We want sf object
              output = "wide",
              progress = FALSE) %>% 
  rename(hhincome = hhincomeE) %>% 
  st_transform(4326)

## Getting data from the 2018-2022 5-year ACS

## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.

Displaying the household income data with hospitals.

tmap_mode("view")

## tmap mode set to interactive viewing

tm_shape(bg) + tm_polygons(col = "hhincome", style = "jenks", palette="YlGnBu") + tm_shape(yelp_hospital) + 
  tm_dots(col="red", popup.vars=c("Name"="name", "Reviews"="review_count", "Rating"="rating"))

The hospitals seem to be located near the tracts with high median household income.

We can also get the health insurance rate data. However, the census only provides the data of number people with health insurance in each sex and each age group. We need to add them all together and divide it by the total population in each tract.

bg1 <- get_acs(geography = "tract",
              state = "GA",
              county = c("Fulton", "Dekalb"), 
              variables = c(h0 = "B27001_001", h1 = "B27001_004", h2 = "B27001_007", h3 = "B27001_010", h4 = "B27001_013", h5 = "B27001_016", h6 = "B27001_019", h7 = "B27001_022", h8 = "B27001_025", h9 = "B27001_028", h10 = "B27001_032", h11 = "B27001_035", h12 = "B27001_038", h13 = "B27001_041", h14 = "B27001_044", h15 = "B27001_047", h16 = "B27001_050", h17 = "B27001_053", h18 = "B27001_056"),
              year = 2022,
              survey = "acs5", 
              geometry = FALSE, 
              output = "wide",
              progress = FALSE) %>% 
  mutate(hinsurance = (h1E + h2E + h3E + h4E + h5E + h6E + h7E + h8E + h9E + h10E + h11E + h12E + h13E + h14E + h15E + h16E + h17E + h18E), totpop = h0E, hinsurance_rate = hinsurance / totpop)

## Getting data from the 2018-2022 5-year ACS

Joining the data to one table.

bg1 <- bg1[c("GEOID", "hinsurance", "totpop", "hinsurance_rate")]
full <- left_join(bg, bg1, 
                   by = "GEOID") %>%  
  drop_na(hinsurance_rate, hhincome)

We can now plot the health insurance rate data with hospitals.

#icon <- tmap_icons('https://icons.iconarchive.com/icons/icons-land/points-of-interest/256/Hospital-Red-icon.png')
tm_shape(full) + tm_polygons(col = "hinsurance_rate", 
                             style = "jenks", 
                             palette="YlGnBu", 
                             border.col = "white",
                             border.lwd = 1) +
  tm_shape(yelp_hospital) + 
  tm_dots(#shape = icon,
          col="red", 
          #size=0.12,
          popup.vars=c("Name"="name", "Reviews"="review_count", "Rating"="rating"))

To get the accessibility of hospitals for each census tract, we can do a 0.25 mile buffer on each of the census tract and count for the number of hospitals in the buffer zone.

full_buffered <- st_buffer(full, dist = 0.25 * 1609)
tract_hospital <- st_join(full_buffered, yelp_hospital %>% mutate(count = 1), join = st_intersects)
tract_hospital_count <- tract_hospital %>%
  group_by(GEOID) %>%
  summarise(count = sum(count, na.rm = T))
full_new <- full %>%
  left_join(tract_hospital_count %>% st_drop_geometry(), 
            by = "GEOID")

Plotting the number of accessible hospitals for each tract.

tm_shape(full_new) + tm_polygons(col = "count", style="jenks") + tm_shape(yelp_hospital) + 
  tm_dots(col="red", popup.vars=c("Name"="name", "Reviews"="review_count", "Rating"="rating"))

Are hospitals more likely to be in counties with higher health insurance coverage?

library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:httr':
## 
##     config

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

p <- ggplot(full_new, aes(x=hinsurance_rate, y=count, color = hhincome)) +
  geom_point()  +
  geom_smooth(method='lm', col='grey') +
  ylab("Number of Hospitals") +
  xlab("Percentage of Population With Health Insurance Coverage in Tract")
plotly::ggplotly(p)

## `geom_smooth()` using formula = 'y ~ x'

According to the graph, there is somewhat positive trend of number of hospitals and health insurance cover rate, meaning that neighborhoods with more hospitals are more likely to have better health insurance coverage.

Are hospitals more likely be in neighborhoods with higher median household income?

p <- ggplot(full_new, aes(x=hhincome, y=count)) +
  geom_point() +
  geom_smooth(method='lm', col='grey') +
  ylab("Number of Hospitals") +
  xlab("Median Household Income in Tract")
plotly::ggplotly(p)

## `geom_smooth()` using formula = 'y ~ x'

According to the graph, it is not likely that median household income has a high correlation with number of accessible hospitals, since there are both high and very low-income neighborhoods with plenty of accessible hospitals. However, there are many outliers that significantly affects the visuals of the graph.

p <- ggplot(full_new, aes(x=hhincome, y=hinsurance_rate)) +
  geom_point() +
  geom_smooth(method='lm', col='grey') +
  ylab("Percentage of Population With Health Insurance Coverage in Tract") +
  xlab("Median Household Income in Tract")
plotly::ggplotly(p)

## `geom_smooth()` using formula = 'y ~ x'

According to the graph, neighborhoods with higher median household income tend to have significantly higher health insurance coverage, which is possible to be a result of more hospitals.

So overall, the distribution of hospitals is generally unequal in Fulton and Dekalb counties, with counties with hospitals tend to have higher income and health insurance coverage, although there are many exceptions (outliers), especially in the low-income area west of downtown Atlanta where there are plenty of accessible hospitals.