# 1
suppressWarnings(suppressMessages({
library(tidycensus)
library(sf)
library(tmap)
library(jsonlite)
library(tidyverse)
library(httr)
library(reshape2)
library(here)
library(knitr)
library(ggplot2)
}))
# Import data: Yelp hospital points, read straight from the course repository.
hospital <- st_read("https://raw.githubusercontent.com/ujhwang/urban-analytics-2024/main/Assignment/mini_3/yelp_hospital.geojson")
## Reading layer `yelp_hospital' from data source
## `https://raw.githubusercontent.com/ujhwang/urban-analytics-2024/main/Assignment/mini_3/yelp_hospital.geojson'
## using driver `GeoJSON'
## Simple feature collection with 129 features and 23 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -84.56242 ymin: 33.60009 xmax: -84.08677 ymax: 34.0701
## Geodetic CRS: WGS 84
# Every later step needs a usable location, so drop hospitals with no geometry.
# st_is_empty() is sf's test for empty geometries; is.na() on the geometry
# list-column is not a reliable way to detect them.
hospital <- hospital %>%
  filter(!st_is_empty(geometry))
# 2+3
# Download 2020 ACS 5-year tract estimates, with tract geometry, for Fulton and
# DeKalb Counties in Georgia. `output = "wide"` returns one row per tract; the
# estimate columns carry an "E" suffix (e.g. hhincomeE), which later steps use.
census <- get_acs(
  geography = "tract",
  state = "GA",
  county = c("Fulton", "DeKalb"),
  year = 2020,
  survey = "acs5",
  variables = c(
    hhincome = "B19019_001",
    poor = "B17001_002",
    white = "B02001_002",
    black = "B02001_003",
    asian = "B02001_005",
    pop = "B01003_001",
    nocar = "B08014_002",
    car = "B08014_001"
  ),
  output = "wide",
  geometry = TRUE
)
## Getting data from the 2016-2020 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## | | | 0% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |====== | 8% | |====== | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 15% | |=========== | 16% | |============ | 17% | |============= | 18% | |============= | 19% | |=============== | 21% | |================ | 22% | |================ | 23% | |================== | 25% | |=================== | 27% | |==================== | 29% | |===================== | 30% | |====================== | 31% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |============================ | 40% | |============================= | 41% | |============================== | 43% | |================================ | 46% | |================================= | 47% | |================================== | 49% | |=================================== | 50% | |===================================== | 53% | |======================================== | 57% | |========================================== | 60% | |=========================================== | 61% | |============================================ | 63% | |============================================= | 65% | |=============================================== | 68% | |================================================= | 70% | |==================================================== | 74% | |==================================================== | 75% | |========================================================= | 81% | |============================================================= | 88% | |================================================================== | 94% | |======================================================================| 100%
# hhincome is the median household income of each census tract; poor is the number of people whose income is below the poverty level. Both can be used to assess whether the distribution of hospitals is related to income.
# white, black, and asian refer to the number of white, black, and Asian people in each census tract. I used them to classify each census tract as white, black, or Asian. They can reveal whether certain races are closer to hospitals.
# nocar is the number of workers without cars in each census tract. It can show whether census tracts with a higher proportion of workers without cars are closer to hospitals.
# 4
# Build the analysis table: keep the estimate columns under short names, turn
# the poverty and no-car counts into shares, label each tract by its largest
# racial group, and reproject to WGS 84 (EPSG:4326).
census_prepared <- census %>%
  select(
    GEOID,
    income = hhincomeE,
    poor = poorE,
    white = whiteE,
    black = blackE,
    asian = asianE,
    pop = popE,
    nocar = nocarE,
    car = carE
  ) %>%
  mutate(
    # Share of residents below the poverty level; NA (rather than NaN from
    # 0 / 0) when the tract has no residents.
    poor = if_else(pop > 0, poor / pop, NA_real_),
    race = case_when(
      # Tracts with missing or all-zero counts have no largest group. Without
      # these guards they would be labelled "white" (0 >= 0 tie) or fall
      # through to "asian".
      is.na(white) | is.na(black) | is.na(asian) ~ NA_character_,
      white + black + asian == 0 ~ NA_character_,
      white >= black & white >= asian ~ "white",
      black >= white & black >= asian ~ "black",
      TRUE ~ "asian"
    ),
    # Share of workers with no vehicle available; NA when the tract reports no
    # workers.
    nocar = if_else(car > 0, nocar / car, NA_real_)
  ) %>%
  st_transform(crs = 4326)
# 5
# Interactive map: tracts shaded by median income, hospital locations on top.
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(census_prepared) +
  tm_polygons(col = "income") +
  tm_shape(hospital) +
  tm_dots(col = "darkblue")
# Based on the income data, we see that the north seems to be richer, whereas the south seems to be poorer. Most hospitals seem to be located at the center and north of Atlanta. Perhaps richer people have easier access to hospitals.
# Count hospitals per census tract.
# `count` holds, for each hospital, the row indices of the tracts it intersects.
count <- st_intersects(hospital, census_prepared)
# Tally how often each tract index occurs across all hospitals. `nbins` makes
# tracts with no hospital come out as 0, and as.integer() keeps the call valid
# when there are no matches at all (unlist() would then return NULL).
census_prepared$count <- tabulate(
  as.integer(unlist(count)),
  nbins = nrow(census_prepared)
)
# Scatter plot of each tract's poverty share against its hospital count.
census_prepared %>%
  ggplot(mapping = aes(x = poor, y = count)) +
  geom_point() +
  labs(x = "Ratio of Poverty", y = "Number of Hospitals")
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

# I tried a scatter plot to see whether poorer census tracts have fewer hospitals in them. We do not see a clear association, which is expected because the map above already showed that many census tracts have no hospitals.
# Distance from each tract's centroid to its nearest hospital.
centroid <- census_prepared %>% st_centroid()
## Warning: st_centroid assumes attributes are constant over geometries
# Pairwise distances: one row per tract centroid, one column per hospital.
distance <- st_distance(centroid, hospital)
# Row-wise minimum, stored as a plain number (units stripped by as.numeric).
# vapply() over seq_len() builds the whole column in one step and also copes
# with an empty table, unlike a `1:nrow()` loop assigning element by element.
census_prepared$distance <- vapply(
  seq_len(nrow(distance)),
  function(i) min(as.numeric(distance[i, ])),
  numeric(1)
)
# One map per tract label (white, black, asian): tracts shaded on a continuous
# scale by distance to the nearest hospital, with hospital locations overlaid.
tm_shape(filter(census_prepared, race == "white")) +
  tm_polygons(col = "distance", style = "cont", legend.show = TRUE) +
  tm_shape(hospital) +
  tm_dots(col = "darkblue")
tm_shape(filter(census_prepared, race == "black")) +
  tm_polygons(col = "distance", style = "cont", legend.show = TRUE) +
  tm_shape(hospital) +
  tm_dots(col = "darkblue")
tm_shape(filter(census_prepared, race == "asian")) +
  tm_polygons(col = "distance", style = "cont", legend.show = TRUE) +
  tm_shape(hospital) +
  tm_dots(col = "darkblue")
# We see that white people tend to live in the north and are closer to hospitals (the southwest census tract is the only outlier). Black people tend to live in the south and are further away from hospitals. Asian people are scattered and their location does not have a clear pattern, but they are closer to hospitals if we look at the legend. Since the distance was calculated using centroids of the census tracts, Asian people might be living even closer to hospitals.
# Bar chart of the share of workers without a car, one bar per tract, ordered
# by that share and coloured by distance to the nearest hospital. Tracts with a
# zero share are left out.
ggplot(
  data = census_prepared %>% filter(nocar != 0.0),
  # `nocar` is stored as a 0-1 fraction; scale it to match the "Percentage"
  # axis label.
  mapping = aes(x = reorder(GEOID, nocar), y = nocar * 100, fill = distance)
) +
  geom_col() +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  labs(
    x = "Census Tract",
    y = "Percentage of Workers without Cars",
    fill = "Distance to Hospitals"
  ) +
  # One bar per tract: the GEOID tick labels would be unreadable, so hide them.
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())

# I was expecting the census tracts with a higher percentage of workers without cars to be closer to hospitals, but that does not seem to be the case.
# Map of the share of workers without a car. Tracts with a nonzero share are
# shaded on a continuous scale; tracts with a zero share are drawn in white.
# `nocar` is a 0-1 fraction, so it is scaled to match the "Percentage" legend
# title. The hospital layer is added last so the white polygons cannot cover
# the dots.
tm_shape(
  census_prepared %>%
    filter(nocar != 0.0) %>%
    mutate(nocar = nocar * 100)
) +
  tm_polygons(
    col = "nocar",
    style = "cont",
    legend.show = TRUE,
    title = "Percentage of Workers without Cars"
  ) +
  tm_shape(census_prepared %>% filter(nocar == 0.0)) +
  tm_polygons(col = "white", legend.show = FALSE) +
  tm_shape(hospital) +
  tm_dots(col = "darkblue")
# While hospital locations are not closely associated with the percentage of workers without cars, many census tracts with a nonzero percentage (i.e., tracts where some workers have no car) do not contain a hospital. Therefore, people without a car in these census tracts would probably struggle to reach a hospital quickly unless they call a taxi or get a friend to drive them.
# In summary, if we look at income, most of the hospitals are closer to census tracts that have a higher median income. If we look at race, Asian people are generally closer to hospitals, whereas black people are generally further away from hospitals. We also know that many census tracts do not have hospitals in them, including some census tracts that have a high percentage of workers without cars. Therefore, we can conclude that the spatial distribution of hospitals in Fulton and DeKalb Counties is not equitable.