To carry out the required analysis, we first load all of the required packages and datasets.
# Load Required Packages
library(tidycensus)
library(tidyverse)
library(tmap)
library(sf)
library(jsonlite)
library(dplyr)
library(ggplot2)
# Register the Census API key, read from the "census_api" environment variable
tidycensus::census_api_key(key = Sys.getenv("census_api"))
In this section, I loaded the median age data for census tracts in Fulton and DeKalb Counties. From an equity perspective, I expect to see more hospitals in areas with a higher median age, as older populations typically require more healthcare services.
# Download tract-level median age (ACS 5-year, 2021) together with tract
# geometry for Fulton and DeKalb Counties, GA. Only the download step is
# wrapped in suppressMessages(); the estimate column (median_ageE in wide
# output) is then renamed and the layer is reprojected to EPSG:4326.
tract <- suppressMessages(
  get_acs(
    geography = "tract",                       # Project Requirement
    state = "GA",                              # Project Requirement
    county = c("Fulton", "DeKalb"),            # Project Requirement
    variables = c(median_age = "B01002_001"),  # Median Age
    year = 2021,
    survey = "acs5",                           # ACS 5-year estimate
    geometry = TRUE,                           # Returns sf objects
    output = "wide"
  )
) %>%
  rename(median_age = median_ageE) %>%
  st_transform(crs = 4326)
# County tract boundaries from TIGER/Line, reprojected to EPSG:4326.
fulton <- tigris::tracts("GA", "Fulton") %>%
  st_transform(4326)
dekalb <- tigris::tracts("GA", "DeKalb") %>%
  st_transform(4326)

# Split the ACS tracts by county using the state + county FIPS prefix of GEOID
# (13121 = Fulton, 13089 = DeKalb). A spatial subset such as tract[fulton, ]
# uses st_intersects, so tracts of the neighbouring county that merely touch
# the shared border would be selected as well and end up in both county sets.
tract_fulton <- tract[startsWith(tract$GEOID, "13121"), ] # census tracts of Fulton county
tract_dekalb <- tract[startsWith(tract$GEOID, "13089"), ] # census tracts of DeKalb county
# Read the Yelp hospital layer from GeoJSON and reproject it to EPSG:4326
# so that it shares a CRS with the census tracts.
hospital <- st_transform(
  st_read("E:/Georgia_Tech_MCRP/2024_FALL/CP8883_Urban_Analytics/Assignment/Assignment_3/yelp_hospital.geojson"),
  4326
)
Next, we can clean and prepare the data into a form that we wish to analyze.
Before starting the data cleaning process, we can use the sapply function to review the data class of each column in the two datasets. This check shows that there are no nested columns.
# Print the class of every column in the tract data
print(sapply(tract, class))
## $GEOID
## [1] "character"
##
## $NAME
## [1] "character"
##
## $median_age
## [1] "numeric"
##
## $median_ageM
## [1] "numeric"
##
## $geometry
## [1] "sfc_MULTIPOLYGON" "sfc"
# Print the class of every column in the hospital data
print(sapply(hospital, class))
## $id
## [1] "character"
##
## $alias
## [1] "character"
##
## $name
## [1] "character"
##
## $image_url
## [1] "character"
##
## $is_closed
## [1] "logical"
##
## $url
## [1] "character"
##
## $review_count
## [1] "integer"
##
## $categories
## [1] "character"
##
## $rating
## [1] "numeric"
##
## $transactions
## [1] "character"
##
## $phone
## [1] "character"
##
## $display_phone
## [1] "character"
##
## $distance
## [1] "numeric"
##
## $coordinates.latitude
## [1] "numeric"
##
## $coordinates.longitude
## [1] "numeric"
##
## $location.address1
## [1] "character"
##
## $location.address2
## [1] "character"
##
## $location.address3
## [1] "character"
##
## $location.city
## [1] "character"
##
## $location.zip_code
## [1] "character"
##
## $location.country
## [1] "character"
##
## $location.state
## [1] "character"
##
## $location.display_address
## [1] "character"
##
## $geometry
## [1] "sfc_POINT" "sfc"
Since no nested columns were identified, we next check for and drop duplicated rows.
# Keep only unique rows in each dataset
tract_nodup <- distinct(tract)       # tract
hospital_nodup <- distinct(hospital) # hospital
Moving forward, we can drop rows that do not have coordinate information from the hospital dataset. In this case, there are no rows without coordinate information. We can also drop rows that do not have median age information from the census dataset.
# Report the number of missing values in every column of the hospital data
hospital_nodup %>%
  map_dbl(function(x) sum(is.na(x)))

# Drop hospitals without coordinates. The result is assigned back; without
# the assignment the filtered data would only be printed and then discarded.
hospital_nodup <- hospital_nodup %>%
  filter(!is.na(coordinates.longitude), !is.na(coordinates.latitude))

# Drop census tracts without a median age estimate, keeping the result
tract_nodup <- tract_nodup %>%
  filter(!is.na(median_age))
After loading the census data, we can left join the Yelp data to the census data, and also left join the census data to the Yelp data. We can also make sure that the joined datasets are spatial (sf) objects.
# Tract-level view: attach the attributes of every intersecting hospital to
# each census tract (polygon geometry is kept).
hospital_tract_fulton <- st_join(tract_fulton, hospital_nodup, join = st_intersects) # yelp data joined into Fulton tracts
hospital_tract_dekalb <- st_join(tract_dekalb, hospital_nodup, join = st_intersects) # yelp data joined into DeKalb tracts

# Hospital-level view: attach the attributes of the containing census tract to
# each hospital (point geometry is kept).
tract_fulton_hospital <- st_join(hospital_nodup, tract_fulton, join = st_intersects) # Fulton tract data joined into yelp data
tract_dekalb_hospital <- st_join(hospital_nodup, tract_dekalb, join = st_intersects) # DeKalb tract data joined into yelp data

# No st_as_sf() conversion is needed afterwards: st_join() on sf inputs already
# returns sf objects in EPSG:4326, and st_as_sf() applied to an sf object
# returns it unchanged (its coords/crs arguments are not used).
Now, we can run the analysis to examine the equity of the spatial distribution of hospitals in Fulton and DeKalb Counties, GA. To do so, we will use two indicators: (a) the number of hospitals within 1 km of each census tract, and (b) the distance from each census tract to the nearest hospital. Our primary objective in this analysis is to investigate whether census tracts with higher median ages have a greater number of hospitals located within 1 km and whether these tracts also have shorter distances to the nearest hospital.
First, we can use sf::st_buffer and sf::st_intersects to identify the number of hospitals within 1 km from census tracts for two counties.
# Buffer in a projected CRS whose unit is the metre, so that dist = 1000 is
# unambiguously 1 km. Buffering lon/lat data directly only yields metres when
# sf's s2 engine is active; otherwise the distance is read as degrees.
metric_crs <- 26967 # NAD83 / Georgia West (metres)
hospital_metric <- st_transform(hospital_nodup, metric_crs)

tract_fulton_buffer <- tract_fulton %>% # 1km buffer around each tract: Fulton
  st_transform(metric_crs) %>%
  st_buffer(dist = 1000)
tract_dekalb_buffer <- tract_dekalb %>% # 1km buffer around each tract: DeKalb
  st_transform(metric_crs) %>%
  st_buffer(dist = 1000)

# Count the hospitals that fall inside each buffered tract
hospital_1km_fulton <- lengths(st_intersects(tract_fulton_buffer, hospital_metric)) # Fulton
hospital_1km_dekalb <- lengths(st_intersects(tract_dekalb_buffer, hospital_metric)) # DeKalb

# Row order is unchanged by st_transform()/st_buffer(), so the counts line up
# with the original tract rows.
tract_fulton$hospital_1km <- hospital_1km_fulton # No. of hospitals within 1km: Fulton
tract_dekalb$hospital_1km <- hospital_1km_dekalb # No. of hospitals within 1km: DeKalb

# Drop tracts with no median age estimate (e.g. census tract 9800, an empty tract)
tract_fulton_clean <- tract_fulton %>%
  filter(!is.na(median_age))
tract_dekalb_clean <- tract_dekalb %>%
  filter(!is.na(median_age))
Next, we can compare the trends in the number of hospitals within 1 km of each census tract for both counties. First, let’s draw maps that show the number of hospitals within 1 km of each census tract for both counties. The maps show that most census tracts in both counties have few or no hospitals within 1 km of their tract boundaries, meaning that the hospital network is not robust enough to serve either county. When examining the color patterns in the maps, we might say that the equity situation in Fulton County is a little more concerning. This is because the Fulton County map exhibits a relatively strong contrast between very dark and very light colors, suggesting a significant disparity in hospital access.
# Choropleth map: number of hospitals within 1 km of each tract, Fulton
ggplot(data = tract_fulton_clean) +
  geom_sf(mapping = aes(fill = hospital_1km), color = NA) + # fill by hospital count, no tract outlines
  scale_fill_viridis_c(name = "No. of hospitals within 1km", option = "plasma") + # continuous plasma palette
  labs(title = "No. of hospitals within 1km by census tract, Fulton County") +
  theme_minimal() +
  theme(legend.position = "bottom") # legend below the map
# Choropleth map: number of hospitals within 1 km of each tract, DeKalb
ggplot(data = tract_dekalb_clean) +
  geom_sf(mapping = aes(fill = hospital_1km), color = NA) + # fill by hospital count, no tract outlines
  scale_fill_viridis_c(name = "No. of hospitals within 1km", option = "plasma") + # continuous plasma palette
  labs(title = "No. of hospitals within 1km by census tract, Dekalb County") +
  theme_minimal() +
  theme(legend.position = "bottom") # legend below the map
Moving forward, we can compare the trends in the number of hospitals
within 1 km of each census tract and the median age of census tracts for
both counties. The scatter plots below will help assess the equity of
healthcare access by illustrating how proximity to hospitals correlates
with the age distribution of residents. The results show that, while
many census tracts in both counties have no 1 km accessibility to any
hospital across various median age levels, Fulton County demonstrates a
lower level of equity as the number of 1 km accessible hospitals drops
drastically as median age of census tract goes up.
# Scatter plot: Fulton
# Median age against the number of hospitals within 1 km, with a linear trend
# line. The smoothing formula is given explicitly so that geom_smooth() has
# nothing to report; wrapping the plot in suppressMessages() does not help
# because the message is emitted when the plot is printed, not when it is built.
ggplot(tract_fulton_clean, aes(x = median_age, y = hospital_1km)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  labs(
    title = "Median Age vs. No. of Hospitals Within 1 km of census tracts: Fulton County, GA",
    x = "Median Age",
    y = "No. of Hospitals Within 1 km"
  )
## `geom_smooth()` using formula = 'y ~ x'
# Scatter plot: DeKalb
# Median age against the number of hospitals within 1 km, with a linear trend
# line. The smoothing formula is given explicitly so that geom_smooth() has
# nothing to report; wrapping the plot in suppressMessages() does not help
# because the message is emitted when the plot is printed, not when it is built.
ggplot(tract_dekalb_clean, aes(x = median_age, y = hospital_1km)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  labs(
    title = "Median Age vs. No. of Hospitals Within 1 km of census tracts: Dekalb County, GA",
    x = "Median Age",
    y = "No. of Hospitals Within 1 km"
  )
## `geom_smooth()` using formula = 'y ~ x'
Next, we can use sf::st_distance to compute the distance from each census tract to every hospital and take the row-wise minimum, which gives the distance to the nearest hospital for the census tracts of the two counties.
# Distance from every tract to its nearest hospital.
# st_distance() returns a tract-by-hospital distance matrix; the row-wise
# minimum is the distance to the closest hospital for that tract.
# NOTE(review): units are presumably metres for EPSG:4326 input, matching the
# "(meter)" axis label used in the scatter plots - confirm.
fulton_nearest_hospital <- apply(
  st_distance(tract_fulton_clean, hospital_nodup), # Fulton distance matrix
  MARGIN = 1,
  FUN = min
)
dekalb_nearest_hospital <- apply(
  st_distance(tract_dekalb_clean, hospital_nodup), # DeKalb distance matrix
  MARGIN = 1,
  FUN = min
)

# Store the results as a new column on each county's tract data
tract_fulton_clean$nearest_hospital <- fulton_nearest_hospital
tract_dekalb_clean$nearest_hospital <- dekalb_nearest_hospital
As with our previous analysis, we can also create maps to illustrate the distance to the nearest hospital for each census tract in both counties. These maps reveal that most census tracts in both counties have their closest hospital located approximately 2 to 4 km from their boundaries, as indicated by the prevalence of darker colors on the maps. Therefore, we can expect that most residents in both counties need to travel a significant distance to visit a hospital. Also, when examining the color patterns in the maps, we might again say that the equity situation in Fulton County is a little more concerning. This is because the Fulton County map exhibits a relatively strong contrast between very dark and very light colors, suggesting a significant disparity in the distance to the closest hospital.
# Choropleth map: distance to the closest hospital for each tract, Fulton
ggplot(data = tract_fulton_clean) +
  geom_sf(mapping = aes(fill = nearest_hospital), color = NA) + # fill by nearest-hospital distance
  scale_fill_viridis_c(name = "Distance to closest hospital", option = "plasma") + # continuous plasma palette
  labs(title = "Distance to closest hospital by census tract, Fulton County") +
  theme_minimal() +
  theme(legend.position = "bottom") # legend below the map
# Choropleth map: distance to the closest hospital for each tract, DeKalb
ggplot(data = tract_dekalb_clean) +
  geom_sf(mapping = aes(fill = nearest_hospital), color = NA) + # fill by nearest-hospital distance
  scale_fill_viridis_c(name = "Distance to closest hospital", option = "plasma") + # continuous plasma palette
  labs(title = "Distance to closest hospital by census tract, Dekalb County") +
  theme_minimal() +
  theme(legend.position = "bottom") # legend below the map
Furthermore, as in the previous analysis, we can
also compare the distance to the closest hospital for each
census tract with the median age of census tracts for both counties. The
scatter plots below help assess the equity of healthcare access by
illustrating how the distance to the closest hospital correlates with the
age distribution of residents. The results show that, while residents of many census
tracts in both counties have to travel a significant distance to visit the
closest hospital, Fulton County demonstrates a lower level of equity, as the
distance to the closest hospital increases as the median age of the census tract
goes up.
# Scatter plot: Fulton
# Median age against the distance to the closest hospital, with a linear trend
# line. The smoothing formula is given explicitly so that geom_smooth() has
# nothing to report; wrapping the plot in suppressMessages() does not help
# because the message is emitted when the plot is printed, not when it is built.
ggplot(tract_fulton_clean, aes(x = median_age, y = nearest_hospital)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  labs(
    title = "Median Age vs. Closest hospital by census tracts: Fulton County, GA",
    x = "Median Age",
    y = "Distance to Closest hospital (meter)"
  )
## `geom_smooth()` using formula = 'y ~ x'
# Scatter plot: DeKalb
# Median age against the distance to the closest hospital, with a linear trend
# line. The smoothing formula is given explicitly so that geom_smooth() has
# nothing to report; wrapping the plot in suppressMessages() does not help
# because the message is emitted when the plot is printed, not when it is built.
ggplot(tract_dekalb_clean, aes(x = median_age, y = nearest_hospital)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  labs(
    title = "Median Age vs. Closest hospital by census tracts: Dekalb County, GA",
    x = "Median Age",
    y = "Distance to Closest hospital (meter)"
  )
## `geom_smooth()` using formula = 'y ~ x'