1. Import Data

To carry out the required analysis, we first load all of the required datasets.

1) Load Required Packages

# Load Required Packages
library(tidycensus)
library(tidyverse)
library(tmap)
library(sf)
library(jsonlite)
library(dplyr)
library(ggplot2)

2) Load Census API

# Register the Census API key for this session.
# The key is read from the `census_api` environment variable so that it is
# never hard-coded in the report.
census_key <- Sys.getenv("census_api")
tidycensus::census_api_key(census_key)

3) Load Geometries: Census Tracts within Fulton & Dekalb counties (ACS, Tigris)

In this section, I loaded the median age data for census tracts in Fulton and DeKalb Counties. From an equity perspective, I expect to see more hospitals in areas with a higher median age, as older populations typically require more healthcare services.

# Load median age (ACS 2021 5-year, variable B01002_001) together with the
# census tract geometries of Fulton and DeKalb Counties, GA, and reproject
# the result to EPSG:4326.
tract <- suppressMessages(
  get_acs(
    geography = "tract", # Project Requirement
    state = "GA", # Project Requirement
    county = c("Fulton", "DeKalb"), # Project Requirement
    variables = c(median_age = "B01002_001"), # Median Age
    year = 2021,
    survey = "acs5", # ACS 5-year estimate
    geometry = TRUE, # Returns sf objects
    output = "wide"
  ) %>%
    rename(median_age = median_ageE) %>%
    st_transform(4326)
)

# Tract boundaries per county from TIGER/Line.
# These are no longer needed for the county split below; they are retained
# in case other parts of the analysis reference them.
fulton <- tigris::tracts("GA", "Fulton") %>%
  st_transform(4326)
dekalb <- tigris::tracts("GA", "DeKalb") %>%
  st_transform(4326)

# Split the ACS tracts by county using the GEOID prefix
# (2-digit state FIPS + 3-digit county FIPS).
# A spatial subset such as `tract[fulton, ]` selects by st_intersects, which
# also returns tracts of the neighbouring county that merely touch the shared
# county border, so border tracts would be counted in both counties.
fips_fulton <- "13121" # Georgia (13) + Fulton County (121)
fips_dekalb <- "13089" # Georgia (13) + DeKalb County (089)

tract_fulton <- tract %>% # select census tracts of Fulton county
  filter(startsWith(GEOID, fips_fulton))
tract_dekalb <- tract %>% # select census tracts of DeKalb county
  filter(startsWith(GEOID, fips_dekalb))

4) Load Yelp hospital data (geojson)

# Read the Yelp hospital points from the GeoJSON file and reproject them to
# EPSG:4326 so they share a CRS with the census tracts.
hospital_path <- "E:/Georgia_Tech_MCRP/2024_FALL/CP8883_Urban_Analytics/Assignment/Assignment_3/yelp_hospital.geojson"
hospital <- st_transform(st_read(hospital_path), 4326)

2. Data Cleaning & Preparation

Next, we can clean and prepare the data into a form that we wish to analyze.

5) Review data class of each column

Before starting the data cleaning process, we can use the sapply function to review the data class of each column in the two datasets. This check shows that there are no nested (list) columns.

# Review the data class of each column: tract
print(sapply(tract, class))
## $GEOID
## [1] "character"
## 
## $NAME
## [1] "character"
## 
## $median_age
## [1] "numeric"
## 
## $median_ageM
## [1] "numeric"
## 
## $geometry
## [1] "sfc_MULTIPOLYGON" "sfc"
# Review the data class of each column: hospital
print(sapply(hospital, class))
## $id
## [1] "character"
## 
## $alias
## [1] "character"
## 
## $name
## [1] "character"
## 
## $image_url
## [1] "character"
## 
## $is_closed
## [1] "logical"
## 
## $url
## [1] "character"
## 
## $review_count
## [1] "integer"
## 
## $categories
## [1] "character"
## 
## $rating
## [1] "numeric"
## 
## $transactions
## [1] "character"
## 
## $phone
## [1] "character"
## 
## $display_phone
## [1] "character"
## 
## $distance
## [1] "numeric"
## 
## $coordinates.latitude
## [1] "numeric"
## 
## $coordinates.longitude
## [1] "numeric"
## 
## $location.address1
## [1] "character"
## 
## $location.address2
## [1] "character"
## 
## $location.address3
## [1] "character"
## 
## $location.city
## [1] "character"
## 
## $location.zip_code
## [1] "character"
## 
## $location.country
## [1] "character"
## 
## $location.state
## [1] "character"
## 
## $location.display_address
## [1] "character"
## 
## $geometry
## [1] "sfc_POINT" "sfc"

6) Delete duplicated rows

Since there are no nested columns identified, we will check for duplicated rows.

# Drop duplicated rows: tract
tract_nodup <- distinct(tract)

# Drop duplicated rows: hospital
hospital_nodup <- distinct(hospital)

7) drop rows with missing coordinates values

Moving forward, we can drop rows that do not have coordinate information from the hospital dataset. In this case, there are no rows without coordinate information. We can also drop rows that do not have median age information from the census dataset.

# Count missing values per column of the hospital data to see whether any
# coordinates are missing (printed once; the original ran this twice).
hospital_nodup %>%
  map_dbl(function(x) sum(is.na(x)))

# Actually drop hospitals with missing coordinates. The count above only
# reports NAs; without this step no rows are removed.
hospital_nodup <- hospital_nodup %>%
  filter(
    !is.na(coordinates.latitude),
    !is.na(coordinates.longitude)
  )

# Drop census tracts without a median age estimate and keep the result
# (previously the filtered table was computed and then discarded).
tract_nodup <- tract_nodup %>%
  filter(!is.na(median_age))
8) Join datasets & transform to spatial objects

After loading the census data, we can left join the Yelp data to the census data, and we can also left join the census data to the Yelp data. We can also transform the datasets into spatial objects.

# Join Yelp data into census tract data: one row per tract/hospital match.
# st_join() defaults to a left join, so tracts without any hospital are kept
# with NA hospital attributes.
hospital_tract_fulton <- st_join(tract_fulton, hospital_nodup, join = st_intersects) # Fulton
hospital_tract_dekalb <- st_join(tract_dekalb, hospital_nodup, join = st_intersects) # DeKalb

# Join census tract data into Yelp data: one row per hospital point, with the
# attributes of the tract that contains it (NA when the hospital lies outside
# the county's tracts).
tract_fulton_hospital <- st_join(hospital_nodup, tract_fulton, join = st_intersects) # Fulton
tract_dekalb_hospital <- st_join(hospital_nodup, tract_dekalb, join = st_intersects) # DeKalb

# No st_as_sf() conversion is needed afterwards: st_join() on sf inputs already
# returns sf objects in EPSG:4326, and st_as_sf() on an existing sf object
# returns it unchanged (the `coords`/`crs` arguments are ignored), so the
# former conversion calls had no effect.

3. Data Analysis: Equity status of the spatial distribution of hospitals, Fulton and DeKalb County, GA

Now, we can run the analysis to examine the equity status of the spatial distribution of hospitals in Fulton and DeKalb County, GA. To examine the equity status, we will use two indicators: (a) the number of hospitals within 1 km of each census tract, and (b) the distance from each census tract to the nearest hospital. Our primary objective in this analysis is to investigate whether census tracts with higher median ages have a greater number of hospitals located within 1 km of the tract and whether these tracts also exhibit shorter distances to the nearest hospital.

9) The number of hospitals within 1 km from census tracts

First, we can use sf::st_buffer and sf::st_intersects to identify the number of hospitals within 1 km from census tracts for two counties.

# Buffer in a projected CRS whose unit is the metre, so that `dist = 1000`
# is unambiguously 1 km. On longitude/latitude data (EPSG:4326) the meaning
# and accuracy of st_buffer() depend on whether s2 is enabled: degrees when it
# is off, an approximate cell-based buffer when it is on.
crs_metric <- 32616 # WGS 84 / UTM zone 16N, which covers the Atlanta area

hospital_metric <- st_transform(hospital_nodup, crs_metric)

fulton_buffer_metric <- tract_fulton %>% # 1km buffer around each tract: Fulton
  st_transform(crs_metric) %>%
  st_buffer(dist = 1000)
dekalb_buffer_metric <- tract_dekalb %>% # 1km buffer around each tract: DeKalb
  st_transform(crs_metric) %>%
  st_buffer(dist = 1000)

# Keep the buffer objects available in EPSG:4326, like the other layers.
tract_fulton_buffer <- st_transform(fulton_buffer_metric, 4326)
tract_dekalb_buffer <- st_transform(dekalb_buffer_metric, 4326)

# Count the hospitals falling inside each buffer (one count per tract, in
# the same row order as the tract tables).
hospital_1km_fulton <- lengths(st_intersects(fulton_buffer_metric, hospital_metric)) # Fulton
hospital_1km_dekalb <- lengths(st_intersects(dekalb_buffer_metric, hospital_metric)) # DeKalb

tract_fulton$hospital_1km <- hospital_1km_fulton # Add a column on No. of hospitals within 1km: Fulton
tract_dekalb$hospital_1km <- hospital_1km_dekalb # Add a column on No. of hospitals within 1km: DeKalb

# Drop tracts without a median age estimate (original note: census tract
# 9800, an empty census tract).
tract_fulton_clean <- tract_fulton %>%
  filter(!is.na(median_age))

tract_dekalb_clean <- tract_dekalb %>%
  filter(!is.na(median_age))

Next, we can compare the trends in the number of hospitals within 1 km of each census tract for both counties. First, let’s draw maps that show the number of hospitals within 1 km of each census tract for both counties. The maps show that most census tracts in both counties have no or very few hospitals within 1 km of their tract boundary, meaning that the hospital network is not robust enough to serve the counties. When examining the color patterns in the maps, we might say that the equity situation in Fulton County is a little more concerning. This is because the Fulton County map exhibits a relatively strong contrast between very dark and very light colors, suggesting a significant disparity in hospital access.

# Choropleth map: number of hospitals within 1 km of each tract, Fulton
ggplot(data = tract_fulton_clean) +
  geom_sf(aes(fill = hospital_1km), color = NA) + # fill by hospital count, no tract outlines
  scale_fill_viridis_c(name = "No. of hospitals within 1km", option = "plasma") +
  theme_minimal() +
  theme(legend.position = "bottom") + # legend below the map
  labs(title = "No. of hospitals within 1km by census tract, Fulton County")

# Choropleth map: number of hospitals within 1 km of each tract, DeKalb
ggplot(data = tract_dekalb_clean) +
  geom_sf(aes(fill = hospital_1km), color = NA) + # fill by hospital count, no tract outlines
  scale_fill_viridis_c(name = "No. of hospitals within 1km", option = "plasma") +
  theme_minimal() +
  theme(legend.position = "bottom") + # legend below the map
  labs(title = "No. of hospitals within 1km by census tract, Dekalb County")

Moving forward, we can compare the trends in the number of hospitals within 1 km of each census tract and the median age of census tracts for both counties. The scatter plots below will help assess the equity of healthcare access by illustrating how proximity to hospitals correlates with the age distribution of residents. The results show that, while many census tracts in both counties have no 1 km accessibility to any hospital across various median age levels, Fulton County demonstrates a lower level of equity as the number of 1 km accessible hospitals drops drastically as median age of census tract goes up.

# Scatter plot: Fulton
# Median age vs. number of hospitals within 1 km, with a linear trend line.
# The smoothing formula is stated explicitly so that ggplot2 does not emit its
# "`geom_smooth()` using formula" message. Wrapping the plot in
# suppressMessages() does not help, because the message is raised when the
# plot is printed, after suppressMessages() has already returned.
ggplot(tract_fulton_clean, aes(x = median_age, y = hospital_1km)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  labs(
    title = "Median Age vs. No. of Hospitals Within 1 km of census tracts: Fulton County, GA",
    x = "Median Age",
    y = "No. of Hospitals Within 1 km"
  )
## `geom_smooth()` using formula = 'y ~ x'

# Scatter plot: DeKalb
# Median age vs. number of hospitals within 1 km, with a linear trend line.
# The smoothing formula is stated explicitly so that ggplot2 does not emit its
# "`geom_smooth()` using formula" message. Wrapping the plot in
# suppressMessages() does not help, because the message is raised when the
# plot is printed, after suppressMessages() has already returned.
ggplot(tract_dekalb_clean, aes(x = median_age, y = hospital_1km)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  labs(
    title = "Median Age vs. No. of Hospitals Within 1 km of census tracts: Dekalb County, GA",
    x = "Median Age",
    y = "No. of Hospitals Within 1 km"
  )
## `geom_smooth()` using formula = 'y ~ x'

10) The distance to the nearest hospital from census tracts

Next, we can use sf::st_distance to compute the distance from each census tract to every hospital, and then take the minimum per tract to identify the distance to the nearest hospital for the census tracts of the two counties.

# Pairwise distances: one row per census tract, one column per hospital.
fulton_dist_matrix <- st_distance(tract_fulton_clean, hospital_nodup)
dekalb_dist_matrix <- st_distance(tract_dekalb_clean, hospital_nodup)

# Row-wise minimum = distance from each tract to its nearest hospital.
fulton_nearest_hospital <- apply(fulton_dist_matrix, 1, min) # Fulton
dekalb_nearest_hospital <- apply(dekalb_dist_matrix, 1, min) # DeKalb

# Attach the result to the cleaned tract tables.
tract_fulton_clean$nearest_hospital <- fulton_nearest_hospital
tract_dekalb_clean$nearest_hospital <- dekalb_nearest_hospital

As with our previous analysis, we can also create maps to illustrate the distance to the nearest hospital for each census tract in both counties. These maps reveal that most census tracts in both counties have their closest hospital located approximately 2 to 4 km from their boundaries, as indicated by the prevalence of darker colors on the maps. Therefore, we can expect that most of the residents in both counties need to travel a significant distance to visit hospital. Also, when examining the color patterns in the maps, we might also say that the equity situation in Fulton County is a little bit more concerning. This is because the Fulton County map exhibits a relatively significant contrast between very dark and very light colors, suggesting a significant disparity in the distance to the closest hospital.

# Choropleth map: distance to the closest hospital per tract, Fulton
ggplot(data = tract_fulton_clean) +
  geom_sf(aes(fill = nearest_hospital), color = NA) + # fill by nearest-hospital distance, no tract outlines
  scale_fill_viridis_c(name = "Distance to closest hospital", option = "plasma") +
  theme_minimal() +
  theme(legend.position = "bottom") + # legend below the map
  labs(title = "Distance to closest hospital by census tract, Fulton County")

# Choropleth map: distance to the closest hospital per tract, DeKalb
ggplot(data = tract_dekalb_clean) +
  geom_sf(aes(fill = nearest_hospital), color = NA) + # fill by nearest-hospital distance, no tract outlines
  scale_fill_viridis_c(name = "Distance to closest hospital", option = "plasma") +
  theme_minimal() +
  theme(legend.position = "bottom") + # legend below the map
  labs(title = "Distance to closest hospital by census tract, Dekalb County")

Furthermore, as in the previous analysis, we can also compare the trends in the distance to the closest hospital for each census tract and the median age of census tracts for both counties. The scatter plots below help assess the equity of healthcare access by illustrating how the distance to the closest hospital correlates with the age distribution of residents. The results show that, while residents of many census tracts in both counties have to travel a significant distance to visit the closest hospital, Fulton County demonstrates a lower level of equity, as the distance to the closest hospital increases as the median age of the census tract goes up.

# Scatter plot: Fulton
# Median age vs. distance to the closest hospital, with a linear trend line.
# The smoothing formula is stated explicitly so that ggplot2 does not emit its
# "`geom_smooth()` using formula" message. Wrapping the plot in
# suppressMessages() does not help, because the message is raised when the
# plot is printed, after suppressMessages() has already returned.
ggplot(tract_fulton_clean, aes(x = median_age, y = nearest_hospital)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  labs(
    title = "Median Age vs. Closest hospital by census tracts: Fulton County, GA",
    x = "Median Age",
    y = "Distance to Closest hospital (meter)"
  )
## `geom_smooth()` using formula = 'y ~ x'

# Scatter plot: DeKalb
# Median age vs. distance to the closest hospital, with a linear trend line.
# The smoothing formula is stated explicitly so that ggplot2 does not emit its
# "`geom_smooth()` using formula" message. Wrapping the plot in
# suppressMessages() does not help, because the message is raised when the
# plot is printed, after suppressMessages() has already returned.
ggplot(tract_dekalb_clean, aes(x = median_age, y = nearest_hospital)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  labs(
    title = "Median Age vs. Closest hospital by census tracts: Dekalb County, GA",
    x = "Median Age",
    y = "Distance to Closest hospital (meter)"
  )
## `geom_smooth()` using formula = 'y ~ x'