# Load necessary packages:

tidycensus::census_api_key(Sys.getenv("census_api_key"))
## To install your API key for use in future sessions, run this function with `install = TRUE`.
library(tidycensus)
library(sf)
## Linking to GEOS 3.12.1, GDAL 3.8.4, PROJ 9.3.1; sf_use_s2() is TRUE
library(tmap)
## Breaking News: tmap 3.x is retiring. Please test v4, e.g. with
## remotes::install_github('r-tmap/tmap')
library(jsonlite)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()  masks stats::filter()
## ✖ purrr::flatten() masks jsonlite::flatten()
## ✖ dplyr::lag()     masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(httr)
library(jsonlite)
library(reshape2)
## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(here)
## here() starts at C:/Users/wpgeorgia/Documents/GT MSUA/CP 8883/Intro to UA R Projects
library(knitr)
library(skimr)
library(units)
## udunits database from C:/Users/wpgeorgia/AppData/Local/R/win-library/4.4/units/share/udunits/udunits2.xml
library(scales)
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
library(ggalt)
## Registered S3 methods overwritten by 'ggalt':
##   method                  from   
##   grid.draw.absoluteGrob  ggplot2
##   grobHeight.absoluteGrob ggplot2
##   grobWidth.absoluteGrob  ggplot2
##   grobX.absoluteGrob      ggplot2
##   grobY.absoluteGrob      ggplot2
# Load and view Yelp data:

# Read the Yelp healthcare-facility points (GeoJSON) straight from the course
# repository into an sf object.
yelp_healthcare <- st_read("https://raw.githubusercontent.com/ujhwang/urban-analytics-2024/main/Assignment/mini_3/yelp_hospital.geojson")
## Reading layer `yelp_hospital' from data source 
##   `https://raw.githubusercontent.com/ujhwang/urban-analytics-2024/main/Assignment/mini_3/yelp_hospital.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 129 features and 23 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -84.56242 ymin: 33.60009 xmax: -84.08677 ymax: 34.0701
## Geodetic CRS:  WGS 84
# View() opens a data viewer, so it is only useful when someone is there to
# look at it; skip it in non-interactive runs (Rscript, knitting, CI).
if (interactive()) {
  View(yelp_healthcare)
}

# After viewing yelp_healthcare, it does appear tidy and ready for further analysis.
# Retrieve 2019 ACS data on insured and uninsured individuals aged 35-64 by census tract in DeKalb and Fulton counties, then add a new column with the share of uninsured residents in each tract.

# Pull 2019 ACS 5-year tract estimates (with tract geometry) for DeKalb and
# Fulton counties, GA, then derive each tract's uninsured share.
#
# NOTE(review): confirm the variable codes with load_variables(2019, "acs5").
# B27010_035 looks like "35 to 64 years: with one type of health insurance
# coverage", which would leave people holding two or more coverage types
# (B27010_042) out of `insured` and inflate pct_uninsured. B27010_034 (all
# persons aged 35 to 64) may be the intended denominator.
tract_hc2019 <- suppressMessages(
  get_acs(
    geography = "tract",
    state = "GA",
    county = c("Dekalb", "Fulton"),
    variables = c(insured = "B27010_035E", uninsured = "B27010_050E"),
    year = 2019,
    survey = "acs5",
    geometry = TRUE,
    output = "wide",
    # Forwarded to tigris. suppressMessages() does not silence the shapefile
    # download progress bar, which otherwise floods logs and knitted output.
    progress_bar = FALSE
  )
) %>%
  # Uninsured count over (insured + uninsured), stored as a proportion.
  mutate(pct_uninsured = uninsured / (uninsured + insured)) %>%
  # Tracts where both counts are zero yield 0 / 0 = NaN; drop those rows.
  drop_na(pct_uninsured)
##   |                                                                              |                                                                      |   0%  |                                                                              |=                                                                     |   1%  |                                                                              |=                                                                     |   2%  |                                                                              |==                                                                    |   3%  |                                                                              |====                                                                  |   5%  |                                                                              |====                                                                  |   6%  |                                                                              |=====                                                                 |   7%  |                                                                              |======                                                                |   8%  |                                                                              |======                                                                |   9%  |                                                                              |========                                                              |  12%  |                                                                              |==========                                                            |  14%  |                                                                              |===========                                                           |  15%  |                                                                              |===========        
                                                   |  16%  |                                                                              |============                                                          |  16%  |                                                                              |============                                                          |  17%  |                                                                              |=============                                                         |  19%  |                                                                              |==============                                                        |  20%  |                                                                              |===============                                                       |  21%  |                                                                              |=================                                                     |  24%  |                                                                              |=================                                                     |  25%  |                                                                              |==================                                                    |  25%  |                                                                              |======================                                                |  32%  |                                                                              |=======================                                               |  33%  |                                                                              |========================                                              |  34%  |                                                                              |========================                                              |  35%  |                                            
                                  |=========================                                             |  36%  |                                                                              |===========================                                           |  38%  |                                                                              |===========================                                           |  39%  |                                                                              |============================                                          |  40%  |                                                                              |=============================                                         |  41%  |                                                                              |=============================                                         |  42%  |                                                                              |==============================                                        |  42%  |                                                                              |==============================                                        |  43%  |                                                                              |===============================                                       |  44%  |                                                                              |===============================                                       |  45%  |                                                                              |================================                                      |  45%  |                                                                              |================================                                      |  46%  |                                                                              |=================================                                    
 |  47%  |                                                                              |====================================                                  |  51%  |                                                                              |====================================                                  |  52%  |                                                                              |=====================================                                 |  52%  |                                                                              |=====================================                                 |  53%  |                                                                              |======================================                                |  54%  |                                                                              |==========================================                            |  60%  |                                                                              |===========================================                           |  61%  |                                                                              |===========================================                           |  62%  |                                                                              |=============================================                         |  64%  |                                                                              |===============================================                       |  68%  |                                                                              |=================================================                     |  70%  |                                                                              |====================================================                  |  75%  |                                                                              
|======================================================                |  77%  |                                                                              |=======================================================               |  78%  |                                                                              |=======================================================               |  79%  |                                                                              |============================================================          |  85%  |                                                                              |==============================================================        |  88%  |                                                                              |===============================================================       |  90%  |                                                                              |=================================================================     |  93%  |                                                                              |==================================================================    |  94%  |                                                                              |==================================================================    |  95%  |                                                                              |===================================================================   |  95%  |                                                                              |===================================================================   |  96%  |                                                                              |===================================================================== |  99%  |                                                                              |======================================================================| 100%
# Open the tract table in the data viewer only when running interactively;
# View() has no use in Rscript / knitted runs.
if (interactive()) {
  View(tract_hc2019)
}
# Switch tmap to its interactive viewing mode for the maps that follow.
tmap_mode("view")
## tmap mode set to interactive viewing
# Choropleth of the uninsured share by tract.
tm_shape(tract_hc2019) +
  tm_polygons(col = "pct_uninsured")
# Inspect the yelp hc data spatially:

# Facility points drawn over the tract outlines.
tm_shape(yelp_healthcare) +
  tm_dots(col = "darkgreen") +
  tm_shape(tract_hc2019) +
  tm_borders()
# Check the CRS of both data frames; they don't match (NAD83 vs. WGS 84), so transform both to the same CRS.

# Peek at the geometry column of each layer; the printed header reports the
# coordinate reference system.
head(st_geometry(tract_hc2019))
## Geometry set for 6 features 
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -84.41692 ymin: 33.72043 xmax: -84.33426 ymax: 33.97001
## Geodetic CRS:  NAD83
## First 5 geometries:
## MULTIPOLYGON (((-84.38782 33.78458, -84.38781 3...
## MULTIPOLYGON (((-84.38738 33.82925, -84.38304 3...
## MULTIPOLYGON (((-84.41692 33.72796, -84.4141 33...
## MULTIPOLYGON (((-84.36575 33.96188, -84.36533 3...
## MULTIPOLYGON (((-84.39472 33.84677, -84.38882 3...
head(st_geometry(yelp_healthcare))
## Geometry set for 6 features 
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -84.39409 ymin: 33.81016 xmax: -84.26324 ymax: 34.0701
## Geodetic CRS:  WGS 84
## First 5 geometries:
## POINT (-84.26324 34.0701)
## POINT (-84.27857 34.05106)
## POINT (-84.26324 34.0701)
## POINT (-84.28281 34.06029)
## POINT (-84.37021 33.84957)
# Put both layers on EPSG:4326 (WGS 84) so they can be joined and measured
# against each other.
tract_hc2019 <- st_transform(tract_hc2019, crs = 4326)
yelp_healthcare <- st_transform(yelp_healthcare, crs = 4326)
# Check the two crs values after transformation; they match, so onwards.

head(st_geometry(tract_hc2019))
## Geometry set for 6 features 
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -84.41692 ymin: 33.72043 xmax: -84.33426 ymax: 33.97001
## Geodetic CRS:  WGS 84
## First 5 geometries:
## MULTIPOLYGON (((-84.38782 33.78458, -84.38781 3...
## MULTIPOLYGON (((-84.38738 33.82925, -84.38304 3...
## MULTIPOLYGON (((-84.41692 33.72796, -84.4141 33...
## MULTIPOLYGON (((-84.36575 33.96188, -84.36533 3...
## MULTIPOLYGON (((-84.39472 33.84677, -84.38882 3...
head(st_geometry(yelp_healthcare))
## Geometry set for 6 features 
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -84.39409 ymin: 33.81016 xmax: -84.26324 ymax: 34.0701
## Geodetic CRS:  WGS 84
## First 5 geometries:
## POINT (-84.26324 34.0701)
## POINT (-84.27857 34.05106)
## POINT (-84.26324 34.0701)
## POINT (-84.28281 34.06029)
## POINT (-84.37021 33.84957)
# Join census and Yelp data, count healthcare facilities per tract, and then plot the facilities over the census tract map.

# Tag every facility with count = 1 and attach it to the tract polygon it
# intersects. st_join() is a left join by default, so tracts without any
# facility are kept with count = NA.
tract_yelp_hc2019 <- st_join(
  tract_hc2019,
  yelp_healthcare %>% mutate(count = 1)
)

# Facilities per tract. Geometry is dropped *before* summarising: summarise()
# on an sf object also unions each group's geometry, which is slow and unused
# here. na.rm = TRUE turns facility-free tracts (count = NA) into 0.
tract_yelp_hc2019_count <- tract_yelp_hc2019 %>%
  st_drop_geometry() %>%
  group_by(GEOID) %>%
  summarise(count = sum(count, na.rm = TRUE))

# Attach the per-tract facility count to the tract polygons.
tract_hc2019 <- tract_hc2019 %>%
  left_join(tract_yelp_hc2019_count, by = "GEOID")

# Uninsured-share choropleth with the facility points on top.
tm_shape(tract_hc2019) +
  tm_polygons(col = "pct_uninsured") +
  tm_shape(yelp_healthcare) +
  tm_dots(col = "darkgreen")
# plotting number of healthcare facilities to % of uninsured in each tract.

# Tracts whose uninsured share is below 20%, as a plain data frame; these are
# the points that get outlined in the scatter plot.
low_uninsured <- as.data.frame(
  tract_hc2019[which(tract_hc2019$pct_uninsured < 0.2), ]
)

# Facility count against uninsured share, one point per tract, with the
# low-uninsured tracts encircled in orange.
ggplot(data = tract_hc2019, mapping = aes(x = pct_uninsured, y = count)) +
  geom_point(colour = "darkgreen") +
  geom_encircle(
    data = low_uninsured,
    inherit.aes = TRUE,
    color = "orange",
    size = 3,
    expand = 0.015
  ) +
  scale_x_continuous(labels = scales::percent) +
  labs(
    x = "% of Uninsured in Tract",
    y = "Number of Healthcare Facilities"
  )

# use st_distance to calculate distance from each tract to closest facility from yelp df and create new df with just Uninsured % and distance

# Conversion factor used to report distances in miles. It is the
# metres-to-miles factor, so this assumes st_distance() reports metres for
# these layers (confirm if the CRS of either layer changes).
miles_per_metre <- 0.000621371192

# Full tracts x facilities distance matrix (by_element = FALSE).
dist_to_facilities <- st_distance(
  x = tract_hc2019$geometry,
  y = yelp_healthcare$geometry,
  by_element = FALSE
)

# Strip any units class but keep the matrix shape, so the row-wise minimum
# below works on plain numbers regardless of how many facilities there are.
dist_to_facilities <- matrix(
  as.numeric(dist_to_facilities),
  nrow = nrow(dist_to_facilities)
)

# One row per tract: uninsured share and distance to the nearest facility.
# Both columns live in the same data frame so that sorting can never separate
# a tract's share from its distance.
hc_distances <- data.frame(
  pct_uninsured = tract_hc2019$pct_uninsured,
  Closest = apply(dist_to_facilities, 1, min) * miles_per_metre
)
hc_distances2 <- round(hc_distances, 2)

# Highest uninsured share first (ordered on the unrounded values).
hc_distances3 <- hc_distances2[order(-hc_distances$pct_uninsured), ]
print(head(hc_distances3, 10))
##     pct_uninsured Closest
## 49           1.00    0.75
## 192          0.91    0.00
## 334          0.68    0.26
## 111          0.66    0.78
## 21           0.60    0.20
## 176          0.57    0.38
## 301          0.56    0.30
## 112          0.54    0.00
## 275          0.51    2.27
## 311          0.46    0.00
# plot uninsured % vs distance to analyze results

# Both aesthetics come from hc_distances3. Mapping y to
# tract_hc2019$pct_uninsured instead would read the unsorted global object and
# pair each distance with another tract's uninsured share.
ggplot(hc_distances3, aes(x = Closest, y = pct_uninsured)) +
  geom_point(col = "darkgreen") +
  xlab("Closest Healthcare Facility in Miles") +
  ylab("Uninsured %") +
  scale_y_continuous(labels = scales::percent)



Conclusions
Are healthcare facilities spatially dispersed in an equitable manner throughout Fulton and DeKalb counties? It’s a good, but difficult, question to answer. So what does the data have to say?

To get started, data was gathered from Yelp providing the location of facilities across the two counties. Demographic data was pulled from the US Census Bureau using the two counties’ Census tracts. Two key variables from the 5-year American Community Survey were chosen - 1) the number of residents aged 35-64 with at least one form of health insurance and 2) the number of residents aged 35-64 with no form of health insurance. The Census offered the data in chunks by age, and 35-64 best represents the most significant portion of the population in their working years (appropriate since most insurance coverage in America is employer-provided). The uninsured percentage of this population was then calculated for each tract.

Next, a spatial representation was generated mapping Census tracts by uninsured percentage with the actual location of healthcare facilities. A visual inspection of the map revealed that more of the lower-insured tracts appear to be in the southern portion of the counties while more of the facilities appear to be in the northern portion of the counties. That alone suggests some level of inequity. To dig further, the count of facilities by Census tract was plotted against the percentage of uninsured by tract. While the results were not overwhelming, they clearly showed that the tracts with the most facilities had a relatively lower percentage of uninsured residents. Conversely, tracts with a higher percentage of uninsured residents had fewer facilities. However, there were a number of lower uninsured tracts which also had few facilities.

The analysis went a layer further as each Census tract’s nearest healthcare facility was calculated. The results were then displayed in another scatterplot. This second plot again showed that several of the higher uninsured tracts had facilities located relatively far away. However, overall, the plot seemed to indicate that most tracts’ nearest facility was located fairly close. In fact, only two tracts had their nearest facility located beyond five miles.

So, is there healthcare inequity across the two counties? The answer appears to be yes, but with some caveats. There is certainly an imbalance of facilities with regard to the level of insured residents - where insurance coverage is greater, more facilities are located. However, the data also seems to indicate that most tracts are located a reasonable distance from at least one facility. Perhaps then, the question isn’t best answered by looking at the location of facilities. There may be more meaningful metrics such as: are the best doctors and the best care options available to all demographics, do healthcare providers discriminate in providing services, and how does the ability to pay for services impact the quality of care? Clearly, the inequity question warrants much further analysis!