# Import the hospital POI data and see heads.
getwd()
## [1] "C:/Users/HojungYu/OneDrive/GT_Semester3/Urban_Analytics/mini_3/hw_3"
hos_poi <- st_read("../hospital_11counties.geojson")
## Reading layer `hospital_11counties' from data source
## `C:\Users\HojungYu\OneDrive\GT_Semester3\Urban_Analytics\mini_3\hospital_11counties.geojson'
## using driver `GeoJSON'
## Simple feature collection with 119 features and 8 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -84.73147 ymin: 33.42719 xmax: -83.92052 ymax: 34.24585
## Geodetic CRS: WGS 84
head(hos_poi)
## Simple feature collection with 6 features and 8 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -84.50847 ymin: 33.42719 xmax: -84.09618 ymax: 33.51114
## Geodetic CRS: WGS 84
## id name
## 1 ChIJZfvWaSfv9IgRO0Er_vksXXU Piedmont Fayette
## 2 ChIJg5T0Tsxb9IgRmDdy4cVBydU Primary Pediatrics
## 3 ChIJeZlNG9RE9IgRsiEsG6aOOPI Aylo Health - Primary Care at McDonough, Hwy 81
## 4 ChIJn4tmsLD69IgR3Jdd7nzpAkY Southeast Medical Group at Fayetteville
## 5 ChIJA3catM1b9IgRa7fpG9cm3Iw Resurgens Orthopaedics
## 6 ChIJBduprw5X9IgRygZRl0TyxOs Piedmont Henry
## address primary_type
## 1 1255 Hwy 54 W, Fayetteville, GA 30214, USA hospital
## 2 110-A Regency Park Dr, McDonough, GA 30253, USA hospital
## 3 65 Old Jackson Rd, McDonough, GA 30252, USA hospital
## 4 105 Carnegie Pl #103, Fayetteville, GA 30214, USA hospital
## 5 156 Foster Dr Ste B, McDonough, GA 30253, USA hospital
## 6 1133 Eagles Landing Pkwy, Stockbridge, GA 30281, USA hospital
## types status rating
## 1 hospital,health,point_of_interest,establishment OPERATIONAL 2.6
## 2 hospital,health,point_of_interest,establishment OPERATIONAL 4.0
## 3 hospital,doctor,health,point_of_interest,establishment OPERATIONAL 4.7
## 4 hospital,doctor,health,point_of_interest,establishment OPERATIONAL 4.5
## 5 hospital,doctor,health,point_of_interest,establishment OPERATIONAL 4.8
## 6 hospital,health,point_of_interest,establishment OPERATIONAL 2.0
## rating_count geometry
## 1 1096 POINT (-84.50847 33.45259)
## 2 69 POINT (-84.17159 33.43241)
## 3 1165 POINT (-84.09618 33.42719)
## 4 739 POINT (-84.42877 33.50894)
## 5 3180 POINT (-84.2052 33.46362)
## 6 1356 POINT (-84.22713 33.51114)
sum(is.na(hos_poi))
## [1] 0
Hospital POIs are cleaned datasets.
In the assignment definition, equity recognizes that each group of people has different circumstances/demands and allocates resources/opportunities accordingly. Therefore, equity analysis in access often includes identifying different spatial distribution of each resource. Also, it includes observing correlation between socio-demographic background with certain measure. Since this is healthcare access, I will include socioeconomic measures such as household income, ethnicity, and educational attainment, and transportation circumstances such as vehicle ownership to see how it affects the access to healthcare system.
# vars <- load_variables(2023, "acs5")
census_api_key(Sys.getenv("CENSUS_API"))
## To install your API key for use in future sessions, run this function with `install = TRUE`.
tract_vars <- suppressMessages(
get_acs(geography = "tract",
state = "GA",
county = c("Cherokee", "Clayton", "Cobb", "Dekalb", "Douglas", "Fayette", "Forsyth", "Fulton", "Gwinnett", "Henry", "Rockdale"),
variables = c(pop = "B01003_001E",
edu_associate = "B06009_004E",
edu_bachelor = "B06009_005E",
edu_graduate = "B06009_006E",
edu_total = "B06009_001E",
hhincome = "B19013_001E",
median_age = "B01002_001E",
hh = "B11001_001E",
non_hispanic_white = "B03002_003E",
race_ethnic_total = "B03002_001E",
total = "B08201_001E", # total households
v0 = "B08201_002E", # no vehicle available
v1 = "B08201_003E",
v2 = "B08201_004E",
v3 = "B08201_005E",
v4p = "B08201_006E" ),
year = 2023,
survey = "acs5",
geometry = TRUE,
output = "wide"))
## | | | 0% | |= | 1% | |= | 2% | |== | 3% | |=== | 4% | |==== | 5% | |==== | 6% | |===== | 7% | |====== | 8% | |====== | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |================ | 22% | |================ | 23% | |================= | 24% | |================== | 25% | |================== | 26% | |=================== | 27% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 36% | |========================== | 37% | |=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================== | 48% | |================================== | 49% | |=================================== | 49% | |=================================== | 50% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 63% | |============================================= | 64% | |============================================= | 65% | |============================================== | 66% | |=============================================== | 67% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 73% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 76% | |====================================================== | 76% | |====================================================== | 77% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 83% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 89% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================= | 92% | |================================================================= | 93% | |================================================================== | 94% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
# Creating measures from raw data
tract_vars_a <- tract_vars %>%
mutate(
high_edu_pct = (edu_associate + edu_bachelor + edu_graduate) / edu_total,
minority_pct = (race_ethnic_total - non_hispanic_white) / race_ethnic_total,
vehicles_per_hh = (0*v0 + 1*v1 + 2*v2 + 3.5*v3 + 4.5*v4p) / total,
)
Check for missing values and handle them appropriately. Create new variables if needed (e.g., proportions, densities). Join the POI data with the ACS data. Ensure coordinate reference systems (CRS) are appropriate for distance calculations.
clean_vars_a <- tract_vars_a[, c("GEOID","pop", "hhincome", "median_age", "high_edu_pct", "minority_pct", "vehicles_per_hh", "geometry")]
# Check NA rows.
na_rows_any_column <- clean_vars_a[rowSums(is.na(clean_vars_a)) > 0, ]
# Usually household income are NA. NA census tracts are usually non-residential area. So we will drop these rows.
print(na_rows_any_column)
## Simple feature collection with 16 features and 7 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -84.4925 ymin: 33.46943 xmax: -84.23339 ymax: 33.80567
## Geodetic CRS: NAD83
## First 10 features:
## GEOID pop hhincome median_age high_edu_pct minority_pct
## 17 13121980000 0 NA NA NaN NaN
## 18 13121003700 330 NA 68.9 0.5000000 0.9424242
## 58 13121008203 2586 NA 46.3 0.5662269 0.9682908
## 108 13121006801 1800 NA 41.9 0.2100508 0.7972222
## 207 13121012000 3615 NA 36.3 0.4843750 0.8575380
## 244 13089023115 1765 NA 33.0 0.4044630 0.8532578
## 297 13121003800 3856 NA 19.9 0.6939891 0.7562241
## 298 13089980000 0 NA NA NaN NaN
## 308 13121008400 2826 NA 49.1 0.7903071 0.9104742
## 311 13121008302 1952 NA 39.5 0.4641385 0.9697746
## vehicles_per_hh geometry
## 17 NaN MULTIPOLYGON (((-84.4403 33...
## 18 0.6351931 MULTIPOLYGON (((-84.41132 3...
## 58 1.3721498 MULTIPOLYGON (((-84.4925 33...
## 108 NaN MULTIPOLYGON (((-84.37364 3...
## 207 0.9331476 MULTIPOLYGON (((-84.4014 33...
## 244 NaN MULTIPOLYGON (((-84.24931 3...
## 297 1.0549451 MULTIPOLYGON (((-84.41753 3...
## 298 NaN MULTIPOLYGON (((-84.30973 3...
## 308 0.9570978 MULTIPOLYGON (((-84.44427 3...
## 311 0.9814815 MULTIPOLYGON (((-84.46315 3...
mapview(na_rows_any_column)
# Dropping rows with NA values.
clean_vars <- clean_vars_a %>% drop_na()
# Doing spatial join with hospital
clean_vars %<>% st_transform(26916)
hos_poi %<>% st_transform(26916)
Identify suitable metric(s) and justify your choices. Example metrics include: The number of hospitals within X miles. The presence of a hospital within X miles. The distance to the nearest hospital. The average distance to the X nearest hospitals. Compute your chosen measure(s).
I identify the euclidean distance to hospitals from the central point in each census tract as the measure of healthcare access.
centroids <- st_centroid(clean_vars)
## Warning: st_centroid assumes attributes are constant over geometries
# compute euclidean distance (meter)
dist_matrix <- st_distance(centroids, hos_poi)
# Get minimum distance per centroid
min_dist <- apply(dist_matrix, 1, min)
# Add to your data frame
df_final <- clean_vars %>%
mutate(dist_to_nearest_hos = as.numeric(min_dist))
sapply(df_final, function(x) sum(is.na(x)))
## GEOID pop hhincome median_age
## 0 0 0 0
## high_edu_pct minority_pct vehicles_per_hh geometry
## 0 0 0 0
## dist_to_nearest_hos
## 0
poi_joined <- st_join(hos_poi, clean_vars, join = st_within, left = FALSE)
# Count POIs within boundary
poi_count <- poi_joined %>%
st_drop_geometry() %>%
group_by(GEOID) %>%
summarise(poi_count = n())
hos_summary <- df_final %>%
left_join(poi_count, by = "GEOID") %>%
mutate(poi_count = replace_na(poi_count, 0))
# Graph 1: Graph histogram of household income
hist(df_final$hhincome, breaks = 50)
# Graph 2: Basic scatterplot
ggplot(df_final, aes(x = hhincome, y = dist_to_nearest_hos)) +
geom_point(alpha = 0.6, size = 2, color = "steelblue") +
geom_smooth(method = "lm", se = TRUE, color = "darkred", linewidth = 1) +
labs(
title = "Relationship between Household Income and Distance to Nearest Hospital",
x = "Median Household Income (USD)",
y = "Distance to Nearest Hospital (meters)"
) +
theme_minimal(base_size = 14)
## `geom_smooth()` using formula = 'y ~ x'
# Map 1: Visualize the number of POIs (hospital)
tm_shape(hos_summary) +
tm_polygons("poi_count", palette = "viridis", title = "Number of POIs") +
tm_layout(frame = FALSE)
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).[v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'Registered S3 method overwritten by 'jsonify':
## method from
## print.json jsonlite
# Map 2: Visualize the distance to POIs (hospital)
tm_shape(df_final) +
tm_polygons("dist_to_nearest_hos", palette = "YlGn", title = "Distance to POIs") +
tm_layout(frame = FALSE)
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).[v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'[cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "YlGn" is named
## "brewer.yl_gn"Multiple palettes called "yl_gn" found: "brewer.yl_gn", "matplotlib.yl_gn". The first one, "brewer.yl_gn", is returned.
# Basic linear regression
model <- lm(dist_to_nearest_hos ~ hhincome + high_edu_pct + minority_pct + vehicles_per_hh, data = df_final)
# View summary
summary(model)
##
## Call:
## lm(formula = dist_to_nearest_hos ~ hhincome + high_edu_pct +
## minority_pct + vehicles_per_hh, data = df_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5223.5 -1706.2 -488.4 1072.4 14582.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.352e+02 7.384e+02 0.860 0.3898
## hhincome -1.212e-02 3.024e-03 -4.007 6.53e-05 ***
## high_edu_pct -1.159e+03 6.802e+02 -1.704 0.0886 .
## minority_pct -5.504e+02 4.058e+02 -1.356 0.1753
## vehicles_per_hh 3.014e+03 2.026e+02 14.876 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2602 on 1227 degrees of freedom
## Multiple R-squared: 0.193, Adjusted R-squared: 0.1904
## F-statistic: 73.38 on 4 and 1227 DF, p-value: < 2.2e-16
Household income and the number of vehicle per household is statistically significant variables to dependent variable, the nearest distance to nearest hospital. For every $1 increase in median household income, distance to hospital decreases by about 0.012m. So for ever $10,000 higher income, hospitals are ~120m closer. This is statistically significant (p < 0.001).
Areas with more vehicles per household are farther from hospitals. This is significant positive relationships (p < 0.001).
Seeing from R square value, the model explains ~19% of the variation in hospital distance.
Overall, the model supports a spatial equity narrative: wealthier, denser, or more urban areas with fewer cars tend to have better access to hospitals.