library(sf)
## Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
# Read the Yelp hospital layer (GeoJSON) directly from the course repository.
yelp <- st_read(
  dsn = "https://raw.githubusercontent.com/ujhwang/urban-analytics-2024/main/Assignment/mini_3/yelp_hospital.geojson"
)
## Reading layer `yelp_hospital' from data source
## `https://raw.githubusercontent.com/ujhwang/urban-analytics-2024/main/Assignment/mini_3/yelp_hospital.geojson'
## using driver `GeoJSON'
## Simple feature collection with 129 features and 23 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -84.56242 ymin: 33.60009 xmax: -84.08677 ymax: 34.0701
## Geodetic CRS: WGS 84
library(skimr)
# Column-by-column summary of the Yelp layer (types, missing values, ranges).
skim(data = yelp)
## Warning: Couldn't find skimmers for class: sfc_POINT, sfc; No user-defined
## `sfl` provided. Falling back to `character`.
| Name | yelp |
| Number of rows | 129 |
| Number of columns | 24 |
| _______________________ | |
| Column type frequency: | |
| character | 18 |
| logical | 1 |
| numeric | 5 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| id | 0 | 1.00 | 22 | 22 | 0 | 129 | 0 |
| alias | 0 | 1.00 | 17 | 73 | 0 | 129 | 0 |
| name | 0 | 1.00 | 8 | 64 | 0 | 115 | 0 |
| image_url | 0 | 1.00 | 0 | 68 | 86 | 44 | 0 |
| url | 0 | 1.00 | 174 | 230 | 0 | 129 | 0 |
| categories | 0 | 1.00 | 9 | 62 | 0 | 21 | 0 |
| transactions | 0 | 1.00 | 0 | 0 | 129 | 1 | 0 |
| phone | 0 | 1.00 | 0 | 12 | 5 | 107 | 0 |
| display_phone | 0 | 1.00 | 0 | 14 | 5 | 107 | 0 |
| location.address1 | 2 | 0.98 | 0 | 34 | 14 | 87 | 0 |
| location.address2 | 13 | 0.90 | 0 | 7 | 100 | 15 | 0 |
| location.address3 | 23 | 0.82 | 0 | 52 | 104 | 3 | 0 |
| location.city | 0 | 1.00 | 6 | 14 | 0 | 13 | 0 |
| location.zip_code | 0 | 1.00 | 0 | 5 | 1 | 33 | 0 |
| location.country | 0 | 1.00 | 2 | 2 | 0 | 1 | 0 |
| location.state | 0 | 1.00 | 2 | 2 | 0 | 1 | 0 |
| location.display_address | 0 | 1.00 | 17 | 93 | 0 | 93 | 0 |
| geometry | 0 | 1.00 | 21 | 38 | 0 | 103 | 0 |
Variable type: logical
| skim_variable | n_missing | complete_rate | mean | count |
|---|---|---|---|---|
| is_closed | 0 | 1 | 0 | FAL: 129 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| review_count | 0 | 1 | 12.90 | 42.98 | 0.00 | 0.00 | 0.00 | 2.00 | 319.00 | ▇▁▁▁▁ |
| rating | 0 | 1 | 1.05 | 1.47 | 0.00 | 0.00 | 0.00 | 2.00 | 5.00 | ▇▁▂▁▁ |
| distance | 0 | 1 | 1188.10 | 735.72 | 204.09 | 564.81 | 1199.68 | 1647.70 | 4098.37 | ▇▇▂▁▁ |
| coordinates.latitude | 0 | 1 | 33.86 | 0.12 | 33.60 | 33.77 | 33.81 | 33.92 | 34.07 | ▁▇▅▅▅ |
| coordinates.longitude | 0 | 1 | -84.34 | 0.06 | -84.56 | -84.39 | -84.35 | -84.32 | -84.09 | ▁▆▇▁▁ |
I chose four variables: ‘housing_price’, ‘median_income’, ‘white population’, and ‘black population’.
First, I believe that considering economic status is crucial for equity analysis, so I selected housing value and median income as key variables.
Secondly, in a previous class, I observed a clear boundary separating Black and White populations. This led me to consider racial distribution as a significant factor in analyzing regional equity.
# Download 2021 ACS 5-year tract-level estimates, with geometry, for Fulton and
# DeKalb counties (GA), then keep only the estimate columns (suffix "E" in the
# wide output) under short names.
tract <- suppressMessages(
  get_acs(
    geography = "tract",
    state = "GA",
    county = c("Fulton", "Dekalb"),
    variables = c(
      total = "B02001_001",
      housing_price = "B25077_001",
      med_income = "B19013_001",
      poverty = "B17001_002",
      white = "B02001_002",
      black = "B02001_003",
      # NOTE(review): confirm that B27010_005 really is an "uninsured" count;
      # verify the code against the ACS variable list before relying on it.
      no_insurance = "B27010_005"
    ),
    year = 2021,
    survey = "acs5",
    geometry = TRUE,
    output = "wide",
    # suppressMessages() does not silence the shapefile download progress bar,
    # which otherwise floods the rendered report; switch it off explicitly.
    progress_bar = FALSE
  )
) %>%
  select(
    GEOID, NAME,
    total = totalE,
    housing_price = housing_priceE,
    med_income = med_incomeE,
    poverty = povertyE,
    white = whiteE,
    black = blackE,
    no_insurance = no_insuranceE
  )
## | | | 0% | |= | 1% | |== | 2% | |== | 3% | |=== | 4% | |==== | 5% | |==== | 6% | |===== | 7% | |====== | 8% | |====== | 9% | |========= | 13% | |========== | 14% | |=========== | 15% | |============ | 17% | |============= | 18% | |============= | 19% | |============== | 20% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 49% | |=================================== | 51% | |==================================== | 51% | |===================================== | 52% | |===================================== | 53% | |====================================== | 54% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | 
|============================================ | 63% | |============================================= | 64% | |============================================== | 65% | |============================================== | 66% | |================================================= | 70% | |=================================================== | 72% | |==================================================== | 74% | |===================================================== | 75% | |======================================================= | 78% | |======================================================== | 80% | |========================================================= | 82% | |========================================================== | 83% | |============================================================ | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 99% | |======================================================================| 100%
# Remove rows with any missing value, add the Black and White shares of the
# total population, and reproject the tracts to EPSG:4326.
tract <- tract %>%
  drop_na() %>%
  mutate(
    black_rate = black / total,
    white_rate = white / total
  ) %>%
  st_transform(crs = 4326)

# Put the hospital points in the same CRS as the tracts.
yelp <- st_transform(yelp, crs = 4326)
# Draw the following maps as static plots.
tmap_mode(mode = "plot")
## tmap mode set to plotting
# One choropleth per tract attribute, each with "pretty" class breaks and
# half-transparent fill; the four maps are then laid out in a single figure.
blk_rate <- tm_shape(tract) +
  tm_polygons(col = "black_rate", alpha = 0.5, style = "pretty") +
  tm_layout(title = "Black Rate")

wht_rate <- tm_shape(tract) +
  tm_polygons(col = "white_rate", alpha = 0.5, style = "pretty") +
  tm_layout(title = "White Rate")

hou_pric <- tm_shape(tract) +
  tm_polygons(col = "housing_price", alpha = 0.5, style = "pretty") +
  tm_layout(title = "Housing Price")

med_inc <- tm_shape(tract) +
  tm_polygons(col = "med_income", alpha = 0.5, style = "pretty") +
  tm_layout(title = "Med Income")

tmap_arrange(blk_rate, wht_rate, hou_pric, med_inc)
## Some legend labels were too wide. These labels have been resized to 0.64, 0.59, 0.59. Increase legend.width (argument of tm_layout) to make the legend wider and therefore the labels larger.
##### The proportion of area covered by hospitals accessible within a 10-minute drive.
# Attribute-only copy of the tracts (geometry column removed).
tract_d <- tract %>%
  st_drop_geometry()

# `tract` is already an sf layer in EPSG:4326 (WGS 84) and has no
# longitude/latitude columns, so the previous st_as_sf(coords = ..., crs = ...)
# call returned it unchanged and silently ignored those arguments. Copy the
# layer directly to make that explicit.
tract_c <- tract

# Centroid of every tract.
tract_center <- st_centroid(tract_c)
## Warning: st_centroid assumes attributes are constant over geometries
# Circular catchments around every hospital. The data are in a geographic CRS
# and sf reports sf_use_s2() as TRUE, so `dist` is interpreted in metres:
# these are 1 km, 2 km and 3 km radii.
# NOTE(review): the radii are used as stand-ins for 10/20/30-minute drives;
# a straight-line radius is only a rough proxy, so confirm the thresholds.
buffer_10 <- st_buffer(yelp, dist = 1000) # "10-minute" catchment (1 km)
buffer_20 <- st_buffer(yelp, dist = 2000) # "20-minute" catchment (2 km)
buffer_30 <- st_buffer(yelp, dist = 3000) # "30-minute" catchment (3 km)

# For every tract polygon, count how many hospital buffers intersect it.
# lengths() always returns an integer vector (one count per tract), whereas
# sapply(..., length) degrades to an empty list when there are no rows.
tract_hos <- tract_c %>%
  mutate(
    buf_cnt_10 = lengths(st_intersects(tract_c, buffer_10)),
    buf_cnt_20 = lengths(st_intersects(tract_c, buffer_20)),
    buf_cnt_30 = lengths(st_intersects(tract_c, buffer_30))
  )
# Switch tmap to interactive viewing for the buffer-count maps.
tmap_mode(mode = "view")
## tmap mode set to interactive viewing
# One map per buffer size, coloured by the number of hospital buffers that
# intersect each tract (no polygon borders), shown side by side.
buf_cnt_10 <- tm_shape(tract_hos) +
  tm_polygons(col = "buf_cnt_10", style = "pretty", border.col = NA) +
  tm_layout(title = "10-Minute Buffer Coverage")

buf_cnt_20 <- tm_shape(tract_hos) +
  tm_polygons(col = "buf_cnt_20", style = "pretty", border.col = NA) +
  tm_layout(title = "20-Minute Buffer Coverage")

buf_cnt_30 <- tm_shape(tract_hos) +
  tm_polygons(col = "buf_cnt_30", style = "pretty", border.col = NA) +
  tm_layout(title = "30-Minute Buffer Coverage")

tmap_arrange(buf_cnt_10, buf_cnt_20, buf_cnt_30)
# Centroid of each tract, used as the origin for hospital distances.
tract_cent <- st_centroid(x = tract)
## Warning: st_centroid assumes attributes are constant over geometries
# Distance matrix: one row per tract centroid, one column per feature in
# `yelp` (the hospital data).
dist <- st_distance(x = tract_cent, y = yelp)
# Smallest value in each row, i.e. the distance to the nearest hospital.
min_dist <- apply(X = dist, MARGIN = 1, FUN = min)
# Store it on the tract layer as a plain numeric column.
tract_hos$dist_to_hos <- as.numeric(min_dist)
# Scatter plot of nearest-hospital distance against one tract attribute.
#   data    : data frame / sf layer with columns `dist_to_hos`, `total` and
#             the column named by `x_var`
#   x_var   : name (string) of the column mapped to the x axis
#   x_label : x-axis label
#   title   : plot title
# Returns a ggplot object: points coloured by `total` plus a red linear fit
# without a confidence band.
plot_dist_to_hos <- function(data, x_var, x_label, title) {
  ggplot(data, aes(x = .data[[x_var]], y = dist_to_hos, color = total)) +
    geom_point() +
    geom_smooth(method = "lm", se = FALSE, color = "red") +
    labs(
      x = x_label,
      y = "Distance to Hospital",
      title = title,
      color = "Total Residents"
    ) +
    theme_minimal() +
    scale_color_viridis_c(option = "magma", direction = -1)
}

black <- plot_dist_to_hos(
  tract_hos, "black_rate",
  x_label = "Ratio of Black People",
  title = "Distance to Hospitals (Black Population) "
)

house <- plot_dist_to_hos(
  tract_hos, "housing_price",
  x_label = "Housing Price",
  title = "Distance to Hospitals (Housing Price) "
)

med_income <- plot_dist_to_hos(
  tract_hos, "med_income",
  x_label = "Median Income",
  title = "Distance to Hospitals (Median Income) "
)

# The x axis is the White share of the population, so label it as a ratio to
# match the Black-population plot.
white <- plot_dist_to_hos(
  tract_hos, "white_rate",
  x_label = "Ratio of White People",
  title = "Distance to Hospitals (White Population) "
)
# Stack the two race-share plots vertically.
grid.arrange(black, white, ncol = 1)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Stack the housing-price and median-income plots vertically.
grid.arrange(house, med_income, ncol = 1)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
library(dplyr)
# Pearson correlations between nearest-hospital distance and the
# socio-economic variables.
cor_data <- tract_hos %>%
  st_drop_geometry() %>%
  select(dist_to_hos, housing_price, med_income, black_rate, white_rate) %>%
  # The selected columns are already numeric, so no character round-trip is
  # needed; just remove rows with missing values.
  drop_na()

# Rows with missing values were dropped above, so cor() needs no `use` argument.
cor_matrix <- cor(cor_data)
print(cor_matrix)
## dist_to_hos housing_price med_income black_rate white_rate
## dist_to_hos 1.00000000 -0.2331008 -0.06552576 0.3894317 -0.3419459
## housing_price -0.23310079 1.0000000 0.77855571 -0.7336738 0.7870004
## med_income -0.06552576 0.7785557 1.00000000 -0.6662809 0.7201066
## black_rate 0.38943168 -0.7336738 -0.66628087 1.0000000 -0.9375622
## white_rate -0.34194593 0.7870004 0.72010657 -0.9375622 1.0000000
Most hospitals are concentrated in the central part of the study area, with a few located in the north. Moving further south, housing prices and median income tend to be lower, the proportion of the Black population is higher, and the distance to the nearest hospital increases. The scatter plots show a similar trend: a higher proportion of Black residents is associated with a greater distance to hospitals, although the difference is small. I also checked the correlation between the distance to hospitals and the other variables. The Black population rate has the strongest positive correlation (0.39), indicating that areas with higher proportions of Black residents tend to be farther from hospitals. In contrast, both the White population rate (-0.34) and housing prices (-0.23) are negatively correlated with hospital distance, suggesting that areas with larger White populations and higher housing prices are closer to hospitals. Median income, however, shows no clear correlation with hospital distance (-0.07), making it difficult to draw a definitive conclusion. There are some differences in distribution, but overall, I think the spatial distribution of hospitals appears to be relatively even.