This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(mapview)
The three spatial datasets created in Lab 1 are loaded for reuse.
load("section_07/lab1_spatial_objects.RData")
nyc_zip <- nyc_zip |>
mutate(zip = as.character(ZIPCODE))
COVID-19 testing and confirmed case data are joined to the NYC ZIP code polygons.
covid_raw <- read_csv(
"section_08/tests-by-zcta_2020_04_19.csv",
show_col_types = F
) |>
clean_names() |>
filter(!is.na(modzcta)) |>
mutate(zip = as.character(as.integer(modzcta)))
zip_covid <- nyc_zip |>
left_join(covid_raw, by = "zip")
names(zip_covid)
## [1] "ZIPCODE" "BLDGZIP" "PO_NAME"
## [4] "POPULATION" "AREA" "STATE"
## [7] "COUNTY" "ST_FIPS" "CTY_FIPS"
## [10] "URL" "SHAPE_AREA" "SHAPE_LEN"
## [13] "zip" "modzcta" "positive"
## [16] "total" "zcta_cum_perc_pos" "geometry"
Retail food store locations are spatially joined to ZIP code areas and aggregated.
food_all <- st_read("section_08/nycFoodStore.shp", quiet = T) |>
st_transform(4326) |>
clean_names()
names(food_all)
## [1] "i_cnty" "lcns_nm" "oprtn_t" "estbl_t" "entty_n" "dba_nam"
## [7] "strt_nmb" "stret_nm" "add_l_2" "add_l_3" "city" "state"
## [13] "zip_cod" "sqr_ftg" "locatin" "coords" "geometry"
table(food_all$estbl_t)
##
## A JAB JABC JABCD JABCDH JABCDK JABCH JABCHK JABCK JABH JABHK
## 2289 5 224 3 1 1 54 11 18 22 6
## JABK JAC JACD JACDE JACDH JACDHK JACDK JACG JACH JACHK JACK
## 5 8457 45 2 3 13 13 1 36 12 46
## JACZ JAD JADHK JADK JADO JAHK JAK JAZ JDA JKA
## 1 11 2 13 1 1 1 1 1 1
food_selected <- food_all |>
filter(str_detect(estbl_t, "A|B"))
food_join <- st_join(zip_covid, food_selected, left = T)
food_count_zip <- food_join |>
st_drop_geometry() |>
count(zip, name = "n_food_stores")
zip_covid_food <- zip_covid |>
left_join(food_count_zip, by = "zip") |>
mutate(n_food_stores = replace_na(n_food_stores, 0))
Health facility locations are converted to spatial points and aggregated to ZIP code areas.
health_all <- read_csv(
"section_08/NYS_Health_Facility.csv",
show_col_types = F
) |>
clean_names() |>
filter(!is.na(facility_longitude), !is.na(facility_latitude)) |>
mutate(
longitude = as.numeric(facility_longitude),
latitude = as.numeric(facility_latitude)
) |>
st_as_sf(coords = c("longitude", "latitude"), crs = 4326, remove = F)
names(health_all)
## [1] "facility_id" "facility_name"
## [3] "short_description" "description"
## [5] "facility_open_date" "facility_address_1"
## [7] "facility_address_2" "facility_city"
## [9] "facility_state" "facility_zip_code"
## [11] "facility_phone_number" "facility_fax_number"
## [13] "facility_website" "facility_county_code"
## [15] "facility_county" "regional_office_id"
## [17] "regional_office" "main_site_name"
## [19] "main_site_facility_id" "operating_certificate_number"
## [21] "operator_name" "operator_address_1"
## [23] "operator_address_2" "operator_city"
## [25] "operator_state" "operator_zip_code"
## [27] "cooperator_name" "cooperator_address"
## [29] "cooperator_address_2" "cooperator_city"
## [31] "cooperator_state" "cooperator_zip_code"
## [33] "ownership_type" "facility_latitude"
## [35] "facility_longitude" "facility_location"
## [37] "longitude" "latitude"
## [39] "geometry"
table(health_all$description)
##
## Adult Day Health Care Program - Offsite
## 38
## Certified Home Health Agency
## 114
## Diagnostic and Treatment Center
## 511
## Diagnostic and Treatment Center Extension Clinic
## 677
## Hospice
## 42
## Hospital
## 226
## Hospital Extension Clinic
## 1142
## Long Term Home Health Care Program
## 43
## Mobile Diagnostic and Treatment Center Extension Clinic
## 28
## Mobile Hospital Extension Clinic
## 13
## Primary Care Hospital - Critical Access Hospital
## 20
## Primary Care Hospital - Critical Access Hospital Extension Clinic
## 32
## Residential Health Care Facility - SNF
## 620
## School Based Diagnostic and Treatment Center Extension Clinic
## 166
## School Based Hospital Extension Clinic
## 171
## School Based Primary Care Hospital - Critical Access Extension Clinic
## 5
health_selected <- health_all |>
filter(str_detect(description, "Residential Health Care Facility|SNF|Nursing"))
health_join <- st_join(zip_covid_food, health_selected, left = T)
health_count_zip <- health_join |>
st_drop_geometry() |>
count(zip, name = "n_health_facilities")
zip_covid_food_health <- zip_covid_food |>
left_join(health_count_zip, by = "zip") |>
mutate(n_health_facilities = replace_na(n_health_facilities, 0))
Census tract geometries are joined with ACS demographic data.
tracts <- st_read(
"section_08/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp",
quiet = T
) |>
st_transform(4326) |>
clean_names()
acs_raw <- read_csv(
"section_08/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv",
show_col_types = F
)
acs <- acs_raw[-1, ] |>
clean_names() |>
mutate(geoid = str_remove(geo_id, "^1400000US"))
county_lookup <- tibble(
boro_code = c("1", "2", "3", "4", "5"),
county_fips = c("061", "005", "047", "081", "085")
)
tracts <- tracts |>
mutate(boro_code = as.character(boro_code)) |>
left_join(county_lookup, by = "boro_code") |>
mutate(geoid = paste0("36", county_fips, ct2010))
tract_acs <- tracts |>
left_join(acs, by = "geoid")
Demographic variables are aggregated from census tracts to ZIP code areas.
tract_acs <- tract_acs |>
mutate(
total_pop = as.numeric(dp05_0001e),
pop_65_plus = as.numeric(dp05_0024e),
white_pop = as.numeric(dp05_0037e),
black_pop = as.numeric(dp05_0038e),
asian_pop = as.numeric(dp05_0044e),
hispanic_pop = as.numeric(dp05_0071e)
)
acs_zip <- st_join(tract_acs, nyc_zip, left = F) |>
st_drop_geometry() |>
group_by(ZIPCODE) |>
summarise(
total_pop = sum(total_pop, na.rm = T),
pop_65_plus = sum(pop_65_plus, na.rm = T),
white_pop = sum(white_pop, na.rm = T),
black_pop = sum(black_pop, na.rm = T),
asian_pop = sum(asian_pop, na.rm = T),
hispanic_pop = sum(hispanic_pop, na.rm = T),
.groups = "drop"
) |>
mutate(zip = as.character(ZIPCODE))
All datasets are combined into a final spatial dataset at the ZIP code level.
zip_final <- zip_covid_food_health |>
left_join(select(acs_zip, -ZIPCODE), by = "zip")
names(zip_final)
## [1] "ZIPCODE" "BLDGZIP" "PO_NAME"
## [4] "POPULATION" "AREA" "STATE"
## [7] "COUNTY" "ST_FIPS" "CTY_FIPS"
## [10] "URL" "SHAPE_AREA" "SHAPE_LEN"
## [13] "zip" "modzcta" "positive"
## [16] "total" "zcta_cum_perc_pos" "n_food_stores"
## [19] "n_health_facilities" "total_pop" "pop_65_plus"
## [22] "white_pop" "black_pop" "asian_pop"
## [25] "hispanic_pop" "geometry"
mapview(zip_final, zcol = "positive", layer.name = "COVID Positive Cases") +
mapview(st_centroid(zip_final), zcol = "n_food_stores", layer.name = "Food Stores")
## Warning: st_centroid assumes attributes are constant over geometries
save(zip_final, file = "section_08/lab2_final_zip_dataset.RData")
st_write(
zip_final,
"section_08/lab2_final_zip_dataset.gpkg",
layer = "zip_final",
delete_layer = T,
quiet = T
)