Lab 2

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)

##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

Load Packages

library(sf)

## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(janitor)

## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

library(mapview)

Load Spatial Data from Lab 1

The three spatial datasets created in Lab 1 are loaded for reuse.

load("section_07/lab1_spatial_objects.RData")

nyc_zip <- nyc_zip |>
  mutate(zip = as.character(ZIPCODE))

Join COVID-19 Data to ZIP Code Areas

COVID-19 testing and confirmed case data are joined to the NYC ZIP code polygons.

covid_raw <- read_csv(
  "section_08/tests-by-zcta_2020_04_19.csv",
  show_col_types = F
) |>
  clean_names() |>
  filter(!is.na(modzcta)) |>
  mutate(zip = as.character(as.integer(modzcta)))

zip_covid <- nyc_zip |>
  left_join(covid_raw, by = "zip")

names(zip_covid)

##  [1] "ZIPCODE"           "BLDGZIP"           "PO_NAME"          
##  [4] "POPULATION"        "AREA"              "STATE"            
##  [7] "COUNTY"            "ST_FIPS"           "CTY_FIPS"         
## [10] "URL"               "SHAPE_AREA"        "SHAPE_LEN"        
## [13] "zip"               "modzcta"           "positive"         
## [16] "total"             "zcta_cum_perc_pos" "geometry"

Aggregate Food Retail Stores to ZIP Code Areas

Retail food store locations are spatially joined to ZIP code areas and aggregated.

food_all <- st_read("section_08/nycFoodStore.shp", quiet = T) |>
  st_transform(4326) |>
  clean_names()

names(food_all)

##  [1] "i_cnty"   "lcns_nm"  "oprtn_t"  "estbl_t"  "entty_n"  "dba_nam" 
##  [7] "strt_nmb" "stret_nm" "add_l_2"  "add_l_3"  "city"     "state"   
## [13] "zip_cod"  "sqr_ftg"  "locatin"  "coords"   "geometry"

table(food_all$estbl_t)

## 
##      A    JAB   JABC  JABCD JABCDH JABCDK  JABCH JABCHK  JABCK   JABH  JABHK 
##   2289      5    224      3      1      1     54     11     18     22      6 
##   JABK    JAC   JACD  JACDE  JACDH JACDHK  JACDK   JACG   JACH  JACHK   JACK 
##      5   8457     45      2      3     13     13      1     36     12     46 
##   JACZ    JAD  JADHK   JADK   JADO   JAHK    JAK    JAZ    JDA    JKA 
##      1     11      2     13      1      1      1      1      1      1

food_selected <- food_all |>
  filter(str_detect(estbl_t, "A|B"))

food_join <- st_join(zip_covid, food_selected, left = T)

food_count_zip <- food_join |>
  st_drop_geometry() |>
  count(zip, name = "n_food_stores")

zip_covid_food <- zip_covid |>
  left_join(food_count_zip, by = "zip") |>
  mutate(n_food_stores = replace_na(n_food_stores, 0))

Aggregate Health Facilities to ZIP Code Areas

Health facility locations are converted to spatial points and aggregated to ZIP code areas.

health_all <- read_csv(
  "section_08/NYS_Health_Facility.csv",
  show_col_types = F
) |>
  clean_names() |>
  filter(!is.na(facility_longitude), !is.na(facility_latitude)) |>
  mutate(
    longitude = as.numeric(facility_longitude),
    latitude = as.numeric(facility_latitude)
  ) |>
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326, remove = F)

names(health_all)

##  [1] "facility_id"                  "facility_name"               
##  [3] "short_description"            "description"                 
##  [5] "facility_open_date"           "facility_address_1"          
##  [7] "facility_address_2"           "facility_city"               
##  [9] "facility_state"               "facility_zip_code"           
## [11] "facility_phone_number"        "facility_fax_number"         
## [13] "facility_website"             "facility_county_code"        
## [15] "facility_county"              "regional_office_id"          
## [17] "regional_office"              "main_site_name"              
## [19] "main_site_facility_id"        "operating_certificate_number"
## [21] "operator_name"                "operator_address_1"          
## [23] "operator_address_2"           "operator_city"               
## [25] "operator_state"               "operator_zip_code"           
## [27] "cooperator_name"              "cooperator_address"          
## [29] "cooperator_address_2"         "cooperator_city"             
## [31] "cooperator_state"             "cooperator_zip_code"         
## [33] "ownership_type"               "facility_latitude"           
## [35] "facility_longitude"           "facility_location"           
## [37] "longitude"                    "latitude"                    
## [39] "geometry"

table(health_all$description)

## 
##                               Adult Day Health Care Program - Offsite 
##                                                                    38 
##                                          Certified Home Health Agency 
##                                                                   114 
##                                       Diagnostic and Treatment Center 
##                                                                   511 
##                      Diagnostic and Treatment Center Extension Clinic 
##                                                                   677 
##                                                               Hospice 
##                                                                    42 
##                                                              Hospital 
##                                                                   226 
##                                             Hospital Extension Clinic 
##                                                                  1142 
##                                    Long Term Home Health Care Program 
##                                                                    43 
##               Mobile Diagnostic and Treatment Center Extension Clinic 
##                                                                    28 
##                                      Mobile Hospital Extension Clinic 
##                                                                    13 
##                      Primary Care Hospital - Critical Access Hospital 
##                                                                    20 
##     Primary Care Hospital - Critical Access Hospital Extension Clinic 
##                                                                    32 
##                                Residential Health Care Facility - SNF 
##                                                                   620 
##         School Based Diagnostic and Treatment Center Extension Clinic 
##                                                                   166 
##                                School Based Hospital Extension Clinic 
##                                                                   171 
## School Based Primary Care Hospital - Critical Access Extension Clinic 
##                                                                     5

health_selected <- health_all |>
  filter(str_detect(description, "Residential Health Care Facility|SNF|Nursing"))

health_join <- st_join(zip_covid_food, health_selected, left = T)

health_count_zip <- health_join |>
  st_drop_geometry() |>
  count(zip, name = "n_health_facilities")

zip_covid_food_health <- zip_covid_food |>
  left_join(health_count_zip, by = "zip") |>
  mutate(n_health_facilities = replace_na(n_health_facilities, 0))

Join ACS Data to Census Tracts

Census tract geometries are joined with ACS demographic data.

tracts <- st_read(
  "section_08/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp",
  quiet = T
) |>
  st_transform(4326) |>
  clean_names()

acs_raw <- read_csv(
  "section_08/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv",
  show_col_types = F
)

acs <- acs_raw[-1, ] |>
  clean_names() |>
  mutate(geoid = str_remove(geo_id, "^1400000US"))

county_lookup <- tibble(
  boro_code = c("1", "2", "3", "4", "5"),
  county_fips = c("061", "005", "047", "081", "085")
)

tracts <- tracts |>
  mutate(boro_code = as.character(boro_code)) |>
  left_join(county_lookup, by = "boro_code") |>
  mutate(geoid = paste0("36", county_fips, ct2010))

tract_acs <- tracts |>
  left_join(acs, by = "geoid")

Aggregate ACS Data to ZIP Code Areas

Demographic variables are aggregated from census tracts to ZIP code areas.

tract_acs <- tract_acs |>
  mutate(
    total_pop = as.numeric(dp05_0001e),
    pop_65_plus = as.numeric(dp05_0024e),
    white_pop = as.numeric(dp05_0037e),
    black_pop = as.numeric(dp05_0038e),
    asian_pop = as.numeric(dp05_0044e),
    hispanic_pop = as.numeric(dp05_0071e)
  )

acs_zip <- st_join(tract_acs, nyc_zip, left = F) |>
  st_drop_geometry() |>
  group_by(ZIPCODE) |>
  summarise(
    total_pop = sum(total_pop, na.rm = T),
    pop_65_plus = sum(pop_65_plus, na.rm = T),
    white_pop = sum(white_pop, na.rm = T),
    black_pop = sum(black_pop, na.rm = T),
    asian_pop = sum(asian_pop, na.rm = T),
    hispanic_pop = sum(hispanic_pop, na.rm = T),
    .groups = "drop"
  ) |>
  mutate(zip = as.character(ZIPCODE))

Create Final ZIP-Level Dataset

All datasets are combined into a final spatial dataset at the ZIP code level.

zip_final <- zip_covid_food_health |>
  left_join(select(acs_zip, -ZIPCODE), by = "zip")

names(zip_final)

##  [1] "ZIPCODE"             "BLDGZIP"             "PO_NAME"            
##  [4] "POPULATION"          "AREA"                "STATE"              
##  [7] "COUNTY"              "ST_FIPS"             "CTY_FIPS"           
## [10] "URL"                 "SHAPE_AREA"          "SHAPE_LEN"          
## [13] "zip"                 "modzcta"             "positive"           
## [16] "total"               "zcta_cum_perc_pos"   "n_food_stores"      
## [19] "n_health_facilities" "total_pop"           "pop_65_plus"        
## [22] "white_pop"           "black_pop"           "asian_pop"          
## [25] "hispanic_pop"        "geometry"

Verify Results with Mapview

mapview(zip_final, zcol = "positive", layer.name = "COVID Positive Cases") +
  mapview(st_centroid(zip_final), zcol = "n_food_stores", layer.name = "Food Stores")

## Warning: st_centroid assumes attributes are constant over geometries

Save Final Output

save(zip_final, file = "section_08/lab2_final_zip_dataset.RData")

st_write(
  zip_final,
  "section_08/lab2_final_zip_dataset.gpkg",
  layer = "zip_final",
  delete_layer = T,
  quiet = T
)