Read in Zipcode and Covid data

zips <- st_read(file.path(data_path,'session_11_output\\nys.gpkg'),layer='zipcodes') %>%  
  distinct(ZIPCODE, .keep_all = TRUE)                                                       # filter out duplicate rows
## Reading layer `zipcodes' from data source 
##   `C:\Users\roseh\Desktop\7852\Session_11\R-Spatial_II_Lab\R-Spatial_II_Lab\session_11_output\nys.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
covid <- read_csv(file.path(data_path,'tests-by-zcta_2021_04_23.csv'))
## Rows: 177 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): NEIGHBORHOOD_NAME, BOROUGH_GROUP, label
## dbl (10): MODIFIED_ZCTA, lat, lon, COVID_CASE_COUNT, COVID_CASE_RATE, POP_DE...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Read in stores and health facility data

stores <- st_read(file.path(data_path,'session_11_output\\nys.gpkg'),layer='retail_stores') %>%   
  st_transform(2263) %>%
  filter(Establishment.Type == 'JAC')
## Reading layer `retail_stores' from data source 
##   `C:\Users\roseh\Desktop\7852\Session_11\R-Spatial_II_Lab\R-Spatial_II_Lab\session_11_output\nys.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 23972 features and 16 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -79.75953 ymin: 40.50782 xmax: -71.93873 ymax: 44.99484
## Geodetic CRS:  WGS 84
health <- (st_read(file.path(data_path,'session_11_output\\nys.gpkg'),layer='health_facilities')) %>%   
  st_transform(2263) 
## Reading layer `health_facilities' from data source 
##   `C:\Users\roseh\Desktop\7852\Session_11\R-Spatial_II_Lab\R-Spatial_II_Lab\session_11_output\nys.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 3794 features and 34 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -79.6299 ymin: 40.51677 xmax: -73.00694 ymax: 44.97849
## Geodetic CRS:  WGS 84

Read in ACS data

acs <- readLines(file.path(data_path,'ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv')) %>%
  extract(-2) %>% 
  textConnection() %>%
  read.csv(header=TRUE, quote= "\"") %>%
  select(GEO_ID, 
                totPop = DP05_0001E, elderlyPop = DP05_0024E, 
                malePop = DP05_0002E, femalePop = DP05_0003E,  
                whitePop = DP05_0037E, blackPop = DP05_0038E,
                asianPop = DP05_0067E, hispanicPop = DP05_0071E,
                adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
  mutate(censusCode = str_sub(GEO_ID, -9,-1));

Read in Census data

census <- st_read(file.path(data_path,'\\2010 Census Tracts\\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp'), stringsAsFactors = FALSE) %>%
  st_transform(2263)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\roseh\Desktop\7852\Session_11\R-Spatial_II_Lab\R-Spatial_II_Lab\2010 Census Tracts\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)
census %<>% mutate(cntyFIPS = case_when(
  boro_name == 'Bronx' ~ '005',
  boro_name == 'Brooklyn' ~ '047',
  boro_name == 'Manhattan' ~ '061',
  boro_name == 'Queens' ~ '081',
  boro_name == 'Staten Island' ~ '085'),
  tractFIPS = paste(cntyFIPS, ct2010, sep='')
)

Task 1: Join covid data to Zipcodes

covid_zips <- merge(zips, covid, by.x ='ZIPCODE', by.y = 'MODIFIED_ZCTA')

Task 2: Aggregate food stores to zipcodes

stores_zips <-  st_join(zips,stores,st_contains)

num_stores_per_zip <- stores_zips %>%
  group_by(ZIPCODE) %>%
  summarize(num_stores = n())

Task 3: Aggregate health facilities to zipcodes

facs_zips <- st_join(zips,health,st_contains)

num_facs <- facs_zips %>%
  group_by(ZIPCODE) %>%
  summarize(num_facs = n())

Task 4: Join ACS data to census tracts

acs_census <- merge(census, acs, by.x ='tractFIPS', by.y = 'censusCode')

Task 5: Aggregate ACS/census tract data to zipcodes

census_zips <- st_join(zips,acs_census,st_contains)