R Spatial Lab Assignment # 2

Task 1: Joining COVID-19 data to NYC zip code area data

# Loading the data from Spatial Assignment 1
load("spatial_data.RData")

nyc_covid_tests <- readr::read_csv("Data/R-Spatial_II_Lab/tests-by-zcta_2020_04_19.csv", 
                           show_col_types=FALSE,
                                  lazy=FALSE)

dplyr::left_join(nyc_zipcode_sf %>%
                   dplyr::mutate(ZIPCODE=as.numeric(as.character(ZIPCODE))),
                 nyc_covid_tests,
                 by = c('ZIPCODE' = 'MODZCTA')) -> nyc_sf_merged

mapview(nyc_sf_merged, zcol=c('POPULATION','Positive'))

Task 2: Aggregating NYC food retail store data to zip code data

nyc_food_stores <- st_read("Data/R-Spatial_II_Lab/nycFoodStore.shp")
## Reading layer `nycFoodStore' from data source 
##   `/Users/sonia/Documents/Spring 2026/R Vizualization/Module 3 - Shipeng Sun/Data/R-Spatial_II_Lab/nycFoodStore.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS:  WGS 84
nyc_food_stores_2263 <- st_transform(nyc_food_stores, 2263)

zip_food <- nyc_food_stores_2263 %>% dplyr::filter(stringr::str_detect(Estbl_T, '[AJD]')) %>%
  sf::st_join(nyc_sf_merged, ., join= st_contains) %>%
  group_by(ZIPCODE) %>%
  summarize(FoodStoreNum = n()) %>%
  st_drop_geometry()

nyc_sf_merged <- left_join(nyc_sf_merged,zip_food, by='ZIPCODE')

Task 3: Aggregating NYC health facilities data to zip code data

# Cleaning up data set

health_facilities <- nys_health_facilities_sf %>% 
  cbind(st_coordinates(.)) %>%
  dplyr::filter(`Short.Description` == 'NH') 

facility_points <- st_as_sf(health_facilities, 
                            coords=c('X','Y'),
                            crs=4326)

facility_points_2263 <- st_transform(facility_points, 2263)

zip_nh <- facility_points_2263 %>%
  sf::st_join(nyc_sf_merged, ., join= st_contains) %>%
  group_by(ZIPCODE) %>%
  summarize(NursingHomeNum = n()) %>%
  st_drop_geometry()

nyc_sf_merged <- left_join(nyc_sf_merged,zip_nh, by='ZIPCODE')

head(nyc_sf_merged)
## Simple feature collection with 6 features and 17 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 986490.1 ymin: 168910.5 xmax: 1043042 ymax: 189382.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
##   ZIPCODE BLDGZIP  PO_NAME POPULATION     AREA STATE COUNTY ST_FIPS CTY_FIPS
## 1   11436       0  Jamaica      18681 22699295    NY Queens      36      081
## 2   11213       0 Brooklyn      62426 29631004    NY  Kings      36      047
## 3   11212       0 Brooklyn      83866 41972104    NY  Kings      36      047
## 4   11225       0 Brooklyn      56527 23698630    NY  Kings      36      047
## 5   11218       0 Brooklyn      72280 36868799    NY  Kings      36      047
## 6   11226       0 Brooklyn     106132 39408598    NY  Kings      36      047
##                    URL SHAPE_AREA SHAPE_LEN Positive Total zcta_cum.perc_pos
## 1 http://www.usps.com/          0         0      342   567             60.32
## 2 http://www.usps.com/          0         0      972  1653             58.80
## 3 http://www.usps.com/          0         0     1086  1793             60.57
## 4 http://www.usps.com/          0         0      814  1359             59.90
## 5 http://www.usps.com/          0         0     1163  1967             59.13
## 6 http://www.usps.com/          0         0     1336  2170             61.57
##   FoodStoreNum NursingHomeNum                       geometry
## 1            3              1 POLYGON ((1038098 188138.4,...
## 2          118              1 POLYGON ((1001614 186926.4,...
## 3          187              1 POLYGON ((1011174 183696.3,...
## 4          103              1 POLYGON ((995908.4 183617.6...
## 5          135              1 POLYGON ((991997.1 176307.5...
## 6          236              3 POLYGON ((994821.5 177865.7...

Task 4: Joining Census ACS data to NYC planning census tract data

nycCensus <- sf::st_read('Data/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp', 
                         stringsAsFactors = FALSE)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `/Users/sonia/Documents/Spring 2026/R Vizualization/Module 3 - Shipeng Sun/Data/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)
nycCensus %<>% dplyr::mutate(
  cntyFIPS = case_when(
    boro_name == 'Bronx' ~ '005',
    boro_name == 'Brooklyn' ~ '047',
    boro_name == 'Manhattan' ~ '061',
    boro_name == 'Queens' ~ '081',
    boro_name == 'Staten Island' ~ '085'
    ),
  tractFIPS = paste(cntyFIPS, ct2010, sep='')
)

acsData <- readLines("Data/R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>% 
  textConnection() %>%
  read.csv(header=TRUE, quote= "\"") %>%
  dplyr::select(GEO_ID, 
                totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
                malePop = DP05_0002E, femalePop = DP05_0003E,  
                whitePop = DP05_0037E, blackPop = DP05_0038E,
                asianPop = DP05_0067E, hispanicPop = DP05_0071E,
                adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1))


popData <- merge(nycCensus, acsData, by.x ='tractFIPS', by.y = 'censusCode')

popNYC <- sf::st_transform(popData, st_crs(nyc_sf_merged)) 

covidPopZipNYC <- sf::st_join(nyc_sf_merged, 
                              popNYC %>% sf::st_centroid(), 
                              join = st_contains) %>% 
  group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive, Total) %>% 
  summarise(totPop = sum(totPop),
            elderlyPop = sum(elderlyPop), 
            asianPop = sum(asianPop),
            blackPop = sum(blackPop),
            hispanicPop = sum(hispanicPop),
            whitePop = sum(whitePop))
## Warning: st_centroid assumes attributes are constant over geometries
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by ZIPCODE, PO_NAME, POPULATION, COUNTY,
##   Positive, and Total.
## ℹ Output is grouped by ZIPCODE, PO_NAME, POPULATION, COUNTY, and Positive.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive,
##   Total))` for per-operation grouping (`?dplyr::dplyr_by`) instead.

Task 5: Aggregating ACS Census Data to zip code area data

nyc_final_sf <- left_join(nyc_sf_merged,
  covidPopZipNYC %>% 
    st_drop_geometry() %>%
    select(ZIPCODE, totPop, elderlyPop, whitePop, blackPop, asianPop, hispanicPop),
  by = "ZIPCODE"
)
## Adding missing grouping variables: `PO_NAME`, `POPULATION`, `COUNTY`,
## `Positive`
## Warning in sf_column %in% names(g): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 15 of `x` matches multiple rows in `y`.
## ℹ Row 130 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
print(nyc_final_sf)
## Simple feature collection with 271 features and 27 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
## First 10 features:
##    ZIPCODE BLDGZIP PO_NAME.x POPULATION.x     AREA STATE COUNTY.x ST_FIPS
## 1    11436       0   Jamaica        18681 22699295    NY   Queens      36
## 2    11213       0  Brooklyn        62426 29631004    NY    Kings      36
## 3    11212       0  Brooklyn        83866 41972104    NY    Kings      36
## 4    11225       0  Brooklyn        56527 23698630    NY    Kings      36
## 5    11218       0  Brooklyn        72280 36868799    NY    Kings      36
## 6    11226       0  Brooklyn       106132 39408598    NY    Kings      36
## 7    11219       0  Brooklyn        92561 42002738    NY    Kings      36
## 8    11210       0  Brooklyn        67067 47887023    NY    Kings      36
## 9    11230       0  Brooklyn        80857 49926703    NY    Kings      36
## 10   11204       0  Brooklyn        77354 43555185    NY    Kings      36
##    CTY_FIPS                  URL SHAPE_AREA SHAPE_LEN Positive.x Total
## 1       081 http://www.usps.com/          0         0        342   567
## 2       047 http://www.usps.com/          0         0        972  1653
## 3       047 http://www.usps.com/          0         0       1086  1793
## 4       047 http://www.usps.com/          0         0        814  1359
## 5       047 http://www.usps.com/          0         0       1163  1967
## 6       047 http://www.usps.com/          0         0       1336  2170
## 7       047 http://www.usps.com/          0         0       1961  3018
## 8       047 http://www.usps.com/          0         0       1162  1931
## 9       047 http://www.usps.com/          0         0       1573  2615
## 10      047 http://www.usps.com/          0         0       1325  2234
##    zcta_cum.perc_pos FoodStoreNum NursingHomeNum PO_NAME.y POPULATION.y
## 1              60.32            3              1   Jamaica        18681
## 2              58.80          118              1  Brooklyn        62426
## 3              60.57          187              1  Brooklyn        83866
## 4              59.90          103              1  Brooklyn        56527
## 5              59.13          135              1  Brooklyn        72280
## 6              61.57          236              3  Brooklyn       106132
## 7              64.98          176              3  Brooklyn        92561
## 8              60.18           73              1  Brooklyn        67067
## 9              60.15          128              1  Brooklyn        80857
## 10             59.31          144              1  Brooklyn        77354
##    COUNTY.y Positive.y totPop elderlyPop whitePop blackPop asianPop hispanicPop
## 1    Queens        342  22377       2456     1192    13972     2626        3226
## 2     Kings        972  66602       7662    16483    43625     1836        6738
## 3     Kings       1086  73069       9518     5278    59541     1330       13900
## 4     Kings        814  60958       7592    15300    40048     2272        5293
## 5     Kings       1163  67426       8144    40315     5045    14695       11169
## 6     Kings       1336 103729      12278    16173    69058     5207       18244
## 7     Kings       1961  85562      10192    59375      890    19828        9982
## 8     Kings       1162  74162      10368    27580    37835     4451        5860
## 9     Kings       1573  77494      11808    55294     4325    12744        7536
## 10    Kings       1325  83040      11083    47554      845    26910       10986
##                          geometry
## 1  POLYGON ((1038098 188138.4,...
## 2  POLYGON ((1001614 186926.4,...
## 3  POLYGON ((1011174 183696.3,...
## 4  POLYGON ((995908.4 183617.6...
## 5  POLYGON ((991997.1 176307.5...
## 6  POLYGON ((994821.5 177865.7...
## 7  POLYGON ((987286.4 173946.5...
## 8  POLYGON ((995796 171110.1, ...
## 9  POLYGON ((994099.3 171240.7...
## 10 POLYGON ((989500.2 170730.2...
mapview(nyc_final_sf, zcol = "totPop",      layer.name = "Total Population") +
mapview(nyc_final_sf, zcol = "elderlyPop",  layer.name = "Elderly Population")

Saving into RDdata File

save(nyc_final_sf, file = "zip_codes_spatial_data.RData")

cat("Final NYC sf object with COVID-29 positive cases, food stores, 
    health facilities, and populaton at zip code level saved to a RData File.")
## Final NYC sf object with COVID-29 positive cases, food stores, 
##     health facilities, and populaton at zip code level saved to a RData File.