Author

Alexandra Mejia

Published

March 18, 2026

Explanation of the template

Update the title with your information. Make sure to include identification information so that we know it is your submission.

Also update the author name and date accordingly.

Check out the Source Code from the top-right corner </>Code menu.

In the following R code chunk, load_packages is the code chunk name. include=FALSE suggests that the code chunk will run, but the code itself and its outputs will not be included in the rendered HTML. echo=TRUE in the following code chunk suggests that the code and results from running the code will be included in the rendered HTML.

Lab 2 Assignment

Reading in files of data:

Reading NYC postal areas

Show the code
library(sf)
zip_code <- st_read("../Week_07/R-Spatial_I_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp")
Reading layer `ZIP_CODE_040114' from data source 
  `/Users/Ally/Desktop/R_Spatial_Section/Week_07/R-Spatial_I_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 263 features and 12 fields
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
Projected CRS: NAD83 / New York Long Island (ftUS)
Show the code
st_crs(zip_code)
Coordinate Reference System:
  User input: NAD83 / New York Long Island (ftUS) 
  wkt:
PROJCRS["NAD83 / New York Long Island (ftUS)",
    BASEGEOGCRS["NAD83",
        DATUM["North American Datum 1983",
            ELLIPSOID["GRS 1980",6378137,298.257222101,
                LENGTHUNIT["metre",1]]],
        PRIMEM["Greenwich",0,
            ANGLEUNIT["degree",0.0174532925199433]],
        ID["EPSG",4269]],
    CONVERSION["SPCS83 New York Long Island zone (US survey foot)",
        METHOD["Lambert Conic Conformal (2SP)",
            ID["EPSG",9802]],
        PARAMETER["Latitude of false origin",40.1666666666667,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8821]],
        PARAMETER["Longitude of false origin",-74,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8822]],
        PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8823]],
        PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8824]],
        PARAMETER["Easting at false origin",984250,
            LENGTHUNIT["US survey foot",0.304800609601219],
            ID["EPSG",8826]],
        PARAMETER["Northing at false origin",0,
            LENGTHUNIT["US survey foot",0.304800609601219],
            ID["EPSG",8827]]],
    CS[Cartesian,2],
        AXIS["easting (X)",east,
            ORDER[1],
            LENGTHUNIT["US survey foot",0.304800609601219]],
        AXIS["northing (Y)",north,
            ORDER[2],
            LENGTHUNIT["US survey foot",0.304800609601219]],
    USAGE[
        SCOPE["Engineering survey, topographic mapping."],
        AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
        BBOX[40.47,-74.26,41.3,-71.8]],
    ID["EPSG",2263]]

Reading NYS health facilities

Show the code
nys_health <- read_csv("R-Spatial_II_Lab/NYS_Health_Facility.csv", show_col_types = FALSE)

nys_health_clean <- nys_health %>%
  filter(
    !is.na(`Facility Longitude`),
    !is.na(`Facility Latitude`),
    `Facility Longitude` != 0,
    `Facility Latitude` != 0
  )

nys_health_sf <- st_as_sf(nys_health_clean, coords = c("Facility Longitude", "Facility Latitude"), crs = 4326)

Reading NYS retail food stores

Show the code
nys_food_store <- read_csv("../Week_07/R-Spatial_I_Lab/nys_retail_food_store_xy.csv",
                           locale = locale(encoding = "Latin1"),
                           show_col_types = FALSE)

nys_food_store_clean <- nys_food_store %>%
  filter(
    !is.na(`X`),
    !is.na(`Y`),
    `X` != 0,
    `Y` != 0,
    `ï..County` %in% c("Bronx", "Kings", "New York", "Queens", "Richmond")
  )


nys_food_store_sf <- st_as_sf(nys_food_store_clean, coords = c("X", "Y"), crs = 4326) 

Reading the COVID-19 data

Show the code
covid19 <- read_csv("R-Spatial_II_Lab/tests-by-zcta_2020_04_12.csv", show_col_types = FALSE)
  1. Join the COVID-19 data to the NYC zip code area data
Show the code
nyc_sf_merged <- base::merge(zip_code, covid19, by.x = "ZIPCODE", by.y = "MODZCTA")

dplyr::left_join(zip_code %>% 
                   dplyr::mutate(ZIPCODE=as.numeric(as.character(ZIPCODE))), 
                 covid19, 
                 by = c('ZIPCODE' = 'MODZCTA')) -> nyc_sf_merged

names(nyc_sf_merged)
 [1] "ZIPCODE"           "BLDGZIP"           "PO_NAME"          
 [4] "POPULATION"        "AREA"              "STATE"            
 [7] "COUNTY"            "ST_FIPS"           "CTY_FIPS"         
[10] "URL"               "SHAPE_AREA"        "SHAPE_LEN"        
[13] "Positive"          "Total"             "zcta_cum.perc_pos"
[16] "geometry"         
  1. Aggregate the NYC food retails store data (points) to the zip code data, so that we know how many retail stores in each zip code area.
Show the code
nys_food_store_sf <- st_transform(nys_food_store_sf, st_crs(nyc_sf_merged))

food_store_count <- nys_food_store_sf %>% dplyr::filter(stringr::str_detect(Establishment.Type, 'JAC')) %>%
  st_join(nyc_sf_merged, ., join= st_intersects) %>%
  group_by(ZIPCODE) %>%
  summarise(FOOD_STORE_COUNT = n())  %>%
  st_set_geometry(NULL)

food_store_merged <- dplyr::left_join(nyc_sf_merged, food_store_count, by = "ZIPCODE")

names(food_store_merged)
 [1] "ZIPCODE"           "BLDGZIP"           "PO_NAME"          
 [4] "POPULATION"        "AREA"              "STATE"            
 [7] "COUNTY"            "ST_FIPS"           "CTY_FIPS"         
[10] "URL"               "SHAPE_AREA"        "SHAPE_LEN"        
[13] "Positive"          "Total"             "zcta_cum.perc_pos"
[16] "FOOD_STORE_COUNT"  "geometry"         
  1. Aggregate the NYC health facilities (points) to the zip code data. Similarly, choose appropriate subtypes such as nursing homes from the facilities.
Show the code
nys_health_sf <- st_transform(nys_health_sf, st_crs(food_store_merged))

facilities_count <- nys_health_sf %>% dplyr::filter(`Short Description` == "NH") %>%
  sf::st_join(food_store_merged, ., join = st_intersects) %>%
  dplyr::group_by(ZIPCODE) %>%
  dplyr::summarise(NURSING_HOME_COUNT = n()) %>%
  sf::st_set_geometry(NULL)

facilities_merged <- dplyr::left_join(food_store_merged, facilities_count, by = "ZIPCODE")

names(facilities_merged)
 [1] "ZIPCODE"            "BLDGZIP"            "PO_NAME"           
 [4] "POPULATION"         "AREA"               "STATE"             
 [7] "COUNTY"             "ST_FIPS"            "CTY_FIPS"          
[10] "URL"                "SHAPE_AREA"         "SHAPE_LEN"         
[13] "Positive"           "Total"              "zcta_cum.perc_pos" 
[16] "FOOD_STORE_COUNT"   "NURSING_HOME_COUNT" "geometry"          

4.Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.

Show the code
tracts <- st_read("R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp",
                  stringsAsFactors = FALSE)
Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `/Users/Ally/Desktop/R_Spatial_Section/Week_08/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 2165 features and 11 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
Geodetic CRS:  WGS84(DD)
Show the code
acs <- readLines("R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv")

tracts <- tracts %>%
  dplyr::mutate(
    cntyFIPS = dplyr::case_when(
      boro_name == "Bronx" ~ "005",
      boro_name == "Brooklyn" ~ "047",
      boro_name == "Manhattan" ~ "061",
      boro_name == "Queens" ~ "081",
      boro_name == "Staten Island" ~ "085"
    ),
    tractFIPS = paste(cntyFIPS, ct2010, sep = "")
  )


tracts <- tracts %>%
  dplyr::mutate(
    cntyFIPS = dplyr::case_when(
      boro_name == "Bronx" ~ "005",
      boro_name == "Brooklyn" ~ "047",
      boro_name == "Manhattan" ~ "061",
      boro_name == "Queens" ~ "081",
      boro_name == "Staten Island" ~ "085"
    ),
    tractFIPS = paste(cntyFIPS, ct2010, sep = "")
  )

acs <- acs %>%
  magrittr::extract(-2) %>%
  textConnection() %>%
  read.csv(header = TRUE, quote = "\"") %>%
  dplyr::select(
    GEO_ID,
    totPop = DP05_0001E,
    elderlyPop = DP05_0024E,
    malePop = DP05_0002E,
    femalePop = DP05_0003E,
    whitePop = DP05_0037E,
    blackPop = DP05_0038E,
    asianPop = DP05_0067E,
    hispanicPop = DP05_0071E,
    adultPop = DP05_0021E,
    citizenAdult = DP05_0087E
  ) %>%
  dplyr::mutate(
    censusCode = stringr::str_sub(GEO_ID, -9, -1)
  )

popData <- merge(tracts, acs, by.x = "tractFIPS", by.y = "censusCode")

names(popData)
 [1] "tractFIPS"    "boro_code"    "boro_ct201"   "boro_name"    "cdeligibil"  
 [6] "ct2010"       "ctlabel"      "ntacode"      "ntaname"      "puma"        
[11] "shape_area"   "shape_leng"   "cntyFIPS"     "GEO_ID"       "totPop"      
[16] "elderlyPop"   "malePop"      "femalePop"    "whitePop"     "blackPop"    
[21] "asianPop"     "hispanicPop"  "adultPop"     "citizenAdult" "geometry"    
Show the code
sum(popData$totPop, na.rm = TRUE)
[1] 8443713
Show the code
st_crs(popData)
Coordinate Reference System:
  User input: WGS84(DD) 
  wkt:
GEOGCRS["WGS84(DD)",
    DATUM["WGS84",
        ELLIPSOID["WGS84",6378137,298.257223563,
            LENGTHUNIT["metre",1,
                ID["EPSG",9001]]]],
    PRIMEM["Greenwich",0,
        ANGLEUNIT["degree",0.0174532925199433]],
    CS[ellipsoidal,2],
        AXIS["longitude",east,
            ORDER[1],
            ANGLEUNIT["degree",0.0174532925199433]],
        AXIS["latitude",north,
            ORDER[2],
            ANGLEUNIT["degree",0.0174532925199433]]]
  1. Aggregate the ACS census data to zip code area data.
Show the code
popNYC <- sf::st_transform(popData, st_crs(facilities_merged))

acsZipNYC <- sf::st_join(facilities_merged,
                         popNYC %>% sf::st_centroid(),
                         join = st_contains) %>%
  dplyr::group_by(
    ZIPCODE, PO_NAME, POPULATION, COUNTY,
    Positive, Total, zcta_cum.perc_pos,
    FOOD_STORE_COUNT, NURSING_HOME_COUNT
  ) %>%
  dplyr::summarise(
    totPop = sum(totPop, na.rm = TRUE),
    elderlyPop = sum(elderlyPop, na.rm = TRUE),
    malePop = sum(malePop, na.rm = TRUE),
    femalePop = sum(femalePop, na.rm = TRUE),
    whitePop = sum(whitePop, na.rm = TRUE),
    blackPop = sum(blackPop, na.rm = TRUE),
    asianPop = sum(asianPop, na.rm = TRUE),
    hispanicPop = sum(hispanicPop, na.rm = TRUE),
    adultPop = sum(adultPop, na.rm = TRUE),
    citizenAdult = sum(citizenAdult, na.rm = TRUE),
    malePctg = sum(malePop, na.rm = TRUE) / sum(totPop, na.rm = TRUE) * 100
  )
Warning: st_centroid assumes attributes are constant over geometries
`summarise()` has regrouped the output.
ℹ Summaries were computed grouped by ZIPCODE, PO_NAME, POPULATION, COUNTY,
  Positive, Total, zcta_cum.perc_pos, FOOD_STORE_COUNT, and NURSING_HOME_COUNT.
ℹ Output is grouped by ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive, Total,
  zcta_cum.perc_pos, and FOOD_STORE_COUNT.
ℹ Use `summarise(.groups = "drop_last")` to silence this message.
ℹ Use `summarise(.by = c(ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive, Total,
  zcta_cum.perc_pos, FOOD_STORE_COUNT, NURSING_HOME_COUNT))` for per-operation
  grouping (`?dplyr::dplyr_by`) instead.
Show the code
names(acsZipNYC)
 [1] "ZIPCODE"            "PO_NAME"            "POPULATION"        
 [4] "COUNTY"             "Positive"           "Total"             
 [7] "zcta_cum.perc_pos"  "FOOD_STORE_COUNT"   "NURSING_HOME_COUNT"
[10] "totPop"             "elderlyPop"         "malePop"           
[13] "femalePop"          "whitePop"           "blackPop"          
[16] "asianPop"           "hispanicPop"        "adultPop"          
[19] "citizenAdult"       "malePctg"           "geometry"          
Show the code
sum(acsZipNYC$totPop, na.rm = TRUE)
[1] 8446394