title: “R Week 8 Assignment” author: “Genesis Santos” date: “3/22/2025” output: html_document:

In the following R code chunk, load_packages is the code chunk name. include=FALSE suggests that the code chunk will run, but the code itself and its outputs will not be included in the rendered HTML. echo=TRUE in the following code chunk suggests that the code and results from running the code will be included in the rendered HTML.

R Spatial Lab Assignment# 2

##Don’t use a single chunk for the entire assignment. Break it into multiple chunks.

task 1:# Join the COVID-19 data to the NYC zip code area data (sf or sp polygons).

##This task requires an attribute join by matching columns with base::merge or dplyr::left_join. ##Between the two, left_join is recommended. ##Pay attention to the order of the inputs in left_join. ##You can choose one or multiple weeks of COVID test data.

zpNYC <- st_read("ZIP_CODE_040114.shp", quiet = TRUE) %>%
  mutate(ZIPCODE = as.character(ZIPCODE))
  
covid_2021 <- read_csv("tests-by-zcta_2021_04_23.csv", show_col_types = FALSE) %>%
  mutate(MODIFIED_ZCTA = as.character(MODIFIED_ZCTA)) %>%
  rename(ZIPCODE = MODIFIED_ZCTA)

zpNYC_covid <- zpNYC %>%
  left_join(covid_2021, by = "ZIPCODE")
  
nycFoodStoreSF <- st_read("nycFoodStore.shp", quiet = TRUE) %>%
  st_transform(st_crs(zpNYC_covid))

task 2:Aggregate the NYC food retails store data (points) to the zip code data, so that we know how many retail stores in each zip code area. Note that not all locations are for food retail. And we need to choose the specific types according to the data.

##This task requires a spatial join with sf::st_join. ##It needs to use the result from task 1 as one input so that the output includes information from task 1.

# A = Store
# D = Food Warehouse
# J = Multiple Operations


nycFoodRetail <- nycFoodStoreSF %>%
  filter(str_detect(Estbl_T, "[AJD]"))

zpNYC_covid_food <- st_join(
  zpNYC_covid,
  nycFoodRetail,
  join = st_contains,
  left = TRUE
) %>%
  group_by(
    ZIPCODE,
    PO_NAME,
    POPULATION,
    COUNTY,
    COVID_CASE_COUNT,
    TOTAL_COVID_TESTS
  ) %>%
  summarise(
    FoodStoreNum = sum(!is.na(Lcns_Nm)),
    .groups = "drop"
  )

Task3: Aggregate the NYC health facilities (points) to the zip code data. Similarly, choose appropriate subtypes such as nursing homes from the facilities.

##Like task 2, this also requires a spatial join. ##Again, use output from task 2 as one input for task 3.

nycHealthFacilities <- read_csv("NYS_Health_Facility.csv", show_col_types = FALSE)

nycNursingHomeSF <- nycHealthFacilities %>%
  filter(`Short Description` == "NH") %>%
  filter(!is.na(`Facility Latitude`), !is.na(`Facility Longitude`)) %>%
  st_as_sf(
    coords = c("Facility Longitude", "Facility Latitude"),
    crs = 4326,
    remove = FALSE
  ) %>%
  st_transform(st_crs(zpNYC_covid_food))

zpNYC_final <- st_join(
  zpNYC_covid_food,
  nycNursingHomeSF,
  join = st_contains,
  left = TRUE
) %>%
  group_by(
    ZIPCODE,
    PO_NAME,
    POPULATION,
    COUNTY,
    COVID_CASE_COUNT,
    TOTAL_COVID_TESTS,
    FoodStoreNum
  ) %>%
  summarise(
    NursingHomeNum = sum(!is.na(`Facility ID`)),
    .groups = "drop"
  )

Task 4: Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.

#This is an attribute join. #It may need to process the data first to derive matching columns. #Check out the sample code from our Dropbox site.

nycCensus <- st_read("geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp", quiet = TRUE) %>%
  mutate(
    cntyFIPS = case_when(
      boro_name == "Bronx" ~ "005",
      boro_name == "Brooklyn" ~ "047",
      boro_name == "Manhattan" ~ "061",
      boro_name == "Queens" ~ "081",
      boro_name == "Staten Island" ~ "085"
    ),
    tractFIPS = paste0(cntyFIPS, ct2010)
  )

names(nycCensus)
##  [1] "boro_code"  "boro_ct201" "boro_name"  "cdeligibil" "ct2010"    
##  [6] "ctlabel"    "ntacode"    "ntaname"    "puma"       "shape_area"
## [11] "shape_leng" "geometry"   "cntyFIPS"   "tractFIPS"
head(nycCensus)
## Simple feature collection with 6 features and 13 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.08721 ymin: 40.63981 xmax: -73.96433 ymax: 40.76364
## Geodetic CRS:  WGS84(DD)
##   boro_code boro_ct201     boro_name cdeligibil ct2010 ctlabel ntacode
## 1         5    5000900 Staten Island          E 000900       9    SI22
## 2         1    1009800     Manhattan          I 009800      98    MN19
## 3         1    1010000     Manhattan          I 010000     100    MN19
## 4         1    1010200     Manhattan          I 010200     102    MN17
## 5         1    1010400     Manhattan          I 010400     104    MN17
## 6         1    1011300     Manhattan          I 011300     113    MN17
##                                     ntaname puma shape_area shape_leng
## 1 West New Brighton-New Brighton-St. George 3903    2497010   7729.017
## 2                   Turtle Bay-East Midtown 3808    1906016   5534.200
## 3                   Turtle Bay-East Midtown 3808    1860938   5692.169
## 4                     Midtown-Midtown South 3807    1860993   5687.802
## 5                     Midtown-Midtown South 3807    1864600   5693.036
## 6                     Midtown-Midtown South 3807    1890907   5699.861
##                         geometry cntyFIPS tractFIPS
## 1 MULTIPOLYGON (((-74.07921 4...      085 085000900
## 2 MULTIPOLYGON (((-73.96433 4...      061 061009800
## 3 MULTIPOLYGON (((-73.96802 4...      061 061010000
## 4 MULTIPOLYGON (((-73.97124 4...      061 061010200
## 5 MULTIPOLYGON (((-73.97446 4...      061 061010400
## 6 MULTIPOLYGON (((-73.98412 4...      061 061011300
acs_lines <- readLines("ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv", warn = FALSE)
acs_text <- paste(c(acs_lines[1], acs_lines[-c(1, 2)]), collapse = "\n")

acs_raw <- read_csv(I(acs_text), show_col_types = FALSE)

acsData <- acs_raw %>%
  transmute(
    GEO_ID,
    totPop = as.numeric(DP05_0001E),
    elderlyPop = as.numeric(DP05_0024E),
    malePop = as.numeric(DP05_0002E),
    femalePop = as.numeric(DP05_0003E),
    whitePop = as.numeric(DP05_0037E),
    blackPop = as.numeric(DP05_0038E),
    asianPop = as.numeric(DP05_0067E),
    hispanicPop = as.numeric(DP05_0071E),
    adultPop = as.numeric(DP05_0021E),
    citizenAdult = as.numeric(DP05_0087E),
    censusCode = str_sub(GEO_ID, -9, -1)
  )

popData <- nycCensus %>%
  left_join(acsData, by = c("tractFIPS" = "censusCode"))

class(popData)
## [1] "sf"         "data.frame"
inherits(popData, "sf")
## [1] TRUE

###task 5:Aggregate the ACS census data to zip code area data. ##This requires a spatial join (see tips in the sample code). ##Use outputs from task 3 and 4 as inputs.

st_crs(zpNYC_final)
## Coordinate Reference System:
##   User input: NAD83 / New York Long Island (ftUS) 
##   wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4269]],
##     CONVERSION["SPCS83 New York Long Island zone (US survey foot)",
##         METHOD["Lambert Conic Conformal (2SP)",
##             ID["EPSG",9802]],
##         PARAMETER["Latitude of false origin",40.1666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-74,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",984250,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
##         BBOX[40.47,-74.26,41.3,-71.8]],
##     ID["EPSG",2263]]
st_crs(popData)
## Coordinate Reference System:
##   User input: WGS84(DD) 
##   wkt:
## GEOGCRS["WGS84(DD)",
##     DATUM["WGS84",
##         ELLIPSOID["WGS84",6378137,298.257223563,
##             LENGTHUNIT["metre",1,
##                 ID["EPSG",9001]]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["longitude",east,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["latitude",north,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]]]
popNYC <- st_transform(popData, st_crs(zpNYC_final))

popNYC_centroid <- st_centroid(popNYC)
## Warning: st_centroid assumes attributes are constant over geometries
zip_joined <- st_join(
  zpNYC_final,
  popNYC_centroid,
  join = st_contains,
  left = TRUE
)

zip_grouped <- zip_joined %>%
  group_by(
    ZIPCODE,
    PO_NAME,
    POPULATION,
    COUNTY,
    COVID_CASE_COUNT,
    TOTAL_COVID_TESTS,
    FoodStoreNum,
    NursingHomeNum
  )

zipACS <- zip_grouped %>%
  summarise(
    totPop = sum(totPop, na.rm = TRUE),
    elderlyPop = sum(elderlyPop, na.rm = TRUE),
    adultPop = sum(adultPop, na.rm = TRUE),
    citizenAdult = sum(citizenAdult, na.rm = TRUE),
    asianPop = sum(asianPop, na.rm = TRUE),
    blackPop = sum(blackPop, na.rm = TRUE),
    hispanicPop = sum(hispanicPop, na.rm = TRUE),
    whitePop = sum(whitePop, na.rm = TRUE),
    malePop = sum(malePop, na.rm = TRUE),
    femalePop = sum(femalePop, na.rm = TRUE),
    malePctg = ifelse(totPop > 0, malePop / totPop * 100, NA_real_),
    femalePctg = ifelse(totPop > 0, femalePop / totPop * 100, NA_real_),
    elderlyPctg = ifelse(totPop > 0, elderlyPop / totPop * 100, NA_real_),
    .groups = "drop"
  )

names(zipACS)
##  [1] "ZIPCODE"           "PO_NAME"           "POPULATION"       
##  [4] "COUNTY"            "COVID_CASE_COUNT"  "TOTAL_COVID_TESTS"
##  [7] "FoodStoreNum"      "NursingHomeNum"    "totPop"           
## [10] "elderlyPop"        "adultPop"          "citizenAdult"     
## [13] "asianPop"          "blackPop"          "hispanicPop"      
## [16] "whitePop"          "malePop"           "femalePop"        
## [19] "malePctg"          "femalePctg"        "elderlyPctg"      
## [22] "geometry"
head(zipACS)
## Simple feature collection with 6 features and 21 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: 971132.6 ymin: 188447.3 xmax: 998309.7 ymax: 230942.5
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 6 × 22
##   ZIPCODE PO_NAME      POPULATION COUNTY   COVID_CASE_COUNT TOTAL_COVID_TESTS
##   <chr>   <chr>             <dbl> <chr>               <dbl>             <dbl>
## 1 00083   Central Park         25 New York               NA                NA
## 2 10001   New York          22413 New York             1542             20158
## 3 10002   New York          81305 New York             5902             48197
## 4 10003   New York          55878 New York             2803             41076
## 5 10004   New York           2187 New York              247              3599
## 6 10005   New York           8107 New York              413              6102
## # ℹ 16 more variables: FoodStoreNum <int>, NursingHomeNum <int>, totPop <dbl>,
## #   elderlyPop <dbl>, adultPop <dbl>, citizenAdult <dbl>, asianPop <dbl>,
## #   blackPop <dbl>, hispanicPop <dbl>, whitePop <dbl>, malePop <dbl>,
## #   femalePop <dbl>, malePctg <dbl>, femalePctg <dbl>, elderlyPctg <dbl>,
## #   geometry <GEOMETRY [US_survey_foot]>
sum(zipACS$totPop, na.rm = TRUE)
## [1] 8446394
zipACS %>% filter(is.na(totPop))
## Simple feature collection with 0 features and 21 fields
## Bounding box:  xmin: NA ymin: NA xmax: NA ymax: NA
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 0 × 22
## # ℹ 22 variables: ZIPCODE <chr>, PO_NAME <chr>, POPULATION <dbl>, COUNTY <chr>,
## #   COVID_CASE_COUNT <dbl>, TOTAL_COVID_TESTS <dbl>, FoodStoreNum <int>,
## #   NursingHomeNum <int>, totPop <dbl>, elderlyPop <dbl>, adultPop <dbl>,
## #   citizenAdult <dbl>, asianPop <dbl>, blackPop <dbl>, hispanicPop <dbl>,
## #   whitePop <dbl>, malePop <dbl>, femalePop <dbl>, malePctg <dbl>,
## #   femalePctg <dbl>, elderlyPctg <dbl>, geometry <GEOMETRY [US_survey_foot]>
plot(zipACS["totPop"])

plot(zipACS["asianPop"])

plot(zipACS["COVID_CASE_COUNT"])