R Week 08 Assignment Updated

Author

Caitlin Cacciatore

Published

March 26, 2026

Loading Packages - First Steps

Task 1

Show the code

# Task 1

# read the file 

zip_sf <- st_read("Zip_Code_040114.shp")

Reading layer `ZIP_CODE_040114' from data source 
  `/Users/heliosselene/Desktop/R-Spatial/ZIP_CODE_040114.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 263 features and 12 fields
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
Projected CRS: NAD83 / New York Long Island (ftUS)

Show the code

# Clean the data: drop features that have no ZIP code

zip_nyc <- zip_sf %>%
  filter(!is.na(ZIPCODE))

# Reproject to WGS84 (EPSG:4326). zip_nyc is already an sf object, so
# st_as_sf(zip_nyc, crs = 4326) would leave the coordinates in the source
# CRS (NAD83 / New York Long Island, ftUS); st_transform() actually converts.

nyc_zip_sf <- st_transform(zip_nyc, crs = 4326)

# Quick visual check: one panel per attribute column

plot(nyc_zip_sf)

Warning: plotting the first 9 out of 12 attributes; use max.plot = 12 to plot
all

Show the code

# figure out structure

str(nyc_zip_sf)

Classes 'sf' and 'data.frame':  263 obs. of  13 variables:
 $ ZIPCODE   : chr  "11436" "11213" "11212" "11225" ...
 $ BLDGZIP   : chr  "0" "0" "0" "0" ...
 $ PO_NAME   : chr  "Jamaica" "Brooklyn" "Brooklyn" "Brooklyn" ...
 $ POPULATION: num  18681 62426 83866 56527 72280 ...
 $ AREA      : num  22699295 29631004 41972104 23698630 36868799 ...
 $ STATE     : chr  "NY" "NY" "NY" "NY" ...
 $ COUNTY    : chr  "Queens" "Kings" "Kings" "Kings" ...
 $ ST_FIPS   : chr  "36" "36" "36" "36" ...
 $ CTY_FIPS  : chr  "081" "047" "047" "047" ...
 $ URL       : chr  "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
 $ SHAPE_AREA: num  0 0 0 0 0 0 0 0 0 0 ...
 $ SHAPE_LEN : num  0 0 0 0 0 0 0 0 0 0 ...
 $ geometry  :sfc_POLYGON of length 263; first list element: List of 1
  ..$ : num [1:159, 1:2] 1038098 1038142 1038171 1038280 1038521 ...
  ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
 - attr(*, "sf_column")= chr "geometry"
 - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
  ..- attr(*, "names")= chr [1:12] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...

Show the code

# Read the COVID data for one week 

covid_data <- readr::read_csv("tests-by-zcta_2021_04_23.csv", lazy = FALSE)

Rows: 177 Columns: 13
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (3): NEIGHBORHOOD_NAME, BOROUGH_GROUP, label
dbl (10): MODIFIED_ZCTA, lat, lon, COVID_CASE_COUNT, COVID_CASE_RATE, POP_DE...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Show the code

str(covid_data)

spc_tbl_ [177 × 13] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ MODIFIED_ZCTA    : num [1:177] 10001 10002 10003 10004 10005 ...
 $ NEIGHBORHOOD_NAME: chr [1:177] "Chelsea/NoMad/West Chelsea" "Chinatown/Lower East Side" "East Village/Gramercy/Greenwich Village" "Financial District" ...
 $ BOROUGH_GROUP    : chr [1:177] "Manhattan" "Manhattan" "Manhattan" "Manhattan" ...
 $ label            : chr [1:177] "10001, 10118" "10002" "10003" "10004" ...
 $ lat              : num [1:177] 40.8 40.7 40.7 40.7 40.7 ...
 $ lon              : num [1:177] -74 -74 -74 -74 -74 ...
 $ COVID_CASE_COUNT : num [1:177] 1542 5902 2803 247 413 ...
 $ COVID_CASE_RATE  : num [1:177] 5584 7836 5193 8311 4716 ...
 $ POP_DENOMINATOR  : num [1:177] 27613 75323 53978 2972 8757 ...
 $ COVID_DEATH_COUNT: num [1:177] 35 264 48 2 0 1 4 118 37 62 ...
 $ COVID_DEATH_RATE : num [1:177] 126.8 350.5 88.9 67.3 0 ...
 $ PERCENT_POSITIVE : num [1:177] 7.86 12.63 6.93 6.92 6.72 ...
 $ TOTAL_COVID_TESTS: num [1:177] 20158 48197 41076 3599 6102 ...
 - attr(*, "spec")=
  .. cols(
  ..   MODIFIED_ZCTA = col_double(),
  ..   NEIGHBORHOOD_NAME = col_character(),
  ..   BOROUGH_GROUP = col_character(),
  ..   label = col_character(),
  ..   lat = col_double(),
  ..   lon = col_double(),
  ..   COVID_CASE_COUNT = col_double(),
  ..   COVID_CASE_RATE = col_double(),
  ..   POP_DENOMINATOR = col_double(),
  ..   COVID_DEATH_COUNT = col_double(),
  ..   COVID_DEATH_RATE = col_double(),
  ..   PERCENT_POSITIVE = col_double(),
  ..   TOTAL_COVID_TESTS = col_double()
  .. )
 - attr(*, "problems")=<externalptr>

Show the code

# Merge Zip Code and COVID Data
# Attribute join of the ZIP polygons with the COVID table, matching
# ZIPCODE (polygons) to MODIFIED_ZCTA (COVID table). No `all` argument is
# given, so merge() keeps only rows whose key appears in both inputs.
# NOTE(review): ZIPCODE is character while MODIFIED_ZCTA is numeric;
# confirm the number of matched rows is what you expect.


nyc_covid_data_sf_merged <- 
  base::merge(nyc_zip_sf, covid_data, by.x = "ZIPCODE", by.y = "MODIFIED_ZCTA")
# List the merged columns to confirm both tables contributed fields
names(nyc_covid_data_sf_merged)

 [1] "ZIPCODE"           "BLDGZIP"           "PO_NAME"          
 [4] "POPULATION"        "AREA"              "STATE"            
 [7] "COUNTY"            "ST_FIPS"           "CTY_FIPS"         
[10] "URL"               "SHAPE_AREA"        "SHAPE_LEN"        
[13] "NEIGHBORHOOD_NAME" "BOROUGH_GROUP"     "label"            
[16] "lat"               "lon"               "COVID_CASE_COUNT" 
[19] "COVID_CASE_RATE"   "POP_DENOMINATOR"   "COVID_DEATH_COUNT"
[22] "COVID_DEATH_RATE"  "PERCENT_POSITIVE"  "TOTAL_COVID_TESTS"
[25] "geometry"

Task 2

Show the code

# Aggregate by ZIP code 
# Read the CSV file 
nys_retail <- read_csv(
  "nys_retail_food_store_xy.csv",
  locale = locale(encoding = "Latin1"),
  lazy = FALSE
)

Rows: 29389 Columns: 18
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (11): ï..County, Operation.Type, Establishment.Type, Entity.Name, DBA.Na...
dbl  (4): License.Number, Zip.Code, Y, X
num  (1): Square.Footage
lgl  (2): Address.Line.2, Address.Line.3

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Show the code

# Let's check column names
names(nys_retail)

 [1] "ï..County"          "License.Number"     "Operation.Type"    
 [4] "Establishment.Type" "Entity.Name"        "DBA.Name"          
 [7] "Street.Number"      "Street.Name"        "Address.Line.2"    
[10] "Address.Line.3"     "City"               "State"             
[13] "Zip.Code"           "Square.Footage"     "Location"          
[16] "Coords"             "Y"                  "X"

Show the code

names(nyc_zip_sf)

 [1] "ZIPCODE"    "BLDGZIP"    "PO_NAME"    "POPULATION" "AREA"      
 [6] "STATE"      "COUNTY"     "ST_FIPS"    "CTY_FIPS"   "URL"       
[11] "SHAPE_AREA" "SHAPE_LEN"  "geometry"

Show the code

# Rename the store ZIP column so it reads like the other datasets

nys_retail <- nys_retail %>%
  rename("Zip Code" = "Zip.Code")

# Drop stores without coordinates; points cannot be built from NA values

nys_retail <- nys_retail %>%
  drop_na("X", "Y")


# Convert the store table to an sf point layer (X = longitude, Y = latitude)
nys_retail_sf <- st_as_sf(
  nys_retail,
  coords = c("X", "Y"),
  crs = 4326              # WGS84
)

# Put the ZIP polygons in the same CRS as the store points before joining
nyc_zip_sf <- st_transform(nyc_zip_sf, st_crs(nys_retail_sf))

# Spatial join: attach the attributes of the intersecting ZIP polygon to
# each store (st_join() defaults to a left join, so every store is kept)
nyc_food_stores <- st_join(nys_retail_sf, nyc_zip_sf)

# Count stores per ZIP code. The column name contains a space, so it must be
# wrapped in backticks. A quoted string ("Zip Code") is treated as a constant
# value, which puts every store into one single group.
zip_summary_sf <- nyc_food_stores %>%
  group_by(`Zip Code`) %>%
  summarise(store_count = n(), .groups = "drop")

# Inspect the aggregated result
head(zip_summary_sf)

Simple feature collection with 1 feature and 2 fields
Geometry type: MULTIPOINT
Dimension:     XY
Bounding box:  xmin: -79.75953 ymin: 40.50782 xmax: -71.93873 ymax: 44.99484
Geodetic CRS:  WGS 84
# A tibble: 1 × 3
  `"Zip Code"` store_count                                              geometry
  <chr>              <int>                                      <MULTIPOINT [°]>
1 Zip Code           23989 ((-73.43765 43.80624), (-73.41244 43.85028), (-73.42…

Show the code

names(zip_summary_sf)

[1] "\"Zip Code\"" "store_count"  "geometry"

Show the code

st_crs(zip_summary_sf)

Coordinate Reference System:
  User input: EPSG:4326 
  wkt:
GEOGCRS["WGS 84",
    ENSEMBLE["World Geodetic System 1984 ensemble",
        MEMBER["World Geodetic System 1984 (Transit)"],
        MEMBER["World Geodetic System 1984 (G730)"],
        MEMBER["World Geodetic System 1984 (G873)"],
        MEMBER["World Geodetic System 1984 (G1150)"],
        MEMBER["World Geodetic System 1984 (G1674)"],
        MEMBER["World Geodetic System 1984 (G1762)"],
        MEMBER["World Geodetic System 1984 (G2139)"],
        MEMBER["World Geodetic System 1984 (G2296)"],
        ELLIPSOID["WGS 84",6378137,298.257223563,
            LENGTHUNIT["metre",1]],
        ENSEMBLEACCURACY[2.0]],
    PRIMEM["Greenwich",0,
        ANGLEUNIT["degree",0.0174532925199433]],
    CS[ellipsoidal,2],
        AXIS["geodetic latitude (Lat)",north,
            ORDER[1],
            ANGLEUNIT["degree",0.0174532925199433]],
        AXIS["geodetic longitude (Lon)",east,
            ORDER[2],
            ANGLEUNIT["degree",0.0174532925199433]],
    USAGE[
        SCOPE["Horizontal component of 3D system."],
        AREA["World."],
        BBOX[-90,-180,90,180]],
    ID["EPSG",4326]]

Task 3

Show the code

# Read the Health Facilities


nyc_health <- readr::read_csv("NYS_Health_Facility.csv", lazy = FALSE)

Rows: 3990 Columns: 36
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (28): Facility Name, Short Description, Description, Facility Open Date,...
dbl  (8): Facility ID, Facility Phone Number, Facility Fax Number, Facility ...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Show the code

str(nyc_health)

spc_tbl_ [3,990 × 36] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ Facility ID                 : num [1:3990] 204 620 654 1156 2589 ...
 $ Facility Name               : chr [1:3990] "Hospice at Lourdes" "Charles T Sitrin Health Care Center Inc" "Central Park Rehabilitation and Nursing Center" "East Side Nursing Home" ...
 $ Short Description           : chr [1:3990] "HSPC" "NH" "NH" "NH" ...
 $ Description                 : chr [1:3990] "Hospice" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" ...
 $ Facility Open Date          : chr [1:3990] "06/01/1985" "02/01/1989" "02/01/1989" "08/01/1979" ...
 $ Facility Address 1          : chr [1:3990] "4102 Old Vestal Road" "2050 Tilden Avenue" "116 Martin Luther King East" "62 Prospect St" ...
 $ Facility Address 2          : chr [1:3990] NA NA NA NA ...
 $ Facility City               : chr [1:3990] "Vestal" "New Hartford" "Syracuse" "Warsaw" ...
 $ Facility State              : chr [1:3990] "New York" "New York" "New York" "New York" ...
 $ Facility Zip Code           : chr [1:3990] "13850" "13413" "13205" "14569" ...
 $ Facility Phone Number       : num [1:3990] 6.08e+09 3.16e+09 3.15e+09 5.86e+09 5.86e+09 ...
 $ Facility Fax Number         : num [1:3990] NA NA NA NA NA ...
 $ Facility Website            : chr [1:3990] NA NA NA NA ...
 $ Facility County Code        : num [1:3990] 3 32 33 60 2 ...
 $ Facility County             : chr [1:3990] "Broome" "Oneida" "Onondaga" "Wyoming" ...
 $ Regional Office ID          : num [1:3990] 3 3 3 1 1 1 7 1 7 5 ...
 $ Regional Office             : chr [1:3990] "Central New York Regional Office" "Central New York Regional Office" "Central New York Regional Office" "Western Regional Office - Buffalo" ...
 $ Main Site Name              : chr [1:3990] NA NA NA NA ...
 $ Main Site Facility ID       : num [1:3990] NA NA NA NA NA ...
 $ Operating Certificate Number: chr [1:3990] "0301501F" "3227304N" "3301326N" "6027303N" ...
 $ Operator Name               : chr [1:3990] "Our Lady of Lourdes Memorial Hospital Inc" "Charles T Sitrin Health Care Center, Inc" "CPRNC, LLC" "East Side Nursing Home Inc" ...
 $ Operator Address 1          : chr [1:3990] "169 Riverside Drive" "Box 1000 Tilden Avenue" "116 Martin Luther King East" "62 Prospect Street" ...
 $ Operator Address 2          : chr [1:3990] NA NA NA NA ...
 $ Operator City               : chr [1:3990] "Binghamton" "New Hartford" "Syracuse" "Warsaw" ...
 $ Operator State              : chr [1:3990] "New York" "New York" "New York" "New York" ...
 $ Operator Zip Code           : chr [1:3990] "13905" "13413" "13205" "14569" ...
 $ Cooperator Name             : chr [1:3990] NA NA NA NA ...
 $ Cooperator Address          : chr [1:3990] NA NA NA NA ...
 $ Cooperator Address 2        : chr [1:3990] NA NA NA NA ...
 $ Cooperator City             : chr [1:3990] NA NA NA NA ...
 $ Cooperator State            : chr [1:3990] "New York" "New York" "New York" "New York" ...
 $ Cooperator Zip Code         : chr [1:3990] NA NA NA NA ...
 $ Ownership Type              : chr [1:3990] "Not for Profit Corporation" "Not for Profit Corporation" "LLC" "Business Corporation" ...
 $ Facility Latitude           : num [1:3990] 42.1 43.1 NA 42.7 42.1 ...
 $ Facility Longitude          : num [1:3990] -76 -75.2 NA -78.1 -78 ...
 $ Facility Location           : chr [1:3990] "(42.097095, -75.975243)" "(43.05497, -75.228828)" NA "(42.738979, -78.12867)" ...
 - attr(*, "spec")=
  .. cols(
  ..   `Facility ID` = col_double(),
  ..   `Facility Name` = col_character(),
  ..   `Short Description` = col_character(),
  ..   Description = col_character(),
  ..   `Facility Open Date` = col_character(),
  ..   `Facility Address 1` = col_character(),
  ..   `Facility Address 2` = col_character(),
  ..   `Facility City` = col_character(),
  ..   `Facility State` = col_character(),
  ..   `Facility Zip Code` = col_character(),
  ..   `Facility Phone Number` = col_double(),
  ..   `Facility Fax Number` = col_double(),
  ..   `Facility Website` = col_character(),
  ..   `Facility County Code` = col_double(),
  ..   `Facility County` = col_character(),
  ..   `Regional Office ID` = col_double(),
  ..   `Regional Office` = col_character(),
  ..   `Main Site Name` = col_character(),
  ..   `Main Site Facility ID` = col_double(),
  ..   `Operating Certificate Number` = col_character(),
  ..   `Operator Name` = col_character(),
  ..   `Operator Address 1` = col_character(),
  ..   `Operator Address 2` = col_character(),
  ..   `Operator City` = col_character(),
  ..   `Operator State` = col_character(),
  ..   `Operator Zip Code` = col_character(),
  ..   `Cooperator Name` = col_character(),
  ..   `Cooperator Address` = col_character(),
  ..   `Cooperator Address 2` = col_character(),
  ..   `Cooperator City` = col_character(),
  ..   `Cooperator State` = col_character(),
  ..   `Cooperator Zip Code` = col_character(),
  ..   `Ownership Type` = col_character(),
  ..   `Facility Latitude` = col_double(),
  ..   `Facility Longitude` = col_double(),
  ..   `Facility Location` = col_character()
  .. )
 - attr(*, "problems")=<externalptr>

Show the code

# Keep only facilities that have both a longitude and a latitude

nyc_health_sf_no_na <- nyc_health %>%
  dplyr::filter(
    !is.na(`Facility Longitude`),
    !is.na(`Facility Latitude`)
  )

# Convert to an sf point layer (WGS84)
nyc_health_sf <- nyc_health_sf_no_na %>%
  sf::st_as_sf(coords = c("Facility Longitude", "Facility Latitude"), crs = 4326)

# Make sure coordinate systems match before the spatial join

nyc_zip_sf <- sf::st_transform(nyc_zip_sf, sf::st_crs(nyc_health_sf))

# Spatial join, then keep only facilities that fall inside a ZIP polygon.
# ZIPCODE comes from nyc_zip_sf and is NA for facilities with no matching
# polygon. The previous test, is.na("Zip Code"), checked a literal string,
# was always FALSE, and therefore never removed any rows.

nyc_health_care_centers <- sf::st_join(nyc_health_sf, nyc_zip_sf) %>%
  dplyr::filter(!is.na(ZIPCODE))
# Check names of the health care centers
names(nyc_health_care_centers)

 [1] "Facility ID"                  "Facility Name"               
 [3] "Short Description"            "Description"                 
 [5] "Facility Open Date"           "Facility Address 1"          
 [7] "Facility Address 2"           "Facility City"               
 [9] "Facility State"               "Facility Zip Code"           
[11] "Facility Phone Number"        "Facility Fax Number"         
[13] "Facility Website"             "Facility County Code"        
[15] "Facility County"              "Regional Office ID"          
[17] "Regional Office"              "Main Site Name"              
[19] "Main Site Facility ID"        "Operating Certificate Number"
[21] "Operator Name"                "Operator Address 1"          
[23] "Operator Address 2"           "Operator City"               
[25] "Operator State"               "Operator Zip Code"           
[27] "Cooperator Name"              "Cooperator Address"          
[29] "Cooperator Address 2"         "Cooperator City"             
[31] "Cooperator State"             "Cooperator Zip Code"         
[33] "Ownership Type"               "Facility Location"           
[35] "geometry"                     "ZIPCODE"                     
[37] "BLDGZIP"                      "PO_NAME"                     
[39] "POPULATION"                   "AREA"                        
[41] "STATE"                        "COUNTY"                      
[43] "ST_FIPS"                      "CTY_FIPS"                    
[45] "URL"                          "SHAPE_AREA"                  
[47] "SHAPE_LEN"

Task 4

Show the code

# Read the Census Tract Data


nycCensus <- sf::st_read('nyc_census_tracts.shp',
                         stringsAsFactors = FALSE)

Reading layer `nyc_census_tracts' from data source 
  `/Users/heliosselene/Desktop/R-Spatial/nyc_census_tracts.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 2162 features and 10 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -74.25559 ymin: 40.5021 xmax: -73.70002 ymax: 40.91526
Geodetic CRS:  NAD83

Show the code

str(nycCensus)

Classes 'sf' and 'data.frame':  2162 obs. of  11 variables:
 $ GEOID   : chr  "36061000100" "36061001401" "36061000201" "36061000600" ...
 $ STATEFP : chr  "36" "36" "36" "36" ...
 $ COUNTYFP: chr  "061" "061" "061" "061" ...
 $ TRACTCE : chr  "000100" "001401" "000201" "000600" ...
 $ AFFGEOID: chr  "1400000US36061000100" "1400000US36061001401" "1400000US36061000201" "1400000US36061000600" ...
 $ NAME    : chr  "1" "14.01" "2.01" "6" ...
 $ LSAD    : chr  "CT" "CT" "CT" "CT" ...
 $ ALAND   : num  78638 93510 90233 240406 310039 ...
 $ AWATER  : num  0 0 75976 176018 428737 ...
 $ CBSA    : chr  "New York-Newark-Jersey City, NY-NJ-PA" "New York-Newark-Jersey City, NY-NJ-PA" "New York-Newark-Jersey City, NY-NJ-PA" "New York-Newark-Jersey City, NY-NJ-PA" ...
 $ geometry:sfc_MULTIPOLYGON of length 2162; first list element: List of 3
  ..$ :List of 1
  .. ..$ : num [1:25, 1:2] -74 -74 -74 -74 -74 ...
  ..$ :List of 1
  .. ..$ : num [1:5, 1:2] -74 -74 -74 -74 -74 ...
  ..$ :List of 1
  .. ..$ : num [1:14, 1:2] -74 -74 -74 -74 -74 ...
  ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
 - attr(*, "sf_column")= chr "geometry"
 - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA
  ..- attr(*, "names")= chr [1:10] "GEOID" "STATEFP" "COUNTYFP" "TRACTCE" ...

Show the code

names(nycCensus)

 [1] "GEOID"    "STATEFP"  "COUNTYFP" "TRACTCE"  "AFFGEOID" "NAME"    
 [7] "LSAD"     "ALAND"    "AWATER"   "CBSA"     "geometry"

Show the code

# Assign a borough name to each county FIPS code so the data can be
# sorted by borough. COUNTYFP already holds the three-digit code
# (e.g. "061"), so the codes go on the left-hand side of case_when();
# comparing COUNTYFP to borough names never matches and yields only NA.
# cntyFIPS is kept as a copy of the code; boro_name carries the label.

nycCensus %<>% dplyr::mutate(
  cntyFIPS = COUNTYFP,
  boro_name = dplyr::case_when(
    COUNTYFP == "005" ~ "Bronx",
    COUNTYFP == "047" ~ "Brooklyn",
    COUNTYFP == "061" ~ "Manhattan",
    COUNTYFP == "081" ~ "Queens",
    COUNTYFP == "085" ~ "Staten Island"
  )
)

Task 5

# Read the ACS demographic table. The second line of the file is dropped
# before parsing (presumably the descriptive label row - confirm against
# the CSV), then only the needed columns are kept and renamed.

acsData <- readLines(
  "ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv",
  encoding = "UTF-8"
) %>%
  magrittr::extract(-2) %>%
  textConnection() %>%
  read.csv(
    header = TRUE,
    quote = "\"",
    na.strings = c("", " ", "NA", "N/A", "NULL")
  ) %>%
  dplyr::select(
    GEO_ID,
    totPop = DP05_0001E,
    elderlyPop = DP05_0024E,
    malePop = DP05_0002E,
    femalePop = DP05_0003E,
    whitePop = DP05_0037E,
    blackPop = DP05_0038E,
    asianPop = DP05_0067E,
    hispanicPop = DP05_0071E,
    adultPop = DP05_0021E,
    citizenAdult = DP05_0087E
  ) %>%
  # Keep the last 11 characters of GEO_ID so it lines up with the tract GEOID
  dplyr::mutate(GEO_ID = stringr::str_sub(GEO_ID, -11, -1)) %>%
  tidyr::drop_na()

acsData %>% magrittr::extract(1:10, )

names(acsData)

# Merge (JOIN) ACS data to the census tracts
# Join by attributes / columns. The key is GEOID in the tracts and GEO_ID in
# the ACS table, so both names must be given.

popData <- nycCensus %>%
  dplyr::left_join(acsData, by = c("GEOID" = "GEO_ID"))

# Verify the data: check sum and structure

sum(popData$totPop, na.rm = TRUE)

str(popData)

st_crs(popData)

# Match the CRS of the joined ZIP code data from Task 1

popNYC <- sf::st_transform(popData, sf::st_crs(nyc_covid_data_sf_merged))

# Use JOINED zip code data from Task 1.
# Now aggregate to the zip code level.
# Join by locations with st_join (spatial join): each tract is assigned to
# the ZIP polygon that contains its centroid.

popNYC <- sf::st_join(
  nyc_covid_data_sf_merged,
  popNYC %>% sf::st_centroid(),
  join = sf::st_contains
) %>%
  dplyr::filter(!is.na(totPop)) %>% # remove NA after join
  # Bare column names: quoted strings would be grouped as constants
  dplyr::group_by(
    ZIPCODE, PO_NAME, POPULATION, COUNTY,
    COVID_CASE_COUNT, TOTAL_COVID_TESTS
  ) %>%
  dplyr::summarise(
    totPop = sum(totPop, na.rm = TRUE),
    malePctg = sum(malePop, na.rm = TRUE) / totPop * 100,
    asianPop = sum(asianPop, na.rm = TRUE),
    blackPop = sum(blackPop, na.rm = TRUE),
    hispanicPop = sum(hispanicPop, na.rm = TRUE),
    whitePop = sum(whitePop, na.rm = TRUE),
    .groups = "drop"
  )

names(nyc_covid_data_sf_merged)

# Check and verify the data again

sum(popNYC$totPop, na.rm = TRUE)




<!-- -->

::: {.quarto-embedded-source-code}
```````````````````{.markdown shortcodes="false"}
---
title: "R Week 08 Assignment Updated"
author: "Caitlin Cacciatore"
date: "3/26/2026"
format:
  html:
    toc: true
    toc-location: left
    code-fold: true
    code-summary: "Show the code"
    code-tools: true
---

# Loading Packages - First Steps

```{r load_packages, include=FALSE}
# Loading the packages

# Use the CRAN cloud mirror for any installs below
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Course package list. Install them first if they are not available.
# rgdal, rgeos and maptools were retired from CRAN and are no longer
# listed; sf covers what this document needs from them.
list.of.packages <- c(
  "sf", "sp", "spatial",
  "raster", "grid", "rasterVis",
  "tidyverse", "magrittr", "ggpubr", "lubridate",
  "devtools", "htmlwidgets", "mapview",
  "classInt", "RColorBrewer", "ggmap", "tmap", "leaflet",
  "ggrepel", "ggsn",
  "spdep", "spatialreg", "GWmodel"
)

# Find the packages that have not been installed yet
new.packages <- setdiff(list.of.packages, rownames(installed.packages()))

# Install only the missing ones, so a render does not reinstall sf and
# tidyverse every time. It could take a long time the first time.
if (length(new.packages) > 0) install.packages(new.packages)

# Best-effort load of the whole list: require() returns FALSE instead of
# stopping when an optional package is unavailable.
invisible(lapply(list.of.packages, function(x) {
  require(x, character.only = TRUE, quietly = TRUE)
}))

# Hard dependencies of this document: library() stops with a clear error
# if any of them is missing.
library(tidyverse)
library(sf)
library(magrittr)
```

# Task 1

```{r Task 1, include=TRUE}
# Task 1

# Read the ZIP code shapefile
zip_sf <- st_read("Zip_Code_040114.shp")

# Clean the data: drop features that have no ZIP code
zip_nyc <- zip_sf %>%
  filter(!is.na(ZIPCODE))

# Reproject to WGS84 (EPSG:4326). zip_nyc is already an sf object, so
# st_as_sf(zip_nyc, crs = 4326) would not convert the coordinates;
# st_transform() does.
nyc_zip_sf <- st_transform(zip_nyc, crs = 4326)

# Create fancy plots
plot(nyc_zip_sf)

# Figure out structure
str(nyc_zip_sf)

# Read the COVID data for one week
covid_data <- readr::read_csv("tests-by-zcta_2021_04_23.csv", lazy = FALSE)
str(covid_data)

# Merge Zip Code and COVID Data
nyc_covid_data_sf_merged <-
  base::merge(nyc_zip_sf, covid_data, by.x = "ZIPCODE", by.y = "MODIFIED_ZCTA")
names(nyc_covid_data_sf_merged)
```


# Task 2 


```{r Task 2, include=TRUE}

# Aggregate by ZIP code
# Read the CSV file
nys_retail <- read_csv(
  "nys_retail_food_store_xy.csv",
  locale = locale(encoding = "Latin1"),
  lazy = FALSE
)

# Let's check column names
names(nys_retail)
names(nyc_zip_sf)

# Rename the store ZIP column so it reads like the other datasets
nys_retail <- nys_retail %>%
  rename("Zip Code" = "Zip.Code")

# Drop stores without coordinates; points cannot be built from NA values
nys_retail <- nys_retail %>%
  drop_na("X", "Y")

# Convert the store table to an sf point layer (X = longitude, Y = latitude)
nys_retail_sf <- st_as_sf(
  nys_retail,
  coords = c("X", "Y"),
  crs = 4326              # WGS84
)

# Put the ZIP polygons in the same CRS as the store points before joining
nyc_zip_sf <- st_transform(nyc_zip_sf, st_crs(nys_retail_sf))

# Spatial join: attach the attributes of the intersecting ZIP polygon to
# each store (st_join() defaults to a left join, so every store is kept)
nyc_food_stores <- st_join(nys_retail_sf, nyc_zip_sf)

# Count stores per ZIP code. The column name contains a space, so it must be
# wrapped in backticks. A quoted string ("Zip Code") is treated as a constant
# value, which puts every store into one single group.
zip_summary_sf <- nyc_food_stores %>%
  group_by(`Zip Code`) %>%
  summarise(store_count = n(), .groups = "drop")

# Inspect the aggregated result
head(zip_summary_sf)
names(zip_summary_sf)
st_crs(zip_summary_sf)
```

# Task 3

```{r Task 3, include=TRUE}

# Read the Health Facilities
nyc_health <- readr::read_csv("NYS_Health_Facility.csv", lazy = FALSE)
str(nyc_health)

# Keep only facilities that have both a longitude and a latitude
nyc_health_sf_no_na <- nyc_health %>%
  dplyr::filter(
    !is.na(`Facility Longitude`),
    !is.na(`Facility Latitude`)
  )

# Convert to an sf point layer (WGS84)
nyc_health_sf <- nyc_health_sf_no_na %>%
  sf::st_as_sf(coords = c("Facility Longitude", "Facility Latitude"), crs = 4326)

# Make sure coordinate systems match before the spatial join
nyc_zip_sf <- sf::st_transform(nyc_zip_sf, sf::st_crs(nyc_health_sf))

# Spatial join, then keep only facilities that fall inside a ZIP polygon.
# ZIPCODE comes from nyc_zip_sf and is NA for facilities with no matching
# polygon. The previous test, is.na("Zip Code"), checked a literal string,
# was always FALSE, and therefore never removed any rows.
nyc_health_care_centers <- sf::st_join(nyc_health_sf, nyc_zip_sf) %>%
  dplyr::filter(!is.na(ZIPCODE))

# Check names of the health care centers
names(nyc_health_care_centers)
```

# Task 4 

```{r Task 4, include=TRUE}

# Read the Census Tract Data
nycCensus <- sf::st_read("nyc_census_tracts.shp",
                         stringsAsFactors = FALSE)
str(nycCensus)

names(nycCensus)

# Assign a borough name to each county FIPS code so the data can be
# sorted by borough. COUNTYFP already holds the three-digit code
# (e.g. "061"), so the codes go on the left-hand side of case_when();
# comparing COUNTYFP to borough names never matches and yields only NA.
# cntyFIPS is kept as a copy of the code; boro_name carries the label.
nycCensus %<>% dplyr::mutate(
  cntyFIPS = COUNTYFP,
  boro_name = dplyr::case_when(
    COUNTYFP == "005" ~ "Bronx",
    COUNTYFP == "047" ~ "Brooklyn",
    COUNTYFP == "061" ~ "Manhattan",
    COUNTYFP == "081" ~ "Queens",
    COUNTYFP == "085" ~ "Staten Island"
  )
)
```

# Task 5

```{r Task 5, include=TRUE}

# Read the ACS demographic table. The second line of the file is dropped
# before parsing (presumably the descriptive label row - confirm against
# the CSV), then only the needed columns are kept and renamed.
acsData <- readLines(
  "ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv",
  encoding = "UTF-8"
) %>%
  magrittr::extract(-2) %>%
  textConnection() %>%
  read.csv(
    header = TRUE,
    quote = "\"",
    na.strings = c("", " ", "NA", "N/A", "NULL")
  ) %>%
  dplyr::select(
    GEO_ID,
    totPop = DP05_0001E,
    elderlyPop = DP05_0024E,
    malePop = DP05_0002E,
    femalePop = DP05_0003E,
    whitePop = DP05_0037E,
    blackPop = DP05_0038E,
    asianPop = DP05_0067E,
    hispanicPop = DP05_0071E,
    adultPop = DP05_0021E,
    citizenAdult = DP05_0087E
  ) %>%
  # Keep the last 11 characters of GEO_ID so it lines up with the tract GEOID
  dplyr::mutate(GEO_ID = stringr::str_sub(GEO_ID, -11, -1)) %>%
  tidyr::drop_na()

acsData %>% magrittr::extract(1:10, )

names(acsData)

# Merge (JOIN) ACS data to the census tracts
# Join by attributes / columns. The key is GEOID in the tracts and GEO_ID in
# the ACS table, so both names must be given.
popData <- nycCensus %>%
  dplyr::left_join(acsData, by = c("GEOID" = "GEO_ID"))

# Verify the data: check sum and structure
sum(popData$totPop, na.rm = TRUE)

str(popData)

st_crs(popData)

# Match the CRS of the joined ZIP code data from Task 1
popNYC <- sf::st_transform(popData, sf::st_crs(nyc_covid_data_sf_merged))

# Use JOINED zip code data from Task 1.
# Now aggregate to the zip code level.
# Join by locations with st_join (spatial join): each tract is assigned to
# the ZIP polygon that contains its centroid.
popNYC <- sf::st_join(
  nyc_covid_data_sf_merged,
  popNYC %>% sf::st_centroid(),
  join = sf::st_contains
) %>%
  dplyr::filter(!is.na(totPop)) %>% # remove NA after join
  # Bare column names: quoted strings would be grouped as constants
  dplyr::group_by(
    ZIPCODE, PO_NAME, POPULATION, COUNTY,
    COVID_CASE_COUNT, TOTAL_COVID_TESTS
  ) %>%
  dplyr::summarise(
    totPop = sum(totPop, na.rm = TRUE),
    malePctg = sum(malePop, na.rm = TRUE) / totPop * 100,
    asianPop = sum(asianPop, na.rm = TRUE),
    blackPop = sum(blackPop, na.rm = TRUE),
    hispanicPop = sum(hispanicPop, na.rm = TRUE),
    whitePop = sum(whitePop, na.rm = TRUE),
    .groups = "drop"
  )

names(nyc_covid_data_sf_merged)

# Check and verify the data again
sum(popNYC$totPop, na.rm = TRUE)
```

:::