The NYC postal ZIP code shapefile was read into R via
st_read() (which imports vector spatial data as an
sf object). I made a simple geometry plot to verify that
all data (CRS, bounding box, and attributes) were loaded correctly…
zip_sf <- st_read("data/ZIP_CODE_040114/ZIP_CODE_040114.shp")
## Reading layer `ZIP_CODE_040114' from data source
## `/home/gamfeld/Hunter/S26/GTECH-385/q7/proj/data/ZIP_CODE_040114/ZIP_CODE_040114.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
class(zip_sf)
## [1] "sf" "data.frame"
st_crs(zip_sf)
## Coordinate Reference System:
## User input: NAD83 / New York Long Island (ftUS)
## wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
## BASEGEOGCRS["NAD83",
## DATUM["North American Datum 1983",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4269]],
## CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
## METHOD["Lambert Conic Conformal (2SP)",
## ID["EPSG",9802]],
## PARAMETER["Latitude of false origin",40.1666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8821]],
## PARAMETER["Longitude of false origin",-74,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8822]],
## PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8823]],
## PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8824]],
## PARAMETER["Easting at false origin",984250,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8826]],
## PARAMETER["Northing at false origin",0,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8827]]],
## CS[Cartesian,2],
## AXIS["easting (X)",east,
## ORDER[1],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## AXIS["northing (Y)",north,
## ORDER[2],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## USAGE[
## SCOPE["Engineering survey, topographic mapping."],
## AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
## BBOX[40.47,-74.26,41.3,-71.8]],
## ID["EPSG",2263]]
st_bbox(zip_sf)
## xmin ymin xmax ymax
## 913129.0 120020.9 1067494.3 272710.9
names(zip_sf)
## [1] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" "AREA"
## [6] "STATE" "COUNTY" "ST_FIPS" "CTY_FIPS" "URL"
## [11] "SHAPE_AREA" "SHAPE_LEN" "geometry"
plot(st_geometry(zip_sf), main = "NYC ZIP Code Polygons")
The NYS health facilities spreadsheet was read into R as a table,
then converted into an sf point object using its longitude
and latitude fields. The data was then filtered to just focus on the NYC
metro area.
health_df <- read_csv("data/NYS_Health_Facility.csv", show_col_types = FALSE)
health_sf <- health_df %>%
filter(
between(`Facility Longitude`, -80.5, -71.5),
between(`Facility Latitude`, 40.3, 45.2)
) %>%
st_as_sf(coords = c("Facility Longitude", "Facility Latitude"),
crs = 4326, remove = FALSE)
plot(st_geometry(health_sf), main = "Health Facilities (NYS)")
Focusing on just the NYC metro:
health_sf_nyc <- st_join(health_sf, st_transform(zip_sf, 4326), left = FALSE)
plot(st_geometry(st_transform(zip_sf, 4326)), col = NA, border = "grey", main = "Health Facilities (NYC Only)")
plot(st_geometry(health_sf_nyc), add = TRUE, pch = 16, cex = 0.5)
### Task 3: NYS Food Retail stores
Same as before, the NYS food retail dataset was red into R and geographic coordinates were extracted from the Location field. These were then parsed into latitude and longitude values, filtered to valid ranges, and converted into an sf point object using st_as_sf(), across both NYS and the NYC metro.
food_df <- read_csv("data/NYS_Retail_Food_Stores.csv", show_col_types = FALSE)
food_sf <- food_df %>%
mutate(
coords = str_extract(Location, "\\([^)]+\\)"),
coords = str_remove_all(coords, "[()]"),
lat = as.numeric(str_split_fixed(coords, ",\\s*", 2)[,1]),
lon = as.numeric(str_split_fixed(coords, ",\\s*", 2)[,2])
) %>%
filter(between(lon, -80.5, -71.5), between(lat, 40.3, 45.2)) %>%
st_as_sf(coords = c("lon", "lat"), crs = 4326, remove = FALSE)
plot(st_geometry(food_sf), main = "Retail Food Stores (Filtered NYS)")
(again, for NYC metro overlay):
food_sf_nyc <- st_join(food_sf, st_transform(zip_sf, 4326), left = FALSE)
plot(st_geometry(st_transform(zip_sf, 4326)), col = NA, border = "grey",
main = "Retail Food Stores (NYC Only)")
plot(st_geometry(food_sf_nyc), add = TRUE, pch = 16, cex = 0.35)
To wrap up, the three sf datasets have to be checked to confirm that their locations are reasonable and that the NYC subsets align with the ZIP code polygons (mapview is used to create a simple interactive map to confirm location). The zipcodes layer was coded as grey, health facilities as red, and retail stores blue.
zip_mv <- st_transform(zip_sf, 4326)
health_mv <- health_sf_nyc %>%
select(`Facility Name`, `Facility City`, `Facility County`, geometry)
food_mv <- food_sf_nyc %>%
select(`Entity Name`, City, County, `Zip Code`, geometry)
mapview(zip_mv, layer.name = "NYC ZIP Codes", col.regions = "grey") +
mapview(health_mv, layer.name = "Health Facilities", col.regions = "red") +
mapview(food_mv, layer.name = "Retail Food Stores", col.regions = "blue")