# Task 1
# Read the NYC ZIP code polygons from the shapefile.
# The file name is spelled exactly as the on-disk layer (ZIP_CODE_040114) so
# the path also resolves on case-sensitive file systems.
# NOTE(review): later steps refer to `nyc_zip_sf`; confirm where that object
# is created from `zip_sf`.
zip_sf <- st_read("ZIP_CODE_040114.shp")
Reading layer `ZIP_CODE_040114' from data source
`/Users/heliosselene/Desktop/R-Spatial/ZIP_CODE_040114.shp'
using driver `ESRI Shapefile'
Simple feature collection with 263 features and 12 fields
Geometry type: POLYGON
Dimension: XY
Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
Projected CRS: NAD83 / New York Long Island (ftUS)
Warning: plotting the first 9 out of 12 attributes; use max.plot = 12 to plot
all
Show the code
# Inspect the structure (columns, types, geometry) of the ZIP code layer.
str(nyc_zip_sf)
Classes 'sf' and 'data.frame': 263 obs. of 13 variables:
$ ZIPCODE : chr "11436" "11213" "11212" "11225" ...
$ BLDGZIP : chr "0" "0" "0" "0" ...
$ PO_NAME : chr "Jamaica" "Brooklyn" "Brooklyn" "Brooklyn" ...
$ POPULATION: num 18681 62426 83866 56527 72280 ...
$ AREA : num 22699295 29631004 41972104 23698630 36868799 ...
$ STATE : chr "NY" "NY" "NY" "NY" ...
$ COUNTY : chr "Queens" "Kings" "Kings" "Kings" ...
$ ST_FIPS : chr "36" "36" "36" "36" ...
$ CTY_FIPS : chr "081" "047" "047" "047" ...
$ URL : chr "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
$ SHAPE_AREA: num 0 0 0 0 0 0 0 0 0 0 ...
$ SHAPE_LEN : num 0 0 0 0 0 0 0 0 0 0 ...
$ geometry :sfc_POLYGON of length 263; first list element: List of 1
..$ : num [1:159, 1:2] 1038098 1038142 1038171 1038280 1038521 ...
..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
- attr(*, "sf_column")= chr "geometry"
- attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
..- attr(*, "names")= chr [1:12] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...
Show the code
# Read the COVID data for one week.
# lazy = FALSE reads the whole file immediately instead of on first access.
covid_data <- readr::read_csv(
  "tests-by-zcta_2021_04_23.csv",
  lazy = FALSE
)
Rows: 177 Columns: 13
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): NEIGHBORHOOD_NAME, BOROUGH_GROUP, label
dbl (10): MODIFIED_ZCTA, lat, lon, COVID_CASE_COUNT, COVID_CASE_RATE, POP_DE...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Aggregate by ZIP code
# Read the retail food store CSV file, decoding the text as Latin-1.
nys_retail <- read_csv(
  "nys_retail_food_store_xy.csv",
  locale = locale(encoding = "Latin1"),
  lazy = FALSE
)
Rows: 29389 Columns: 18
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (11): ï..County, Operation.Type, Establishment.Type, Entity.Name, DBA.Na...
dbl (4): License.Number, Zip.Code, Y, X
num (1): Square.Footage
lgl (2): Address.Line.2, Address.Line.3
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Rename the ZIP column.
nys_retail <- nys_retail %>%
  rename("Zip Code" = "Zip.Code")

# Drop stores that have no coordinates; st_as_sf() cannot build points
# from missing values.
nys_retail <- nys_retail %>%
  drop_na("X", "Y")

# Convert the table to an sf point layer using its X/Y columns.
nys_retail_sf <- st_as_sf(
  nys_retail,
  coords = c("X", "Y"),
  crs = 4326 # WGS84
)

# Put the ZIP polygons in the same CRS as the store points.
nyc_zip_sf <- st_transform(nyc_zip_sf, st_crs(nys_retail_sf))

# Spatial join: attach the attributes of the containing ZIP polygon to each
# store.
nyc_food_stores <- st_join(nys_retail_sf, nyc_zip_sf)

# Count stores per ZIP code. The column is referenced with backticks so that
# it is evaluated as a column; a quoted string would be treated as a single
# constant and collapse every store into one group.
zip_summary_sf <- nyc_food_stores %>%
  group_by(`Zip Code`) %>%
  summarise(store_count = n(), .groups = "drop")

# See what happened.
head(zip_summary_sf)
Coordinate Reference System:
User input: EPSG:4326
wkt:
GEOGCRS["WGS 84",
ENSEMBLE["World Geodetic System 1984 ensemble",
MEMBER["World Geodetic System 1984 (Transit)"],
MEMBER["World Geodetic System 1984 (G730)"],
MEMBER["World Geodetic System 1984 (G873)"],
MEMBER["World Geodetic System 1984 (G1150)"],
MEMBER["World Geodetic System 1984 (G1674)"],
MEMBER["World Geodetic System 1984 (G1762)"],
MEMBER["World Geodetic System 1984 (G2139)"],
MEMBER["World Geodetic System 1984 (G2296)"],
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]],
ENSEMBLEACCURACY[2.0]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
CS[ellipsoidal,2],
AXIS["geodetic latitude (Lat)",north,
ORDER[1],
ANGLEUNIT["degree",0.0174532925199433]],
AXIS["geodetic longitude (Lon)",east,
ORDER[2],
ANGLEUNIT["degree",0.0174532925199433]],
USAGE[
SCOPE["Horizontal component of 3D system."],
AREA["World."],
BBOX[-90,-180,90,180]],
ID["EPSG",4326]]
Task 3
Show the code
# Read the health facilities table.
# lazy = FALSE reads the whole file immediately instead of on first access.
nyc_health <- readr::read_csv("NYS_Health_Facility.csv", lazy = FALSE)
Rows: 3990 Columns: 36
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (28): Facility Name, Short Description, Description, Facility Open Date,...
dbl (8): Facility ID, Facility Phone Number, Facility Fax Number, Facility ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Show the code
# Inspect the column names and types of the health facility table.
utils::str(nyc_health)
spc_tbl_ [3,990 × 36] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ Facility ID : num [1:3990] 204 620 654 1156 2589 ...
$ Facility Name : chr [1:3990] "Hospice at Lourdes" "Charles T Sitrin Health Care Center Inc" "Central Park Rehabilitation and Nursing Center" "East Side Nursing Home" ...
$ Short Description : chr [1:3990] "HSPC" "NH" "NH" "NH" ...
$ Description : chr [1:3990] "Hospice" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" ...
$ Facility Open Date : chr [1:3990] "06/01/1985" "02/01/1989" "02/01/1989" "08/01/1979" ...
$ Facility Address 1 : chr [1:3990] "4102 Old Vestal Road" "2050 Tilden Avenue" "116 Martin Luther King East" "62 Prospect St" ...
$ Facility Address 2 : chr [1:3990] NA NA NA NA ...
$ Facility City : chr [1:3990] "Vestal" "New Hartford" "Syracuse" "Warsaw" ...
$ Facility State : chr [1:3990] "New York" "New York" "New York" "New York" ...
$ Facility Zip Code : chr [1:3990] "13850" "13413" "13205" "14569" ...
$ Facility Phone Number : num [1:3990] 6.08e+09 3.16e+09 3.15e+09 5.86e+09 5.86e+09 ...
$ Facility Fax Number : num [1:3990] NA NA NA NA NA ...
$ Facility Website : chr [1:3990] NA NA NA NA ...
$ Facility County Code : num [1:3990] 3 32 33 60 2 ...
$ Facility County : chr [1:3990] "Broome" "Oneida" "Onondaga" "Wyoming" ...
$ Regional Office ID : num [1:3990] 3 3 3 1 1 1 7 1 7 5 ...
$ Regional Office : chr [1:3990] "Central New York Regional Office" "Central New York Regional Office" "Central New York Regional Office" "Western Regional Office - Buffalo" ...
$ Main Site Name : chr [1:3990] NA NA NA NA ...
$ Main Site Facility ID : num [1:3990] NA NA NA NA NA ...
$ Operating Certificate Number: chr [1:3990] "0301501F" "3227304N" "3301326N" "6027303N" ...
$ Operator Name : chr [1:3990] "Our Lady of Lourdes Memorial Hospital Inc" "Charles T Sitrin Health Care Center, Inc" "CPRNC, LLC" "East Side Nursing Home Inc" ...
$ Operator Address 1 : chr [1:3990] "169 Riverside Drive" "Box 1000 Tilden Avenue" "116 Martin Luther King East" "62 Prospect Street" ...
$ Operator Address 2 : chr [1:3990] NA NA NA NA ...
$ Operator City : chr [1:3990] "Binghamton" "New Hartford" "Syracuse" "Warsaw" ...
$ Operator State : chr [1:3990] "New York" "New York" "New York" "New York" ...
$ Operator Zip Code : chr [1:3990] "13905" "13413" "13205" "14569" ...
$ Cooperator Name : chr [1:3990] NA NA NA NA ...
$ Cooperator Address : chr [1:3990] NA NA NA NA ...
$ Cooperator Address 2 : chr [1:3990] NA NA NA NA ...
$ Cooperator City : chr [1:3990] NA NA NA NA ...
$ Cooperator State : chr [1:3990] "New York" "New York" "New York" "New York" ...
$ Cooperator Zip Code : chr [1:3990] NA NA NA NA ...
$ Ownership Type : chr [1:3990] "Not for Profit Corporation" "Not for Profit Corporation" "LLC" "Business Corporation" ...
$ Facility Latitude : num [1:3990] 42.1 43.1 NA 42.7 42.1 ...
$ Facility Longitude : num [1:3990] -76 -75.2 NA -78.1 -78 ...
$ Facility Location : chr [1:3990] "(42.097095, -75.975243)" "(43.05497, -75.228828)" NA "(42.738979, -78.12867)" ...
- attr(*, "spec")=
.. cols(
.. `Facility ID` = col_double(),
.. `Facility Name` = col_character(),
.. `Short Description` = col_character(),
.. Description = col_character(),
.. `Facility Open Date` = col_character(),
.. `Facility Address 1` = col_character(),
.. `Facility Address 2` = col_character(),
.. `Facility City` = col_character(),
.. `Facility State` = col_character(),
.. `Facility Zip Code` = col_character(),
.. `Facility Phone Number` = col_double(),
.. `Facility Fax Number` = col_double(),
.. `Facility Website` = col_character(),
.. `Facility County Code` = col_double(),
.. `Facility County` = col_character(),
.. `Regional Office ID` = col_double(),
.. `Regional Office` = col_character(),
.. `Main Site Name` = col_character(),
.. `Main Site Facility ID` = col_double(),
.. `Operating Certificate Number` = col_character(),
.. `Operator Name` = col_character(),
.. `Operator Address 1` = col_character(),
.. `Operator Address 2` = col_character(),
.. `Operator City` = col_character(),
.. `Operator State` = col_character(),
.. `Operator Zip Code` = col_character(),
.. `Cooperator Name` = col_character(),
.. `Cooperator Address` = col_character(),
.. `Cooperator Address 2` = col_character(),
.. `Cooperator City` = col_character(),
.. `Cooperator State` = col_character(),
.. `Cooperator Zip Code` = col_character(),
.. `Ownership Type` = col_character(),
.. `Facility Latitude` = col_double(),
.. `Facility Longitude` = col_double(),
.. `Facility Location` = col_character()
.. )
- attr(*, "problems")=<externalptr>
Show the code
# Keep only facilities that have both coordinates; st_as_sf() cannot build
# points from missing values.
nyc_health_sf_no_na <- nyc_health %>%
  dplyr::filter(
    !is.na(`Facility Longitude`),
    !is.na(`Facility Latitude`)
  )

# Convert to an sf point layer (longitude = x, latitude = y, WGS84).
nyc_health_sf <- nyc_health_sf_no_na %>%
  sf::st_as_sf(
    coords = c("Facility Longitude", "Facility Latitude"),
    crs = 4326
  )

# Make sure the coordinate systems match before joining.
nyc_zip_sf <- sf::st_transform(nyc_zip_sf, sf::st_crs(nyc_health_sf))

# Spatial join: attach the containing ZIP polygon to each facility, then keep
# only facilities that matched one. ZIPCODE is the column contributed by the
# ZIP layer, so it is NA for unmatched facilities. The column must be
# referenced bare: is.na() on a quoted string is always FALSE and filters
# nothing.
nyc_health_care_centers <- sf::st_join(nyc_health_sf, nyc_zip_sf) %>%
  dplyr::filter(!is.na(ZIPCODE))

# Check names of the health care centers.
names(nyc_health_care_centers)
# Add a cntyFIPS column holding the three-digit county FIPS code for each
# borough name, so the data can be sorted by borough. Rows whose value matches
# none of the five names get NA.
# NOTE(review): the comparison is against borough names, so COUNTYFP is
# presumably expected to hold names rather than codes - confirm against the
# shapefile's attribute table.
nycCensus %<>% dplyr::mutate(
  cntyFIPS = case_when(
    COUNTYFP == 'Bronx' ~ '005',
    COUNTYFP == 'Brooklyn' ~ '047',
    COUNTYFP == 'Manhattan' ~ '061',
    COUNTYFP == 'Queens' ~ '081',
    COUNTYFP == 'Staten Island' ~ '085'
  )
)
<!-- -->
::: {.quarto-embedded-source-code}
```````````````````{.markdown shortcodes="false"}
---
title: "R Week 08 Assignment Updated"
author: "Caitlin Cacciatore"
date: "3/26/2026"
format:
html:
toc: true
toc-location: left
code-fold: true
code-summary: "Show the code"
code-tools: true
---
# Loading Packages - First Steps
```{r load_packages, include=FALSE}
# Load every package the analysis relies on, installing missing ones first.

# Use a fixed CRAN mirror so install.packages() works non-interactively.
options(repos = c(CRAN = "https://cloud.r-project.org"))

# The list of packages to be installed and loaded.
# NOTE(review): rgdal, rgeos and maptools were retired from CRAN in 2023;
# on a fresh library they will probably fail to install and are then
# reported by the warning below - confirm whether they are still needed.
list.of.packages <- c(
  "sf", "sp", "spatial", "maptools", "rgeos", "rgdal",
  "raster", "grid", "rasterVis",
  "tidyverse", "magrittr", "ggpubr", "lubridate",
  "devtools", "htmlwidgets", "mapview",
  "classInt", "RColorBrewer", "ggmap", "tmap", "leaflet",
  "ggrepel", "ggsn",
  "spdep", "spatialreg", "GWmodel"
)

# Install only the packages that are not available yet. This can take a long
# time on the first run and is a no-op afterwards.
new.packages <- setdiff(list.of.packages, rownames(installed.packages()))
if (length(new.packages) > 0) {
  install.packages(new.packages)
}

# Attach everything on a best-effort basis: require() returns FALSE instead
# of stopping, so one unavailable optional package does not abort the render.
loaded <- vapply(
  list.of.packages,
  function(pkg) require(pkg, character.only = TRUE, quietly = TRUE),
  logical(1)
)
if (!all(loaded)) {
  warning(
    "Packages that could not be loaded: ",
    paste(list.of.packages[!loaded], collapse = ", "),
    call. = FALSE
  )
}

# Core dependencies must be present: library() stops with an error if not.
library(sf)
library(tidyverse)
library(mapview)
library(magrittr)

# Spatial join: attach the containing ZIP polygon to each health facility and
# keep only facilities that matched one (ZIPCODE comes from the ZIP layer).
# NOTE(review): this needs nyc_health_sf and nyc_zip_sf to exist already -
# confirm that it belongs in the setup chunk.
nyc_health_care_centers <- sf::st_join(nyc_health_sf, nyc_zip_sf) %>%
  dplyr::filter(!is.na(ZIPCODE))

# Check names of the health care centers.
names(nyc_health_care_centers)
```

# Task 4
```{r Task 4, include=TRUE}
# Load the census tract polygons, keeping text attributes as character
# vectors rather than factors.
nycCensus <- sf::st_read(
  "nyc_census_tracts.shp",
  stringsAsFactors = FALSE
)
str(nycCensus)
names(nycCensus)

# Add a cntyFIPS column holding the three-digit county FIPS code for each
# borough name, so the data can be sorted by borough. Rows whose value matches
# none of the five names get NA.
# NOTE(review): the comparison is against borough names, so COUNTYFP is
# presumably expected to hold names rather than codes - confirm against the
# shapefile's attribute table.
nycCensus <- nycCensus %>%
  dplyr::mutate(
    cntyFIPS = case_when(
      COUNTYFP == "Bronx" ~ "005",
      COUNTYFP == "Brooklyn" ~ "047",
      COUNTYFP == "Manhattan" ~ "061",
      COUNTYFP == "Queens" ~ "081",
      COUNTYFP == "Staten Island" ~ "085"
    )
  )