# Restore the saved workspace image. It presumably provides nyc_zip_codes,
# health_facilities and retail_food_stores, which are used below without
# being defined in this script -- TODO confirm.
load(file = "~/Spring25/R Analysis/Module 3/Week_07/nyc_spatial_data.RData")
# Read the COVID-19 table keyed by modified ZCTA into a tibble.
covid_data <- read_csv(
  file = "~/Spring25/R Analysis/Module 3/Week_08/Data/R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv"
)
## Rows: 177 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): NEIGHBORHOOD_NAME, BOROUGH_GROUP, label
## dbl (10): MODIFIED_ZCTA, lat, lon, COVID_CASE_COUNT, COVID_CASE_RATE, POP_DE...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Peek at the first rows to check that the columns parsed as expected
covid_data %>% head()
## # A tibble: 6 × 13
## MODIFIED_ZCTA NEIGHBORHOOD_NAME BOROUGH_GROUP label lat lon
## <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 10001 Chelsea/NoMad/West Chelsea Manhattan 1000… 40.8 -74.0
## 2 10002 Chinatown/Lower East Side Manhattan 10002 40.7 -74.0
## 3 10003 East Village/Gramercy/Greenwich… Manhattan 10003 40.7 -74.0
## 4 10004 Financial District Manhattan 10004 40.7 -74.0
## 5 10005 Financial District Manhattan 10005 40.7 -74.0
## 6 10006 Financial District Manhattan 10006 40.7 -74.0
## # ℹ 7 more variables: COVID_CASE_COUNT <dbl>, COVID_CASE_RATE <dbl>,
## # POP_DENOMINATOR <dbl>, COVID_DEATH_COUNT <dbl>, COVID_DEATH_RATE <dbl>,
## # PERCENT_POSITIVE <dbl>, TOTAL_COVID_TESTS <dbl>
# List every column name of the COVID table
covid_data %>% colnames()
## [1] "MODIFIED_ZCTA" "NEIGHBORHOOD_NAME" "BOROUGH_GROUP"
## [4] "label" "lat" "lon"
## [7] "COVID_CASE_COUNT" "COVID_CASE_RATE" "POP_DENOMINATOR"
## [10] "COVID_DEATH_COUNT" "COVID_DEATH_RATE" "PERCENT_POSITIVE"
## [13] "TOTAL_COVID_TESTS"
# Rename the ZCTA identifier to ZIPCODE and store it as text, so it can be
# used as the join key against the ZIP code polygons later in the script.
covid_data <- rename(covid_data, ZIPCODE = MODIFIED_ZCTA)
covid_data <- mutate(covid_data, ZIPCODE = as.character(ZIPCODE))
# Inspect the coordinate reference system of the ZIP code polygons
nyc_zip_codes %>% st_crs()
## Coordinate Reference System:
## User input: NAD83 / New York Long Island (ftUS)
## wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
## BASEGEOGCRS["NAD83",
## DATUM["North American Datum 1983",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4269]],
## CONVERSION["SPCS83 New York Long Island zone (US survey foot)",
## METHOD["Lambert Conic Conformal (2SP)",
## ID["EPSG",9802]],
## PARAMETER["Latitude of false origin",40.1666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8821]],
## PARAMETER["Longitude of false origin",-74,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8822]],
## PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8823]],
## PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8824]],
## PARAMETER["Easting at false origin",984250,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8826]],
## PARAMETER["Northing at false origin",0,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8827]]],
## CS[Cartesian,2],
## AXIS["easting (X)",east,
## ORDER[1],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## AXIS["northing (Y)",north,
## ORDER[2],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## USAGE[
## SCOPE["Engineering survey, topographic mapping."],
## AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
## BBOX[40.47,-74.26,41.3,-71.8]],
## ID["EPSG",2263]]
# Inspect the coordinate reference system of the health facility layer
health_facilities %>% st_crs()
## Coordinate Reference System:
## User input: EPSG:4326
## wkt:
## GEOGCRS["WGS 84",
## ENSEMBLE["World Geodetic System 1984 ensemble",
## MEMBER["World Geodetic System 1984 (Transit)"],
## MEMBER["World Geodetic System 1984 (G730)"],
## MEMBER["World Geodetic System 1984 (G873)"],
## MEMBER["World Geodetic System 1984 (G1150)"],
## MEMBER["World Geodetic System 1984 (G1674)"],
## MEMBER["World Geodetic System 1984 (G1762)"],
## MEMBER["World Geodetic System 1984 (G2139)"],
## MEMBER["World Geodetic System 1984 (G2296)"],
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]],
## ENSEMBLEACCURACY[2.0]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["geodetic latitude (Lat)",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["geodetic longitude (Lon)",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## USAGE[
## SCOPE["Horizontal component of 3D system."],
## AREA["World."],
## BBOX[-90,-180,90,180]],
## ID["EPSG",4326]]
# Inspect the coordinate reference system of the retail food store layer
retail_food_stores %>% st_crs()
## Coordinate Reference System:
## User input: EPSG:4326
## wkt:
## GEOGCRS["WGS 84",
## ENSEMBLE["World Geodetic System 1984 ensemble",
## MEMBER["World Geodetic System 1984 (Transit)"],
## MEMBER["World Geodetic System 1984 (G730)"],
## MEMBER["World Geodetic System 1984 (G873)"],
## MEMBER["World Geodetic System 1984 (G1150)"],
## MEMBER["World Geodetic System 1984 (G1674)"],
## MEMBER["World Geodetic System 1984 (G1762)"],
## MEMBER["World Geodetic System 1984 (G2139)"],
## MEMBER["World Geodetic System 1984 (G2296)"],
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]],
## ENSEMBLEACCURACY[2.0]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["geodetic latitude (Lat)",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["geodetic longitude (Lon)",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## USAGE[
## SCOPE["Horizontal component of 3D system."],
## AREA["World."],
## BBOX[-90,-180,90,180]],
## ID["EPSG",4326]]
# Put all three layers in EPSG:4326 so that they share a single CRS before
# any spatial join is attempted.
nyc_zip_codes <- nyc_zip_codes %>% st_transform(crs = 4326)
health_facilities <- health_facilities %>% st_transform(crs = 4326)
retail_food_stores <- retail_food_stores %>% st_transform(crs = 4326)
# Count the point features that fall inside each ZIP code polygon.
#
# points     : sf point layer (e.g. health facilities or food stores).
# zones      : sf polygon layer that carries a ZIPCODE column.
# count_name : name (string) of the count column in the result.
#
# Returns a plain data frame (no geometry) with one row per ZIPCODE that
# holds at least one point; ZIP codes without points are absent.
count_points_by_zip <- function(points, zones, count_name) {
  points %>%
    # With x = points and y = polygons the predicate must be st_within: a
    # point can lie within a polygon but can never contain one. The previous
    # st_contains matched nothing, so ZIPCODE was NA on every joined row.
    # Only the ZIPCODE column is carried over to keep the join result small.
    st_join(zones["ZIPCODE"], join = st_within) %>%
    # Drop geometry before aggregating: summarising an sf object unions the
    # geometries of each group, which is slow and not needed for a count.
    st_drop_geometry() %>%
    # Points that fall in no polygon have no ZIP code to be counted under.
    filter(!is.na(ZIPCODE)) %>%
    count(ZIPCODE, name = count_name)
}

health_count <- count_points_by_zip(
  health_facilities, nyc_zip_codes, "HealthFacilitiesNum"
)
food_count <- count_points_by_zip(
  retail_food_stores, nyc_zip_codes, "FoodStoreNum"
)

# Attach the COVID table and both counts to the ZIP code polygons. Polygons
# without a matching row in covid_data keep NA in the COVID columns.
# NOTE(review): if a ZIPCODE appears on more than one polygon, each of those
# polygons receives the full count for that ZIP code -- confirm this is
# acceptable for the analysis.
final_sf <- nyc_zip_codes %>%
  left_join(covid_data, by = "ZIPCODE") %>%
  left_join(health_count, by = "ZIPCODE") %>%
  left_join(food_count, by = "ZIPCODE") %>%
  # A ZIP code missing from a count table contains no such points, so its
  # count is zero rather than unknown.
  mutate(
    HealthFacilitiesNum = coalesce(HealthFacilitiesNum, 0L),
    FoodStoreNum = coalesce(FoodStoreNum, 0L)
  )
# Preview the first rows of the combined layer
final_sf %>% head()
## Simple feature collection with 6 features and 26 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -73.99193 ymin: 40.63029 xmax: -73.78805 ymax: 40.6863
## Geodetic CRS: WGS 84
## ZIPCODE BLDGZIP PO_NAME POPULATION AREA STATE COUNTY ST_FIPS CTY_FIPS
## 1 11436 0 Jamaica 18681 22699295 NY Queens 36 081
## 2 11213 0 Brooklyn 62426 29631004 NY Kings 36 047
## 3 11212 0 Brooklyn 83866 41972104 NY Kings 36 047
## 4 11225 0 Brooklyn 56527 23698630 NY Kings 36 047
## 5 11218 0 Brooklyn 72280 36868799 NY Kings 36 047
## 6 11226 0 Brooklyn 106132 39408598 NY Kings 36 047
## URL SHAPE_AREA SHAPE_LEN
## 1 http://www.usps.com/ 0 0
## 2 http://www.usps.com/ 0 0
## 3 http://www.usps.com/ 0 0
## 4 http://www.usps.com/ 0 0
## 5 http://www.usps.com/ 0 0
## 6 http://www.usps.com/ 0 0
## NEIGHBORHOOD_NAME BOROUGH_GROUP label lat
## 1 South Jamaica/South Ozone Park Queens 11436 40.67582
## 2 Crown Heights (East) Brooklyn 11213 40.67107
## 3 Ocean Hill-Brownsville Brooklyn 11212 40.66293
## 4 Crown Heights (West)/Prospect Lefferts Gardens Brooklyn 11225 40.66306
## 5 Kensington/Windsor Terrace Brooklyn 11218 40.64348
## 6 Flatbush/Prospect Lefferts Gardens Brooklyn 11226 40.64646
## lon COVID_CASE_COUNT COVID_CASE_RATE POP_DENOMINATOR COVID_DEATH_COUNT
## 1 -73.79662 1888 9419.96 20042.54 64
## 2 -73.93633 5166 7996.75 64601.26 203
## 3 -73.91301 7182 9709.74 73966.99 330
## 4 -73.95423 3833 6664.50 57513.69 177
## 5 -73.97604 6199 8377.49 73995.92 218
## 6 -73.95665 7279 7476.75 97355.08 368
## COVID_DEATH_RATE PERCENT_POSITIVE TOTAL_COVID_TESTS HealthFacilitiesNum
## 1 319.32 17.57 11082 NA
## 2 314.24 13.72 38560 NA
## 3 446.14 15.64 47319 NA
## 4 307.75 11.62 33709 NA
## 5 294.61 13.93 45884 NA
## 6 378.00 13.33 56287 NA
## FoodStoreNum geometry
## 1 NA POLYGON ((-73.80585 40.6829...
## 2 NA POLYGON ((-73.9374 40.67973...
## 3 NA POLYGON ((-73.90294 40.6708...
## 4 NA POLYGON ((-73.95797 40.6706...
## 5 NA POLYGON ((-73.97208 40.6506...
## 6 NA POLYGON ((-73.9619 40.65487...
# Per-column summary of the combined layer (also reveals NA counts)
final_sf %>% summary()
## ZIPCODE BLDGZIP PO_NAME POPULATION
## Length:263 Length:263 Length:263 Min. : 0.0
## Class :character Class :character Class :character 1st Qu.: 49.5
## Mode :character Mode :character Mode :character Median : 27985.0
## Mean : 31933.9
## 3rd Qu.: 54445.0
## Max. :109069.0
##
## AREA STATE COUNTY ST_FIPS
## Min. : 3155 Length:263 Length:263 Length:263
## 1st Qu.: 964323 Class :character Class :character Class :character
## Median : 21927545 Mode :character Mode :character Mode :character
## Mean : 31816554
## 3rd Qu.: 45935567
## Max. :473985727
##
## CTY_FIPS URL SHAPE_AREA SHAPE_LEN
## Length:263 Length:263 Min. :0 Min. :0
## Class :character Class :character 1st Qu.:0 1st Qu.:0
## Mode :character Mode :character Median :0 Median :0
## Mean :0 Mean :0
## 3rd Qu.:0 3rd Qu.:0
## Max. :0 Max. :0
##
## NEIGHBORHOOD_NAME BOROUGH_GROUP label lat
## Length:263 Length:263 Length:263 Min. :40.51
## Class :character Class :character Class :character 1st Qu.:40.67
## Mode :character Mode :character Mode :character Median :40.73
## Mean :40.73
## 3rd Qu.:40.78
## Max. :40.90
## NA's :74
## lon COVID_CASE_COUNT COVID_CASE_RATE POP_DENOMINATOR
## Min. :-74.24 Min. : 164 Min. : 3413 Min. : 2972
## 1st Qu.:-73.98 1st Qu.: 1842 1st Qu.: 6776 1st Qu.: 25437
## Median :-73.92 Median : 3172 Median : 8623 Median : 40871
## Mean :-73.92 Mean : 3957 Mean : 8522 Mean : 45326
## 3rd Qu.:-73.85 3rd Qu.: 5673 3rd Qu.:10269 3rd Qu.: 65687
## Max. :-73.71 Max. :11581 Max. :16212 Max. :110370
## NA's :74 NA's :74 NA's :74 NA's :74
## COVID_DEATH_COUNT COVID_DEATH_RATE PERCENT_POSITIVE TOTAL_COVID_TESTS
## Min. : 0.0 Min. : 0.0 Min. : 5.50 Min. : 2441
## 1st Qu.: 59.0 1st Qu.:206.5 1st Qu.:11.21 1st Qu.:16429
## Median :115.0 Median :297.4 Median :15.03 Median :26382
## Mean :145.7 Mean :299.1 Mean :13.79 Mean :28704
## 3rd Qu.:219.0 3rd Qu.:372.7 3rd Qu.:16.87 3rd Qu.:41394
## Max. :518.0 Max. :944.5 Max. :21.10 Max. :72559
## NA's :74 NA's :74 NA's :74 NA's :74
## HealthFacilitiesNum FoodStoreNum geometry
## Min. : NA Min. : NA POLYGON :263
## 1st Qu.: NA 1st Qu.: NA epsg:4326 : 0
## Median : NA Median : NA +proj=long...: 0
## Mean :NaN Mean :NaN
## 3rd Qu.: NA 3rd Qu.: NA
## Max. : NA Max. : NA
## NA's :263 NA's :263
# Map COVID case counts per ZIP code polygon, classed with the "jenks"
# break style.
plot(
  x = final_sf[, "COVID_CASE_COUNT"],
  main = "COVID Cases by ZIP Code",
  breaks = "jenks"
)