Spatial Joins! Super fun and cool :)
Join the COVID-19 data to the NYC zip code area data (sf or sp polygons).
#Reading in files
nyczip <- st_read("data/R-Spatial_I_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp")
## Reading layer `ZIP_CODE_040114' from data source
## `/Users/joelbeckwith/Documents/Hunter_College/RVizGeospatial/R-spatial/data/R-Spatial_I_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
# str(nyczip)
covidcsv <- readr::read_csv("data/R-Spatial_II_Lab/tests-by-zcta_2020_04_12.csv", lazy = FALSE)
## Rows: 178 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (4): MODZCTA, Positive, Total, zcta_cum.perc_pos
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# str(covidcsv)
# The y will be MODZCTA, x will be ZIPCODE
#joining files
covidzip_merged <- base::merge(nyczip, covidcsv, by.x = "ZIPCODE", by.y = "MODZCTA")
str(covidzip_merged)
## Classes 'sf' and 'data.frame': 189 obs. of 16 variables:
## $ ZIPCODE : chr "10001" "10002" "10003" "10004" ...
## $ BLDGZIP : chr "0" "0" "0" "0" ...
## $ PO_NAME : chr "New York" "New York" "New York" "New York" ...
## $ POPULATION : num 22413 81305 55878 2187 2187 ...
## $ AREA : num 17794941 26280129 15538376 670708 4002521 ...
## $ STATE : chr "NY" "NY" "NY" "NY" ...
## $ COUNTY : chr "New York" "New York" "New York" "New York" ...
## $ ST_FIPS : chr "36" "36" "36" "36" ...
## $ CTY_FIPS : chr "061" "061" "061" "061" ...
## $ URL : chr "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
## $ SHAPE_AREA : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SHAPE_LEN : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Positive : num 211 539 279 23 23 23 23 38 10 34 ...
## $ Total : num 448 1024 662 59 59 ...
## $ zcta_cum.perc_pos: num 47.1 52.6 42.1 39 39 ...
## $ geometry :sfc_POLYGON of length 189; first list element: List of 1
## ..$ : num [1:110, 1:2] 981959 981980 981988 981993 982052 ...
## ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:15] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...
#success!
Aggregate the NYC food retails store data (points) to the zip code data, so that we know how many retail stores in each zip code area. Note that not all locations are for food retail. And we need to choose the specific types according to the data.
#reading in, defining, and cleaning food retail data
nysretfd <- readr::read_csv("data/R-Spatial_I_Lab/nys_retail_food_store_xy.csv",
show_col_types= FALSE,
lazy= FALSE,
locale = readr::locale(encoding = "latin1"))
nycretfd_clean = nysretfd %>%
filter(`ï..County` %in% c("Richmond","Kings","Queens","Bronx","New York"),
!is.na(`X`), !is.na(`Y`))
nycretfd_sf <- st_as_sf(nycretfd_clean,
coords = c("X","Y"),
crs = 4326)
#joining and aggregating
covidzip_merged <- covidzip_merged %>% sf::st_transform(4326)
stores_covidmerge <- sf::st_join(covidzip_merged, nycretfd_sf, join = st_contains)
str(stores_covidmerge)
## Classes 'sf' and 'data.frame': 11258 obs. of 32 variables:
## $ ZIPCODE : chr "10001" "10001" "10001" "10001" ...
## $ BLDGZIP : chr "0" "0" "0" "0" ...
## $ PO_NAME : chr "New York" "New York" "New York" "New York" ...
## $ POPULATION : num 22413 22413 22413 22413 22413 ...
## $ AREA : num 17794941 17794941 17794941 17794941 17794941 ...
## $ STATE : chr "NY" "NY" "NY" "NY" ...
## $ COUNTY : chr "New York" "New York" "New York" "New York" ...
## $ ST_FIPS : chr "36" "36" "36" "36" ...
## $ CTY_FIPS : chr "061" "061" "061" "061" ...
## $ URL : chr "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
## $ SHAPE_AREA : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SHAPE_LEN : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Positive : num 211 211 211 211 211 211 211 211 211 211 ...
## $ Total : num 448 448 448 448 448 448 448 448 448 448 ...
## $ zcta_cum.perc_pos : num 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 ...
## $ ï..County : chr "New York" "New York" "New York" "New York" ...
## $ License.Number : num 721694 629278 709754 713531 719005 ...
## $ Operation.Type : chr "Store" "Store" "Store" "Store" ...
## $ Establishment.Type: chr "JAC" "JAC" "JAC" "JAC" ...
## $ Entity.Name : chr "27 BROADWAY DELI INC" "5TH AVENUE WALI INC" "MASSOLA MANAGEMENT 368 CORP" "302 CHELSEA FOODS INC" ...
## $ DBA.Name : chr "27 BROADWAY DELI INC" "7-ELEVEN STORE 35300A" "7 ELEVEN STORE #35621A" "7-ELEVEN STORE #35630A" ...
## $ Street.Number : chr "1158" "224" "368" "302" ...
## $ Street.Name : chr "BROADWAY" "5TH AVE" "8TH AVE" "8TH AVE" ...
## $ Address.Line.2 : logi NA NA NA NA NA NA ...
## $ Address.Line.3 : logi NA NA NA NA NA NA ...
## $ City : chr "NEW YORK" "NEW YORK" "NEW YORK" "NEW YORK" ...
## $ State : chr "NY" "NY" "NY" "NY" ...
## $ Zip.Code : num 10001 10001 10001 10001 10001 ...
## $ Square.Footage : num 300 1500 18000 200 1800 600 500 210 900 1200 ...
## $ Location : chr "1158 BROADWAY\r\nNEW YORK, NY 10001\r\n(40.744597, -73.988869)" "224 5TH AVE\r\nNEW YORK, NY 10001\r\n(40.743731, -73.987994)" "368 8TH AVE\r\nNEW YORK, NY 10001\r\n(40.748759, -73.995946)" "302 8TH AVE\r\nNEW YORK, NY 10001\r\n(40.746712, -73.997438)" ...
## $ Coords : chr "40.744597, -73.988869" "40.743731, -73.987994" "40.748759, -73.995946" "40.746712, -73.997438" ...
## $ geometry :sfc_POLYGON of length 11258; first list element: List of 1
## ..$ : num [1:110, 1:2] -74 -74 -74 -74 -74 ...
## ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:31] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...
stores_agg <- stores_covidmerge %>%
filter(`Establishment.Type` %in% c("JAC")) %>%
group_by(ZIPCODE) %>%
summarize(numstores = n())
#plotting
stores_agg %>% magrittr::extract('numstores') %>%
plot(breaks = "jenks", main = "Number of Food Retail Stores per Zip Code")
#success!
Aggregate the NYC health facilities (points) to the zip code data. Similarly, choose appropriate subtypes such as nursing homes from the facilities.
#reading in, defining, cleaning health facility data
nyshealth <- readr::read_csv("data/R-Spatial_I_Lab/NYS_Health_Facility.csv",
show_col_types = FALSE,
lazy = FALSE)
nychealth_clean <- nyshealth %>%
filter(!is.na(`Facility Longitude`), !is.na(`Facility Latitude`),`Regional Office` == "Metropolitan Area Regional Office - New York City",!`Facility ID` %in% c(10383, 10324,10306))
nychealth_sf <- st_as_sf(nychealth_clean,
coords = c("Facility Longitude","Facility Latitude"),
crs = 4326)
#joining and aggregating
nycdata_merge <- sf::st_join(stores_covidmerge, nychealth_sf, join = st_contains)
str(nycdata_merge)
## Classes 'sf' and 'data.frame': 125595 obs. of 66 variables:
## $ ZIPCODE : chr "10001" "10001" "10001" "10001" ...
## $ BLDGZIP : chr "0" "0" "0" "0" ...
## $ PO_NAME : chr "New York" "New York" "New York" "New York" ...
## $ POPULATION : num 22413 22413 22413 22413 22413 ...
## $ AREA : num 17794941 17794941 17794941 17794941 17794941 ...
## $ STATE : chr "NY" "NY" "NY" "NY" ...
## $ COUNTY : chr "New York" "New York" "New York" "New York" ...
## $ ST_FIPS : chr "36" "36" "36" "36" ...
## $ CTY_FIPS : chr "061" "061" "061" "061" ...
## $ URL : chr "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
## $ SHAPE_AREA : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SHAPE_LEN : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Positive : num 211 211 211 211 211 211 211 211 211 211 ...
## $ Total : num 448 448 448 448 448 448 448 448 448 448 ...
## $ zcta_cum.perc_pos : num 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 ...
## $ ï..County : chr "New York" "New York" "New York" "New York" ...
## $ License.Number : num 721694 721694 721694 721694 721694 ...
## $ Operation.Type : chr "Store" "Store" "Store" "Store" ...
## $ Establishment.Type : chr "JAC" "JAC" "JAC" "JAC" ...
## $ Entity.Name : chr "27 BROADWAY DELI INC" "27 BROADWAY DELI INC" "27 BROADWAY DELI INC" "27 BROADWAY DELI INC" ...
## $ DBA.Name : chr "27 BROADWAY DELI INC" "27 BROADWAY DELI INC" "27 BROADWAY DELI INC" "27 BROADWAY DELI INC" ...
## $ Street.Number : chr "1158" "1158" "1158" "1158" ...
## $ Street.Name : chr "BROADWAY" "BROADWAY" "BROADWAY" "BROADWAY" ...
## $ Address.Line.2 : logi NA NA NA NA NA NA ...
## $ Address.Line.3 : logi NA NA NA NA NA NA ...
## $ City : chr "NEW YORK" "NEW YORK" "NEW YORK" "NEW YORK" ...
## $ State : chr "NY" "NY" "NY" "NY" ...
## $ Zip.Code : num 10001 10001 10001 10001 10001 ...
## $ Square.Footage : num 300 300 300 300 300 300 300 300 300 300 ...
## $ Location : chr "1158 BROADWAY\r\nNEW YORK, NY 10001\r\n(40.744597, -73.988869)" "1158 BROADWAY\r\nNEW YORK, NY 10001\r\n(40.744597, -73.988869)" "1158 BROADWAY\r\nNEW YORK, NY 10001\r\n(40.744597, -73.988869)" "1158 BROADWAY\r\nNEW YORK, NY 10001\r\n(40.744597, -73.988869)" ...
## $ Coords : chr "40.744597, -73.988869" "40.744597, -73.988869" "40.744597, -73.988869" "40.744597, -73.988869" ...
## $ Facility ID : num 1523 1536 9565 9173 7057 ...
## $ Facility Name : chr "Union Health Center-ILGWU" "Lower Manhattan Health District" "The High School of Fashion Industries" "Mount Sinai Comprehensive Health Program - Downtown" ...
## $ Short Description : chr "DTC" "DTC-EC" NA "HOSP-EC" ...
## $ Description : chr "Diagnostic and Treatment Center" "Diagnostic and Treatment Center Extension Clinic" "School Based Diagnostic and Treatment Center Extension Clinic" "Hospital Extension Clinic" ...
## $ Facility Open Date : chr "01/01/1901" "09/27/1979" "04/10/2013" "05/29/2010" ...
## $ Facility Address 1 : chr "275 Seventh Avenue" "303 9th Avenue" "225 W 24th Street" "275 7th Avenue" ...
## $ Facility Address 2 : chr NA NA NA NA ...
## $ Facility City : chr "New York" "New York" "New York" "New York" ...
## $ Facility State : chr "New York" "New York" "New York" "New York" ...
## $ Facility Zip Code : chr "10001" "10001" "10011" "10001-6708" ...
## $ Facility Phone Number : num 2.13e+09 2.12e+09 2.13e+09 2.13e+09 2.13e+09 ...
## $ Facility Fax Number : num NA NA NA 2.13e+09 NA ...
## $ Facility Website : chr NA NA NA NA ...
## $ Facility County Code : num 7093 7093 7093 7093 7093 ...
## $ Facility County : chr "New York" "New York" "New York" "New York" ...
## $ Regional Office ID : num 5 5 5 5 5 5 5 5 5 5 ...
## $ Regional Office : chr "Metropolitan Area Regional Office - New York City" "Metropolitan Area Regional Office - New York City" "Metropolitan Area Regional Office - New York City" "Metropolitan Area Regional Office - New York City" ...
## $ Main Site Name : chr NA "New York City Department of Health and Mental Hygiene" "Family Health Center of Harlem" "Mount Sinai Hospital" ...
## $ Main Site Facility ID : num NA 1339 9375 1456 NA ...
## $ Operating Certificate Number: chr "7002231R" "7002112R" "7002137R" "7002024H" ...
## $ Operator Name : chr "Union Sanitorium Association Inc" "City of New York" "Institute for Family Health, Inc." "The Mount Sinai Hospital" ...
## $ Operator Address 1 : chr "275 7th Avenue" "250 Church Street" "16 East 16th Street" "One Gustave L Levy Place" ...
## $ Operator Address 2 : chr NA NA NA NA ...
## $ Operator City : chr "New York" "New York" "New York" "New York" ...
## $ Operator State : chr "New York" "New York" "New York" "New York" ...
## $ Operator Zip Code : chr "10001" "10013" "10003" "10029" ...
## $ Cooperator Name : chr NA NA NA "Mount Sinai Hospitals Group, Inc." ...
## $ Cooperator Address : chr NA NA NA "One Gustave L. Levy Place" ...
## $ Cooperator Address 2 : chr NA NA NA NA ...
## $ Cooperator City : chr NA NA NA "New York" ...
## $ Cooperator State : chr "New York" "New York" "New York" "New York" ...
## $ Cooperator Zip Code : chr NA NA NA "10029" ...
## $ Ownership Type : chr "Not for Profit Corporation" "Municipality" "Not for Profit Corporation" "Not for Profit Corporation" ...
## $ Facility Location : chr "(40.745678, -73.994385)" "(40.749596, -73.999138)" "(40.745266, -73.996231)" "(40.745678, -73.994385)" ...
## $ geometry :sfc_POLYGON of length 125595; first list element: List of 1
## ..$ : num [1:110, 1:2] -74 -74 -74 -74 -74 ...
## ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:65] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...
health_agg <- nycdata_merge %>%
filter(`Short Description` == "NH") %>%
group_by(ZIPCODE) %>%
summarize(nursenum = n())
#plotting
health_agg %>% magrittr::extract('nursenum') %>%
plot(breaks = "jenks", main = "Nursing homes per zip code")
#success!
Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.
#Reading in census tracts
nyccensus <- st_read('data/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp')
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `/Users/joelbeckwith/Documents/Hunter_College/RVizGeospatial/R-spatial/data/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS84(DD)
#editing
nyccensus %<>% dplyr::mutate(cntyFIPS = case_when(
boro_name == 'Bronx' ~ '005',
boro_name == 'Brooklyn' ~ '047',
boro_name == 'Manhattan' ~ '061',
boro_name == 'Queens' ~ '081',
boro_name == 'Staten Island' ~ '085'),
tractFIPS = paste(cntyFIPS, ct2010, sep='')
)
#reading in and editing ACD data
acsData <- readLines("data/R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
magrittr::extract(-2) %>%
textConnection() %>%
read.csv(header=TRUE, quote= "\"") %>%
dplyr::select(GEO_ID,
totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
malePop = DP05_0002E, femalePop = DP05_0003E,
whitePop = DP05_0037E, blackPop = DP05_0038E,
asianPop = DP05_0067E, hispanicPop = DP05_0071E,
adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1));
acsData %>%
magrittr::extract(1:10,)
## GEO_ID totPop elderlyPop malePop femalePop whitePop blackPop
## 1 1400000US36005000100 7080 51 6503 577 1773 4239
## 2 1400000US36005000200 4542 950 2264 2278 2165 1279
## 3 1400000US36005000400 5634 710 2807 2827 2623 1699
## 4 1400000US36005001600 5917 989 2365 3552 2406 2434
## 5 1400000US36005001900 2765 76 1363 1402 585 1041
## 6 1400000US36005002000 9409 977 4119 5290 3185 4487
## 7 1400000US36005002300 4600 648 2175 2425 479 2122
## 8 1400000US36005002400 172 0 121 51 69 89
## 9 1400000US36005002500 5887 548 2958 2929 903 1344
## 10 1400000US36005002701 2868 243 1259 1609 243 987
## asianPop hispanicPop adultPop citizenAdult censusCode
## 1 130 2329 6909 6100 005000100
## 2 119 3367 3582 2952 005000200
## 3 226 3873 4507 4214 005000400
## 4 68 3603 4416 3851 005001600
## 5 130 1413 2008 1787 005001900
## 6 29 5905 6851 6170 005002000
## 7 27 2674 3498 3056 005002300
## 8 14 0 131 42 005002400
## 9 68 4562 4237 2722 005002500
## 10 0 1985 1848 1412 005002701
#merging
popData <- merge(nyccensus, acsData, by.x ='tractFIPS', by.y = 'censusCode')
# verify the data
sum(popData$totPop)
## [1] 8443713
st_crs(popData)
## Coordinate Reference System:
## User input: WGS84(DD)
## wkt:
## GEOGCRS["WGS84(DD)",
## DATUM["WGS84",
## ELLIPSOID["WGS84",6378137,298.257223563,
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["longitude",east,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["latitude",north,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]]]
popNYC <- sf::st_transform(popData, st_crs(covidzip_merged))
popData %>% magrittr::extract('asianPop') %>% plot(breaks='jenks')
This is now correct, however, there is too much data for my mac to handle so commenting the lines out so they can be uploaded.
#NYCdata_full <- sf::st_join(nycdata_merge,
# popNYC %>% sf::st_centroid(),
# join = st_contains) %>%
# group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive, Total) %>%
# summarise(totPop = sum(totPop),
# malePctg = sum(malePop)/totPop*100,
# asianPop = sum(asianPop),
# blackPop = sum(blackPop),
# hispanicPop = sum(hispanicPop),
# whitePop = sum(whitePop))
#NYCdata_full %>% head()
# Check and verify the data again
#sum(NYCdata_full$totPop, na.rm = T)
# See where we have those NAs
#NYCdata_full %>% dplyr::filter(is.na(totPop))
# Looks like we converted census tracts to points,
# so some zip code contains no such points but parts of census tracts are inside the zip code.
# We can either use higher-resolution data (blockgroups, for example) or use areal interpolation.
#
#plot(NYCdata_full["Positive"], breaks='jenks')
#plot(NYCdata_full["asianPop"], breaks='jenks')