Code Setup

R Spatial Lab Assignment # 8

Spatial Joins! Super fun and cool :)

task 1:

Join the COVID-19 data to the NYC zip code area data (sf or sp polygons).

#Reading in files
nyczip <- st_read("data/R-Spatial_I_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp")
## Reading layer `ZIP_CODE_040114' from data source 
##   `/Users/joelbeckwith/Documents/Hunter_College/RVizGeospatial/R-spatial/data/R-Spatial_I_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
# str(nyczip)
covidcsv <- readr::read_csv("data/R-Spatial_II_Lab/tests-by-zcta_2020_04_12.csv", lazy = FALSE)
## Rows: 178 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (4): MODZCTA, Positive, Total, zcta_cum.perc_pos
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# str(covidcsv)
# The y will be MODZCTA, x will be ZIPCODE

#joining files
covidzip_merged <- base::merge(nyczip, covidcsv, by.x = "ZIPCODE", by.y = "MODZCTA")
str(covidzip_merged)
## Classes 'sf' and 'data.frame':   189 obs. of  16 variables:
##  $ ZIPCODE          : chr  "10001" "10002" "10003" "10004" ...
##  $ BLDGZIP          : chr  "0" "0" "0" "0" ...
##  $ PO_NAME          : chr  "New York" "New York" "New York" "New York" ...
##  $ POPULATION       : num  22413 81305 55878 2187 2187 ...
##  $ AREA             : num  17794941 26280129 15538376 670708 4002521 ...
##  $ STATE            : chr  "NY" "NY" "NY" "NY" ...
##  $ COUNTY           : chr  "New York" "New York" "New York" "New York" ...
##  $ ST_FIPS          : chr  "36" "36" "36" "36" ...
##  $ CTY_FIPS         : chr  "061" "061" "061" "061" ...
##  $ URL              : chr  "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
##  $ SHAPE_AREA       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ SHAPE_LEN        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Positive         : num  211 539 279 23 23 23 23 38 10 34 ...
##  $ Total            : num  448 1024 662 59 59 ...
##  $ zcta_cum.perc_pos: num  47.1 52.6 42.1 39 39 ...
##  $ geometry         :sfc_POLYGON of length 189; first list element: List of 1
##   ..$ : num [1:110, 1:2] 981959 981980 981988 981993 982052 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:15] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...
#success!

task 2:

Aggregate the NYC food retails store data (points) to the zip code data, so that we know how many retail stores in each zip code area. Note that not all locations are for food retail. And we need to choose the specific types according to the data.

#reading in, defining, and cleaning food retail data
nysretfd <- readr::read_csv("data/R-Spatial_I_Lab/nys_retail_food_store_xy.csv",
                            show_col_types= FALSE,
                            lazy= FALSE,
                            locale = readr::locale(encoding = "latin1"))

nycretfd_clean = nysretfd %>%
  filter(`ï..County` %in% c("Richmond","Kings","Queens","Bronx","New York"),
         !is.na(`X`), !is.na(`Y`))

nycretfd_sf <- st_as_sf(nycretfd_clean,
                        coords = c("X","Y"),
                        crs = 4326)

#joining and aggregating
covidzip_merged <- covidzip_merged %>% sf::st_transform(4326)

stores_covidmerge <- sf::st_join(covidzip_merged, nycretfd_sf, join = st_contains)
str(stores_covidmerge)
## Classes 'sf' and 'data.frame':   11258 obs. of  32 variables:
##  $ ZIPCODE           : chr  "10001" "10001" "10001" "10001" ...
##  $ BLDGZIP           : chr  "0" "0" "0" "0" ...
##  $ PO_NAME           : chr  "New York" "New York" "New York" "New York" ...
##  $ POPULATION        : num  22413 22413 22413 22413 22413 ...
##  $ AREA              : num  17794941 17794941 17794941 17794941 17794941 ...
##  $ STATE             : chr  "NY" "NY" "NY" "NY" ...
##  $ COUNTY            : chr  "New York" "New York" "New York" "New York" ...
##  $ ST_FIPS           : chr  "36" "36" "36" "36" ...
##  $ CTY_FIPS          : chr  "061" "061" "061" "061" ...
##  $ URL               : chr  "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
##  $ SHAPE_AREA        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ SHAPE_LEN         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Positive          : num  211 211 211 211 211 211 211 211 211 211 ...
##  $ Total             : num  448 448 448 448 448 448 448 448 448 448 ...
##  $ zcta_cum.perc_pos : num  47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 ...
##  $ ï..County         : chr  "New York" "New York" "New York" "New York" ...
##  $ License.Number    : num  721694 629278 709754 713531 719005 ...
##  $ Operation.Type    : chr  "Store" "Store" "Store" "Store" ...
##  $ Establishment.Type: chr  "JAC" "JAC" "JAC" "JAC" ...
##  $ Entity.Name       : chr  "27 BROADWAY DELI INC" "5TH AVENUE WALI INC" "MASSOLA MANAGEMENT 368 CORP" "302 CHELSEA FOODS INC" ...
##  $ DBA.Name          : chr  "27 BROADWAY DELI INC" "7-ELEVEN STORE 35300A" "7 ELEVEN STORE #35621A" "7-ELEVEN STORE #35630A" ...
##  $ Street.Number     : chr  "1158" "224" "368" "302" ...
##  $ Street.Name       : chr  "BROADWAY" "5TH AVE" "8TH AVE" "8TH AVE" ...
##  $ Address.Line.2    : logi  NA NA NA NA NA NA ...
##  $ Address.Line.3    : logi  NA NA NA NA NA NA ...
##  $ City              : chr  "NEW YORK" "NEW YORK" "NEW YORK" "NEW YORK" ...
##  $ State             : chr  "NY" "NY" "NY" "NY" ...
##  $ Zip.Code          : num  10001 10001 10001 10001 10001 ...
##  $ Square.Footage    : num  300 1500 18000 200 1800 600 500 210 900 1200 ...
##  $ Location          : chr  "1158 BROADWAY\r\nNEW YORK, NY 10001\r\n(40.744597, -73.988869)" "224 5TH AVE\r\nNEW YORK, NY 10001\r\n(40.743731, -73.987994)" "368 8TH AVE\r\nNEW YORK, NY 10001\r\n(40.748759, -73.995946)" "302 8TH AVE\r\nNEW YORK, NY 10001\r\n(40.746712, -73.997438)" ...
##  $ Coords            : chr  "40.744597, -73.988869" "40.743731, -73.987994" "40.748759, -73.995946" "40.746712, -73.997438" ...
##  $ geometry          :sfc_POLYGON of length 11258; first list element: List of 1
##   ..$ : num [1:110, 1:2] -74 -74 -74 -74 -74 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:31] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...
stores_agg <- stores_covidmerge %>%
  filter(`Establishment.Type` %in% c("JAC")) %>%
  group_by(ZIPCODE) %>%
  summarize(numstores = n())

#plotting
stores_agg %>% magrittr::extract('numstores') %>% 
  plot(breaks = "jenks", main = "Number of Food Retail Stores per Zip Code")

#success!

task 3:

Aggregate the NYC health facilities (points) to the zip code data. Similarly, choose appropriate subtypes such as nursing homes from the facilities.

#reading in, defining, cleaning health facility data
nyshealth <- readr::read_csv("data/R-Spatial_I_Lab/NYS_Health_Facility.csv",
                             show_col_types = FALSE, 
                             lazy = FALSE)

nychealth_clean <- nyshealth %>% 
  filter(!is.na(`Facility Longitude`), !is.na(`Facility Latitude`),`Regional Office` == "Metropolitan Area Regional Office - New York City",!`Facility ID` %in% c(10383, 10324,10306))
  

nychealth_sf <- st_as_sf(nychealth_clean,
                         coords = c("Facility Longitude","Facility Latitude"),
                         crs = 4326)

#joining and aggregating

nycdata_merge <- sf::st_join(stores_covidmerge, nychealth_sf, join = st_contains) 

str(nycdata_merge)
## Classes 'sf' and 'data.frame':   125595 obs. of  66 variables:
##  $ ZIPCODE                     : chr  "10001" "10001" "10001" "10001" ...
##  $ BLDGZIP                     : chr  "0" "0" "0" "0" ...
##  $ PO_NAME                     : chr  "New York" "New York" "New York" "New York" ...
##  $ POPULATION                  : num  22413 22413 22413 22413 22413 ...
##  $ AREA                        : num  17794941 17794941 17794941 17794941 17794941 ...
##  $ STATE                       : chr  "NY" "NY" "NY" "NY" ...
##  $ COUNTY                      : chr  "New York" "New York" "New York" "New York" ...
##  $ ST_FIPS                     : chr  "36" "36" "36" "36" ...
##  $ CTY_FIPS                    : chr  "061" "061" "061" "061" ...
##  $ URL                         : chr  "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
##  $ SHAPE_AREA                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ SHAPE_LEN                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Positive                    : num  211 211 211 211 211 211 211 211 211 211 ...
##  $ Total                       : num  448 448 448 448 448 448 448 448 448 448 ...
##  $ zcta_cum.perc_pos           : num  47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 ...
##  $ ï..County                   : chr  "New York" "New York" "New York" "New York" ...
##  $ License.Number              : num  721694 721694 721694 721694 721694 ...
##  $ Operation.Type              : chr  "Store" "Store" "Store" "Store" ...
##  $ Establishment.Type          : chr  "JAC" "JAC" "JAC" "JAC" ...
##  $ Entity.Name                 : chr  "27 BROADWAY DELI INC" "27 BROADWAY DELI INC" "27 BROADWAY DELI INC" "27 BROADWAY DELI INC" ...
##  $ DBA.Name                    : chr  "27 BROADWAY DELI INC" "27 BROADWAY DELI INC" "27 BROADWAY DELI INC" "27 BROADWAY DELI INC" ...
##  $ Street.Number               : chr  "1158" "1158" "1158" "1158" ...
##  $ Street.Name                 : chr  "BROADWAY" "BROADWAY" "BROADWAY" "BROADWAY" ...
##  $ Address.Line.2              : logi  NA NA NA NA NA NA ...
##  $ Address.Line.3              : logi  NA NA NA NA NA NA ...
##  $ City                        : chr  "NEW YORK" "NEW YORK" "NEW YORK" "NEW YORK" ...
##  $ State                       : chr  "NY" "NY" "NY" "NY" ...
##  $ Zip.Code                    : num  10001 10001 10001 10001 10001 ...
##  $ Square.Footage              : num  300 300 300 300 300 300 300 300 300 300 ...
##  $ Location                    : chr  "1158 BROADWAY\r\nNEW YORK, NY 10001\r\n(40.744597, -73.988869)" "1158 BROADWAY\r\nNEW YORK, NY 10001\r\n(40.744597, -73.988869)" "1158 BROADWAY\r\nNEW YORK, NY 10001\r\n(40.744597, -73.988869)" "1158 BROADWAY\r\nNEW YORK, NY 10001\r\n(40.744597, -73.988869)" ...
##  $ Coords                      : chr  "40.744597, -73.988869" "40.744597, -73.988869" "40.744597, -73.988869" "40.744597, -73.988869" ...
##  $ Facility ID                 : num  1523 1536 9565 9173 7057 ...
##  $ Facility Name               : chr  "Union Health Center-ILGWU" "Lower Manhattan Health District" "The High School of Fashion Industries" "Mount Sinai Comprehensive Health Program - Downtown" ...
##  $ Short Description           : chr  "DTC" "DTC-EC" NA "HOSP-EC" ...
##  $ Description                 : chr  "Diagnostic and Treatment Center" "Diagnostic and Treatment Center Extension Clinic" "School Based Diagnostic and Treatment Center Extension Clinic" "Hospital Extension Clinic" ...
##  $ Facility Open Date          : chr  "01/01/1901" "09/27/1979" "04/10/2013" "05/29/2010" ...
##  $ Facility Address 1          : chr  "275 Seventh Avenue" "303 9th Avenue" "225 W 24th Street" "275 7th Avenue" ...
##  $ Facility Address 2          : chr  NA NA NA NA ...
##  $ Facility City               : chr  "New York" "New York" "New York" "New York" ...
##  $ Facility State              : chr  "New York" "New York" "New York" "New York" ...
##  $ Facility Zip Code           : chr  "10001" "10001" "10011" "10001-6708" ...
##  $ Facility Phone Number       : num  2.13e+09 2.12e+09 2.13e+09 2.13e+09 2.13e+09 ...
##  $ Facility Fax Number         : num  NA NA NA 2.13e+09 NA ...
##  $ Facility Website            : chr  NA NA NA NA ...
##  $ Facility County Code        : num  7093 7093 7093 7093 7093 ...
##  $ Facility County             : chr  "New York" "New York" "New York" "New York" ...
##  $ Regional Office ID          : num  5 5 5 5 5 5 5 5 5 5 ...
##  $ Regional Office             : chr  "Metropolitan Area Regional Office - New York City" "Metropolitan Area Regional Office - New York City" "Metropolitan Area Regional Office - New York City" "Metropolitan Area Regional Office - New York City" ...
##  $ Main Site Name              : chr  NA "New York City Department of Health and Mental Hygiene" "Family Health Center of Harlem" "Mount Sinai Hospital" ...
##  $ Main Site Facility ID       : num  NA 1339 9375 1456 NA ...
##  $ Operating Certificate Number: chr  "7002231R" "7002112R" "7002137R" "7002024H" ...
##  $ Operator Name               : chr  "Union Sanitorium Association Inc" "City of New York" "Institute for Family Health, Inc." "The Mount Sinai Hospital" ...
##  $ Operator Address 1          : chr  "275 7th Avenue" "250 Church Street" "16 East 16th Street" "One Gustave L Levy Place" ...
##  $ Operator Address 2          : chr  NA NA NA NA ...
##  $ Operator City               : chr  "New York" "New York" "New York" "New York" ...
##  $ Operator State              : chr  "New York" "New York" "New York" "New York" ...
##  $ Operator Zip Code           : chr  "10001" "10013" "10003" "10029" ...
##  $ Cooperator Name             : chr  NA NA NA "Mount Sinai Hospitals Group, Inc." ...
##  $ Cooperator Address          : chr  NA NA NA "One Gustave L. Levy Place" ...
##  $ Cooperator Address 2        : chr  NA NA NA NA ...
##  $ Cooperator City             : chr  NA NA NA "New York" ...
##  $ Cooperator State            : chr  "New York" "New York" "New York" "New York" ...
##  $ Cooperator Zip Code         : chr  NA NA NA "10029" ...
##  $ Ownership Type              : chr  "Not for Profit Corporation" "Municipality" "Not for Profit Corporation" "Not for Profit Corporation" ...
##  $ Facility Location           : chr  "(40.745678, -73.994385)" "(40.749596, -73.999138)" "(40.745266, -73.996231)" "(40.745678, -73.994385)" ...
##  $ geometry                    :sfc_POLYGON of length 125595; first list element: List of 1
##   ..$ : num [1:110, 1:2] -74 -74 -74 -74 -74 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:65] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...
health_agg <- nycdata_merge %>%
  filter(`Short Description` == "NH") %>%
  group_by(ZIPCODE) %>%
  summarize(nursenum = n())

#plotting
  health_agg %>% magrittr::extract('nursenum') %>%
    plot(breaks = "jenks", main = "Nursing homes per zip code")

#success!

task 4:

Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.

#Reading in census tracts
nyccensus <- st_read('data/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp')
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `/Users/joelbeckwith/Documents/Hunter_College/RVizGeospatial/R-spatial/data/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)
#editing
nyccensus %<>% dplyr::mutate(cntyFIPS = case_when(
  boro_name == 'Bronx' ~ '005',
  boro_name == 'Brooklyn' ~ '047',
  boro_name == 'Manhattan' ~ '061',
  boro_name == 'Queens' ~ '081',
  boro_name == 'Staten Island' ~ '085'),
  tractFIPS = paste(cntyFIPS, ct2010, sep='')
)

#reading in and editing ACD data
acsData <- readLines("data/R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>% 
  textConnection() %>%
  read.csv(header=TRUE, quote= "\"") %>%
  dplyr::select(GEO_ID, 
                totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
                malePop = DP05_0002E, femalePop = DP05_0003E,  
                whitePop = DP05_0037E, blackPop = DP05_0038E,
                asianPop = DP05_0067E, hispanicPop = DP05_0071E,
                adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1));

acsData %>%
  magrittr::extract(1:10,)
##                  GEO_ID totPop elderlyPop malePop femalePop whitePop blackPop
## 1  1400000US36005000100   7080         51    6503       577     1773     4239
## 2  1400000US36005000200   4542        950    2264      2278     2165     1279
## 3  1400000US36005000400   5634        710    2807      2827     2623     1699
## 4  1400000US36005001600   5917        989    2365      3552     2406     2434
## 5  1400000US36005001900   2765         76    1363      1402      585     1041
## 6  1400000US36005002000   9409        977    4119      5290     3185     4487
## 7  1400000US36005002300   4600        648    2175      2425      479     2122
## 8  1400000US36005002400    172          0     121        51       69       89
## 9  1400000US36005002500   5887        548    2958      2929      903     1344
## 10 1400000US36005002701   2868        243    1259      1609      243      987
##    asianPop hispanicPop adultPop citizenAdult censusCode
## 1       130        2329     6909         6100  005000100
## 2       119        3367     3582         2952  005000200
## 3       226        3873     4507         4214  005000400
## 4        68        3603     4416         3851  005001600
## 5       130        1413     2008         1787  005001900
## 6        29        5905     6851         6170  005002000
## 7        27        2674     3498         3056  005002300
## 8        14           0      131           42  005002400
## 9        68        4562     4237         2722  005002500
## 10        0        1985     1848         1412  005002701
#merging
popData <- merge(nyccensus, acsData, by.x ='tractFIPS', by.y = 'censusCode')

# verify the data
sum(popData$totPop)
## [1] 8443713
st_crs(popData)
## Coordinate Reference System:
##   User input: WGS84(DD) 
##   wkt:
## GEOGCRS["WGS84(DD)",
##     DATUM["WGS84",
##         ELLIPSOID["WGS84",6378137,298.257223563,
##             LENGTHUNIT["metre",1,
##                 ID["EPSG",9001]]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["longitude",east,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["latitude",north,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]]]
popNYC <- sf::st_transform(popData, st_crs(covidzip_merged)) 

popData %>% magrittr::extract('asianPop') %>% plot(breaks='jenks')

task 5:

This is now correct, however, there is too much data for my mac to handle so commenting the lines out so they can be uploaded.

#NYCdata_full <- sf::st_join(nycdata_merge, 
#                              popNYC %>% sf::st_centroid(),
#                              join = st_contains) %>% 
#  group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive, Total) %>%
#  summarise(totPop = sum(totPop),
#            malePctg = sum(malePop)/totPop*100,
#            asianPop = sum(asianPop),
#            blackPop = sum(blackPop),
#            hispanicPop = sum(hispanicPop),
#            whitePop = sum(whitePop)) 

#NYCdata_full %>% head()

# Check and verify the data again
#sum(NYCdata_full$totPop, na.rm = T)

# See where we have those NAs 
#NYCdata_full %>% dplyr::filter(is.na(totPop))

# Looks like we converted census tracts to points, 
# so some zip code contains no such points but parts of census tracts are inside the zip code.
# We can either use higher-resolution data (blockgroups, for example) or use areal interpolation.
# 

#plot(NYCdata_full["Positive"], breaks='jenks')
#plot(NYCdata_full["asianPop"], breaks='jenks')