R Spatial Lab Assignment #2

### Task 1: Join the COVID-19 data to the NYC zip code area data

# Load the ZCTA-level COVID-19 table; show_col_types = FALSE silences the
# column-specification message that read_csv would otherwise print.
covid2021 <- read_csv(
  file = "R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv",
  show_col_types = FALSE
)
# List the columns that were read in.
names(covid2021)
##  [1] "MODIFIED_ZCTA"     "NEIGHBORHOOD_NAME" "BOROUGH_GROUP"    
##  [4] "label"             "lat"               "lon"              
##  [7] "COVID_CASE_COUNT"  "COVID_CASE_RATE"   "POP_DENOMINATOR"  
## [10] "COVID_DEATH_COUNT" "COVID_DEATH_RATE"  "PERCENT_POSITIVE" 
## [13] "TOTAL_COVID_TESTS"
# List the zip code layer's columns (used to pick the join key below).
colnames(nyc_zipcode_sf)
##  [1] "ZIPCODE"    "BLDGZIP"    "PO_NAME"    "POPULATION" "AREA"      
##  [6] "STATE"      "COUNTY"     "ST_FIPS"    "CTY_FIPS"   "URL"       
## [11] "SHAPE_AREA" "SHAPE_LEN"  "geometry"
# Cast the COVID key column to character so it matches ZIPCODE in
# nyc_zipcode_sf.
covid2021 <- mutate(covid2021, MODIFIED_ZCTA = as.character(MODIFIED_ZCTA))

# Attribute join: every zip polygon is kept (left join) and gains the COVID
# columns of the row whose MODIFIED_ZCTA equals its ZIPCODE.
covid2021_zip <- nyc_zipcode_sf %>%
  dplyr::left_join(covid2021, by = c("ZIPCODE" = "MODIFIED_ZCTA"))

### Task 2: Aggregate the NYC retail food store data (points) to the zip code data

# Keep only the establishments whose type code is exactly "A".
nys_retailfoodstores <- dplyr::filter(
  nys_retailfood_sf,
  Establishment.Type == "A"
)

# Reproject the filtered stores into the CRS of the zip/COVID layer.
# NOTE: the result is stored back under the name nys_retailfood_sf, so from
# here on that name refers to the filtered, reprojected stores.
nys_retailfood_sf <- nys_retailfoodstores %>%
  st_transform(crs = st_crs(covid2021_zip))

# Spatially join the store points to the zip polygons. st_join() performs a
# left join by default, so a polygon that contains no store still produces
# one row, with NA in every store attribute.
retailfood_zip_join <- st_join(covid2021_zip, nys_retailfood_sf)

# Count the stores in each zip code.
# Counting rows with n() would report 1 for zip codes without any store
# (their single all-NA row), so count only the rows where a store actually
# matched. Establishment.Type is non-NA for every store because the stores
# were filtered on that column before the join.
foodstore_counts <- retailfood_zip_join %>%
  group_by(ZIPCODE) %>%
  summarise(food_store_count = sum(!is.na(Establishment.Type)))
## Warning in grep("^[.][.](?:[.]|[1-9][0-9]*)$", names): unable to translate
## '<ef>..County' to a wide string
## Warning in grep("^[.][.](?:[.]|[1-9][0-9]*)$", names): input string 25 is
## invalid
# Strip the geometry so the counts become a plain attribute table
# (the author notes that keeping the geometry caused an error in the join
# below; presumably because both inputs would then be sf objects).
foodstore_counts_df <- foodstore_counts %>% st_drop_geometry()

# Append the store counts to the zip/COVID layer produced in task 1.
foodstore_zip <- left_join(covid2021_zip, foodstore_counts_df, by = "ZIPCODE")

### Task 3: Aggregate the NYC health facilities (points) to the zip code data

# Select the nursing homes (Short Description "NH") from the health
# facilities and reproject them into the CRS of the task 2 zip layer.
nys_nursinghomes <- nys_healthfacilities_sf %>%
  dplyr::filter(`Short Description` == "NH") %>%
  st_transform(crs = st_crs(foodstore_zip))

# Spatially join the nursing home points to the zip polygons. st_join()
# performs a left join by default, so a polygon that contains no nursing home
# still produces one row, with NA in every nursing home attribute.
nursinghome_zip_join <- st_join(foodstore_zip, nys_nursinghomes)

# Count the nursing homes in each zip code.
# Counting rows with n() would report 1 for zip codes without any nursing
# home (their single all-NA row), so count only the rows where a facility
# actually matched. `Short Description` is non-NA for every nursing home
# because the facilities were filtered on that column before the join.
nursinghome_counts <- nursinghome_zip_join %>%
  group_by(ZIPCODE) %>%
  summarise(nursinghome_count = sum(!is.na(`Short Description`)))

# Strip the geometry so the counts become a plain attribute table
# (the author notes that keeping the geometry caused an error in the join
# below; presumably because both inputs would then be sf objects).
nursinghome_counts_df <- nursinghome_counts %>% st_drop_geometry()

# Append the nursing home counts to the task 2 output.
nursinghome_zip <- left_join(foodstore_zip, nursinghome_counts_df, by = "ZIPCODE")

### Task 4: Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data

# Load the census tract polygons from the shapefile.
nyc_tracts <- st_read(
  dsn = "R-Spatial_II_Lab/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp"
)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\elish\OneDrive - Hunter - CUNY\GTECH38520\Week8-SpatialOperations\R-Spatial_II_Lab\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)
# Load the ACS DP05 table (one row per geography, DP05_* estimate columns).
acs <- read_csv(file = "R-Spatial_II_Lab/ACSDP5Y2018.DP05.csv")
## Rows: 2167 Columns: 358
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (181): GEO_ID, NAME, DP05_0031PM, DP05_0032E, DP05_0032M, DP05_0032PE, D...
## dbl (177): DP05_0033E, DP05_0033M, DP05_0033PE, DP05_0034E, DP05_0034M, DP05...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Build a GEOID on the tract layer so it can be matched to the ACS table.
# The county FIPS code is looked up from the borough name; a borough name
# that is not listed yields NA.
nyc_tracts <- nyc_tracts %>%
  mutate(
    countyFIPS = unname(
      c(
        "Bronx" = "005",
        "Brooklyn" = "047",
        "Manhattan" = "061",
        "Queens" = "081",
        "Staten Island" = "085"
      )[as.character(boro_name)]
    ),
    # GEOID = NY state FIPS ("36") + county FIPS + tract code
    GEOID = paste0("36", countyFIPS, ct2010)
  )

# Derive the join key: GEOID is the last 11 characters of GEO_ID.
acs2 <- mutate(acs, GEOID = stringr::str_sub(GEO_ID, start = -11))

# Keep the key plus the six estimates needed later, under readable names.
acs2 <- select(
  acs2,
  GEOID,
  total_pop = DP05_0001E,
  elderly_pop = DP05_0024E,
  white_pop = DP05_0037E,
  black_pop = DP05_0038E,
  asian_pop = DP05_0067E,
  hispanic_pop = DP05_0071E
)

# Attribute join: attach the ACS estimates to each census tract by GEOID
# (left join, so every tract polygon is kept).
tracts_acs_join <- left_join(nyc_tracts, acs2, by = "GEOID")
# Confirm which columns the joined layer carries.
names(tracts_acs_join)
##  [1] "boro_code"    "boro_ct201"   "boro_name"    "cdeligibil"   "ct2010"      
##  [6] "ctlabel"      "ntacode"      "ntaname"      "puma"         "shape_area"  
## [11] "shape_leng"   "countyFIPS"   "GEOID"        "total_pop"    "elderly_pop" 
## [16] "white_pop"    "black_pop"    "asian_pop"    "hispanic_pop" "geometry"

### Task 5: Aggregate the ACS census data to zip code area data

# Represent each census tract by its centroid point; the tract attributes
# are carried over to the point.
tracts_points <- tracts_acs_join %>% st_centroid()
## Warning: st_centroid assumes attributes are constant over geometries
# Change the tract centroids' CRS to match the task 3 zip code layer.
tracts_points <- tracts_points %>%
  st_transform(crs = st_crs(nursinghome_zip))

# Spatial join: attach to each zip polygon the tract centroids it matches.
tracts_zip_join <- nursinghome_zip %>% st_join(tracts_points)

# Sum each ACS estimate over the tracts assigned to a zip code. NAs are
# ignored, so a zip code with no matched tract gets 0 in every column.
acs_zip <- tracts_zip_join %>%
  group_by(ZIPCODE) %>%
  summarise(
    across(
      c(total_pop, elderly_pop, white_pop, black_pop, asian_pop, hispanic_pop),
      ~ sum(.x, na.rm = TRUE)
    )
  )

# Drop the geometry so the aggregated ACS values form a plain data frame.
acs_zip_df <- acs_zip %>% st_drop_geometry()

# Attach the zip-level ACS totals to the task 3 output and preview the result.
final_data <- left_join(nursinghome_zip, acs_zip_df, by = "ZIPCODE")
head(final_data)
## Simple feature collection with 6 features and 32 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 986490.1 ymin: 168910.5 xmax: 1043042 ymax: 189382.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
##   ZIPCODE BLDGZIP  PO_NAME POPULATION     AREA STATE COUNTY ST_FIPS CTY_FIPS
## 1   11436       0  Jamaica      18681 22699295    NY Queens      36      081
## 2   11213       0 Brooklyn      62426 29631004    NY  Kings      36      047
## 3   11212       0 Brooklyn      83866 41972104    NY  Kings      36      047
## 4   11225       0 Brooklyn      56527 23698630    NY  Kings      36      047
## 5   11218       0 Brooklyn      72280 36868799    NY  Kings      36      047
## 6   11226       0 Brooklyn     106132 39408598    NY  Kings      36      047
##                    URL SHAPE_AREA SHAPE_LEN
## 1 http://www.usps.com/          0         0
## 2 http://www.usps.com/          0         0
## 3 http://www.usps.com/          0         0
## 4 http://www.usps.com/          0         0
## 5 http://www.usps.com/          0         0
## 6 http://www.usps.com/          0         0
##                                NEIGHBORHOOD_NAME BOROUGH_GROUP label      lat
## 1                 South Jamaica/South Ozone Park        Queens 11436 40.67582
## 2                           Crown Heights (East)      Brooklyn 11213 40.67107
## 3                         Ocean Hill-Brownsville      Brooklyn 11212 40.66293
## 4 Crown Heights (West)/Prospect Lefferts Gardens      Brooklyn 11225 40.66306
## 5                     Kensington/Windsor Terrace      Brooklyn 11218 40.64348
## 6             Flatbush/Prospect Lefferts Gardens      Brooklyn 11226 40.64646
##         lon COVID_CASE_COUNT COVID_CASE_RATE POP_DENOMINATOR COVID_DEATH_COUNT
## 1 -73.79662             1888         9419.96        20042.54                64
## 2 -73.93633             5166         7996.75        64601.26               203
## 3 -73.91301             7182         9709.74        73966.99               330
## 4 -73.95423             3833         6664.50        57513.69               177
## 5 -73.97604             6199         8377.49        73995.92               218
## 6 -73.95665             7279         7476.75        97355.08               368
##   COVID_DEATH_RATE PERCENT_POSITIVE TOTAL_COVID_TESTS food_store_count
## 1           319.32            17.57             11082                1
## 2           314.24            13.72             38560               16
## 3           446.14            15.64             47319               42
## 4           307.75            11.62             33709               15
## 5           294.61            13.93             45884               18
## 6           378.00            13.33             56287               48
##   nursinghome_count total_pop elderly_pop white_pop black_pop asian_pop
## 1                 1     22377        2456      1192     13972      2626
## 2                 1     66602        7662     16483     43625      1836
## 3                 1     73069        9518      5278     59541      1330
## 4                 1     60958        7592     15300     40048      2272
## 5                 1     67426        8144     40315      5045     14695
## 6                 3    103729       12278     16173     69058      5207
##   hispanic_pop                       geometry
## 1         3226 POLYGON ((1038098 188138.4,...
## 2         6738 POLYGON ((1001614 186926.4,...
## 3        13900 POLYGON ((1011174 183696.3,...
## 4         5293 POLYGON ((995908.4 183617.6...
## 5        11169 POLYGON ((991997.1 176307.5...
## 6        18244 POLYGON ((994821.5 177865.7...