R Week 08 Assignment

R Spatial Lab Assignment # 2

The second lab is to aggregate data from different sources to the zip codes as the core COVID-19 data are available at that scale. Before you start writing and running the code, explore the data files so that you know what each file contains and how files are related to each other

This lab should reuse the data that you saved in the last lab assignment (the last task). In other words, you should read the three sf objects of NYC zip code areas, food retail stores, and health facilities first using the data file, either a .Rdata file or a GeoPackage file/database.

task 1:

Join the COVID-19 data to the NYC zip code area data (sf or sp polygons).

list.of.packages <- c("sf", "sp", "spatial", "maptools", "rgeos","rgdal","zipcodeR",
                      "terra", "grid", "rasterVis",
                      "tidyverse", "magrittr", "ggpubr", "lubridate","patchwork",
                      "devtools", "htmlwidgets", "mapview",
                      "classInt", "RColorBrewer", 
                      "ggmap", "OpenStreetMap",
                      "tmap", "leaflet", "mapview", 
                      "ggrepel", "ggsn",
                      "spdep","spatialreg","GWmodel");

# Check out the packages that have not been installed yet.
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]

Task 1:

Join the COVID-19 data to the NYC zip code area data (sf or sp polygons).

#get covid and zip code data
nyc_covid <-read_csv("Week_08/R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv", lazy = FALSE)

## Rows: 177 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): NEIGHBORHOOD_NAME, BOROUGH_GROUP, label
## dbl (10): MODIFIED_ZCTA, lat, lon, COVID_CASE_COUNT, COVID_CASE_RATE, POP_DE...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

nyc_covid$MODIFIED_ZCTA<-as.character(nyc_covid$MODIFIED_ZCTA)

nyc_covid <- nyc_covid %>%
                      filter(!is.na(lon) , !is.na(lat))

nyc_zip_codes_sf <- st_read("Week_08/R-Spatial_I_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp")    %>% st_transform(nyc_zip_codes_sf, crs = 4326)

## Reading layer `ZIP_CODE_040114' from data source 
##   `/Users/khadijajallow/Documents/R/Week_08/R-Spatial_I_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)

zip_covid <- nyc_zip_codes_sf %>%
  left_join(nyc_covid, by = c("ZIPCODE" = "MODIFIED_ZCTA"))

Task 2:

Aggregate the NYC food retails store data (points) to the zip code data, so that we know how many retail stores in each zip code area. Note that not all locations are for food retail. And we need to choose the specific types according to the data.

nyc_food_retail <- st_read("Week_08/R-Spatial_II_Lab/nycFoodStore.shp")

## Reading layer `nycFoodStore' from data source 
##   `/Users/khadijajallow/Documents/R/Week_08/R-Spatial_II_Lab/nycFoodStore.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS:  WGS 84

  retail <- nyc_food_retail %>%
  dplyr::filter(grepl('B|C|F|H', Estbl_T)) %>%
  sf::st_join(nyc_zip_codes_sf, ., join= st_contains) %>%
  group_by(ZIPCODE) %>%
  summarise(count_store = n())

Task 3:

Aggregate the NYC health facilities (points) to the zip code data. Similarly, choose appropriate subtypes such as nursing homes from the facilities.

nys_health <- read.csv("Week_08/R-Spatial_II_Lab/NYS_Health_Facility.csv")
#filtering for nyc counties 
nursing_homes <- nys_health %>%
  dplyr::filter(Facility.County == c('Bronx', 'Kings', 'New York', 'Queens', 'Richmond')) %>% 
  dplyr::filter(Short.Description == 'NH') %>% 
  sf::st_as_sf(coords = c("Facility.Longitude", "Facility.Latitude"), crs = 4326) %>% 
  sf::st_join(zip_covid, ., join= st_contains)

nyc_NursHomes_COUNT <- nursing_homes %>% 
  group_by(ZIPCODE) %>%
  summarise(num_NH = n())

Task 4:

Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.

census <- sf::st_read("Week_08/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp", stringsAsFactors = FALSE)

## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `/Users/khadijajallow/Documents/R/Week_08/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)

census %<>% dplyr::mutate(cntyFIPS = case_when(
  boro_name == 'Bronx' ~ '005',
  boro_name == 'Brooklyn' ~ '047',
  boro_name == 'Manhattan' ~ '061',
  boro_name == 'Queens' ~ '081',
  boro_name == 'Staten Island' ~ '085'),
  tractFIPS = paste(cntyFIPS, ct2010, sep='')
)

acs<-readLines("Week_08/R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>% 
  textConnection() %>%
  read.csv(header=TRUE, quote= "\"") %>%
  dplyr::select(GEO_ID, 
                totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
                malePop = DP05_0002E, femalePop = DP05_0003E,  
                whitePop = DP05_0037E, blackPop = DP05_0038E,
                asianPop = DP05_0067E, hispanicPop = DP05_0071E,
                adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1))

popData_merged <- merge(census, acs, by.x ='tractFIPS', by.y = 'censusCode')

popData_nyc <- sf::st_transform(popData_merged, st_crs(zip_covid))

Task 5

Aggregate the ACS census data to zip code area data.

popData_nyc_to_zip <- sf::st_join(zip_covid, 
                          popData_nyc %>% sf::st_centroid(),
                          join = st_contains) %>% 
  group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT, TOTAL_COVID_TESTS) %>%
  summarise(totPop = sum(totPop),
            malePctg = sum(malePop)/totPop*100,
            asianPop = sum(asianPop),
            blackPop = sum(blackPop),
            hispanicPop = sum(hispanicPop),
            whitePop = sum(whitePop))

## Warning: st_centroid assumes attributes are constant over geometries

## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by ZIPCODE, PO_NAME, POPULATION, COUNTY,
##   COVID_CASE_COUNT, and TOTAL_COVID_TESTS.
## ℹ Output is grouped by ZIPCODE, PO_NAME, POPULATION, COUNTY, and
##   COVID_CASE_COUNT.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(ZIPCODE, PO_NAME, POPULATION, COUNTY,
##   COVID_CASE_COUNT, TOTAL_COVID_TESTS))` for per-operation grouping
##   (`?dplyr::dplyr_by`) instead.