R Week 08 Assignment

R Spatial Lab Assignment # 2

Task 1: Join the COVID-19 data to the NYC zip code area data

nycCOVID <- read_csv("data/R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv")

## Rows: 177 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): NEIGHBORHOOD_NAME, BOROUGH_GROUP, label
## dbl (10): MODIFIED_ZCTA, lat, lon, COVID_CASE_COUNT, COVID_CASE_RATE, POP_DE...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

nyc_zip_sf <- sf::st_read("data/R-Spatial_I_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp")

## Reading layer `ZIP_CODE_040114' from data source 
##   `C:\Users\cpetr\OneDrive\Desktop\GTECH 78520 Data Analysis and Visualization with R\R-Spatial\data\R-Spatial_I_Lab\ZIP_CODE_040114\ZIP_CODE_040114.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)

nyc_covid_zip <- base::merge(nyc_zip_sf, nycCOVID, by.x = 'ZIPCODE', by.y = 'MODIFIED_ZCTA')

Task 2: Aggregate NYC food retail store data to the zip code data

Read and configure NYC Food Store data:

nycFoodStore <- st_read("C:/Users/cpetr/OneDrive/Desktop/GTECH 78520 Data Analysis and Visualization with R/R-Spatial/data/R-Spatial_II_Lab/nycFoodStore.shp")

## Reading layer `nycFoodStore' from data source 
##   `C:\Users\cpetr\OneDrive\Desktop\GTECH 78520 Data Analysis and Visualization with R\R-Spatial\data\R-Spatial_II_Lab\nycFoodStore.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS:  WGS 84

st_crs(nycFoodStore)

## Coordinate Reference System:
##   User input: WGS 84 
##   wkt:
## GEOGCRS["WGS 84",
##     DATUM["World Geodetic System 1984",
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["latitude",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["longitude",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4326]]

# Output shows WGS 84, but we want NAD83 New York Long Island like the nyc_covid_zip file

nycFoodStore <- st_transform(nycFoodStore, 2263)

Aggregate number of food stores by zip code:

zip_food <- st_join(nyc_covid_zip, nycFoodStore, join = st_contains) %>%
     group_by(ZIPCODE) %>%
     summarise(FoodStoreNum = n())

Visualize:

mapView(zip_food, zcol= 'FoodStoreNum')

Merge Food Store Number aggregate into Zip Code/COVID-19 data:

zip_covid_food <- as.data.frame(zip_food) %>% 
  merge(nyc_covid_zip, zip_food, by.x = 'ZIPCODE', by.y = 'ZIPCODE')

Task 3: Aggregate the NYC health facilities to the zip code data

Read and configure NY State Health Facility data:

NYS_Health_Facility <- read_csv("data/R-Spatial_II_Lab/NYS_Health_Facility.csv")

## Rows: 3990 Columns: 36
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (28): Facility Name, Short Description, Description, Facility Open Date,...
## dbl  (8): Facility ID, Facility Phone Number, Facility Fax Number, Facility ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

nyc_health_sf <- NYS_Health_Facility %>%
     filter('Facility County' %in% c("New York", "Kings", "Bronx", "Queens", "Richmond"))

nyc_health_sf <- st_as_sf(nyc_health_sf, coords = c('Facility Longitude', 'Facility Latitude'), crs = 2263)

## Warning in min(cc[[1]], na.rm = TRUE): no non-missing arguments to min;
## returning Inf

## Warning in min(cc[[2]], na.rm = TRUE): no non-missing arguments to min;
## returning Inf

## Warning in max(cc[[1]], na.rm = TRUE): no non-missing arguments to max;
## returning -Inf

## Warning in max(cc[[2]], na.rm = TRUE): no non-missing arguments to max;
## returning -Inf

Aggregate number of health facilities by zip code:

zip_covid_food <- st_as_sf(zip_covid_food, coords = c('lon', 'lat'), crs = 2263)

health_zip <- st_join(zip_covid_food, nyc_health_sf, join=st_contains) %>%
     group_by(ZIPCODE) %>%
     summarise(HealthFacilityNum = n())

Visualize:

mapView(health_zip, zcol= 'HealthFacilityNum')

Merge Health Facility Number aggregate into Zip Code/COVID-19/Food Store data:

zip_covid_food_health <- as.data.frame(health_zip) %>% 
  merge(nyc_covid_zip, health_zip, by.x = 'ZIPCODE', by.y = 'ZIPCODE')

Task 4: Join the Census ACS population, race, and age data to the NYC Planning Census Tract data

Read and configure Census Tract Data:

nycCensus <- sf::st_read('data/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp', stringsAsFactors = FALSE)

## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\cpetr\OneDrive\Desktop\GTECH 78520 Data Analysis and Visualization with R\R-Spatial\data\R-Spatial_II_Lab\2010 Census Tracts\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)

nycCensus %<>% dplyr::mutate(cntyFIPS = case_when(
  boro_name == 'Bronx' ~ '005',
  boro_name == 'Brooklyn' ~ '047',
  boro_name == 'Manhattan' ~ '061',
  boro_name == 'Queens' ~ '081',
  boro_name == 'Staten Island' ~ '085'),
  tractFIPS = paste(cntyFIPS, ct2010, sep='')
)

Read and configure ACS data:

acsData <- readLines("data/R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>% 
  textConnection() %>%
  read.csv(header=TRUE, quote= "\"") %>%
  dplyr::select(GEO_ID, 
                totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
                malePop = DP05_0002E, femalePop = DP05_0003E,  
                whitePop = DP05_0037E, blackPop = DP05_0038E,
                asianPop = DP05_0067E, hispanicPop = DP05_0071E,
                adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1))

Merge:

popData <- merge(nycCensus, acsData, by.x ='tractFIPS', by.y = 'censusCode')

Task 5: Aggregate the ACS census data to zip code area data

Fix CRS:

popNYC <- sf::st_transform(popData, st_crs(nyc_covid_zip))

Aggregate to zip code level:

covidPopZipNYC <- sf::st_join(nyc_covid_zip, 
                              popNYC %>% sf::st_centroid(), 
                              join = st_contains) %>% 
  group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT, TOTAL_COVID_TESTS) %>% 
  summarise(totPop = sum(totPop),
            malePctg = sum(malePop)/totPop*100, 
            asianPop = sum(asianPop),
            blackPop = sum(blackPop),
            hispanicPop = sum(hispanicPop),
            whitePop = sum(whitePop))

## Warning: st_centroid assumes attributes are constant over geometries

## `summarise()` has grouped output by 'ZIPCODE', 'PO_NAME', 'POPULATION',
## 'COUNTY', 'COVID_CASE_COUNT'. You can override using the `.groups` argument.

Visualize COVID case count by zip code:

plot(covidPopZipNYC["COVID_CASE_COUNT"], breaks='jenks')