Join the COVID-19 data to the NYC zip code area data (sf or sp polygons).
zipcodes_nyc_sf <- sf::st_read("data/nyc/nyc_data.gpkg", layer = "census_tracts")
## Reading layer `census_tracts' from data source
## `C:\Users\moniq\Desktop\SPRING_26\DATA_ANALYSIS_R\Week_8\R-Spatial\data\nyc\nyc_data.gpkg'
## using driver `GPKG'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
names(zipcodes_nyc_sf)
## [1] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" "AREA"
## [6] "STATE" "COUNTY" "ST_FIPS" "CTY_FIPS" "URL"
## [11] "SHAPE_AREA" "SHAPE_LEN" "geom"
# ZIPCODE was a string and I had to convert it to a number, made sure to use as.numeric() with as.character() to preserve the zipcode format
zipcodes_nyc_sf$ZIPCODE <- as.numeric(as.character(zipcodes_nyc_sf$ZIPCODE))
# Verify the change
sapply(zipcodes_nyc_sf, class) # "numeric"
## $ZIPCODE
## [1] "numeric"
##
## $BLDGZIP
## [1] "character"
##
## $PO_NAME
## [1] "character"
##
## $POPULATION
## [1] "numeric"
##
## $AREA
## [1] "numeric"
##
## $STATE
## [1] "character"
##
## $COUNTY
## [1] "character"
##
## $ST_FIPS
## [1] "character"
##
## $CTY_FIPS
## [1] "character"
##
## $URL
## [1] "character"
##
## $SHAPE_AREA
## [1] "numeric"
##
## $SHAPE_LEN
## [1] "numeric"
##
## $geom
## [1] "sfc_POLYGON" "sfc"
# Loading the covid data
covid_df <- read_csv("R-Spatial_II_Lab/R-Spatial_II_Lab/tests-by-zcta_2020_04_19.csv",
show_col_types = FALSE)
names(covid_df) # finding the col that has zipcode to left join with
## [1] "MODZCTA" "Positive" "Total"
## [4] "zcta_cum.perc_pos"
zipcodes_covid_sf <- dplyr::left_join(zipcodes_nyc_sf, covid_df,
by = c("ZIPCODE" = "MODZCTA"))
names(zipcodes_covid_sf)
## [1] "ZIPCODE" "BLDGZIP" "PO_NAME"
## [4] "POPULATION" "AREA" "STATE"
## [7] "COUNTY" "ST_FIPS" "CTY_FIPS"
## [10] "URL" "SHAPE_AREA" "SHAPE_LEN"
## [13] "Positive" "Total" "zcta_cum.perc_pos"
## [16] "geom"
mapview(zipcodes_covid_sf, zcol = "Total", layer.name = "COVID Cases by ZIP")
Aggregate the NYC food retails store data (points) to the zip code data, so that we know how many retail stores in each zip code area. Note that not all locations are for food retail and we need to choose the specific types according to the data.
lab_2_key_tasks.Rfood_sf_nyc <- st_read("data/nyc/nyc_data.gpkg", layer = "retail_food_stores")
## Reading layer `retail_food_stores' from data source
## `C:\Users\moniq\Desktop\SPRING_26\DATA_ANALYSIS_R\Week_8\R-Spatial\data\nyc\nyc_data.gpkg'
## using driver `GPKG'
## Simple feature collection with 11306 features and 16 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 915179.3 ymin: 124371.4 xmax: 1067194 ymax: 270861.1
## Projected CRS: NAD83 / New York Long Island (ftUS)
food_retail <- food_sf_nyc %>%
filter(str_detect(trimws(Establishment.Type), '[AJD]'))
food_counts_sf <- st_join(zipcodes_covid_sf, food_retail, join = st_contains) %>%
group_by(ZIPCODE) %>%
summarise(
FoodStoreNum = n(),
Total = first(Total),
Positive = first(Positive)
)
FoodStoreNum with mapviewmapview(food_counts_sf, zcol = "FoodStoreNum", layer.name = "Food Stores by ZIP")
Aggregate the NYC health facilities (points) to the zip code data. Similarly, choose appropriate subtypes such as nursing homes from the facilities.
health_facilities layer as
health_sf_nycNHfood_counts_sf as the base so COVID and food columns are
preservedNursingHomeNumhealth_sf_nyc <- st_read("data/nyc/nyc_data.gpkg", layer = "health_facilities")
## Reading layer `health_facilities' from data source
## `C:\Users\moniq\Desktop\SPRING_26\DATA_ANALYSIS_R\Week_8\R-Spatial\data\nyc\nyc_data.gpkg'
## using driver `GPKG'
## Simple feature collection with 1293 features and 34 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 929529 ymin: 127612.4 xmax: 1066447 ymax: 271059.5
## Projected CRS: NAD83 / New York Long Island (ftUS)
nursing_homes <- health_sf_nyc %>%
filter(Short.Description == 'NH')
zipcodes_final_sf <- st_join(food_counts_sf, nursing_homes, join = st_contains) %>%
group_by(ZIPCODE) %>%
summarise(
FoodStoreNum = first(FoodStoreNum),
Total = first(Total),
Positive = first(Positive),
NursingHomeNum = n()
)
mapview(zipcodes_final_sf, zcol = "NursingHomeNum", layer.name = "Nursing Homes by ZIP")
Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.
tractFIPS manuallyreadLines() skips the 2nd header row that
breaks read.csv extract(-2)tractFIPS built in nycCensuspopData is a spatial sf object that keeps the
nycCensus geometry ith the ACS demographic columns
attachednycCensus <- st_read("R-Spatial_II_Lab/R-Spatial_II_Lab/2010_Census_Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp",
stringsAsFactors = FALSE)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\moniq\Desktop\SPRING_26\DATA_ANALYSIS_R\Week_8\R-Spatial\R-Spatial_II_Lab\R-Spatial_II_Lab\2010_Census_Tracts\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS84(DD)
nycCensus <- nycCensus %>%
mutate(
cntyFIPS = case_when(
boro_name == 'Bronx' ~ '005',
boro_name == 'Brooklyn' ~ '047',
boro_name == 'Manhattan' ~ '061',
boro_name == 'Queens' ~ '081',
boro_name == 'Staten Island' ~ '085'
),
tractFIPS = paste(cntyFIPS, ct2010, sep = '')
)
acsData <- readLines("R-Spatial_II_Lab/R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
magrittr::extract(-2) %>%
textConnection() %>%
read.csv(header = TRUE, quote = "\"") %>%
dplyr::select(
GEO_ID,
totPop = DP05_0001E,
elderlyPop = DP05_0024E,
malePop = DP05_0002E,
femalePop = DP05_0003E,
whitePop = DP05_0037E,
blackPop = DP05_0038E,
asianPop = DP05_0067E,
hispanicPop = DP05_0071E
) %>%
mutate(censusCode = stringr::str_sub(GEO_ID, -9, -1))
popData <- merge(nycCensus, acsData, by.x = 'tractFIPS', by.y = 'censusCode')
mapview(popData, zcol = "totPop", layer.name = "Total Population by Census Tract")
Aggregate the ACS census data to zip code area data. 1. Reproject popData to match zipcodes_final_sf CRS 2. Spatial join using centroids, converting census tract polygons to points, so each tract falls into exactly one zip code with no doubles
popNYC <- st_transform(popData, st_crs(zipcodes_final_sf))
nyc_final <- st_join(zipcodes_final_sf,
popNYC %>% st_centroid(),
join = st_contains) %>%
group_by(ZIPCODE, FoodStoreNum, NursingHomeNum, Total, Positive) %>%
summarise(
totPop = sum(totPop, na.rm = TRUE),
elderlyPop = sum(elderlyPop, na.rm = TRUE),
whitePop = sum(whitePop, na.rm = TRUE),
blackPop = sum(blackPop, na.rm = TRUE),
asianPop = sum(asianPop, na.rm = TRUE),
hispanicPop = sum(hispanicPop, na.rm = TRUE),
malePctg = sum(malePop) / sum(totPop) * 100,
.groups = "drop"
)
## Warning: st_centroid assumes attributes are constant over geometries
sf object# Final plots
mapview(nyc_final, zcol = "totPop", layer.name = "Total Population by ZIP")
mapview(nyc_final, zcol = "elderlyPop", layer.name = "Elderly Population by ZIP")
mapview(nyc_final, zcol = "blackPop", layer.name = "Black Population by ZIP")
mapview(nyc_final, zcol = "hispanicPop", layer.name = "Hispanic Population by ZIP")