Task 1 & 2
library(sf)
## Linking to GEOS 3.8.1, GDAL 3.1.4, PROJ 6.3.1
library(ggplot2)
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble 3.1.1 ✓ dplyr 1.0.5
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(tidyr)
library(dplyr)
st_read("./ZIP_CODE_040114/ZIP_CODE_040114.shp") -> nycZIP
## Reading layer `ZIP_CODE_040114' from data source `/Users/radleyciego/Documents/Hunter College/Spring 2021/GTECH 78520 - Data Analysis & Visualization/Homework/Week 12/R-Spatial_II_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp' using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
read.csv("./tests-by-zcta_2021_04_23.csv") -> nycCOVID
# Join the COVID-19 data to the NYC zipcode area
nycZIP %>% dplyr::mutate(ZIP = as.numeric(ZIPCODE)) %>%
dplyr::left_join(nycCOVID, by = c('ZIP' = 'MODIFIED_ZCTA')) -> nycZIPcovid
sf::st_read("./nycFoodStore.shp") -> nycfoodstoreSF
## Reading layer `nycFoodStore' from data source `/Users/radleyciego/Documents/Hunter College/Spring 2021/GTECH 78520 - Data Analysis & Visualization/Homework/Week 12/R-Spatial_II_Lab/nycFoodStore.shp' using driver `ESRI Shapefile'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS: WGS 84
st_crs(nycfoodstoreSF)
## Coordinate Reference System:
## User input: WGS 84
## wkt:
## GEOGCRS["WGS 84",
## DATUM["World Geodetic System 1984",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["latitude",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["longitude",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4326]]
st_crs(nycZIP)
## Coordinate Reference System:
## User input: NAD83 / New York Long Island (ftUS)
## wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
## BASEGEOGCRS["NAD83",
## DATUM["North American Datum 1983",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4269]],
## CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
## METHOD["Lambert Conic Conformal (2SP)",
## ID["EPSG",9802]],
## PARAMETER["Latitude of false origin",40.1666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8821]],
## PARAMETER["Longitude of false origin",-74,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8822]],
## PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8823]],
## PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8824]],
## PARAMETER["Easting at false origin",984250,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8826]],
## PARAMETER["Northing at false origin",0,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8827]]],
## CS[Cartesian,2],
## AXIS["easting (X)",east,
## ORDER[1],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## AXIS["northing (Y)",north,
## ORDER[2],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## USAGE[
## SCOPE["unknown"],
## AREA["USA - New York - SPCS - Long Island"],
## BBOX[40.47,-74.26,41.3,-71.8]],
## ID["EPSG",2263]]
sf::st_transform(nycfoodstoreSF, sf::st_crs(2263)) -> nycfoodstoreSFLocal
nycfoodstoreSF %>% sf::st_transform(sf::st_crs(nycZIP))
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 915179.3 ymin: 124371.4 xmax: 1075512 ymax: 270861.1
## Projected CRS: NAD83 / New York Long Island (ftUS)
## First 10 features:
## ï__Cnty Lcns_Nm Oprtn_T Estbl_T Entty_N
## 1 Bronx 734149 Store JAC 7 ELEVEN FOOD STORE #37933H
## 2 Bronx 606221 Store JAC 1001 SAN MIGUEL FOOD CENTER INC
## 3 Bronx 606228 Store JAC 1029 FOOD PLAZA INC
## 4 Bronx 723375 Store JAC 1078 DELI GROCERY CORP
## 5 Bronx 724807 Store JAC 1086 LUNA DELI GROCERY CORP
## 6 Bronx 712943 Store JAC 109 AJ DELI GROCERY CORP
## 7 Bronx 703060 Store JAC 10 NEIGHBORHOOD CANDY GROCERY COR
## 8 Bronx 609065 Store JAC 1105 TINTON DELI GROCERY CORP
## 9 Bronx 722972 Store A 1150 WEBSTER PHARMACY INC
## 10 Bronx 609621 Store JAC 1158 GROCERY & DELI INC
## DBA_Nam Strt_Nmb Stret_Nm Add_L_2 Add_L_3 City
## 1 <NA> 500 BAYCHESTER AVE NA NA BRONX
## 2 1001 SAN MIGUEL FD CNTR 1001 SHERIDAN AVE NA NA BRONX
## 3 1029 FOOD PLAZA 122 E 181ST ST NA NA BRONX
## 4 1078 DELI GROCERY 1078 EAST 165TH STREET NA NA BRONX
## 5 1086 LUNA DELI GROCERY 1086 BOSTON ROAD NA NA BRONX
## 6 109 AJ DELI GROCERY 109 E TREMONT AVE NA NA BRONX
## 7 10 NEIGHBORHOOD CANDY G 10 W. GUN HILL RD. NA NA BRONX
## 8 1105 TINTON DELI GRCY 1105 TINTON AVE NA NA BRONX
## 9 1150 WEBSTER PHARMACY I 1150 WEBSTER AVENUE NA NA BRONX
## 10 1158 GROCERY & DELI 1158 ST LAWRENCE AVE NA NA BRONX
## State Zip_Cod Sqr_Ftg
## 1 NY 10475 0
## 2 NY 10456 1,100
## 3 NY 10453 2,000
## 4 NY 10459 1,200
## 5 NY 10456 1,500
## 6 NY 10453 2,400
## 7 NY 10467 1,000
## 8 NY 10456 1,200
## 9 NY 10456 3,400
## 10 NY 10472 500
## Locatin
## 1 500 BAYCHESTER AVE\nBRONX, NY 10475\n(40.869156, -73.831875)
## 2 1001 SHERIDAN AVE\nBRONX, NY 10456\n(40.829061, -73.919613)
## 3 122 E 181ST ST\nBRONX, NY 10453\n(40.854755, -73.902853)
## 4 1078 EAST 165TH STREET\nBRONX, NY 10459\n(40.825105, -73.890589)
## 5 1086 BOSTON ROAD\nBRONX, NY 10456\n(40.827096, -73.905123)
## 6 109 E TREMONT AVE\nBRONX, NY 10453\n(40.850537, -73.907137)
## 7 10 W GUN HILL RD\nBRONX, NY 10467\n(40.882869, -73.881552)
## 8 1105 TINTON AVE\nBRONX, NY 10456\n(40.826607, -73.901498)
## 9 1150 WEBSTER AVENUE\nBRONX, NY 10456\n(40.830425, -73.91063)
## 10 1158 ST LAWRENCE AVE\nBRONX, NY 10472\n(40.829105, -73.866678)
## Coords geometry
## 1 40.869156, -73.831875 POINT (1030750 255979.3)
## 2 40.829061, -73.919613 POINT (1006497 241336.7)
## 3 40.854755, -73.902853 POINT (1011124 250702.7)
## 4 40.825105, -73.890589 POINT (1014531 239904.1)
## 5 40.827096, -73.905123 POINT (1010507 240624.8)
## 6 40.850537, -73.907137 POINT (1009941 249164.7)
## 7 40.882869, -73.881552 POINT (1017003 260953)
## 8 40.826607, -73.901498 POINT (1011511 240447.8)
## 9 40.830425, -73.91063 POINT (1008982 241836.1)
## 10 40.829105, -73.866678 POINT (1021146 241370.6)
nycfoodstoreSFLocal %>% dplyr::filter(stringr::str_detect(Estbl_T, '[AJD]')) %>%
sf::st_join(nycZIP, ., join = st_contains) %>%
group_by(ZIPCODE, Estbl_T) %>%
summarise(FoodStoreNum = n()) %>%
magrittr::extract('FoodStoreNum') %>%
plot(breaks = "jenks", main = "Number of Retail Stores")
## `summarise()` has grouped output by 'ZIPCODE'. You can override using the `.groups` argument.

Task 3
read.csv("./NYS_Health_Facility.csv") -> nysHealth
# Filter for NYC counties & facilities, mutate
c("Bronx", "New York", "Richmond", "Queens", "Kings") -> Counties
c("NH", "HOSP", "HCPC") -> Facilities
nysHealth %>%
filter(Facility.County %in% Counties) %>%
filter(Short.Description %in% Facilities) %>%
mutate(Latitude =as.numeric(Facility.Latitude),
Longitude = as.numeric(Facility.Longitude),
ZIPCODE = as.numeric(Facility.Zip.Code)) -> nycHealth
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
# Remove missing values
nycHealth[!is.na(nycHealth$Facility.Longitude),] -> nycHealth
# Remove dirty longitude data
nycHealth[nycHealth$Facility.Longitude != 0.0000,] -> nycHealth
# Join nycHealth to nycZIP
nycZIP %>%
dplyr::left_join(nycHealth, by = c('ZIPCODE' = 'Facility.Zip.Code')) -> nycHealth
st_as_sf(nycHealth, coords = c("Facility.Longitude", "Facility.Latitude")) -> nycHealthSF
st_crs(nycHealthSF) <- 2263
nycHealthSF %>%
group_by(ZIPCODE, Short.Description) %>%
summarise(HealthFacNum = n()) %>%
magrittr::extract('HealthFacNum') %>%
plot(breaks = "jenks", main = "Number of Health Facilities")
## `summarise()` has grouped output by 'ZIPCODE'. You can override using the `.groups` argument.

Task 4 & 5
require(sf);
require(magrittr);
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
## The following object is masked from 'package:ggmap':
##
## inset
require(tidyverse);
sf::st_read('./geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp', stringsAsFactors = FALSE) -> nycCensus
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `/Users/radleyciego/Documents/Hunter College/Spring 2021/GTECH 78520 - Data Analysis & Visualization/Homework/Week 12/R-Spatial_II_Lab/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS84(DD)
str(nycCensus)
## Classes 'sf' and 'data.frame': 2165 obs. of 12 variables:
## $ boro_code : chr "5" "1" "1" "1" ...
## $ boro_ct201: chr "5000900" "1009800" "1010000" "1010200" ...
## $ boro_name : chr "Staten Island" "Manhattan" "Manhattan" "Manhattan" ...
## $ cdeligibil: chr "E" "I" "I" "I" ...
## $ ct2010 : chr "000900" "009800" "010000" "010200" ...
## $ ctlabel : chr "9" "98" "100" "102" ...
## $ ntacode : chr "SI22" "MN19" "MN19" "MN17" ...
## $ ntaname : chr "West New Brighton-New Brighton-St. George" "Turtle Bay-East Midtown" "Turtle Bay-East Midtown" "Midtown-Midtown South" ...
## $ puma : chr "3903" "3808" "3808" "3807" ...
## $ shape_area: num 2497010 1906016 1860938 1860993 1864600 ...
## $ shape_leng: num 7729 5534 5692 5688 5693 ...
## $ geometry :sfc_MULTIPOLYGON of length 2165; first list element: List of 1
## ..$ :List of 1
## .. ..$ : num [1:28, 1:2] -74.1 -74.1 -74.1 -74.1 -74.1 ...
## ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:11] "boro_code" "boro_ct201" "boro_name" "cdeligibil" ...
nycCensus %<>% dplyr::mutate(cntyFIPS = dplyr::case_when(
boro_name == 'Bronx' ~ '005',
boro_name == 'Brooklyn' ~ '047',
boro_name == 'Manhattan' ~ '061',
boro_name == 'Queens' ~ '081',
boro_name == 'Staten Island' ~ '085'),
tractFIPS = paste(cntyFIPS, ct2010, sep='')
)
acsData <- readLines("./ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
magrittr::extract(-2) %>%
textConnection() %>%
read.csv(header=TRUE, quote= "\"") %>%
dplyr::select(GEO_ID,
totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
malePop = DP05_0002E, femalePop = DP05_0003E,
whitePop = DP05_0037E, blackPop = DP05_0038E,
asianPop = DP05_0067E, hispanicPop = DP05_0071E,
adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1));
acsData %>%
magrittr::extract(1:10,)
## GEO_ID totPop elderlyPop malePop femalePop whitePop blackPop
## 1 1400000US36005000100 7080 51 6503 577 1773 4239
## 2 1400000US36005000200 4542 950 2264 2278 2165 1279
## 3 1400000US36005000400 5634 710 2807 2827 2623 1699
## 4 1400000US36005001600 5917 989 2365 3552 2406 2434
## 5 1400000US36005001900 2765 76 1363 1402 585 1041
## 6 1400000US36005002000 9409 977 4119 5290 3185 4487
## 7 1400000US36005002300 4600 648 2175 2425 479 2122
## 8 1400000US36005002400 172 0 121 51 69 89
## 9 1400000US36005002500 5887 548 2958 2929 903 1344
## 10 1400000US36005002701 2868 243 1259 1609 243 987
## asianPop hispanicPop adultPop citizenAdult censusCode
## 1 130 2329 6909 6100 005000100
## 2 119 3367 3582 2952 005000200
## 3 226 3873 4507 4214 005000400
## 4 68 3603 4416 3851 005001600
## 5 130 1413 2008 1787 005001900
## 6 29 5905 6851 6170 005002000
## 7 27 2674 3498 3056 005002300
## 8 14 0 131 42 005002400
## 9 68 4562 4237 2722 005002500
## 10 0 1985 1848 1412 005002701
# Merge (JOIN) ACS data to the census tracts
popData <- merge(nycCensus, acsData, by.x ='tractFIPS', by.y = 'censusCode')
# verify the data
sum(popData$totPop)
## [1] 8443713
st_crs(popData)
## Coordinate Reference System:
## User input: WGS84(DD)
## wkt:
## GEOGCRS["WGS84(DD)",
## DATUM["WGS84",
## ELLIPSOID["WGS84",6378137,298.257223563,
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["geodetic longitude",east,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["geodetic latitude",north,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]]]
popNYC <- sf::st_transform(popData, st_crs(nycZIP))
# this may take a long time to run
popData %>%
magrittr::extract('elderlyPop') %>%
plot(breaks = 'jenks')

# Now aggregate to the zip code level
covidPopZipNYC <- sf::st_join(nycZIPcovid,
popNYC %>% sf::st_centroid(), # this essentially converts census tracts to points
join = st_contains) %>%
group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT, TOTAL_COVID_TESTS) %>% # use names(zpNYC) and names(popNYC) to see what we have
summarise(totPop = sum(totPop),
malePctg = sum(malePop)/totPop*100, # note the totPop is the newly calculated
asianPop = sum(asianPop),
blackPop = sum(blackPop),
hispanicPop = sum(hispanicPop),
whitePop = sum(whitePop))
## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x
## `summarise()` has grouped output by 'ZIPCODE', 'PO_NAME', 'POPULATION', 'COUNTY', 'COVID_CASE_COUNT'. You can override using the `.groups` argument.
covidPopZipNYC %>% head()
## Simple feature collection with 6 features and 12 fields
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: 971132.6 ymin: 188447.3 xmax: 998309.7 ymax: 230942.5
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 6 x 13
## # Groups: ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT [6]
## ZIPCODE PO_NAME POPULATION COUNTY COVID_CASE_COUNT TOTAL_COVID_TES… totPop
## <chr> <chr> <dbl> <chr> <int> <int> <int>
## 1 00083 Central P… 25 New Yo… NA NA 3
## 2 10001 New York 22413 New Yo… 1542 20158 19146
## 3 10002 New York 81305 New Yo… 5902 48197 74310
## 4 10003 New York 55878 New Yo… 2803 41076 53487
## 5 10004 New York 2187 New Yo… 247 3599 NA
## 6 10005 New York 8107 New Yo… 413 6102 8809
## # … with 6 more variables: malePctg <dbl>, asianPop <int>, blackPop <int>,
## # hispanicPop <int>, whitePop <int>, geometry <GEOMETRY [US_survey_foot]>
# Check and verify the data again
sum(covidPopZipNYC$totPop, na.rm = T)
## [1] 8395306
# See where we have those NAs
covidPopZipNYC %>% dplyr::filter(is.na(totPop))
## Simple feature collection with 68 features and 12 fields
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: 971132.6 ymin: 151085.5 xmax: 1067494 ymax: 263362
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 68 x 13
## # Groups: ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT [68]
## ZIPCODE PO_NAME POPULATION COUNTY COVID_CASE_COUNT TOTAL_COVID_TES… totPop
## * <chr> <chr> <dbl> <chr> <int> <int> <int>
## 1 10004 New York 2187 New York 247 3599 NA
## 2 10020 New York 0 New York NA NA NA
## 3 10041 New York 0 New York NA NA NA
## 4 10043 New York 0 New York NA NA NA
## 5 10045 New York 0 New York NA NA NA
## 6 10047 New York 0 New York NA NA NA
## 7 10048 New York 0 New York NA NA NA
## 8 10055 New York 12 New York NA NA NA
## 9 10075 New York 25203 New York 1453 18391 NA
## 10 10080 New York 0 New York NA NA NA
## # … with 58 more rows, and 6 more variables: malePctg <dbl>, asianPop <int>,
## # blackPop <int>, hispanicPop <int>, whitePop <int>,
## # geometry <GEOMETRY [US_survey_foot]>
plot(covidPopZipNYC["COVID_CASE_COUNT"], breaks='jenks')

plot(covidPopZipNYC["asianPop"], breaks='jenks')
