setwd("~/Desktop/R-spatial")
Join the COVID-19 data to the NYC zip code area data (sf or sp polygons).
setwd("~/Desktop/R-spatial")
covid_4_12<-read_csv("/Users/kaitlynmaritan/Desktop/R-spatial/Data/R-spatial_II_Lab/tests-by-zcta_2020_04_12.csv")
## Rows: 178 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (4): MODZCTA, Positive, Total, zcta_cum.perc_pos
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nyc_zipsf<-st_read("/Users/kaitlynmaritan/Desktop/R-spatial/Data/R-spatial-data/R-Spatial_I_Labb/ZIP_CODE/ZIP_CODE_040114.shp")
## Reading layer `ZIP_CODE_040114' from data source
## `/Users/kaitlynmaritan/Desktop/R-spatial/Data/R-spatial-data/R-Spatial_I_Labb/ZIP_CODE/ZIP_CODE_040114.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
covid_4_12<-covid_4_12 %>%
rename(ZIPCODE=MODZCTA)
if (is.na(st_crs(nyc_zipsf))) {
st_crs(nyc_zipsf) <- 2263
} else {
nyc_zipsf <- st_transform(nyc_zipsf, 2263)
}
covid_zip<-base::merge(covid_4_12,nyc_zipsf,by.x="ZIPCODE",by.y="ZIPCODE")
covid_zip <- st_as_sf(covid_zip)
Aggregate the NYC food retails store data (points) to the zip code data, so that we know how many retail stores in each zip code area. Note that not all locations are for food retail. And we need to choose the specific types according to the data.
load("/Users/kaitlynmaritan/Desktop/R-spatial/Data/R-spatial-data/R-Spatial_I_Labb/nysFoodStoreSF.RData")
nysFoodStoreSF<-nysFoodStoreSF %>%
rename(ZIPCODE=Zip.Code)
covid_zip <- st_as_sf(covid_zip)
if (st_crs(nysFoodStoreSF) != st_crs(covid_zip)) {
covid_zip <- st_transform(covid_zip, st_crs(nysFoodStoreSF))
}
food_store_map <- nysFoodStoreSF %>%
filter(str_detect(Establishment.Type, '[AJD]')) %>%
st_join(covid_zip, join = st_contains) %>%
group_by('ZIPCODE') %>%
summarise(FoodStoreNum = n())
plot(food_store_map["FoodStoreNum"], breaks = "jenks", main = "Number of Food Stores")
st_crs(nysFoodStoreSF)
## Coordinate Reference System:
## User input: EPSG:4326
## wkt:
## GEOGCRS["WGS 84",
## ENSEMBLE["World Geodetic System 1984 ensemble",
## MEMBER["World Geodetic System 1984 (Transit)"],
## MEMBER["World Geodetic System 1984 (G730)"],
## MEMBER["World Geodetic System 1984 (G873)"],
## MEMBER["World Geodetic System 1984 (G1150)"],
## MEMBER["World Geodetic System 1984 (G1674)"],
## MEMBER["World Geodetic System 1984 (G1762)"],
## MEMBER["World Geodetic System 1984 (G2139)"],
## MEMBER["World Geodetic System 1984 (G2296)"],
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]],
## ENSEMBLEACCURACY[2.0]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["geodetic latitude (Lat)",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["geodetic longitude (Lon)",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## USAGE[
## SCOPE["Horizontal component of 3D system."],
## AREA["World."],
## BBOX[-90,-180,90,180]],
## ID["EPSG",4326]]
nycRetailFood<-sf::st_transform(nysFoodStoreSF,st_crs(covid_zip))
food_aggregatepoints<-nysFoodStoreSF%>%
group_by(ZIPCODE) %>%
summarise(store_count = n())
food_aggregatepoints %>% head()
## Simple feature collection with 6 features and 2 fields
## Geometry type: MULTIPOINT
## Dimension: XY
## Bounding box: xmin: -74.0167 ymin: 40.70184 xmax: -73.95836 ymax: 40.77978
## Geodetic CRS: WGS 84
## # A tibble: 6 × 3
## ZIPCODE store_count geometry
## <int> <int> <MULTIPOINT [°]>
## 1 10001 57 ((-74.00089 40.7369), (-74.001 40.73673), (-73.99079 40.7…
## 2 10002 192 ((-73.95836 40.77978), (-73.96342 40.77183), (-73.98288 4…
## 3 10003 87 ((-73.98276 40.73535), (-73.98062 40.734), (-73.98038 40.…
## 4 10004 10 ((-74.01357 40.70553), (-74.0158 40.70494), (-74.01267 40…
## 5 10005 10 ((-73.98981 40.73894), (-74.00718 40.70498), (-74.00774 4…
## 6 10006 4 ((-74.01311 40.70753), (-74.01316 40.70783), (-74.013 40.…
sum(food_aggregatepoints$ZIPCODE, na.rm = T)
## [1] 2013357
food_aggregatepoints %>% dplyr::filter(is.na(ZIPCODE))
## Simple feature collection with 0 features and 2 fields
## Bounding box: xmin: NA ymin: NA xmax: NA ymax: NA
## Geodetic CRS: WGS 84
## # A tibble: 0 × 3
## # ℹ 3 variables: ZIPCODE <int>, store_count <int>, geometry <GEOMETRY [°]>
plot(food_aggregatepoints["ZIPCODE"], breaks='jenks')
mapview(food_aggregatepoints,cex=2)
Aggregate the NYC health facilities (points) to the zip code data. Similarly, choose appropriate subtypes such as nursing homes from the facilities.
load("/Users/kaitlynmaritan/Desktop/R-spatial/Data/R-spatial-data/nys_facilities_df")
nycNursingHome<-nys_facilities_df %>%
dplyr::filter(`Short Description` == "NH")
nycNursingHome<-st_transform(nycNursingHome,st_crs(covid_zip))
st_crs(nycNursingHome)
## Coordinate Reference System:
## User input: EPSG:4326
## wkt:
## GEOGCRS["WGS 84",
## ENSEMBLE["World Geodetic System 1984 ensemble",
## MEMBER["World Geodetic System 1984 (Transit)"],
## MEMBER["World Geodetic System 1984 (G730)"],
## MEMBER["World Geodetic System 1984 (G873)"],
## MEMBER["World Geodetic System 1984 (G1150)"],
## MEMBER["World Geodetic System 1984 (G1674)"],
## MEMBER["World Geodetic System 1984 (G1762)"],
## MEMBER["World Geodetic System 1984 (G2139)"],
## MEMBER["World Geodetic System 1984 (G2296)"],
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]],
## ENSEMBLEACCURACY[2.0]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["geodetic latitude (Lat)",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["geodetic longitude (Lon)",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## USAGE[
## SCOPE["Horizontal component of 3D system."],
## AREA["World."],
## BBOX[-90,-180,90,180]],
## ID["EPSG",4326]]
NH_aggregatepoints<-nycNursingHome%>%
group_by("ZIPCODE","Facility Name","Facility ID") %>%
summarise(ZIPCODE = n())
## `summarise()` has grouped output by '"ZIPCODE"', '"Facility Name"'. You can
## override using the `.groups` argument.
NH_aggregatepoints %>% head()
## Simple feature collection with 1 feature and 4 fields
## Geometry type: MULTIPOINT
## Dimension: XY
## Bounding box: xmin: -74.13534 ymin: 40.56918 xmax: -73.70622 ymax: 40.90923
## Geodetic CRS: WGS 84
## # A tibble: 1 × 5
## # Groups: "ZIPCODE", "Facility Name" [1]
## `"ZIPCODE"` `"Facility Name"` `"Facility ID"` ZIPCODE
## <chr> <chr> <chr> <int>
## 1 ZIPCODE Facility Name Facility ID 169
## # ℹ 1 more variable: geometry <MULTIPOINT [°]>
sum(NH_aggregatepoints$ZIPCODE, na.rm = T)
## [1] 169
NH_aggregatepoints %>% dplyr::filter(is.na(ZIPCODE))
## Simple feature collection with 0 features and 4 fields
## Bounding box: xmin: NA ymin: NA xmax: NA ymax: NA
## Geodetic CRS: WGS 84
## # A tibble: 0 × 5
## # Groups: "ZIPCODE", "Facility Name" [0]
## # ℹ 5 variables: "ZIPCODE" <chr>, "Facility Name" <chr>, "Facility ID" <chr>,
## # ZIPCODE <int>, geometry <GEOMETRY [°]>
plot(st_geometry(covid_zip),border= "gray",main ="Nursing Homes")
plot(st_geometry(NH_aggregatepoints), col= "purple",pch =16, add = TRUE)
mapview(NH_aggregatepoints,cex=2)
Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.
nycCensus<-sf::st_read('/Users/kaitlynmaritan/Desktop/R-spatial/Data/R-Spatial_II_Lab/2010_Census_Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp',stringsAsFactors=FALSE)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `/Users/kaitlynmaritan/Desktop/R-spatial/Data/R-Spatial_II_Lab/2010_Census_Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS84(DD)
str(nycCensus)
## Classes 'sf' and 'data.frame': 2165 obs. of 12 variables:
## $ boro_code : chr "5" "1" "1" "1" ...
## $ boro_ct201: chr "5000900" "1009800" "1010000" "1010200" ...
## $ boro_name : chr "Staten Island" "Manhattan" "Manhattan" "Manhattan" ...
## $ cdeligibil: chr "E" "I" "I" "I" ...
## $ ct2010 : chr "000900" "009800" "010000" "010200" ...
## $ ctlabel : chr "9" "98" "100" "102" ...
## $ ntacode : chr "SI22" "MN19" "MN19" "MN17" ...
## $ ntaname : chr "West New Brighton-New Brighton-St. George" "Turtle Bay-East Midtown" "Turtle Bay-East Midtown" "Midtown-Midtown South" ...
## $ puma : chr "3903" "3808" "3808" "3807" ...
## $ shape_area: num 2497010 1906016 1860938 1860993 1864600 ...
## $ shape_leng: num 7729 5534 5692 5688 5693 ...
## $ geometry :sfc_MULTIPOLYGON of length 2165; first list element: List of 1
## ..$ :List of 1
## .. ..$ : num [1:28, 1:2] -74.1 -74.1 -74.1 -74.1 -74.1 ...
## ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:11] "boro_code" "boro_ct201" "boro_name" "cdeligibil" ...
nycCensus %<>% dplyr::mutate(cntyFIPS = case_when(
boro_name == 'Bronx' ~ '005',
boro_name == 'Brooklyn' ~ '047',
boro_name == 'Manhattan' ~ '061',
boro_name == 'Queens' ~ '081',
boro_name == 'Staten Island' ~ '085'),
tractFIPS = paste(cntyFIPS, ct2010, sep='')
)
acsData<-readLines("/Users/kaitlynmaritan/Desktop/R-spatial/Data/R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
magrittr::extract(-2) %>%
textConnection() %>%
read.csv(header=TRUE, quote= "\"") %>%
dplyr::select(GEO_ID,
totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
malePop = DP05_0002E, femalePop = DP05_0003E,
whitePop = DP05_0037E, blackPop = DP05_0038E,
asianPop = DP05_0067E, hispanicPop = DP05_0071E,
adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1));
acsData %>%
magrittr::extract(1:10,)
## GEO_ID totPop elderlyPop malePop femalePop whitePop blackPop
## 1 1400000US36005000100 7080 51 6503 577 1773 4239
## 2 1400000US36005000200 4542 950 2264 2278 2165 1279
## 3 1400000US36005000400 5634 710 2807 2827 2623 1699
## 4 1400000US36005001600 5917 989 2365 3552 2406 2434
## 5 1400000US36005001900 2765 76 1363 1402 585 1041
## 6 1400000US36005002000 9409 977 4119 5290 3185 4487
## 7 1400000US36005002300 4600 648 2175 2425 479 2122
## 8 1400000US36005002400 172 0 121 51 69 89
## 9 1400000US36005002500 5887 548 2958 2929 903 1344
## 10 1400000US36005002701 2868 243 1259 1609 243 987
## asianPop hispanicPop adultPop citizenAdult censusCode
## 1 130 2329 6909 6100 005000100
## 2 119 3367 3582 2952 005000200
## 3 226 3873 4507 4214 005000400
## 4 68 3603 4416 3851 005001600
## 5 130 1413 2008 1787 005001900
## 6 29 5905 6851 6170 005002000
## 7 27 2674 3498 3056 005002300
## 8 14 0 131 42 005002400
## 9 68 4562 4237 2722 005002500
## 10 0 1985 1848 1412 005002701
popData<-merge(nycCensus, acsData, by.x ='tractFIPS', by.y = 'censusCode')
sum(popData$totPop)
## [1] 8443713
Aggregate the ACS census data to zip code area data.
st_crs(popData)
## Coordinate Reference System:
## User input: WGS84(DD)
## wkt:
## GEOGCRS["WGS84(DD)",
## DATUM["WGS84",
## ELLIPSOID["WGS84",6378137,298.257223563,
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["longitude",east,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["latitude",north,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]]]
popNYC <- sf::st_transform(popData, st_crs(covid_zip))
covidPopZipNYC<-sf::st_join(covid_zip,
popNYC %>% sf::st_centroid(),
join = st_contains) %>%
group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive, Total) %>%
summarise(totPop = sum(totPop),
malePctg = sum(malePop)/totPop*100,
asianPop = sum(asianPop),
blackPop = sum(blackPop),
hispanicPop = sum(hispanicPop),
whitePop = sum(whitePop))
## Warning: st_centroid assumes attributes are constant over geometries
## `summarise()` has grouped output by 'ZIPCODE', 'PO_NAME', 'POPULATION',
## 'COUNTY', 'Positive'. You can override using the `.groups` argument.
covidPopZipNYC %>% head()
## Simple feature collection with 6 features and 12 fields
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: -74.0473 ymin: 40.68392 xmax: -73.97141 ymax: 40.75769
## Geodetic CRS: WGS 84
## # A tibble: 6 × 13
## # Groups: ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive [6]
## ZIPCODE PO_NAME POPULATION COUNTY Positive Total totPop malePctg asianPop
## <dbl> <chr> <dbl> <chr> <dbl> <dbl> <int> <dbl> <int>
## 1 10001 New York 22413 New York 211 448 19146 51.2 4837
## 2 10002 New York 81305 New York 539 1024 74310 48.4 32149
## 3 10003 New York 55878 New York 279 662 53487 50.3 8027
## 4 10004 New York 2187 New York 23 59 NA NA NA
## 5 10005 New York 8107 New York 38 116 8809 42.8 1974
## 6 10006 New York 3011 New York 10 43 4639 44.0 1195
## # ℹ 4 more variables: blackPop <int>, hispanicPop <int>, whitePop <int>,
## # geometry <GEOMETRY [°]>
sum(covidPopZipNYC$totPop, na.rm = T)
## [1] 8334092
covidPopZipNYC %>% dplyr::filter(is.na(totPop))
## Simple feature collection with 7 features and 12 fields
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: -74.0473 ymin: 40.58123 xmax: -73.76521 ymax: 40.88939
## Geodetic CRS: WGS 84
## # A tibble: 7 × 13
## # Groups: ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive [7]
## ZIPCODE PO_NAME POPULATION COUNTY Positive Total totPop malePctg asianPop
## * <dbl> <chr> <dbl> <chr> <dbl> <dbl> <int> <dbl> <int>
## 1 10004 New York 2187 New Y… 23 59 NA NA NA
## 2 10075 New York 25203 New Y… 262 564 NA NA NA
## 3 10464 Bronx 4438 Bronx 75 194 NA NA NA
## 4 11109 Long Island… 2752 Queens 37 103 NA NA NA
## 5 11231 Brooklyn 33144 Kings 241 504 NA NA NA
## 6 11693 Far Rockaway 11052 Kings 197 334 NA NA NA
## 7 11693 Far Rockaway 11052 Queens 197 334 NA NA NA
## # ℹ 4 more variables: blackPop <int>, hispanicPop <int>, whitePop <int>,
## # geometry <GEOMETRY [°]>
plot(covidPopZipNYC["Total"], breaks='jenks')