R Spaital Lab Assignment # 1
library(sf); library(rgdal); library(sp); require(magrittr)
library(raster); library(dplyr); library(rgeos); library(rstudioapi)
wd <- dirname(getActiveDocumentContext()$path)
setwd(wd)
#read in as SF
NYC_zip_sf <- st_read("../Wk 11/ZIP_CODE_040114/ZIP_CODE_040114.shp")
## Reading layer `ZIP_CODE_040114' from data source `/Volumes/Storage/School/'21 (1) Spring/GTECH 38520 R/Homework/Wk 11_15/Wk 11/ZIP_CODE_040114/ZIP_CODE_040114.shp' using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
as.numeric(as.character(NYC_zip_sf))
## [1] NA NA NA NA NA NA NA NA NA NA NA NA NA
str(NYC_zip_sf)
## Classes 'sf' and 'data.frame': 263 obs. of 13 variables:
## $ ZIPCODE : chr "11436" "11213" "11212" "11225" ...
## $ BLDGZIP : chr "0" "0" "0" "0" ...
## $ PO_NAME : chr "Jamaica" "Brooklyn" "Brooklyn" "Brooklyn" ...
## $ POPULATION: num 18681 62426 83866 56527 72280 ...
## $ AREA : num 22699295 29631004 41972104 23698630 36868799 ...
## $ STATE : chr "NY" "NY" "NY" "NY" ...
## $ COUNTY : chr "Queens" "Kings" "Kings" "Kings" ...
## $ ST_FIPS : chr "36" "36" "36" "36" ...
## $ CTY_FIPS : chr "081" "047" "047" "047" ...
## $ URL : chr "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
## $ SHAPE_AREA: num 0 0 0 0 0 0 0 0 0 0 ...
## $ SHAPE_LEN : num 0 0 0 0 0 0 0 0 0 0 ...
## $ geometry :sfc_POLYGON of length 263; first list element: List of 1
## ..$ : num [1:159, 1:2] 1038098 1038142 1038171 1038280 1038521 ...
## ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:12] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...
Covid_Data <- read.csv("tests-by-zcta_2021_04_23.csv")
str(Covid_Data)
## 'data.frame': 177 obs. of 13 variables:
## $ MODIFIED_ZCTA : int 10001 10002 10003 10004 10005 10006 10007 10009 10010 10011 ...
## $ NEIGHBORHOOD_NAME: chr "Chelsea/NoMad/West Chelsea" "Chinatown/Lower East Side" "East Village/Gramercy/Greenwich Village" "Financial District" ...
## $ BOROUGH_GROUP : chr "Manhattan" "Manhattan" "Manhattan" "Manhattan" ...
## $ label : chr "10001, 10118" "10002" "10003" "10004" ...
## $ lat : num 40.8 40.7 40.7 40.7 40.7 ...
## $ lon : num -74 -74 -74 -74 -74 ...
## $ COVID_CASE_COUNT : int 1542 5902 2803 247 413 164 379 3605 1686 2542 ...
## $ COVID_CASE_RATE : num 5584 7836 5193 8311 4716 ...
## $ POP_DENOMINATOR : num 27613 75323 53978 2972 8757 ...
## $ COVID_DEATH_COUNT: int 35 264 48 2 0 1 4 118 37 62 ...
## $ COVID_DEATH_RATE : num 126.8 350.5 88.9 67.3 0 ...
## $ PERCENT_POSITIVE : num 7.86 12.63 6.93 6.92 6.72 ...
## $ TOTAL_COVID_TESTS: int 20158 48197 41076 3599 6102 2441 6342 38773 27864 35539 ...
#left join of the two dfs accounting for difference in type.
# filter out zipcodes where population is 0 and remove some NA areas.
left_join(NYC_zip_sf %>% mutate(ZIPCODE = as.numeric(as.character(ZIPCODE))),
Covid_Data, by = c("ZIPCODE" = "MODIFIED_ZCTA")) %>%
filter(POPULATION != 0) %>%
filter(BOROUGH_GROUP != 'NA') -> Covid_ZIP
str(Covid_ZIP)
## Classes 'sf' and 'data.frame': 189 obs. of 25 variables:
## $ ZIPCODE : num 11436 11213 11212 11225 11218 ...
## $ BLDGZIP : chr "0" "0" "0" "0" ...
## $ PO_NAME : chr "Jamaica" "Brooklyn" "Brooklyn" "Brooklyn" ...
## $ POPULATION : num 18681 62426 83866 56527 72280 ...
## $ AREA : num 22699295 29631004 41972104 23698630 36868799 ...
## $ STATE : chr "NY" "NY" "NY" "NY" ...
## $ COUNTY : chr "Queens" "Kings" "Kings" "Kings" ...
## $ ST_FIPS : chr "36" "36" "36" "36" ...
## $ CTY_FIPS : chr "081" "047" "047" "047" ...
## $ URL : chr "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
## $ SHAPE_AREA : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SHAPE_LEN : num 0 0 0 0 0 0 0 0 0 0 ...
## $ NEIGHBORHOOD_NAME: chr "South Jamaica/South Ozone Park" "Crown Heights (East)" "Ocean Hill-Brownsville" "Crown Heights (West)/Prospect Lefferts Gardens" ...
## $ BOROUGH_GROUP : chr "Queens" "Brooklyn" "Brooklyn" "Brooklyn" ...
## $ label : chr "11436" "11213" "11212" "11225" ...
## $ lat : num 40.7 40.7 40.7 40.7 40.6 ...
## $ lon : num -73.8 -73.9 -73.9 -74 -74 ...
## $ COVID_CASE_COUNT : int 1888 5166 7182 3833 6199 7279 8429 5380 11044 7331 ...
## $ COVID_CASE_RATE : num 9420 7997 9710 6664 8377 ...
## $ POP_DENOMINATOR : num 20043 64601 73967 57514 73996 ...
## $ COVID_DEATH_COUNT: int 64 203 330 177 218 368 256 206 380 219 ...
## $ COVID_DEATH_RATE : num 319 314 446 308 295 ...
## $ PERCENT_POSITIVE : num 17.6 13.7 15.6 11.6 13.9 ...
## $ TOTAL_COVID_TESTS: int 11082 38560 47319 33709 45884 56287 55444 35070 59585 43449 ...
## $ geometry :sfc_POLYGON of length 189; first list element: List of 1
## ..$ : num [1:159, 1:2] 1038098 1038142 1038171 1038280 1038521 ...
## ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:24] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...
# Map of Percent positive per zipcode.
plot(Covid_ZIP['PERCENT_POSITIVE'])

# Task 2
st_read("./nycFoodstore.shp") -> nycFoodStoreSF
## Reading layer `nycFoodStore' from data source `/Volumes/Storage/School/'21 (1) Spring/GTECH 38520 R/Homework/Wk 11_15/Wk 12/nycFoodStore.shp' using driver `ESRI Shapefile'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS: WGS 84
st_crs(nycFoodStoreSF)
## Coordinate Reference System:
## User input: WGS 84
## wkt:
## GEOGCRS["WGS 84",
## DATUM["World Geodetic System 1984",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["latitude",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["longitude",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4326]]
st_crs(Covid_ZIP)
## Coordinate Reference System:
## User input: NAD83 / New York Long Island (ftUS)
## wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
## BASEGEOGCRS["NAD83",
## DATUM["North American Datum 1983",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4269]],
## CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
## METHOD["Lambert Conic Conformal (2SP)",
## ID["EPSG",9802]],
## PARAMETER["Latitude of false origin",40.1666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8821]],
## PARAMETER["Longitude of false origin",-74,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8822]],
## PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8823]],
## PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8824]],
## PARAMETER["Easting at false origin",984250,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8826]],
## PARAMETER["Northing at false origin",0,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8827]]],
## CS[Cartesian,2],
## AXIS["easting (X)",east,
## ORDER[1],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## AXIS["northing (Y)",north,
## ORDER[2],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## USAGE[
## SCOPE["unknown"],
## AREA["USA - New York - SPCS - Long Island"],
## BBOX[40.47,-74.26,41.3,-71.8]],
## ID["EPSG",2263]]
st_transform(nycFoodStoreSF, st_crs(2263)) -> nycFoodStoreSFLocal
nycFoodStoreSF %<>% st_transform(st_crs(Covid_ZIP))
nycFoodStoreSF %>% filter(stringr::str_detect(Estbl_T, '[AJD]')) %>%
st_join(Covid_ZIP, ., join = st_contains) -> nycFoodStoreJoin
nycFoodStoreJoin %>%
group_by(ZIPCODE, Estbl_T) %>%
summarise(FoodStoreNUM = n()) %>%
magrittr::extract('FoodStoreNUM') %>%
plot(breaks = "jenks", main = 'Number of Food Stores')

# Task 3: Aggregate the NYC health facicilities points to zipcodes. Choose appropriate subtypes.
Health_Fac <- st_read("../Wk 11/Health_Fac_SF/Health_Fac_SF.shp")
## Reading layer `Health_Fac_SF' from data source `/Volumes/Storage/School/'21 (1) Spring/GTECH 38520 R/Homework/Wk 11_15/Wk 11/Health_Fac_SF/Health_Fac_SF.shp' using driver `ESRI Shapefile'
## Simple feature collection with 1292 features and 11 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -74.19681 ymin: 40.51677 xmax: -73.69169 ymax: 40.91062
## Geodetic CRS: WGS 84
str(Health_Fac)
## Classes 'sf' and 'data.frame': 1292 obs. of 12 variables:
## $ Fclt_ID : int 6230 7257 9006 9970 1217 1820 4326 5574 7579 1653 ...
## $ Fclty_N : chr "NYU Langone Rutherford" "Park Ridge Family Health Center" "FedCare, Inc." "Parkmed NYC, LLC" ...
## $ Shrt_Ds : chr "HOSP-EC" "HOSP-EC" "DTC" "DTC" ...
## $ Dscrptn : chr "Hospital Extension Clinic" "Hospital Extension Clinic" "Diagnostic and Treatment Center" "Diagnostic and Treatment Center" ...
## $ Fcl_A_1 : chr "305 Second Ave" "6317 4th Avenue" "344 West 51 Street" "800 Second Avenue, 6th Floor" ...
## $ Fcl_A_2 : chr NA NA NA NA ...
## $ Fclty_Ct: chr "New York" "Brooklyn" "New York" "New York" ...
## $ Fclty_S : chr "New York" "New York" "New York" "New York" ...
## $ Fcl_Z_C : chr "10003" "11220" "10019" "10017" ...
## $ Fcl_C_C : int 7093 7095 7093 7093 7094 7095 7094 7095 7096 7096 ...
## $ Fclty_Cn: chr "New York" "Kings" "New York" "New York" ...
## $ geometry:sfc_POINT of length 1292; first list element: 'XY' num -74 40.7
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:11] "Fclt_ID" "Fclty_N" "Shrt_Ds" "Dscrptn" ...
st_crs(Health_Fac)
## Coordinate Reference System:
## User input: WGS 84
## wkt:
## GEOGCRS["WGS 84",
## DATUM["World Geodetic System 1984",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["latitude",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["longitude",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4326]]
st_crs(Covid_ZIP)
## Coordinate Reference System:
## User input: NAD83 / New York Long Island (ftUS)
## wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
## BASEGEOGCRS["NAD83",
## DATUM["North American Datum 1983",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4269]],
## CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
## METHOD["Lambert Conic Conformal (2SP)",
## ID["EPSG",9802]],
## PARAMETER["Latitude of false origin",40.1666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8821]],
## PARAMETER["Longitude of false origin",-74,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8822]],
## PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8823]],
## PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8824]],
## PARAMETER["Easting at false origin",984250,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8826]],
## PARAMETER["Northing at false origin",0,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8827]]],
## CS[Cartesian,2],
## AXIS["easting (X)",east,
## ORDER[1],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## AXIS["northing (Y)",north,
## ORDER[2],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## USAGE[
## SCOPE["unknown"],
## AREA["USA - New York - SPCS - Long Island"],
## BBOX[40.47,-74.26,41.3,-71.8]],
## ID["EPSG",2263]]
# Change Health Fac crs to 2263
st_transform(Health_Fac, st_crs(2263)) -> Health_Fac_Local
st_crs(Health_Fac_Local)
## Coordinate Reference System:
## User input: EPSG:2263
## wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
## BASEGEOGCRS["NAD83",
## DATUM["North American Datum 1983",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4269]],
## CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
## METHOD["Lambert Conic Conformal (2SP)",
## ID["EPSG",9802]],
## PARAMETER["Latitude of false origin",40.1666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8821]],
## PARAMETER["Longitude of false origin",-74,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8822]],
## PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8823]],
## PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8824]],
## PARAMETER["Easting at false origin",984250,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8826]],
## PARAMETER["Northing at false origin",0,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8827]]],
## CS[Cartesian,2],
## AXIS["easting (X)",east,
## ORDER[1],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## AXIS["northing (Y)",north,
## ORDER[2],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## USAGE[
## SCOPE["unknown"],
## AREA["USA - New York - SPCS - Long Island"],
## BBOX[40.47,-74.26,41.3,-71.8]],
## ID["EPSG",2263]]
Health_Fac %<>% st_transform(st_crs(Covid_ZIP))
not_valid <- c("CHHA", "ADHCP", "HOSP-SB", "LTHHCP", "NA")
Health_Fac_Local %>% filter(Shrt_Ds != "CHHA") %>%
filter(Shrt_Ds != "ADHCP") %>%
filter(Shrt_Ds != "HOSP-SB") %>%
filter(Shrt_Ds != "LTHHCP") %>%
filter(Shrt_Ds != "NA") %>%
st_join(Covid_ZIP, ., join = st_contains) %>%
filter(Shrt_Ds != "NA")-> Health_Fac_Join
Health_Fac_Join %>%
group_by(ZIPCODE) %>%
summarise(HealthFacNUM = n()) %>%
magrittr::extract('HealthFacNUM') %>%
plot(breaks = "jenks", main = 'Number of Health Facilities')

## Produces some blank areas. Potentially due to lack of existing facilities for that area.
## Trouble shooting the error:
## Error in cut.default(values, breaks, include.lowest = TRUE) :
## 'breaks' are not unique
# st_crs(Health_Fac_Join)
# st_crs(Covid_ZIP)
# range(st_coordinates(Health_Fac_Local)) ## 127612.4 1069728.4
# range(st_coordinates(Health_Fac_Join)) ## 1 1067113
# range(st_coordinates(nycFoodStoreJoin)) ## 1 1067113 r operates on this so this must be normal for the above join as well
# SOlved by removing Shrt_Ds from the grouping function.
# Task 4: Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.
Census_TractSF <- st_read("./2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp", stringsAsFactors = FALSE)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `/Volumes/Storage/School/'21 (1) Spring/GTECH 38520 R/Homework/Wk 11_15/Wk 12/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS84(DD)
Census_TractSF %<>% dplyr::mutate(cntyFIPS = dplyr::case_when(
boro_name == 'Bronx' ~ '005',
boro_name == 'Brooklyn' ~ '047',
boro_name == 'Manhattan' ~ '061',
boro_name == 'Queens' ~ '081',
boro_name == 'Staten Island' ~ '085'),
tractFIPS = paste(cntyFIPS, ct2010, sep='')
)
## I was worried the first line would interfere with subsequent operations, but on my original join, it disappears.
ACS_DATA <- readLines("./ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
magrittr::extract(-2) %>%
textConnection() %>%
read.csv(header=TRUE, quote= "\"") %>%
dplyr::select(GEO_ID,
totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
malePop = DP05_0002E, femalePop = DP05_0003E,
whitePop = DP05_0037E, blackPop = DP05_0038E,
asianPop = DP05_0067E, hispanicPop = DP05_0071E,
adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1));
## displays the first ten rows in console
ACS_DATA %>%
magrittr::extract(1:10,)
## GEO_ID totPop elderlyPop malePop femalePop whitePop blackPop
## 1 1400000US36005000100 7080 51 6503 577 1773 4239
## 2 1400000US36005000200 4542 950 2264 2278 2165 1279
## 3 1400000US36005000400 5634 710 2807 2827 2623 1699
## 4 1400000US36005001600 5917 989 2365 3552 2406 2434
## 5 1400000US36005001900 2765 76 1363 1402 585 1041
## 6 1400000US36005002000 9409 977 4119 5290 3185 4487
## 7 1400000US36005002300 4600 648 2175 2425 479 2122
## 8 1400000US36005002400 172 0 121 51 69 89
## 9 1400000US36005002500 5887 548 2958 2929 903 1344
## 10 1400000US36005002701 2868 243 1259 1609 243 987
## asianPop hispanicPop adultPop citizenAdult censusCode
## 1 130 2329 6909 6100 005000100
## 2 119 3367 3582 2952 005000200
## 3 226 3873 4507 4214 005000400
## 4 68 3603 4416 3851 005001600
## 5 130 1413 2008 1787 005001900
## 6 29 5905 6851 6170 005002000
## 7 27 2674 3498 3056 005002300
## 8 14 0 131 42 005002400
## 9 68 4562 4237 2722 005002500
## 10 0 1985 1848 1412 005002701
## An SQL example from github:
# CASE WHEN (substring(geo_id2,4,2) = '61')
# THEN '1'
# WHEN (substring(geo_id2,4,2) = '05')
# THEN '2'
# WHEN (substring(geo_id2,4,2) = '47')
# THEN '3'
# WHEN (substring(geo_id2,4,2) = '81')
# THEN '4'
# WHEN (substring(geo_id2,4,2) = '85')
# THEN '5'
# END
# ) || right(geo_id2,6)
## R Method
## Replace the first digit of the boro ct 201 column with the corresponding geo id code.
##My Original Attempt at modifying the borough codes
# Census_TractSF$boro_ct201 <- gsub('^1','061',Census_TractSF$boro_ct201)
# Census_TractSF$boro_ct201 <- gsub('^2','005',Census_TractSF$boro_ct201)
# Census_TractSF$boro_ct201 <- gsub('^4','081',Census_TractSF$boro_ct201)
# Census_TractSF$boro_ct201 <- gsub('^3','047',Census_TractSF$boro_ct201)
# Census_TractSF$boro_ct201 <- gsub('^5','085',Census_TractSF$boro_ct201)
str(Census_TractSF)
## Classes 'sf' and 'data.frame': 2165 obs. of 14 variables:
## $ boro_code : chr "5" "1" "1" "1" ...
## $ boro_ct201: chr "5000900" "1009800" "1010000" "1010200" ...
## $ boro_name : chr "Staten Island" "Manhattan" "Manhattan" "Manhattan" ...
## $ cdeligibil: chr "E" "I" "I" "I" ...
## $ ct2010 : chr "000900" "009800" "010000" "010200" ...
## $ ctlabel : chr "9" "98" "100" "102" ...
## $ ntacode : chr "SI22" "MN19" "MN19" "MN17" ...
## $ ntaname : chr "West New Brighton-New Brighton-St. George" "Turtle Bay-East Midtown" "Turtle Bay-East Midtown" "Midtown-Midtown South" ...
## $ puma : chr "3903" "3808" "3808" "3807" ...
## $ shape_area: num 2497010 1906016 1860938 1860993 1864600 ...
## $ shape_leng: num 7729 5534 5692 5688 5693 ...
## $ geometry :sfc_MULTIPOLYGON of length 2165; first list element: List of 1
## ..$ :List of 1
## .. ..$ : num [1:28, 1:2] -74.1 -74.1 -74.1 -74.1 -74.1 ...
## ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
## $ cntyFIPS : chr "085" "061" "061" "061" ...
## $ tractFIPS : chr "085000900" "061009800" "061010000" "061010200" ...
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:13] "boro_code" "boro_ct201" "boro_name" "cdeligibil" ...
str(ACS_DATA)
## 'data.frame': 2167 obs. of 12 variables:
## $ GEO_ID : chr "1400000US36005000100" "1400000US36005000200" "1400000US36005000400" "1400000US36005001600" ...
## $ totPop : int 7080 4542 5634 5917 2765 9409 4600 172 5887 2868 ...
## $ elderlyPop : int 51 950 710 989 76 977 648 0 548 243 ...
## $ malePop : int 6503 2264 2807 2365 1363 4119 2175 121 2958 1259 ...
## $ femalePop : int 577 2278 2827 3552 1402 5290 2425 51 2929 1609 ...
## $ whitePop : int 1773 2165 2623 2406 585 3185 479 69 903 243 ...
## $ blackPop : int 4239 1279 1699 2434 1041 4487 2122 89 1344 987 ...
## $ asianPop : int 130 119 226 68 130 29 27 14 68 0 ...
## $ hispanicPop : int 2329 3367 3873 3603 1413 5905 2674 0 4562 1985 ...
## $ adultPop : int 6909 3582 4507 4416 2008 6851 3498 131 4237 1848 ...
## $ citizenAdult: int 6100 2952 4214 3851 1787 6170 3056 42 2722 1412 ...
## $ censusCode : chr "005000100" "005000200" "005000400" "005001600" ...
# Makes a new column out of the final 8 digits of GEO_ID. Should be 8 columns
# ACS_DATA %>% mutate(., boro_match = str_sub(ACS_DATA$GEO_ID ,-8,-1)) -> ACS_DATA
# str(ACS_DATA)
# left_join(Census_TractSF, ACS_DATA, by = c("boro_ct201" = "boro_match")) -> ACS_to_TRACT
ACS_to_TRACT <- merge(Census_TractSF, ACS_DATA, by.x ='tractFIPS', by.y = 'censusCode')
sum(ACS_to_TRACT$totPop)
## [1] 8443713
# Task 5
# reprojects the ACS_to_TRACT table
popNYC <- sf::st_transform(ACS_to_TRACT, st_crs(Covid_ZIP))
st_crs(ACS_to_TRACT)
## Coordinate Reference System:
## User input: WGS84(DD)
## wkt:
## GEOGCRS["WGS84(DD)",
## DATUM["WGS84",
## ELLIPSOID["WGS84",6378137,298.257223563,
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["geodetic longitude",east,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["geodetic latitude",north,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]]]
ACS_to_TRACT %>% magrittr::extract('elderlyPop') %>% plot(breaks = 'jenks')

covidPopZipNYC <- sf::st_join(Covid_ZIP,
popNYC %>% sf::st_centroid(), # converts census tracts to points
join = st_contains) %>%
group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT, TOTAL_COVID_TESTS) %>%
summarise(totPop = sum(totPop),
malePctg = sum(malePop)/totPop*100,
asianPop = sum(asianPop),
blackPop = sum(blackPop),
hispanicPop = sum(hispanicPop),
whitePop = sum(whitePop))
covidPopZipNYC %>% head()
## Simple feature collection with 6 features and 12 fields
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: 971132.6 ymin: 188447.3 xmax: 992172.8 ymax: 215324.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 6 x 13
## # Groups: ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT [6]
## ZIPCODE PO_NAME POPULATION COUNTY COVID_CASE_COUNT TOTAL_COVID_TES… totPop
## <dbl> <chr> <dbl> <chr> <int> <int> <int>
## 1 10001 New Yo… 22413 New Y… 1542 20158 19146
## 2 10002 New Yo… 81305 New Y… 5902 48197 74310
## 3 10003 New Yo… 55878 New Y… 2803 41076 53487
## 4 10004 New Yo… 2187 New Y… 247 3599 NA
## 5 10005 New Yo… 8107 New Y… 413 6102 8809
## 6 10006 New Yo… 3011 New Y… 164 2441 4639
## # … with 6 more variables: malePctg <dbl>, asianPop <int>, blackPop <int>,
## # hispanicPop <int>, whitePop <int>, geometry <GEOMETRY [US_survey_foot]>
sum(covidPopZipNYC$totPop, na.rm = TRUE)
## [1] 8334092
## My na's do not match nor does my population, but I suspect this is because I removed them in earlier tasks.
## So the only ones I have left are the result of more recent operations.
covidPopZipNYC %>% dplyr::filter(is.na(totPop))
## Simple feature collection with 7 features and 12 fields
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: 971132.6 ymin: 151085.5 xmax: 1049202 ymax: 263362
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 7 x 13
## # Groups: ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT [7]
## ZIPCODE PO_NAME POPULATION COUNTY COVID_CASE_COUNT TOTAL_COVID_TES… totPop
## * <dbl> <chr> <dbl> <chr> <int> <int> <int>
## 1 10004 New Yo… 2187 New Y… 247 3599 NA
## 2 10075 New Yo… 25203 New Y… 1453 18391 NA
## 3 10464 Bronx 4438 Bronx 387 2933 NA
## 4 11109 Long I… 2752 Queens 354 4687 NA
## 5 11231 Brookl… 33144 Kings 1842 24431 NA
## 6 11693 Far Ro… 11052 Kings 1111 7027 NA
## 7 11693 Far Ro… 11052 Queens 1111 7027 NA
## # … with 6 more variables: malePctg <dbl>, asianPop <int>, blackPop <int>,
## # hispanicPop <int>, whitePop <int>, geometry <GEOMETRY [US_survey_foot]>
plot(covidPopZipNYC["COVID_CASE_COUNT"], breaks='jenks')

plot(covidPopZipNYC["asianPop"], breaks='jenks')
