R Spatial Lab Assignment # 8
task 1:
nyc_zip <- st_read("/Users/artyom/Documents/study/hunter/25S-GTECH385_DataAnalysisViz/R-spatial/data/ZIP_CODE_040114/ZIP_CODE_040114.shp")
## Reading layer `ZIP_CODE_040114' from data source
## `/Users/artyom/Documents/study/hunter/25S-GTECH385_DataAnalysisViz/R-spatial/data/ZIP_CODE_040114/ZIP_CODE_040114.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
covid_data <- readr::read_csv("/Users/artyom/Documents/study/hunter/25S-GTECH385_DataAnalysisViz/R-spatial/data/R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv", lazy = FALSE)
## Rows: 177 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): NEIGHBORHOOD_NAME, BOROUGH_GROUP, label
## dbl (10): MODIFIED_ZCTA, lat, lon, COVID_CASE_COUNT, COVID_CASE_RATE, POP_DE...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nyc_merged <- base::merge(nyc_zip, covid_data, by.x = "ZIPCODE", by.y = "MODIFIED_ZCTA")
names(nyc_merged)
## [1] "ZIPCODE" "BLDGZIP" "PO_NAME"
## [4] "POPULATION" "AREA" "STATE"
## [7] "COUNTY" "ST_FIPS" "CTY_FIPS"
## [10] "URL" "SHAPE_AREA" "SHAPE_LEN"
## [13] "NEIGHBORHOOD_NAME" "BOROUGH_GROUP" "label"
## [16] "lat" "lon" "COVID_CASE_COUNT"
## [19] "COVID_CASE_RATE" "POP_DENOMINATOR" "COVID_DEATH_COUNT"
## [22] "COVID_DEATH_RATE" "PERCENT_POSITIVE" "TOTAL_COVID_TESTS"
## [25] "geometry"
mapview(nyc_merged, zcol="COVID_DEATH_COUNT")
task 2:
nyc_food <- st_read("/Users/artyom/Documents/study/hunter/25S-GTECH385_DataAnalysisViz/R-spatial/data/R-Spatial_II_Lab/nycFoodStore.shp")
## Reading layer `nycFoodStore' from data source
## `/Users/artyom/Documents/study/hunter/25S-GTECH385_DataAnalysisViz/R-spatial/data/R-Spatial_II_Lab/nycFoodStore.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS: WGS 84
nyc_food <- st_transform(nyc_food, st_crs(nyc_zip))
food_counts <- nyc_food %>%
dplyr::filter(stringr::str_detect(Estbl_T, '[AJD]')) %>%
sf::st_join(nyc_zip, ., join = st_contains) %>%
group_by(Zip_Cod) %>%
summarise(FoodStoreNum = n()) %>%
mutate(Zip_Cod = as.character(Zip_Cod))
nyc_merged <- nyc_merged %>%
left_join(st_drop_geometry(food_counts), by = c("ZIPCODE" = "Zip_Cod"))
names(nyc_merged)
## [1] "ZIPCODE" "BLDGZIP" "PO_NAME"
## [4] "POPULATION" "AREA" "STATE"
## [7] "COUNTY" "ST_FIPS" "CTY_FIPS"
## [10] "URL" "SHAPE_AREA" "SHAPE_LEN"
## [13] "NEIGHBORHOOD_NAME" "BOROUGH_GROUP" "label"
## [16] "lat" "lon" "COVID_CASE_COUNT"
## [19] "COVID_CASE_RATE" "POP_DENOMINATOR" "COVID_DEATH_COUNT"
## [22] "COVID_DEATH_RATE" "PERCENT_POSITIVE" "TOTAL_COVID_TESTS"
## [25] "FoodStoreNum" "geometry"
plot(nyc_merged["FoodStoreNum"], breaks = "jenks", main = "Number of Food Stores")

task 3:
nys_health <- read_csv("/Users/artyom/Documents/study/hunter/25S-GTECH385_DataAnalysisViz/R-spatial/data/NYS_Health_Facility.csv",
show_col_types = FALSE, lazy = FALSE)
# Drop the rows without coordinates
missing_coords <- is.na(nys_health$`Facility Longitude`) | is.na(nys_health$`Facility Latitude`)
sum(missing_coords)
## [1] 142
nys_health <- nys_health[!missing_coords, ]
# Drop the rows where coordinates are 0
nys_health <- nys_health %>%
filter(!(nys_health$`Facility Longitude` == 0 & nys_health$`Facility Latitude` == 0))
# Swap Longtitude and Latitude for the row with false coordinates
nys_health <- nys_health %>%
mutate(
temp_lon = `Facility Longitude`,
`Facility Longitude` = ifelse(`Facility ID` == 5764, `Facility Latitude`, `Facility Longitude`),
`Facility Latitude` = ifelse(`Facility ID` == 5764, temp_lon, `Facility Latitude`)
) %>%
select(-temp_lon)
nys_health_sf <- st_as_sf(nys_health, coords = c("Facility Longitude", "Facility Latitude"))
st_crs(nys_health_sf) <- 4326
healthfacilities_count <- nyc_merged %>%
mutate(ziparea = st_area(geometry)) %>%
st_transform(4326) %>%
st_join(nys_health_sf) %>%
group_by(`Facility Zip Code`) %>%
summarize(healthfacilities_NUM = n(),
ziparea = max(ziparea),
rate = healthfacilities_NUM/ziparea * 1e6)
mapview(healthfacilities_count, zcol='healthfacilities_NUM', legend=FALSE)
nyc_merged <- nyc_merged %>%
left_join(
healthfacilities_count %>%
st_drop_geometry(),
by = c("ZIPCODE" = "Facility Zip Code")
)
task 4:
nycCensus <- sf::st_read('/Users/artyom/Documents/study/hunter/25S-GTECH385_DataAnalysisViz/R-spatial/data/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp', stringsAsFactors = FALSE)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `/Users/artyom/Documents/study/hunter/25S-GTECH385_DataAnalysisViz/R-spatial/data/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS84(DD)
str(nycCensus)
## Classes 'sf' and 'data.frame': 2165 obs. of 12 variables:
## $ boro_code : chr "5" "1" "1" "1" ...
## $ boro_ct201: chr "5000900" "1009800" "1010000" "1010200" ...
## $ boro_name : chr "Staten Island" "Manhattan" "Manhattan" "Manhattan" ...
## $ cdeligibil: chr "E" "I" "I" "I" ...
## $ ct2010 : chr "000900" "009800" "010000" "010200" ...
## $ ctlabel : chr "9" "98" "100" "102" ...
## $ ntacode : chr "SI22" "MN19" "MN19" "MN17" ...
## $ ntaname : chr "West New Brighton-New Brighton-St. George" "Turtle Bay-East Midtown" "Turtle Bay-East Midtown" "Midtown-Midtown South" ...
## $ puma : chr "3903" "3808" "3808" "3807" ...
## $ shape_area: num 2497010 1906016 1860938 1860993 1864600 ...
## $ shape_leng: num 7729 5534 5692 5688 5693 ...
## $ geometry :sfc_MULTIPOLYGON of length 2165; first list element: List of 1
## ..$ :List of 1
## .. ..$ : num [1:28, 1:2] -74.1 -74.1 -74.1 -74.1 -74.1 ...
## ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:11] "boro_code" "boro_ct201" "boro_name" "cdeligibil" ...
nycCensus %<>% dplyr::mutate(cntyFIPS = case_when(
boro_name == 'Bronx' ~ '005',
boro_name == 'Brooklyn' ~ '047',
boro_name == 'Manhattan' ~ '061',
boro_name == 'Queens' ~ '081',
boro_name == 'Staten Island' ~ '085'),
tractFIPS = paste(cntyFIPS, ct2010, sep='')
)
acsData <- readLines("/Users/artyom/Documents/study/hunter/25S-GTECH385_DataAnalysisViz/R-spatial/data/R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
magrittr::extract(-2) %>%
textConnection() %>%
read.csv(header=TRUE, quote= "\"") %>%
dplyr::select(GEO_ID,
totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
malePop = DP05_0002E, femalePop = DP05_0003E,
whitePop = DP05_0037E, blackPop = DP05_0038E,
asianPop = DP05_0067E, hispanicPop = DP05_0071E,
adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1))
popData <- merge(nycCensus, acsData, by.x ='tractFIPS', by.y = 'censusCode')
# verify the data
sum(popData$totPop)
## [1] 8443713
st_crs(popData)
## Coordinate Reference System:
## User input: WGS84(DD)
## wkt:
## GEOGCRS["WGS84(DD)",
## DATUM["WGS84",
## ELLIPSOID["WGS84",6378137,298.257223563,
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["longitude",east,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["latitude",north,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]]]
popNYC <- sf::st_transform(popData, st_crs(nyc_merged))
popData %>% magrittr::extract('elderlyPop') %>% plot(breaks = 'jenks')

names(popNYC)
## [1] "tractFIPS" "boro_code" "boro_ct201" "boro_name" "cdeligibil"
## [6] "ct2010" "ctlabel" "ntacode" "ntaname" "puma"
## [11] "shape_area" "shape_leng" "cntyFIPS" "GEO_ID" "totPop"
## [16] "elderlyPop" "malePop" "femalePop" "whitePop" "blackPop"
## [21] "asianPop" "hispanicPop" "adultPop" "citizenAdult" "geometry"
task 5:
covidPopZipNYC <- sf::st_join(nyc_merged,
popNYC %>% sf::st_centroid(),
join = st_contains) %>%
group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY,NEIGHBORHOOD_NAME, COVID_CASE_COUNT, TOTAL_COVID_TESTS, COVID_DEATH_COUNT,
FoodStoreNum, healthfacilities_NUM
) %>%
summarise(totPop = sum(totPop),
adultPop = sum(adultPop),
malePctg = sum(malePop)/totPop*100,
femalePop = sum(femalePop)/totPop*100,
elderlyPop = sum(elderlyPop),
asianPop = sum(asianPop),
blackPop = sum(blackPop),
hispanicPop = sum(hispanicPop),
whitePop = sum(whitePop))
## Warning: st_centroid assumes attributes are constant over geometries
## `summarise()` has grouped output by 'ZIPCODE', 'PO_NAME', 'POPULATION',
## 'COUNTY', 'NEIGHBORHOOD_NAME', 'COVID_CASE_COUNT', 'TOTAL_COVID_TESTS',
## 'COVID_DEATH_COUNT', 'FoodStoreNum'. You can override using the `.groups`
## argument.
covidPopZipNYC %>% head()
## Simple feature collection with 6 features and 19 fields
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: 971132.6 ymin: 188447.3 xmax: 992172.8 ymax: 215324.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 6 × 20
## # Groups: ZIPCODE, PO_NAME, POPULATION, COUNTY, NEIGHBORHOOD_NAME,
## # COVID_CASE_COUNT, TOTAL_COVID_TESTS, COVID_DEATH_COUNT, FoodStoreNum [6]
## ZIPCODE PO_NAME POPULATION COUNTY NEIGHBORHOOD_NAME COVID_CASE_COUNT
## <chr> <chr> <dbl> <chr> <chr> <dbl>
## 1 10001 New York 22413 New York Chelsea/NoMad/West Chel… 1542
## 2 10002 New York 81305 New York Chinatown/Lower East Si… 5902
## 3 10003 New York 55878 New York East Village/Gramercy/G… 2803
## 4 10004 New York 2187 New York Financial District 247
## 5 10005 New York 8107 New York Financial District 413
## 6 10006 New York 3011 New York Financial District 164
## # ℹ 14 more variables: TOTAL_COVID_TESTS <dbl>, COVID_DEATH_COUNT <dbl>,
## # FoodStoreNum <int>, healthfacilities_NUM <int>, totPop <int>,
## # adultPop <int>, malePctg <dbl>, femalePop <dbl>, elderlyPop <int>,
## # asianPop <int>, blackPop <int>, hispanicPop <int>, whitePop <int>,
## # geometry <GEOMETRY [US_survey_foot]>
sum(covidPopZipNYC$totPop, na.rm = T)
## [1] 8334092
covidPopZipNYC %>% dplyr::filter(is.na(totPop))
## Simple feature collection with 7 features and 19 fields
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: 971132.6 ymin: 151085.5 xmax: 1049202 ymax: 263362
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 7 × 20
## # Groups: ZIPCODE, PO_NAME, POPULATION, COUNTY, NEIGHBORHOOD_NAME,
## # COVID_CASE_COUNT, TOTAL_COVID_TESTS, COVID_DEATH_COUNT, FoodStoreNum [7]
## ZIPCODE PO_NAME POPULATION COUNTY NEIGHBORHOOD_NAME COVID_CASE_COUNT
## * <chr> <chr> <dbl> <chr> <chr> <dbl>
## 1 10004 New York 2187 New Yo… Financial Distri… 247
## 2 10075 New York 25203 New Yo… Lenox Hill/Upper… 1453
## 3 10464 Bronx 4438 Bronx City Island 387
## 4 11109 Long Island City 2752 Queens Long Island City 354
## 5 11231 Brooklyn 33144 Kings Carroll Gardens/… 1842
## 6 11693 Far Rockaway 11052 Kings Arverne/Broad Ch… 1111
## 7 11693 Far Rockaway 11052 Queens Arverne/Broad Ch… 1111
## # ℹ 14 more variables: TOTAL_COVID_TESTS <dbl>, COVID_DEATH_COUNT <dbl>,
## # FoodStoreNum <int>, healthfacilities_NUM <int>, totPop <int>,
## # adultPop <int>, malePctg <dbl>, femalePop <dbl>, elderlyPop <int>,
## # asianPop <int>, blackPop <int>, hispanicPop <int>, whitePop <int>,
## # geometry <GEOMETRY [US_survey_foot]>
plot(covidPopZipNYC["COVID_CASE_COUNT"], breaks='jenks')

plot(covidPopZipNYC["blackPop"], breaks='jenks')

names(covidPopZipNYC)
## [1] "ZIPCODE" "PO_NAME" "POPULATION"
## [4] "COUNTY" "NEIGHBORHOOD_NAME" "COVID_CASE_COUNT"
## [7] "TOTAL_COVID_TESTS" "COVID_DEATH_COUNT" "FoodStoreNum"
## [10] "healthfacilities_NUM" "totPop" "adultPop"
## [13] "malePctg" "femalePop" "elderlyPop"
## [16] "asianPop" "blackPop" "hispanicPop"
## [19] "whitePop" "geometry"