download.file("http://www.geo.hunter.cuny.edu/~ssun/R-Spatial/data/R-spatial-data.zip",
"R-spatial-data.zip");
unzip("R-spatial-data.zip", exdir = "data")
# Load a list of packages. Install them first if they are not available.
# The list of pacakges
list.of.packages <- c("sf", "sp", "spatial", "maptools", "rgeos","rgdal",
"raster", "grid", "rasterVis",
"tidyverse", "magrittr", "ggpubr", "devtools",
"htmlwidgets", "mapview",
"classInt", "RColorBrewer", "ggmap", "tmap", "leaflet",
"ggrepel", "ggsn",
"spdep","spatialreg","GWmodel");
# Check out the packages that have not been installed yet.
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
# Install those missed packages first. It could take a long time for the first time.
if(length(new.packages)>0) install.packages(new.packages)
# Load all packages.
lapply(list.of.packages,function(x) {
require(x,character.only = TRUE,quietly = TRUE)
})
## Warning: package 'sf' was built under R version 4.0.5
## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
## Warning: package 'sp' was built under R version 4.0.5
## Warning: package 'maptools' was built under R version 4.0.5
## Checking rgeos availability: TRUE
## Warning: package 'rgeos' was built under R version 4.0.5
## rgeos version: 0.5-5, (SVN revision 640)
## GEOS runtime version: 3.8.0-CAPI-1.13.1
## Linking to sp version: 1.4-5
## Polygon checking: TRUE
## Warning: package 'rgdal' was built under R version 4.0.5
## rgdal: version: 1.5-23, (SVN revision 1121)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.2.1, released 2020/12/29
## Path to GDAL shared files: C:/Users/andie/OneDrive/Documents/R/win-library/4.0/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ runtime: Rel. 7.2.1, January 1st, 2021, [PJ_VERSION: 721]
## Path to PROJ shared files: C:/Users/andie/OneDrive/Documents/R/win-library/4.0/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.4-5
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
## Overwritten PROJ_LIB was C:/Users/andie/OneDrive/Documents/R/win-library/4.0/rgdal/proj
## Warning: package 'raster' was built under R version 4.0.5
## Warning: package 'rasterVis' was built under R version 4.0.5
## Warning: package 'terra' was built under R version 4.0.5
## terra version 1.1.17
##
## Attaching package: 'terra'
## The following object is masked from 'package:grid':
##
## depth
## The following object is masked from 'package:rgdal':
##
## project
## Warning: package 'latticeExtra' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.0 v dplyr 1.0.5
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'readr' was built under R version 4.0.5
## Warning: package 'purrr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## Warning: package 'stringr' was built under R version 4.0.5
## Warning: package 'forcats' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::collapse() masks terra::collapse()
## x dplyr::desc() masks terra::desc()
## x tidyr::expand() masks terra::expand()
## x tidyr::extract() masks terra::extract(), raster::extract()
## x tidyr::fill() masks terra::fill()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x ggplot2::layer() masks latticeExtra::layer()
## x dplyr::near() masks terra::near()
## x tidyr::pack() masks terra::pack()
## x dplyr::select() masks terra::select(), raster::select()
## x tidyr::separate() masks terra::separate()
## x purrr::transpose() masks terra::transpose()
## Warning: package 'magrittr' was built under R version 4.0.5
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
## The following objects are masked from 'package:terra':
##
## extract, inset
## The following object is masked from 'package:raster':
##
## extract
## Warning: package 'ggpubr' was built under R version 4.0.5
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:terra':
##
## rotate
## The following object is masked from 'package:raster':
##
## rotate
## Warning: package 'devtools' was built under R version 4.0.5
## Warning: package 'usethis' was built under R version 4.0.5
## Warning: package 'htmlwidgets' was built under R version 4.0.5
## Warning: package 'mapview' was built under R version 4.0.5
## GDAL version >= 3.1.0 | setting mapviewOptions(fgb = TRUE)
## Warning: package 'classInt' was built under R version 4.0.5
## Warning: package 'ggmap' was built under R version 4.0.5
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
##
## Attaching package: 'ggmap'
## The following object is masked from 'package:magrittr':
##
## inset
## The following object is masked from 'package:terra':
##
## inset
## Warning: package 'tmap' was built under R version 4.0.5
## Warning: package 'leaflet' was built under R version 4.0.5
## Warning: package 'ggrepel' was built under R version 4.0.5
## Warning: package 'ggsn' was built under R version 4.0.5
##
## Attaching package: 'ggsn'
## The following object is masked from 'package:raster':
##
## scalebar
## Warning: package 'spdep' was built under R version 4.0.5
## Warning: package 'spData' was built under R version 4.0.5
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Warning: package 'spatialreg' was built under R version 4.0.5
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## The following objects are masked from 'package:terra':
##
## expand, pack
## Registered S3 methods overwritten by 'spatialreg':
## method from
## residuals.stsls spdep
## deviance.stsls spdep
## coef.stsls spdep
## print.stsls spdep
## summary.stsls spdep
## print.summary.stsls spdep
## residuals.gmsar spdep
## deviance.gmsar spdep
## coef.gmsar spdep
## fitted.gmsar spdep
## print.gmsar spdep
## summary.gmsar spdep
## print.summary.gmsar spdep
## print.lagmess spdep
## summary.lagmess spdep
## print.summary.lagmess spdep
## residuals.lagmess spdep
## deviance.lagmess spdep
## coef.lagmess spdep
## fitted.lagmess spdep
## logLik.lagmess spdep
## fitted.SFResult spdep
## print.SFResult spdep
## fitted.ME_res spdep
## print.ME_res spdep
## print.lagImpact spdep
## plot.lagImpact spdep
## summary.lagImpact spdep
## HPDinterval.lagImpact spdep
## print.summary.lagImpact spdep
## print.sarlm spdep
## summary.sarlm spdep
## residuals.sarlm spdep
## deviance.sarlm spdep
## coef.sarlm spdep
## vcov.sarlm spdep
## fitted.sarlm spdep
## logLik.sarlm spdep
## anova.sarlm spdep
## predict.sarlm spdep
## print.summary.sarlm spdep
## print.sarlm.pred spdep
## as.data.frame.sarlm.pred spdep
## residuals.spautolm spdep
## deviance.spautolm spdep
## coef.spautolm spdep
## fitted.spautolm spdep
## print.spautolm spdep
## summary.spautolm spdep
## logLik.spautolm spdep
## print.summary.spautolm spdep
## print.WXImpact spdep
## summary.WXImpact spdep
## print.summary.WXImpact spdep
## predict.SLX spdep
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## anova.sarlm, as.spam.listw, as_dgRMatrix_listw, as_dsCMatrix_I,
## as_dsCMatrix_IrW, as_dsTMatrix_listw, bptest.sarlm, can.be.simmed,
## cheb_setup, coef.gmsar, coef.sarlm, coef.spautolm, coef.stsls,
## create_WX, deviance.gmsar, deviance.sarlm, deviance.spautolm,
## deviance.stsls, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
## errorsarlm, fitted.gmsar, fitted.ME_res, fitted.sarlm,
## fitted.SFResult, fitted.spautolm, get.ClusterOption,
## get.coresOption, get.mcOption, get.VerboseOption,
## get.ZeroPolicyOption, GMargminImage, GMerrorsar, griffith_sone,
## gstsls, Hausman.test, HPDinterval.lagImpact, impacts, intImpacts,
## Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
## lextrS, lextrW, lmSLX, logLik.sarlm, logLik.spautolm, LR.sarlm,
## LR1.sarlm, LR1.spautolm, LU_prepermutate_setup, LU_setup,
## Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
## mom_calc_int2, moments_setup, powerWeights, predict.sarlm,
## predict.SLX, print.gmsar, print.ME_res, print.sarlm,
## print.sarlm.pred, print.SFResult, print.spautolm, print.stsls,
## print.summary.gmsar, print.summary.sarlm, print.summary.spautolm,
## print.summary.stsls, residuals.gmsar, residuals.sarlm,
## residuals.spautolm, residuals.stsls, sacsarlm, SE_classic_setup,
## SE_interp_setup, SE_whichMin_setup, set.ClusterOption,
## set.coresOption, set.mcOption, set.VerboseOption,
## set.ZeroPolicyOption, similar.listw, spam_setup, spam_update_setup,
## SpatialFiltering, spautolm, spBreg_err, spBreg_lag, spBreg_sac,
## stsls, subgraph_eigenw, summary.gmsar, summary.sarlm,
## summary.spautolm, summary.stsls, trW, vcov.sarlm, Wald1.sarlm
## Warning: package 'GWmodel' was built under R version 4.0.5
## Warning: package 'robustbase' was built under R version 4.0.5
## Warning: package 'Rcpp' was built under R version 4.0.5
## Welcome to GWmodel version 2.2-3.
## The new version of GWmodel 2.2-5 now is ready
##
## Attaching package: 'GWmodel'
## The following objects are masked from 'package:stats':
##
## BIC, fitted
## [[1]]
## [1] TRUE
##
## [[2]]
## [1] TRUE
##
## [[3]]
## [1] TRUE
##
## [[4]]
## [1] TRUE
##
## [[5]]
## [1] TRUE
##
## [[6]]
## [1] TRUE
##
## [[7]]
## [1] TRUE
##
## [[8]]
## [1] TRUE
##
## [[9]]
## [1] TRUE
##
## [[10]]
## [1] TRUE
##
## [[11]]
## [1] TRUE
##
## [[12]]
## [1] TRUE
##
## [[13]]
## [1] TRUE
##
## [[14]]
## [1] TRUE
##
## [[15]]
## [1] TRUE
##
## [[16]]
## [1] TRUE
##
## [[17]]
## [1] TRUE
##
## [[18]]
## [1] TRUE
##
## [[19]]
## [1] TRUE
##
## [[20]]
## [1] TRUE
##
## [[21]]
## [1] TRUE
##
## [[22]]
## [1] TRUE
##
## [[23]]
## [1] TRUE
##
## [[24]]
## [1] TRUE
##
## [[25]]
## [1] TRUE
# Or load specific pacakges
# require(sp); require(sf); library(raster)
require(ggmap)
library(tidyverse)
library(rstudioapi)
## Warning: package 'rstudioapi' was built under R version 4.0.5
#Aggregate data from different sources to the ZIP Codes as the core covid-19 data
#are available at that scale
#Main tasks are
#1. Join COVID-19 Data to the NYC ZIP Code data (sf or sp polygons)
#read in data
nys_hospitals <- st_read("D:/GradSchool/Hunter_College/Spring_2021/DataAnalysisR/Module12/R-Spatial_II_Lab/NYS_Health_Facility.csv")
## Reading layer `NYS_Health_Facility' from data source `D:\GradSchool\Hunter_College\Spring_2021\DataAnalysisR\Module12\R-Spatial_II_Lab\NYS_Health_Facility.csv' using driver `CSV'
## Warning: no simple feature geometries present: returning a data.frame or tbl_df
nys_food <- st_read("D:/GradSchool/Hunter_College/Spring_2021/DataAnalysisR/Module12/R-Spatial_II_Lab/nys_retail_food_store_xy.csv")
## Reading layer `nys_retail_food_store_xy' from data source `D:\GradSchool\Hunter_College\Spring_2021\DataAnalysisR\Module12\R-Spatial_II_Lab\nys_retail_food_store_xy.csv' using driver `CSV'
## Warning: no simple feature geometries present: returning a data.frame or tbl_df
tests_COVID <- read_csv("D:/GradSchool/Hunter_College/Spring_2021/DataAnalysisR/Module12/R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## MODIFIED_ZCTA = col_double(),
## NEIGHBORHOOD_NAME = col_character(),
## BOROUGH_GROUP = col_character(),
## label = col_character(),
## lat = col_double(),
## lon = col_double(),
## COVID_CASE_COUNT = col_double(),
## COVID_CASE_RATE = col_double(),
## POP_DENOMINATOR = col_double(),
## COVID_DEATH_COUNT = col_double(),
## COVID_DEATH_RATE = col_double(),
## PERCENT_POSITIVE = col_double(),
## TOTAL_COVID_TESTS = col_double()
## )
ZIPS_sf <- st_read("D:/GradSchool/Hunter_College/Spring_2021/DataAnalysisR/Module11/R-Spatial/data/ZIP_CODE_040114.shp")
## Reading layer `ZIP_CODE_040114' from data source `D:\GradSchool\Hunter_College\Spring_2021\DataAnalysisR\Module11\R-Spatial\data\ZIP_CODE_040114.shp' using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
#check to make sure that the ZIPs data was read in as an SF
class(ZIPS_sf)
## [1] "sf" "data.frame"
#clean up data -- make sure join columns are the same type
names(ZIPS_sf)
## [1] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" "AREA"
## [6] "STATE" "COUNTY" "ST_FIPS" "CTY_FIPS" "URL"
## [11] "SHAPE_AREA" "SHAPE_LEN" "geometry"
str(ZIPS_sf)
## Classes 'sf' and 'data.frame': 263 obs. of 13 variables:
## $ ZIPCODE : chr "11436" "11213" "11212" "11225" ...
## $ BLDGZIP : chr "0" "0" "0" "0" ...
## $ PO_NAME : chr "Jamaica" "Brooklyn" "Brooklyn" "Brooklyn" ...
## $ POPULATION: num 18681 62426 83866 56527 72280 ...
## $ AREA : num 22699295 29631004 41972104 23698630 36868799 ...
## $ STATE : chr "NY" "NY" "NY" "NY" ...
## $ COUNTY : chr "Queens" "Kings" "Kings" "Kings" ...
## $ ST_FIPS : chr "36" "36" "36" "36" ...
## $ CTY_FIPS : chr "081" "047" "047" "047" ...
## $ URL : chr "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
## $ SHAPE_AREA: num 0 0 0 0 0 0 0 0 0 0 ...
## $ SHAPE_LEN : num 0 0 0 0 0 0 0 0 0 0 ...
## $ geometry :sfc_POLYGON of length 263; first list element: List of 1
## ..$ : num [1:159, 1:2] 1038098 1038142 1038171 1038280 1038521 ...
## ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:12] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...
ZIPS_sf <- ZIPS_sf%>% mutate(ZIPCODE=as.numeric(as.character(ZIPCODE)))
tests_COVID <- tests_COVID%>% mutate(mod_ZCTA = as.numeric(as.character(MODIFIED_ZCTA)))
#check to make sure join columns are now both numeric
str(tests_COVID)
## spec_tbl_df[,14] [177 x 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ MODIFIED_ZCTA : num [1:177] 10001 10002 10003 10004 10005 ...
## $ NEIGHBORHOOD_NAME: chr [1:177] "Chelsea/NoMad/West Chelsea" "Chinatown/Lower East Side" "East Village/Gramercy/Greenwich Village" "Financial District" ...
## $ BOROUGH_GROUP : chr [1:177] "Manhattan" "Manhattan" "Manhattan" "Manhattan" ...
## $ label : chr [1:177] "10001, 10118" "10002" "10003" "10004" ...
## $ lat : num [1:177] 40.8 40.7 40.7 40.7 40.7 ...
## $ lon : num [1:177] -74 -74 -74 -74 -74 ...
## $ COVID_CASE_COUNT : num [1:177] 1542 5902 2803 247 413 ...
## $ COVID_CASE_RATE : num [1:177] 5584 7836 5193 8311 4716 ...
## $ POP_DENOMINATOR : num [1:177] 27613 75323 53978 2972 8757 ...
## $ COVID_DEATH_COUNT: num [1:177] 35 264 48 2 0 1 4 118 37 62 ...
## $ COVID_DEATH_RATE : num [1:177] 126.8 350.5 88.9 67.3 0 ...
## $ PERCENT_POSITIVE : num [1:177] 7.86 12.63 6.93 6.92 6.72 ...
## $ TOTAL_COVID_TESTS: num [1:177] 20158 48197 41076 3599 6102 ...
## $ mod_ZCTA : num [1:177] 10001 10002 10003 10004 10005 ...
## - attr(*, "spec")=
## .. cols(
## .. MODIFIED_ZCTA = col_double(),
## .. NEIGHBORHOOD_NAME = col_character(),
## .. BOROUGH_GROUP = col_character(),
## .. label = col_character(),
## .. lat = col_double(),
## .. lon = col_double(),
## .. COVID_CASE_COUNT = col_double(),
## .. COVID_CASE_RATE = col_double(),
## .. POP_DENOMINATOR = col_double(),
## .. COVID_DEATH_COUNT = col_double(),
## .. COVID_DEATH_RATE = col_double(),
## .. PERCENT_POSITIVE = col_double(),
## .. TOTAL_COVID_TESTS = col_double()
## .. )
str(ZIPS_sf)
## Classes 'sf' and 'data.frame': 263 obs. of 13 variables:
## $ ZIPCODE : num 11436 11213 11212 11225 11218 ...
## $ BLDGZIP : chr "0" "0" "0" "0" ...
## $ PO_NAME : chr "Jamaica" "Brooklyn" "Brooklyn" "Brooklyn" ...
## $ POPULATION: num 18681 62426 83866 56527 72280 ...
## $ AREA : num 22699295 29631004 41972104 23698630 36868799 ...
## $ STATE : chr "NY" "NY" "NY" "NY" ...
## $ COUNTY : chr "Queens" "Kings" "Kings" "Kings" ...
## $ ST_FIPS : chr "36" "36" "36" "36" ...
## $ CTY_FIPS : chr "081" "047" "047" "047" ...
## $ URL : chr "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
## $ SHAPE_AREA: num 0 0 0 0 0 0 0 0 0 0 ...
## $ SHAPE_LEN : num 0 0 0 0 0 0 0 0 0 0 ...
## $ geometry :sfc_POLYGON of length 263; first list element: List of 1
## ..$ : num [1:159, 1:2] 1038098 1038142 1038171 1038280 1038521 ...
## ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:12] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...
ZIPS_Covid_Join <- left_join(ZIPS_sf,tests_COVID, by = c('ZIPCODE' = 'mod_ZCTA'))
str(ZIPS_Covid_Join)
## Classes 'sf' and 'data.frame': 263 obs. of 26 variables:
## $ ZIPCODE : num 11436 11213 11212 11225 11218 ...
## $ BLDGZIP : chr "0" "0" "0" "0" ...
## $ PO_NAME : chr "Jamaica" "Brooklyn" "Brooklyn" "Brooklyn" ...
## $ POPULATION : num 18681 62426 83866 56527 72280 ...
## $ AREA : num 22699295 29631004 41972104 23698630 36868799 ...
## $ STATE : chr "NY" "NY" "NY" "NY" ...
## $ COUNTY : chr "Queens" "Kings" "Kings" "Kings" ...
## $ ST_FIPS : chr "36" "36" "36" "36" ...
## $ CTY_FIPS : chr "081" "047" "047" "047" ...
## $ URL : chr "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
## $ SHAPE_AREA : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SHAPE_LEN : num 0 0 0 0 0 0 0 0 0 0 ...
## $ MODIFIED_ZCTA : num 11436 11213 11212 11225 11218 ...
## $ NEIGHBORHOOD_NAME: chr "South Jamaica/South Ozone Park" "Crown Heights (East)" "Ocean Hill-Brownsville" "Crown Heights (West)/Prospect Lefferts Gardens" ...
## $ BOROUGH_GROUP : chr "Queens" "Brooklyn" "Brooklyn" "Brooklyn" ...
## $ label : chr "11436" "11213" "11212" "11225" ...
## $ lat : num 40.7 40.7 40.7 40.7 40.6 ...
## $ lon : num -73.8 -73.9 -73.9 -74 -74 ...
## $ COVID_CASE_COUNT : num 1888 5166 7182 3833 6199 ...
## $ COVID_CASE_RATE : num 9420 7997 9710 6664 8377 ...
## $ POP_DENOMINATOR : num 20043 64601 73967 57514 73996 ...
## $ COVID_DEATH_COUNT: num 64 203 330 177 218 368 256 206 380 219 ...
## $ COVID_DEATH_RATE : num 319 314 446 308 295 ...
## $ PERCENT_POSITIVE : num 17.6 13.7 15.6 11.6 13.9 ...
## $ TOTAL_COVID_TESTS: num 11082 38560 47319 33709 45884 ...
## $ geometry :sfc_POLYGON of length 263; first list element: List of 1
## ..$ : num [1:159, 1:2] 1038098 1038142 1038171 1038280 1038521 ...
## ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:25] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...
#Check to make sure join is valid and can be spatially displayed
Plot_PctPos <- plot(ZIPS_Covid_Join['PERCENT_POSITIVE'],breaks = 'jenks',main="Percent Positive Cases through 4/23/2021")
Plot_CaseCnt <- plot(ZIPS_Covid_Join['COVID_CASE_COUNT'],breaks = 'jenks',main="# Positive Cases through 4/23/2021")
#want to see what this looks like with per sq mi taken into account
st_crs(ZIPS_Covid_Join)
## Coordinate Reference System:
## User input: NAD83 / New York Long Island (ftUS)
## wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
## BASEGEOGCRS["NAD83",
## DATUM["North American Datum 1983",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4269]],
## CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
## METHOD["Lambert Conic Conformal (2SP)",
## ID["EPSG",9802]],
## PARAMETER["Latitude of false origin",40.1666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8821]],
## PARAMETER["Longitude of false origin",-74,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8822]],
## PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8823]],
## PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8824]],
## PARAMETER["Easting at false origin",984250,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8826]],
## PARAMETER["Northing at false origin",0,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8827]]],
## CS[Cartesian,2],
## AXIS["easting (X)",east,
## ORDER[1],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## AXIS["northing (Y)",north,
## ORDER[2],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## USAGE[
## SCOPE["Engineering survey, topographic mapping."],
## AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
## BBOX[40.47,-74.26,41.3,-71.8]],
## ID["EPSG",2263]]
ZIPS_Covid_Join <- ZIPS_Covid_Join%>%mutate(areaSF = st_area(geometry),
areaSqMi = areaSF/27878400,
CasesPerSqMi = COVID_CASE_COUNT/areaSqMi,
CasesPer100K = COVID_CASE_COUNT/POPULATION*100000)
Plot_CaseDensity <- plot(ZIPS_Covid_Join['CasesPerSqMi'],breaks = 'jenks',main="Pos. Cases Per Sq Mi through 4/23/2021")
Plot_CasePer100K <- plot(ZIPS_Covid_Join['CasesPer100K'],breaks = 'jenks',main="Pos. Cases Per 100K through 4/23/2021")
sf::st_read("D:/GradSchool/Hunter_College/Spring_2021/DataAnalysisR/Module12/R-Spatial_II_Lab/nycFoodStore.shp") -> nycFoodStoreSF
## Reading layer `nycFoodStore' from data source `D:\GradSchool\Hunter_College\Spring_2021\DataAnalysisR\Module12\R-Spatial_II_Lab\nycFoodStore.shp' using driver `ESRI Shapefile'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS: WGS 84
st_crs(nycFoodStoreSF) #ESPG 4326 (aka WGS 1984)
## Coordinate Reference System:
## User input: WGS 84
## wkt:
## GEOGCRS["WGS 84",
## DATUM["World Geodetic System 1984",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["latitude",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["longitude",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4326]]
st_crs(ZIPS_sf) #ESPG 2263 (aka NY LONG ISLAND in Ft US)
## Coordinate Reference System:
## User input: NAD83 / New York Long Island (ftUS)
## wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
## BASEGEOGCRS["NAD83",
## DATUM["North American Datum 1983",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4269]],
## CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
## METHOD["Lambert Conic Conformal (2SP)",
## ID["EPSG",9802]],
## PARAMETER["Latitude of false origin",40.1666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8821]],
## PARAMETER["Longitude of false origin",-74,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8822]],
## PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8823]],
## PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8824]],
## PARAMETER["Easting at false origin",984250,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8826]],
## PARAMETER["Northing at false origin",0,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8827]]],
## CS[Cartesian,2],
## AXIS["easting (X)",east,
## ORDER[1],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## AXIS["northing (Y)",north,
## ORDER[2],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## USAGE[
## SCOPE["Engineering survey, topographic mapping."],
## AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
## BBOX[40.47,-74.26,41.3,-71.8]],
## ID["EPSG",2263]]
#want to reproject food store CRS into the same as the ZIPs for a more local projects
sf::st_transform(nycFoodStoreSF, sf::st_crs(2263)) -> nycFoodStoreLocal
st_crs(nycFoodStoreLocal)
## Coordinate Reference System:
## User input: EPSG:2263
## wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
## BASEGEOGCRS["NAD83",
## DATUM["North American Datum 1983",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4269]],
## CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
## METHOD["Lambert Conic Conformal (2SP)",
## ID["EPSG",9802]],
## PARAMETER["Latitude of false origin",40.1666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8821]],
## PARAMETER["Longitude of false origin",-74,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8822]],
## PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8823]],
## PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8824]],
## PARAMETER["Easting at false origin",984250,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8826]],
## PARAMETER["Northing at false origin",0,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8827]]],
## CS[Cartesian,2],
## AXIS["easting (X)",east,
## ORDER[1],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## AXIS["northing (Y)",north,
## ORDER[2],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## USAGE[
## SCOPE["Engineering survey, topographic mapping."],
## AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
## BBOX[40.47,-74.26,41.3,-71.8]],
## ID["EPSG",2263]]
#set coordinate reference system to the same as the ZIPs CRS
nycFoodStoreSF%<>% sf::st_transform((sf::st_crs(ZIPS_sf)))
#join ZIPs (that also have covid data) to the food stores
#join the zips that contain food stores, group by the ZIP and by the establishment type
ZIPS_Food<-sf::st_join(ZIPS_sf,nycFoodStoreLocal,join=st_contains)
ZIPS_Food<-ZIPS_Food%>%
group_by(ZIPCODE, Estbl_T)%>%
dplyr::filter(Estbl_T!='A')%>%
dplyr::filter(stringr::str_detect(Estbl_T,'[ADJ]'))%>%
summarize(FoodStoreNum = n())%>%
#group_by(ZIPCODE)%>%
summarize(NumFood_PerZIP=sum(FoodStoreNum))
## `summarise()` has grouped output by 'ZIPCODE'. You can override using the `.groups` argument.
plot(ZIPS_Food['NumFood_PerZIP'],breaks='jenks', main='Number of Food Stores Per ZIP (Missing ZIPS)')
#want to re-join ZIP data to keep all ZIPS even if there are no food stores in them
#It seems strange that there are ZIPS with null values for number of food stores
ZIPS_Food2<-st_join(ZIPS_sf,ZIPS_Food,by= c('ZIPCODE' = 'ZIPCODE'))
#Now for those ZIP codes with no food stores, replace NA with 0
ZIPS_Food2[is.na('NumFood_PerZIP')]<-0
#str(NumFood_PerZIP2)
ZIPS_Food2<-ZIPS_Food2%>%group_by(ZIPCODE.x)%>%
summarize(NumFood_PerZIP=sum(NumFood_PerZIP))
plot(ZIPS_Food2['NumFood_PerZIP'],breaks = 'jenks',main='Number of Food Stores Per ZIP')
#read in health data
NYS_Health <- read_csv("NYS_Health_Facility.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_character(),
## `Facility ID` = col_double(),
## `Facility Phone Number` = col_double(),
## `Facility Fax Number` = col_double(),
## `Facility County Code` = col_double(),
## `Regional Office ID` = col_double(),
## `Main Site Facility ID` = col_double(),
## `Facility Latitude` = col_double(),
## `Facility Longitude` = col_double()
## )
## i Use `spec()` for the full column specifications.
names(NYS_Health)
## [1] "Facility ID" "Facility Name"
## [3] "Short Description" "Description"
## [5] "Facility Open Date" "Facility Address 1"
## [7] "Facility Address 2" "Facility City"
## [9] "Facility State" "Facility Zip Code"
## [11] "Facility Phone Number" "Facility Fax Number"
## [13] "Facility Website" "Facility County Code"
## [15] "Facility County" "Regional Office ID"
## [17] "Regional Office" "Main Site Name"
## [19] "Main Site Facility ID" "Operating Certificate Number"
## [21] "Operator Name" "Operator Address 1"
## [23] "Operator Address 2" "Operator City"
## [25] "Operator State" "Operator Zip Code"
## [27] "Cooperator Name" "Cooperator Address"
## [29] "Cooperator Address 2" "Cooperator City"
## [31] "Cooperator State" "Cooperator Zip Code"
## [33] "Ownership Type" "Facility Latitude"
## [35] "Facility Longitude" "Facility Location"
#clean data
NYS_Health<-NYS_Health%>%filter(NYS_Health$`Facility Latitude`>0)
NYS_Health%>%filter(!is.na(NYS_Health$`Facility Latitude`))
## # A tibble: 3,843 x 36
## `Facility ID` `Facility Name` `Short Descripti~ Description `Facility Open ~
## <dbl> <chr> <chr> <chr> <chr>
## 1 204 Hospice at Lour~ HSPC Hospice 06/01/1985
## 2 620 Charles T Sitri~ NH Residentia~ 02/01/1989
## 3 1156 East Side Nursi~ NH Residentia~ 08/01/1979
## 4 2589 Wellsville Mano~ NH Residentia~ 02/01/1989
## 5 3455 Harris Hill Nur~ NH Residentia~ 04/08/1992
## 6 3853 Garden City Sur~ DTC Diagnostic~ 04/07/2008
## 7 4249 Willcare CHHA Certified ~ 05/15/1990
## 8 4473 Good Shepherd H~ HSPC Hospice 09/01/2002
## 9 6230 NYU Langone Rut~ HOSP-EC Hospital E~ 01/01/2006
## 10 6482 Endoscopy Cente~ DTC Diagnostic~ 01/20/2003
## # ... with 3,833 more rows, and 31 more variables: Facility Address 1 <chr>,
## # Facility Address 2 <chr>, Facility City <chr>, Facility State <chr>,
## # Facility Zip Code <chr>, Facility Phone Number <dbl>,
## # Facility Fax Number <dbl>, Facility Website <chr>,
## # Facility County Code <dbl>, Facility County <chr>,
## # Regional Office ID <dbl>, Regional Office <chr>, Main Site Name <chr>,
## # Main Site Facility ID <dbl>, Operating Certificate Number <chr>,
## # Operator Name <chr>, Operator Address 1 <chr>, Operator Address 2 <chr>,
## # Operator City <chr>, Operator State <chr>, Operator Zip Code <chr>,
## # Cooperator Name <chr>, Cooperator Address <chr>,
## # Cooperator Address 2 <chr>, Cooperator City <chr>, Cooperator State <chr>,
## # Cooperator Zip Code <chr>, Ownership Type <chr>, Facility Latitude <dbl>,
## # Facility Longitude <dbl>, Facility Location <chr>
NYC_Health<-NYS_Health%>%
filter(NYS_Health$`Facility State`=='New York' & NYS_Health$`Facility County`=='Bronx'|NYS_Health$`Facility County`=='Kings' |NYS_Health$`Facility County`=='New York'| NYS_Health$`Facility County`=='Richmond'| NYS_Health$`Facility County`=='Queens')
unique(NYC_Health$`Short Description`)
## [1] "HOSP-EC" "DTC" "NH" "CHHA" "DTC-EC" "HOSP-SB" "HOSP"
## [8] NA "LTHHCP" "ADHCP" "HSPC"
names(NYC_Health)
## [1] "Facility ID" "Facility Name"
## [3] "Short Description" "Description"
## [5] "Facility Open Date" "Facility Address 1"
## [7] "Facility Address 2" "Facility City"
## [9] "Facility State" "Facility Zip Code"
## [11] "Facility Phone Number" "Facility Fax Number"
## [13] "Facility Website" "Facility County Code"
## [15] "Facility County" "Regional Office ID"
## [17] "Regional Office" "Main Site Name"
## [19] "Main Site Facility ID" "Operating Certificate Number"
## [21] "Operator Name" "Operator Address 1"
## [23] "Operator Address 2" "Operator City"
## [25] "Operator State" "Operator Zip Code"
## [27] "Cooperator Name" "Cooperator Address"
## [29] "Cooperator Address 2" "Cooperator City"
## [31] "Cooperator State" "Cooperator Zip Code"
## [33] "Ownership Type" "Facility Latitude"
## [35] "Facility Longitude" "Facility Location"
NYC_Health<-NYC_Health%>%
filter(NYC_Health$`Short Description`=='NH'|
NYC_Health$`Short Description`=='HOSP'|
NYC_Health$`Short Description`=='HCPC')%>%
mutate(lat = as.numeric(`Facility Latitude`),long = as.numeric(`Facility Longitude`),
ZipCode = as.numeric(`Facility Zip Code`))
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
#check to make sure join columns match up
str(NYC_Health) #zip is character column
## spec_tbl_df[,39] [235 x 39] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Facility ID : num [1:235] 1217 1185 1469 1705 7745 ...
## $ Facility Name : chr [1:235] "St Patricks Home" "Montefiore Medical Center - Montefiore Westchester Square" "Mount Sinai Morningside" "Dry Harbor Nursing Home" ...
## $ Short Description : chr [1:235] "NH" "HOSP" "HOSP" "NH" ...
## $ Description : chr [1:235] "Residential Health Care Facility - SNF" "Hospital" "Hospital" "Residential Health Care Facility - SNF" ...
## $ Facility Open Date : chr [1:235] "01/01/1901" "08/01/1979" "01/01/1901" "06/01/1987" ...
## $ Facility Address 1 : chr [1:235] "66 Van Cortlandt Park South" "2475 St. Raymond Avenue" "1111 Amsterdam Avenue" "61-35 Dry Harbor Road" ...
## $ Facility Address 2 : chr [1:235] NA NA NA NA ...
## $ Facility City : chr [1:235] "Bronx" "Bronx" "New York" "Middle Village" ...
## $ Facility State : chr [1:235] "New York" "New York" "New York" "New York" ...
## $ Facility Zip Code : chr [1:235] "10463" "10461" "10025" "11379" ...
## $ Facility Phone Number : num [1:235] 7.19e+09 7.18e+09 2.13e+09 7.19e+09 7.18e+09 ...
## $ Facility Fax Number : num [1:235] NA NA NA NA NA ...
## $ Facility Website : chr [1:235] NA NA NA NA ...
## $ Facility County Code : num [1:235] 7094 7094 7093 7096 7096 ...
## $ Facility County : chr [1:235] "Bronx" "Bronx" "New York" "Queens" ...
## $ Regional Office ID : num [1:235] 5 5 5 5 5 5 5 5 5 5 ...
## $ Regional Office : chr [1:235] "Metropolitan Area Regional Office - New York City" "Metropolitan Area Regional Office - New York City" "Metropolitan Area Regional Office - New York City" "Metropolitan Area Regional Office - New York City" ...
## $ Main Site Name : chr [1:235] NA "Montefiore Medical Center - Henry & Lucy Moses Div" "Mount Sinai West" NA ...
## $ Main Site Facility ID : num [1:235] NA 1169 1466 NA NA ...
## $ Operating Certificate Number: chr [1:235] "7000307N" "7000006H" "7002032H" "7003359N" ...
## $ Operator Name : chr [1:235] "St Patricks Home for the Aged & Infirm" "Montefiore Medical Center" "St Lukes Roosevelt Hospital Center Inc" "Dry Harbor HRF Inc" ...
## $ Operator Address 1 : chr [1:235] "Box 218 Rd 1" "111 East 210th Street" "Amsterdam Avenue At 114th Street" "61-35 Dry Harbor Road" ...
## $ Operator Address 2 : chr [1:235] NA NA NA NA ...
## $ Operator City : chr [1:235] "Germantown" "Bronx" "New York" "Middle Village" ...
## $ Operator State : chr [1:235] "New York" "New York" "New York" "New York" ...
## $ Operator Zip Code : chr [1:235] "12526" "10467" "10025" "11379" ...
## $ Cooperator Name : chr [1:235] NA "Montefiore Health System, Inc" "Mount Sinai Hospitals Group, Inc." NA ...
## $ Cooperator Address : chr [1:235] NA "111 East 210th Street" "One Gustave L. Levy Place" NA ...
## $ Cooperator Address 2 : chr [1:235] NA NA NA NA ...
## $ Cooperator City : chr [1:235] NA "Bronx" "New York" NA ...
## $ Cooperator State : chr [1:235] "New York" "New York" "New York" "New York" ...
## $ Cooperator Zip Code : chr [1:235] NA "10467" "10029" NA ...
## $ Ownership Type : chr [1:235] "Not for Profit Corporation" "Not for Profit Corporation" "Not for Profit Corporation" "Business Corporation" ...
## $ Facility Latitude : num [1:235] 40.9 40.8 40.8 40.7 40.8 ...
## $ Facility Longitude : num [1:235] -73.9 -73.8 -74 -73.9 -73.9 ...
## $ Facility Location : chr [1:235] "(40.884361, -73.888451)" "(40.840431, -73.848244)" "(40.805912, -73.961639)" "(40.726879, -73.871681)" ...
## $ lat : num [1:235] 40.9 40.8 40.8 40.7 40.8 ...
## $ long : num [1:235] -73.9 -73.8 -74 -73.9 -73.9 ...
## $ ZipCode : num [1:235] 10463 10461 10025 11379 11369 ...
## - attr(*, "spec")=
## .. cols(
## .. `Facility ID` = col_double(),
## .. `Facility Name` = col_character(),
## .. `Short Description` = col_character(),
## .. Description = col_character(),
## .. `Facility Open Date` = col_character(),
## .. `Facility Address 1` = col_character(),
## .. `Facility Address 2` = col_character(),
## .. `Facility City` = col_character(),
## .. `Facility State` = col_character(),
## .. `Facility Zip Code` = col_character(),
## .. `Facility Phone Number` = col_double(),
## .. `Facility Fax Number` = col_double(),
## .. `Facility Website` = col_character(),
## .. `Facility County Code` = col_double(),
## .. `Facility County` = col_character(),
## .. `Regional Office ID` = col_double(),
## .. `Regional Office` = col_character(),
## .. `Main Site Name` = col_character(),
## .. `Main Site Facility ID` = col_double(),
## .. `Operating Certificate Number` = col_character(),
## .. `Operator Name` = col_character(),
## .. `Operator Address 1` = col_character(),
## .. `Operator Address 2` = col_character(),
## .. `Operator City` = col_character(),
## .. `Operator State` = col_character(),
## .. `Operator Zip Code` = col_character(),
## .. `Cooperator Name` = col_character(),
## .. `Cooperator Address` = col_character(),
## .. `Cooperator Address 2` = col_character(),
## .. `Cooperator City` = col_character(),
## .. `Cooperator State` = col_character(),
## .. `Cooperator Zip Code` = col_character(),
## .. `Ownership Type` = col_character(),
## .. `Facility Latitude` = col_double(),
## .. `Facility Longitude` = col_double(),
## .. `Facility Location` = col_character()
## .. )
names(NYC_Health)
## [1] "Facility ID" "Facility Name"
## [3] "Short Description" "Description"
## [5] "Facility Open Date" "Facility Address 1"
## [7] "Facility Address 2" "Facility City"
## [9] "Facility State" "Facility Zip Code"
## [11] "Facility Phone Number" "Facility Fax Number"
## [13] "Facility Website" "Facility County Code"
## [15] "Facility County" "Regional Office ID"
## [17] "Regional Office" "Main Site Name"
## [19] "Main Site Facility ID" "Operating Certificate Number"
## [21] "Operator Name" "Operator Address 1"
## [23] "Operator Address 2" "Operator City"
## [25] "Operator State" "Operator Zip Code"
## [27] "Cooperator Name" "Cooperator Address"
## [29] "Cooperator Address 2" "Cooperator City"
## [31] "Cooperator State" "Cooperator Zip Code"
## [33] "Ownership Type" "Facility Latitude"
## [35] "Facility Longitude" "Facility Location"
## [37] "lat" "long"
## [39] "ZipCode"
unique(NYC_Health$ZipCode)
## [1] 10463 10461 10025 11379 11369 10466 10035 11201 11219 10016 10301 10469
## [13] 10457 11694 11373 11238 11360 10021 11423 11375 10032 11217 11226 11354
## [25] 11230 10452 11004 11432 10304 10314 10467 11357 11203 11213 10305 10471
## [37] 11228 11355 10309 11691 11692 10475 11377 NA 10024 11362 10003 11234
## [49] 10462 11235 10306 10002 11372 11214 11207 10034 11233 11220 11218 10472
## [61] 10037 11229 10029 11040 10459 11224 11212 11236 10456 11366 11249 10465
## [73] 11429 11418 11237 10040 10065 10473 11368 11215 10044 11427 11434 10310
## [85] 11239 11378 10019 10027 10468 11208 11216 10011 11206 11361 11221 10014
## [97] 11102 10451 10038 11435
#need to filter out NA
NYC_Health<- NYC_Health%>%filter(!is.na(ZipCode))
str(ZIPS_sf) #zips are numeric
## Classes 'sf' and 'data.frame': 263 obs. of 13 variables:
## $ ZIPCODE : num 11436 11213 11212 11225 11218 ...
## $ BLDGZIP : chr "0" "0" "0" "0" ...
## $ PO_NAME : chr "Jamaica" "Brooklyn" "Brooklyn" "Brooklyn" ...
## $ POPULATION: num 18681 62426 83866 56527 72280 ...
## $ AREA : num 22699295 29631004 41972104 23698630 36868799 ...
## $ STATE : chr "NY" "NY" "NY" "NY" ...
## $ COUNTY : chr "Queens" "Kings" "Kings" "Kings" ...
## $ ST_FIPS : chr "36" "36" "36" "36" ...
## $ CTY_FIPS : chr "081" "047" "047" "047" ...
## $ URL : chr "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
## $ SHAPE_AREA: num 0 0 0 0 0 0 0 0 0 0 ...
## $ SHAPE_LEN : num 0 0 0 0 0 0 0 0 0 0 ...
## $ geometry :sfc_POLYGON of length 263; first list element: List of 1
## ..$ : num [1:159, 1:2] 1038098 1038142 1038171 1038280 1038521 ...
## ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:12] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...
#try and join csv to ZIP sf, comes out with a lot of null/duplicates
ZIPS_Health_csv<-left_join(ZIPS_sf,NYC_Health,by=c('ZIPCODE' = 'ZipCode'))
ZIPS_Health_csv%>%filter(!is.na(lat))
## Simple feature collection with 245 features and 50 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 915316.2 ymin: 122018.3 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
## First 10 features:
## ZIPCODE BLDGZIP PO_NAME POPULATION AREA STATE COUNTY ST_FIPS CTY_FIPS
## 1 11213 0 Brooklyn 62426 29631004 NY Kings 36 047
## 2 11213 0 Brooklyn 62426 29631004 NY Kings 36 047
## 3 11212 0 Brooklyn 83866 41972104 NY Kings 36 047
## 4 11218 0 Brooklyn 72280 36868799 NY Kings 36 047
## 5 11226 0 Brooklyn 106132 39408598 NY Kings 36 047
## 6 11226 0 Brooklyn 106132 39408598 NY Kings 36 047
## 7 11226 0 Brooklyn 106132 39408598 NY Kings 36 047
## 8 11219 0 Brooklyn 92561 42002738 NY Kings 36 047
## 9 11219 0 Brooklyn 92561 42002738 NY Kings 36 047
## 10 11219 0 Brooklyn 92561 42002738 NY Kings 36 047
## URL SHAPE_AREA SHAPE_LEN Facility ID
## 1 http://www.usps.com/ 0 0 1309
## 2 http://www.usps.com/ 0 0 1407
## 3 http://www.usps.com/ 0 0 1376
## 4 http://www.usps.com/ 0 0 1392
## 5 http://www.usps.com/ 0 0 1576
## 6 http://www.usps.com/ 0 0 1369
## 7 http://www.usps.com/ 0 0 1380
## 8 http://www.usps.com/ 0 0 1403
## 9 http://www.usps.com/ 0 0 1305
## 10 http://www.usps.com/ 0 0 1393
## Facility Name
## 1 Interfaith Medical Center
## 2 Crown Heights Center for Nursing and Rehabilitation
## 3 Schulman and Schachne Institute for Nursing And Rehabilitation
## 4 Palm Gardens Center for Nursing and Rehabilitation
## 5 Ditmas Park Care Center
## 6 NY Congregational Nursing Center, Inc
## 7 Caton Park Rehabilitation and Nursing Center, LLC
## 8 Boro Park Center for Rehabilitation and Healthcare
## 9 Maimonides Medical Center
## 10 The Heritage Rehabilitation and Health Care Center
## Short Description Description Facility Open Date
## 1 HOSP Hospital 04/24/1980
## 2 NH Residential Health Care Facility - SNF 01/01/1901
## 3 NH Residential Health Care Facility - SNF 01/01/1901
## 4 NH Residential Health Care Facility - SNF 02/01/1980
## 5 NH Residential Health Care Facility - SNF 02/01/1989
## 6 NH Residential Health Care Facility - SNF 01/01/1901
## 7 NH Residential Health Care Facility - SNF 02/01/1980
## 8 NH Residential Health Care Facility - SNF 01/01/1901
## 9 HOSP Hospital 01/01/1901
## 10 NH Residential Health Care Facility - SNF 02/01/1980
## Facility Address 1 Facility Address 2 Facility City Facility State
## 1 1545 Atlantic Avenue <NA> Brooklyn New York
## 2 810-20 St Marks Avenue <NA> Brooklyn New York
## 3 555 Rockaway Parkway <NA> Brooklyn New York
## 4 615 Avenue C <NA> Brooklyn New York
## 5 2107 Ditmas Avenue <NA> Brooklyn New York
## 6 135 Linden Boulevard <NA> Brooklyn New York
## 7 1312 Caton Avenue <NA> Brooklyn New York
## 8 4915 10th Ave <NA> Brooklyn New York
## 9 4802 Tenth Avenue <NA> Brooklyn New York
## 10 5606 15th Ave <NA> Brooklyn New York
## Facility Zip Code Facility Phone Number Facility Fax Number Facility Website
## 1 11213 7189357000 NA <NA>
## 2 11213 7184677300 NA <NA>
## 3 11212 7182405775 NA <NA>
## 4 11218 7186333300 NA <NA>
## 5 11226 7184628100 NA <NA>
## 6 11226 7182840039 NA <NA>
## 7 11226 7186937000 NA <NA>
## 8 11219 7188513700 7189726120 <NA>
## 9 11219 7182836000 NA <NA>
## 10 11219 7188511000 NA <NA>
## Facility County Code Facility County Regional Office ID
## 1 7095 Kings 5
## 2 7095 Kings 5
## 3 7095 Kings 5
## 4 7095 Kings 5
## 5 7095 Kings 5
## 6 7095 Kings 5
## 7 7095 Kings 5
## 8 7095 Kings 5
## 9 7095 Kings 5
## 10 7095 Kings 5
## Regional Office Main Site Name
## 1 Metropolitan Area Regional Office - New York City <NA>
## 2 Metropolitan Area Regional Office - New York City <NA>
## 3 Metropolitan Area Regional Office - New York City <NA>
## 4 Metropolitan Area Regional Office - New York City <NA>
## 5 Metropolitan Area Regional Office - New York City <NA>
## 6 Metropolitan Area Regional Office - New York City <NA>
## 7 Metropolitan Area Regional Office - New York City <NA>
## 8 Metropolitan Area Regional Office - New York City <NA>
## 9 Metropolitan Area Regional Office - New York City <NA>
## 10 Metropolitan Area Regional Office - New York City <NA>
## Main Site Facility ID Operating Certificate Number
## 1 NA 7001046H
## 2 NA 7001398N
## 3 NA 7001318N
## 4 NA 7001391N
## 5 NA 7001393N
## 6 NA 7001309N
## 7 NA 7001366N
## 8 NA 7001394N
## 9 NA 7001020H
## 10 NA 7001392N
## Operator Name
## 1 Interfaith Medical Center
## 2 St. Marks Brooklyn Associates L.L.C.
## 3 The Schulman and Schachne Institute for Nursing And Rehab,Inc
## 4 Palm Gardens Care Center, LLC
## 5 Ditmas Park Rehabilitation & Care Center, LLC
## 6 New York Congregational Nursing Center
## 7 Caton Park Rehabilitation and Nursing Center, LLC
## 8 Boro Park Operating Co, LLC
## 9 Maimonides Medical Center
## 10 Palm Tree Care Center, LLC
## Operator Address 1 Operator Address 2 Operator City Operator State
## 1 555 Prospect Place <NA> Brooklyn New York
## 2 810 St. Marks Avenue <NA> Brooklyn New York
## 3 One Brookdale Plaza <NA> Brooklyn New York
## 4 615 Avenue C <NA> Brooklyn New York
## 5 2107 Ditmas Avenue <NA> Brooklyn New York
## 6 135 Linden Boulevard <NA> Brooklyn New York
## 7 1312 Caton Avenue <NA> Brooklyn New York
## 8 4915 Tenth Avenue <NA> Brooklyn New York
## 9 4802 Tenth Ave <NA> Brooklyn New York
## 10 5606 15th Avenue <NA> Brooklyn New York
## Operator Zip Code Cooperator Name Cooperator Address
## 1 11238 One Brooklyn Health System, Inc. 1 Brookdale Plaza
## 2 11213 <NA> <NA>
## 3 11212 One Brooklyn Health System, Inc. 1 Brookdale Plaza
## 4 11218 <NA> <NA>
## 5 11226 <NA> <NA>
## 6 11226 <NA> <NA>
## 7 11226 <NA> <NA>
## 8 11219 <NA> <NA>
## 9 11219 <NA> <NA>
## 10 11219 <NA> <NA>
## Cooperator Address 2 Cooperator City Cooperator State
## 1 Care of Alex Rovt Chairman of the Board Brooklyn New York
## 2 <NA> <NA> New York
## 3 Care of Alex Rovt Chairman of the Board Brooklyn New York
## 4 <NA> <NA> New York
## 5 <NA> <NA> New York
## 6 <NA> <NA> New York
## 7 <NA> <NA> New York
## 8 <NA> <NA> New York
## 9 <NA> <NA> New York
## 10 <NA> <NA> New York
## Cooperator Zip Code Ownership Type Facility Latitude
## 1 11212 Not for Profit Corporation 40.67794
## 2 <NA> LLC 40.67492
## 3 11212 Not for Profit Corporation 40.65505
## 4 <NA> LLC 40.64106
## 5 <NA> LLC 40.63996
## 6 <NA> Not for Profit Corporation 40.65248
## 7 <NA> LLC 40.64961
## 8 <NA> LLC 40.63892
## 9 <NA> Not for Profit Corporation 40.63942
## 10 <NA> LLC 40.62839
## Facility Longitude Facility Location lat long
## 1 -73.93752 (40.67794, -73.937515) 40.67794 -73.93752
## 2 -73.94571 (40.674923, -73.945709) 40.67492 -73.94571
## 3 -73.91326 (40.655045, -73.913261) 40.65505 -73.91326
## 4 -73.97279 (40.641064, -73.972786) 40.64106 -73.97279
## 5 -73.95770 (40.639965, -73.957695) 40.63996 -73.95770
## 6 -73.95376 (40.652481, -73.953758) 40.65248 -73.95376
## 7 -73.96771 (40.649609, -73.967712) 40.64961 -73.96771
## 8 -73.99873 (40.638924, -73.998734) 40.63892 -73.99873
## 9 -73.99839 (40.639423, -73.99839) 40.63942 -73.99839
## 10 -73.99194 (40.628391, -73.991936) 40.62839 -73.99194
## geometry
## 1 POLYGON ((1001614 186926.4,...
## 2 POLYGON ((1001614 186926.4,...
## 3 POLYGON ((1011174 183696.3,...
## 4 POLYGON ((991997.1 176307.5...
## 5 POLYGON ((994821.5 177865.7...
## 6 POLYGON ((994821.5 177865.7...
## 7 POLYGON ((994821.5 177865.7...
## 8 POLYGON ((987286.4 173946.5...
## 9 POLYGON ((987286.4 173946.5...
## 10 POLYGON ((987286.4 173946.5...
NYC_Health_sf<-st_as_sf(NYC_Health, coords = c("long","lat"))
plot(st_geometry(NYC_Health_sf))
plot(st_geometry(ZIPS_Health_csv))
#filter out NAs
ZIPS_Health_sf<-ZIPS_Health_csv%>%filter(!is.na(`Short Description`))
#check crs
st_crs(ZIPS_Health_sf)
## Coordinate Reference System:
## User input: NAD83 / New York Long Island (ftUS)
## wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
## BASEGEOGCRS["NAD83",
## DATUM["North American Datum 1983",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4269]],
## CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
## METHOD["Lambert Conic Conformal (2SP)",
## ID["EPSG",9802]],
## PARAMETER["Latitude of false origin",40.1666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8821]],
## PARAMETER["Longitude of false origin",-74,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8822]],
## PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8823]],
## PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8824]],
## PARAMETER["Easting at false origin",984250,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8826]],
## PARAMETER["Northing at false origin",0,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8827]]],
## CS[Cartesian,2],
## AXIS["easting (X)",east,
## ORDER[1],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## AXIS["northing (Y)",north,
## ORDER[2],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## USAGE[
## SCOPE["Engineering survey, topographic mapping."],
## AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
## BBOX[40.47,-74.26,41.3,-71.8]],
## ID["EPSG",2263]]
st_crs(ZIPS_sf)
## Coordinate Reference System:
## User input: NAD83 / New York Long Island (ftUS)
## wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
## BASEGEOGCRS["NAD83",
## DATUM["North American Datum 1983",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4269]],
## CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
## METHOD["Lambert Conic Conformal (2SP)",
## ID["EPSG",9802]],
## PARAMETER["Latitude of false origin",40.1666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8821]],
## PARAMETER["Longitude of false origin",-74,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8822]],
## PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8823]],
## PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8824]],
## PARAMETER["Easting at false origin",984250,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8826]],
## PARAMETER["Northing at false origin",0,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8827]]],
## CS[Cartesian,2],
## AXIS["easting (X)",east,
## ORDER[1],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## AXIS["northing (Y)",north,
## ORDER[2],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## USAGE[
## SCOPE["Engineering survey, topographic mapping."],
## AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
## BBOX[40.47,-74.26,41.3,-71.8]],
## ID["EPSG",2263]]
#create count of all health facilities per ZIP
Health_Per_ZIP<-ZIPS_Health_sf%>%group_by(ZIPCODE)%>%
summarize(n_health_facilities=n())%>%view()
plot(Health_Per_ZIP['n_health_facilities'],breaks='jenks',main="Number of Nursing Homes and Hospitals Per ZIP")
#want to join missing zip codes back in
Health_Per_ZIP2 <- st_join(ZIPS_sf,Health_Per_ZIP,by=c('ZIPCODE'='ZIPCODE'))
Health_Per_ZIP2[is.na('n_health_facilities')]<-0
plot(Health_Per_ZIP['n_health_facilities'],breaks='jenks',main="Number of Nursing Homes and Hospitals Per ZIP")
## this didn't work....
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.
#Q4. Join the Census ACS Pop, race, and age data to the NYC Planning CT Data
#read in files
unzip("2010 Census Tracts.zip")
CT<- sf::st_read("geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp", stringsAsFactors = FALSE)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `D:\GradSchool\Hunter_College\Spring_2021\DataAnalysisR\Module12\R-Spatial_II_Lab\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS84(DD)
str(CT)
## Classes 'sf' and 'data.frame': 2165 obs. of 12 variables:
## $ boro_code : chr "5" "1" "1" "1" ...
## $ boro_ct201: chr "5000900" "1009800" "1010000" "1010200" ...
## $ boro_name : chr "Staten Island" "Manhattan" "Manhattan" "Manhattan" ...
## $ cdeligibil: chr "E" "I" "I" "I" ...
## $ ct2010 : chr "000900" "009800" "010000" "010200" ...
## $ ctlabel : chr "9" "98" "100" "102" ...
## $ ntacode : chr "SI22" "MN19" "MN19" "MN17" ...
## $ ntaname : chr "West New Brighton-New Brighton-St. George" "Turtle Bay-East Midtown" "Turtle Bay-East Midtown" "Midtown-Midtown South" ...
## $ puma : chr "3903" "3808" "3808" "3807" ...
## $ shape_area: num 2497010 1906016 1860938 1860993 1864600 ...
## $ shape_leng: num 7729 5534 5692 5688 5693 ...
## $ geometry :sfc_MULTIPOLYGON of length 2165; first list element: List of 1
## ..$ :List of 1
## .. ..$ : num [1:28, 1:2] -74.1 -74.1 -74.1 -74.1 -74.1 ...
## ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:11] "boro_code" "boro_ct201" "boro_name" "cdeligibil" ...
CT %<>% dplyr::mutate(cntyFIPS = dplyr::case_when(
boro_name == 'Bronx' ~ '005',
boro_name == 'Brooklyn' ~ '047',
boro_name == 'Manhattan' ~ '061',
boro_name == 'Queens' ~ '081',
boro_name == 'Staten Island' ~ '085'),
tractFIPS = paste(cntyFIPS, ct2010, sep='')
)
# The ACS data from CENSUS website, 2018 data on population
# We used readLines to bypass the second line/row in the csv file.
# You can also manually delete that line first, and run the read_csv or read.cvs.
ACS_Data <- readLines("ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
magrittr::extract(-2) %>%
textConnection() %>%
read.csv(header=TRUE, quote= "\"") %>%
dplyr::select(GEO_ID,
totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
malePop = DP05_0002E, femalePop = DP05_0003E,
whitePop = DP05_0037E, blackPop = DP05_0038E,
asianPop = DP05_0067E, hispanicPop = DP05_0071E,
adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1));
ACS_Data %>%
magrittr::extract(1:10,)
## GEO_ID totPop elderlyPop malePop femalePop whitePop blackPop
## 1 1400000US36005000100 7080 51 6503 577 1773 4239
## 2 1400000US36005000200 4542 950 2264 2278 2165 1279
## 3 1400000US36005000400 5634 710 2807 2827 2623 1699
## 4 1400000US36005001600 5917 989 2365 3552 2406 2434
## 5 1400000US36005001900 2765 76 1363 1402 585 1041
## 6 1400000US36005002000 9409 977 4119 5290 3185 4487
## 7 1400000US36005002300 4600 648 2175 2425 479 2122
## 8 1400000US36005002400 172 0 121 51 69 89
## 9 1400000US36005002500 5887 548 2958 2929 903 1344
## 10 1400000US36005002701 2868 243 1259 1609 243 987
## asianPop hispanicPop adultPop citizenAdult censusCode
## 1 130 2329 6909 6100 005000100
## 2 119 3367 3582 2952 005000200
## 3 226 3873 4507 4214 005000400
## 4 68 3603 4416 3851 005001600
## 5 130 1413 2008 1787 005001900
## 6 29 5905 6851 6170 005002000
## 7 27 2674 3498 3056 005002300
## 8 14 0 131 42 005002400
## 9 68 4562 4237 2722 005002500
## 10 0 1985 1848 1412 005002701
popData <- merge(CT, ACS_Data, by.x ='tractFIPS', by.y = 'censusCode')
# verify the data
sum(popData$totPop)
## [1] 8443713
st_crs(popData)
## Coordinate Reference System:
## User input: WGS84(DD)
## wkt:
## GEOGCRS["WGS84(DD)",
## DATUM["WGS84",
## ELLIPSOID["WGS84",6378137,298.257223563,
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["geodetic longitude",east,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["geodetic latitude",north,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]]]
#assign the coords system in the ZIPS_sf to the pop data crs
popNYC <- sf::st_transform(popData, st_crs(ZIPS_sf)) # zpNYC is the JOINED zip code data from task 1.
# this may take a long time to run
popData %>% magrittr::extract('elderlyPop') %>% plot(breaks = 'jenks')
plot(popData['totPop'],breaks = 'jenks')
# Now aggregate to the zip code level
covidPopZipNYC <- sf::st_join(ZIPS_Covid_Join,
popNYC %>% sf::st_centroid(), # this essentially converts census tracts to points
join = st_contains) %>% #join all CT Centroid points that are contained by ZIPS_sf
group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT, TOTAL_COVID_TESTS) %>% # use names(zpNYC) and names(popNYC) to see what we have
summarise(totPop = sum(totPop),
malePctg = sum(malePop)/totPop*100, # note the totPop is the newly calculated
asianPop = sum(asianPop),
blackPop = sum(blackPop),
hispanicPop = sum(hispanicPop),
whitePop = sum(whitePop))
## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x
## `summarise()` has grouped output by 'ZIPCODE', 'PO_NAME', 'POPULATION', 'COUNTY', 'COVID_CASE_COUNT'. You can override using the `.groups` argument.
covidPopZipNYC %>% head()
## Simple feature collection with 6 features and 12 fields
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: 971132.6 ymin: 188447.3 xmax: 998309.7 ymax: 230942.5
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 6 x 13
## # Groups: ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT [6]
## ZIPCODE PO_NAME POPULATION COUNTY COVID_CASE_COUNT TOTAL_COVID_TES~ totPop
## <dbl> <chr> <dbl> <chr> <dbl> <dbl> <int>
## 1 83 Central P~ 25 New Yo~ NA NA 3
## 2 10001 New York 22413 New Yo~ 1542 20158 19146
## 3 10002 New York 81305 New Yo~ 5902 48197 74310
## 4 10003 New York 55878 New Yo~ 2803 41076 53487
## 5 10004 New York 2187 New Yo~ 247 3599 NA
## 6 10005 New York 8107 New Yo~ 413 6102 8809
## # ... with 6 more variables: malePctg <dbl>, asianPop <int>, blackPop <int>,
## # hispanicPop <int>, whitePop <int>, geometry <GEOMETRY [US_survey_foot]>
#filter out the NAs
sum(covidPopZipNYC$totPop, na.rm = T)
## [1] 8395306
#check to see NAs
covidPopZipNYC %>% dplyr::filter(is.na(totPop))
## Simple feature collection with 68 features and 12 fields
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: 971132.6 ymin: 151085.5 xmax: 1067494 ymax: 263362
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 68 x 13
## # Groups: ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT [68]
## ZIPCODE PO_NAME POPULATION COUNTY COVID_CASE_COUNT TOTAL_COVID_TES~ totPop
## * <dbl> <chr> <dbl> <chr> <dbl> <dbl> <int>
## 1 10004 New York 2187 New York 247 3599 NA
## 2 10020 New York 0 New York NA NA NA
## 3 10041 New York 0 New York NA NA NA
## 4 10043 New York 0 New York NA NA NA
## 5 10045 New York 0 New York NA NA NA
## 6 10047 New York 0 New York NA NA NA
## 7 10048 New York 0 New York NA NA NA
## 8 10055 New York 12 New York NA NA NA
## 9 10075 New York 25203 New York 1453 18391 NA
## 10 10080 New York 0 New York NA NA NA
## # ... with 58 more rows, and 6 more variables: malePctg <dbl>, asianPop <int>,
## # blackPop <int>, hispanicPop <int>, whitePop <int>,
## # geometry <GEOMETRY [US_survey_foot]>
# Looks like we converted census tracts to points,
# so some zip code contains no such points but parts of census tracts are inside the zip code.
# We can either use higher-resolution data (blockgroups, for example) or use areal interpolation.
#
plot(covidPopZipNYC["COVID_CASE_COUNT"], breaks='jenks')
plot(covidPopZipNYC["asianPop"], breaks='jenks')