R Spaital Lab Assignment # 2

task 2:

download.file("http://www.geo.hunter.cuny.edu/~ssun/R-Spatial/data/R-spatial-data.zip",
              "R-spatial-data.zip");
unzip("R-spatial-data.zip", exdir = "data")

# Load a list of packages. Install them first if they are not available.

# The list of pacakges
list.of.packages <- c("sf", "sp", "spatial", "maptools", "rgeos","rgdal",
                      "raster", "grid", "rasterVis",
                      "tidyverse", "magrittr", "ggpubr", "devtools",
                      "htmlwidgets", "mapview",
                      "classInt", "RColorBrewer", "ggmap", "tmap", "leaflet", 
                      "ggrepel", "ggsn",
                      "spdep","spatialreg","GWmodel");

# Check out the packages that have not been installed yet.
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]

# Install those missed packages first. It could take a long time for the first time.
if(length(new.packages)>0) install.packages(new.packages)

# Load all packages.
lapply(list.of.packages,function(x) {
  require(x,character.only = TRUE,quietly = TRUE)
})
## Warning: package 'sf' was built under R version 4.0.5
## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
## Warning: package 'sp' was built under R version 4.0.5
## Warning: package 'maptools' was built under R version 4.0.5
## Checking rgeos availability: TRUE
## Warning: package 'rgeos' was built under R version 4.0.5
## rgeos version: 0.5-5, (SVN revision 640)
##  GEOS runtime version: 3.8.0-CAPI-1.13.1 
##  Linking to sp version: 1.4-5 
##  Polygon checking: TRUE
## Warning: package 'rgdal' was built under R version 4.0.5
## rgdal: version: 1.5-23, (SVN revision 1121)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.2.1, released 2020/12/29
## Path to GDAL shared files: C:/Users/andie/OneDrive/Documents/R/win-library/4.0/rgdal/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 7.2.1, January 1st, 2021, [PJ_VERSION: 721]
## Path to PROJ shared files: C:/Users/andie/OneDrive/Documents/R/win-library/4.0/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.4-5
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
## Overwritten PROJ_LIB was C:/Users/andie/OneDrive/Documents/R/win-library/4.0/rgdal/proj
## Warning: package 'raster' was built under R version 4.0.5
## Warning: package 'rasterVis' was built under R version 4.0.5
## Warning: package 'terra' was built under R version 4.0.5
## terra version 1.1.17
## 
## Attaching package: 'terra'
## The following object is masked from 'package:grid':
## 
##     depth
## The following object is masked from 'package:rgdal':
## 
##     project
## Warning: package 'latticeExtra' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.1.0     v dplyr   1.0.5
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'readr' was built under R version 4.0.5
## Warning: package 'purrr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## Warning: package 'stringr' was built under R version 4.0.5
## Warning: package 'forcats' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::collapse()  masks terra::collapse()
## x dplyr::desc()      masks terra::desc()
## x tidyr::expand()    masks terra::expand()
## x tidyr::extract()   masks terra::extract(), raster::extract()
## x tidyr::fill()      masks terra::fill()
## x dplyr::filter()    masks stats::filter()
## x dplyr::lag()       masks stats::lag()
## x ggplot2::layer()   masks latticeExtra::layer()
## x dplyr::near()      masks terra::near()
## x tidyr::pack()      masks terra::pack()
## x dplyr::select()    masks terra::select(), raster::select()
## x tidyr::separate()  masks terra::separate()
## x purrr::transpose() masks terra::transpose()
## Warning: package 'magrittr' was built under R version 4.0.5
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
## The following objects are masked from 'package:terra':
## 
##     extract, inset
## The following object is masked from 'package:raster':
## 
##     extract
## Warning: package 'ggpubr' was built under R version 4.0.5
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:terra':
## 
##     rotate
## The following object is masked from 'package:raster':
## 
##     rotate
## Warning: package 'devtools' was built under R version 4.0.5
## Warning: package 'usethis' was built under R version 4.0.5
## Warning: package 'htmlwidgets' was built under R version 4.0.5
## Warning: package 'mapview' was built under R version 4.0.5
## GDAL version >= 3.1.0 | setting mapviewOptions(fgb = TRUE)
## Warning: package 'classInt' was built under R version 4.0.5
## Warning: package 'ggmap' was built under R version 4.0.5
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
## 
## Attaching package: 'ggmap'
## The following object is masked from 'package:magrittr':
## 
##     inset
## The following object is masked from 'package:terra':
## 
##     inset
## Warning: package 'tmap' was built under R version 4.0.5
## Warning: package 'leaflet' was built under R version 4.0.5
## Warning: package 'ggrepel' was built under R version 4.0.5
## Warning: package 'ggsn' was built under R version 4.0.5
## 
## Attaching package: 'ggsn'
## The following object is masked from 'package:raster':
## 
##     scalebar
## Warning: package 'spdep' was built under R version 4.0.5
## Warning: package 'spData' was built under R version 4.0.5
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Warning: package 'spatialreg' was built under R version 4.0.5
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## The following objects are masked from 'package:terra':
## 
##     expand, pack
## Registered S3 methods overwritten by 'spatialreg':
##   method                   from 
##   residuals.stsls          spdep
##   deviance.stsls           spdep
##   coef.stsls               spdep
##   print.stsls              spdep
##   summary.stsls            spdep
##   print.summary.stsls      spdep
##   residuals.gmsar          spdep
##   deviance.gmsar           spdep
##   coef.gmsar               spdep
##   fitted.gmsar             spdep
##   print.gmsar              spdep
##   summary.gmsar            spdep
##   print.summary.gmsar      spdep
##   print.lagmess            spdep
##   summary.lagmess          spdep
##   print.summary.lagmess    spdep
##   residuals.lagmess        spdep
##   deviance.lagmess         spdep
##   coef.lagmess             spdep
##   fitted.lagmess           spdep
##   logLik.lagmess           spdep
##   fitted.SFResult          spdep
##   print.SFResult           spdep
##   fitted.ME_res            spdep
##   print.ME_res             spdep
##   print.lagImpact          spdep
##   plot.lagImpact           spdep
##   summary.lagImpact        spdep
##   HPDinterval.lagImpact    spdep
##   print.summary.lagImpact  spdep
##   print.sarlm              spdep
##   summary.sarlm            spdep
##   residuals.sarlm          spdep
##   deviance.sarlm           spdep
##   coef.sarlm               spdep
##   vcov.sarlm               spdep
##   fitted.sarlm             spdep
##   logLik.sarlm             spdep
##   anova.sarlm              spdep
##   predict.sarlm            spdep
##   print.summary.sarlm      spdep
##   print.sarlm.pred         spdep
##   as.data.frame.sarlm.pred spdep
##   residuals.spautolm       spdep
##   deviance.spautolm        spdep
##   coef.spautolm            spdep
##   fitted.spautolm          spdep
##   print.spautolm           spdep
##   summary.spautolm         spdep
##   logLik.spautolm          spdep
##   print.summary.spautolm   spdep
##   print.WXImpact           spdep
##   summary.WXImpact         spdep
##   print.summary.WXImpact   spdep
##   predict.SLX              spdep
## 
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
## 
##     anova.sarlm, as.spam.listw, as_dgRMatrix_listw, as_dsCMatrix_I,
##     as_dsCMatrix_IrW, as_dsTMatrix_listw, bptest.sarlm, can.be.simmed,
##     cheb_setup, coef.gmsar, coef.sarlm, coef.spautolm, coef.stsls,
##     create_WX, deviance.gmsar, deviance.sarlm, deviance.spautolm,
##     deviance.stsls, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
##     errorsarlm, fitted.gmsar, fitted.ME_res, fitted.sarlm,
##     fitted.SFResult, fitted.spautolm, get.ClusterOption,
##     get.coresOption, get.mcOption, get.VerboseOption,
##     get.ZeroPolicyOption, GMargminImage, GMerrorsar, griffith_sone,
##     gstsls, Hausman.test, HPDinterval.lagImpact, impacts, intImpacts,
##     Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
##     lextrS, lextrW, lmSLX, logLik.sarlm, logLik.spautolm, LR.sarlm,
##     LR1.sarlm, LR1.spautolm, LU_prepermutate_setup, LU_setup,
##     Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
##     mom_calc_int2, moments_setup, powerWeights, predict.sarlm,
##     predict.SLX, print.gmsar, print.ME_res, print.sarlm,
##     print.sarlm.pred, print.SFResult, print.spautolm, print.stsls,
##     print.summary.gmsar, print.summary.sarlm, print.summary.spautolm,
##     print.summary.stsls, residuals.gmsar, residuals.sarlm,
##     residuals.spautolm, residuals.stsls, sacsarlm, SE_classic_setup,
##     SE_interp_setup, SE_whichMin_setup, set.ClusterOption,
##     set.coresOption, set.mcOption, set.VerboseOption,
##     set.ZeroPolicyOption, similar.listw, spam_setup, spam_update_setup,
##     SpatialFiltering, spautolm, spBreg_err, spBreg_lag, spBreg_sac,
##     stsls, subgraph_eigenw, summary.gmsar, summary.sarlm,
##     summary.spautolm, summary.stsls, trW, vcov.sarlm, Wald1.sarlm
## Warning: package 'GWmodel' was built under R version 4.0.5
## Warning: package 'robustbase' was built under R version 4.0.5
## Warning: package 'Rcpp' was built under R version 4.0.5
## Welcome to GWmodel version 2.2-3.
## The new version of GWmodel 2.2-5 now is ready
## 
## Attaching package: 'GWmodel'
## The following objects are masked from 'package:stats':
## 
##     BIC, fitted
## [[1]]
## [1] TRUE
## 
## [[2]]
## [1] TRUE
## 
## [[3]]
## [1] TRUE
## 
## [[4]]
## [1] TRUE
## 
## [[5]]
## [1] TRUE
## 
## [[6]]
## [1] TRUE
## 
## [[7]]
## [1] TRUE
## 
## [[8]]
## [1] TRUE
## 
## [[9]]
## [1] TRUE
## 
## [[10]]
## [1] TRUE
## 
## [[11]]
## [1] TRUE
## 
## [[12]]
## [1] TRUE
## 
## [[13]]
## [1] TRUE
## 
## [[14]]
## [1] TRUE
## 
## [[15]]
## [1] TRUE
## 
## [[16]]
## [1] TRUE
## 
## [[17]]
## [1] TRUE
## 
## [[18]]
## [1] TRUE
## 
## [[19]]
## [1] TRUE
## 
## [[20]]
## [1] TRUE
## 
## [[21]]
## [1] TRUE
## 
## [[22]]
## [1] TRUE
## 
## [[23]]
## [1] TRUE
## 
## [[24]]
## [1] TRUE
## 
## [[25]]
## [1] TRUE
# Or load specific pacakges
# require(sp); require(sf); library(raster)


require(ggmap)
library(tidyverse)
library(rstudioapi)
## Warning: package 'rstudioapi' was built under R version 4.0.5
#Aggregate data from different sources to the ZIP Codes as the core covid-19 data
#are available at that scale

#Main tasks are
#1. Join COVID-19 Data to the NYC ZIP Code data (sf or sp polygons)

#read in data
nys_hospitals <- st_read("D:/GradSchool/Hunter_College/Spring_2021/DataAnalysisR/Module12/R-Spatial_II_Lab/NYS_Health_Facility.csv")
## Reading layer `NYS_Health_Facility' from data source `D:\GradSchool\Hunter_College\Spring_2021\DataAnalysisR\Module12\R-Spatial_II_Lab\NYS_Health_Facility.csv' using driver `CSV'
## Warning: no simple feature geometries present: returning a data.frame or tbl_df
nys_food <- st_read("D:/GradSchool/Hunter_College/Spring_2021/DataAnalysisR/Module12/R-Spatial_II_Lab/nys_retail_food_store_xy.csv")
## Reading layer `nys_retail_food_store_xy' from data source `D:\GradSchool\Hunter_College\Spring_2021\DataAnalysisR\Module12\R-Spatial_II_Lab\nys_retail_food_store_xy.csv' using driver `CSV'
## Warning: no simple feature geometries present: returning a data.frame or tbl_df
tests_COVID <- read_csv("D:/GradSchool/Hunter_College/Spring_2021/DataAnalysisR/Module12/R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv")
## 
## -- Column specification --------------------------------------------------------
## cols(
##   MODIFIED_ZCTA = col_double(),
##   NEIGHBORHOOD_NAME = col_character(),
##   BOROUGH_GROUP = col_character(),
##   label = col_character(),
##   lat = col_double(),
##   lon = col_double(),
##   COVID_CASE_COUNT = col_double(),
##   COVID_CASE_RATE = col_double(),
##   POP_DENOMINATOR = col_double(),
##   COVID_DEATH_COUNT = col_double(),
##   COVID_DEATH_RATE = col_double(),
##   PERCENT_POSITIVE = col_double(),
##   TOTAL_COVID_TESTS = col_double()
## )
ZIPS_sf <- st_read("D:/GradSchool/Hunter_College/Spring_2021/DataAnalysisR/Module11/R-Spatial/data/ZIP_CODE_040114.shp")
## Reading layer `ZIP_CODE_040114' from data source `D:\GradSchool\Hunter_College\Spring_2021\DataAnalysisR\Module11\R-Spatial\data\ZIP_CODE_040114.shp' using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
#check to make sure that the ZIPs data was read in as an SF
class(ZIPS_sf)
## [1] "sf"         "data.frame"
#clean up data -- make sure join columns are the same type
names(ZIPS_sf)
##  [1] "ZIPCODE"    "BLDGZIP"    "PO_NAME"    "POPULATION" "AREA"      
##  [6] "STATE"      "COUNTY"     "ST_FIPS"    "CTY_FIPS"   "URL"       
## [11] "SHAPE_AREA" "SHAPE_LEN"  "geometry"
str(ZIPS_sf)
## Classes 'sf' and 'data.frame':   263 obs. of  13 variables:
##  $ ZIPCODE   : chr  "11436" "11213" "11212" "11225" ...
##  $ BLDGZIP   : chr  "0" "0" "0" "0" ...
##  $ PO_NAME   : chr  "Jamaica" "Brooklyn" "Brooklyn" "Brooklyn" ...
##  $ POPULATION: num  18681 62426 83866 56527 72280 ...
##  $ AREA      : num  22699295 29631004 41972104 23698630 36868799 ...
##  $ STATE     : chr  "NY" "NY" "NY" "NY" ...
##  $ COUNTY    : chr  "Queens" "Kings" "Kings" "Kings" ...
##  $ ST_FIPS   : chr  "36" "36" "36" "36" ...
##  $ CTY_FIPS  : chr  "081" "047" "047" "047" ...
##  $ URL       : chr  "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
##  $ SHAPE_AREA: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ SHAPE_LEN : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ geometry  :sfc_POLYGON of length 263; first list element: List of 1
##   ..$ : num [1:159, 1:2] 1038098 1038142 1038171 1038280 1038521 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:12] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...
ZIPS_sf <- ZIPS_sf%>% mutate(ZIPCODE=as.numeric(as.character(ZIPCODE)))
tests_COVID <- tests_COVID%>% mutate(mod_ZCTA = as.numeric(as.character(MODIFIED_ZCTA)))

#check to make sure join columns are now both numeric
str(tests_COVID)
## spec_tbl_df[,14] [177 x 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ MODIFIED_ZCTA    : num [1:177] 10001 10002 10003 10004 10005 ...
##  $ NEIGHBORHOOD_NAME: chr [1:177] "Chelsea/NoMad/West Chelsea" "Chinatown/Lower East Side" "East Village/Gramercy/Greenwich Village" "Financial District" ...
##  $ BOROUGH_GROUP    : chr [1:177] "Manhattan" "Manhattan" "Manhattan" "Manhattan" ...
##  $ label            : chr [1:177] "10001, 10118" "10002" "10003" "10004" ...
##  $ lat              : num [1:177] 40.8 40.7 40.7 40.7 40.7 ...
##  $ lon              : num [1:177] -74 -74 -74 -74 -74 ...
##  $ COVID_CASE_COUNT : num [1:177] 1542 5902 2803 247 413 ...
##  $ COVID_CASE_RATE  : num [1:177] 5584 7836 5193 8311 4716 ...
##  $ POP_DENOMINATOR  : num [1:177] 27613 75323 53978 2972 8757 ...
##  $ COVID_DEATH_COUNT: num [1:177] 35 264 48 2 0 1 4 118 37 62 ...
##  $ COVID_DEATH_RATE : num [1:177] 126.8 350.5 88.9 67.3 0 ...
##  $ PERCENT_POSITIVE : num [1:177] 7.86 12.63 6.93 6.92 6.72 ...
##  $ TOTAL_COVID_TESTS: num [1:177] 20158 48197 41076 3599 6102 ...
##  $ mod_ZCTA         : num [1:177] 10001 10002 10003 10004 10005 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   MODIFIED_ZCTA = col_double(),
##   ..   NEIGHBORHOOD_NAME = col_character(),
##   ..   BOROUGH_GROUP = col_character(),
##   ..   label = col_character(),
##   ..   lat = col_double(),
##   ..   lon = col_double(),
##   ..   COVID_CASE_COUNT = col_double(),
##   ..   COVID_CASE_RATE = col_double(),
##   ..   POP_DENOMINATOR = col_double(),
##   ..   COVID_DEATH_COUNT = col_double(),
##   ..   COVID_DEATH_RATE = col_double(),
##   ..   PERCENT_POSITIVE = col_double(),
##   ..   TOTAL_COVID_TESTS = col_double()
##   .. )
str(ZIPS_sf)
## Classes 'sf' and 'data.frame':   263 obs. of  13 variables:
##  $ ZIPCODE   : num  11436 11213 11212 11225 11218 ...
##  $ BLDGZIP   : chr  "0" "0" "0" "0" ...
##  $ PO_NAME   : chr  "Jamaica" "Brooklyn" "Brooklyn" "Brooklyn" ...
##  $ POPULATION: num  18681 62426 83866 56527 72280 ...
##  $ AREA      : num  22699295 29631004 41972104 23698630 36868799 ...
##  $ STATE     : chr  "NY" "NY" "NY" "NY" ...
##  $ COUNTY    : chr  "Queens" "Kings" "Kings" "Kings" ...
##  $ ST_FIPS   : chr  "36" "36" "36" "36" ...
##  $ CTY_FIPS  : chr  "081" "047" "047" "047" ...
##  $ URL       : chr  "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
##  $ SHAPE_AREA: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ SHAPE_LEN : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ geometry  :sfc_POLYGON of length 263; first list element: List of 1
##   ..$ : num [1:159, 1:2] 1038098 1038142 1038171 1038280 1038521 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:12] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...
ZIPS_Covid_Join <- left_join(ZIPS_sf,tests_COVID, by = c('ZIPCODE' = 'mod_ZCTA'))
str(ZIPS_Covid_Join)
## Classes 'sf' and 'data.frame':   263 obs. of  26 variables:
##  $ ZIPCODE          : num  11436 11213 11212 11225 11218 ...
##  $ BLDGZIP          : chr  "0" "0" "0" "0" ...
##  $ PO_NAME          : chr  "Jamaica" "Brooklyn" "Brooklyn" "Brooklyn" ...
##  $ POPULATION       : num  18681 62426 83866 56527 72280 ...
##  $ AREA             : num  22699295 29631004 41972104 23698630 36868799 ...
##  $ STATE            : chr  "NY" "NY" "NY" "NY" ...
##  $ COUNTY           : chr  "Queens" "Kings" "Kings" "Kings" ...
##  $ ST_FIPS          : chr  "36" "36" "36" "36" ...
##  $ CTY_FIPS         : chr  "081" "047" "047" "047" ...
##  $ URL              : chr  "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
##  $ SHAPE_AREA       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ SHAPE_LEN        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ MODIFIED_ZCTA    : num  11436 11213 11212 11225 11218 ...
##  $ NEIGHBORHOOD_NAME: chr  "South Jamaica/South Ozone Park" "Crown Heights (East)" "Ocean Hill-Brownsville" "Crown Heights (West)/Prospect Lefferts Gardens" ...
##  $ BOROUGH_GROUP    : chr  "Queens" "Brooklyn" "Brooklyn" "Brooklyn" ...
##  $ label            : chr  "11436" "11213" "11212" "11225" ...
##  $ lat              : num  40.7 40.7 40.7 40.7 40.6 ...
##  $ lon              : num  -73.8 -73.9 -73.9 -74 -74 ...
##  $ COVID_CASE_COUNT : num  1888 5166 7182 3833 6199 ...
##  $ COVID_CASE_RATE  : num  9420 7997 9710 6664 8377 ...
##  $ POP_DENOMINATOR  : num  20043 64601 73967 57514 73996 ...
##  $ COVID_DEATH_COUNT: num  64 203 330 177 218 368 256 206 380 219 ...
##  $ COVID_DEATH_RATE : num  319 314 446 308 295 ...
##  $ PERCENT_POSITIVE : num  17.6 13.7 15.6 11.6 13.9 ...
##  $ TOTAL_COVID_TESTS: num  11082 38560 47319 33709 45884 ...
##  $ geometry         :sfc_POLYGON of length 263; first list element: List of 1
##   ..$ : num [1:159, 1:2] 1038098 1038142 1038171 1038280 1038521 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:25] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...
#Check to make sure join is valid and can be spatially displayed
Plot_PctPos <- plot(ZIPS_Covid_Join['PERCENT_POSITIVE'],breaks = 'jenks',main="Percent Positive Cases through 4/23/2021")

Plot_CaseCnt <- plot(ZIPS_Covid_Join['COVID_CASE_COUNT'],breaks = 'jenks',main="# Positive Cases through 4/23/2021")

#want to see what this looks like with per sq mi taken into account
st_crs(ZIPS_Covid_Join)
## Coordinate Reference System:
##   User input: NAD83 / New York Long Island (ftUS) 
##   wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4269]],
##     CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
##         METHOD["Lambert Conic Conformal (2SP)",
##             ID["EPSG",9802]],
##         PARAMETER["Latitude of false origin",40.1666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-74,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",984250,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
##         BBOX[40.47,-74.26,41.3,-71.8]],
##     ID["EPSG",2263]]
ZIPS_Covid_Join <- ZIPS_Covid_Join%>%mutate(areaSF = st_area(geometry),
                         areaSqMi = areaSF/27878400,
                         CasesPerSqMi = COVID_CASE_COUNT/areaSqMi,
                         CasesPer100K = COVID_CASE_COUNT/POPULATION*100000)

Plot_CaseDensity <- plot(ZIPS_Covid_Join['CasesPerSqMi'],breaks = 'jenks',main="Pos. Cases Per Sq Mi through 4/23/2021")

Plot_CasePer100K <- plot(ZIPS_Covid_Join['CasesPer100K'],breaks = 'jenks',main="Pos. Cases Per 100K through 4/23/2021")

Task 2

sf::st_read("D:/GradSchool/Hunter_College/Spring_2021/DataAnalysisR/Module12/R-Spatial_II_Lab/nycFoodStore.shp") -> nycFoodStoreSF
## Reading layer `nycFoodStore' from data source `D:\GradSchool\Hunter_College\Spring_2021\DataAnalysisR\Module12\R-Spatial_II_Lab\nycFoodStore.shp' using driver `ESRI Shapefile'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS:  WGS 84
st_crs(nycFoodStoreSF) #ESPG 4326 (aka WGS 1984)
## Coordinate Reference System:
##   User input: WGS 84 
##   wkt:
## GEOGCRS["WGS 84",
##     DATUM["World Geodetic System 1984",
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["latitude",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["longitude",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4326]]
st_crs(ZIPS_sf) #ESPG 2263 (aka NY LONG ISLAND in Ft US)
## Coordinate Reference System:
##   User input: NAD83 / New York Long Island (ftUS) 
##   wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4269]],
##     CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
##         METHOD["Lambert Conic Conformal (2SP)",
##             ID["EPSG",9802]],
##         PARAMETER["Latitude of false origin",40.1666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-74,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",984250,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
##         BBOX[40.47,-74.26,41.3,-71.8]],
##     ID["EPSG",2263]]
#want to reproject food store CRS into the same as the ZIPs for a more local projects
sf::st_transform(nycFoodStoreSF, sf::st_crs(2263)) -> nycFoodStoreLocal
st_crs(nycFoodStoreLocal)
## Coordinate Reference System:
##   User input: EPSG:2263 
##   wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4269]],
##     CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
##         METHOD["Lambert Conic Conformal (2SP)",
##             ID["EPSG",9802]],
##         PARAMETER["Latitude of false origin",40.1666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-74,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",984250,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
##         BBOX[40.47,-74.26,41.3,-71.8]],
##     ID["EPSG",2263]]
#set coordinate reference system to the same as the ZIPs CRS
nycFoodStoreSF%<>% sf::st_transform((sf::st_crs(ZIPS_sf)))
#join ZIPs (that also have covid data) to the food stores
#join the zips that contain food stores, group by the ZIP and by the establishment type

ZIPS_Food<-sf::st_join(ZIPS_sf,nycFoodStoreLocal,join=st_contains)

ZIPS_Food<-ZIPS_Food%>%
  group_by(ZIPCODE, Estbl_T)%>%
  dplyr::filter(Estbl_T!='A')%>%
  dplyr::filter(stringr::str_detect(Estbl_T,'[ADJ]'))%>%
  summarize(FoodStoreNum = n())%>%
  #group_by(ZIPCODE)%>%
  summarize(NumFood_PerZIP=sum(FoodStoreNum))
## `summarise()` has grouped output by 'ZIPCODE'. You can override using the `.groups` argument.
plot(ZIPS_Food['NumFood_PerZIP'],breaks='jenks', main='Number of Food Stores Per ZIP (Missing ZIPS)')

#want to re-join ZIP data to keep all ZIPS even if there are no food stores in them 
#It seems strange that there are ZIPS with null values for number of food stores

ZIPS_Food2<-st_join(ZIPS_sf,ZIPS_Food,by= c('ZIPCODE' = 'ZIPCODE'))

#Now for those ZIP codes with no food stores, replace NA with 0

ZIPS_Food2[is.na('NumFood_PerZIP')]<-0
#str(NumFood_PerZIP2)


ZIPS_Food2<-ZIPS_Food2%>%group_by(ZIPCODE.x)%>%
  summarize(NumFood_PerZIP=sum(NumFood_PerZIP))

plot(ZIPS_Food2['NumFood_PerZIP'],breaks = 'jenks',main='Number of Food Stores Per ZIP')

#read in health data
NYS_Health <- read_csv("NYS_Health_Facility.csv")
## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_character(),
##   `Facility ID` = col_double(),
##   `Facility Phone Number` = col_double(),
##   `Facility Fax Number` = col_double(),
##   `Facility County Code` = col_double(),
##   `Regional Office ID` = col_double(),
##   `Main Site Facility ID` = col_double(),
##   `Facility Latitude` = col_double(),
##   `Facility Longitude` = col_double()
## )
## i Use `spec()` for the full column specifications.
names(NYS_Health)
##  [1] "Facility ID"                  "Facility Name"               
##  [3] "Short Description"            "Description"                 
##  [5] "Facility Open Date"           "Facility Address 1"          
##  [7] "Facility Address 2"           "Facility City"               
##  [9] "Facility State"               "Facility Zip Code"           
## [11] "Facility Phone Number"        "Facility Fax Number"         
## [13] "Facility Website"             "Facility County Code"        
## [15] "Facility County"              "Regional Office ID"          
## [17] "Regional Office"              "Main Site Name"              
## [19] "Main Site Facility ID"        "Operating Certificate Number"
## [21] "Operator Name"                "Operator Address 1"          
## [23] "Operator Address 2"           "Operator City"               
## [25] "Operator State"               "Operator Zip Code"           
## [27] "Cooperator Name"              "Cooperator Address"          
## [29] "Cooperator Address 2"         "Cooperator City"             
## [31] "Cooperator State"             "Cooperator Zip Code"         
## [33] "Ownership Type"               "Facility Latitude"           
## [35] "Facility Longitude"           "Facility Location"
#clean data
NYS_Health<-NYS_Health%>%filter(NYS_Health$`Facility Latitude`>0)

NYS_Health%>%filter(!is.na(NYS_Health$`Facility Latitude`))
## # A tibble: 3,843 x 36
##    `Facility ID` `Facility Name`  `Short Descripti~ Description `Facility Open ~
##            <dbl> <chr>            <chr>             <chr>       <chr>           
##  1           204 Hospice at Lour~ HSPC              Hospice     06/01/1985      
##  2           620 Charles T Sitri~ NH                Residentia~ 02/01/1989      
##  3          1156 East Side Nursi~ NH                Residentia~ 08/01/1979      
##  4          2589 Wellsville Mano~ NH                Residentia~ 02/01/1989      
##  5          3455 Harris Hill Nur~ NH                Residentia~ 04/08/1992      
##  6          3853 Garden City Sur~ DTC               Diagnostic~ 04/07/2008      
##  7          4249 Willcare         CHHA              Certified ~ 05/15/1990      
##  8          4473 Good Shepherd H~ HSPC              Hospice     09/01/2002      
##  9          6230 NYU Langone Rut~ HOSP-EC           Hospital E~ 01/01/2006      
## 10          6482 Endoscopy Cente~ DTC               Diagnostic~ 01/20/2003      
## # ... with 3,833 more rows, and 31 more variables: Facility Address 1 <chr>,
## #   Facility Address 2 <chr>, Facility City <chr>, Facility State <chr>,
## #   Facility Zip Code <chr>, Facility Phone Number <dbl>,
## #   Facility Fax Number <dbl>, Facility Website <chr>,
## #   Facility County Code <dbl>, Facility County <chr>,
## #   Regional Office ID <dbl>, Regional Office <chr>, Main Site Name <chr>,
## #   Main Site Facility ID <dbl>, Operating Certificate Number <chr>,
## #   Operator Name <chr>, Operator Address 1 <chr>, Operator Address 2 <chr>,
## #   Operator City <chr>, Operator State <chr>, Operator Zip Code <chr>,
## #   Cooperator Name <chr>, Cooperator Address <chr>,
## #   Cooperator Address 2 <chr>, Cooperator City <chr>, Cooperator State <chr>,
## #   Cooperator Zip Code <chr>, Ownership Type <chr>, Facility Latitude <dbl>,
## #   Facility Longitude <dbl>, Facility Location <chr>
NYC_Health<-NYS_Health%>%
  filter(NYS_Health$`Facility State`=='New York' & NYS_Health$`Facility County`=='Bronx'|NYS_Health$`Facility County`=='Kings' |NYS_Health$`Facility County`=='New York'| NYS_Health$`Facility County`=='Richmond'| NYS_Health$`Facility County`=='Queens')

unique(NYC_Health$`Short Description`)
##  [1] "HOSP-EC" "DTC"     "NH"      "CHHA"    "DTC-EC"  "HOSP-SB" "HOSP"   
##  [8] NA        "LTHHCP"  "ADHCP"   "HSPC"
names(NYC_Health)
##  [1] "Facility ID"                  "Facility Name"               
##  [3] "Short Description"            "Description"                 
##  [5] "Facility Open Date"           "Facility Address 1"          
##  [7] "Facility Address 2"           "Facility City"               
##  [9] "Facility State"               "Facility Zip Code"           
## [11] "Facility Phone Number"        "Facility Fax Number"         
## [13] "Facility Website"             "Facility County Code"        
## [15] "Facility County"              "Regional Office ID"          
## [17] "Regional Office"              "Main Site Name"              
## [19] "Main Site Facility ID"        "Operating Certificate Number"
## [21] "Operator Name"                "Operator Address 1"          
## [23] "Operator Address 2"           "Operator City"               
## [25] "Operator State"               "Operator Zip Code"           
## [27] "Cooperator Name"              "Cooperator Address"          
## [29] "Cooperator Address 2"         "Cooperator City"             
## [31] "Cooperator State"             "Cooperator Zip Code"         
## [33] "Ownership Type"               "Facility Latitude"           
## [35] "Facility Longitude"           "Facility Location"
NYC_Health<-NYC_Health%>%
  filter(NYC_Health$`Short Description`=='NH'|
           NYC_Health$`Short Description`=='HOSP'|
           NYC_Health$`Short Description`=='HCPC')%>%
  mutate(lat = as.numeric(`Facility Latitude`),long = as.numeric(`Facility Longitude`),
         ZipCode = as.numeric(`Facility Zip Code`))
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
#check to make sure join columns match up
str(NYC_Health) #zip is character column
## spec_tbl_df[,39] [235 x 39] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Facility ID                 : num [1:235] 1217 1185 1469 1705 7745 ...
##  $ Facility Name               : chr [1:235] "St Patricks Home" "Montefiore Medical Center - Montefiore Westchester Square" "Mount Sinai Morningside" "Dry Harbor Nursing Home" ...
##  $ Short Description           : chr [1:235] "NH" "HOSP" "HOSP" "NH" ...
##  $ Description                 : chr [1:235] "Residential Health Care Facility - SNF" "Hospital" "Hospital" "Residential Health Care Facility - SNF" ...
##  $ Facility Open Date          : chr [1:235] "01/01/1901" "08/01/1979" "01/01/1901" "06/01/1987" ...
##  $ Facility Address 1          : chr [1:235] "66 Van Cortlandt Park South" "2475 St. Raymond Avenue" "1111 Amsterdam Avenue" "61-35 Dry Harbor Road" ...
##  $ Facility Address 2          : chr [1:235] NA NA NA NA ...
##  $ Facility City               : chr [1:235] "Bronx" "Bronx" "New York" "Middle Village" ...
##  $ Facility State              : chr [1:235] "New York" "New York" "New York" "New York" ...
##  $ Facility Zip Code           : chr [1:235] "10463" "10461" "10025" "11379" ...
##  $ Facility Phone Number       : num [1:235] 7.19e+09 7.18e+09 2.13e+09 7.19e+09 7.18e+09 ...
##  $ Facility Fax Number         : num [1:235] NA NA NA NA NA ...
##  $ Facility Website            : chr [1:235] NA NA NA NA ...
##  $ Facility County Code        : num [1:235] 7094 7094 7093 7096 7096 ...
##  $ Facility County             : chr [1:235] "Bronx" "Bronx" "New York" "Queens" ...
##  $ Regional Office ID          : num [1:235] 5 5 5 5 5 5 5 5 5 5 ...
##  $ Regional Office             : chr [1:235] "Metropolitan Area Regional Office - New York City" "Metropolitan Area Regional Office - New York City" "Metropolitan Area Regional Office - New York City" "Metropolitan Area Regional Office - New York City" ...
##  $ Main Site Name              : chr [1:235] NA "Montefiore Medical Center - Henry & Lucy Moses Div" "Mount Sinai West" NA ...
##  $ Main Site Facility ID       : num [1:235] NA 1169 1466 NA NA ...
##  $ Operating Certificate Number: chr [1:235] "7000307N" "7000006H" "7002032H" "7003359N" ...
##  $ Operator Name               : chr [1:235] "St Patricks Home for the Aged & Infirm" "Montefiore Medical Center" "St Lukes Roosevelt Hospital Center Inc" "Dry Harbor HRF Inc" ...
##  $ Operator Address 1          : chr [1:235] "Box 218 Rd 1" "111 East 210th Street" "Amsterdam Avenue At 114th Street" "61-35 Dry Harbor Road" ...
##  $ Operator Address 2          : chr [1:235] NA NA NA NA ...
##  $ Operator City               : chr [1:235] "Germantown" "Bronx" "New York" "Middle Village" ...
##  $ Operator State              : chr [1:235] "New York" "New York" "New York" "New York" ...
##  $ Operator Zip Code           : chr [1:235] "12526" "10467" "10025" "11379" ...
##  $ Cooperator Name             : chr [1:235] NA "Montefiore Health System, Inc" "Mount Sinai Hospitals Group, Inc." NA ...
##  $ Cooperator Address          : chr [1:235] NA "111 East 210th Street" "One Gustave L. Levy Place" NA ...
##  $ Cooperator Address 2        : chr [1:235] NA NA NA NA ...
##  $ Cooperator City             : chr [1:235] NA "Bronx" "New York" NA ...
##  $ Cooperator State            : chr [1:235] "New York" "New York" "New York" "New York" ...
##  $ Cooperator Zip Code         : chr [1:235] NA "10467" "10029" NA ...
##  $ Ownership Type              : chr [1:235] "Not for Profit Corporation" "Not for Profit Corporation" "Not for Profit Corporation" "Business Corporation" ...
##  $ Facility Latitude           : num [1:235] 40.9 40.8 40.8 40.7 40.8 ...
##  $ Facility Longitude          : num [1:235] -73.9 -73.8 -74 -73.9 -73.9 ...
##  $ Facility Location           : chr [1:235] "(40.884361, -73.888451)" "(40.840431, -73.848244)" "(40.805912, -73.961639)" "(40.726879, -73.871681)" ...
##  $ lat                         : num [1:235] 40.9 40.8 40.8 40.7 40.8 ...
##  $ long                        : num [1:235] -73.9 -73.8 -74 -73.9 -73.9 ...
##  $ ZipCode                     : num [1:235] 10463 10461 10025 11379 11369 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   `Facility ID` = col_double(),
##   ..   `Facility Name` = col_character(),
##   ..   `Short Description` = col_character(),
##   ..   Description = col_character(),
##   ..   `Facility Open Date` = col_character(),
##   ..   `Facility Address 1` = col_character(),
##   ..   `Facility Address 2` = col_character(),
##   ..   `Facility City` = col_character(),
##   ..   `Facility State` = col_character(),
##   ..   `Facility Zip Code` = col_character(),
##   ..   `Facility Phone Number` = col_double(),
##   ..   `Facility Fax Number` = col_double(),
##   ..   `Facility Website` = col_character(),
##   ..   `Facility County Code` = col_double(),
##   ..   `Facility County` = col_character(),
##   ..   `Regional Office ID` = col_double(),
##   ..   `Regional Office` = col_character(),
##   ..   `Main Site Name` = col_character(),
##   ..   `Main Site Facility ID` = col_double(),
##   ..   `Operating Certificate Number` = col_character(),
##   ..   `Operator Name` = col_character(),
##   ..   `Operator Address 1` = col_character(),
##   ..   `Operator Address 2` = col_character(),
##   ..   `Operator City` = col_character(),
##   ..   `Operator State` = col_character(),
##   ..   `Operator Zip Code` = col_character(),
##   ..   `Cooperator Name` = col_character(),
##   ..   `Cooperator Address` = col_character(),
##   ..   `Cooperator Address 2` = col_character(),
##   ..   `Cooperator City` = col_character(),
##   ..   `Cooperator State` = col_character(),
##   ..   `Cooperator Zip Code` = col_character(),
##   ..   `Ownership Type` = col_character(),
##   ..   `Facility Latitude` = col_double(),
##   ..   `Facility Longitude` = col_double(),
##   ..   `Facility Location` = col_character()
##   .. )
names(NYC_Health)
##  [1] "Facility ID"                  "Facility Name"               
##  [3] "Short Description"            "Description"                 
##  [5] "Facility Open Date"           "Facility Address 1"          
##  [7] "Facility Address 2"           "Facility City"               
##  [9] "Facility State"               "Facility Zip Code"           
## [11] "Facility Phone Number"        "Facility Fax Number"         
## [13] "Facility Website"             "Facility County Code"        
## [15] "Facility County"              "Regional Office ID"          
## [17] "Regional Office"              "Main Site Name"              
## [19] "Main Site Facility ID"        "Operating Certificate Number"
## [21] "Operator Name"                "Operator Address 1"          
## [23] "Operator Address 2"           "Operator City"               
## [25] "Operator State"               "Operator Zip Code"           
## [27] "Cooperator Name"              "Cooperator Address"          
## [29] "Cooperator Address 2"         "Cooperator City"             
## [31] "Cooperator State"             "Cooperator Zip Code"         
## [33] "Ownership Type"               "Facility Latitude"           
## [35] "Facility Longitude"           "Facility Location"           
## [37] "lat"                          "long"                        
## [39] "ZipCode"
unique(NYC_Health$ZipCode)
##   [1] 10463 10461 10025 11379 11369 10466 10035 11201 11219 10016 10301 10469
##  [13] 10457 11694 11373 11238 11360 10021 11423 11375 10032 11217 11226 11354
##  [25] 11230 10452 11004 11432 10304 10314 10467 11357 11203 11213 10305 10471
##  [37] 11228 11355 10309 11691 11692 10475 11377    NA 10024 11362 10003 11234
##  [49] 10462 11235 10306 10002 11372 11214 11207 10034 11233 11220 11218 10472
##  [61] 10037 11229 10029 11040 10459 11224 11212 11236 10456 11366 11249 10465
##  [73] 11429 11418 11237 10040 10065 10473 11368 11215 10044 11427 11434 10310
##  [85] 11239 11378 10019 10027 10468 11208 11216 10011 11206 11361 11221 10014
##  [97] 11102 10451 10038 11435
#need to filter out NA
NYC_Health<- NYC_Health%>%filter(!is.na(ZipCode))

str(ZIPS_sf) #zips are numeric
## Classes 'sf' and 'data.frame':   263 obs. of  13 variables:
##  $ ZIPCODE   : num  11436 11213 11212 11225 11218 ...
##  $ BLDGZIP   : chr  "0" "0" "0" "0" ...
##  $ PO_NAME   : chr  "Jamaica" "Brooklyn" "Brooklyn" "Brooklyn" ...
##  $ POPULATION: num  18681 62426 83866 56527 72280 ...
##  $ AREA      : num  22699295 29631004 41972104 23698630 36868799 ...
##  $ STATE     : chr  "NY" "NY" "NY" "NY" ...
##  $ COUNTY    : chr  "Queens" "Kings" "Kings" "Kings" ...
##  $ ST_FIPS   : chr  "36" "36" "36" "36" ...
##  $ CTY_FIPS  : chr  "081" "047" "047" "047" ...
##  $ URL       : chr  "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
##  $ SHAPE_AREA: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ SHAPE_LEN : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ geometry  :sfc_POLYGON of length 263; first list element: List of 1
##   ..$ : num [1:159, 1:2] 1038098 1038142 1038171 1038280 1038521 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:12] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...
#try and join csv to ZIP sf, comes out with a lot of null/duplicates
ZIPS_Health_csv<-left_join(ZIPS_sf,NYC_Health,by=c('ZIPCODE' = 'ZipCode'))

ZIPS_Health_csv%>%filter(!is.na(lat))
## Simple feature collection with 245 features and 50 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 915316.2 ymin: 122018.3 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
## First 10 features:
##    ZIPCODE BLDGZIP  PO_NAME POPULATION     AREA STATE COUNTY ST_FIPS CTY_FIPS
## 1    11213       0 Brooklyn      62426 29631004    NY  Kings      36      047
## 2    11213       0 Brooklyn      62426 29631004    NY  Kings      36      047
## 3    11212       0 Brooklyn      83866 41972104    NY  Kings      36      047
## 4    11218       0 Brooklyn      72280 36868799    NY  Kings      36      047
## 5    11226       0 Brooklyn     106132 39408598    NY  Kings      36      047
## 6    11226       0 Brooklyn     106132 39408598    NY  Kings      36      047
## 7    11226       0 Brooklyn     106132 39408598    NY  Kings      36      047
## 8    11219       0 Brooklyn      92561 42002738    NY  Kings      36      047
## 9    11219       0 Brooklyn      92561 42002738    NY  Kings      36      047
## 10   11219       0 Brooklyn      92561 42002738    NY  Kings      36      047
##                     URL SHAPE_AREA SHAPE_LEN Facility ID
## 1  http://www.usps.com/          0         0        1309
## 2  http://www.usps.com/          0         0        1407
## 3  http://www.usps.com/          0         0        1376
## 4  http://www.usps.com/          0         0        1392
## 5  http://www.usps.com/          0         0        1576
## 6  http://www.usps.com/          0         0        1369
## 7  http://www.usps.com/          0         0        1380
## 8  http://www.usps.com/          0         0        1403
## 9  http://www.usps.com/          0         0        1305
## 10 http://www.usps.com/          0         0        1393
##                                                     Facility Name
## 1                                       Interfaith Medical Center
## 2             Crown Heights Center for Nursing and Rehabilitation
## 3  Schulman and Schachne Institute for Nursing And Rehabilitation
## 4              Palm Gardens Center for Nursing and Rehabilitation
## 5                                         Ditmas Park Care Center
## 6                           NY Congregational Nursing Center, Inc
## 7               Caton Park Rehabilitation and Nursing Center, LLC
## 8              Boro Park Center for Rehabilitation and Healthcare
## 9                                       Maimonides Medical Center
## 10             The Heritage Rehabilitation and Health Care Center
##    Short Description                            Description Facility Open Date
## 1               HOSP                               Hospital         04/24/1980
## 2                 NH Residential Health Care Facility - SNF         01/01/1901
## 3                 NH Residential Health Care Facility - SNF         01/01/1901
## 4                 NH Residential Health Care Facility - SNF         02/01/1980
## 5                 NH Residential Health Care Facility - SNF         02/01/1989
## 6                 NH Residential Health Care Facility - SNF         01/01/1901
## 7                 NH Residential Health Care Facility - SNF         02/01/1980
## 8                 NH Residential Health Care Facility - SNF         01/01/1901
## 9               HOSP                               Hospital         01/01/1901
## 10                NH Residential Health Care Facility - SNF         02/01/1980
##        Facility Address 1 Facility Address 2 Facility City Facility State
## 1    1545 Atlantic Avenue               <NA>      Brooklyn       New York
## 2  810-20 St Marks Avenue               <NA>      Brooklyn       New York
## 3    555 Rockaway Parkway               <NA>      Brooklyn       New York
## 4            615 Avenue C               <NA>      Brooklyn       New York
## 5      2107 Ditmas Avenue               <NA>      Brooklyn       New York
## 6    135 Linden Boulevard               <NA>      Brooklyn       New York
## 7       1312 Caton Avenue               <NA>      Brooklyn       New York
## 8           4915 10th Ave               <NA>      Brooklyn       New York
## 9       4802 Tenth Avenue               <NA>      Brooklyn       New York
## 10          5606 15th Ave               <NA>      Brooklyn       New York
##    Facility Zip Code Facility Phone Number Facility Fax Number Facility Website
## 1              11213            7189357000                  NA             <NA>
## 2              11213            7184677300                  NA             <NA>
## 3              11212            7182405775                  NA             <NA>
## 4              11218            7186333300                  NA             <NA>
## 5              11226            7184628100                  NA             <NA>
## 6              11226            7182840039                  NA             <NA>
## 7              11226            7186937000                  NA             <NA>
## 8              11219            7188513700          7189726120             <NA>
## 9              11219            7182836000                  NA             <NA>
## 10             11219            7188511000                  NA             <NA>
##    Facility County Code Facility County Regional Office ID
## 1                  7095           Kings                  5
## 2                  7095           Kings                  5
## 3                  7095           Kings                  5
## 4                  7095           Kings                  5
## 5                  7095           Kings                  5
## 6                  7095           Kings                  5
## 7                  7095           Kings                  5
## 8                  7095           Kings                  5
## 9                  7095           Kings                  5
## 10                 7095           Kings                  5
##                                      Regional Office Main Site Name
## 1  Metropolitan Area Regional Office - New York City           <NA>
## 2  Metropolitan Area Regional Office - New York City           <NA>
## 3  Metropolitan Area Regional Office - New York City           <NA>
## 4  Metropolitan Area Regional Office - New York City           <NA>
## 5  Metropolitan Area Regional Office - New York City           <NA>
## 6  Metropolitan Area Regional Office - New York City           <NA>
## 7  Metropolitan Area Regional Office - New York City           <NA>
## 8  Metropolitan Area Regional Office - New York City           <NA>
## 9  Metropolitan Area Regional Office - New York City           <NA>
## 10 Metropolitan Area Regional Office - New York City           <NA>
##    Main Site Facility ID Operating Certificate Number
## 1                     NA                     7001046H
## 2                     NA                     7001398N
## 3                     NA                     7001318N
## 4                     NA                     7001391N
## 5                     NA                     7001393N
## 6                     NA                     7001309N
## 7                     NA                     7001366N
## 8                     NA                     7001394N
## 9                     NA                     7001020H
## 10                    NA                     7001392N
##                                                    Operator Name
## 1                                      Interfaith Medical Center
## 2                           St. Marks Brooklyn Associates L.L.C.
## 3  The Schulman and Schachne Institute for Nursing And Rehab,Inc
## 4                                  Palm Gardens Care Center, LLC
## 5                  Ditmas Park Rehabilitation & Care Center, LLC
## 6                         New York Congregational Nursing Center
## 7              Caton Park Rehabilitation and Nursing Center, LLC
## 8                                    Boro Park Operating Co, LLC
## 9                                      Maimonides Medical Center
## 10                                    Palm Tree Care Center, LLC
##      Operator Address 1 Operator Address 2 Operator City Operator State
## 1    555 Prospect Place               <NA>      Brooklyn       New York
## 2  810 St. Marks Avenue               <NA>      Brooklyn       New York
## 3   One Brookdale Plaza               <NA>      Brooklyn       New York
## 4          615 Avenue C               <NA>      Brooklyn       New York
## 5    2107 Ditmas Avenue               <NA>      Brooklyn       New York
## 6  135 Linden Boulevard               <NA>      Brooklyn       New York
## 7     1312 Caton Avenue               <NA>      Brooklyn       New York
## 8     4915 Tenth Avenue               <NA>      Brooklyn       New York
## 9        4802 Tenth Ave               <NA>      Brooklyn       New York
## 10     5606 15th Avenue               <NA>      Brooklyn       New York
##    Operator Zip Code                  Cooperator Name Cooperator Address
## 1              11238 One Brooklyn Health System, Inc.  1 Brookdale Plaza
## 2              11213                             <NA>               <NA>
## 3              11212 One Brooklyn Health System, Inc.  1 Brookdale Plaza
## 4              11218                             <NA>               <NA>
## 5              11226                             <NA>               <NA>
## 6              11226                             <NA>               <NA>
## 7              11226                             <NA>               <NA>
## 8              11219                             <NA>               <NA>
## 9              11219                             <NA>               <NA>
## 10             11219                             <NA>               <NA>
##                       Cooperator Address 2 Cooperator City Cooperator State
## 1  Care of Alex Rovt Chairman of the Board        Brooklyn         New York
## 2                                     <NA>            <NA>         New York
## 3  Care of Alex Rovt Chairman of the Board        Brooklyn         New York
## 4                                     <NA>            <NA>         New York
## 5                                     <NA>            <NA>         New York
## 6                                     <NA>            <NA>         New York
## 7                                     <NA>            <NA>         New York
## 8                                     <NA>            <NA>         New York
## 9                                     <NA>            <NA>         New York
## 10                                    <NA>            <NA>         New York
##    Cooperator Zip Code             Ownership Type Facility Latitude
## 1                11212 Not for Profit Corporation          40.67794
## 2                 <NA>                        LLC          40.67492
## 3                11212 Not for Profit Corporation          40.65505
## 4                 <NA>                        LLC          40.64106
## 5                 <NA>                        LLC          40.63996
## 6                 <NA> Not for Profit Corporation          40.65248
## 7                 <NA>                        LLC          40.64961
## 8                 <NA>                        LLC          40.63892
## 9                 <NA> Not for Profit Corporation          40.63942
## 10                <NA>                        LLC          40.62839
##    Facility Longitude       Facility Location      lat      long
## 1           -73.93752  (40.67794, -73.937515) 40.67794 -73.93752
## 2           -73.94571 (40.674923, -73.945709) 40.67492 -73.94571
## 3           -73.91326 (40.655045, -73.913261) 40.65505 -73.91326
## 4           -73.97279 (40.641064, -73.972786) 40.64106 -73.97279
## 5           -73.95770 (40.639965, -73.957695) 40.63996 -73.95770
## 6           -73.95376 (40.652481, -73.953758) 40.65248 -73.95376
## 7           -73.96771 (40.649609, -73.967712) 40.64961 -73.96771
## 8           -73.99873 (40.638924, -73.998734) 40.63892 -73.99873
## 9           -73.99839  (40.639423, -73.99839) 40.63942 -73.99839
## 10          -73.99194 (40.628391, -73.991936) 40.62839 -73.99194
##                          geometry
## 1  POLYGON ((1001614 186926.4,...
## 2  POLYGON ((1001614 186926.4,...
## 3  POLYGON ((1011174 183696.3,...
## 4  POLYGON ((991997.1 176307.5...
## 5  POLYGON ((994821.5 177865.7...
## 6  POLYGON ((994821.5 177865.7...
## 7  POLYGON ((994821.5 177865.7...
## 8  POLYGON ((987286.4 173946.5...
## 9  POLYGON ((987286.4 173946.5...
## 10 POLYGON ((987286.4 173946.5...
NYC_Health_sf<-st_as_sf(NYC_Health, coords = c("long","lat"))
plot(st_geometry(NYC_Health_sf))

plot(st_geometry(ZIPS_Health_csv))

#filter out NAs
ZIPS_Health_sf<-ZIPS_Health_csv%>%filter(!is.na(`Short Description`))

#check crs
st_crs(ZIPS_Health_sf)
## Coordinate Reference System:
##   User input: NAD83 / New York Long Island (ftUS) 
##   wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4269]],
##     CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
##         METHOD["Lambert Conic Conformal (2SP)",
##             ID["EPSG",9802]],
##         PARAMETER["Latitude of false origin",40.1666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-74,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",984250,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
##         BBOX[40.47,-74.26,41.3,-71.8]],
##     ID["EPSG",2263]]
st_crs(ZIPS_sf)
## Coordinate Reference System:
##   User input: NAD83 / New York Long Island (ftUS) 
##   wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4269]],
##     CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
##         METHOD["Lambert Conic Conformal (2SP)",
##             ID["EPSG",9802]],
##         PARAMETER["Latitude of false origin",40.1666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-74,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",984250,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
##         BBOX[40.47,-74.26,41.3,-71.8]],
##     ID["EPSG",2263]]
#create count of all health facilities per ZIP
Health_Per_ZIP<-ZIPS_Health_sf%>%group_by(ZIPCODE)%>%
  summarize(n_health_facilities=n())%>%view()

plot(Health_Per_ZIP['n_health_facilities'],breaks='jenks',main="Number of Nursing Homes and Hospitals Per ZIP")  

#want to join missing zip codes back in
Health_Per_ZIP2 <- st_join(ZIPS_sf,Health_Per_ZIP,by=c('ZIPCODE'='ZIPCODE'))
Health_Per_ZIP2[is.na('n_health_facilities')]<-0

plot(Health_Per_ZIP['n_health_facilities'],breaks='jenks',main="Number of Nursing Homes and Hospitals Per ZIP")  

## this didn't work....

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

#Q4. Join the Census ACS Pop, race, and age data to the NYC Planning CT Data

#read in files
unzip("2010 Census Tracts.zip")


CT<- sf::st_read("geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp", stringsAsFactors = FALSE)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `D:\GradSchool\Hunter_College\Spring_2021\DataAnalysisR\Module12\R-Spatial_II_Lab\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)
str(CT)
## Classes 'sf' and 'data.frame':   2165 obs. of  12 variables:
##  $ boro_code : chr  "5" "1" "1" "1" ...
##  $ boro_ct201: chr  "5000900" "1009800" "1010000" "1010200" ...
##  $ boro_name : chr  "Staten Island" "Manhattan" "Manhattan" "Manhattan" ...
##  $ cdeligibil: chr  "E" "I" "I" "I" ...
##  $ ct2010    : chr  "000900" "009800" "010000" "010200" ...
##  $ ctlabel   : chr  "9" "98" "100" "102" ...
##  $ ntacode   : chr  "SI22" "MN19" "MN19" "MN17" ...
##  $ ntaname   : chr  "West New Brighton-New Brighton-St. George" "Turtle Bay-East Midtown" "Turtle Bay-East Midtown" "Midtown-Midtown South" ...
##  $ puma      : chr  "3903" "3808" "3808" "3807" ...
##  $ shape_area: num  2497010 1906016 1860938 1860993 1864600 ...
##  $ shape_leng: num  7729 5534 5692 5688 5693 ...
##  $ geometry  :sfc_MULTIPOLYGON of length 2165; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:28, 1:2] -74.1 -74.1 -74.1 -74.1 -74.1 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:11] "boro_code" "boro_ct201" "boro_name" "cdeligibil" ...
CT %<>% dplyr::mutate(cntyFIPS = dplyr::case_when(
  boro_name == 'Bronx' ~ '005',
  boro_name == 'Brooklyn' ~ '047',
  boro_name == 'Manhattan' ~ '061',
  boro_name == 'Queens' ~ '081',
  boro_name == 'Staten Island' ~ '085'),
  tractFIPS = paste(cntyFIPS, ct2010, sep='')
)

# The ACS data from CENSUS website, 2018 data on population
# We used readLines to bypass the second line/row in the csv file. 
# You can also manually delete that line first, and run the read_csv or read.cvs. 
ACS_Data <- readLines("ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>% 
  textConnection() %>%
  read.csv(header=TRUE, quote= "\"") %>%
  dplyr::select(GEO_ID, 
                totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
                malePop = DP05_0002E, femalePop = DP05_0003E,  
                whitePop = DP05_0037E, blackPop = DP05_0038E,
                asianPop = DP05_0067E, hispanicPop = DP05_0071E,
                adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1));

ACS_Data %>%
  magrittr::extract(1:10,)
##                  GEO_ID totPop elderlyPop malePop femalePop whitePop blackPop
## 1  1400000US36005000100   7080         51    6503       577     1773     4239
## 2  1400000US36005000200   4542        950    2264      2278     2165     1279
## 3  1400000US36005000400   5634        710    2807      2827     2623     1699
## 4  1400000US36005001600   5917        989    2365      3552     2406     2434
## 5  1400000US36005001900   2765         76    1363      1402      585     1041
## 6  1400000US36005002000   9409        977    4119      5290     3185     4487
## 7  1400000US36005002300   4600        648    2175      2425      479     2122
## 8  1400000US36005002400    172          0     121        51       69       89
## 9  1400000US36005002500   5887        548    2958      2929      903     1344
## 10 1400000US36005002701   2868        243    1259      1609      243      987
##    asianPop hispanicPop adultPop citizenAdult censusCode
## 1       130        2329     6909         6100  005000100
## 2       119        3367     3582         2952  005000200
## 3       226        3873     4507         4214  005000400
## 4        68        3603     4416         3851  005001600
## 5       130        1413     2008         1787  005001900
## 6        29        5905     6851         6170  005002000
## 7        27        2674     3498         3056  005002300
## 8        14           0      131           42  005002400
## 9        68        4562     4237         2722  005002500
## 10        0        1985     1848         1412  005002701
popData <- merge(CT, ACS_Data, by.x ='tractFIPS', by.y = 'censusCode')

# verify the data
sum(popData$totPop)
## [1] 8443713
st_crs(popData)
## Coordinate Reference System:
##   User input: WGS84(DD) 
##   wkt:
## GEOGCRS["WGS84(DD)",
##     DATUM["WGS84",
##         ELLIPSOID["WGS84",6378137,298.257223563,
##             LENGTHUNIT["metre",1,
##                 ID["EPSG",9001]]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic longitude",east,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic latitude",north,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]]]
#assign the coords system in the ZIPS_sf to the pop data crs
popNYC <- sf::st_transform(popData, st_crs(ZIPS_sf)) # zpNYC is the JOINED zip code data from task 1.

# this may take a long time to run
popData %>% magrittr::extract('elderlyPop') %>% plot(breaks = 'jenks')

plot(popData['totPop'],breaks = 'jenks')

# Now aggregate to the zip code level
covidPopZipNYC <- sf::st_join(ZIPS_Covid_Join, 
                              popNYC %>% sf::st_centroid(), # this essentially converts census tracts to points
                              join = st_contains) %>%  #join all CT Centroid points that are contained by ZIPS_sf
  group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT, TOTAL_COVID_TESTS) %>% # use names(zpNYC) and names(popNYC) to see what we have
  summarise(totPop = sum(totPop),
            malePctg = sum(malePop)/totPop*100, # note the totPop is the newly calculated
            asianPop = sum(asianPop),
            blackPop = sum(blackPop),
            hispanicPop = sum(hispanicPop),
            whitePop = sum(whitePop)) 
## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x
## `summarise()` has grouped output by 'ZIPCODE', 'PO_NAME', 'POPULATION', 'COUNTY', 'COVID_CASE_COUNT'. You can override using the `.groups` argument.
covidPopZipNYC %>% head()
## Simple feature collection with 6 features and 12 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: 971132.6 ymin: 188447.3 xmax: 998309.7 ymax: 230942.5
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 6 x 13
## # Groups:   ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT [6]
##   ZIPCODE PO_NAME    POPULATION COUNTY  COVID_CASE_COUNT TOTAL_COVID_TES~ totPop
##     <dbl> <chr>           <dbl> <chr>              <dbl>            <dbl>  <int>
## 1      83 Central P~         25 New Yo~               NA               NA      3
## 2   10001 New York        22413 New Yo~             1542            20158  19146
## 3   10002 New York        81305 New Yo~             5902            48197  74310
## 4   10003 New York        55878 New Yo~             2803            41076  53487
## 5   10004 New York         2187 New Yo~              247             3599     NA
## 6   10005 New York         8107 New Yo~              413             6102   8809
## # ... with 6 more variables: malePctg <dbl>, asianPop <int>, blackPop <int>,
## #   hispanicPop <int>, whitePop <int>, geometry <GEOMETRY [US_survey_foot]>
#filter out the NAs
sum(covidPopZipNYC$totPop, na.rm = T)
## [1] 8395306
#check to see NAs
covidPopZipNYC %>% dplyr::filter(is.na(totPop))
## Simple feature collection with 68 features and 12 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: 971132.6 ymin: 151085.5 xmax: 1067494 ymax: 263362
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 68 x 13
## # Groups:   ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT [68]
##    ZIPCODE PO_NAME  POPULATION COUNTY   COVID_CASE_COUNT TOTAL_COVID_TES~ totPop
##  *   <dbl> <chr>         <dbl> <chr>               <dbl>            <dbl>  <int>
##  1   10004 New York       2187 New York              247             3599     NA
##  2   10020 New York          0 New York               NA               NA     NA
##  3   10041 New York          0 New York               NA               NA     NA
##  4   10043 New York          0 New York               NA               NA     NA
##  5   10045 New York          0 New York               NA               NA     NA
##  6   10047 New York          0 New York               NA               NA     NA
##  7   10048 New York          0 New York               NA               NA     NA
##  8   10055 New York         12 New York               NA               NA     NA
##  9   10075 New York      25203 New York             1453            18391     NA
## 10   10080 New York          0 New York               NA               NA     NA
## # ... with 58 more rows, and 6 more variables: malePctg <dbl>, asianPop <int>,
## #   blackPop <int>, hispanicPop <int>, whitePop <int>,
## #   geometry <GEOMETRY [US_survey_foot]>
# Looks like we converted census tracts to points, 
# so some zip code contains no such points but parts of census tracts are inside the zip code.
# We can either use higher-resolution data (blockgroups, for example) or use areal interpolation.
# 

plot(covidPopZipNYC["COVID_CASE_COUNT"], breaks='jenks')

plot(covidPopZipNYC["asianPop"], breaks='jenks')