Load packages

R Spaital Lab Assignment # 2

The second lab is to aggregate data from different sources to the zip codes as the core covid-19 data are available at that scale.

task 1: Join the COVID-19 data to the NYC zip code area data (sf or sp polygons).

######################################################################

## Covid-19 data
covid_data <- readr::read_csv("./R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv", lazy = FALSE,show_col_types = FALSE)
str(covid_data)
## spc_tbl_ [177 × 13] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ MODIFIED_ZCTA    : num [1:177] 10001 10002 10003 10004 10005 ...
##  $ NEIGHBORHOOD_NAME: chr [1:177] "Chelsea/NoMad/West Chelsea" "Chinatown/Lower East Side" "East Village/Gramercy/Greenwich Village" "Financial District" ...
##  $ BOROUGH_GROUP    : chr [1:177] "Manhattan" "Manhattan" "Manhattan" "Manhattan" ...
##  $ label            : chr [1:177] "10001, 10118" "10002" "10003" "10004" ...
##  $ lat              : num [1:177] 40.8 40.7 40.7 40.7 40.7 ...
##  $ lon              : num [1:177] -74 -74 -74 -74 -74 ...
##  $ COVID_CASE_COUNT : num [1:177] 1542 5902 2803 247 413 ...
##  $ COVID_CASE_RATE  : num [1:177] 5584 7836 5193 8311 4716 ...
##  $ POP_DENOMINATOR  : num [1:177] 27613 75323 53978 2972 8757 ...
##  $ COVID_DEATH_COUNT: num [1:177] 35 264 48 2 0 1 4 118 37 62 ...
##  $ COVID_DEATH_RATE : num [1:177] 126.8 350.5 88.9 67.3 0 ...
##  $ PERCENT_POSITIVE : num [1:177] 7.86 12.63 6.93 6.92 6.72 ...
##  $ TOTAL_COVID_TESTS: num [1:177] 20158 48197 41076 3599 6102 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   MODIFIED_ZCTA = col_double(),
##   ..   NEIGHBORHOOD_NAME = col_character(),
##   ..   BOROUGH_GROUP = col_character(),
##   ..   label = col_character(),
##   ..   lat = col_double(),
##   ..   lon = col_double(),
##   ..   COVID_CASE_COUNT = col_double(),
##   ..   COVID_CASE_RATE = col_double(),
##   ..   POP_DENOMINATOR = col_double(),
##   ..   COVID_DEATH_COUNT = col_double(),
##   ..   COVID_DEATH_RATE = col_double(),
##   ..   PERCENT_POSITIVE = col_double(),
##   ..   TOTAL_COVID_TESTS = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
## load the NYC-zip code data
nyc_zipcode_sf <- st_read("./ZIP_CODE_040114/ZIP_CODE_040114.shp")
## Reading layer `ZIP_CODE_040114' from data source 
##   `C:\Users\melin\Documents\GTECH78520\part3\ZIP_CODE_040114\ZIP_CODE_040114.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
str(nyc_zipcode_sf)
## Classes 'sf' and 'data.frame':   263 obs. of  13 variables:
##  $ ZIPCODE   : chr  "11436" "11213" "11212" "11225" ...
##  $ BLDGZIP   : chr  "0" "0" "0" "0" ...
##  $ PO_NAME   : chr  "Jamaica" "Brooklyn" "Brooklyn" "Brooklyn" ...
##  $ POPULATION: num  18681 62426 83866 56527 72280 ...
##  $ AREA      : num  22699295 29631004 41972104 23698630 36868799 ...
##  $ STATE     : chr  "NY" "NY" "NY" "NY" ...
##  $ COUNTY    : chr  "Queens" "Kings" "Kings" "Kings" ...
##  $ ST_FIPS   : chr  "36" "36" "36" "36" ...
##  $ CTY_FIPS  : chr  "081" "047" "047" "047" ...
##  $ URL       : chr  "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
##  $ SHAPE_AREA: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ SHAPE_LEN : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ geometry  :sfc_POLYGON of length 263; first list element: List of 1
##   ..$ : num [1:159, 1:2] 1038098 1038142 1038171 1038280 1038521 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:12] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...
## join data
nyc_sf_merged <- base::merge(nyc_zipcode_sf, covid_data, by.x = "ZIPCODE", by.y = "MODIFIED_ZCTA")
names(nyc_sf_merged) 
##  [1] "ZIPCODE"           "BLDGZIP"           "PO_NAME"          
##  [4] "POPULATION"        "AREA"              "STATE"            
##  [7] "COUNTY"            "ST_FIPS"           "CTY_FIPS"         
## [10] "URL"               "SHAPE_AREA"        "SHAPE_LEN"        
## [13] "NEIGHBORHOOD_NAME" "BOROUGH_GROUP"     "label"            
## [16] "lat"               "lon"               "COVID_CASE_COUNT" 
## [19] "COVID_CASE_RATE"   "POP_DENOMINATOR"   "COVID_DEATH_COUNT"
## [22] "COVID_DEATH_RATE"  "PERCENT_POSITIVE"  "TOTAL_COVID_TESTS"
## [25] "geometry"
# plot to verify the data
mapview(nyc_sf_merged, zcol=c("TOTAL_COVID_TESTS",'PERCENT_POSITIVE'))

task 2: Aggregate the NYC food retails store data (points) to the zip code data, so that we know how many retail stores in each zip code area. Note that not all locations are for food retail. And we need to choose the specific types according to the data.

## load the NYC food retail
nyc_store_sf <- st_read("./R-Spatial_II_Lab/nycFoodStore.shp")
## Reading layer `nycFoodStore' from data source 
##   `C:\Users\melin\Documents\GTECH78520\part3\R-Spatial_II_Lab\nycFoodStore.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS:  WGS 84
str(nyc_store_sf)
## Classes 'sf' and 'data.frame':   11300 obs. of  17 variables:
##  $ ï__Cnty : chr  "Bronx" "Bronx" "Bronx" "Bronx" ...
##  $ Lcns_Nm : int  734149 606221 606228 723375 724807 712943 703060 609065 722972 609621 ...
##  $ Oprtn_T : chr  "Store" "Store" "Store" "Store" ...
##  $ Estbl_T : chr  "JAC" "JAC" "JAC" "JAC" ...
##  $ Entty_N : chr  "7 ELEVEN FOOD STORE #37933H" "1001 SAN MIGUEL FOOD CENTER INC" "1029 FOOD PLAZA INC" "1078 DELI GROCERY CORP" ...
##  $ DBA_Nam : chr  NA "1001 SAN MIGUEL FD CNTR" "1029 FOOD PLAZA" "1078 DELI GROCERY" ...
##  $ Strt_Nmb: chr  "500" "1001" "122" "1078" ...
##  $ Stret_Nm: chr  "BAYCHESTER AVE" "SHERIDAN AVE" "E 181ST ST" "EAST 165TH STREET" ...
##  $ Add_L_2 : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Add_L_3 : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ City    : chr  "BRONX" "BRONX" "BRONX" "BRONX" ...
##  $ State   : chr  "NY" "NY" "NY" "NY" ...
##  $ Zip_Cod : int  10475 10456 10453 10459 10456 10453 10467 10456 10456 10472 ...
##  $ Sqr_Ftg : chr  "0" "1,100" "2,000" "1,200" ...
##  $ Locatin : chr  "500 BAYCHESTER AVE\nBRONX, NY 10475\n(40.869156, -73.831875)" "1001 SHERIDAN AVE\nBRONX, NY 10456\n(40.829061, -73.919613)" "122 E 181ST ST\nBRONX, NY 10453\n(40.854755, -73.902853)" "1078 EAST 165TH STREET\nBRONX, NY 10459\n(40.825105, -73.890589)" ...
##  $ Coords  : chr  "40.869156, -73.831875" "40.829061, -73.919613" "40.854755, -73.902853" "40.825105, -73.890589" ...
##  $ geometry:sfc_POINT of length 11300; first list element:  'XY' num  -73.8 40.9
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:16] "ï__Cnty" "Lcns_Nm" "Oprtn_T" "Estbl_T" ...
# Transform the NYC food retail data to match the CRS of NYC zipcode data
nyc_store_sf <- st_transform(nyc_store_sf, st_crs(nyc_zipcode_sf))

# Filter retail store types of food stores and count the number of stores in each zip code area
nyc_store_sf %>% 
  
  # Filter the data to include only rows where Estbl_T contains "A", "J", or "D" which are the type of stores we want
  dplyr::filter(stringr::str_detect(Estbl_T, '[AJD]')) %>%
  
  # Perform a spatial join between the new filtered dataframe and the zipcode sf data
  sf::st_join(nyc_zipcode_sf, ., join = st_contains) %>%
  
  # Group the resulting dataframe by ZIPCODE
  group_by(ZIPCODE) %>%
  
  # Summarize each group by counting the number of rows in each group
  summarise(FoodStoreNum = n()) %>%
  
  # Extract the FoodStoreNum column from the resulting data frame
  magrittr::extract('FoodStoreNum') %>%
  
  # Plot a histogram of the FoodStoreNum column
  plot(breaks = "jenks", main = "Number of Food Stores")

### task 3:Aggregate the NYC health facilities (points) to the zip code data. Similarly, choose appropriate subtypes such as nursing homes from the facilities.

## First we want to load the data and creat a sf object

## load the NYC health facilities data
health_facilities <- read_csv("./R-Spatial_II_Lab/NYS_Health_Facility.csv", lazy = FALSE,show_col_types = FALSE)

# Remove row of missing data
health_facilities <- health_facilities[complete.cases(health_facilities[,c("Facility Longitude", "Facility Latitude")]),]

#create sf object
health_facilities_sf <- st_as_sf(health_facilities, coords = c("Facility Longitude", "Facility Latitude"), crs = 4326) 
str(health_facilities_sf)
## sf [3,848 × 35] (S3: sf/tbl_df/tbl/data.frame)
##  $ Facility ID                 : num [1:3848] 204 620 1156 2589 3455 ...
##  $ Facility Name               : chr [1:3848] "Hospice at Lourdes" "Charles T Sitrin Health Care Center Inc" "East Side Nursing Home" "Wellsville Manor Care Center" ...
##  $ Short Description           : chr [1:3848] "HSPC" "NH" "NH" "NH" ...
##  $ Description                 : chr [1:3848] "Hospice" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" ...
##  $ Facility Open Date          : chr [1:3848] "06/01/1985" "02/01/1989" "08/01/1979" "02/01/1989" ...
##  $ Facility Address 1          : chr [1:3848] "4102 Old Vestal Road" "2050 Tilden Avenue" "62 Prospect St" "4192A Bolivar Road" ...
##  $ Facility Address 2          : chr [1:3848] NA NA NA NA ...
##  $ Facility City               : chr [1:3848] "Vestal" "New Hartford" "Warsaw" "Wellsville" ...
##  $ Facility State              : chr [1:3848] "New York" "New York" "New York" "New York" ...
##  $ Facility Zip Code           : chr [1:3848] "13850" "13413" "14569" "14895" ...
##  $ Facility Phone Number       : num [1:3848] 6.08e+09 3.16e+09 5.86e+09 5.86e+09 7.17e+09 ...
##  $ Facility Fax Number         : num [1:3848] NA NA NA NA NA ...
##  $ Facility Website            : chr [1:3848] NA NA NA NA ...
##  $ Facility County Code        : num [1:3848] 3 32 60 2 14 ...
##  $ Facility County             : chr [1:3848] "Broome" "Oneida" "Wyoming" "Allegany" ...
##  $ Regional Office ID          : num [1:3848] 3 3 1 1 1 7 1 7 5 7 ...
##  $ Regional Office             : chr [1:3848] "Central New York Regional Office" "Central New York Regional Office" "Western Regional Office - Buffalo" "Western Regional Office - Buffalo" ...
##  $ Main Site Name              : chr [1:3848] NA NA NA NA ...
##  $ Main Site Facility ID       : num [1:3848] NA NA NA NA NA ...
##  $ Operating Certificate Number: chr [1:3848] "0301501F" "3227304N" "6027303N" "0228305N" ...
##  $ Operator Name               : chr [1:3848] "Our Lady of Lourdes Memorial Hospital Inc" "Charles T Sitrin Health Care Center, Inc" "East Side Nursing Home Inc" "Wellsville Manor LLC" ...
##  $ Operator Address 1          : chr [1:3848] "169 Riverside Drive" "Box 1000 Tilden Avenue" "62 Prospect Street" "4192a Bolivar Road" ...
##  $ Operator Address 2          : chr [1:3848] NA NA NA NA ...
##  $ Operator City               : chr [1:3848] "Binghamton" "New Hartford" "Warsaw" "Wellsville" ...
##  $ Operator State              : chr [1:3848] "New York" "New York" "New York" "New York" ...
##  $ Operator Zip Code           : chr [1:3848] "13905" "13413" "14569" "14897" ...
##  $ Cooperator Name             : chr [1:3848] NA NA NA NA ...
##  $ Cooperator Address          : chr [1:3848] NA NA NA NA ...
##  $ Cooperator Address 2        : chr [1:3848] NA NA NA NA ...
##  $ Cooperator City             : chr [1:3848] NA NA NA NA ...
##  $ Cooperator State            : chr [1:3848] "New York" "New York" "New York" "New York" ...
##  $ Cooperator Zip Code         : chr [1:3848] NA NA NA NA ...
##  $ Ownership Type              : chr [1:3848] "Not for Profit Corporation" "Not for Profit Corporation" "Business Corporation" "LLC" ...
##  $ Facility Location           : chr [1:3848] "(42.097095, -75.975243)" "(43.05497, -75.228828)" "(42.738979, -78.12867)" "(42.126461, -77.967834)" ...
##  $ geometry                    :sfc_POINT of length 3848; first list element:  'XY' num [1:2] -76 42.1
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:34] "Facility ID" "Facility Name" "Short Description" "Description" ...
# Now we aggregate the NYC health facilities (points) to the zip code data

# Transform NYC health facilities data to match the CRS of zipcode data
health_facilities_sf <- st_transform(health_facilities_sf, st_crs(nyc_zipcode_sf))

health_facilities_sf %>% 
  # Filter the data to include only rows where Short Description contains "NH"
  dplyr::filter(`Short Description` == 'NH') %>%
  
  # Perform a spatial join between the filtered dataframe and the zipe code sf
  sf::st_join(nyc_zipcode_sf, ., join = st_contains) %>%
  
  # Group the resulting data frame by ZIPCODE
  group_by(ZIPCODE) %>%
  
  # Summarize each group by counting the number of rows in each group
  summarise(healthfacilitiesNum = n()) %>%
  
  # Extract the healthfacilitiesNum column from the resulting data frame
  magrittr::extract('healthfacilitiesNum') %>%
  
  # Plot a histogram of the healthfacilitiesNum column
  plot(breaks = "jenks", main = "Number of Health Facilities")  

task 4:Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.

# Load the NYC census tracts from Department of Planning
nycCensus <- sf::st_read('./R-Spatial_II_Lab/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp', stringsAsFactors = FALSE)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\melin\Documents\GTECH78520\part3\R-Spatial_II_Lab\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)
str(nycCensus)
## Classes 'sf' and 'data.frame':   2165 obs. of  12 variables:
##  $ boro_code : chr  "5" "1" "1" "1" ...
##  $ boro_ct201: chr  "5000900" "1009800" "1010000" "1010200" ...
##  $ boro_name : chr  "Staten Island" "Manhattan" "Manhattan" "Manhattan" ...
##  $ cdeligibil: chr  "E" "I" "I" "I" ...
##  $ ct2010    : chr  "000900" "009800" "010000" "010200" ...
##  $ ctlabel   : chr  "9" "98" "100" "102" ...
##  $ ntacode   : chr  "SI22" "MN19" "MN19" "MN17" ...
##  $ ntaname   : chr  "West New Brighton-New Brighton-St. George" "Turtle Bay-East Midtown" "Turtle Bay-East Midtown" "Midtown-Midtown South" ...
##  $ puma      : chr  "3903" "3808" "3808" "3807" ...
##  $ shape_area: num  2497010 1906016 1860938 1860993 1864600 ...
##  $ shape_leng: num  7729 5534 5692 5688 5693 ...
##  $ geometry  :sfc_MULTIPOLYGON of length 2165; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:28, 1:2] -74.1 -74.1 -74.1 -74.1 -74.1 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:11] "boro_code" "boro_ct201" "boro_name" "cdeligibil" ...
# does not use the standard County FIPS, but boro code
nycCensus %<>% dplyr::mutate(cntyFIPS = case_when(
  boro_name == 'Bronx' ~ '005',
  boro_name == 'Brooklyn' ~ '047',
  boro_name == 'Manhattan' ~ '061',
  boro_name == 'Queens' ~ '081',
  boro_name == 'Staten Island' ~ '085'),
  tractFIPS = paste(cntyFIPS, ct2010, sep='')
)

# Load the ACS data from CENSUS website
acsData <- readLines("./R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>% 
  textConnection() %>%
  read.csv(header=TRUE, quote= "\"") %>%
  dplyr::select(GEO_ID, 
                totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
                malePop = DP05_0002E, femalePop = DP05_0003E,  
                whitePop = DP05_0037E, blackPop = DP05_0038E,
                asianPop = DP05_0067E, hispanicPop = DP05_0071E,
                adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1));

# now extract columns 1 through 10 from the acsData data frame
acsData %>%
  magrittr::extract(1:10,)
##                  GEO_ID totPop elderlyPop malePop femalePop whitePop blackPop
## 1  1400000US36005000100   7080         51    6503       577     1773     4239
## 2  1400000US36005000200   4542        950    2264      2278     2165     1279
## 3  1400000US36005000400   5634        710    2807      2827     2623     1699
## 4  1400000US36005001600   5917        989    2365      3552     2406     2434
## 5  1400000US36005001900   2765         76    1363      1402      585     1041
## 6  1400000US36005002000   9409        977    4119      5290     3185     4487
## 7  1400000US36005002300   4600        648    2175      2425      479     2122
## 8  1400000US36005002400    172          0     121        51       69       89
## 9  1400000US36005002500   5887        548    2958      2929      903     1344
## 10 1400000US36005002701   2868        243    1259      1609      243      987
##    asianPop hispanicPop adultPop citizenAdult censusCode
## 1       130        2329     6909         6100  005000100
## 2       119        3367     3582         2952  005000200
## 3       226        3873     4507         4214  005000400
## 4        68        3603     4416         3851  005001600
## 5       130        1413     2008         1787  005001900
## 6        29        5905     6851         6170  005002000
## 7        27        2674     3498         3056  005002300
## 8        14           0      131           42  005002400
## 9        68        4562     4237         2722  005002500
## 10        0        1985     1848         1412  005002701
# Join ACS data to the census tracts
popData <- merge(nycCensus, acsData, by.x ='tractFIPS', by.y = 'censusCode')

# verify the data
sum(popData$totPop)
## [1] 8443713
# Plot the total population by zip code
ggplot() + 
  geom_sf(data = popData, aes(fill = totPop)) + 
  scale_fill_gradient(low = "white", high = "red") + 
  labs(title = "Total Population by Zip Code")

# Plot the elderlypop by zip code
ggplot() + 
  geom_sf(data = popData, aes(fill = elderlyPop)) + 
  scale_fill_gradient(low = "white", high = "red") + 
  labs(title = "Elderly Population by Zip Code")

# Plot the hispanicPop by zip code
ggplot() + 
  geom_sf(data = popData, aes(fill = hispanicPop)) + 
  scale_fill_gradient(low = "white", high = "red") + 
  labs(title = "Hispanic Population by Zip Code")

task 5:Aggregate the ACS census data to zip code area data.

# Transform popData data to match the CRS of zipcode data
popNYC <- sf::st_transform(popData, st_crs(nyc_sf_merged)) 

# plot elderlyPop
popData %>% magrittr::extract('elderlyPop') %>% plot(breaks = 'jenks')

# Now aggregate to the zip code level
covidPopZipNYC <- sf::st_join(nyc_sf_merged, 
                              popNYC %>% sf::st_centroid(),
                              join = st_contains) %>% 
  group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT, TOTAL_COVID_TESTS) %>% 
  summarise(totPop = sum(totPop),
            malePctg = sum(malePop)/totPop*100, 
            asianPop = sum(asianPop),
            blackPop = sum(blackPop),
            hispanicPop = sum(hispanicPop),
            whitePop = sum(whitePop)) 
## Warning: st_centroid assumes attributes are constant over geometries
## `summarise()` has grouped output by 'ZIPCODE', 'PO_NAME', 'POPULATION',
## 'COUNTY', 'COVID_CASE_COUNT'. You can override using the `.groups` argument.
covidPopZipNYC %>% head()
## Simple feature collection with 6 features and 12 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: 971132.6 ymin: 188447.3 xmax: 992172.8 ymax: 215324.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 6 × 13
## # Groups:   ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT [6]
##   ZIPCODE PO_NAME  POPUL…¹ COUNTY COVID…² TOTAL…³ totPop maleP…⁴ asian…⁵ black…⁶
##   <chr>   <chr>      <dbl> <chr>    <dbl>   <dbl>  <int>   <dbl>   <int>   <int>
## 1 10001   New York   22413 New Y…    1542   20158  19146    51.2    4837    1092
## 2 10002   New York   81305 New Y…    5902   48197  74310    48.4   32149    5969
## 3 10003   New York   55878 New Y…    2803   41076  53487    50.3    8027    3130
## 4 10004   New York    2187 New Y…     247    3599     NA    NA        NA      NA
## 5 10005   New York    8107 New Y…     413    6102   8809    42.8    1974     185
## 6 10006   New York    3011 New Y…     164    2441   4639    44.0    1195      52
## # … with 3 more variables: hispanicPop <int>, whitePop <int>,
## #   geometry <GEOMETRY [US_survey_foot]>, and abbreviated variable names
## #   ¹​POPULATION, ²​COVID_CASE_COUNT, ³​TOTAL_COVID_TESTS, ⁴​malePctg, ⁵​asianPop,
## #   ⁶​blackPop
# Check and verify the data again
sum(covidPopZipNYC$totPop, na.rm = T)
## [1] 8334092
# See where we have those NAs 
covidPopZipNYC %>% dplyr::filter(is.na(totPop))
## Simple feature collection with 7 features and 12 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: 971132.6 ymin: 151085.5 xmax: 1049202 ymax: 263362
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 7 × 13
## # Groups:   ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT [7]
##   ZIPCODE PO_NAME  POPUL…¹ COUNTY COVID…² TOTAL…³ totPop maleP…⁴ asian…⁵ black…⁶
## * <chr>   <chr>      <dbl> <chr>    <dbl>   <dbl>  <int>   <dbl>   <int>   <int>
## 1 10004   New York    2187 New Y…     247    3599     NA      NA      NA      NA
## 2 10075   New York   25203 New Y…    1453   18391     NA      NA      NA      NA
## 3 10464   Bronx       4438 Bronx      387    2933     NA      NA      NA      NA
## 4 11109   Long Is…    2752 Queens     354    4687     NA      NA      NA      NA
## 5 11231   Brooklyn   33144 Kings     1842   24431     NA      NA      NA      NA
## 6 11693   Far Roc…   11052 Kings     1111    7027     NA      NA      NA      NA
## 7 11693   Far Roc…   11052 Queens    1111    7027     NA      NA      NA      NA
## # … with 3 more variables: hispanicPop <int>, whitePop <int>,
## #   geometry <GEOMETRY [US_survey_foot]>, and abbreviated variable names
## #   ¹​POPULATION, ²​COVID_CASE_COUNT, ³​TOTAL_COVID_TESTS, ⁴​malePctg, ⁵​asianPop,
## #   ⁶​blackPop
#plot the results
plot(covidPopZipNYC["COVID_CASE_COUNT"], breaks='jenks')

plot(covidPopZipNYC["asianPop"], breaks='jenks')