Homework 11

R Markdown

Tasks 1: Join the COVID-19 data to the NYC zip code area data (sf or sp polygons).

COVID_19_data <- readr::read_csv("/Users/timothymedina/Documents/GTECH38520_RStudio/R-spatial/Session_11/R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv", lazy = FALSE)

## Rows: 177 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): NEIGHBORHOOD_NAME, BOROUGH_GROUP, label
## dbl (10): MODIFIED_ZCTA, lat, lon, COVID_CASE_COUNT, COVID_CASE_RATE, POP_DE...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

str(COVID_19_data)

## spc_tbl_ [177 × 13] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ MODIFIED_ZCTA    : num [1:177] 10001 10002 10003 10004 10005 ...
##  $ NEIGHBORHOOD_NAME: chr [1:177] "Chelsea/NoMad/West Chelsea" "Chinatown/Lower East Side" "East Village/Gramercy/Greenwich Village" "Financial District" ...
##  $ BOROUGH_GROUP    : chr [1:177] "Manhattan" "Manhattan" "Manhattan" "Manhattan" ...
##  $ label            : chr [1:177] "10001, 10118" "10002" "10003" "10004" ...
##  $ lat              : num [1:177] 40.8 40.7 40.7 40.7 40.7 ...
##  $ lon              : num [1:177] -74 -74 -74 -74 -74 ...
##  $ COVID_CASE_COUNT : num [1:177] 1542 5902 2803 247 413 ...
##  $ COVID_CASE_RATE  : num [1:177] 5584 7836 5193 8311 4716 ...
##  $ POP_DENOMINATOR  : num [1:177] 27613 75323 53978 2972 8757 ...
##  $ COVID_DEATH_COUNT: num [1:177] 35 264 48 2 0 1 4 118 37 62 ...
##  $ COVID_DEATH_RATE : num [1:177] 126.8 350.5 88.9 67.3 0 ...
##  $ PERCENT_POSITIVE : num [1:177] 7.86 12.63 6.93 6.92 6.72 ...
##  $ TOTAL_COVID_TESTS: num [1:177] 20158 48197 41076 3599 6102 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   MODIFIED_ZCTA = col_double(),
##   ..   NEIGHBORHOOD_NAME = col_character(),
##   ..   BOROUGH_GROUP = col_character(),
##   ..   label = col_character(),
##   ..   lat = col_double(),
##   ..   lon = col_double(),
##   ..   COVID_CASE_COUNT = col_double(),
##   ..   COVID_CASE_RATE = col_double(),
##   ..   POP_DENOMINATOR = col_double(),
##   ..   COVID_DEATH_COUNT = col_double(),
##   ..   COVID_DEATH_RATE = col_double(),
##   ..   PERCENT_POSITIVE = col_double(),
##   ..   TOTAL_COVID_TESTS = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

# head(COVID_19_data)
# according to 04.23.2021

NYC_zipcode_dat_sf <- st_read("/Users/timothymedina/Documents/GTECH38520_RStudio/R-spatial/Session_11/R-Spatial_II_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp")

## Reading layer `ZIP_CODE_040114' from data source 
##   `/Users/timothymedina/Documents/GTECH38520_RStudio/R-spatial/Session_11/R-Spatial_II_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)

str(NYC_zipcode_dat_sf)

## Classes 'sf' and 'data.frame':   263 obs. of  13 variables:
##  $ ZIPCODE   : chr  "11436" "11213" "11212" "11225" ...
##  $ BLDGZIP   : chr  "0" "0" "0" "0" ...
##  $ PO_NAME   : chr  "Jamaica" "Brooklyn" "Brooklyn" "Brooklyn" ...
##  $ POPULATION: num  18681 62426 83866 56527 72280 ...
##  $ AREA      : num  22699295 29631004 41972104 23698630 36868799 ...
##  $ STATE     : chr  "NY" "NY" "NY" "NY" ...
##  $ COUNTY    : chr  "Queens" "Kings" "Kings" "Kings" ...
##  $ ST_FIPS   : chr  "36" "36" "36" "36" ...
##  $ CTY_FIPS  : chr  "081" "047" "047" "047" ...
##  $ URL       : chr  "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
##  $ SHAPE_AREA: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ SHAPE_LEN : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ geometry  :sfc_POLYGON of length 263; first list element: List of 1
##   ..$ : num [1:159, 1:2] 1038098 1038142 1038171 1038280 1038521 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:12] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...

# joining the files 
nyc_sf_merged <- base::merge(NYC_zipcode_dat_sf, COVID_19_data, by.x = "ZIPCODE", by.y = "MODIFIED_ZCTA")
head(nyc_sf_merged)

## Simple feature collection with 6 features and 24 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 971132.6 ymin: 190102.9 xmax: 992172.8 ymax: 215324.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
##   ZIPCODE BLDGZIP  PO_NAME POPULATION     AREA STATE   COUNTY ST_FIPS CTY_FIPS
## 1   10001       0 New York      22413 17794941    NY New York      36      061
## 2   10002       0 New York      81305 26280129    NY New York      36      061
## 3   10003       0 New York      55878 15538376    NY New York      36      061
## 4   10004       0 New York       2187   670708    NY New York      36      061
## 5   10004       0 New York       2187  4002521    NY New York      36      061
## 6   10004       0 New York       2187  1202708    NY New York      36      061
##                    URL SHAPE_AREA SHAPE_LEN
## 1 http://www.usps.com/          0         0
## 2 http://www.usps.com/          0         0
## 3 http://www.usps.com/          0         0
## 4 http://www.usps.com/          0         0
## 5 http://www.usps.com/          0         0
## 6 http://www.usps.com/          0         0
##                         NEIGHBORHOOD_NAME BOROUGH_GROUP        label      lat
## 1              Chelsea/NoMad/West Chelsea     Manhattan 10001, 10118 40.75069
## 2               Chinatown/Lower East Side     Manhattan        10002 40.71578
## 3 East Village/Gramercy/Greenwich Village     Manhattan        10003 40.73183
## 4                      Financial District     Manhattan        10004 40.70367
## 5                      Financial District     Manhattan        10004 40.70367
## 6                      Financial District     Manhattan        10004 40.70367
##         lon COVID_CASE_COUNT COVID_CASE_RATE POP_DENOMINATOR COVID_DEATH_COUNT
## 1 -73.99714             1542         5584.31        27613.09                35
## 2 -73.98618             5902         7835.62        75322.71               264
## 3 -73.98916             2803         5192.87        53977.81                48
## 4 -74.01311              247         8310.58         2972.12                 2
## 5 -74.01311              247         8310.58         2972.12                 2
## 6 -74.01311              247         8310.58         2972.12                 2
##   COVID_DEATH_RATE PERCENT_POSITIVE TOTAL_COVID_TESTS
## 1           126.75             7.86             20158
## 2           350.49            12.63             48197
## 3            88.93             6.93             41076
## 4            67.29             6.92              3599
## 5            67.29             6.92              3599
## 6            67.29             6.92              3599
##                         geometry
## 1 POLYGON ((981958.6 213464.5...
## 2 POLYGON ((991339.9 207576.8...
## 3 POLYGON ((989830.5 207048.1...
## 4 POLYGON ((971218.4 190710.9...
## 5 POLYGON ((981197.4 196571.1...
## 6 POLYGON ((972696.8 193014.5...

# names(nyc_sf_merged) # removed the "MODIFIED_ZCTA" column

Tasks 2:Aggregate the NYC food retails store data (points) to the zip code data, so that we know how many retail stores in each zip code area. Note that not all locations are for food retail. And we need to choose the specific types according to the data.

# read in the food store data 
nycFoodStoreSF <- st_read("/Users/timothymedina/Documents/GTECH38520_RStudio/R-spatial/Session_11/R-Spatial_II_Lab/nycFoodStore.shp")

## Reading layer `nycFoodStore' from data source 
##   `/Users/timothymedina/Documents/GTECH38520_RStudio/R-spatial/Session_11/R-Spatial_II_Lab/nycFoodStore.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS:  WGS 84

str(nycFoodStoreSF)

## Classes 'sf' and 'data.frame':   11300 obs. of  17 variables:
##  $ ï__Cnty : chr  "Bronx" "Bronx" "Bronx" "Bronx" ...
##  $ Lcns_Nm : int  734149 606221 606228 723375 724807 712943 703060 609065 722972 609621 ...
##  $ Oprtn_T : chr  "Store" "Store" "Store" "Store" ...
##  $ Estbl_T : chr  "JAC" "JAC" "JAC" "JAC" ...
##  $ Entty_N : chr  "7 ELEVEN FOOD STORE #37933H" "1001 SAN MIGUEL FOOD CENTER INC" "1029 FOOD PLAZA INC" "1078 DELI GROCERY CORP" ...
##  $ DBA_Nam : chr  NA "1001 SAN MIGUEL FD CNTR" "1029 FOOD PLAZA" "1078 DELI GROCERY" ...
##  $ Strt_Nmb: chr  "500" "1001" "122" "1078" ...
##  $ Stret_Nm: chr  "BAYCHESTER AVE" "SHERIDAN AVE" "E 181ST ST" "EAST 165TH STREET" ...
##  $ Add_L_2 : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Add_L_3 : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ City    : chr  "BRONX" "BRONX" "BRONX" "BRONX" ...
##  $ State   : chr  "NY" "NY" "NY" "NY" ...
##  $ Zip_Cod : int  10475 10456 10453 10459 10456 10453 10467 10456 10456 10472 ...
##  $ Sqr_Ftg : chr  "0" "1,100" "2,000" "1,200" ...
##  $ Locatin : chr  "500 BAYCHESTER AVE\nBRONX, NY 10475\n(40.869156, -73.831875)" "1001 SHERIDAN AVE\nBRONX, NY 10456\n(40.829061, -73.919613)" "122 E 181ST ST\nBRONX, NY 10453\n(40.854755, -73.902853)" "1078 EAST 165TH STREET\nBRONX, NY 10459\n(40.825105, -73.890589)" ...
##  $ Coords  : chr  "40.869156, -73.831875" "40.829061, -73.919613" "40.854755, -73.902853" "40.825105, -73.890589" ...
##  $ geometry:sfc_POINT of length 11300; first list element:  'XY' num  -73.8 40.9
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:16] "ï__Cnty" "Lcns_Nm" "Oprtn_T" "Estbl_T" ...

head(nycFoodStoreSF)

## Simple feature collection with 6 features and 16 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -73.91961 ymin: 40.8251 xmax: -73.83188 ymax: 40.86916
## Geodetic CRS:  WGS 84
##   ï__Cnty Lcns_Nm Oprtn_T Estbl_T                         Entty_N
## 1   Bronx  734149   Store     JAC     7 ELEVEN FOOD STORE #37933H
## 2   Bronx  606221   Store     JAC 1001 SAN MIGUEL FOOD CENTER INC
## 3   Bronx  606228   Store     JAC             1029 FOOD PLAZA INC
## 4   Bronx  723375   Store     JAC          1078 DELI GROCERY CORP
## 5   Bronx  724807   Store     JAC     1086 LUNA DELI GROCERY CORP
## 6   Bronx  712943   Store     JAC        109 AJ DELI GROCERY CORP
##                   DBA_Nam Strt_Nmb          Stret_Nm Add_L_2 Add_L_3  City
## 1                    <NA>      500    BAYCHESTER AVE      NA      NA BRONX
## 2 1001 SAN MIGUEL FD CNTR     1001      SHERIDAN AVE      NA      NA BRONX
## 3         1029 FOOD PLAZA      122        E 181ST ST      NA      NA BRONX
## 4       1078 DELI GROCERY     1078 EAST 165TH STREET      NA      NA BRONX
## 5  1086 LUNA DELI GROCERY     1086       BOSTON ROAD      NA      NA BRONX
## 6     109 AJ DELI GROCERY      109     E TREMONT AVE      NA      NA BRONX
##   State Zip_Cod Sqr_Ftg
## 1    NY   10475       0
## 2    NY   10456   1,100
## 3    NY   10453   2,000
## 4    NY   10459   1,200
## 5    NY   10456   1,500
## 6    NY   10453   2,400
##                                                            Locatin
## 1     500 BAYCHESTER AVE\nBRONX, NY 10475\n(40.869156, -73.831875)
## 2      1001 SHERIDAN AVE\nBRONX, NY 10456\n(40.829061, -73.919613)
## 3         122 E 181ST ST\nBRONX, NY 10453\n(40.854755, -73.902853)
## 4 1078 EAST 165TH STREET\nBRONX, NY 10459\n(40.825105, -73.890589)
## 5       1086 BOSTON ROAD\nBRONX, NY 10456\n(40.827096, -73.905123)
## 6      109 E TREMONT AVE\nBRONX, NY 10453\n(40.850537, -73.907137)
##                  Coords                   geometry
## 1 40.869156, -73.831875 POINT (-73.83187 40.86916)
## 2 40.829061, -73.919613 POINT (-73.91961 40.82906)
## 3 40.854755, -73.902853 POINT (-73.90285 40.85475)
## 4 40.825105, -73.890589 POINT (-73.89059 40.82511)
## 5 40.827096, -73.905123  POINT (-73.90512 40.8271)
## 6 40.850537, -73.907137 POINT (-73.90714 40.85054)

names(nycFoodStoreSF)

##  [1] "ï__Cnty"  "Lcns_Nm"  "Oprtn_T"  "Estbl_T"  "Entty_N"  "DBA_Nam" 
##  [7] "Strt_Nmb" "Stret_Nm" "Add_L_2"  "Add_L_3"  "City"     "State"   
## [13] "Zip_Cod"  "Sqr_Ftg"  "Locatin"  "Coords"   "geometry"

unique(nycFoodStoreSF$Estbl_T) # am i supposed to choose specific codes????

##  [1] "JAC"    "A"      "JACD"   "JACDK"  "JAD"    "JABCHK" "JACHK"  "JABC"  
##  [9] "JAZ"    "JABCK"  "JACK"   "JACDHK" "JABH"   "JACH"   "JACDE"  "JABCH" 
## [17] "JABCDH" "JABK"   "JABHK"  "JABCD"  "JACG"   "JACDH"  "JADHK"  "JKA"   
## [25] "JADK"   "JAB"    "JAHK"   "JABCDK" "JACZ"   "JAK"    "JADO"   "JDA"

# Check the CRS of both datasets
crs_nyc_sf_merged <- st_crs(nyc_sf_merged)
print(crs_nyc_sf_merged) # NAD83 / New York Long Island

## Coordinate Reference System:
##   User input: NAD83 / New York Long Island (ftUS) 
##   wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4269]],
##     CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
##         METHOD["Lambert Conic Conformal (2SP)",
##             ID["EPSG",9802]],
##         PARAMETER["Latitude of false origin",40.1666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-74,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",984250,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
##         BBOX[40.47,-74.26,41.3,-71.8]],
##     ID["EPSG",2263]]

crs_nycFoodStoreSF <- st_crs(nycFoodStoreSF)
print(crs_nycFoodStoreSF) # WGS 84

## Coordinate Reference System:
##   User input: WGS 84 
##   wkt:
## GEOGCRS["WGS 84",
##     DATUM["World Geodetic System 1984",
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["latitude",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["longitude",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4326]]

# set to the same CRS
nycFoodStoreSF_transformed <- nycFoodStoreSF %>% sf::st_transform(crs = 4326)
nyc_sf_merged_transformed <- nyc_sf_merged %>% sf::st_transform(crs = 4326)

# i wonder if there is a way to save this dataset with all the rows that have the A,J, or D
# what does the A, J, or D symbolize according to the metadata?
# A = store
# D = Food marehouse
# J = multiple operations

head(nycFoodStoreSF_transformed)

## Simple feature collection with 6 features and 16 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -73.91961 ymin: 40.8251 xmax: -73.83188 ymax: 40.86916
## Geodetic CRS:  WGS 84
##   ï__Cnty Lcns_Nm Oprtn_T Estbl_T                         Entty_N
## 1   Bronx  734149   Store     JAC     7 ELEVEN FOOD STORE #37933H
## 2   Bronx  606221   Store     JAC 1001 SAN MIGUEL FOOD CENTER INC
## 3   Bronx  606228   Store     JAC             1029 FOOD PLAZA INC
## 4   Bronx  723375   Store     JAC          1078 DELI GROCERY CORP
## 5   Bronx  724807   Store     JAC     1086 LUNA DELI GROCERY CORP
## 6   Bronx  712943   Store     JAC        109 AJ DELI GROCERY CORP
##                   DBA_Nam Strt_Nmb          Stret_Nm Add_L_2 Add_L_3  City
## 1                    <NA>      500    BAYCHESTER AVE      NA      NA BRONX
## 2 1001 SAN MIGUEL FD CNTR     1001      SHERIDAN AVE      NA      NA BRONX
## 3         1029 FOOD PLAZA      122        E 181ST ST      NA      NA BRONX
## 4       1078 DELI GROCERY     1078 EAST 165TH STREET      NA      NA BRONX
## 5  1086 LUNA DELI GROCERY     1086       BOSTON ROAD      NA      NA BRONX
## 6     109 AJ DELI GROCERY      109     E TREMONT AVE      NA      NA BRONX
##   State Zip_Cod Sqr_Ftg
## 1    NY   10475       0
## 2    NY   10456   1,100
## 3    NY   10453   2,000
## 4    NY   10459   1,200
## 5    NY   10456   1,500
## 6    NY   10453   2,400
##                                                            Locatin
## 1     500 BAYCHESTER AVE\nBRONX, NY 10475\n(40.869156, -73.831875)
## 2      1001 SHERIDAN AVE\nBRONX, NY 10456\n(40.829061, -73.919613)
## 3         122 E 181ST ST\nBRONX, NY 10453\n(40.854755, -73.902853)
## 4 1078 EAST 165TH STREET\nBRONX, NY 10459\n(40.825105, -73.890589)
## 5       1086 BOSTON ROAD\nBRONX, NY 10456\n(40.827096, -73.905123)
## 6      109 E TREMONT AVE\nBRONX, NY 10453\n(40.850537, -73.907137)
##                  Coords                   geometry
## 1 40.869156, -73.831875 POINT (-73.83187 40.86916)
## 2 40.829061, -73.919613 POINT (-73.91961 40.82906)
## 3 40.854755, -73.902853 POINT (-73.90285 40.85475)
## 4 40.825105, -73.890589 POINT (-73.89059 40.82511)
## 5 40.827096, -73.905123  POINT (-73.90512 40.8271)
## 6 40.850537, -73.907137 POINT (-73.90714 40.85054)

#Filter certain types of food stores and count the number of stores in each zip code area
# Can you explain what happens in each step of this pipeline?
nycFoodStoreSF_transformed %>% dplyr::filter(stringr::str_detect(Estbl_T, '[AJD]')) %>% # looking at any row with a A,J, or D code
  sf::st_join(nyc_sf_merged_transformed, ., join= st_contains) %>% # REPLACE WITH nyc_sf_merged
  group_by(ZIPCODE) %>%
  summarise(FoodStoreNum = n()) %>% # how come i can't find this as a new column ?
  magrittr::extract('FoodStoreNum') %>%
  plot(breaks = "jenks", main="Number of Food Stores")

Tasks 3: Aggregate the NYC health facilities (points) to the zip code data. Similarly, choose appropriate subtypes such as nursing homes from the facilities.

# read in the new dataset for the NYC health facilities
nycHealthFacilities <- readr::read_csv("/Users/timothymedina/Documents/GTECH38520_RStudio/R-spatial/Session_11/R-Spatial_II_Lab/NYS_Health_Facility.csv", lazy = FALSE)

## Rows: 3990 Columns: 36
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (28): Facility Name, Short Description, Description, Facility Open Date,...
## dbl  (8): Facility ID, Facility Phone Number, Facility Fax Number, Facility ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

str(nycHealthFacilities)

## spc_tbl_ [3,990 × 36] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Facility ID                 : num [1:3990] 204 620 654 1156 2589 ...
##  $ Facility Name               : chr [1:3990] "Hospice at Lourdes" "Charles T Sitrin Health Care Center Inc" "Central Park Rehabilitation and Nursing Center" "East Side Nursing Home" ...
##  $ Short Description           : chr [1:3990] "HSPC" "NH" "NH" "NH" ...
##  $ Description                 : chr [1:3990] "Hospice" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" ...
##  $ Facility Open Date          : chr [1:3990] "06/01/1985" "02/01/1989" "02/01/1989" "08/01/1979" ...
##  $ Facility Address 1          : chr [1:3990] "4102 Old Vestal Road" "2050 Tilden Avenue" "116 Martin Luther King East" "62 Prospect St" ...
##  $ Facility Address 2          : chr [1:3990] NA NA NA NA ...
##  $ Facility City               : chr [1:3990] "Vestal" "New Hartford" "Syracuse" "Warsaw" ...
##  $ Facility State              : chr [1:3990] "New York" "New York" "New York" "New York" ...
##  $ Facility Zip Code           : chr [1:3990] "13850" "13413" "13205" "14569" ...
##  $ Facility Phone Number       : num [1:3990] 6.08e+09 3.16e+09 3.15e+09 5.86e+09 5.86e+09 ...
##  $ Facility Fax Number         : num [1:3990] NA NA NA NA NA ...
##  $ Facility Website            : chr [1:3990] NA NA NA NA ...
##  $ Facility County Code        : num [1:3990] 3 32 33 60 2 ...
##  $ Facility County             : chr [1:3990] "Broome" "Oneida" "Onondaga" "Wyoming" ...
##  $ Regional Office ID          : num [1:3990] 3 3 3 1 1 1 7 1 7 5 ...
##  $ Regional Office             : chr [1:3990] "Central New York Regional Office" "Central New York Regional Office" "Central New York Regional Office" "Western Regional Office - Buffalo" ...
##  $ Main Site Name              : chr [1:3990] NA NA NA NA ...
##  $ Main Site Facility ID       : num [1:3990] NA NA NA NA NA ...
##  $ Operating Certificate Number: chr [1:3990] "0301501F" "3227304N" "3301326N" "6027303N" ...
##  $ Operator Name               : chr [1:3990] "Our Lady of Lourdes Memorial Hospital Inc" "Charles T Sitrin Health Care Center, Inc" "CPRNC, LLC" "East Side Nursing Home Inc" ...
##  $ Operator Address 1          : chr [1:3990] "169 Riverside Drive" "Box 1000 Tilden Avenue" "116 Martin Luther King East" "62 Prospect Street" ...
##  $ Operator Address 2          : chr [1:3990] NA NA NA NA ...
##  $ Operator City               : chr [1:3990] "Binghamton" "New Hartford" "Syracuse" "Warsaw" ...
##  $ Operator State              : chr [1:3990] "New York" "New York" "New York" "New York" ...
##  $ Operator Zip Code           : chr [1:3990] "13905" "13413" "13205" "14569" ...
##  $ Cooperator Name             : chr [1:3990] NA NA NA NA ...
##  $ Cooperator Address          : chr [1:3990] NA NA NA NA ...
##  $ Cooperator Address 2        : chr [1:3990] NA NA NA NA ...
##  $ Cooperator City             : chr [1:3990] NA NA NA NA ...
##  $ Cooperator State            : chr [1:3990] "New York" "New York" "New York" "New York" ...
##  $ Cooperator Zip Code         : chr [1:3990] NA NA NA NA ...
##  $ Ownership Type              : chr [1:3990] "Not for Profit Corporation" "Not for Profit Corporation" "LLC" "Business Corporation" ...
##  $ Facility Latitude           : num [1:3990] 42.1 43.1 NA 42.7 42.1 ...
##  $ Facility Longitude          : num [1:3990] -76 -75.2 NA -78.1 -78 ...
##  $ Facility Location           : chr [1:3990] "(42.097095, -75.975243)" "(43.05497, -75.228828)" NA "(42.738979, -78.12867)" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   `Facility ID` = col_double(),
##   ..   `Facility Name` = col_character(),
##   ..   `Short Description` = col_character(),
##   ..   Description = col_character(),
##   ..   `Facility Open Date` = col_character(),
##   ..   `Facility Address 1` = col_character(),
##   ..   `Facility Address 2` = col_character(),
##   ..   `Facility City` = col_character(),
##   ..   `Facility State` = col_character(),
##   ..   `Facility Zip Code` = col_character(),
##   ..   `Facility Phone Number` = col_double(),
##   ..   `Facility Fax Number` = col_double(),
##   ..   `Facility Website` = col_character(),
##   ..   `Facility County Code` = col_double(),
##   ..   `Facility County` = col_character(),
##   ..   `Regional Office ID` = col_double(),
##   ..   `Regional Office` = col_character(),
##   ..   `Main Site Name` = col_character(),
##   ..   `Main Site Facility ID` = col_double(),
##   ..   `Operating Certificate Number` = col_character(),
##   ..   `Operator Name` = col_character(),
##   ..   `Operator Address 1` = col_character(),
##   ..   `Operator Address 2` = col_character(),
##   ..   `Operator City` = col_character(),
##   ..   `Operator State` = col_character(),
##   ..   `Operator Zip Code` = col_character(),
##   ..   `Cooperator Name` = col_character(),
##   ..   `Cooperator Address` = col_character(),
##   ..   `Cooperator Address 2` = col_character(),
##   ..   `Cooperator City` = col_character(),
##   ..   `Cooperator State` = col_character(),
##   ..   `Cooperator Zip Code` = col_character(),
##   ..   `Ownership Type` = col_character(),
##   ..   `Facility Latitude` = col_double(),
##   ..   `Facility Longitude` = col_double(),
##   ..   `Facility Location` = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>

# check the name of columns before joining
# names(nycHealthFacilities)
# names(nyc_sf_merged)

# Remove all rows that have a nan in the "Facility Longitude" and "Facility Latitude" columns
new_df <- nycHealthFacilities %>%
  dplyr::filter(!is.na(`Facility Longitude`) & !is.nan(`Facility Longitude`) & !is.na(`Facility Latitude`) & !is.nan(`Facility Latitude`))

head(new_df)

## # A tibble: 6 × 36
##   `Facility ID` Facili…¹ Short…² Descr…³ Facil…⁴ Facil…⁵ Facil…⁶ Facil…⁷ Facil…⁸
##           <dbl> <chr>    <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>  
## 1           204 Hospice… HSPC    Hospice 06/01/… 4102 O… <NA>    Vestal  New Yo…
## 2           620 Charles… NH      Reside… 02/01/… 2050 T… <NA>    New Ha… New Yo…
## 3          1156 East Si… NH      Reside… 08/01/… 62 Pro… <NA>    Warsaw  New Yo…
## 4          2589 Wellsvi… NH      Reside… 02/01/… 4192A … <NA>    Wellsv… New Yo…
## 5          3455 Harris … NH      Reside… 04/08/… 2699 W… <NA>    Willia… New Yo…
## 6          3853 Garden … DTC     Diagno… 04/07/… 400 En… <NA>    Garden… New Yo…
## # … with 27 more variables: `Facility Zip Code` <chr>,
## #   `Facility Phone Number` <dbl>, `Facility Fax Number` <dbl>,
## #   `Facility Website` <chr>, `Facility County Code` <dbl>,
## #   `Facility County` <chr>, `Regional Office ID` <dbl>,
## #   `Regional Office` <chr>, `Main Site Name` <chr>,
## #   `Main Site Facility ID` <dbl>, `Operating Certificate Number` <chr>,
## #   `Operator Name` <chr>, `Operator Address 1` <chr>, …

# Convert the dataframe into a sf file
nycHealthFacilities_sf <- new_df %>%
  sf::st_as_sf(coords = c("Facility Longitude", "Facility Latitude"), crs = 4326)

# Filter NH: Nursing homes
nycNursingHome <- nycHealthFacilities_sf %>% 
   dplyr::filter(`Short Description` == 'NH')

unique(nycHealthFacilities_sf$Description)

##  [1] "Hospice"                                                              
##  [2] "Residential Health Care Facility - SNF"                               
##  [3] "Diagnostic and Treatment Center"                                      
##  [4] "Certified Home Health Agency"                                         
##  [5] "Hospital Extension Clinic"                                            
##  [6] "School Based Diagnostic and Treatment Center Extension Clinic"        
##  [7] "Diagnostic and Treatment Center Extension Clinic"                     
##  [8] "School Based Hospital Extension Clinic"                               
##  [9] "Hospital"                                                             
## [10] "Primary Care Hospital - Critical Access Hospital Extension Clinic"    
## [11] "Adult Day Health Care Program - Offsite"                              
## [12] "Mobile Diagnostic and Treatment Center Extension Clinic"              
## [13] "Long Term Home Health Care Program"                                   
## [14] "Mobile Hospital Extension Clinic"                                     
## [15] "Primary Care Hospital - Critical Access Hospital"                     
## [16] "School Based Primary Care Hospital - Critical Access Extension Clinic"

head(nycNursingHome)

## Simple feature collection with 6 features and 34 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -78.68856 ymin: 40.88436 xmax: -73.88845 ymax: 43.05497
## Geodetic CRS:  WGS 84
## # A tibble: 6 × 35
##   `Facility ID` Facili…¹ Short…² Descr…³ Facil…⁴ Facil…⁵ Facil…⁶ Facil…⁷ Facil…⁸
##           <dbl> <chr>    <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>  
## 1           620 Charles… NH      Reside… 02/01/… 2050 T… <NA>    New Ha… New Yo…
## 2          1156 East Si… NH      Reside… 08/01/… 62 Pro… <NA>    Warsaw  New Yo…
## 3          2589 Wellsvi… NH      Reside… 02/01/… 4192A … <NA>    Wellsv… New Yo…
## 4          3455 Harris … NH      Reside… 04/08/… 2699 W… <NA>    Willia… New Yo…
## 5          1217 St Patr… NH      Reside… 01/01/… 66 Van… <NA>    Bronx   New Yo…
## 6           124 Elcor N… NH      Reside… 02/01/… 48 Col… <NA>    Horseh… New Yo…
## # … with 26 more variables: `Facility Zip Code` <chr>,
## #   `Facility Phone Number` <dbl>, `Facility Fax Number` <dbl>,
## #   `Facility Website` <chr>, `Facility County Code` <dbl>,
## #   `Facility County` <chr>, `Regional Office ID` <dbl>,
## #   `Regional Office` <chr>, `Main Site Name` <chr>,
## #   `Main Site Facility ID` <dbl>, `Operating Certificate Number` <chr>,
## #   `Operator Name` <chr>, `Operator Address 1` <chr>, …

# create a csv from the nycNursingHome file

nycNursingHome %>% dplyr::filter(stringr::str_detect('Short Description', 'NH')) %>%
  sf::st_join(nyc_sf_merged_transformed, ., join= st_contains) %>%
  group_by(ZIPCODE) %>%
  summarise(NursingHomeNum = n()) %>% # how come i can't find this as a new column ?
  magrittr::extract('NursingHomeNum') %>%
  plot(breaks = "jenks", main="Number of Nursing Homes")

Tasks 4: Join the [1] = Census ACS population, race, and age data to the NYC Planning Census Tract Data = [2].

nycCensus <- sf::st_read('/Users/timothymedina/Documents/GTECH38520_RStudio/R-spatial/Session_11/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp', stringsAsFactors = FALSE)

## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `/Users/timothymedina/Documents/GTECH38520_RStudio/R-spatial/Session_11/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)

str(nycCensus)

## Classes 'sf' and 'data.frame':   2165 obs. of  12 variables:
##  $ boro_code : chr  "5" "1" "1" "1" ...
##  $ boro_ct201: chr  "5000900" "1009800" "1010000" "1010200" ...
##  $ boro_name : chr  "Staten Island" "Manhattan" "Manhattan" "Manhattan" ...
##  $ cdeligibil: chr  "E" "I" "I" "I" ...
##  $ ct2010    : chr  "000900" "009800" "010000" "010200" ...
##  $ ctlabel   : chr  "9" "98" "100" "102" ...
##  $ ntacode   : chr  "SI22" "MN19" "MN19" "MN17" ...
##  $ ntaname   : chr  "West New Brighton-New Brighton-St. George" "Turtle Bay-East Midtown" "Turtle Bay-East Midtown" "Midtown-Midtown South" ...
##  $ puma      : chr  "3903" "3808" "3808" "3807" ...
##  $ shape_area: num  2497010 1906016 1860938 1860993 1864600 ...
##  $ shape_leng: num  7729 5534 5692 5688 5693 ...
##  $ geometry  :sfc_MULTIPOLYGON of length 2165; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:28, 1:2] -74.1 -74.1 -74.1 -74.1 -74.1 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:11] "boro_code" "boro_ct201" "boro_name" "cdeligibil" ...

# But it does not use the standard County FIPS, but boro code
nycCensus %<>% dplyr::mutate(cntyFIPS = case_when(
  boro_name == 'Bronx' ~ '005',
  boro_name == 'Brooklyn' ~ '047',
  boro_name == 'Manhattan' ~ '061',
  boro_name == 'Queens' ~ '081',
  boro_name == 'Staten Island' ~ '085'),
  tractFIPS = paste(cntyFIPS, ct2010, sep='') # have to put back together columns to make the ACS data 
)
# created the tractFIPS from the cntyFIPS, ct2010,

unique(nycCensus)

## Simple feature collection with 2165 features and 13 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)
## First 10 features:
##    boro_code boro_ct201     boro_name cdeligibil ct2010 ctlabel ntacode
## 1          5    5000900 Staten Island          E 000900       9    SI22
## 2          1    1009800     Manhattan          I 009800      98    MN19
## 3          1    1010000     Manhattan          I 010000     100    MN19
## 4          1    1010200     Manhattan          I 010200     102    MN17
## 5          1    1010400     Manhattan          I 010400     104    MN17
## 6          1    1011300     Manhattan          I 011300     113    MN17
## 7          1    1011402     Manhattan          I 011402  114.02    MN40
## 8          1    1013000     Manhattan          I 013000     130    MN40
## 9          1    1014000     Manhattan          I 014000     140    MN40
## 10         1    1014801     Manhattan          I 014801  148.01    MN40
##                                      ntaname puma shape_area shape_leng
## 1  West New Brighton-New Brighton-St. George 3903  2497009.7   7729.017
## 2                    Turtle Bay-East Midtown 3808  1906016.4   5534.200
## 3                    Turtle Bay-East Midtown 3808  1860938.4   5692.169
## 4                      Midtown-Midtown South 3807  1860992.7   5687.802
## 5                      Midtown-Midtown South 3807  1864600.4   5693.036
## 6                      Midtown-Midtown South 3807  1890907.3   5699.861
## 7              Upper East Side-Carnegie Hill 3805  1063547.4   4125.256
## 8              Upper East Side-Carnegie Hill 3805  1918144.5   5807.973
## 9              Upper East Side-Carnegie Hill 3805  1925984.2   5820.816
## 10             Upper East Side-Carnegie Hill 3805   559216.2   3135.951
##                          geometry cntyFIPS tractFIPS
## 1  MULTIPOLYGON (((-74.07921 4...      085 085000900
## 2  MULTIPOLYGON (((-73.96433 4...      061 061009800
## 3  MULTIPOLYGON (((-73.96802 4...      061 061010000
## 4  MULTIPOLYGON (((-73.97124 4...      061 061010200
## 5  MULTIPOLYGON (((-73.97446 4...      061 061010400
## 6  MULTIPOLYGON (((-73.98412 4...      061 061011300
## 7  MULTIPOLYGON (((-73.96476 4...      061 061011402
## 8  MULTIPOLYGON (((-73.96148 4...      061 061013000
## 9  MULTIPOLYGON (((-73.95495 4...      061 061014000
## 10 MULTIPOLYGON (((-73.95398 4...      061 061014801

head(nycCensus)

## Simple feature collection with 6 features and 13 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.08721 ymin: 40.63981 xmax: -73.96433 ymax: 40.76364
## Geodetic CRS:  WGS84(DD)
##   boro_code boro_ct201     boro_name cdeligibil ct2010 ctlabel ntacode
## 1         5    5000900 Staten Island          E 000900       9    SI22
## 2         1    1009800     Manhattan          I 009800      98    MN19
## 3         1    1010000     Manhattan          I 010000     100    MN19
## 4         1    1010200     Manhattan          I 010200     102    MN17
## 5         1    1010400     Manhattan          I 010400     104    MN17
## 6         1    1011300     Manhattan          I 011300     113    MN17
##                                     ntaname puma shape_area shape_leng
## 1 West New Brighton-New Brighton-St. George 3903    2497010   7729.017
## 2                   Turtle Bay-East Midtown 3808    1906016   5534.200
## 3                   Turtle Bay-East Midtown 3808    1860938   5692.169
## 4                     Midtown-Midtown South 3807    1860993   5687.802
## 5                     Midtown-Midtown South 3807    1864600   5693.036
## 6                     Midtown-Midtown South 3807    1890907   5699.861
##                         geometry cntyFIPS tractFIPS
## 1 MULTIPOLYGON (((-74.07921 4...      085 085000900
## 2 MULTIPOLYGON (((-73.96433 4...      061 061009800
## 3 MULTIPOLYGON (((-73.96802 4...      061 061010000
## 4 MULTIPOLYGON (((-73.97124 4...      061 061010200
## 5 MULTIPOLYGON (((-73.97446 4...      061 061010400
## 6 MULTIPOLYGON (((-73.98412 4...      061 061011300

# how do i know if tractFips from nycCensus and GEO_ID from acsData have the same number of characters?

# Check for NA values in nycCensus
anyNA_nycCensus <- any(is.na(nycCensus))
print(paste("Any NA in nycCensus: ", anyNA_nycCensus))

## [1] "Any NA in nycCensus:  TRUE"

# Count NA values in each column of nycCensus
na_count_nycCensus <- colSums(is.na(nycCensus))
print(na_count_nycCensus) # there is one in cdeligibil

##  boro_code boro_ct201  boro_name cdeligibil     ct2010    ctlabel    ntacode 
##          0          0          0          1          0          0          0 
##    ntaname       puma shape_area shape_leng   geometry   cntyFIPS  tractFIPS 
##          0          0          0          0          0          0          0

# Remove the row 
nycCensus <- nycCensus[!is.na(nycCensus$cdeligibil), ]

acsData <- readLines("/Users/timothymedina/Documents/GTECH38520_RStudio/R-spatial/Session_11/R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>% 
 magrittr::extract(-2) %>% 
  textConnection() %>%
  read.csv(header=TRUE, quote= "\"") 

# Then use mutate() to convert the columns to numeric
original_data <- acsData %>%
  mutate(
    totPop = as.numeric(DP05_0001E),
    elderlyPop = as.numeric(DP05_0024E),
    malePop = as.numeric(DP05_0002E),
    femalePop = as.numeric(DP05_0003E),
    whitePop = as.numeric(DP05_0037E),
    blackPop = as.numeric(DP05_0038E),
    asianPop = as.numeric(DP05_0067E),
    hispanicPop = as.numeric(DP05_0071E),
    adultPop = as.numeric(DP05_0021E),
    citizenAdult = as.numeric(DP05_0087E),
    censusCode = stringr::str_sub(GEO_ID, -9,-1)
  )
# the issue is that I want to only keep these columns that were mutated

names(original_data)

##   [1] "GEO_ID"       "NAME"         "DP05_0031PM"  "DP05_0032E"   "DP05_0032M"  
##   [6] "DP05_0032PE"  "DP05_0032PM"  "DP05_0033E"   "DP05_0033M"   "DP05_0033PE" 
##  [11] "DP05_0033PM"  "DP05_0034E"   "DP05_0034M"   "DP05_0034PE"  "DP05_0034PM" 
##  [16] "DP05_0035E"   "DP05_0035M"   "DP05_0035PE"  "DP05_0035PM"  "DP05_0036E"  
##  [21] "DP05_0036M"   "DP05_0036PE"  "DP05_0036PM"  "DP05_0037E"   "DP05_0037M"  
##  [26] "DP05_0037PE"  "DP05_0037PM"  "DP05_0038E"   "DP05_0038M"   "DP05_0038PE" 
##  [31] "DP05_0038PM"  "DP05_0039E"   "DP05_0039M"   "DP05_0039PE"  "DP05_0039PM" 
##  [36] "DP05_0040E"   "DP05_0040M"   "DP05_0040PE"  "DP05_0040PM"  "DP05_0041E"  
##  [41] "DP05_0041M"   "DP05_0041PE"  "DP05_0041PM"  "DP05_0042E"   "DP05_0042M"  
##  [46] "DP05_0042PE"  "DP05_0042PM"  "DP05_0043E"   "DP05_0043M"   "DP05_0043PE" 
##  [51] "DP05_0043PM"  "DP05_0044E"   "DP05_0044M"   "DP05_0044PE"  "DP05_0044PM" 
##  [56] "DP05_0045E"   "DP05_0045M"   "DP05_0045PE"  "DP05_0045PM"  "DP05_0046E"  
##  [61] "DP05_0046M"   "DP05_0046PE"  "DP05_0046PM"  "DP05_0047E"   "DP05_0047M"  
##  [66] "DP05_0047PE"  "DP05_0047PM"  "DP05_0048E"   "DP05_0048M"   "DP05_0048PE" 
##  [71] "DP05_0048PM"  "DP05_0049E"   "DP05_0049M"   "DP05_0049PE"  "DP05_0049PM" 
##  [76] "DP05_0050E"   "DP05_0050M"   "DP05_0050PE"  "DP05_0050PM"  "DP05_0051E"  
##  [81] "DP05_0051M"   "DP05_0051PE"  "DP05_0051PM"  "DP05_0052E"   "DP05_0052M"  
##  [86] "DP05_0052PE"  "DP05_0052PM"  "DP05_0053E"   "DP05_0053M"   "DP05_0053PE" 
##  [91] "DP05_0053PM"  "DP05_0054E"   "DP05_0054M"   "DP05_0054PE"  "DP05_0054PM" 
##  [96] "DP05_0055E"   "DP05_0055M"   "DP05_0055PE"  "DP05_0055PM"  "DP05_0056E"  
## [101] "DP05_0056M"   "DP05_0056PE"  "DP05_0056PM"  "DP05_0057E"   "DP05_0057M"  
## [106] "DP05_0057PE"  "DP05_0057PM"  "DP05_0058E"   "DP05_0058M"   "DP05_0058PE" 
## [111] "DP05_0058PM"  "DP05_0059E"   "DP05_0059M"   "DP05_0059PE"  "DP05_0059PM" 
## [116] "DP05_0060E"   "DP05_0060M"   "DP05_0060PE"  "DP05_0060PM"  "DP05_0061E"  
## [121] "DP05_0061M"   "DP05_0061PE"  "DP05_0061PM"  "DP05_0062E"   "DP05_0062M"  
## [126] "DP05_0062PE"  "DP05_0062PM"  "DP05_0063E"   "DP05_0063M"   "DP05_0063PE" 
## [131] "DP05_0063PM"  "DP05_0064E"   "DP05_0064M"   "DP05_0064PE"  "DP05_0064PM" 
## [136] "DP05_0065E"   "DP05_0065M"   "DP05_0065PE"  "DP05_0065PM"  "DP05_0066E"  
## [141] "DP05_0066M"   "DP05_0066PE"  "DP05_0066PM"  "DP05_0067E"   "DP05_0067M"  
## [146] "DP05_0067PE"  "DP05_0067PM"  "DP05_0068E"   "DP05_0068M"   "DP05_0068PE" 
## [151] "DP05_0068PM"  "DP05_0069E"   "DP05_0069M"   "DP05_0069PE"  "DP05_0069PM" 
## [156] "DP05_0070E"   "DP05_0070M"   "DP05_0070PE"  "DP05_0070PM"  "DP05_0071E"  
## [161] "DP05_0071M"   "DP05_0071PE"  "DP05_0071PM"  "DP05_0072E"   "DP05_0072M"  
## [166] "DP05_0072PE"  "DP05_0072PM"  "DP05_0073E"   "DP05_0073M"   "DP05_0073PE" 
## [171] "DP05_0073PM"  "DP05_0074E"   "DP05_0074M"   "DP05_0074PE"  "DP05_0074PM" 
## [176] "DP05_0075E"   "DP05_0075M"   "DP05_0075PE"  "DP05_0075PM"  "DP05_0076E"  
## [181] "DP05_0076M"   "DP05_0076PE"  "DP05_0076PM"  "DP05_0077E"   "DP05_0077M"  
## [186] "DP05_0077PE"  "DP05_0077PM"  "DP05_0078E"   "DP05_0078M"   "DP05_0078PE" 
## [191] "DP05_0078PM"  "DP05_0079E"   "DP05_0079M"   "DP05_0079PE"  "DP05_0079PM" 
## [196] "DP05_0080E"   "DP05_0080M"   "DP05_0080PE"  "DP05_0080PM"  "DP05_0081E"  
## [201] "DP05_0081M"   "DP05_0081PE"  "DP05_0081PM"  "DP05_0082E"   "DP05_0082M"  
## [206] "DP05_0082PE"  "DP05_0082PM"  "DP05_0083E"   "DP05_0083M"   "DP05_0083PE" 
## [211] "DP05_0083PM"  "DP05_0084E"   "DP05_0084M"   "DP05_0084PE"  "DP05_0084PM" 
## [216] "DP05_0085E"   "DP05_0085M"   "DP05_0085PE"  "DP05_0085PM"  "DP05_0086E"  
## [221] "DP05_0086M"   "DP05_0086PE"  "DP05_0086PM"  "DP05_0087E"   "DP05_0087M"  
## [226] "DP05_0087PE"  "DP05_0087PM"  "DP05_0088E"   "DP05_0088M"   "DP05_0088PE" 
## [231] "DP05_0088PM"  "DP05_0089E"   "DP05_0089M"   "DP05_0089PE"  "DP05_0089PM" 
## [236] "DP05_0001E"   "DP05_0001M"   "DP05_0001PE"  "DP05_0001PM"  "DP05_0002E"  
## [241] "DP05_0002M"   "DP05_0002PE"  "DP05_0002PM"  "DP05_0003E"   "DP05_0003M"  
## [246] "DP05_0003PE"  "DP05_0003PM"  "DP05_0004E"   "DP05_0004M"   "DP05_0004PE" 
## [251] "DP05_0004PM"  "DP05_0005E"   "DP05_0005M"   "DP05_0005PE"  "DP05_0005PM" 
## [256] "DP05_0006E"   "DP05_0006M"   "DP05_0006PE"  "DP05_0006PM"  "DP05_0007E"  
## [261] "DP05_0007M"   "DP05_0007PE"  "DP05_0007PM"  "DP05_0008E"   "DP05_0008M"  
## [266] "DP05_0008PE"  "DP05_0008PM"  "DP05_0009E"   "DP05_0009M"   "DP05_0009PE" 
## [271] "DP05_0009PM"  "DP05_0010E"   "DP05_0010M"   "DP05_0010PE"  "DP05_0010PM" 
## [276] "DP05_0011E"   "DP05_0011M"   "DP05_0011PE"  "DP05_0011PM"  "DP05_0012E"  
## [281] "DP05_0012M"   "DP05_0012PE"  "DP05_0012PM"  "DP05_0013E"   "DP05_0013M"  
## [286] "DP05_0013PE"  "DP05_0013PM"  "DP05_0014E"   "DP05_0014M"   "DP05_0014PE" 
## [291] "DP05_0014PM"  "DP05_0015E"   "DP05_0015M"   "DP05_0015PE"  "DP05_0015PM" 
## [296] "DP05_0016E"   "DP05_0016M"   "DP05_0016PE"  "DP05_0016PM"  "DP05_0017E"  
## [301] "DP05_0017M"   "DP05_0017PE"  "DP05_0017PM"  "DP05_0018E"   "DP05_0018M"  
## [306] "DP05_0018PE"  "DP05_0018PM"  "DP05_0019E"   "DP05_0019M"   "DP05_0019PE" 
## [311] "DP05_0019PM"  "DP05_0020E"   "DP05_0020M"   "DP05_0020PE"  "DP05_0020PM" 
## [316] "DP05_0021E"   "DP05_0021M"   "DP05_0021PE"  "DP05_0021PM"  "DP05_0022E"  
## [321] "DP05_0022M"   "DP05_0022PE"  "DP05_0022PM"  "DP05_0023E"   "DP05_0023M"  
## [326] "DP05_0023PE"  "DP05_0023PM"  "DP05_0024E"   "DP05_0024M"   "DP05_0024PE" 
## [331] "DP05_0024PM"  "DP05_0025E"   "DP05_0025M"   "DP05_0025PE"  "DP05_0025PM" 
## [336] "DP05_0026E"   "DP05_0026M"   "DP05_0026PE"  "DP05_0026PM"  "DP05_0027E"  
## [341] "DP05_0027M"   "DP05_0027PE"  "DP05_0027PM"  "DP05_0028E"   "DP05_0028M"  
## [346] "DP05_0028PE"  "DP05_0028PM"  "DP05_0029E"   "DP05_0029M"   "DP05_0029PE" 
## [351] "DP05_0029PM"  "DP05_0030E"   "DP05_0030M"   "DP05_0030PE"  "DP05_0030PM" 
## [356] "DP05_0031E"   "DP05_0031M"   "DP05_0031PE"  "totPop"       "elderlyPop"  
## [361] "malePop"      "femalePop"    "whitePop"     "blackPop"     "asianPop"    
## [366] "hispanicPop"  "adultPop"     "citizenAdult" "censusCode"

extracted <- original_data %>%
  magrittr::extract(1:10,) # remember that censuscode and tractFIPS are supposed to be the same
names(extracted)

##   [1] "GEO_ID"       "NAME"         "DP05_0031PM"  "DP05_0032E"   "DP05_0032M"  
##   [6] "DP05_0032PE"  "DP05_0032PM"  "DP05_0033E"   "DP05_0033M"   "DP05_0033PE" 
##  [11] "DP05_0033PM"  "DP05_0034E"   "DP05_0034M"   "DP05_0034PE"  "DP05_0034PM" 
##  [16] "DP05_0035E"   "DP05_0035M"   "DP05_0035PE"  "DP05_0035PM"  "DP05_0036E"  
##  [21] "DP05_0036M"   "DP05_0036PE"  "DP05_0036PM"  "DP05_0037E"   "DP05_0037M"  
##  [26] "DP05_0037PE"  "DP05_0037PM"  "DP05_0038E"   "DP05_0038M"   "DP05_0038PE" 
##  [31] "DP05_0038PM"  "DP05_0039E"   "DP05_0039M"   "DP05_0039PE"  "DP05_0039PM" 
##  [36] "DP05_0040E"   "DP05_0040M"   "DP05_0040PE"  "DP05_0040PM"  "DP05_0041E"  
##  [41] "DP05_0041M"   "DP05_0041PE"  "DP05_0041PM"  "DP05_0042E"   "DP05_0042M"  
##  [46] "DP05_0042PE"  "DP05_0042PM"  "DP05_0043E"   "DP05_0043M"   "DP05_0043PE" 
##  [51] "DP05_0043PM"  "DP05_0044E"   "DP05_0044M"   "DP05_0044PE"  "DP05_0044PM" 
##  [56] "DP05_0045E"   "DP05_0045M"   "DP05_0045PE"  "DP05_0045PM"  "DP05_0046E"  
##  [61] "DP05_0046M"   "DP05_0046PE"  "DP05_0046PM"  "DP05_0047E"   "DP05_0047M"  
##  [66] "DP05_0047PE"  "DP05_0047PM"  "DP05_0048E"   "DP05_0048M"   "DP05_0048PE" 
##  [71] "DP05_0048PM"  "DP05_0049E"   "DP05_0049M"   "DP05_0049PE"  "DP05_0049PM" 
##  [76] "DP05_0050E"   "DP05_0050M"   "DP05_0050PE"  "DP05_0050PM"  "DP05_0051E"  
##  [81] "DP05_0051M"   "DP05_0051PE"  "DP05_0051PM"  "DP05_0052E"   "DP05_0052M"  
##  [86] "DP05_0052PE"  "DP05_0052PM"  "DP05_0053E"   "DP05_0053M"   "DP05_0053PE" 
##  [91] "DP05_0053PM"  "DP05_0054E"   "DP05_0054M"   "DP05_0054PE"  "DP05_0054PM" 
##  [96] "DP05_0055E"   "DP05_0055M"   "DP05_0055PE"  "DP05_0055PM"  "DP05_0056E"  
## [101] "DP05_0056M"   "DP05_0056PE"  "DP05_0056PM"  "DP05_0057E"   "DP05_0057M"  
## [106] "DP05_0057PE"  "DP05_0057PM"  "DP05_0058E"   "DP05_0058M"   "DP05_0058PE" 
## [111] "DP05_0058PM"  "DP05_0059E"   "DP05_0059M"   "DP05_0059PE"  "DP05_0059PM" 
## [116] "DP05_0060E"   "DP05_0060M"   "DP05_0060PE"  "DP05_0060PM"  "DP05_0061E"  
## [121] "DP05_0061M"   "DP05_0061PE"  "DP05_0061PM"  "DP05_0062E"   "DP05_0062M"  
## [126] "DP05_0062PE"  "DP05_0062PM"  "DP05_0063E"   "DP05_0063M"   "DP05_0063PE" 
## [131] "DP05_0063PM"  "DP05_0064E"   "DP05_0064M"   "DP05_0064PE"  "DP05_0064PM" 
## [136] "DP05_0065E"   "DP05_0065M"   "DP05_0065PE"  "DP05_0065PM"  "DP05_0066E"  
## [141] "DP05_0066M"   "DP05_0066PE"  "DP05_0066PM"  "DP05_0067E"   "DP05_0067M"  
## [146] "DP05_0067PE"  "DP05_0067PM"  "DP05_0068E"   "DP05_0068M"   "DP05_0068PE" 
## [151] "DP05_0068PM"  "DP05_0069E"   "DP05_0069M"   "DP05_0069PE"  "DP05_0069PM" 
## [156] "DP05_0070E"   "DP05_0070M"   "DP05_0070PE"  "DP05_0070PM"  "DP05_0071E"  
## [161] "DP05_0071M"   "DP05_0071PE"  "DP05_0071PM"  "DP05_0072E"   "DP05_0072M"  
## [166] "DP05_0072PE"  "DP05_0072PM"  "DP05_0073E"   "DP05_0073M"   "DP05_0073PE" 
## [171] "DP05_0073PM"  "DP05_0074E"   "DP05_0074M"   "DP05_0074PE"  "DP05_0074PM" 
## [176] "DP05_0075E"   "DP05_0075M"   "DP05_0075PE"  "DP05_0075PM"  "DP05_0076E"  
## [181] "DP05_0076M"   "DP05_0076PE"  "DP05_0076PM"  "DP05_0077E"   "DP05_0077M"  
## [186] "DP05_0077PE"  "DP05_0077PM"  "DP05_0078E"   "DP05_0078M"   "DP05_0078PE" 
## [191] "DP05_0078PM"  "DP05_0079E"   "DP05_0079M"   "DP05_0079PE"  "DP05_0079PM" 
## [196] "DP05_0080E"   "DP05_0080M"   "DP05_0080PE"  "DP05_0080PM"  "DP05_0081E"  
## [201] "DP05_0081M"   "DP05_0081PE"  "DP05_0081PM"  "DP05_0082E"   "DP05_0082M"  
## [206] "DP05_0082PE"  "DP05_0082PM"  "DP05_0083E"   "DP05_0083M"   "DP05_0083PE" 
## [211] "DP05_0083PM"  "DP05_0084E"   "DP05_0084M"   "DP05_0084PE"  "DP05_0084PM" 
## [216] "DP05_0085E"   "DP05_0085M"   "DP05_0085PE"  "DP05_0085PM"  "DP05_0086E"  
## [221] "DP05_0086M"   "DP05_0086PE"  "DP05_0086PM"  "DP05_0087E"   "DP05_0087M"  
## [226] "DP05_0087PE"  "DP05_0087PM"  "DP05_0088E"   "DP05_0088M"   "DP05_0088PE" 
## [231] "DP05_0088PM"  "DP05_0089E"   "DP05_0089M"   "DP05_0089PE"  "DP05_0089PM" 
## [236] "DP05_0001E"   "DP05_0001M"   "DP05_0001PE"  "DP05_0001PM"  "DP05_0002E"  
## [241] "DP05_0002M"   "DP05_0002PE"  "DP05_0002PM"  "DP05_0003E"   "DP05_0003M"  
## [246] "DP05_0003PE"  "DP05_0003PM"  "DP05_0004E"   "DP05_0004M"   "DP05_0004PE" 
## [251] "DP05_0004PM"  "DP05_0005E"   "DP05_0005M"   "DP05_0005PE"  "DP05_0005PM" 
## [256] "DP05_0006E"   "DP05_0006M"   "DP05_0006PE"  "DP05_0006PM"  "DP05_0007E"  
## [261] "DP05_0007M"   "DP05_0007PE"  "DP05_0007PM"  "DP05_0008E"   "DP05_0008M"  
## [266] "DP05_0008PE"  "DP05_0008PM"  "DP05_0009E"   "DP05_0009M"   "DP05_0009PE" 
## [271] "DP05_0009PM"  "DP05_0010E"   "DP05_0010M"   "DP05_0010PE"  "DP05_0010PM" 
## [276] "DP05_0011E"   "DP05_0011M"   "DP05_0011PE"  "DP05_0011PM"  "DP05_0012E"  
## [281] "DP05_0012M"   "DP05_0012PE"  "DP05_0012PM"  "DP05_0013E"   "DP05_0013M"  
## [286] "DP05_0013PE"  "DP05_0013PM"  "DP05_0014E"   "DP05_0014M"   "DP05_0014PE" 
## [291] "DP05_0014PM"  "DP05_0015E"   "DP05_0015M"   "DP05_0015PE"  "DP05_0015PM" 
## [296] "DP05_0016E"   "DP05_0016M"   "DP05_0016PE"  "DP05_0016PM"  "DP05_0017E"  
## [301] "DP05_0017M"   "DP05_0017PE"  "DP05_0017PM"  "DP05_0018E"   "DP05_0018M"  
## [306] "DP05_0018PE"  "DP05_0018PM"  "DP05_0019E"   "DP05_0019M"   "DP05_0019PE" 
## [311] "DP05_0019PM"  "DP05_0020E"   "DP05_0020M"   "DP05_0020PE"  "DP05_0020PM" 
## [316] "DP05_0021E"   "DP05_0021M"   "DP05_0021PE"  "DP05_0021PM"  "DP05_0022E"  
## [321] "DP05_0022M"   "DP05_0022PE"  "DP05_0022PM"  "DP05_0023E"   "DP05_0023M"  
## [326] "DP05_0023PE"  "DP05_0023PM"  "DP05_0024E"   "DP05_0024M"   "DP05_0024PE" 
## [331] "DP05_0024PM"  "DP05_0025E"   "DP05_0025M"   "DP05_0025PE"  "DP05_0025PM" 
## [336] "DP05_0026E"   "DP05_0026M"   "DP05_0026PE"  "DP05_0026PM"  "DP05_0027E"  
## [341] "DP05_0027M"   "DP05_0027PE"  "DP05_0027PM"  "DP05_0028E"   "DP05_0028M"  
## [346] "DP05_0028PE"  "DP05_0028PM"  "DP05_0029E"   "DP05_0029M"   "DP05_0029PE" 
## [351] "DP05_0029PM"  "DP05_0030E"   "DP05_0030M"   "DP05_0030PE"  "DP05_0030PM" 
## [356] "DP05_0031E"   "DP05_0031M"   "DP05_0031PE"  "totPop"       "elderlyPop"  
## [361] "malePop"      "femalePop"    "whitePop"     "blackPop"     "asianPop"    
## [366] "hispanicPop"  "adultPop"     "citizenAdult" "censusCode"

# Merge (JOIN) ACS data to the census tracts
popData <- merge(nycCensus, original_data[, c("censusCode", "totPop", "elderlyPop", "malePop", "femalePop", "whitePop", "blackPop", "asianPop", "hispanicPop", "adultPop", "citizenAdult")], by.x ='tractFIPS', by.y = 'censusCode')

# verify the data
sum(popData$totPop)

## [1] 8443713

str(popData)

## Classes 'sf' and 'data.frame':   2164 obs. of  24 variables:
##  $ tractFIPS   : chr  "005000100" "005000200" "005000400" "005001600" ...
##  $ boro_code   : chr  "2" "2" "2" "2" ...
##  $ boro_ct201  : chr  "2000100" "2000200" "2000400" "2001600" ...
##  $ boro_name   : chr  "Bronx" "Bronx" "Bronx" "Bronx" ...
##  $ cdeligibil  : chr  "I" "I" "I" "E" ...
##  $ ct2010      : chr  "000100" "000200" "000400" "001600" ...
##  $ ctlabel     : chr  "1" "2" "4" "16" ...
##  $ ntacode     : chr  "BX98" "BX09" "BX09" "BX09" ...
##  $ ntaname     : chr  "Rikers Island" "Soundview-Castle Hill-Clason Point-Harding Park" "Soundview-Castle Hill-Clason Point-Harding Park" "Soundview-Castle Hill-Clason Point-Harding Park" ...
##  $ puma        : chr  "3710" "3709" "3709" "3709" ...
##  $ shape_area  : num  18163828 5006558 8561175 5221330 17961359 ...
##  $ shape_leng  : num  18898 15611 24725 9671 30000 ...
##  $ cntyFIPS    : chr  "005" "005" "005" "005" ...
##  $ totPop      : num  7080 4542 5634 5917 2765 ...
##  $ elderlyPop  : num  51 950 710 989 76 977 648 0 548 243 ...
##  $ malePop     : num  6503 2264 2807 2365 1363 ...
##  $ femalePop   : num  577 2278 2827 3552 1402 ...
##  $ whitePop    : num  1773 2165 2623 2406 585 ...
##  $ blackPop    : num  4239 1279 1699 2434 1041 ...
##  $ asianPop    : num  130 119 226 68 130 29 27 14 68 0 ...
##  $ hispanicPop : num  2329 3367 3873 3603 1413 ...
##  $ adultPop    : num  6909 3582 4507 4416 2008 ...
##  $ citizenAdult: num  6100 2952 4214 3851 1787 ...
##  $ geometry    :sfc_MULTIPOLYGON of length 2164; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:499, 1:2] -73.9 -73.9 -73.9 -73.9 -73.9 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:23] "tractFIPS" "boro_code" "boro_ct201" "boro_name" ...

st_crs(popData)

## Coordinate Reference System:
##   User input: WGS84(DD) 
##   wkt:
## GEOGCRS["WGS84(DD)",
##     DATUM["WGS84",
##         ELLIPSOID["WGS84",6378137,298.257223563,
##             LENGTHUNIT["metre",1,
##                 ID["EPSG",9001]]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic longitude",east,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic latitude",north,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]]]

popNYC <- sf::st_transform(popData, st_crs(nyc_sf_merged_transformed)) # zpNYC is the JOINED zip code data from task 1.

Tasks 5:Aggregate the ACS census data to zip code area data.

# Now aggregate to the zip code level
covidPopZipNYC <- sf::st_join(nyc_sf_merged_transformed, 
                              popNYC %>% sf::st_centroid(), # this essentially converts census tracts to points
                              join = st_contains) %>% 
  group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT, TOTAL_COVID_TESTS) %>% # use names(zpNYC) and names(popNYC) to see what we have
  summarise(totPop = sum(totPop),
            malePctg = sum(malePop)/totPop*100, # note the totPop is the newly calculated
            asianPop = sum(asianPop),
            blackPop = sum(blackPop),
            hispanicPop = sum(hispanicPop),
            whitePop = sum(whitePop))

## Warning: st_centroid assumes attributes are constant over geometries

## `summarise()` has grouped output by 'ZIPCODE', 'PO_NAME', 'POPULATION',
## 'COUNTY', 'COVID_CASE_COUNT'. You can override using the `.groups` argument.

covidPopZipNYC %>% head()

## Simple feature collection with 6 features and 12 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: -74.0473 ymin: 40.68392 xmax: -73.97141 ymax: 40.75769
## Geodetic CRS:  WGS 84
## # A tibble: 6 × 13
## # Groups:   ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT [6]
##   ZIPCODE PO_NAME  POPUL…¹ COUNTY COVID…² TOTAL…³ totPop maleP…⁴ asian…⁵ black…⁶
##   <chr>   <chr>      <dbl> <chr>    <dbl>   <dbl>  <dbl>   <dbl>   <dbl>   <dbl>
## 1 10001   New York   22413 New Y…    1542   20158  19146    51.2    4837    1092
## 2 10002   New York   81305 New Y…    5902   48197  74310    48.4   32149    5969
## 3 10003   New York   55878 New Y…    2803   41076  53487    50.3    8027    3130
## 4 10004   New York    2187 New Y…     247    3599     NA    NA        NA      NA
## 5 10005   New York    8107 New Y…     413    6102   8809    42.8    1974     185
## 6 10006   New York    3011 New Y…     164    2441   4639    44.0    1195      52
## # … with 3 more variables: hispanicPop <dbl>, whitePop <dbl>,
## #   geometry <GEOMETRY [°]>, and abbreviated variable names ¹POPULATION,
## #   ²COVID_CASE_COUNT, ³TOTAL_COVID_TESTS, ⁴malePctg, ⁵asianPop, ⁶blackPop

covidPopZipNYC <- covidPopZipNYC %>% filter(!is.na(malePctg))

In the end, we should have the confirmed and tested cases of covid-19, numbers of specific types of food stores, numbers of specific types of health facilities, and population (total population, elderly, by race, etc.) at the zip code level. We should also have boroughs, names, etc. for each zip code area.