COVID_19_data <- readr::read_csv("/Users/timothymedina/Documents/GTECH38520_RStudio/R-spatial/Session_11/R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv", lazy = FALSE)
## Rows: 177 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): NEIGHBORHOOD_NAME, BOROUGH_GROUP, label
## dbl (10): MODIFIED_ZCTA, lat, lon, COVID_CASE_COUNT, COVID_CASE_RATE, POP_DE...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
str(COVID_19_data)
## spc_tbl_ [177 × 13] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ MODIFIED_ZCTA : num [1:177] 10001 10002 10003 10004 10005 ...
## $ NEIGHBORHOOD_NAME: chr [1:177] "Chelsea/NoMad/West Chelsea" "Chinatown/Lower East Side" "East Village/Gramercy/Greenwich Village" "Financial District" ...
## $ BOROUGH_GROUP : chr [1:177] "Manhattan" "Manhattan" "Manhattan" "Manhattan" ...
## $ label : chr [1:177] "10001, 10118" "10002" "10003" "10004" ...
## $ lat : num [1:177] 40.8 40.7 40.7 40.7 40.7 ...
## $ lon : num [1:177] -74 -74 -74 -74 -74 ...
## $ COVID_CASE_COUNT : num [1:177] 1542 5902 2803 247 413 ...
## $ COVID_CASE_RATE : num [1:177] 5584 7836 5193 8311 4716 ...
## $ POP_DENOMINATOR : num [1:177] 27613 75323 53978 2972 8757 ...
## $ COVID_DEATH_COUNT: num [1:177] 35 264 48 2 0 1 4 118 37 62 ...
## $ COVID_DEATH_RATE : num [1:177] 126.8 350.5 88.9 67.3 0 ...
## $ PERCENT_POSITIVE : num [1:177] 7.86 12.63 6.93 6.92 6.72 ...
## $ TOTAL_COVID_TESTS: num [1:177] 20158 48197 41076 3599 6102 ...
## - attr(*, "spec")=
## .. cols(
## .. MODIFIED_ZCTA = col_double(),
## .. NEIGHBORHOOD_NAME = col_character(),
## .. BOROUGH_GROUP = col_character(),
## .. label = col_character(),
## .. lat = col_double(),
## .. lon = col_double(),
## .. COVID_CASE_COUNT = col_double(),
## .. COVID_CASE_RATE = col_double(),
## .. POP_DENOMINATOR = col_double(),
## .. COVID_DEATH_COUNT = col_double(),
## .. COVID_DEATH_RATE = col_double(),
## .. PERCENT_POSITIVE = col_double(),
## .. TOTAL_COVID_TESTS = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# head(COVID_19_data)
# according to 04.23.2021
NYC_zipcode_dat_sf <- st_read("/Users/timothymedina/Documents/GTECH38520_RStudio/R-spatial/Session_11/R-Spatial_II_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp")
## Reading layer `ZIP_CODE_040114' from data source
## `/Users/timothymedina/Documents/GTECH38520_RStudio/R-spatial/Session_11/R-Spatial_II_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
str(NYC_zipcode_dat_sf)
## Classes 'sf' and 'data.frame': 263 obs. of 13 variables:
## $ ZIPCODE : chr "11436" "11213" "11212" "11225" ...
## $ BLDGZIP : chr "0" "0" "0" "0" ...
## $ PO_NAME : chr "Jamaica" "Brooklyn" "Brooklyn" "Brooklyn" ...
## $ POPULATION: num 18681 62426 83866 56527 72280 ...
## $ AREA : num 22699295 29631004 41972104 23698630 36868799 ...
## $ STATE : chr "NY" "NY" "NY" "NY" ...
## $ COUNTY : chr "Queens" "Kings" "Kings" "Kings" ...
## $ ST_FIPS : chr "36" "36" "36" "36" ...
## $ CTY_FIPS : chr "081" "047" "047" "047" ...
## $ URL : chr "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
## $ SHAPE_AREA: num 0 0 0 0 0 0 0 0 0 0 ...
## $ SHAPE_LEN : num 0 0 0 0 0 0 0 0 0 0 ...
## $ geometry :sfc_POLYGON of length 263; first list element: List of 1
## ..$ : num [1:159, 1:2] 1038098 1038142 1038171 1038280 1038521 ...
## ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:12] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...
# joining the files
nyc_sf_merged <- base::merge(NYC_zipcode_dat_sf, COVID_19_data, by.x = "ZIPCODE", by.y = "MODIFIED_ZCTA")
head(nyc_sf_merged)
## Simple feature collection with 6 features and 24 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 971132.6 ymin: 190102.9 xmax: 992172.8 ymax: 215324.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
## ZIPCODE BLDGZIP PO_NAME POPULATION AREA STATE COUNTY ST_FIPS CTY_FIPS
## 1 10001 0 New York 22413 17794941 NY New York 36 061
## 2 10002 0 New York 81305 26280129 NY New York 36 061
## 3 10003 0 New York 55878 15538376 NY New York 36 061
## 4 10004 0 New York 2187 670708 NY New York 36 061
## 5 10004 0 New York 2187 4002521 NY New York 36 061
## 6 10004 0 New York 2187 1202708 NY New York 36 061
## URL SHAPE_AREA SHAPE_LEN
## 1 http://www.usps.com/ 0 0
## 2 http://www.usps.com/ 0 0
## 3 http://www.usps.com/ 0 0
## 4 http://www.usps.com/ 0 0
## 5 http://www.usps.com/ 0 0
## 6 http://www.usps.com/ 0 0
## NEIGHBORHOOD_NAME BOROUGH_GROUP label lat
## 1 Chelsea/NoMad/West Chelsea Manhattan 10001, 10118 40.75069
## 2 Chinatown/Lower East Side Manhattan 10002 40.71578
## 3 East Village/Gramercy/Greenwich Village Manhattan 10003 40.73183
## 4 Financial District Manhattan 10004 40.70367
## 5 Financial District Manhattan 10004 40.70367
## 6 Financial District Manhattan 10004 40.70367
## lon COVID_CASE_COUNT COVID_CASE_RATE POP_DENOMINATOR COVID_DEATH_COUNT
## 1 -73.99714 1542 5584.31 27613.09 35
## 2 -73.98618 5902 7835.62 75322.71 264
## 3 -73.98916 2803 5192.87 53977.81 48
## 4 -74.01311 247 8310.58 2972.12 2
## 5 -74.01311 247 8310.58 2972.12 2
## 6 -74.01311 247 8310.58 2972.12 2
## COVID_DEATH_RATE PERCENT_POSITIVE TOTAL_COVID_TESTS
## 1 126.75 7.86 20158
## 2 350.49 12.63 48197
## 3 88.93 6.93 41076
## 4 67.29 6.92 3599
## 5 67.29 6.92 3599
## 6 67.29 6.92 3599
## geometry
## 1 POLYGON ((981958.6 213464.5...
## 2 POLYGON ((991339.9 207576.8...
## 3 POLYGON ((989830.5 207048.1...
## 4 POLYGON ((971218.4 190710.9...
## 5 POLYGON ((981197.4 196571.1...
## 6 POLYGON ((972696.8 193014.5...
# names(nyc_sf_merged) # removed the "MODIFIED_ZCTA" column
# read in the food store data
nycFoodStoreSF <- st_read("/Users/timothymedina/Documents/GTECH38520_RStudio/R-spatial/Session_11/R-Spatial_II_Lab/nycFoodStore.shp")
## Reading layer `nycFoodStore' from data source
## `/Users/timothymedina/Documents/GTECH38520_RStudio/R-spatial/Session_11/R-Spatial_II_Lab/nycFoodStore.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS: WGS 84
str(nycFoodStoreSF)
## Classes 'sf' and 'data.frame': 11300 obs. of 17 variables:
## $ ï__Cnty : chr "Bronx" "Bronx" "Bronx" "Bronx" ...
## $ Lcns_Nm : int 734149 606221 606228 723375 724807 712943 703060 609065 722972 609621 ...
## $ Oprtn_T : chr "Store" "Store" "Store" "Store" ...
## $ Estbl_T : chr "JAC" "JAC" "JAC" "JAC" ...
## $ Entty_N : chr "7 ELEVEN FOOD STORE #37933H" "1001 SAN MIGUEL FOOD CENTER INC" "1029 FOOD PLAZA INC" "1078 DELI GROCERY CORP" ...
## $ DBA_Nam : chr NA "1001 SAN MIGUEL FD CNTR" "1029 FOOD PLAZA" "1078 DELI GROCERY" ...
## $ Strt_Nmb: chr "500" "1001" "122" "1078" ...
## $ Stret_Nm: chr "BAYCHESTER AVE" "SHERIDAN AVE" "E 181ST ST" "EAST 165TH STREET" ...
## $ Add_L_2 : int NA NA NA NA NA NA NA NA NA NA ...
## $ Add_L_3 : int NA NA NA NA NA NA NA NA NA NA ...
## $ City : chr "BRONX" "BRONX" "BRONX" "BRONX" ...
## $ State : chr "NY" "NY" "NY" "NY" ...
## $ Zip_Cod : int 10475 10456 10453 10459 10456 10453 10467 10456 10456 10472 ...
## $ Sqr_Ftg : chr "0" "1,100" "2,000" "1,200" ...
## $ Locatin : chr "500 BAYCHESTER AVE\nBRONX, NY 10475\n(40.869156, -73.831875)" "1001 SHERIDAN AVE\nBRONX, NY 10456\n(40.829061, -73.919613)" "122 E 181ST ST\nBRONX, NY 10453\n(40.854755, -73.902853)" "1078 EAST 165TH STREET\nBRONX, NY 10459\n(40.825105, -73.890589)" ...
## $ Coords : chr "40.869156, -73.831875" "40.829061, -73.919613" "40.854755, -73.902853" "40.825105, -73.890589" ...
## $ geometry:sfc_POINT of length 11300; first list element: 'XY' num -73.8 40.9
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:16] "ï__Cnty" "Lcns_Nm" "Oprtn_T" "Estbl_T" ...
head(nycFoodStoreSF)
## Simple feature collection with 6 features and 16 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -73.91961 ymin: 40.8251 xmax: -73.83188 ymax: 40.86916
## Geodetic CRS: WGS 84
## ï__Cnty Lcns_Nm Oprtn_T Estbl_T Entty_N
## 1 Bronx 734149 Store JAC 7 ELEVEN FOOD STORE #37933H
## 2 Bronx 606221 Store JAC 1001 SAN MIGUEL FOOD CENTER INC
## 3 Bronx 606228 Store JAC 1029 FOOD PLAZA INC
## 4 Bronx 723375 Store JAC 1078 DELI GROCERY CORP
## 5 Bronx 724807 Store JAC 1086 LUNA DELI GROCERY CORP
## 6 Bronx 712943 Store JAC 109 AJ DELI GROCERY CORP
## DBA_Nam Strt_Nmb Stret_Nm Add_L_2 Add_L_3 City
## 1 <NA> 500 BAYCHESTER AVE NA NA BRONX
## 2 1001 SAN MIGUEL FD CNTR 1001 SHERIDAN AVE NA NA BRONX
## 3 1029 FOOD PLAZA 122 E 181ST ST NA NA BRONX
## 4 1078 DELI GROCERY 1078 EAST 165TH STREET NA NA BRONX
## 5 1086 LUNA DELI GROCERY 1086 BOSTON ROAD NA NA BRONX
## 6 109 AJ DELI GROCERY 109 E TREMONT AVE NA NA BRONX
## State Zip_Cod Sqr_Ftg
## 1 NY 10475 0
## 2 NY 10456 1,100
## 3 NY 10453 2,000
## 4 NY 10459 1,200
## 5 NY 10456 1,500
## 6 NY 10453 2,400
## Locatin
## 1 500 BAYCHESTER AVE\nBRONX, NY 10475\n(40.869156, -73.831875)
## 2 1001 SHERIDAN AVE\nBRONX, NY 10456\n(40.829061, -73.919613)
## 3 122 E 181ST ST\nBRONX, NY 10453\n(40.854755, -73.902853)
## 4 1078 EAST 165TH STREET\nBRONX, NY 10459\n(40.825105, -73.890589)
## 5 1086 BOSTON ROAD\nBRONX, NY 10456\n(40.827096, -73.905123)
## 6 109 E TREMONT AVE\nBRONX, NY 10453\n(40.850537, -73.907137)
## Coords geometry
## 1 40.869156, -73.831875 POINT (-73.83187 40.86916)
## 2 40.829061, -73.919613 POINT (-73.91961 40.82906)
## 3 40.854755, -73.902853 POINT (-73.90285 40.85475)
## 4 40.825105, -73.890589 POINT (-73.89059 40.82511)
## 5 40.827096, -73.905123 POINT (-73.90512 40.8271)
## 6 40.850537, -73.907137 POINT (-73.90714 40.85054)
names(nycFoodStoreSF)
## [1] "ï__Cnty" "Lcns_Nm" "Oprtn_T" "Estbl_T" "Entty_N" "DBA_Nam"
## [7] "Strt_Nmb" "Stret_Nm" "Add_L_2" "Add_L_3" "City" "State"
## [13] "Zip_Cod" "Sqr_Ftg" "Locatin" "Coords" "geometry"
unique(nycFoodStoreSF$Estbl_T) # am i supposed to choose specific codes????
## [1] "JAC" "A" "JACD" "JACDK" "JAD" "JABCHK" "JACHK" "JABC"
## [9] "JAZ" "JABCK" "JACK" "JACDHK" "JABH" "JACH" "JACDE" "JABCH"
## [17] "JABCDH" "JABK" "JABHK" "JABCD" "JACG" "JACDH" "JADHK" "JKA"
## [25] "JADK" "JAB" "JAHK" "JABCDK" "JACZ" "JAK" "JADO" "JDA"
# Check the CRS of both datasets
crs_nyc_sf_merged <- st_crs(nyc_sf_merged)
print(crs_nyc_sf_merged) # NAD83 / New York Long Island
## Coordinate Reference System:
## User input: NAD83 / New York Long Island (ftUS)
## wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
## BASEGEOGCRS["NAD83",
## DATUM["North American Datum 1983",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4269]],
## CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
## METHOD["Lambert Conic Conformal (2SP)",
## ID["EPSG",9802]],
## PARAMETER["Latitude of false origin",40.1666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8821]],
## PARAMETER["Longitude of false origin",-74,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8822]],
## PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8823]],
## PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8824]],
## PARAMETER["Easting at false origin",984250,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8826]],
## PARAMETER["Northing at false origin",0,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8827]]],
## CS[Cartesian,2],
## AXIS["easting (X)",east,
## ORDER[1],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## AXIS["northing (Y)",north,
## ORDER[2],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## USAGE[
## SCOPE["Engineering survey, topographic mapping."],
## AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
## BBOX[40.47,-74.26,41.3,-71.8]],
## ID["EPSG",2263]]
crs_nycFoodStoreSF <- st_crs(nycFoodStoreSF)
print(crs_nycFoodStoreSF) # WGS 84
## Coordinate Reference System:
## User input: WGS 84
## wkt:
## GEOGCRS["WGS 84",
## DATUM["World Geodetic System 1984",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["latitude",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["longitude",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4326]]
# set to the same CRS
nycFoodStoreSF_transformed <- nycFoodStoreSF %>% sf::st_transform(crs = 4326)
nyc_sf_merged_transformed <- nyc_sf_merged %>% sf::st_transform(crs = 4326)
# i wonder if there is a way to save this dataset with all the rows that have the A,J, or D
# what does the A, J, or D symbolize according to the metadata?
# A = store
# D = Food marehouse
# J = multiple operations
head(nycFoodStoreSF_transformed)
## Simple feature collection with 6 features and 16 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -73.91961 ymin: 40.8251 xmax: -73.83188 ymax: 40.86916
## Geodetic CRS: WGS 84
## ï__Cnty Lcns_Nm Oprtn_T Estbl_T Entty_N
## 1 Bronx 734149 Store JAC 7 ELEVEN FOOD STORE #37933H
## 2 Bronx 606221 Store JAC 1001 SAN MIGUEL FOOD CENTER INC
## 3 Bronx 606228 Store JAC 1029 FOOD PLAZA INC
## 4 Bronx 723375 Store JAC 1078 DELI GROCERY CORP
## 5 Bronx 724807 Store JAC 1086 LUNA DELI GROCERY CORP
## 6 Bronx 712943 Store JAC 109 AJ DELI GROCERY CORP
## DBA_Nam Strt_Nmb Stret_Nm Add_L_2 Add_L_3 City
## 1 <NA> 500 BAYCHESTER AVE NA NA BRONX
## 2 1001 SAN MIGUEL FD CNTR 1001 SHERIDAN AVE NA NA BRONX
## 3 1029 FOOD PLAZA 122 E 181ST ST NA NA BRONX
## 4 1078 DELI GROCERY 1078 EAST 165TH STREET NA NA BRONX
## 5 1086 LUNA DELI GROCERY 1086 BOSTON ROAD NA NA BRONX
## 6 109 AJ DELI GROCERY 109 E TREMONT AVE NA NA BRONX
## State Zip_Cod Sqr_Ftg
## 1 NY 10475 0
## 2 NY 10456 1,100
## 3 NY 10453 2,000
## 4 NY 10459 1,200
## 5 NY 10456 1,500
## 6 NY 10453 2,400
## Locatin
## 1 500 BAYCHESTER AVE\nBRONX, NY 10475\n(40.869156, -73.831875)
## 2 1001 SHERIDAN AVE\nBRONX, NY 10456\n(40.829061, -73.919613)
## 3 122 E 181ST ST\nBRONX, NY 10453\n(40.854755, -73.902853)
## 4 1078 EAST 165TH STREET\nBRONX, NY 10459\n(40.825105, -73.890589)
## 5 1086 BOSTON ROAD\nBRONX, NY 10456\n(40.827096, -73.905123)
## 6 109 E TREMONT AVE\nBRONX, NY 10453\n(40.850537, -73.907137)
## Coords geometry
## 1 40.869156, -73.831875 POINT (-73.83187 40.86916)
## 2 40.829061, -73.919613 POINT (-73.91961 40.82906)
## 3 40.854755, -73.902853 POINT (-73.90285 40.85475)
## 4 40.825105, -73.890589 POINT (-73.89059 40.82511)
## 5 40.827096, -73.905123 POINT (-73.90512 40.8271)
## 6 40.850537, -73.907137 POINT (-73.90714 40.85054)
#Filter certain types of food stores and count the number of stores in each zip code area
# Can you explain what happens in each step of this pipeline?
nycFoodStoreSF_transformed %>% dplyr::filter(stringr::str_detect(Estbl_T, '[AJD]')) %>% # looking at any row with a A,J, or D code
sf::st_join(nyc_sf_merged_transformed, ., join= st_contains) %>% # REPLACE WITH nyc_sf_merged
group_by(ZIPCODE) %>%
summarise(FoodStoreNum = n()) %>% # how come i can't find this as a new column ?
magrittr::extract('FoodStoreNum') %>%
plot(breaks = "jenks", main="Number of Food Stores")
# read in the new dataset for the NYC health facilities
nycHealthFacilities <- readr::read_csv("/Users/timothymedina/Documents/GTECH38520_RStudio/R-spatial/Session_11/R-Spatial_II_Lab/NYS_Health_Facility.csv", lazy = FALSE)
## Rows: 3990 Columns: 36
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (28): Facility Name, Short Description, Description, Facility Open Date,...
## dbl (8): Facility ID, Facility Phone Number, Facility Fax Number, Facility ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
str(nycHealthFacilities)
## spc_tbl_ [3,990 × 36] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Facility ID : num [1:3990] 204 620 654 1156 2589 ...
## $ Facility Name : chr [1:3990] "Hospice at Lourdes" "Charles T Sitrin Health Care Center Inc" "Central Park Rehabilitation and Nursing Center" "East Side Nursing Home" ...
## $ Short Description : chr [1:3990] "HSPC" "NH" "NH" "NH" ...
## $ Description : chr [1:3990] "Hospice" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" ...
## $ Facility Open Date : chr [1:3990] "06/01/1985" "02/01/1989" "02/01/1989" "08/01/1979" ...
## $ Facility Address 1 : chr [1:3990] "4102 Old Vestal Road" "2050 Tilden Avenue" "116 Martin Luther King East" "62 Prospect St" ...
## $ Facility Address 2 : chr [1:3990] NA NA NA NA ...
## $ Facility City : chr [1:3990] "Vestal" "New Hartford" "Syracuse" "Warsaw" ...
## $ Facility State : chr [1:3990] "New York" "New York" "New York" "New York" ...
## $ Facility Zip Code : chr [1:3990] "13850" "13413" "13205" "14569" ...
## $ Facility Phone Number : num [1:3990] 6.08e+09 3.16e+09 3.15e+09 5.86e+09 5.86e+09 ...
## $ Facility Fax Number : num [1:3990] NA NA NA NA NA ...
## $ Facility Website : chr [1:3990] NA NA NA NA ...
## $ Facility County Code : num [1:3990] 3 32 33 60 2 ...
## $ Facility County : chr [1:3990] "Broome" "Oneida" "Onondaga" "Wyoming" ...
## $ Regional Office ID : num [1:3990] 3 3 3 1 1 1 7 1 7 5 ...
## $ Regional Office : chr [1:3990] "Central New York Regional Office" "Central New York Regional Office" "Central New York Regional Office" "Western Regional Office - Buffalo" ...
## $ Main Site Name : chr [1:3990] NA NA NA NA ...
## $ Main Site Facility ID : num [1:3990] NA NA NA NA NA ...
## $ Operating Certificate Number: chr [1:3990] "0301501F" "3227304N" "3301326N" "6027303N" ...
## $ Operator Name : chr [1:3990] "Our Lady of Lourdes Memorial Hospital Inc" "Charles T Sitrin Health Care Center, Inc" "CPRNC, LLC" "East Side Nursing Home Inc" ...
## $ Operator Address 1 : chr [1:3990] "169 Riverside Drive" "Box 1000 Tilden Avenue" "116 Martin Luther King East" "62 Prospect Street" ...
## $ Operator Address 2 : chr [1:3990] NA NA NA NA ...
## $ Operator City : chr [1:3990] "Binghamton" "New Hartford" "Syracuse" "Warsaw" ...
## $ Operator State : chr [1:3990] "New York" "New York" "New York" "New York" ...
## $ Operator Zip Code : chr [1:3990] "13905" "13413" "13205" "14569" ...
## $ Cooperator Name : chr [1:3990] NA NA NA NA ...
## $ Cooperator Address : chr [1:3990] NA NA NA NA ...
## $ Cooperator Address 2 : chr [1:3990] NA NA NA NA ...
## $ Cooperator City : chr [1:3990] NA NA NA NA ...
## $ Cooperator State : chr [1:3990] "New York" "New York" "New York" "New York" ...
## $ Cooperator Zip Code : chr [1:3990] NA NA NA NA ...
## $ Ownership Type : chr [1:3990] "Not for Profit Corporation" "Not for Profit Corporation" "LLC" "Business Corporation" ...
## $ Facility Latitude : num [1:3990] 42.1 43.1 NA 42.7 42.1 ...
## $ Facility Longitude : num [1:3990] -76 -75.2 NA -78.1 -78 ...
## $ Facility Location : chr [1:3990] "(42.097095, -75.975243)" "(43.05497, -75.228828)" NA "(42.738979, -78.12867)" ...
## - attr(*, "spec")=
## .. cols(
## .. `Facility ID` = col_double(),
## .. `Facility Name` = col_character(),
## .. `Short Description` = col_character(),
## .. Description = col_character(),
## .. `Facility Open Date` = col_character(),
## .. `Facility Address 1` = col_character(),
## .. `Facility Address 2` = col_character(),
## .. `Facility City` = col_character(),
## .. `Facility State` = col_character(),
## .. `Facility Zip Code` = col_character(),
## .. `Facility Phone Number` = col_double(),
## .. `Facility Fax Number` = col_double(),
## .. `Facility Website` = col_character(),
## .. `Facility County Code` = col_double(),
## .. `Facility County` = col_character(),
## .. `Regional Office ID` = col_double(),
## .. `Regional Office` = col_character(),
## .. `Main Site Name` = col_character(),
## .. `Main Site Facility ID` = col_double(),
## .. `Operating Certificate Number` = col_character(),
## .. `Operator Name` = col_character(),
## .. `Operator Address 1` = col_character(),
## .. `Operator Address 2` = col_character(),
## .. `Operator City` = col_character(),
## .. `Operator State` = col_character(),
## .. `Operator Zip Code` = col_character(),
## .. `Cooperator Name` = col_character(),
## .. `Cooperator Address` = col_character(),
## .. `Cooperator Address 2` = col_character(),
## .. `Cooperator City` = col_character(),
## .. `Cooperator State` = col_character(),
## .. `Cooperator Zip Code` = col_character(),
## .. `Ownership Type` = col_character(),
## .. `Facility Latitude` = col_double(),
## .. `Facility Longitude` = col_double(),
## .. `Facility Location` = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# check the name of columns before joining
# names(nycHealthFacilities)
# names(nyc_sf_merged)
# Remove all rows that have a nan in the "Facility Longitude" and "Facility Latitude" columns
new_df <- nycHealthFacilities %>%
dplyr::filter(!is.na(`Facility Longitude`) & !is.nan(`Facility Longitude`) & !is.na(`Facility Latitude`) & !is.nan(`Facility Latitude`))
head(new_df)
## # A tibble: 6 × 36
## `Facility ID` Facili…¹ Short…² Descr…³ Facil…⁴ Facil…⁵ Facil…⁶ Facil…⁷ Facil…⁸
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 204 Hospice… HSPC Hospice 06/01/… 4102 O… <NA> Vestal New Yo…
## 2 620 Charles… NH Reside… 02/01/… 2050 T… <NA> New Ha… New Yo…
## 3 1156 East Si… NH Reside… 08/01/… 62 Pro… <NA> Warsaw New Yo…
## 4 2589 Wellsvi… NH Reside… 02/01/… 4192A … <NA> Wellsv… New Yo…
## 5 3455 Harris … NH Reside… 04/08/… 2699 W… <NA> Willia… New Yo…
## 6 3853 Garden … DTC Diagno… 04/07/… 400 En… <NA> Garden… New Yo…
## # … with 27 more variables: `Facility Zip Code` <chr>,
## # `Facility Phone Number` <dbl>, `Facility Fax Number` <dbl>,
## # `Facility Website` <chr>, `Facility County Code` <dbl>,
## # `Facility County` <chr>, `Regional Office ID` <dbl>,
## # `Regional Office` <chr>, `Main Site Name` <chr>,
## # `Main Site Facility ID` <dbl>, `Operating Certificate Number` <chr>,
## # `Operator Name` <chr>, `Operator Address 1` <chr>, …
# Convert the dataframe into a sf file
nycHealthFacilities_sf <- new_df %>%
sf::st_as_sf(coords = c("Facility Longitude", "Facility Latitude"), crs = 4326)
# Filter NH: Nursing homes
nycNursingHome <- nycHealthFacilities_sf %>%
dplyr::filter(`Short Description` == 'NH')
unique(nycHealthFacilities_sf$Description)
## [1] "Hospice"
## [2] "Residential Health Care Facility - SNF"
## [3] "Diagnostic and Treatment Center"
## [4] "Certified Home Health Agency"
## [5] "Hospital Extension Clinic"
## [6] "School Based Diagnostic and Treatment Center Extension Clinic"
## [7] "Diagnostic and Treatment Center Extension Clinic"
## [8] "School Based Hospital Extension Clinic"
## [9] "Hospital"
## [10] "Primary Care Hospital - Critical Access Hospital Extension Clinic"
## [11] "Adult Day Health Care Program - Offsite"
## [12] "Mobile Diagnostic and Treatment Center Extension Clinic"
## [13] "Long Term Home Health Care Program"
## [14] "Mobile Hospital Extension Clinic"
## [15] "Primary Care Hospital - Critical Access Hospital"
## [16] "School Based Primary Care Hospital - Critical Access Extension Clinic"
head(nycNursingHome)
## Simple feature collection with 6 features and 34 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -78.68856 ymin: 40.88436 xmax: -73.88845 ymax: 43.05497
## Geodetic CRS: WGS 84
## # A tibble: 6 × 35
## `Facility ID` Facili…¹ Short…² Descr…³ Facil…⁴ Facil…⁵ Facil…⁶ Facil…⁷ Facil…⁸
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 620 Charles… NH Reside… 02/01/… 2050 T… <NA> New Ha… New Yo…
## 2 1156 East Si… NH Reside… 08/01/… 62 Pro… <NA> Warsaw New Yo…
## 3 2589 Wellsvi… NH Reside… 02/01/… 4192A … <NA> Wellsv… New Yo…
## 4 3455 Harris … NH Reside… 04/08/… 2699 W… <NA> Willia… New Yo…
## 5 1217 St Patr… NH Reside… 01/01/… 66 Van… <NA> Bronx New Yo…
## 6 124 Elcor N… NH Reside… 02/01/… 48 Col… <NA> Horseh… New Yo…
## # … with 26 more variables: `Facility Zip Code` <chr>,
## # `Facility Phone Number` <dbl>, `Facility Fax Number` <dbl>,
## # `Facility Website` <chr>, `Facility County Code` <dbl>,
## # `Facility County` <chr>, `Regional Office ID` <dbl>,
## # `Regional Office` <chr>, `Main Site Name` <chr>,
## # `Main Site Facility ID` <dbl>, `Operating Certificate Number` <chr>,
## # `Operator Name` <chr>, `Operator Address 1` <chr>, …
# create a csv from the nycNursingHome file
nycNursingHome %>% dplyr::filter(stringr::str_detect('Short Description', 'NH')) %>%
sf::st_join(nyc_sf_merged_transformed, ., join= st_contains) %>%
group_by(ZIPCODE) %>%
summarise(NursingHomeNum = n()) %>% # how come i can't find this as a new column ?
magrittr::extract('NursingHomeNum') %>%
plot(breaks = "jenks", main="Number of Nursing Homes")
nycCensus <- sf::st_read('/Users/timothymedina/Documents/GTECH38520_RStudio/R-spatial/Session_11/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp', stringsAsFactors = FALSE)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `/Users/timothymedina/Documents/GTECH38520_RStudio/R-spatial/Session_11/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS84(DD)
str(nycCensus)
## Classes 'sf' and 'data.frame': 2165 obs. of 12 variables:
## $ boro_code : chr "5" "1" "1" "1" ...
## $ boro_ct201: chr "5000900" "1009800" "1010000" "1010200" ...
## $ boro_name : chr "Staten Island" "Manhattan" "Manhattan" "Manhattan" ...
## $ cdeligibil: chr "E" "I" "I" "I" ...
## $ ct2010 : chr "000900" "009800" "010000" "010200" ...
## $ ctlabel : chr "9" "98" "100" "102" ...
## $ ntacode : chr "SI22" "MN19" "MN19" "MN17" ...
## $ ntaname : chr "West New Brighton-New Brighton-St. George" "Turtle Bay-East Midtown" "Turtle Bay-East Midtown" "Midtown-Midtown South" ...
## $ puma : chr "3903" "3808" "3808" "3807" ...
## $ shape_area: num 2497010 1906016 1860938 1860993 1864600 ...
## $ shape_leng: num 7729 5534 5692 5688 5693 ...
## $ geometry :sfc_MULTIPOLYGON of length 2165; first list element: List of 1
## ..$ :List of 1
## .. ..$ : num [1:28, 1:2] -74.1 -74.1 -74.1 -74.1 -74.1 ...
## ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:11] "boro_code" "boro_ct201" "boro_name" "cdeligibil" ...
# But it does not use the standard County FIPS, but boro code
nycCensus %<>% dplyr::mutate(cntyFIPS = case_when(
boro_name == 'Bronx' ~ '005',
boro_name == 'Brooklyn' ~ '047',
boro_name == 'Manhattan' ~ '061',
boro_name == 'Queens' ~ '081',
boro_name == 'Staten Island' ~ '085'),
tractFIPS = paste(cntyFIPS, ct2010, sep='') # have to put back together columns to make the ACS data
)
# created the tractFIPS from the cntyFIPS, ct2010,
unique(nycCensus)
## Simple feature collection with 2165 features and 13 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS84(DD)
## First 10 features:
## boro_code boro_ct201 boro_name cdeligibil ct2010 ctlabel ntacode
## 1 5 5000900 Staten Island E 000900 9 SI22
## 2 1 1009800 Manhattan I 009800 98 MN19
## 3 1 1010000 Manhattan I 010000 100 MN19
## 4 1 1010200 Manhattan I 010200 102 MN17
## 5 1 1010400 Manhattan I 010400 104 MN17
## 6 1 1011300 Manhattan I 011300 113 MN17
## 7 1 1011402 Manhattan I 011402 114.02 MN40
## 8 1 1013000 Manhattan I 013000 130 MN40
## 9 1 1014000 Manhattan I 014000 140 MN40
## 10 1 1014801 Manhattan I 014801 148.01 MN40
## ntaname puma shape_area shape_leng
## 1 West New Brighton-New Brighton-St. George 3903 2497009.7 7729.017
## 2 Turtle Bay-East Midtown 3808 1906016.4 5534.200
## 3 Turtle Bay-East Midtown 3808 1860938.4 5692.169
## 4 Midtown-Midtown South 3807 1860992.7 5687.802
## 5 Midtown-Midtown South 3807 1864600.4 5693.036
## 6 Midtown-Midtown South 3807 1890907.3 5699.861
## 7 Upper East Side-Carnegie Hill 3805 1063547.4 4125.256
## 8 Upper East Side-Carnegie Hill 3805 1918144.5 5807.973
## 9 Upper East Side-Carnegie Hill 3805 1925984.2 5820.816
## 10 Upper East Side-Carnegie Hill 3805 559216.2 3135.951
## geometry cntyFIPS tractFIPS
## 1 MULTIPOLYGON (((-74.07921 4... 085 085000900
## 2 MULTIPOLYGON (((-73.96433 4... 061 061009800
## 3 MULTIPOLYGON (((-73.96802 4... 061 061010000
## 4 MULTIPOLYGON (((-73.97124 4... 061 061010200
## 5 MULTIPOLYGON (((-73.97446 4... 061 061010400
## 6 MULTIPOLYGON (((-73.98412 4... 061 061011300
## 7 MULTIPOLYGON (((-73.96476 4... 061 061011402
## 8 MULTIPOLYGON (((-73.96148 4... 061 061013000
## 9 MULTIPOLYGON (((-73.95495 4... 061 061014000
## 10 MULTIPOLYGON (((-73.95398 4... 061 061014801
head(nycCensus)
## Simple feature collection with 6 features and 13 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.08721 ymin: 40.63981 xmax: -73.96433 ymax: 40.76364
## Geodetic CRS: WGS84(DD)
## boro_code boro_ct201 boro_name cdeligibil ct2010 ctlabel ntacode
## 1 5 5000900 Staten Island E 000900 9 SI22
## 2 1 1009800 Manhattan I 009800 98 MN19
## 3 1 1010000 Manhattan I 010000 100 MN19
## 4 1 1010200 Manhattan I 010200 102 MN17
## 5 1 1010400 Manhattan I 010400 104 MN17
## 6 1 1011300 Manhattan I 011300 113 MN17
## ntaname puma shape_area shape_leng
## 1 West New Brighton-New Brighton-St. George 3903 2497010 7729.017
## 2 Turtle Bay-East Midtown 3808 1906016 5534.200
## 3 Turtle Bay-East Midtown 3808 1860938 5692.169
## 4 Midtown-Midtown South 3807 1860993 5687.802
## 5 Midtown-Midtown South 3807 1864600 5693.036
## 6 Midtown-Midtown South 3807 1890907 5699.861
## geometry cntyFIPS tractFIPS
## 1 MULTIPOLYGON (((-74.07921 4... 085 085000900
## 2 MULTIPOLYGON (((-73.96433 4... 061 061009800
## 3 MULTIPOLYGON (((-73.96802 4... 061 061010000
## 4 MULTIPOLYGON (((-73.97124 4... 061 061010200
## 5 MULTIPOLYGON (((-73.97446 4... 061 061010400
## 6 MULTIPOLYGON (((-73.98412 4... 061 061011300
# how do i know if tractFips from nycCensus and GEO_ID from acsData have the same number of characters?
# Check for NA values in nycCensus
anyNA_nycCensus <- any(is.na(nycCensus))
print(paste("Any NA in nycCensus: ", anyNA_nycCensus))
## [1] "Any NA in nycCensus: TRUE"
# Count NA values in each column of nycCensus
na_count_nycCensus <- colSums(is.na(nycCensus))
print(na_count_nycCensus) # there is one in cdeligibil
## boro_code boro_ct201 boro_name cdeligibil ct2010 ctlabel ntacode
## 0 0 0 1 0 0 0
## ntaname puma shape_area shape_leng geometry cntyFIPS tractFIPS
## 0 0 0 0 0 0 0
# Remove the row
nycCensus <- nycCensus[!is.na(nycCensus$cdeligibil), ]
acsData <- readLines("/Users/timothymedina/Documents/GTECH38520_RStudio/R-spatial/Session_11/R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
magrittr::extract(-2) %>%
textConnection() %>%
read.csv(header=TRUE, quote= "\"")
# Then use mutate() to convert the columns to numeric
original_data <- acsData %>%
mutate(
totPop = as.numeric(DP05_0001E),
elderlyPop = as.numeric(DP05_0024E),
malePop = as.numeric(DP05_0002E),
femalePop = as.numeric(DP05_0003E),
whitePop = as.numeric(DP05_0037E),
blackPop = as.numeric(DP05_0038E),
asianPop = as.numeric(DP05_0067E),
hispanicPop = as.numeric(DP05_0071E),
adultPop = as.numeric(DP05_0021E),
citizenAdult = as.numeric(DP05_0087E),
censusCode = stringr::str_sub(GEO_ID, -9,-1)
)
# the issue is that I want to only keep these columns that were mutated
names(original_data)
## [1] "GEO_ID" "NAME" "DP05_0031PM" "DP05_0032E" "DP05_0032M"
## [6] "DP05_0032PE" "DP05_0032PM" "DP05_0033E" "DP05_0033M" "DP05_0033PE"
## [11] "DP05_0033PM" "DP05_0034E" "DP05_0034M" "DP05_0034PE" "DP05_0034PM"
## [16] "DP05_0035E" "DP05_0035M" "DP05_0035PE" "DP05_0035PM" "DP05_0036E"
## [21] "DP05_0036M" "DP05_0036PE" "DP05_0036PM" "DP05_0037E" "DP05_0037M"
## [26] "DP05_0037PE" "DP05_0037PM" "DP05_0038E" "DP05_0038M" "DP05_0038PE"
## [31] "DP05_0038PM" "DP05_0039E" "DP05_0039M" "DP05_0039PE" "DP05_0039PM"
## [36] "DP05_0040E" "DP05_0040M" "DP05_0040PE" "DP05_0040PM" "DP05_0041E"
## [41] "DP05_0041M" "DP05_0041PE" "DP05_0041PM" "DP05_0042E" "DP05_0042M"
## [46] "DP05_0042PE" "DP05_0042PM" "DP05_0043E" "DP05_0043M" "DP05_0043PE"
## [51] "DP05_0043PM" "DP05_0044E" "DP05_0044M" "DP05_0044PE" "DP05_0044PM"
## [56] "DP05_0045E" "DP05_0045M" "DP05_0045PE" "DP05_0045PM" "DP05_0046E"
## [61] "DP05_0046M" "DP05_0046PE" "DP05_0046PM" "DP05_0047E" "DP05_0047M"
## [66] "DP05_0047PE" "DP05_0047PM" "DP05_0048E" "DP05_0048M" "DP05_0048PE"
## [71] "DP05_0048PM" "DP05_0049E" "DP05_0049M" "DP05_0049PE" "DP05_0049PM"
## [76] "DP05_0050E" "DP05_0050M" "DP05_0050PE" "DP05_0050PM" "DP05_0051E"
## [81] "DP05_0051M" "DP05_0051PE" "DP05_0051PM" "DP05_0052E" "DP05_0052M"
## [86] "DP05_0052PE" "DP05_0052PM" "DP05_0053E" "DP05_0053M" "DP05_0053PE"
## [91] "DP05_0053PM" "DP05_0054E" "DP05_0054M" "DP05_0054PE" "DP05_0054PM"
## [96] "DP05_0055E" "DP05_0055M" "DP05_0055PE" "DP05_0055PM" "DP05_0056E"
## [101] "DP05_0056M" "DP05_0056PE" "DP05_0056PM" "DP05_0057E" "DP05_0057M"
## [106] "DP05_0057PE" "DP05_0057PM" "DP05_0058E" "DP05_0058M" "DP05_0058PE"
## [111] "DP05_0058PM" "DP05_0059E" "DP05_0059M" "DP05_0059PE" "DP05_0059PM"
## [116] "DP05_0060E" "DP05_0060M" "DP05_0060PE" "DP05_0060PM" "DP05_0061E"
## [121] "DP05_0061M" "DP05_0061PE" "DP05_0061PM" "DP05_0062E" "DP05_0062M"
## [126] "DP05_0062PE" "DP05_0062PM" "DP05_0063E" "DP05_0063M" "DP05_0063PE"
## [131] "DP05_0063PM" "DP05_0064E" "DP05_0064M" "DP05_0064PE" "DP05_0064PM"
## [136] "DP05_0065E" "DP05_0065M" "DP05_0065PE" "DP05_0065PM" "DP05_0066E"
## [141] "DP05_0066M" "DP05_0066PE" "DP05_0066PM" "DP05_0067E" "DP05_0067M"
## [146] "DP05_0067PE" "DP05_0067PM" "DP05_0068E" "DP05_0068M" "DP05_0068PE"
## [151] "DP05_0068PM" "DP05_0069E" "DP05_0069M" "DP05_0069PE" "DP05_0069PM"
## [156] "DP05_0070E" "DP05_0070M" "DP05_0070PE" "DP05_0070PM" "DP05_0071E"
## [161] "DP05_0071M" "DP05_0071PE" "DP05_0071PM" "DP05_0072E" "DP05_0072M"
## [166] "DP05_0072PE" "DP05_0072PM" "DP05_0073E" "DP05_0073M" "DP05_0073PE"
## [171] "DP05_0073PM" "DP05_0074E" "DP05_0074M" "DP05_0074PE" "DP05_0074PM"
## [176] "DP05_0075E" "DP05_0075M" "DP05_0075PE" "DP05_0075PM" "DP05_0076E"
## [181] "DP05_0076M" "DP05_0076PE" "DP05_0076PM" "DP05_0077E" "DP05_0077M"
## [186] "DP05_0077PE" "DP05_0077PM" "DP05_0078E" "DP05_0078M" "DP05_0078PE"
## [191] "DP05_0078PM" "DP05_0079E" "DP05_0079M" "DP05_0079PE" "DP05_0079PM"
## [196] "DP05_0080E" "DP05_0080M" "DP05_0080PE" "DP05_0080PM" "DP05_0081E"
## [201] "DP05_0081M" "DP05_0081PE" "DP05_0081PM" "DP05_0082E" "DP05_0082M"
## [206] "DP05_0082PE" "DP05_0082PM" "DP05_0083E" "DP05_0083M" "DP05_0083PE"
## [211] "DP05_0083PM" "DP05_0084E" "DP05_0084M" "DP05_0084PE" "DP05_0084PM"
## [216] "DP05_0085E" "DP05_0085M" "DP05_0085PE" "DP05_0085PM" "DP05_0086E"
## [221] "DP05_0086M" "DP05_0086PE" "DP05_0086PM" "DP05_0087E" "DP05_0087M"
## [226] "DP05_0087PE" "DP05_0087PM" "DP05_0088E" "DP05_0088M" "DP05_0088PE"
## [231] "DP05_0088PM" "DP05_0089E" "DP05_0089M" "DP05_0089PE" "DP05_0089PM"
## [236] "DP05_0001E" "DP05_0001M" "DP05_0001PE" "DP05_0001PM" "DP05_0002E"
## [241] "DP05_0002M" "DP05_0002PE" "DP05_0002PM" "DP05_0003E" "DP05_0003M"
## [246] "DP05_0003PE" "DP05_0003PM" "DP05_0004E" "DP05_0004M" "DP05_0004PE"
## [251] "DP05_0004PM" "DP05_0005E" "DP05_0005M" "DP05_0005PE" "DP05_0005PM"
## [256] "DP05_0006E" "DP05_0006M" "DP05_0006PE" "DP05_0006PM" "DP05_0007E"
## [261] "DP05_0007M" "DP05_0007PE" "DP05_0007PM" "DP05_0008E" "DP05_0008M"
## [266] "DP05_0008PE" "DP05_0008PM" "DP05_0009E" "DP05_0009M" "DP05_0009PE"
## [271] "DP05_0009PM" "DP05_0010E" "DP05_0010M" "DP05_0010PE" "DP05_0010PM"
## [276] "DP05_0011E" "DP05_0011M" "DP05_0011PE" "DP05_0011PM" "DP05_0012E"
## [281] "DP05_0012M" "DP05_0012PE" "DP05_0012PM" "DP05_0013E" "DP05_0013M"
## [286] "DP05_0013PE" "DP05_0013PM" "DP05_0014E" "DP05_0014M" "DP05_0014PE"
## [291] "DP05_0014PM" "DP05_0015E" "DP05_0015M" "DP05_0015PE" "DP05_0015PM"
## [296] "DP05_0016E" "DP05_0016M" "DP05_0016PE" "DP05_0016PM" "DP05_0017E"
## [301] "DP05_0017M" "DP05_0017PE" "DP05_0017PM" "DP05_0018E" "DP05_0018M"
## [306] "DP05_0018PE" "DP05_0018PM" "DP05_0019E" "DP05_0019M" "DP05_0019PE"
## [311] "DP05_0019PM" "DP05_0020E" "DP05_0020M" "DP05_0020PE" "DP05_0020PM"
## [316] "DP05_0021E" "DP05_0021M" "DP05_0021PE" "DP05_0021PM" "DP05_0022E"
## [321] "DP05_0022M" "DP05_0022PE" "DP05_0022PM" "DP05_0023E" "DP05_0023M"
## [326] "DP05_0023PE" "DP05_0023PM" "DP05_0024E" "DP05_0024M" "DP05_0024PE"
## [331] "DP05_0024PM" "DP05_0025E" "DP05_0025M" "DP05_0025PE" "DP05_0025PM"
## [336] "DP05_0026E" "DP05_0026M" "DP05_0026PE" "DP05_0026PM" "DP05_0027E"
## [341] "DP05_0027M" "DP05_0027PE" "DP05_0027PM" "DP05_0028E" "DP05_0028M"
## [346] "DP05_0028PE" "DP05_0028PM" "DP05_0029E" "DP05_0029M" "DP05_0029PE"
## [351] "DP05_0029PM" "DP05_0030E" "DP05_0030M" "DP05_0030PE" "DP05_0030PM"
## [356] "DP05_0031E" "DP05_0031M" "DP05_0031PE" "totPop" "elderlyPop"
## [361] "malePop" "femalePop" "whitePop" "blackPop" "asianPop"
## [366] "hispanicPop" "adultPop" "citizenAdult" "censusCode"
extracted <- original_data %>%
magrittr::extract(1:10,) # remember that censuscode and tractFIPS are supposed to be the same
names(extracted)
## [1] "GEO_ID" "NAME" "DP05_0031PM" "DP05_0032E" "DP05_0032M"
## [6] "DP05_0032PE" "DP05_0032PM" "DP05_0033E" "DP05_0033M" "DP05_0033PE"
## [11] "DP05_0033PM" "DP05_0034E" "DP05_0034M" "DP05_0034PE" "DP05_0034PM"
## [16] "DP05_0035E" "DP05_0035M" "DP05_0035PE" "DP05_0035PM" "DP05_0036E"
## [21] "DP05_0036M" "DP05_0036PE" "DP05_0036PM" "DP05_0037E" "DP05_0037M"
## [26] "DP05_0037PE" "DP05_0037PM" "DP05_0038E" "DP05_0038M" "DP05_0038PE"
## [31] "DP05_0038PM" "DP05_0039E" "DP05_0039M" "DP05_0039PE" "DP05_0039PM"
## [36] "DP05_0040E" "DP05_0040M" "DP05_0040PE" "DP05_0040PM" "DP05_0041E"
## [41] "DP05_0041M" "DP05_0041PE" "DP05_0041PM" "DP05_0042E" "DP05_0042M"
## [46] "DP05_0042PE" "DP05_0042PM" "DP05_0043E" "DP05_0043M" "DP05_0043PE"
## [51] "DP05_0043PM" "DP05_0044E" "DP05_0044M" "DP05_0044PE" "DP05_0044PM"
## [56] "DP05_0045E" "DP05_0045M" "DP05_0045PE" "DP05_0045PM" "DP05_0046E"
## [61] "DP05_0046M" "DP05_0046PE" "DP05_0046PM" "DP05_0047E" "DP05_0047M"
## [66] "DP05_0047PE" "DP05_0047PM" "DP05_0048E" "DP05_0048M" "DP05_0048PE"
## [71] "DP05_0048PM" "DP05_0049E" "DP05_0049M" "DP05_0049PE" "DP05_0049PM"
## [76] "DP05_0050E" "DP05_0050M" "DP05_0050PE" "DP05_0050PM" "DP05_0051E"
## [81] "DP05_0051M" "DP05_0051PE" "DP05_0051PM" "DP05_0052E" "DP05_0052M"
## [86] "DP05_0052PE" "DP05_0052PM" "DP05_0053E" "DP05_0053M" "DP05_0053PE"
## [91] "DP05_0053PM" "DP05_0054E" "DP05_0054M" "DP05_0054PE" "DP05_0054PM"
## [96] "DP05_0055E" "DP05_0055M" "DP05_0055PE" "DP05_0055PM" "DP05_0056E"
## [101] "DP05_0056M" "DP05_0056PE" "DP05_0056PM" "DP05_0057E" "DP05_0057M"
## [106] "DP05_0057PE" "DP05_0057PM" "DP05_0058E" "DP05_0058M" "DP05_0058PE"
## [111] "DP05_0058PM" "DP05_0059E" "DP05_0059M" "DP05_0059PE" "DP05_0059PM"
## [116] "DP05_0060E" "DP05_0060M" "DP05_0060PE" "DP05_0060PM" "DP05_0061E"
## [121] "DP05_0061M" "DP05_0061PE" "DP05_0061PM" "DP05_0062E" "DP05_0062M"
## [126] "DP05_0062PE" "DP05_0062PM" "DP05_0063E" "DP05_0063M" "DP05_0063PE"
## [131] "DP05_0063PM" "DP05_0064E" "DP05_0064M" "DP05_0064PE" "DP05_0064PM"
## [136] "DP05_0065E" "DP05_0065M" "DP05_0065PE" "DP05_0065PM" "DP05_0066E"
## [141] "DP05_0066M" "DP05_0066PE" "DP05_0066PM" "DP05_0067E" "DP05_0067M"
## [146] "DP05_0067PE" "DP05_0067PM" "DP05_0068E" "DP05_0068M" "DP05_0068PE"
## [151] "DP05_0068PM" "DP05_0069E" "DP05_0069M" "DP05_0069PE" "DP05_0069PM"
## [156] "DP05_0070E" "DP05_0070M" "DP05_0070PE" "DP05_0070PM" "DP05_0071E"
## [161] "DP05_0071M" "DP05_0071PE" "DP05_0071PM" "DP05_0072E" "DP05_0072M"
## [166] "DP05_0072PE" "DP05_0072PM" "DP05_0073E" "DP05_0073M" "DP05_0073PE"
## [171] "DP05_0073PM" "DP05_0074E" "DP05_0074M" "DP05_0074PE" "DP05_0074PM"
## [176] "DP05_0075E" "DP05_0075M" "DP05_0075PE" "DP05_0075PM" "DP05_0076E"
## [181] "DP05_0076M" "DP05_0076PE" "DP05_0076PM" "DP05_0077E" "DP05_0077M"
## [186] "DP05_0077PE" "DP05_0077PM" "DP05_0078E" "DP05_0078M" "DP05_0078PE"
## [191] "DP05_0078PM" "DP05_0079E" "DP05_0079M" "DP05_0079PE" "DP05_0079PM"
## [196] "DP05_0080E" "DP05_0080M" "DP05_0080PE" "DP05_0080PM" "DP05_0081E"
## [201] "DP05_0081M" "DP05_0081PE" "DP05_0081PM" "DP05_0082E" "DP05_0082M"
## [206] "DP05_0082PE" "DP05_0082PM" "DP05_0083E" "DP05_0083M" "DP05_0083PE"
## [211] "DP05_0083PM" "DP05_0084E" "DP05_0084M" "DP05_0084PE" "DP05_0084PM"
## [216] "DP05_0085E" "DP05_0085M" "DP05_0085PE" "DP05_0085PM" "DP05_0086E"
## [221] "DP05_0086M" "DP05_0086PE" "DP05_0086PM" "DP05_0087E" "DP05_0087M"
## [226] "DP05_0087PE" "DP05_0087PM" "DP05_0088E" "DP05_0088M" "DP05_0088PE"
## [231] "DP05_0088PM" "DP05_0089E" "DP05_0089M" "DP05_0089PE" "DP05_0089PM"
## [236] "DP05_0001E" "DP05_0001M" "DP05_0001PE" "DP05_0001PM" "DP05_0002E"
## [241] "DP05_0002M" "DP05_0002PE" "DP05_0002PM" "DP05_0003E" "DP05_0003M"
## [246] "DP05_0003PE" "DP05_0003PM" "DP05_0004E" "DP05_0004M" "DP05_0004PE"
## [251] "DP05_0004PM" "DP05_0005E" "DP05_0005M" "DP05_0005PE" "DP05_0005PM"
## [256] "DP05_0006E" "DP05_0006M" "DP05_0006PE" "DP05_0006PM" "DP05_0007E"
## [261] "DP05_0007M" "DP05_0007PE" "DP05_0007PM" "DP05_0008E" "DP05_0008M"
## [266] "DP05_0008PE" "DP05_0008PM" "DP05_0009E" "DP05_0009M" "DP05_0009PE"
## [271] "DP05_0009PM" "DP05_0010E" "DP05_0010M" "DP05_0010PE" "DP05_0010PM"
## [276] "DP05_0011E" "DP05_0011M" "DP05_0011PE" "DP05_0011PM" "DP05_0012E"
## [281] "DP05_0012M" "DP05_0012PE" "DP05_0012PM" "DP05_0013E" "DP05_0013M"
## [286] "DP05_0013PE" "DP05_0013PM" "DP05_0014E" "DP05_0014M" "DP05_0014PE"
## [291] "DP05_0014PM" "DP05_0015E" "DP05_0015M" "DP05_0015PE" "DP05_0015PM"
## [296] "DP05_0016E" "DP05_0016M" "DP05_0016PE" "DP05_0016PM" "DP05_0017E"
## [301] "DP05_0017M" "DP05_0017PE" "DP05_0017PM" "DP05_0018E" "DP05_0018M"
## [306] "DP05_0018PE" "DP05_0018PM" "DP05_0019E" "DP05_0019M" "DP05_0019PE"
## [311] "DP05_0019PM" "DP05_0020E" "DP05_0020M" "DP05_0020PE" "DP05_0020PM"
## [316] "DP05_0021E" "DP05_0021M" "DP05_0021PE" "DP05_0021PM" "DP05_0022E"
## [321] "DP05_0022M" "DP05_0022PE" "DP05_0022PM" "DP05_0023E" "DP05_0023M"
## [326] "DP05_0023PE" "DP05_0023PM" "DP05_0024E" "DP05_0024M" "DP05_0024PE"
## [331] "DP05_0024PM" "DP05_0025E" "DP05_0025M" "DP05_0025PE" "DP05_0025PM"
## [336] "DP05_0026E" "DP05_0026M" "DP05_0026PE" "DP05_0026PM" "DP05_0027E"
## [341] "DP05_0027M" "DP05_0027PE" "DP05_0027PM" "DP05_0028E" "DP05_0028M"
## [346] "DP05_0028PE" "DP05_0028PM" "DP05_0029E" "DP05_0029M" "DP05_0029PE"
## [351] "DP05_0029PM" "DP05_0030E" "DP05_0030M" "DP05_0030PE" "DP05_0030PM"
## [356] "DP05_0031E" "DP05_0031M" "DP05_0031PE" "totPop" "elderlyPop"
## [361] "malePop" "femalePop" "whitePop" "blackPop" "asianPop"
## [366] "hispanicPop" "adultPop" "citizenAdult" "censusCode"
# Merge (JOIN) ACS data to the census tracts
popData <- merge(nycCensus, original_data[, c("censusCode", "totPop", "elderlyPop", "malePop", "femalePop", "whitePop", "blackPop", "asianPop", "hispanicPop", "adultPop", "citizenAdult")], by.x ='tractFIPS', by.y = 'censusCode')
# verify the data
sum(popData$totPop)
## [1] 8443713
str(popData)
## Classes 'sf' and 'data.frame': 2164 obs. of 24 variables:
## $ tractFIPS : chr "005000100" "005000200" "005000400" "005001600" ...
## $ boro_code : chr "2" "2" "2" "2" ...
## $ boro_ct201 : chr "2000100" "2000200" "2000400" "2001600" ...
## $ boro_name : chr "Bronx" "Bronx" "Bronx" "Bronx" ...
## $ cdeligibil : chr "I" "I" "I" "E" ...
## $ ct2010 : chr "000100" "000200" "000400" "001600" ...
## $ ctlabel : chr "1" "2" "4" "16" ...
## $ ntacode : chr "BX98" "BX09" "BX09" "BX09" ...
## $ ntaname : chr "Rikers Island" "Soundview-Castle Hill-Clason Point-Harding Park" "Soundview-Castle Hill-Clason Point-Harding Park" "Soundview-Castle Hill-Clason Point-Harding Park" ...
## $ puma : chr "3710" "3709" "3709" "3709" ...
## $ shape_area : num 18163828 5006558 8561175 5221330 17961359 ...
## $ shape_leng : num 18898 15611 24725 9671 30000 ...
## $ cntyFIPS : chr "005" "005" "005" "005" ...
## $ totPop : num 7080 4542 5634 5917 2765 ...
## $ elderlyPop : num 51 950 710 989 76 977 648 0 548 243 ...
## $ malePop : num 6503 2264 2807 2365 1363 ...
## $ femalePop : num 577 2278 2827 3552 1402 ...
## $ whitePop : num 1773 2165 2623 2406 585 ...
## $ blackPop : num 4239 1279 1699 2434 1041 ...
## $ asianPop : num 130 119 226 68 130 29 27 14 68 0 ...
## $ hispanicPop : num 2329 3367 3873 3603 1413 ...
## $ adultPop : num 6909 3582 4507 4416 2008 ...
## $ citizenAdult: num 6100 2952 4214 3851 1787 ...
## $ geometry :sfc_MULTIPOLYGON of length 2164; first list element: List of 1
## ..$ :List of 1
## .. ..$ : num [1:499, 1:2] -73.9 -73.9 -73.9 -73.9 -73.9 ...
## ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:23] "tractFIPS" "boro_code" "boro_ct201" "boro_name" ...
st_crs(popData)
## Coordinate Reference System:
## User input: WGS84(DD)
## wkt:
## GEOGCRS["WGS84(DD)",
## DATUM["WGS84",
## ELLIPSOID["WGS84",6378137,298.257223563,
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["geodetic longitude",east,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["geodetic latitude",north,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]]]
popNYC <- sf::st_transform(popData, st_crs(nyc_sf_merged_transformed)) # zpNYC is the JOINED zip code data from task 1.
# Now aggregate to the zip code level
covidPopZipNYC <- sf::st_join(nyc_sf_merged_transformed,
popNYC %>% sf::st_centroid(), # this essentially converts census tracts to points
join = st_contains) %>%
group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT, TOTAL_COVID_TESTS) %>% # use names(zpNYC) and names(popNYC) to see what we have
summarise(totPop = sum(totPop),
malePctg = sum(malePop)/totPop*100, # note the totPop is the newly calculated
asianPop = sum(asianPop),
blackPop = sum(blackPop),
hispanicPop = sum(hispanicPop),
whitePop = sum(whitePop))
## Warning: st_centroid assumes attributes are constant over geometries
## `summarise()` has grouped output by 'ZIPCODE', 'PO_NAME', 'POPULATION',
## 'COUNTY', 'COVID_CASE_COUNT'. You can override using the `.groups` argument.
covidPopZipNYC %>% head()
## Simple feature collection with 6 features and 12 fields
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: -74.0473 ymin: 40.68392 xmax: -73.97141 ymax: 40.75769
## Geodetic CRS: WGS 84
## # A tibble: 6 × 13
## # Groups: ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT [6]
## ZIPCODE PO_NAME POPUL…¹ COUNTY COVID…² TOTAL…³ totPop maleP…⁴ asian…⁵ black…⁶
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 10001 New York 22413 New Y… 1542 20158 19146 51.2 4837 1092
## 2 10002 New York 81305 New Y… 5902 48197 74310 48.4 32149 5969
## 3 10003 New York 55878 New Y… 2803 41076 53487 50.3 8027 3130
## 4 10004 New York 2187 New Y… 247 3599 NA NA NA NA
## 5 10005 New York 8107 New Y… 413 6102 8809 42.8 1974 185
## 6 10006 New York 3011 New Y… 164 2441 4639 44.0 1195 52
## # … with 3 more variables: hispanicPop <dbl>, whitePop <dbl>,
## # geometry <GEOMETRY [°]>, and abbreviated variable names ¹POPULATION,
## # ²COVID_CASE_COUNT, ³TOTAL_COVID_TESTS, ⁴malePctg, ⁵asianPop, ⁶blackPop
covidPopZipNYC <- covidPopZipNYC %>% filter(!is.na(malePctg))
In the end, we should have the confirmed and tested cases of covid-19, numbers of specific types of food stores, numbers of specific types of health facilities, and population (total population, elderly, by race, etc.) at the zip code level. We should also have boroughs, names, etc. for each zip code area.