R Spatial Lab Assignment /# 2

Task 1:

# Load week 7 data
load("../spatial_data.RData")

# Check objects
ls()
## [1] "nyc_zipcode_sf" "nys_health_sf"  "nys_retail_sf"
# Check classes
class(nyc_zipcode_sf)
## [1] "sf"         "data.frame"
class(nys_health_sf)
## [1] "sf"         "tbl_df"     "tbl"        "data.frame"
class(nys_retail_sf)
## [1] "sf"         "tbl_df"     "tbl"        "data.frame"
# Check CRS
st_crs(nyc_zipcode_sf)
## Coordinate Reference System:
##   User input: NAD83 / New York Long Island (ftUS) 
##   wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4269]],
##     CONVERSION["SPCS83 New York Long Island zone (US survey foot)",
##         METHOD["Lambert Conic Conformal (2SP)",
##             ID["EPSG",9802]],
##         PARAMETER["Latitude of false origin",40.1666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-74,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",984250,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
##         BBOX[40.47,-74.26,41.3,-71.8]],
##     ID["EPSG",2263]]
st_crs(nys_health_sf)
## Coordinate Reference System:
##   User input: EPSG:4326 
##   wkt:
## GEOGCRS["WGS 84",
##     ENSEMBLE["World Geodetic System 1984 ensemble",
##         MEMBER["World Geodetic System 1984 (Transit)"],
##         MEMBER["World Geodetic System 1984 (G730)"],
##         MEMBER["World Geodetic System 1984 (G873)"],
##         MEMBER["World Geodetic System 1984 (G1150)"],
##         MEMBER["World Geodetic System 1984 (G1674)"],
##         MEMBER["World Geodetic System 1984 (G1762)"],
##         MEMBER["World Geodetic System 1984 (G2139)"],
##         MEMBER["World Geodetic System 1984 (G2296)"],
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]],
##         ENSEMBLEACCURACY[2.0]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     USAGE[
##         SCOPE["Horizontal component of 3D system."],
##         AREA["World."],
##         BBOX[-90,-180,90,180]],
##     ID["EPSG",4326]]
st_crs(nys_retail_sf)
## Coordinate Reference System:
##   User input: EPSG:4326 
##   wkt:
## GEOGCRS["WGS 84",
##     ENSEMBLE["World Geodetic System 1984 ensemble",
##         MEMBER["World Geodetic System 1984 (Transit)"],
##         MEMBER["World Geodetic System 1984 (G730)"],
##         MEMBER["World Geodetic System 1984 (G873)"],
##         MEMBER["World Geodetic System 1984 (G1150)"],
##         MEMBER["World Geodetic System 1984 (G1674)"],
##         MEMBER["World Geodetic System 1984 (G1762)"],
##         MEMBER["World Geodetic System 1984 (G2139)"],
##         MEMBER["World Geodetic System 1984 (G2296)"],
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]],
##         ENSEMBLEACCURACY[2.0]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     USAGE[
##         SCOPE["Horizontal component of 3D system."],
##         AREA["World."],
##         BBOX[-90,-180,90,180]],
##     ID["EPSG",4326]]
# Load COVID data
covid_data <- read_csv("../data/tests-by-zcta_2020_04_19.csv")

# Preview data
head(covid_data)
## # A tibble: 6 × 4
##   MODZCTA Positive Total zcta_cum.perc_pos
##     <dbl>    <dbl> <dbl>             <dbl>
## 1      NA     1864  2078              89.7
## 2   10001      260   571              45.5
## 3   10002      712  1358              52.4
## 4   10003      347   830              41.8
## 5   10004       24    64              37.5
## 6   10005       44   137              32.1
str(covid_data)
## spc_tbl_ [178 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ MODZCTA          : num [1:178] NA 10001 10002 10003 10004 ...
##  $ Positive         : num [1:178] 1864 260 712 347 24 ...
##  $ Total            : num [1:178] 2078 571 1358 830 64 ...
##  $ zcta_cum.perc_pos: num [1:178] 89.7 45.5 52.4 41.8 37.5 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   MODZCTA = col_double(),
##   ..   Positive = col_double(),
##   ..   Total = col_double(),
##   ..   zcta_cum.perc_pos = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

Task 2:

# Remove NA ZIPs
covid_data <- covid_data %>%
  filter(!is.na(MODZCTA))

# Check ZIP column names
names(nyc_zipcode_sf)
##  [1] "ZIPCODE"    "BLDGZIP"    "PO_NAME"    "POPULATION" "AREA"      
##  [6] "STATE"      "COUNTY"     "ST_FIPS"    "CTY_FIPS"   "URL"       
## [11] "SHAPE_AREA" "SHAPE_LEN"  "geometry"
# Match types
nyc_zipcode_sf$ZIPCODE <- as.numeric(nyc_zipcode_sf$ZIPCODE)

# Join data
zip_covid_sf <- nyc_zipcode_sf %>%
  left_join(covid_data, by = c("ZIPCODE" = "MODZCTA"))

# Check result
head(zip_covid_sf)
## Simple feature collection with 6 features and 15 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 986490.1 ymin: 168910.5 xmax: 1043042 ymax: 189382.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
##   ZIPCODE BLDGZIP  PO_NAME POPULATION     AREA STATE COUNTY ST_FIPS CTY_FIPS
## 1   11436       0  Jamaica      18681 22699295    NY Queens      36      081
## 2   11213       0 Brooklyn      62426 29631004    NY  Kings      36      047
## 3   11212       0 Brooklyn      83866 41972104    NY  Kings      36      047
## 4   11225       0 Brooklyn      56527 23698630    NY  Kings      36      047
## 5   11218       0 Brooklyn      72280 36868799    NY  Kings      36      047
## 6   11226       0 Brooklyn     106132 39408598    NY  Kings      36      047
##                    URL SHAPE_AREA SHAPE_LEN Positive Total zcta_cum.perc_pos
## 1 http://www.usps.com/          0         0      342   567             60.32
## 2 http://www.usps.com/          0         0      972  1653             58.80
## 3 http://www.usps.com/          0         0     1086  1793             60.57
## 4 http://www.usps.com/          0         0      814  1359             59.90
## 5 http://www.usps.com/          0         0     1163  1967             59.13
## 6 http://www.usps.com/          0         0     1336  2170             61.57
##                         geometry
## 1 POLYGON ((1038098 188138.4,...
## 2 POLYGON ((1001614 186926.4,...
## 3 POLYGON ((1011174 183696.3,...
## 4 POLYGON ((995908.4 183617.6...
## 5 POLYGON ((991997.1 176307.5...
## 6 POLYGON ((994821.5 177865.7...

Task 3:

# Check types
unique(nys_retail_sf$Establishment.Type)
##  [1] "A"      "JAC"    "JABC"   "JAS"    "JABCP"  "JACDK"  "JAB"    "JAE"   
##  [9] "JABCDP" "JABHK"  "JACD"   "JACFS"  "JAD"    "JABCHK" "JACHK"  "JAZ"   
## [17] "JABCK"  "JACK"   "JACDHK" "JABH"   "JACH"   "JACDE"  "JABCH"  "JABCDH"
## [25] "JABK"   "JABCD"  "JADK"   "JACE"   "JAW"    "JACI"   "JAM"    "JACO"  
## [33] "JACDH"  "JABCG"  "JACV"   "JABCOP" "JAK"    "JACL"   "JACG"   "JADHK" 
## [41] "JKA"    "JAHK"   "JABCDK" "JACZ"   "JACW"   "JCA"    "JACDKM" "JADE"  
## [49] "JABCGP" "JACN"   "JADO"   "JDA"    "JABCKO" "JABCW"  "JADN"   "JACDIK"
## [57] "JAHW"   "JACS"   "JABCO"  "JACDG"  "JACP"   "JABCHO" "JAEH"   "JACHOP"
## [65] "JACHO"  "JACEW"  "JDAC"   "JKDAC"  "JAV"    "JADEH"  "JACHKO" "JAN"   
## [73] "JACEK"  "JKDA"   "JABDK"
# Match CRS
nys_retail_sf <- st_transform(nys_retail_sf, st_crs(zip_covid_sf))

# Filter stores
food_store_sf <- nys_retail_sf %>%
  filter(str_detect(Establishment.Type, "[AJD]"))

# Join data
food_zip_sf <- zip_covid_sf %>%
  st_join(food_store_sf, join = st_contains)

# Count stores
food_count_sf <- food_zip_sf %>%
  group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive, Total, zcta_cum.perc_pos) %>%
  summarise(food_store_count = n())

# Check result
names(food_count_sf)
## [1] "ZIPCODE"           "PO_NAME"           "POPULATION"       
## [4] "COUNTY"            "Positive"          "Total"            
## [7] "zcta_cum.perc_pos" "food_store_count"  "geometry"
head(food_count_sf[, c("ZIPCODE", "food_store_count")])
## Simple feature collection with 6 features and 2 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: 971132.6 ymin: 188447.3 xmax: 998309.7 ymax: 230942.5
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 6 × 3
## # Groups:   ZIPCODE [6]
##   ZIPCODE food_store_count                                              geometry
##     <dbl>            <int>                           <GEOMETRY [US_survey_foot]>
## 1      83                1 POLYGON ((998309.7 229616.7, 998282.9 229566.7, 9982…
## 2   10001               54 POLYGON ((981958.6 213464.5, 981980.3 213539.5, 9819…
## 3   10002              199 POLYGON ((991339.9 207576.8, 991330.5 207541.4, 9913…
## 4   10003               94 POLYGON ((989830.5 207048.1, 989716.4 206842.5, 9895…
## 5   10004               13 MULTIPOLYGON (((977493.2 188450.6, 977481.3 188451.7…
## 6   10005                9 POLYGON ((982595.7 195880.8, 982587.4 195873.8, 9825…

Task 4:

# Check types
unique(nys_health_sf$`Short Description`)
##  [1] "HSPC"    "NH"      "DTC"     "CHHA"    "HOSP-EC" NA        "DTC-EC" 
##  [8] "HOSP-SB" "HOSP"    "ADHCP"   "LTHHCP"
# Match CRS
nys_health_sf <- st_transform(nys_health_sf, st_crs(zip_covid_sf))

# Filter facilities
health_facility_sf <- nys_health_sf %>%
  filter(`Short Description` == "NH")

# Join data
health_zip_sf <- zip_covid_sf %>%
  st_join(health_facility_sf, join = st_contains)

# Count facilities
health_count_sf <- health_zip_sf %>%
  group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive, Total, zcta_cum.perc_pos) %>%
  summarise(health_facility_count = n())

# Check result
names(health_count_sf)
## [1] "ZIPCODE"               "PO_NAME"               "POPULATION"           
## [4] "COUNTY"                "Positive"              "Total"                
## [7] "zcta_cum.perc_pos"     "health_facility_count" "geometry"
head(health_count_sf[, c("ZIPCODE", "health_facility_count")])
## Simple feature collection with 6 features and 2 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: 971132.6 ymin: 188447.3 xmax: 998309.7 ymax: 230942.5
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 6 × 3
## # Groups:   ZIPCODE [6]
##   ZIPCODE health_facility_count                                         geometry
##     <dbl>                 <int>                      <GEOMETRY [US_survey_foot]>
## 1      83                     1 POLYGON ((998309.7 229616.7, 998282.9 229566.7,…
## 2   10001                     1 POLYGON ((981958.6 213464.5, 981980.3 213539.5,…
## 3   10002                     2 POLYGON ((991339.9 207576.8, 991330.5 207541.4,…
## 4   10003                     1 POLYGON ((989830.5 207048.1, 989716.4 206842.5,…
## 5   10004                     4 MULTIPOLYGON (((977503.7 188449.2, 977493.2 188…
## 6   10005                     1 POLYGON ((982595.7 195880.8, 982587.4 195873.8,…

Task 5:

# Read tracts
nyc_census_sf <- st_read("../data/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp")
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `/Users/user/Library/CloudStorage/OneDrive-Hunter-CUNY/Hunter/Junior - 2nd Semester/GTECH 38520/RStudio/spatial/data/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)
# Check tracts
names(nyc_census_sf)
##  [1] "boro_code"  "boro_ct201" "boro_name"  "cdeligibil" "ct2010"    
##  [6] "ctlabel"    "ntacode"    "ntaname"    "puma"       "shape_area"
## [11] "shape_leng" "geometry"
# Make tract ID
nyc_census_sf <- nyc_census_sf %>%
  mutate(
    cntyFIPS = case_when(
      boro_name == "Bronx" ~ "005",
      boro_name == "Brooklyn" ~ "047",
      boro_name == "Manhattan" ~ "061",
      boro_name == "Queens" ~ "081",
      boro_name == "Staten Island" ~ "085"
    ),
    tractFIPS = paste0(cntyFIPS, ct2010)
  )

# Read ACS data
acs_data <- readLines("../data/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  extract(-2) %>%
  textConnection() %>%
  read.csv(header = TRUE, quote = "\"") %>%
  select(
    GEO_ID,
    totPop = DP05_0001E,
    elderlyPop = DP05_0024E,
    malePop = DP05_0002E,
    femalePop = DP05_0003E,
    whitePop = DP05_0037E,
    blackPop = DP05_0038E,
    asianPop = DP05_0067E,
    hispanicPop = DP05_0071E,
    adultPop = DP05_0021E,
    citizenAdult = DP05_0087E
  ) %>%
  mutate(censusCode = str_sub(GEO_ID, -9, -1))

# Check ACS data
head(acs_data)
##                 GEO_ID totPop elderlyPop malePop femalePop whitePop blackPop
## 1 1400000US36005000100   7080         51    6503       577     1773     4239
## 2 1400000US36005000200   4542        950    2264      2278     2165     1279
## 3 1400000US36005000400   5634        710    2807      2827     2623     1699
## 4 1400000US36005001600   5917        989    2365      3552     2406     2434
## 5 1400000US36005001900   2765         76    1363      1402      585     1041
## 6 1400000US36005002000   9409        977    4119      5290     3185     4487
##   asianPop hispanicPop adultPop citizenAdult censusCode
## 1      130        2329     6909         6100  005000100
## 2      119        3367     3582         2952  005000200
## 3      226        3873     4507         4214  005000400
## 4       68        3603     4416         3851  005001600
## 5      130        1413     2008         1787  005001900
## 6       29        5905     6851         6170  005002000
names(acs_data)
##  [1] "GEO_ID"       "totPop"       "elderlyPop"   "malePop"      "femalePop"   
##  [6] "whitePop"     "blackPop"     "asianPop"     "hispanicPop"  "adultPop"    
## [11] "citizenAdult" "censusCode"
# Join ACS to tracts
pop_tract_sf <- merge(nyc_census_sf, acs_data, by.x = "tractFIPS", by.y = "censusCode")

# Match CRS
pop_tract_sf <- st_transform(pop_tract_sf, st_crs(zip_covid_sf))

# Check result
names(pop_tract_sf)
##  [1] "tractFIPS"    "boro_code"    "boro_ct201"   "boro_name"    "cdeligibil"  
##  [6] "ct2010"       "ctlabel"      "ntacode"      "ntaname"      "puma"        
## [11] "shape_area"   "shape_leng"   "cntyFIPS"     "GEO_ID"       "totPop"      
## [16] "elderlyPop"   "malePop"      "femalePop"    "whitePop"     "blackPop"    
## [21] "asianPop"     "hispanicPop"  "adultPop"     "citizenAdult" "geometry"
head(pop_tract_sf)
## Simple feature collection with 6 features and 24 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 1003117 ymin: 225426.9 xmax: 1026945 ymax: 239221.5
## Projected CRS: NAD83 / New York Long Island (ftUS)
##   tractFIPS boro_code boro_ct201 boro_name cdeligibil ct2010 ctlabel ntacode
## 1 005000100         2    2000100     Bronx          I 000100       1    BX98
## 2 005000200         2    2000200     Bronx          I 000200       2    BX09
## 3 005000400         2    2000400     Bronx          I 000400       4    BX09
## 4 005001600         2    2001600     Bronx          E 001600      16    BX09
## 5 005001900         2    2001900     Bronx          I 001900      19    BX39
## 6 005002000         2    2002000     Bronx          E 002000      20    BX09
##                                           ntaname puma shape_area shape_leng
## 1                                   Rikers Island 3710   18163828  18898.116
## 2 Soundview-Castle Hill-Clason Point-Harding Park 3709    5006558  15610.702
## 3 Soundview-Castle Hill-Clason Point-Harding Park 3709    8561175  24725.472
## 4 Soundview-Castle Hill-Clason Point-Harding Park 3709    5221330   9671.306
## 5                          Mott Haven-Port Morris 3710   17961359  29999.575
## 6 Soundview-Castle Hill-Clason Point-Harding Park 3709    4336906   8383.698
##   cntyFIPS               GEO_ID totPop elderlyPop malePop femalePop whitePop
## 1      005 1400000US36005000100   7080         51    6503       577     1773
## 2      005 1400000US36005000200   4542        950    2264      2278     2165
## 3      005 1400000US36005000400   5634        710    2807      2827     2623
## 4      005 1400000US36005001600   5917        989    2365      3552     2406
## 5      005 1400000US36005001900   2765         76    1363      1402      585
## 6      005 1400000US36005002000   9409        977    4119      5290     3185
##   blackPop asianPop hispanicPop adultPop citizenAdult
## 1     4239      130        2329     6909         6100
## 2     1279      119        3367     3582         2952
## 3     1699      226        3873     4507         4214
## 4     2434       68        3603     4416         3851
## 5     1041      130        1413     2008         1787
## 6     4487       29        5905     6851         6170
##                         geometry
## 1 MULTIPOLYGON (((1019455 225...
## 2 MULTIPOLYGON (((1023973 232...
## 3 MULTIPOLYGON (((1026849 235...
## 4 MULTIPOLYGON (((1024344 238...
## 5 MULTIPOLYGON (((1012822 229...
## 6 MULTIPOLYGON (((1022318 237...

Task 6:

# Make centroids
pop_centroid_sf <- st_centroid(pop_tract_sf)

# Join data
pop_zip_sf <- zip_covid_sf %>%
  st_join(pop_centroid_sf, join = st_contains)

# Summarize data
pop_count_sf <- pop_zip_sf %>%
  group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive, Total, zcta_cum.perc_pos) %>%
  summarise(
    totPop = sum(totPop, na.rm = TRUE),
    elderlyPop = sum(elderlyPop, na.rm = TRUE),
    whitePop = sum(whitePop, na.rm = TRUE),
    blackPop = sum(blackPop, na.rm = TRUE),
    asianPop = sum(asianPop, na.rm = TRUE),
    hispanicPop = sum(hispanicPop, na.rm = TRUE),
    elderlyPct = elderlyPop / totPop * 100
  )

# Check result
names(pop_count_sf)
##  [1] "ZIPCODE"           "PO_NAME"           "POPULATION"       
##  [4] "COUNTY"            "Positive"          "Total"            
##  [7] "zcta_cum.perc_pos" "totPop"            "elderlyPop"       
## [10] "whitePop"          "blackPop"          "asianPop"         
## [13] "hispanicPop"       "elderlyPct"        "geometry"
head(pop_count_sf[, c("ZIPCODE", "totPop", "elderlyPop", "elderlyPct")])
## Simple feature collection with 6 features and 4 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: 971132.6 ymin: 188447.3 xmax: 998309.7 ymax: 230942.5
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 6 × 5
## # Groups:   ZIPCODE [6]
##   ZIPCODE totPop elderlyPop elderlyPct                                  geometry
##     <dbl>  <int>      <int>      <dbl>               <GEOMETRY [US_survey_foot]>
## 1      83      3          0       0    POLYGON ((998309.7 229616.7, 998282.9 22…
## 2   10001  19146       2500      13.1  POLYGON ((981958.6 213464.5, 981980.3 21…
## 3   10002  74310      15815      21.3  POLYGON ((991339.9 207576.8, 991330.5 20…
## 4   10003  53487       6296      11.8  POLYGON ((989830.5 207048.1, 989716.4 20…
## 5   10004   1731         52       3.00 MULTIPOLYGON (((977503.7 188449.2, 97749…
## 6   10005   8809        209       2.37 POLYGON ((982595.7 195880.8, 982587.4 19…

Task 7:

# Join food counts
final_zip_sf <- zip_covid_sf %>%
  left_join(
    st_drop_geometry(food_count_sf) %>%
      select(ZIPCODE, food_store_count),
    by = "ZIPCODE"
  )

# Join health counts
final_zip_sf <- final_zip_sf %>%
  left_join(
    st_drop_geometry(health_count_sf) %>%
      select(ZIPCODE, health_facility_count),
    by = "ZIPCODE"
  )

# Join census data
final_zip_sf <- final_zip_sf %>%
  left_join(
    st_drop_geometry(pop_count_sf) %>%
      select(
        ZIPCODE,
        totPop,
        elderlyPop,
        whitePop,
        blackPop,
        asianPop,
        hispanicPop,
        elderlyPct
      ),
    by = "ZIPCODE"
  )

# Replace missing counts
final_zip_sf <- final_zip_sf %>%
  mutate(
    food_store_count = replace_na(food_store_count, 0),
    health_facility_count = replace_na(health_facility_count, 0)
  )

# Clean columns
final_zip_sf <- final_zip_sf %>%
  select(
    ZIPCODE,
    PO_NAME = PO_NAME.x,
    POPULATION = POPULATION.x,
    COUNTY = COUNTY.x,
    Positive = Positive.x,
    Total = Total.x,
    zcta_cum.perc_pos,
    food_store_count,
    health_facility_count,
    totPop,
    elderlyPop,
    whitePop,
    blackPop,
    asianPop,
    hispanicPop,
    elderlyPct,
    geometry
  )

# Check result
names(final_zip_sf)
##  [1] "ZIPCODE"               "PO_NAME"               "POPULATION"           
##  [4] "COUNTY"                "Positive"              "Total"                
##  [7] "zcta_cum.perc_pos"     "food_store_count"      "health_facility_count"
## [10] "totPop"                "elderlyPop"            "whitePop"             
## [13] "blackPop"              "asianPop"              "hispanicPop"          
## [16] "elderlyPct"            "geometry"
head(final_zip_sf)
## Simple feature collection with 6 features and 16 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 986490.1 ymin: 168910.5 xmax: 1043042 ymax: 189382.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
##   ZIPCODE  PO_NAME POPULATION COUNTY Positive Total zcta_cum.perc_pos
## 1   11436  Jamaica      18681 Queens      342   567             60.32
## 2   11213 Brooklyn      62426  Kings      972  1653             58.80
## 3   11212 Brooklyn      83866  Kings     1086  1793             60.57
## 4   11225 Brooklyn      56527  Kings      814  1359             59.90
## 5   11218 Brooklyn      72280  Kings     1163  1967             59.13
## 6   11226 Brooklyn     106132  Kings     1336  2170             61.57
##   food_store_count health_facility_count totPop elderlyPop whitePop blackPop
## 1                3                     1  22377       2456     1192    13972
## 2              118                     1  66602       7662    16483    43625
## 3              187                     1  73069       9518     5278    59541
## 4              103                     1  60958       7592    15300    40048
## 5              135                     1  67426       8144    40315     5045
## 6              236                     3 103729      12278    16173    69058
##   asianPop hispanicPop elderlyPct                       geometry
## 1     2626        3226   10.97556 POLYGON ((1038098 188138.4,...
## 2     1836        6738   11.50416 POLYGON ((1001614 186926.4,...
## 3     1330       13900   13.02604 POLYGON ((1011174 183696.3,...
## 4     2272        5293   12.45448 POLYGON ((995908.4 183617.6...
## 5    14695       11169   12.07843 POLYGON ((991997.1 176307.5...
## 6     5207       18244   11.83661 POLYGON ((994821.5 177865.7...
class(final_zip_sf)
## [1] "sf"         "data.frame"