Introduction

In this lab, we aggregate data from different sources to the zip code level because COVID-19 data are available at that scale. We will:

Setup

Load the necessary libraries. (Make sure you’ve completed Lab 1 so that your spatial objects for zip codes, health facilities, and retail stores are available.)

covid_data <- read_csv("C:/Users/vikto/OneDrive - Hunter - CUNY/GTECH785/R-Spatial/Session_8/R-Spatial_II_Lab/R-Spatial_II_Lab/tests-by-zcta_2020_04_12.csv", show_col_types = FALSE)
head(covid_data)
## # A tibble: 6 × 4
##   MODZCTA Positive Total zcta_cum.perc_pos
##     <dbl>    <dbl> <dbl>             <dbl>
## 1      NA     1934  2082              92.9
## 2   10001      211   448              47.1
## 3   10002      539  1024              52.6
## 4   10003      279   662              42.2
## 5   10004       23    59              39.0
## 6   10005       38   116              32.8
covid_data <- covid_data %>% 
  rename(ZIPCODE = MODZCTA) %>% 
  mutate(ZIPCODE = as.character(ZIPCODE))
head(covid_data)
## # A tibble: 6 × 4
##   ZIPCODE Positive Total zcta_cum.perc_pos
##   <chr>      <dbl> <dbl>             <dbl>
## 1 <NA>        1934  2082              92.9
## 2 10001        211   448              47.1
## 3 10002        539  1024              52.6
## 4 10003        279   662              42.2
## 5 10004         23    59              39.0
## 6 10005         38   116              32.8
nyc_postal <- st_read("C:/Users/vikto/OneDrive - Hunter - CUNY/GTECH785/R-Spatial/Session_7/R-Spatial_I_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp")
## Reading layer `ZIP_CODE_040114' from data source 
##   `C:\Users\vikto\OneDrive - Hunter - CUNY\GTECH785\R-Spatial\Session_7\R-Spatial_I_Lab\ZIP_CODE_040114\ZIP_CODE_040114.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
nyc_postal <- nyc_postal %>% mutate(ZIPCODE = as.character(ZIPCODE))
print(nyc_postal)
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
## First 10 features:
##    ZIPCODE BLDGZIP  PO_NAME POPULATION     AREA STATE COUNTY ST_FIPS CTY_FIPS
## 1    11436       0  Jamaica      18681 22699295    NY Queens      36      081
## 2    11213       0 Brooklyn      62426 29631004    NY  Kings      36      047
## 3    11212       0 Brooklyn      83866 41972104    NY  Kings      36      047
## 4    11225       0 Brooklyn      56527 23698630    NY  Kings      36      047
## 5    11218       0 Brooklyn      72280 36868799    NY  Kings      36      047
## 6    11226       0 Brooklyn     106132 39408598    NY  Kings      36      047
## 7    11219       0 Brooklyn      92561 42002738    NY  Kings      36      047
## 8    11210       0 Brooklyn      67067 47887023    NY  Kings      36      047
## 9    11230       0 Brooklyn      80857 49926703    NY  Kings      36      047
## 10   11204       0 Brooklyn      77354 43555185    NY  Kings      36      047
##                     URL SHAPE_AREA SHAPE_LEN                       geometry
## 1  http://www.usps.com/          0         0 POLYGON ((1038098 188138.4,...
## 2  http://www.usps.com/          0         0 POLYGON ((1001614 186926.4,...
## 3  http://www.usps.com/          0         0 POLYGON ((1011174 183696.3,...
## 4  http://www.usps.com/          0         0 POLYGON ((995908.4 183617.6...
## 5  http://www.usps.com/          0         0 POLYGON ((991997.1 176307.5...
## 6  http://www.usps.com/          0         0 POLYGON ((994821.5 177865.7...
## 7  http://www.usps.com/          0         0 POLYGON ((987286.4 173946.5...
## 8  http://www.usps.com/          0         0 POLYGON ((995796 171110.1, ...
## 9  http://www.usps.com/          0         0 POLYGON ((994099.3 171240.7...
## 10 http://www.usps.com/          0         0 POLYGON ((989500.2 170730.2...
nyc_postal_covid <- nyc_postal %>% 
  left_join(covid_data, by = "ZIPCODE")
print(nyc_postal_covid)
## Simple feature collection with 263 features and 15 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
## First 10 features:
##    ZIPCODE BLDGZIP  PO_NAME POPULATION     AREA STATE COUNTY ST_FIPS CTY_FIPS
## 1    11436       0  Jamaica      18681 22699295    NY Queens      36      081
## 2    11213       0 Brooklyn      62426 29631004    NY  Kings      36      047
## 3    11212       0 Brooklyn      83866 41972104    NY  Kings      36      047
## 4    11225       0 Brooklyn      56527 23698630    NY  Kings      36      047
## 5    11218       0 Brooklyn      72280 36868799    NY  Kings      36      047
## 6    11226       0 Brooklyn     106132 39408598    NY  Kings      36      047
## 7    11219       0 Brooklyn      92561 42002738    NY  Kings      36      047
## 8    11210       0 Brooklyn      67067 47887023    NY  Kings      36      047
## 9    11230       0 Brooklyn      80857 49926703    NY  Kings      36      047
## 10   11204       0 Brooklyn      77354 43555185    NY  Kings      36      047
##                     URL SHAPE_AREA SHAPE_LEN Positive Total zcta_cum.perc_pos
## 1  http://www.usps.com/          0         0      269   412             65.29
## 2  http://www.usps.com/          0         0      793  1296             61.19
## 3  http://www.usps.com/          0         0      842  1302             64.67
## 4  http://www.usps.com/          0         0      632  1001             63.14
## 5  http://www.usps.com/          0         0      976  1606             60.77
## 6  http://www.usps.com/          0         0      995  1527             65.16
## 7  http://www.usps.com/          0         0     1679  2476             67.81
## 8  http://www.usps.com/          0         0      898  1427             62.93
## 9  http://www.usps.com/          0         0     1301  2091             62.22
## 10 http://www.usps.com/          0         0     1110  1791             61.98
##                          geometry
## 1  POLYGON ((1038098 188138.4,...
## 2  POLYGON ((1001614 186926.4,...
## 3  POLYGON ((1011174 183696.3,...
## 4  POLYGON ((995908.4 183617.6...
## 5  POLYGON ((991997.1 176307.5...
## 6  POLYGON ((994821.5 177865.7...
## 7  POLYGON ((987286.4 173946.5...
## 8  POLYGON ((995796 171110.1, ...
## 9  POLYGON ((994099.3 171240.7...
## 10 POLYGON ((989500.2 170730.2...
retail_data <- read_csv("D:/Session_7/R-Spatial_I_Lab/NYS_Retail_Food_Stores.csv", show_col_types = FALSE)
head(retail_data)
## # A tibble: 6 × 15
##   County `License Number` `Operation Type` `Establishment Type` `Entity Name`   
##   <chr>  <chr>            <chr>            <chr>                <chr>           
## 1 Albany 733149           Store            A                    SPEEDWAY LLC    
## 2 Albany 704590           Store            JAC                  1250 SELKIRK INC
## 3 Albany 727909           Store            JAC                  RED-KAP SALES I…
## 4 Albany 720557           Store            JAC                  SAEED SADIQ, SA…
## 5 Albany 015890           Store            A                    AZIZ MOHAMMAD S 
## 6 Albany 735254           Store            JAC                  7-ELEVEN INC    
## # ℹ 10 more variables: `DBA Name` <chr>, `Street Number` <chr>,
## #   `Street Name` <chr>, `Address Line 2` <lgl>, `Address Line 3` <lgl>,
## #   City <chr>, State <chr>, `Zip Code` <dbl>, `Square Footage` <dbl>,
## #   Location <chr>
retail_data <- retail_data %>%
  mutate(Coords = str_extract(Location, "\\(.*\\)"),
         Coords = str_remove_all(Coords, "[\\(\\)]")) %>%
  separate(Coords, into = c("Lat", "Long"), sep = ",\\s*") %>%
  mutate(Lat = as.numeric(Lat), Long = as.numeric(Long))
retail_data_clean <- retail_data %>% drop_na(Lat, Long)
retail_sf <- st_as_sf(retail_data_clean, coords = c("Long", "Lat"), crs = 4326)
print(retail_sf)
## Simple feature collection with 23974 features and 15 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -79.75953 ymin: 40.50782 xmax: -71.93873 ymax: 44.99484
## Geodetic CRS:  WGS 84
## # A tibble: 23,974 × 16
##    County `License Number` `Operation Type` `Establishment Type` `Entity Name`  
##  * <chr>  <chr>            <chr>            <chr>                <chr>          
##  1 Albany 733149           Store            A                    SPEEDWAY LLC   
##  2 Albany 704590           Store            JAC                  1250 SELKIRK I…
##  3 Albany 727909           Store            JAC                  RED-KAP SALES …
##  4 Albany 720557           Store            JAC                  SAEED SADIQ, S…
##  5 Albany 015890           Store            A                    AZIZ MOHAMMAD S
##  6 Albany 735254           Store            JAC                  7-ELEVEN INC   
##  7 Albany 708848           Store            JAC                  ADVANCED FRESH…
##  8 Albany 713889           Store            JAC                  ADVANCED FRESH…
##  9 Albany 715759           Store            JAC                  ADVANCED FRESH…
## 10 Albany 723927           Store            JAC                  ADVANCED FRESH…
## # ℹ 23,964 more rows
## # ℹ 11 more variables: `DBA Name` <chr>, `Street Number` <chr>,
## #   `Street Name` <chr>, `Address Line 2` <lgl>, `Address Line 3` <lgl>,
## #   City <chr>, State <chr>, `Zip Code` <dbl>, `Square Footage` <dbl>,
## #   Location <chr>, geometry <POINT [°]>
nyc_postal_4326 <- st_transform(nyc_postal, 4326)

retail_join <- st_join(retail_sf, nyc_postal_4326, join = st_intersects)

retail_by_zip <- retail_join %>% 
  group_by(ZIPCODE) %>% 
  summarize(num_retail = n())
print(retail_by_zip)
## Simple feature collection with 184 features and 2 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: -79.75953 ymin: 40.50782 xmax: -71.93873 ymax: 44.99484
## Geodetic CRS:  WGS 84
## # A tibble: 184 × 3
##    ZIPCODE num_retail                                                   geometry
##    <chr>        <int>                                           <MULTIPOINT [°]>
##  1 10001           54 ((-73.99079 40.74566), (-73.99128 40.74498), (-73.98799 4…
##  2 10002          199 ((-73.98288 40.7211), (-73.98293 40.71977), (-73.98288 40…
##  3 10003           94 ((-73.98276 40.73535), (-73.98062 40.734), (-73.98038 40.…
##  4 10004           10 ((-74.01357 40.70553), (-74.0158 40.70494), (-74.01267 40…
##  5 10005            9 ((-74.00718 40.70498), (-74.00774 40.70621), (-74.00858 4…
##  6 10006            5 ((-74.01311 40.70753), (-74.01316 40.70783), (-74.013 40.…
##  7 10007           20 ((-74.00739 40.71287), (-74.00897 40.71313), (-74.00823 4…
##  8 10009           84 ((-73.9769 40.72673), (-73.9768 40.72687), (-73.97664 40.…
##  9 10010           48 ((-73.98189 40.74054), (-73.98206 40.74031), (-73.9825 40…
## 10 10011          112 ((-74.00028 40.73263), (-74.00089 40.7369), (-74.00131 40…
## # ℹ 174 more rows
health_data <- read_csv("D:/Session_7/R-Spatial_I_Lab/NYS_Health_Facility.csv", show_col_types = FALSE)
head(health_data)
## # A tibble: 6 × 36
##   `Facility ID` `Facility Name`                  `Short Description` Description
##           <dbl> <chr>                            <chr>               <chr>      
## 1           204 Hospice at Lourdes               HSPC                Hospice    
## 2           620 Charles T Sitrin Health Care Ce… NH                  Residentia…
## 3           654 Central Park Rehabilitation and… NH                  Residentia…
## 4          1156 East Side Nursing Home           NH                  Residentia…
## 5          2589 Wellsville Manor Care Center     NH                  Residentia…
## 6          3455 Harris Hill Nursing Facility, L… NH                  Residentia…
## # ℹ 32 more variables: `Facility Open Date` <chr>, `Facility Address 1` <chr>,
## #   `Facility Address 2` <chr>, `Facility City` <chr>, `Facility State` <chr>,
## #   `Facility Zip Code` <chr>, `Facility Phone Number` <dbl>,
## #   `Facility Fax Number` <dbl>, `Facility Website` <chr>,
## #   `Facility County Code` <dbl>, `Facility County` <chr>,
## #   `Regional Office ID` <dbl>, `Regional Office` <chr>,
## #   `Main Site Name` <chr>, `Main Site Facility ID` <dbl>, …
health_data_clean <- health_data %>% 
  drop_na(`Facility Longitude`, `Facility Latitude`)
health_sf <- st_as_sf(health_data_clean, 
                      coords = c("Facility Longitude", "Facility Latitude"), 
                      crs = 4326)
print(health_sf)
## Simple feature collection with 3848 features and 34 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -79.6299 ymin: -75.45935 xmax: 43.21162 ymax: 44.97849
## Geodetic CRS:  WGS 84
## # A tibble: 3,848 × 35
##    `Facility ID` `Facility Name`                 `Short Description` Description
##  *         <dbl> <chr>                           <chr>               <chr>      
##  1           204 Hospice at Lourdes              HSPC                Hospice    
##  2           620 Charles T Sitrin Health Care C… NH                  Residentia…
##  3          1156 East Side Nursing Home          NH                  Residentia…
##  4          2589 Wellsville Manor Care Center    NH                  Residentia…
##  5          3455 Harris Hill Nursing Facility, … NH                  Residentia…
##  6          3853 Garden City Surgi Center        DTC                 Diagnostic…
##  7          4249 Willcare                        CHHA                Certified …
##  8          4473 Good Shepherd Hospice           HSPC                Hospice    
##  9          6230 NYU Langone Rutherford          HOSP-EC             Hospital E…
## 10          6482 Endoscopy Center of Long Islan… DTC                 Diagnostic…
## # ℹ 3,838 more rows
## # ℹ 31 more variables: `Facility Open Date` <chr>, `Facility Address 1` <chr>,
## #   `Facility Address 2` <chr>, `Facility City` <chr>, `Facility State` <chr>,
## #   `Facility Zip Code` <chr>, `Facility Phone Number` <dbl>,
## #   `Facility Fax Number` <dbl>, `Facility Website` <chr>,
## #   `Facility County Code` <dbl>, `Facility County` <chr>,
## #   `Regional Office ID` <dbl>, `Regional Office` <chr>, …
health_join <- st_join(health_sf, nyc_postal_4326, join = st_intersects)

health_by_zip <- health_join %>% 
  group_by(ZIPCODE) %>% 
  summarize(num_health = n())
print(health_by_zip)
## Simple feature collection with 160 features and 2 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: -79.6299 ymin: -75.45935 xmax: 43.21162 ymax: 44.97849
## Geodetic CRS:  WGS 84
## # A tibble: 160 × 3
##    ZIPCODE num_health                                                   geometry
##    <chr>        <int>                                             <GEOMETRY [°]>
##  1 10001           11 MULTIPOINT ((-73.99438 40.74568), (-73.99186 40.74573), (…
##  2 10002           20 MULTIPOINT ((-73.97523 40.71873), (-73.97976 40.71812), (…
##  3 10003           15 MULTIPOINT ((-73.9816 40.73267), (-73.99121 40.72608), (-…
##  4 10004            1                                 POINT (-74.01255 40.70661)
##  5 10006            1                                 POINT (-74.01294 40.70633)
##  6 10007            1                                 POINT (-74.01108 40.71315)
##  7 10009            7 MULTIPOINT ((-73.97613 40.72456), (-73.9792 40.72897), (-…
##  8 10010           13 MULTIPOINT ((-73.98011 40.73908), (-73.98014 40.73904), (…
##  9 10011           11 MULTIPOINT ((-74.00049 40.73758), (-73.99702 40.73697), (…
## 10 10012            6 MULTIPOINT ((-73.99354 40.72552), (-73.99577 40.72169), (…
## # ℹ 150 more rows
census_tracts <- st_read("D:/Session_8/R-Spatial_II_Lab/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp")
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `D:\Session_8\R-Spatial_II_Lab\R-Spatial_II_Lab\2010 Census Tracts\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)
print(census_tracts)
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)
## First 10 features:
##    boro_code boro_ct201     boro_name cdeligibil ct2010 ctlabel ntacode
## 1          5    5000900 Staten Island          E 000900       9    SI22
## 2          1    1009800     Manhattan          I 009800      98    MN19
## 3          1    1010000     Manhattan          I 010000     100    MN19
## 4          1    1010200     Manhattan          I 010200     102    MN17
## 5          1    1010400     Manhattan          I 010400     104    MN17
## 6          1    1011300     Manhattan          I 011300     113    MN17
## 7          1    1011402     Manhattan          I 011402  114.02    MN40
## 8          1    1013000     Manhattan          I 013000     130    MN40
## 9          1    1014000     Manhattan          I 014000     140    MN40
## 10         1    1014801     Manhattan          I 014801  148.01    MN40
##                                      ntaname puma shape_area shape_leng
## 1  West New Brighton-New Brighton-St. George 3903  2497009.7   7729.017
## 2                    Turtle Bay-East Midtown 3808  1906016.4   5534.200
## 3                    Turtle Bay-East Midtown 3808  1860938.4   5692.169
## 4                      Midtown-Midtown South 3807  1860992.7   5687.802
## 5                      Midtown-Midtown South 3807  1864600.4   5693.036
## 6                      Midtown-Midtown South 3807  1890907.3   5699.861
## 7              Upper East Side-Carnegie Hill 3805  1063547.4   4125.256
## 8              Upper East Side-Carnegie Hill 3805  1918144.5   5807.973
## 9              Upper East Side-Carnegie Hill 3805  1925984.2   5820.816
## 10             Upper East Side-Carnegie Hill 3805   559216.2   3135.951
##                          geometry
## 1  MULTIPOLYGON (((-74.07921 4...
## 2  MULTIPOLYGON (((-73.96433 4...
## 3  MULTIPOLYGON (((-73.96802 4...
## 4  MULTIPOLYGON (((-73.97124 4...
## 5  MULTIPOLYGON (((-73.97446 4...
## 6  MULTIPOLYGON (((-73.98412 4...
## 7  MULTIPOLYGON (((-73.96476 4...
## 8  MULTIPOLYGON (((-73.96148 4...
## 9  MULTIPOLYGON (((-73.95495 4...
## 10 MULTIPOLYGON (((-73.95398 4...
acs_data <- read_csv("D:/Session_8/R-Spatial_II_Lab/R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv",
                     skip = 1, show_col_types = FALSE)
## New names:
## • `Estimate!!RACE!!Total population!!One race` -> `Estimate!!RACE!!Total
##   population!!One race...12`
## • `Margin of Error!!RACE!!Total population!!One race` -> `Margin of
##   Error!!RACE!!Total population!!One race...13`
## • `Percent Estimate!!RACE!!Total population!!One race` -> `Percent
##   Estimate!!RACE!!Total population!!One race...14`
## • `Percent Margin of Error!!RACE!!Total population!!One race` -> `Percent
##   Margin of Error!!RACE!!Total population!!One race...15`
## • `Estimate!!RACE!!Total population!!Two or more races` ->
##   `Estimate!!RACE!!Total population!!Two or more races...16`
## • `Margin of Error!!RACE!!Total population!!Two or more races` -> `Margin of
##   Error!!RACE!!Total population!!Two or more races...17`
## • `Percent Estimate!!RACE!!Total population!!Two or more races` -> `Percent
##   Estimate!!RACE!!Total population!!Two or more races...18`
## • `Percent Margin of Error!!RACE!!Total population!!Two or more races` ->
##   `Percent Margin of Error!!RACE!!Total population!!Two or more races...19`
## • `Estimate!!RACE!!Total population!!One race` -> `Estimate!!RACE!!Total
##   population!!One race...20`
## • `Margin of Error!!RACE!!Total population!!One race` -> `Margin of
##   Error!!RACE!!Total population!!One race...21`
## • `Percent Estimate!!RACE!!Total population!!One race` -> `Percent
##   Estimate!!RACE!!Total population!!One race...22`
## • `Percent Margin of Error!!RACE!!Total population!!One race` -> `Percent
##   Margin of Error!!RACE!!Total population!!One race...23`
## • `Estimate!!RACE!!Total population!!Two or more races` ->
##   `Estimate!!RACE!!Total population!!Two or more races...108`
## • `Margin of Error!!RACE!!Total population!!Two or more races` -> `Margin of
##   Error!!RACE!!Total population!!Two or more races...109`
## • `Percent Estimate!!RACE!!Total population!!Two or more races` -> `Percent
##   Estimate!!RACE!!Total population!!Two or more races...110`
## • `Percent Margin of Error!!RACE!!Total population!!Two or more races` ->
##   `Percent Margin of Error!!RACE!!Total population!!Two or more races...111`
## • `Estimate!!SEX AND AGE!!Total population!!18 years and over` ->
##   `Estimate!!SEX AND AGE!!Total population!!18 years and over...316`
## • `Margin of Error!!SEX AND AGE!!Total population!!18 years and over` ->
##   `Margin of Error!!SEX AND AGE!!Total population!!18 years and over...317`
## • `Percent Estimate!!SEX AND AGE!!Total population!!18 years and over` ->
##   `Percent Estimate!!SEX AND AGE!!Total population!!18 years and over...318`
## • `Percent Margin of Error!!SEX AND AGE!!Total population!!18 years and over`
##   -> `Percent Margin of Error!!SEX AND AGE!!Total population!!18 years and
##   over...319`
## • `Estimate!!SEX AND AGE!!Total population!!65 years and over` ->
##   `Estimate!!SEX AND AGE!!Total population!!65 years and over...328`
## • `Margin of Error!!SEX AND AGE!!Total population!!65 years and over` ->
##   `Margin of Error!!SEX AND AGE!!Total population!!65 years and over...329`
## • `Percent Estimate!!SEX AND AGE!!Total population!!65 years and over` ->
##   `Percent Estimate!!SEX AND AGE!!Total population!!65 years and over...330`
## • `Percent Margin of Error!!SEX AND AGE!!Total population!!65 years and over`
##   -> `Percent Margin of Error!!SEX AND AGE!!Total population!!65 years and
##   over...331`
## • `Estimate!!SEX AND AGE!!Total population!!18 years and over` ->
##   `Estimate!!SEX AND AGE!!Total population!!18 years and over...332`
## • `Margin of Error!!SEX AND AGE!!Total population!!18 years and over` ->
##   `Margin of Error!!SEX AND AGE!!Total population!!18 years and over...333`
## • `Percent Estimate!!SEX AND AGE!!Total population!!18 years and over` ->
##   `Percent Estimate!!SEX AND AGE!!Total population!!18 years and over...334`
## • `Percent Margin of Error!!SEX AND AGE!!Total population!!18 years and over`
##   -> `Percent Margin of Error!!SEX AND AGE!!Total population!!18 years and
##   over...335`
## • `Estimate!!SEX AND AGE!!Total population!!65 years and over` ->
##   `Estimate!!SEX AND AGE!!Total population!!65 years and over...348`
## • `Margin of Error!!SEX AND AGE!!Total population!!65 years and over` ->
##   `Margin of Error!!SEX AND AGE!!Total population!!65 years and over...349`
## • `Percent Estimate!!SEX AND AGE!!Total population!!65 years and over` ->
##   `Percent Estimate!!SEX AND AGE!!Total population!!65 years and over...350`
## • `Percent Margin of Error!!SEX AND AGE!!Total population!!65 years and over`
##   -> `Percent Margin of Error!!SEX AND AGE!!Total population!!65 years and
##   over...351`
head(acs_data)
## # A tibble: 6 × 358
##   id        `Geographic Area Name` Percent Margin of Er…¹ Estimate!!SEX AND AG…²
##   <chr>     <chr>                  <chr>                  <chr>                 
## 1 1400000U… Census Tract 1, Bronx… 50.4                   200.0                 
## 2 1400000U… Census Tract 2, Bronx… 9.5                    95.1                  
## 3 1400000U… Census Tract 4, Bronx… 8.8                    91.4                  
## 4 1400000U… Census Tract 16, Bron… 8.9                    42.5                  
## 5 1400000U… Census Tract 19, Bron… 25.5                   245.5                 
## 6 1400000U… Census Tract 20, Bron… 10.9                   35.9                  
## # ℹ abbreviated names:
## #   ¹​`Percent Margin of Error!!SEX AND AGE!!Total population!!65 years and over!!Female`,
## #   ²​`Estimate!!SEX AND AGE!!Total population!!65 years and over!!Sex ratio (males per 100 females)`
## # ℹ 354 more variables:
## #   `Margin of Error!!SEX AND AGE!!Total population!!65 years and over!!Sex ratio (males per 100 females)` <chr>,
## #   `Percent Estimate!!SEX AND AGE!!Total population!!65 years and over!!Sex ratio (males per 100 females)` <chr>,
## #   `Percent Margin of Error!!SEX AND AGE!!Total population!!65 years and over!!Sex ratio (males per 100 females)` <chr>, …
acs_data <- acs_data %>% 
  mutate(GEOID_clean = str_sub(id, -11, -1))
head(acs_data$GEOID_clean)
## [1] "36005000100" "36005000200" "36005000400" "36005001600" "36005001900"
## [6] "36005002000"
census_tracts <- census_tracts %>%
  mutate(
    countyFIPS = case_when(
      boro_code == 1 ~ "061",  # Manhattan
      boro_code == 2 ~ "005",  # Bronx
      boro_code == 3 ~ "047",  # Brooklyn
      boro_code == 4 ~ "081",  # Queens
      boro_code == 5 ~ "085",  # Staten Island
      TRUE ~ NA_character_
    ),
    fullGEOID = paste0("36", countyFIPS, ct2010)
  )
head(census_tracts[, c("ct2010", "countyFIPS", "fullGEOID")])
## Simple feature collection with 6 features and 3 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.08721 ymin: 40.63981 xmax: -73.96433 ymax: 40.76364
## Geodetic CRS:  WGS84(DD)
##   ct2010 countyFIPS   fullGEOID                       geometry
## 1 000900        085 36085000900 MULTIPOLYGON (((-74.07921 4...
## 2 009800        061 36061009800 MULTIPOLYGON (((-73.96433 4...
## 3 010000        061 36061010000 MULTIPOLYGON (((-73.96802 4...
## 4 010200        061 36061010200 MULTIPOLYGON (((-73.97124 4...
## 5 010400        061 36061010400 MULTIPOLYGON (((-73.97446 4...
## 6 011300        061 36061011300 MULTIPOLYGON (((-73.98412 4...
census_tracts_acs <- census_tracts %>% 
  left_join(acs_data, by = c("fullGEOID" = "GEOID_clean"))
head(census_tracts_acs[, c("fullGEOID", "ct2010", "Estimate!!RACE!!Total population", 
                           "Estimate!!SEX AND AGE!!Total population!!65 years and over...348")])
## Simple feature collection with 6 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.08721 ymin: 40.63981 xmax: -73.96433 ymax: 40.76364
## Geodetic CRS:  WGS84(DD)
##     fullGEOID ct2010 Estimate!!RACE!!Total population
## 1 36085000900 000900                             1826
## 2 36061009800 009800                             7200
## 3 36061010000 010000                             1768
## 4 36061010200 010200                              100
## 5 36061010400 010400                              919
## 6 36061011300 011300                              110
##   Estimate!!SEX AND AGE!!Total population!!65 years and over...348
## 1                                                              173
## 2                                                             1624
## 3                                                              211
## 4                                                               24
## 5                                                              263
## 6                                                                8
##                         geometry
## 1 MULTIPOLYGON (((-74.07921 4...
## 2 MULTIPOLYGON (((-73.96433 4...
## 3 MULTIPOLYGON (((-73.96802 4...
## 4 MULTIPOLYGON (((-73.97124 4...
## 5 MULTIPOLYGON (((-73.97446 4...
## 6 MULTIPOLYGON (((-73.98412 4...
nyc_postal_4326 <- st_transform(nyc_postal, 4326)
census_tracts_acs_aligned <- st_transform(census_tracts_acs, st_crs(nyc_postal_4326))

tracts_in_zip <- st_join(census_tracts_acs_aligned, nyc_postal_4326, join = st_intersects)
summary(tracts_in_zip$ZIPCODE)
##    Length     Class      Mode 
##      4021 character character
tracts_in_zip <- tracts_in_zip %>% 
  mutate(
    total_pop = as.numeric(`Estimate!!RACE!!Total population`),
    elderly_pop = as.numeric(`Estimate!!SEX AND AGE!!Total population!!65 years and over...348`)
  )
summary(tracts_in_zip$total_pop)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0    2345    3586    3926    5102   28272
summary(tracts_in_zip$elderly_pop)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0   276.0   443.0   551.3   689.0  7634.0
acs_by_zip <- tracts_in_zip %>%
  group_by(ZIPCODE) %>%
  summarize(
    total_population = sum(total_pop, na.rm = TRUE),
    elderly_population = sum(elderly_pop, na.rm = TRUE)
  )
print(acs_by_zip)
## Simple feature collection with 249 features and 3 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS 84
## # A tibble: 249 × 4
##    ZIPCODE total_population elderly_population                          geometry
##    <chr>              <dbl>              <dbl>                    <GEOMETRY [°]>
##  1 00083             141819              31179 POLYGON ((-73.95817 40.80058, -7…
##  2 10001              53108               6843 POLYGON ((-73.99329 40.74223, -7…
##  3 10002             145548              28792 MULTIPOLYGON (((-73.99353 40.721…
##  4 10003              95677              12213 POLYGON ((-73.99677 40.72543, -7…
##  5 10004              15179                327 MULTIPOLYGON (((-73.9975 40.6996…
##  6 10005              23105                514 MULTIPOLYGON (((-74.0007 40.6943…
##  7 10006              23105                514 MULTIPOLYGON (((-74.0007 40.6943…
##  8 10007              53423               5888 MULTIPOLYGON (((-73.9948 40.7032…
##  9 10009             117245              16686 MULTIPOLYGON (((-73.98718 40.733…
## 10 10010              86369              12220 MULTIPOLYGON (((-73.96135 40.730…
## # ℹ 239 more rows
nyc_zip_final <- nyc_postal %>%
  left_join(covid_data, by = "ZIPCODE") %>%       
  left_join(st_set_geometry(retail_by_zip, NULL), by = "ZIPCODE") %>%      # Retail store counts
  left_join(st_set_geometry(health_by_zip, NULL), by = "ZIPCODE") %>%     
  left_join(st_set_geometry(acs_by_zip, NULL), by = "ZIPCODE")        
print(nyc_zip_final)
## Simple feature collection with 263 features and 19 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
## First 10 features:
##    ZIPCODE BLDGZIP  PO_NAME POPULATION     AREA STATE COUNTY ST_FIPS CTY_FIPS
## 1    11436       0  Jamaica      18681 22699295    NY Queens      36      081
## 2    11213       0 Brooklyn      62426 29631004    NY  Kings      36      047
## 3    11212       0 Brooklyn      83866 41972104    NY  Kings      36      047
## 4    11225       0 Brooklyn      56527 23698630    NY  Kings      36      047
## 5    11218       0 Brooklyn      72280 36868799    NY  Kings      36      047
## 6    11226       0 Brooklyn     106132 39408598    NY  Kings      36      047
## 7    11219       0 Brooklyn      92561 42002738    NY  Kings      36      047
## 8    11210       0 Brooklyn      67067 47887023    NY  Kings      36      047
## 9    11230       0 Brooklyn      80857 49926703    NY  Kings      36      047
## 10   11204       0 Brooklyn      77354 43555185    NY  Kings      36      047
##                     URL SHAPE_AREA SHAPE_LEN Positive Total zcta_cum.perc_pos
## 1  http://www.usps.com/          0         0      269   412             65.29
## 2  http://www.usps.com/          0         0      793  1296             61.19
## 3  http://www.usps.com/          0         0      842  1302             64.67
## 4  http://www.usps.com/          0         0      632  1001             63.14
## 5  http://www.usps.com/          0         0      976  1606             60.77
## 6  http://www.usps.com/          0         0      995  1527             65.16
## 7  http://www.usps.com/          0         0     1679  2476             67.81
## 8  http://www.usps.com/          0         0      898  1427             62.93
## 9  http://www.usps.com/          0         0     1301  2091             62.22
## 10 http://www.usps.com/          0         0     1110  1791             61.98
##    num_retail num_health total_population elderly_population
## 1           3          2            53493               6330
## 2         118          9           122924              14917
## 3         187          9           117274              15428
## 4         103          4           100168              12478
## 5         135         10           110315              12753
## 6         236         13           160919              19368
## 7         176         15           144061              17283
## 8          73          7           116134              17179
## 9         128         15           143974              20794
## 10        144          4           130209              18084
##                          geometry
## 1  POLYGON ((1038098 188138.4,...
## 2  POLYGON ((1001614 186926.4,...
## 3  POLYGON ((1011174 183696.3,...
## 4  POLYGON ((995908.4 183617.6...
## 5  POLYGON ((991997.1 176307.5...
## 6  POLYGON ((994821.5 177865.7...
## 7  POLYGON ((987286.4 173946.5...
## 8  POLYGON ((995796 171110.1, ...
## 9  POLYGON ((994099.3 171240.7...
## 10 POLYGON ((989500.2 170730.2...
save(nyc_zip_final, file = "D:/Session_7/R-Spatial_I_Lab/nyc_zip_final.RData")

st_write(nyc_zip_final, "D:/Session_7/R-Spatial_I_Lab/nyc_zip_final.gpkg", delete_layer = TRUE)
## Deleting layer `nyc_zip_final' using driver `GPKG'
## Writing layer `nyc_zip_final' to data source 
##   `D:/Session_7/R-Spatial_I_Lab/nyc_zip_final.gpkg' using driver `GPKG'
## Writing 263 features with 19 fields and geometry type Polygon.
library(ggplot2)

ggplot(nyc_zip_final) +
  geom_sf(aes(fill = Positive)) +
  scale_fill_viridis_c(option = "plasma") +
  labs(title = "NYC COVID‑19 Positive Cases by Zip Code",
       subtitle = "Variation in COVID‑19 Cases Across NYC Zip Codes",
       fill = "Positive Cases") +
  theme_minimal()

my_map <- mapview(nyc_zip_final, zcol = "Positive", layer.name = "Positive Cases")
my_map
library(mapview)

mapview(nyc_zip_final, 
        zcol = "total_population", 
        legend = TRUE, 
        layer.name = "Total Population")