In this lab, we aggregate data from different sources to the zip code level because COVID-19 data are available at that scale. We will:
Load the necessary libraries. (Make sure you’ve completed Lab 1 so that your spatial objects for zip codes, health facilities, and retail stores are available.)
covid_data <- read_csv("C:/Users/vikto/OneDrive - Hunter - CUNY/GTECH785/R-Spatial/Session_8/R-Spatial_II_Lab/R-Spatial_II_Lab/tests-by-zcta_2020_04_12.csv", show_col_types = FALSE)
head(covid_data)
## # A tibble: 6 × 4
## MODZCTA Positive Total zcta_cum.perc_pos
## <dbl> <dbl> <dbl> <dbl>
## 1 NA 1934 2082 92.9
## 2 10001 211 448 47.1
## 3 10002 539 1024 52.6
## 4 10003 279 662 42.2
## 5 10004 23 59 39.0
## 6 10005 38 116 32.8
covid_data <- covid_data %>%
rename(ZIPCODE = MODZCTA) %>%
mutate(ZIPCODE = as.character(ZIPCODE))
head(covid_data)
## # A tibble: 6 × 4
## ZIPCODE Positive Total zcta_cum.perc_pos
## <chr> <dbl> <dbl> <dbl>
## 1 <NA> 1934 2082 92.9
## 2 10001 211 448 47.1
## 3 10002 539 1024 52.6
## 4 10003 279 662 42.2
## 5 10004 23 59 39.0
## 6 10005 38 116 32.8
nyc_postal <- st_read("C:/Users/vikto/OneDrive - Hunter - CUNY/GTECH785/R-Spatial/Session_7/R-Spatial_I_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp")
## Reading layer `ZIP_CODE_040114' from data source
## `C:\Users\vikto\OneDrive - Hunter - CUNY\GTECH785\R-Spatial\Session_7\R-Spatial_I_Lab\ZIP_CODE_040114\ZIP_CODE_040114.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
nyc_postal <- nyc_postal %>% mutate(ZIPCODE = as.character(ZIPCODE))
print(nyc_postal)
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
## First 10 features:
## ZIPCODE BLDGZIP PO_NAME POPULATION AREA STATE COUNTY ST_FIPS CTY_FIPS
## 1 11436 0 Jamaica 18681 22699295 NY Queens 36 081
## 2 11213 0 Brooklyn 62426 29631004 NY Kings 36 047
## 3 11212 0 Brooklyn 83866 41972104 NY Kings 36 047
## 4 11225 0 Brooklyn 56527 23698630 NY Kings 36 047
## 5 11218 0 Brooklyn 72280 36868799 NY Kings 36 047
## 6 11226 0 Brooklyn 106132 39408598 NY Kings 36 047
## 7 11219 0 Brooklyn 92561 42002738 NY Kings 36 047
## 8 11210 0 Brooklyn 67067 47887023 NY Kings 36 047
## 9 11230 0 Brooklyn 80857 49926703 NY Kings 36 047
## 10 11204 0 Brooklyn 77354 43555185 NY Kings 36 047
## URL SHAPE_AREA SHAPE_LEN geometry
## 1 http://www.usps.com/ 0 0 POLYGON ((1038098 188138.4,...
## 2 http://www.usps.com/ 0 0 POLYGON ((1001614 186926.4,...
## 3 http://www.usps.com/ 0 0 POLYGON ((1011174 183696.3,...
## 4 http://www.usps.com/ 0 0 POLYGON ((995908.4 183617.6...
## 5 http://www.usps.com/ 0 0 POLYGON ((991997.1 176307.5...
## 6 http://www.usps.com/ 0 0 POLYGON ((994821.5 177865.7...
## 7 http://www.usps.com/ 0 0 POLYGON ((987286.4 173946.5...
## 8 http://www.usps.com/ 0 0 POLYGON ((995796 171110.1, ...
## 9 http://www.usps.com/ 0 0 POLYGON ((994099.3 171240.7...
## 10 http://www.usps.com/ 0 0 POLYGON ((989500.2 170730.2...
nyc_postal_covid <- nyc_postal %>%
left_join(covid_data, by = "ZIPCODE")
print(nyc_postal_covid)
## Simple feature collection with 263 features and 15 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
## First 10 features:
## ZIPCODE BLDGZIP PO_NAME POPULATION AREA STATE COUNTY ST_FIPS CTY_FIPS
## 1 11436 0 Jamaica 18681 22699295 NY Queens 36 081
## 2 11213 0 Brooklyn 62426 29631004 NY Kings 36 047
## 3 11212 0 Brooklyn 83866 41972104 NY Kings 36 047
## 4 11225 0 Brooklyn 56527 23698630 NY Kings 36 047
## 5 11218 0 Brooklyn 72280 36868799 NY Kings 36 047
## 6 11226 0 Brooklyn 106132 39408598 NY Kings 36 047
## 7 11219 0 Brooklyn 92561 42002738 NY Kings 36 047
## 8 11210 0 Brooklyn 67067 47887023 NY Kings 36 047
## 9 11230 0 Brooklyn 80857 49926703 NY Kings 36 047
## 10 11204 0 Brooklyn 77354 43555185 NY Kings 36 047
## URL SHAPE_AREA SHAPE_LEN Positive Total zcta_cum.perc_pos
## 1 http://www.usps.com/ 0 0 269 412 65.29
## 2 http://www.usps.com/ 0 0 793 1296 61.19
## 3 http://www.usps.com/ 0 0 842 1302 64.67
## 4 http://www.usps.com/ 0 0 632 1001 63.14
## 5 http://www.usps.com/ 0 0 976 1606 60.77
## 6 http://www.usps.com/ 0 0 995 1527 65.16
## 7 http://www.usps.com/ 0 0 1679 2476 67.81
## 8 http://www.usps.com/ 0 0 898 1427 62.93
## 9 http://www.usps.com/ 0 0 1301 2091 62.22
## 10 http://www.usps.com/ 0 0 1110 1791 61.98
## geometry
## 1 POLYGON ((1038098 188138.4,...
## 2 POLYGON ((1001614 186926.4,...
## 3 POLYGON ((1011174 183696.3,...
## 4 POLYGON ((995908.4 183617.6...
## 5 POLYGON ((991997.1 176307.5...
## 6 POLYGON ((994821.5 177865.7...
## 7 POLYGON ((987286.4 173946.5...
## 8 POLYGON ((995796 171110.1, ...
## 9 POLYGON ((994099.3 171240.7...
## 10 POLYGON ((989500.2 170730.2...
retail_data <- read_csv("D:/Session_7/R-Spatial_I_Lab/NYS_Retail_Food_Stores.csv", show_col_types = FALSE)
head(retail_data)
## # A tibble: 6 × 15
## County `License Number` `Operation Type` `Establishment Type` `Entity Name`
## <chr> <chr> <chr> <chr> <chr>
## 1 Albany 733149 Store A SPEEDWAY LLC
## 2 Albany 704590 Store JAC 1250 SELKIRK INC
## 3 Albany 727909 Store JAC RED-KAP SALES I…
## 4 Albany 720557 Store JAC SAEED SADIQ, SA…
## 5 Albany 015890 Store A AZIZ MOHAMMAD S
## 6 Albany 735254 Store JAC 7-ELEVEN INC
## # ℹ 10 more variables: `DBA Name` <chr>, `Street Number` <chr>,
## # `Street Name` <chr>, `Address Line 2` <lgl>, `Address Line 3` <lgl>,
## # City <chr>, State <chr>, `Zip Code` <dbl>, `Square Footage` <dbl>,
## # Location <chr>
retail_data <- retail_data %>%
mutate(Coords = str_extract(Location, "\\(.*\\)"),
Coords = str_remove_all(Coords, "[\\(\\)]")) %>%
separate(Coords, into = c("Lat", "Long"), sep = ",\\s*") %>%
mutate(Lat = as.numeric(Lat), Long = as.numeric(Long))
retail_data_clean <- retail_data %>% drop_na(Lat, Long)
retail_sf <- st_as_sf(retail_data_clean, coords = c("Long", "Lat"), crs = 4326)
print(retail_sf)
## Simple feature collection with 23974 features and 15 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -79.75953 ymin: 40.50782 xmax: -71.93873 ymax: 44.99484
## Geodetic CRS: WGS 84
## # A tibble: 23,974 × 16
## County `License Number` `Operation Type` `Establishment Type` `Entity Name`
## * <chr> <chr> <chr> <chr> <chr>
## 1 Albany 733149 Store A SPEEDWAY LLC
## 2 Albany 704590 Store JAC 1250 SELKIRK I…
## 3 Albany 727909 Store JAC RED-KAP SALES …
## 4 Albany 720557 Store JAC SAEED SADIQ, S…
## 5 Albany 015890 Store A AZIZ MOHAMMAD S
## 6 Albany 735254 Store JAC 7-ELEVEN INC
## 7 Albany 708848 Store JAC ADVANCED FRESH…
## 8 Albany 713889 Store JAC ADVANCED FRESH…
## 9 Albany 715759 Store JAC ADVANCED FRESH…
## 10 Albany 723927 Store JAC ADVANCED FRESH…
## # ℹ 23,964 more rows
## # ℹ 11 more variables: `DBA Name` <chr>, `Street Number` <chr>,
## # `Street Name` <chr>, `Address Line 2` <lgl>, `Address Line 3` <lgl>,
## # City <chr>, State <chr>, `Zip Code` <dbl>, `Square Footage` <dbl>,
## # Location <chr>, geometry <POINT [°]>
nyc_postal_4326 <- st_transform(nyc_postal, 4326)
retail_join <- st_join(retail_sf, nyc_postal_4326, join = st_intersects)
retail_by_zip <- retail_join %>%
group_by(ZIPCODE) %>%
summarize(num_retail = n())
print(retail_by_zip)
## Simple feature collection with 184 features and 2 fields
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: -79.75953 ymin: 40.50782 xmax: -71.93873 ymax: 44.99484
## Geodetic CRS: WGS 84
## # A tibble: 184 × 3
## ZIPCODE num_retail geometry
## <chr> <int> <MULTIPOINT [°]>
## 1 10001 54 ((-73.99079 40.74566), (-73.99128 40.74498), (-73.98799 4…
## 2 10002 199 ((-73.98288 40.7211), (-73.98293 40.71977), (-73.98288 40…
## 3 10003 94 ((-73.98276 40.73535), (-73.98062 40.734), (-73.98038 40.…
## 4 10004 10 ((-74.01357 40.70553), (-74.0158 40.70494), (-74.01267 40…
## 5 10005 9 ((-74.00718 40.70498), (-74.00774 40.70621), (-74.00858 4…
## 6 10006 5 ((-74.01311 40.70753), (-74.01316 40.70783), (-74.013 40.…
## 7 10007 20 ((-74.00739 40.71287), (-74.00897 40.71313), (-74.00823 4…
## 8 10009 84 ((-73.9769 40.72673), (-73.9768 40.72687), (-73.97664 40.…
## 9 10010 48 ((-73.98189 40.74054), (-73.98206 40.74031), (-73.9825 40…
## 10 10011 112 ((-74.00028 40.73263), (-74.00089 40.7369), (-74.00131 40…
## # ℹ 174 more rows
health_data <- read_csv("D:/Session_7/R-Spatial_I_Lab/NYS_Health_Facility.csv", show_col_types = FALSE)
head(health_data)
## # A tibble: 6 × 36
## `Facility ID` `Facility Name` `Short Description` Description
## <dbl> <chr> <chr> <chr>
## 1 204 Hospice at Lourdes HSPC Hospice
## 2 620 Charles T Sitrin Health Care Ce… NH Residentia…
## 3 654 Central Park Rehabilitation and… NH Residentia…
## 4 1156 East Side Nursing Home NH Residentia…
## 5 2589 Wellsville Manor Care Center NH Residentia…
## 6 3455 Harris Hill Nursing Facility, L… NH Residentia…
## # ℹ 32 more variables: `Facility Open Date` <chr>, `Facility Address 1` <chr>,
## # `Facility Address 2` <chr>, `Facility City` <chr>, `Facility State` <chr>,
## # `Facility Zip Code` <chr>, `Facility Phone Number` <dbl>,
## # `Facility Fax Number` <dbl>, `Facility Website` <chr>,
## # `Facility County Code` <dbl>, `Facility County` <chr>,
## # `Regional Office ID` <dbl>, `Regional Office` <chr>,
## # `Main Site Name` <chr>, `Main Site Facility ID` <dbl>, …
health_data_clean <- health_data %>%
drop_na(`Facility Longitude`, `Facility Latitude`)
health_sf <- st_as_sf(health_data_clean,
coords = c("Facility Longitude", "Facility Latitude"),
crs = 4326)
print(health_sf)
## Simple feature collection with 3848 features and 34 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -79.6299 ymin: -75.45935 xmax: 43.21162 ymax: 44.97849
## Geodetic CRS: WGS 84
## # A tibble: 3,848 × 35
## `Facility ID` `Facility Name` `Short Description` Description
## * <dbl> <chr> <chr> <chr>
## 1 204 Hospice at Lourdes HSPC Hospice
## 2 620 Charles T Sitrin Health Care C… NH Residentia…
## 3 1156 East Side Nursing Home NH Residentia…
## 4 2589 Wellsville Manor Care Center NH Residentia…
## 5 3455 Harris Hill Nursing Facility, … NH Residentia…
## 6 3853 Garden City Surgi Center DTC Diagnostic…
## 7 4249 Willcare CHHA Certified …
## 8 4473 Good Shepherd Hospice HSPC Hospice
## 9 6230 NYU Langone Rutherford HOSP-EC Hospital E…
## 10 6482 Endoscopy Center of Long Islan… DTC Diagnostic…
## # ℹ 3,838 more rows
## # ℹ 31 more variables: `Facility Open Date` <chr>, `Facility Address 1` <chr>,
## # `Facility Address 2` <chr>, `Facility City` <chr>, `Facility State` <chr>,
## # `Facility Zip Code` <chr>, `Facility Phone Number` <dbl>,
## # `Facility Fax Number` <dbl>, `Facility Website` <chr>,
## # `Facility County Code` <dbl>, `Facility County` <chr>,
## # `Regional Office ID` <dbl>, `Regional Office` <chr>, …
health_join <- st_join(health_sf, nyc_postal_4326, join = st_intersects)
health_by_zip <- health_join %>%
group_by(ZIPCODE) %>%
summarize(num_health = n())
print(health_by_zip)
## Simple feature collection with 160 features and 2 fields
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: -79.6299 ymin: -75.45935 xmax: 43.21162 ymax: 44.97849
## Geodetic CRS: WGS 84
## # A tibble: 160 × 3
## ZIPCODE num_health geometry
## <chr> <int> <GEOMETRY [°]>
## 1 10001 11 MULTIPOINT ((-73.99438 40.74568), (-73.99186 40.74573), (…
## 2 10002 20 MULTIPOINT ((-73.97523 40.71873), (-73.97976 40.71812), (…
## 3 10003 15 MULTIPOINT ((-73.9816 40.73267), (-73.99121 40.72608), (-…
## 4 10004 1 POINT (-74.01255 40.70661)
## 5 10006 1 POINT (-74.01294 40.70633)
## 6 10007 1 POINT (-74.01108 40.71315)
## 7 10009 7 MULTIPOINT ((-73.97613 40.72456), (-73.9792 40.72897), (-…
## 8 10010 13 MULTIPOINT ((-73.98011 40.73908), (-73.98014 40.73904), (…
## 9 10011 11 MULTIPOINT ((-74.00049 40.73758), (-73.99702 40.73697), (…
## 10 10012 6 MULTIPOINT ((-73.99354 40.72552), (-73.99577 40.72169), (…
## # ℹ 150 more rows
census_tracts <- st_read("D:/Session_8/R-Spatial_II_Lab/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp")
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `D:\Session_8\R-Spatial_II_Lab\R-Spatial_II_Lab\2010 Census Tracts\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS84(DD)
print(census_tracts)
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS84(DD)
## First 10 features:
## boro_code boro_ct201 boro_name cdeligibil ct2010 ctlabel ntacode
## 1 5 5000900 Staten Island E 000900 9 SI22
## 2 1 1009800 Manhattan I 009800 98 MN19
## 3 1 1010000 Manhattan I 010000 100 MN19
## 4 1 1010200 Manhattan I 010200 102 MN17
## 5 1 1010400 Manhattan I 010400 104 MN17
## 6 1 1011300 Manhattan I 011300 113 MN17
## 7 1 1011402 Manhattan I 011402 114.02 MN40
## 8 1 1013000 Manhattan I 013000 130 MN40
## 9 1 1014000 Manhattan I 014000 140 MN40
## 10 1 1014801 Manhattan I 014801 148.01 MN40
## ntaname puma shape_area shape_leng
## 1 West New Brighton-New Brighton-St. George 3903 2497009.7 7729.017
## 2 Turtle Bay-East Midtown 3808 1906016.4 5534.200
## 3 Turtle Bay-East Midtown 3808 1860938.4 5692.169
## 4 Midtown-Midtown South 3807 1860992.7 5687.802
## 5 Midtown-Midtown South 3807 1864600.4 5693.036
## 6 Midtown-Midtown South 3807 1890907.3 5699.861
## 7 Upper East Side-Carnegie Hill 3805 1063547.4 4125.256
## 8 Upper East Side-Carnegie Hill 3805 1918144.5 5807.973
## 9 Upper East Side-Carnegie Hill 3805 1925984.2 5820.816
## 10 Upper East Side-Carnegie Hill 3805 559216.2 3135.951
## geometry
## 1 MULTIPOLYGON (((-74.07921 4...
## 2 MULTIPOLYGON (((-73.96433 4...
## 3 MULTIPOLYGON (((-73.96802 4...
## 4 MULTIPOLYGON (((-73.97124 4...
## 5 MULTIPOLYGON (((-73.97446 4...
## 6 MULTIPOLYGON (((-73.98412 4...
## 7 MULTIPOLYGON (((-73.96476 4...
## 8 MULTIPOLYGON (((-73.96148 4...
## 9 MULTIPOLYGON (((-73.95495 4...
## 10 MULTIPOLYGON (((-73.95398 4...
acs_data <- read_csv("D:/Session_8/R-Spatial_II_Lab/R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv",
skip = 1, show_col_types = FALSE)
## New names:
## • `Estimate!!RACE!!Total population!!One race` -> `Estimate!!RACE!!Total
## population!!One race...12`
## • `Margin of Error!!RACE!!Total population!!One race` -> `Margin of
## Error!!RACE!!Total population!!One race...13`
## • `Percent Estimate!!RACE!!Total population!!One race` -> `Percent
## Estimate!!RACE!!Total population!!One race...14`
## • `Percent Margin of Error!!RACE!!Total population!!One race` -> `Percent
## Margin of Error!!RACE!!Total population!!One race...15`
## • `Estimate!!RACE!!Total population!!Two or more races` ->
## `Estimate!!RACE!!Total population!!Two or more races...16`
## • `Margin of Error!!RACE!!Total population!!Two or more races` -> `Margin of
## Error!!RACE!!Total population!!Two or more races...17`
## • `Percent Estimate!!RACE!!Total population!!Two or more races` -> `Percent
## Estimate!!RACE!!Total population!!Two or more races...18`
## • `Percent Margin of Error!!RACE!!Total population!!Two or more races` ->
## `Percent Margin of Error!!RACE!!Total population!!Two or more races...19`
## • `Estimate!!RACE!!Total population!!One race` -> `Estimate!!RACE!!Total
## population!!One race...20`
## • `Margin of Error!!RACE!!Total population!!One race` -> `Margin of
## Error!!RACE!!Total population!!One race...21`
## • `Percent Estimate!!RACE!!Total population!!One race` -> `Percent
## Estimate!!RACE!!Total population!!One race...22`
## • `Percent Margin of Error!!RACE!!Total population!!One race` -> `Percent
## Margin of Error!!RACE!!Total population!!One race...23`
## • `Estimate!!RACE!!Total population!!Two or more races` ->
## `Estimate!!RACE!!Total population!!Two or more races...108`
## • `Margin of Error!!RACE!!Total population!!Two or more races` -> `Margin of
## Error!!RACE!!Total population!!Two or more races...109`
## • `Percent Estimate!!RACE!!Total population!!Two or more races` -> `Percent
## Estimate!!RACE!!Total population!!Two or more races...110`
## • `Percent Margin of Error!!RACE!!Total population!!Two or more races` ->
## `Percent Margin of Error!!RACE!!Total population!!Two or more races...111`
## • `Estimate!!SEX AND AGE!!Total population!!18 years and over` ->
## `Estimate!!SEX AND AGE!!Total population!!18 years and over...316`
## • `Margin of Error!!SEX AND AGE!!Total population!!18 years and over` ->
## `Margin of Error!!SEX AND AGE!!Total population!!18 years and over...317`
## • `Percent Estimate!!SEX AND AGE!!Total population!!18 years and over` ->
## `Percent Estimate!!SEX AND AGE!!Total population!!18 years and over...318`
## • `Percent Margin of Error!!SEX AND AGE!!Total population!!18 years and over`
## -> `Percent Margin of Error!!SEX AND AGE!!Total population!!18 years and
## over...319`
## • `Estimate!!SEX AND AGE!!Total population!!65 years and over` ->
## `Estimate!!SEX AND AGE!!Total population!!65 years and over...328`
## • `Margin of Error!!SEX AND AGE!!Total population!!65 years and over` ->
## `Margin of Error!!SEX AND AGE!!Total population!!65 years and over...329`
## • `Percent Estimate!!SEX AND AGE!!Total population!!65 years and over` ->
## `Percent Estimate!!SEX AND AGE!!Total population!!65 years and over...330`
## • `Percent Margin of Error!!SEX AND AGE!!Total population!!65 years and over`
## -> `Percent Margin of Error!!SEX AND AGE!!Total population!!65 years and
## over...331`
## • `Estimate!!SEX AND AGE!!Total population!!18 years and over` ->
## `Estimate!!SEX AND AGE!!Total population!!18 years and over...332`
## • `Margin of Error!!SEX AND AGE!!Total population!!18 years and over` ->
## `Margin of Error!!SEX AND AGE!!Total population!!18 years and over...333`
## • `Percent Estimate!!SEX AND AGE!!Total population!!18 years and over` ->
## `Percent Estimate!!SEX AND AGE!!Total population!!18 years and over...334`
## • `Percent Margin of Error!!SEX AND AGE!!Total population!!18 years and over`
## -> `Percent Margin of Error!!SEX AND AGE!!Total population!!18 years and
## over...335`
## • `Estimate!!SEX AND AGE!!Total population!!65 years and over` ->
## `Estimate!!SEX AND AGE!!Total population!!65 years and over...348`
## • `Margin of Error!!SEX AND AGE!!Total population!!65 years and over` ->
## `Margin of Error!!SEX AND AGE!!Total population!!65 years and over...349`
## • `Percent Estimate!!SEX AND AGE!!Total population!!65 years and over` ->
## `Percent Estimate!!SEX AND AGE!!Total population!!65 years and over...350`
## • `Percent Margin of Error!!SEX AND AGE!!Total population!!65 years and over`
## -> `Percent Margin of Error!!SEX AND AGE!!Total population!!65 years and
## over...351`
head(acs_data)
## # A tibble: 6 × 358
## id `Geographic Area Name` Percent Margin of Er…¹ Estimate!!SEX AND AG…²
## <chr> <chr> <chr> <chr>
## 1 1400000U… Census Tract 1, Bronx… 50.4 200.0
## 2 1400000U… Census Tract 2, Bronx… 9.5 95.1
## 3 1400000U… Census Tract 4, Bronx… 8.8 91.4
## 4 1400000U… Census Tract 16, Bron… 8.9 42.5
## 5 1400000U… Census Tract 19, Bron… 25.5 245.5
## 6 1400000U… Census Tract 20, Bron… 10.9 35.9
## # ℹ abbreviated names:
## # ¹​`Percent Margin of Error!!SEX AND AGE!!Total population!!65 years and over!!Female`,
## # ²​`Estimate!!SEX AND AGE!!Total population!!65 years and over!!Sex ratio (males per 100 females)`
## # ℹ 354 more variables:
## # `Margin of Error!!SEX AND AGE!!Total population!!65 years and over!!Sex ratio (males per 100 females)` <chr>,
## # `Percent Estimate!!SEX AND AGE!!Total population!!65 years and over!!Sex ratio (males per 100 females)` <chr>,
## # `Percent Margin of Error!!SEX AND AGE!!Total population!!65 years and over!!Sex ratio (males per 100 females)` <chr>, …
acs_data <- acs_data %>%
mutate(GEOID_clean = str_sub(id, -11, -1))
head(acs_data$GEOID_clean)
## [1] "36005000100" "36005000200" "36005000400" "36005001600" "36005001900"
## [6] "36005002000"
census_tracts <- census_tracts %>%
mutate(
countyFIPS = case_when(
boro_code == 1 ~ "061", # Manhattan
boro_code == 2 ~ "005", # Bronx
boro_code == 3 ~ "047", # Brooklyn
boro_code == 4 ~ "081", # Queens
boro_code == 5 ~ "085", # Staten Island
TRUE ~ NA_character_
),
fullGEOID = paste0("36", countyFIPS, ct2010)
)
head(census_tracts[, c("ct2010", "countyFIPS", "fullGEOID")])
## Simple feature collection with 6 features and 3 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.08721 ymin: 40.63981 xmax: -73.96433 ymax: 40.76364
## Geodetic CRS: WGS84(DD)
## ct2010 countyFIPS fullGEOID geometry
## 1 000900 085 36085000900 MULTIPOLYGON (((-74.07921 4...
## 2 009800 061 36061009800 MULTIPOLYGON (((-73.96433 4...
## 3 010000 061 36061010000 MULTIPOLYGON (((-73.96802 4...
## 4 010200 061 36061010200 MULTIPOLYGON (((-73.97124 4...
## 5 010400 061 36061010400 MULTIPOLYGON (((-73.97446 4...
## 6 011300 061 36061011300 MULTIPOLYGON (((-73.98412 4...
census_tracts_acs <- census_tracts %>%
left_join(acs_data, by = c("fullGEOID" = "GEOID_clean"))
head(census_tracts_acs[, c("fullGEOID", "ct2010", "Estimate!!RACE!!Total population",
"Estimate!!SEX AND AGE!!Total population!!65 years and over...348")])
## Simple feature collection with 6 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.08721 ymin: 40.63981 xmax: -73.96433 ymax: 40.76364
## Geodetic CRS: WGS84(DD)
## fullGEOID ct2010 Estimate!!RACE!!Total population
## 1 36085000900 000900 1826
## 2 36061009800 009800 7200
## 3 36061010000 010000 1768
## 4 36061010200 010200 100
## 5 36061010400 010400 919
## 6 36061011300 011300 110
## Estimate!!SEX AND AGE!!Total population!!65 years and over...348
## 1 173
## 2 1624
## 3 211
## 4 24
## 5 263
## 6 8
## geometry
## 1 MULTIPOLYGON (((-74.07921 4...
## 2 MULTIPOLYGON (((-73.96433 4...
## 3 MULTIPOLYGON (((-73.96802 4...
## 4 MULTIPOLYGON (((-73.97124 4...
## 5 MULTIPOLYGON (((-73.97446 4...
## 6 MULTIPOLYGON (((-73.98412 4...
nyc_postal_4326 <- st_transform(nyc_postal, 4326)
census_tracts_acs_aligned <- st_transform(census_tracts_acs, st_crs(nyc_postal_4326))
tracts_in_zip <- st_join(census_tracts_acs_aligned, nyc_postal_4326, join = st_intersects)
summary(tracts_in_zip$ZIPCODE)
## Length Class Mode
## 4021 character character
tracts_in_zip <- tracts_in_zip %>%
mutate(
total_pop = as.numeric(`Estimate!!RACE!!Total population`),
elderly_pop = as.numeric(`Estimate!!SEX AND AGE!!Total population!!65 years and over...348`)
)
summary(tracts_in_zip$total_pop)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 2345 3586 3926 5102 28272
summary(tracts_in_zip$elderly_pop)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 276.0 443.0 551.3 689.0 7634.0
acs_by_zip <- tracts_in_zip %>%
group_by(ZIPCODE) %>%
summarize(
total_population = sum(total_pop, na.rm = TRUE),
elderly_population = sum(elderly_pop, na.rm = TRUE)
)
print(acs_by_zip)
## Simple feature collection with 249 features and 3 fields
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS 84
## # A tibble: 249 × 4
## ZIPCODE total_population elderly_population geometry
## <chr> <dbl> <dbl> <GEOMETRY [°]>
## 1 00083 141819 31179 POLYGON ((-73.95817 40.80058, -7…
## 2 10001 53108 6843 POLYGON ((-73.99329 40.74223, -7…
## 3 10002 145548 28792 MULTIPOLYGON (((-73.99353 40.721…
## 4 10003 95677 12213 POLYGON ((-73.99677 40.72543, -7…
## 5 10004 15179 327 MULTIPOLYGON (((-73.9975 40.6996…
## 6 10005 23105 514 MULTIPOLYGON (((-74.0007 40.6943…
## 7 10006 23105 514 MULTIPOLYGON (((-74.0007 40.6943…
## 8 10007 53423 5888 MULTIPOLYGON (((-73.9948 40.7032…
## 9 10009 117245 16686 MULTIPOLYGON (((-73.98718 40.733…
## 10 10010 86369 12220 MULTIPOLYGON (((-73.96135 40.730…
## # ℹ 239 more rows
nyc_zip_final <- nyc_postal %>%
left_join(covid_data, by = "ZIPCODE") %>%
left_join(st_set_geometry(retail_by_zip, NULL), by = "ZIPCODE") %>% # Retail store counts
left_join(st_set_geometry(health_by_zip, NULL), by = "ZIPCODE") %>%
left_join(st_set_geometry(acs_by_zip, NULL), by = "ZIPCODE")
print(nyc_zip_final)
## Simple feature collection with 263 features and 19 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
## First 10 features:
## ZIPCODE BLDGZIP PO_NAME POPULATION AREA STATE COUNTY ST_FIPS CTY_FIPS
## 1 11436 0 Jamaica 18681 22699295 NY Queens 36 081
## 2 11213 0 Brooklyn 62426 29631004 NY Kings 36 047
## 3 11212 0 Brooklyn 83866 41972104 NY Kings 36 047
## 4 11225 0 Brooklyn 56527 23698630 NY Kings 36 047
## 5 11218 0 Brooklyn 72280 36868799 NY Kings 36 047
## 6 11226 0 Brooklyn 106132 39408598 NY Kings 36 047
## 7 11219 0 Brooklyn 92561 42002738 NY Kings 36 047
## 8 11210 0 Brooklyn 67067 47887023 NY Kings 36 047
## 9 11230 0 Brooklyn 80857 49926703 NY Kings 36 047
## 10 11204 0 Brooklyn 77354 43555185 NY Kings 36 047
## URL SHAPE_AREA SHAPE_LEN Positive Total zcta_cum.perc_pos
## 1 http://www.usps.com/ 0 0 269 412 65.29
## 2 http://www.usps.com/ 0 0 793 1296 61.19
## 3 http://www.usps.com/ 0 0 842 1302 64.67
## 4 http://www.usps.com/ 0 0 632 1001 63.14
## 5 http://www.usps.com/ 0 0 976 1606 60.77
## 6 http://www.usps.com/ 0 0 995 1527 65.16
## 7 http://www.usps.com/ 0 0 1679 2476 67.81
## 8 http://www.usps.com/ 0 0 898 1427 62.93
## 9 http://www.usps.com/ 0 0 1301 2091 62.22
## 10 http://www.usps.com/ 0 0 1110 1791 61.98
## num_retail num_health total_population elderly_population
## 1 3 2 53493 6330
## 2 118 9 122924 14917
## 3 187 9 117274 15428
## 4 103 4 100168 12478
## 5 135 10 110315 12753
## 6 236 13 160919 19368
## 7 176 15 144061 17283
## 8 73 7 116134 17179
## 9 128 15 143974 20794
## 10 144 4 130209 18084
## geometry
## 1 POLYGON ((1038098 188138.4,...
## 2 POLYGON ((1001614 186926.4,...
## 3 POLYGON ((1011174 183696.3,...
## 4 POLYGON ((995908.4 183617.6...
## 5 POLYGON ((991997.1 176307.5...
## 6 POLYGON ((994821.5 177865.7...
## 7 POLYGON ((987286.4 173946.5...
## 8 POLYGON ((995796 171110.1, ...
## 9 POLYGON ((994099.3 171240.7...
## 10 POLYGON ((989500.2 170730.2...
save(nyc_zip_final, file = "D:/Session_7/R-Spatial_I_Lab/nyc_zip_final.RData")
st_write(nyc_zip_final, "D:/Session_7/R-Spatial_I_Lab/nyc_zip_final.gpkg", delete_layer = TRUE)
## Deleting layer `nyc_zip_final' using driver `GPKG'
## Writing layer `nyc_zip_final' to data source
## `D:/Session_7/R-Spatial_I_Lab/nyc_zip_final.gpkg' using driver `GPKG'
## Writing 263 features with 19 fields and geometry type Polygon.
library(ggplot2)
ggplot(nyc_zip_final) +
geom_sf(aes(fill = Positive)) +
scale_fill_viridis_c(option = "plasma") +
labs(title = "NYC COVID‑19 Positive Cases by Zip Code",
subtitle = "Variation in COVID‑19 Cases Across NYC Zip Codes",
fill = "Positive Cases") +
theme_minimal()
my_map <- mapview(nyc_zip_final, zcol = "Positive", layer.name = "Positive Cases")
my_map
library(mapview)
mapview(nyc_zip_final,
zcol = "total_population",
legend = TRUE,
layer.name = "Total Population")