setwd("~/Desktop/R-spatial")

R Spatial Lab Assignment # 2

task 1:

Join the COVID-19 data to the NYC zip code area data (sf or sp polygons).

setwd("~/Desktop/R-spatial")
covid_4_12<-read_csv("/Users/kaitlynmaritan/Desktop/R-spatial/Data/R-spatial_II_Lab/tests-by-zcta_2020_04_12.csv")
## Rows: 178 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (4): MODZCTA, Positive, Total, zcta_cum.perc_pos
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nyc_zipsf<-st_read("/Users/kaitlynmaritan/Desktop/R-spatial/Data/R-spatial-data/R-Spatial_I_Labb/ZIP_CODE/ZIP_CODE_040114.shp")
## Reading layer `ZIP_CODE_040114' from data source 
##   `/Users/kaitlynmaritan/Desktop/R-spatial/Data/R-spatial-data/R-Spatial_I_Labb/ZIP_CODE/ZIP_CODE_040114.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
covid_4_12<-covid_4_12 %>%
  rename(ZIPCODE=MODZCTA)

if (is.na(st_crs(nyc_zipsf))) {
  st_crs(nyc_zipsf) <- 2263 
} else {
  nyc_zipsf <- st_transform(nyc_zipsf, 2263)
}

covid_zip<-base::merge(covid_4_12,nyc_zipsf,by.x="ZIPCODE",by.y="ZIPCODE")
covid_zip <- st_as_sf(covid_zip)

task 2:

Aggregate the NYC food retails store data (points) to the zip code data, so that we know how many retail stores in each zip code area. Note that not all locations are for food retail. And we need to choose the specific types according to the data.

load("/Users/kaitlynmaritan/Desktop/R-spatial/Data/R-spatial-data/R-Spatial_I_Labb/nysFoodStoreSF.RData")

nysFoodStoreSF<-nysFoodStoreSF %>%
  rename(ZIPCODE=Zip.Code)

covid_zip <- st_as_sf(covid_zip)

if (st_crs(nysFoodStoreSF) != st_crs(covid_zip)) {
    covid_zip <- st_transform(covid_zip, st_crs(nysFoodStoreSF))
}

food_store_map <- nysFoodStoreSF %>%
  filter(str_detect(Establishment.Type, '[AJD]')) %>%
  st_join(covid_zip, join = st_contains) %>%
  group_by('ZIPCODE') %>%
  summarise(FoodStoreNum = n())

plot(food_store_map["FoodStoreNum"], breaks = "jenks", main = "Number of Food Stores")

st_crs(nysFoodStoreSF)
## Coordinate Reference System:
##   User input: EPSG:4326 
##   wkt:
## GEOGCRS["WGS 84",
##     ENSEMBLE["World Geodetic System 1984 ensemble",
##         MEMBER["World Geodetic System 1984 (Transit)"],
##         MEMBER["World Geodetic System 1984 (G730)"],
##         MEMBER["World Geodetic System 1984 (G873)"],
##         MEMBER["World Geodetic System 1984 (G1150)"],
##         MEMBER["World Geodetic System 1984 (G1674)"],
##         MEMBER["World Geodetic System 1984 (G1762)"],
##         MEMBER["World Geodetic System 1984 (G2139)"],
##         MEMBER["World Geodetic System 1984 (G2296)"],
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]],
##         ENSEMBLEACCURACY[2.0]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     USAGE[
##         SCOPE["Horizontal component of 3D system."],
##         AREA["World."],
##         BBOX[-90,-180,90,180]],
##     ID["EPSG",4326]]
nycRetailFood<-sf::st_transform(nysFoodStoreSF,st_crs(covid_zip))

food_aggregatepoints<-nysFoodStoreSF%>%
  group_by(ZIPCODE) %>%
  summarise(store_count = n())

food_aggregatepoints %>% head()
## Simple feature collection with 6 features and 2 fields
## Geometry type: MULTIPOINT
## Dimension:     XY
## Bounding box:  xmin: -74.0167 ymin: 40.70184 xmax: -73.95836 ymax: 40.77978
## Geodetic CRS:  WGS 84
## # A tibble: 6 × 3
##   ZIPCODE store_count                                                   geometry
##     <int>       <int>                                           <MULTIPOINT [°]>
## 1   10001          57 ((-74.00089 40.7369), (-74.001 40.73673), (-73.99079 40.7…
## 2   10002         192 ((-73.95836 40.77978), (-73.96342 40.77183), (-73.98288 4…
## 3   10003          87 ((-73.98276 40.73535), (-73.98062 40.734), (-73.98038 40.…
## 4   10004          10 ((-74.01357 40.70553), (-74.0158 40.70494), (-74.01267 40…
## 5   10005          10 ((-73.98981 40.73894), (-74.00718 40.70498), (-74.00774 4…
## 6   10006           4 ((-74.01311 40.70753), (-74.01316 40.70783), (-74.013 40.…
sum(food_aggregatepoints$ZIPCODE, na.rm = T)
## [1] 2013357
food_aggregatepoints %>% dplyr::filter(is.na(ZIPCODE))
## Simple feature collection with 0 features and 2 fields
## Bounding box:  xmin: NA ymin: NA xmax: NA ymax: NA
## Geodetic CRS:  WGS 84
## # A tibble: 0 × 3
## # ℹ 3 variables: ZIPCODE <int>, store_count <int>, geometry <GEOMETRY [°]>
plot(food_aggregatepoints["ZIPCODE"], breaks='jenks')

mapview(food_aggregatepoints,cex=2)

task 3:

Aggregate the NYC health facilities (points) to the zip code data. Similarly, choose appropriate subtypes such as nursing homes from the facilities.

load("/Users/kaitlynmaritan/Desktop/R-spatial/Data/R-spatial-data/nys_facilities_df")

nycNursingHome<-nys_facilities_df %>% 
  dplyr::filter(`Short Description` == "NH")

nycNursingHome<-st_transform(nycNursingHome,st_crs(covid_zip))
st_crs(nycNursingHome)
## Coordinate Reference System:
##   User input: EPSG:4326 
##   wkt:
## GEOGCRS["WGS 84",
##     ENSEMBLE["World Geodetic System 1984 ensemble",
##         MEMBER["World Geodetic System 1984 (Transit)"],
##         MEMBER["World Geodetic System 1984 (G730)"],
##         MEMBER["World Geodetic System 1984 (G873)"],
##         MEMBER["World Geodetic System 1984 (G1150)"],
##         MEMBER["World Geodetic System 1984 (G1674)"],
##         MEMBER["World Geodetic System 1984 (G1762)"],
##         MEMBER["World Geodetic System 1984 (G2139)"],
##         MEMBER["World Geodetic System 1984 (G2296)"],
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]],
##         ENSEMBLEACCURACY[2.0]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     USAGE[
##         SCOPE["Horizontal component of 3D system."],
##         AREA["World."],
##         BBOX[-90,-180,90,180]],
##     ID["EPSG",4326]]
NH_aggregatepoints<-nycNursingHome%>%
        group_by("ZIPCODE","Facility Name","Facility ID") %>%
        summarise(ZIPCODE = n())
## `summarise()` has grouped output by '"ZIPCODE"', '"Facility Name"'. You can
## override using the `.groups` argument.
NH_aggregatepoints %>% head()
## Simple feature collection with 1 feature and 4 fields
## Geometry type: MULTIPOINT
## Dimension:     XY
## Bounding box:  xmin: -74.13534 ymin: 40.56918 xmax: -73.70622 ymax: 40.90923
## Geodetic CRS:  WGS 84
## # A tibble: 1 × 5
## # Groups:   "ZIPCODE", "Facility Name" [1]
##   `"ZIPCODE"` `"Facility Name"` `"Facility ID"` ZIPCODE
##   <chr>       <chr>             <chr>             <int>
## 1 ZIPCODE     Facility Name     Facility ID         169
## # ℹ 1 more variable: geometry <MULTIPOINT [°]>
sum(NH_aggregatepoints$ZIPCODE, na.rm = T)
## [1] 169
NH_aggregatepoints %>% dplyr::filter(is.na(ZIPCODE))
## Simple feature collection with 0 features and 4 fields
## Bounding box:  xmin: NA ymin: NA xmax: NA ymax: NA
## Geodetic CRS:  WGS 84
## # A tibble: 0 × 5
## # Groups:   "ZIPCODE", "Facility Name" [0]
## # ℹ 5 variables: "ZIPCODE" <chr>, "Facility Name" <chr>, "Facility ID" <chr>,
## #   ZIPCODE <int>, geometry <GEOMETRY [°]>
plot(st_geometry(covid_zip),border= "gray",main ="Nursing Homes")
plot(st_geometry(NH_aggregatepoints), col= "purple",pch =16, add = TRUE)

mapview(NH_aggregatepoints,cex=2)

task 4:

Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.

nycCensus<-sf::st_read('/Users/kaitlynmaritan/Desktop/R-spatial/Data/R-Spatial_II_Lab/2010_Census_Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp',stringsAsFactors=FALSE)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `/Users/kaitlynmaritan/Desktop/R-spatial/Data/R-Spatial_II_Lab/2010_Census_Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)
str(nycCensus)
## Classes 'sf' and 'data.frame':   2165 obs. of  12 variables:
##  $ boro_code : chr  "5" "1" "1" "1" ...
##  $ boro_ct201: chr  "5000900" "1009800" "1010000" "1010200" ...
##  $ boro_name : chr  "Staten Island" "Manhattan" "Manhattan" "Manhattan" ...
##  $ cdeligibil: chr  "E" "I" "I" "I" ...
##  $ ct2010    : chr  "000900" "009800" "010000" "010200" ...
##  $ ctlabel   : chr  "9" "98" "100" "102" ...
##  $ ntacode   : chr  "SI22" "MN19" "MN19" "MN17" ...
##  $ ntaname   : chr  "West New Brighton-New Brighton-St. George" "Turtle Bay-East Midtown" "Turtle Bay-East Midtown" "Midtown-Midtown South" ...
##  $ puma      : chr  "3903" "3808" "3808" "3807" ...
##  $ shape_area: num  2497010 1906016 1860938 1860993 1864600 ...
##  $ shape_leng: num  7729 5534 5692 5688 5693 ...
##  $ geometry  :sfc_MULTIPOLYGON of length 2165; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:28, 1:2] -74.1 -74.1 -74.1 -74.1 -74.1 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:11] "boro_code" "boro_ct201" "boro_name" "cdeligibil" ...
nycCensus %<>% dplyr::mutate(cntyFIPS = case_when(
  boro_name == 'Bronx' ~ '005',
  boro_name == 'Brooklyn' ~ '047',
  boro_name == 'Manhattan' ~ '061',
  boro_name == 'Queens' ~ '081',
  boro_name == 'Staten Island' ~ '085'),
  tractFIPS = paste(cntyFIPS, ct2010, sep='')
)

acsData<-readLines("/Users/kaitlynmaritan/Desktop/R-spatial/Data/R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>% 
  textConnection() %>%
  read.csv(header=TRUE, quote= "\"") %>%
  dplyr::select(GEO_ID, 
                totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
                malePop = DP05_0002E, femalePop = DP05_0003E,  
                whitePop = DP05_0037E, blackPop = DP05_0038E,
                asianPop = DP05_0067E, hispanicPop = DP05_0071E,
                adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1));

acsData %>%
  magrittr::extract(1:10,)
##                  GEO_ID totPop elderlyPop malePop femalePop whitePop blackPop
## 1  1400000US36005000100   7080         51    6503       577     1773     4239
## 2  1400000US36005000200   4542        950    2264      2278     2165     1279
## 3  1400000US36005000400   5634        710    2807      2827     2623     1699
## 4  1400000US36005001600   5917        989    2365      3552     2406     2434
## 5  1400000US36005001900   2765         76    1363      1402      585     1041
## 6  1400000US36005002000   9409        977    4119      5290     3185     4487
## 7  1400000US36005002300   4600        648    2175      2425      479     2122
## 8  1400000US36005002400    172          0     121        51       69       89
## 9  1400000US36005002500   5887        548    2958      2929      903     1344
## 10 1400000US36005002701   2868        243    1259      1609      243      987
##    asianPop hispanicPop adultPop citizenAdult censusCode
## 1       130        2329     6909         6100  005000100
## 2       119        3367     3582         2952  005000200
## 3       226        3873     4507         4214  005000400
## 4        68        3603     4416         3851  005001600
## 5       130        1413     2008         1787  005001900
## 6        29        5905     6851         6170  005002000
## 7        27        2674     3498         3056  005002300
## 8        14           0      131           42  005002400
## 9        68        4562     4237         2722  005002500
## 10        0        1985     1848         1412  005002701
popData<-merge(nycCensus, acsData, by.x ='tractFIPS', by.y = 'censusCode')
sum(popData$totPop)
## [1] 8443713

task 5:

Aggregate the ACS census data to zip code area data.

st_crs(popData)
## Coordinate Reference System:
##   User input: WGS84(DD) 
##   wkt:
## GEOGCRS["WGS84(DD)",
##     DATUM["WGS84",
##         ELLIPSOID["WGS84",6378137,298.257223563,
##             LENGTHUNIT["metre",1,
##                 ID["EPSG",9001]]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["longitude",east,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["latitude",north,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]]]
popNYC <- sf::st_transform(popData, st_crs(covid_zip))

covidPopZipNYC<-sf::st_join(covid_zip, 
                              popNYC %>% sf::st_centroid(), 
                              join = st_contains) %>% 
  group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive, Total) %>% 
  summarise(totPop = sum(totPop),
            malePctg = sum(malePop)/totPop*100, 
            asianPop = sum(asianPop),
            blackPop = sum(blackPop),
            hispanicPop = sum(hispanicPop),
            whitePop = sum(whitePop)) 
## Warning: st_centroid assumes attributes are constant over geometries
## `summarise()` has grouped output by 'ZIPCODE', 'PO_NAME', 'POPULATION',
## 'COUNTY', 'Positive'. You can override using the `.groups` argument.
covidPopZipNYC %>% head()
## Simple feature collection with 6 features and 12 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: -74.0473 ymin: 40.68392 xmax: -73.97141 ymax: 40.75769
## Geodetic CRS:  WGS 84
## # A tibble: 6 × 13
## # Groups:   ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive [6]
##   ZIPCODE PO_NAME  POPULATION COUNTY   Positive Total totPop malePctg asianPop
##     <dbl> <chr>         <dbl> <chr>       <dbl> <dbl>  <int>    <dbl>    <int>
## 1   10001 New York      22413 New York      211   448  19146     51.2     4837
## 2   10002 New York      81305 New York      539  1024  74310     48.4    32149
## 3   10003 New York      55878 New York      279   662  53487     50.3     8027
## 4   10004 New York       2187 New York       23    59     NA     NA         NA
## 5   10005 New York       8107 New York       38   116   8809     42.8     1974
## 6   10006 New York       3011 New York       10    43   4639     44.0     1195
## # ℹ 4 more variables: blackPop <int>, hispanicPop <int>, whitePop <int>,
## #   geometry <GEOMETRY [°]>
sum(covidPopZipNYC$totPop, na.rm = T)
## [1] 8334092
covidPopZipNYC %>% dplyr::filter(is.na(totPop))
## Simple feature collection with 7 features and 12 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: -74.0473 ymin: 40.58123 xmax: -73.76521 ymax: 40.88939
## Geodetic CRS:  WGS 84
## # A tibble: 7 × 13
## # Groups:   ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive [7]
##   ZIPCODE PO_NAME      POPULATION COUNTY Positive Total totPop malePctg asianPop
## *   <dbl> <chr>             <dbl> <chr>     <dbl> <dbl>  <int>    <dbl>    <int>
## 1   10004 New York           2187 New Y…       23    59     NA       NA       NA
## 2   10075 New York          25203 New Y…      262   564     NA       NA       NA
## 3   10464 Bronx              4438 Bronx        75   194     NA       NA       NA
## 4   11109 Long Island…       2752 Queens       37   103     NA       NA       NA
## 5   11231 Brooklyn          33144 Kings       241   504     NA       NA       NA
## 6   11693 Far Rockaway      11052 Kings       197   334     NA       NA       NA
## 7   11693 Far Rockaway      11052 Queens      197   334     NA       NA       NA
## # ℹ 4 more variables: blackPop <int>, hispanicPop <int>, whitePop <int>,
## #   geometry <GEOMETRY [°]>
plot(covidPopZipNYC["Total"], breaks='jenks')