R Spatial Lab Assignment # 8

task 1:

nyc_zip <- st_read("/Users/artyom/Documents/study/hunter/25S-GTECH385_DataAnalysisViz/R-spatial/data/ZIP_CODE_040114/ZIP_CODE_040114.shp")
## Reading layer `ZIP_CODE_040114' from data source 
##   `/Users/artyom/Documents/study/hunter/25S-GTECH385_DataAnalysisViz/R-spatial/data/ZIP_CODE_040114/ZIP_CODE_040114.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
covid_data <- readr::read_csv("/Users/artyom/Documents/study/hunter/25S-GTECH385_DataAnalysisViz/R-spatial/data/R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv", lazy = FALSE)
## Rows: 177 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): NEIGHBORHOOD_NAME, BOROUGH_GROUP, label
## dbl (10): MODIFIED_ZCTA, lat, lon, COVID_CASE_COUNT, COVID_CASE_RATE, POP_DE...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nyc_merged <- base::merge(nyc_zip, covid_data, by.x = "ZIPCODE", by.y = "MODIFIED_ZCTA")
names(nyc_merged)
##  [1] "ZIPCODE"           "BLDGZIP"           "PO_NAME"          
##  [4] "POPULATION"        "AREA"              "STATE"            
##  [7] "COUNTY"            "ST_FIPS"           "CTY_FIPS"         
## [10] "URL"               "SHAPE_AREA"        "SHAPE_LEN"        
## [13] "NEIGHBORHOOD_NAME" "BOROUGH_GROUP"     "label"            
## [16] "lat"               "lon"               "COVID_CASE_COUNT" 
## [19] "COVID_CASE_RATE"   "POP_DENOMINATOR"   "COVID_DEATH_COUNT"
## [22] "COVID_DEATH_RATE"  "PERCENT_POSITIVE"  "TOTAL_COVID_TESTS"
## [25] "geometry"
mapview(nyc_merged, zcol="COVID_DEATH_COUNT")

task 2:

nyc_food <- st_read("/Users/artyom/Documents/study/hunter/25S-GTECH385_DataAnalysisViz/R-spatial/data/R-Spatial_II_Lab/nycFoodStore.shp")
## Reading layer `nycFoodStore' from data source 
##   `/Users/artyom/Documents/study/hunter/25S-GTECH385_DataAnalysisViz/R-spatial/data/R-Spatial_II_Lab/nycFoodStore.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS:  WGS 84
nyc_food <- st_transform(nyc_food, st_crs(nyc_zip))

food_counts <- nyc_food %>% 
  dplyr::filter(stringr::str_detect(Estbl_T, '[AJD]')) %>%
  sf::st_join(nyc_zip, ., join = st_contains) %>%
  group_by(Zip_Cod) %>%
  summarise(FoodStoreNum = n()) %>%
  mutate(Zip_Cod = as.character(Zip_Cod)) 

nyc_merged <- nyc_merged %>%
  left_join(st_drop_geometry(food_counts), by = c("ZIPCODE" = "Zip_Cod"))

names(nyc_merged)
##  [1] "ZIPCODE"           "BLDGZIP"           "PO_NAME"          
##  [4] "POPULATION"        "AREA"              "STATE"            
##  [7] "COUNTY"            "ST_FIPS"           "CTY_FIPS"         
## [10] "URL"               "SHAPE_AREA"        "SHAPE_LEN"        
## [13] "NEIGHBORHOOD_NAME" "BOROUGH_GROUP"     "label"            
## [16] "lat"               "lon"               "COVID_CASE_COUNT" 
## [19] "COVID_CASE_RATE"   "POP_DENOMINATOR"   "COVID_DEATH_COUNT"
## [22] "COVID_DEATH_RATE"  "PERCENT_POSITIVE"  "TOTAL_COVID_TESTS"
## [25] "FoodStoreNum"      "geometry"
plot(nyc_merged["FoodStoreNum"], breaks = "jenks", main = "Number of Food Stores")

task 3:

nys_health <- read_csv("/Users/artyom/Documents/study/hunter/25S-GTECH385_DataAnalysisViz/R-spatial/data/NYS_Health_Facility.csv", 
                       show_col_types = FALSE, lazy = FALSE)

# Drop the rows without coordinates
missing_coords <- is.na(nys_health$`Facility Longitude`) | is.na(nys_health$`Facility Latitude`)
sum(missing_coords)
## [1] 142
nys_health <- nys_health[!missing_coords, ]

# Drop the rows where coordinates are 0 
nys_health <- nys_health %>%
  filter(!(nys_health$`Facility Longitude` == 0 & nys_health$`Facility Latitude` == 0))

# Swap Longtitude and Latitude for the row with false coordinates 
nys_health <- nys_health %>%
  mutate(
    temp_lon = `Facility Longitude`,
    `Facility Longitude` = ifelse(`Facility ID` == 5764, `Facility Latitude`, `Facility Longitude`),
    `Facility Latitude` = ifelse(`Facility ID` == 5764, temp_lon, `Facility Latitude`)
  ) %>%
  select(-temp_lon)

nys_health_sf <- st_as_sf(nys_health, coords = c("Facility Longitude", "Facility Latitude"))

st_crs(nys_health_sf) <- 4326

healthfacilities_count <- nyc_merged %>% 
    mutate(ziparea = st_area(geometry)) %>% 
    st_transform(4326) %>%
    st_join(nys_health_sf) %>%
    group_by(`Facility Zip Code`) %>% 
    summarize(healthfacilities_NUM = n(),
              ziparea = max(ziparea),
              rate = healthfacilities_NUM/ziparea * 1e6) 

mapview(healthfacilities_count, zcol='healthfacilities_NUM', legend=FALSE)
nyc_merged <- nyc_merged %>%
  left_join(
    healthfacilities_count %>% 
      st_drop_geometry(), 
    by = c("ZIPCODE" = "Facility Zip Code")
  )

task 4:

nycCensus <- sf::st_read('/Users/artyom/Documents/study/hunter/25S-GTECH385_DataAnalysisViz/R-spatial/data/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp', stringsAsFactors = FALSE)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `/Users/artyom/Documents/study/hunter/25S-GTECH385_DataAnalysisViz/R-spatial/data/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)
str(nycCensus)
## Classes 'sf' and 'data.frame':   2165 obs. of  12 variables:
##  $ boro_code : chr  "5" "1" "1" "1" ...
##  $ boro_ct201: chr  "5000900" "1009800" "1010000" "1010200" ...
##  $ boro_name : chr  "Staten Island" "Manhattan" "Manhattan" "Manhattan" ...
##  $ cdeligibil: chr  "E" "I" "I" "I" ...
##  $ ct2010    : chr  "000900" "009800" "010000" "010200" ...
##  $ ctlabel   : chr  "9" "98" "100" "102" ...
##  $ ntacode   : chr  "SI22" "MN19" "MN19" "MN17" ...
##  $ ntaname   : chr  "West New Brighton-New Brighton-St. George" "Turtle Bay-East Midtown" "Turtle Bay-East Midtown" "Midtown-Midtown South" ...
##  $ puma      : chr  "3903" "3808" "3808" "3807" ...
##  $ shape_area: num  2497010 1906016 1860938 1860993 1864600 ...
##  $ shape_leng: num  7729 5534 5692 5688 5693 ...
##  $ geometry  :sfc_MULTIPOLYGON of length 2165; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:28, 1:2] -74.1 -74.1 -74.1 -74.1 -74.1 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:11] "boro_code" "boro_ct201" "boro_name" "cdeligibil" ...
nycCensus %<>% dplyr::mutate(cntyFIPS = case_when(
  boro_name == 'Bronx' ~ '005',
  boro_name == 'Brooklyn' ~ '047',
  boro_name == 'Manhattan' ~ '061',
  boro_name == 'Queens' ~ '081',
  boro_name == 'Staten Island' ~ '085'),
  tractFIPS = paste(cntyFIPS, ct2010, sep='')
)

acsData <- readLines("/Users/artyom/Documents/study/hunter/25S-GTECH385_DataAnalysisViz/R-spatial/data/R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>% 
  textConnection() %>%
  read.csv(header=TRUE, quote= "\"") %>%
  dplyr::select(GEO_ID, 
                totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
                malePop = DP05_0002E, femalePop = DP05_0003E,  
                whitePop = DP05_0037E, blackPop = DP05_0038E,
                asianPop = DP05_0067E, hispanicPop = DP05_0071E,
                adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1))

popData <- merge(nycCensus, acsData, by.x ='tractFIPS', by.y = 'censusCode')

# verify the data
sum(popData$totPop)
## [1] 8443713
st_crs(popData)
## Coordinate Reference System:
##   User input: WGS84(DD) 
##   wkt:
## GEOGCRS["WGS84(DD)",
##     DATUM["WGS84",
##         ELLIPSOID["WGS84",6378137,298.257223563,
##             LENGTHUNIT["metre",1,
##                 ID["EPSG",9001]]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["longitude",east,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["latitude",north,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]]]
popNYC <- sf::st_transform(popData, st_crs(nyc_merged)) 

popData %>% magrittr::extract('elderlyPop') %>% plot(breaks = 'jenks')

names(popNYC)
##  [1] "tractFIPS"    "boro_code"    "boro_ct201"   "boro_name"    "cdeligibil"  
##  [6] "ct2010"       "ctlabel"      "ntacode"      "ntaname"      "puma"        
## [11] "shape_area"   "shape_leng"   "cntyFIPS"     "GEO_ID"       "totPop"      
## [16] "elderlyPop"   "malePop"      "femalePop"    "whitePop"     "blackPop"    
## [21] "asianPop"     "hispanicPop"  "adultPop"     "citizenAdult" "geometry"

task 5:

covidPopZipNYC <- sf::st_join(nyc_merged, 
                              popNYC %>% sf::st_centroid(),
                              join = st_contains) %>% 
  group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY,NEIGHBORHOOD_NAME, COVID_CASE_COUNT, TOTAL_COVID_TESTS, COVID_DEATH_COUNT, 
           FoodStoreNum, healthfacilities_NUM
           ) %>% 
  summarise(totPop = sum(totPop),
            adultPop = sum(adultPop),
            malePctg = sum(malePop)/totPop*100,
            femalePop = sum(femalePop)/totPop*100,
            elderlyPop = sum(elderlyPop),
            asianPop = sum(asianPop),
            blackPop = sum(blackPop),
            hispanicPop = sum(hispanicPop),
            whitePop = sum(whitePop)) 
## Warning: st_centroid assumes attributes are constant over geometries
## `summarise()` has grouped output by 'ZIPCODE', 'PO_NAME', 'POPULATION',
## 'COUNTY', 'NEIGHBORHOOD_NAME', 'COVID_CASE_COUNT', 'TOTAL_COVID_TESTS',
## 'COVID_DEATH_COUNT', 'FoodStoreNum'. You can override using the `.groups`
## argument.
covidPopZipNYC %>% head()
## Simple feature collection with 6 features and 19 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: 971132.6 ymin: 188447.3 xmax: 992172.8 ymax: 215324.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 6 × 20
## # Groups:   ZIPCODE, PO_NAME, POPULATION, COUNTY, NEIGHBORHOOD_NAME,
## #   COVID_CASE_COUNT, TOTAL_COVID_TESTS, COVID_DEATH_COUNT, FoodStoreNum [6]
##   ZIPCODE PO_NAME  POPULATION COUNTY   NEIGHBORHOOD_NAME        COVID_CASE_COUNT
##   <chr>   <chr>         <dbl> <chr>    <chr>                               <dbl>
## 1 10001   New York      22413 New York Chelsea/NoMad/West Chel…             1542
## 2 10002   New York      81305 New York Chinatown/Lower East Si…             5902
## 3 10003   New York      55878 New York East Village/Gramercy/G…             2803
## 4 10004   New York       2187 New York Financial District                    247
## 5 10005   New York       8107 New York Financial District                    413
## 6 10006   New York       3011 New York Financial District                    164
## # ℹ 14 more variables: TOTAL_COVID_TESTS <dbl>, COVID_DEATH_COUNT <dbl>,
## #   FoodStoreNum <int>, healthfacilities_NUM <int>, totPop <int>,
## #   adultPop <int>, malePctg <dbl>, femalePop <dbl>, elderlyPop <int>,
## #   asianPop <int>, blackPop <int>, hispanicPop <int>, whitePop <int>,
## #   geometry <GEOMETRY [US_survey_foot]>
sum(covidPopZipNYC$totPop, na.rm = T)
## [1] 8334092
covidPopZipNYC %>% dplyr::filter(is.na(totPop))
## Simple feature collection with 7 features and 19 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: 971132.6 ymin: 151085.5 xmax: 1049202 ymax: 263362
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 7 × 20
## # Groups:   ZIPCODE, PO_NAME, POPULATION, COUNTY, NEIGHBORHOOD_NAME,
## #   COVID_CASE_COUNT, TOTAL_COVID_TESTS, COVID_DEATH_COUNT, FoodStoreNum [7]
##   ZIPCODE PO_NAME          POPULATION COUNTY  NEIGHBORHOOD_NAME COVID_CASE_COUNT
## * <chr>   <chr>                 <dbl> <chr>   <chr>                        <dbl>
## 1 10004   New York               2187 New Yo… Financial Distri…              247
## 2 10075   New York              25203 New Yo… Lenox Hill/Upper…             1453
## 3 10464   Bronx                  4438 Bronx   City Island                    387
## 4 11109   Long Island City       2752 Queens  Long Island City               354
## 5 11231   Brooklyn              33144 Kings   Carroll Gardens/…             1842
## 6 11693   Far Rockaway          11052 Kings   Arverne/Broad Ch…             1111
## 7 11693   Far Rockaway          11052 Queens  Arverne/Broad Ch…             1111
## # ℹ 14 more variables: TOTAL_COVID_TESTS <dbl>, COVID_DEATH_COUNT <dbl>,
## #   FoodStoreNum <int>, healthfacilities_NUM <int>, totPop <int>,
## #   adultPop <int>, malePctg <dbl>, femalePop <dbl>, elderlyPop <int>,
## #   asianPop <int>, blackPop <int>, hispanicPop <int>, whitePop <int>,
## #   geometry <GEOMETRY [US_survey_foot]>
plot(covidPopZipNYC["COVID_CASE_COUNT"], breaks='jenks')

plot(covidPopZipNYC["blackPop"], breaks='jenks')

names(covidPopZipNYC)
##  [1] "ZIPCODE"              "PO_NAME"              "POPULATION"          
##  [4] "COUNTY"               "NEIGHBORHOOD_NAME"    "COVID_CASE_COUNT"    
##  [7] "TOTAL_COVID_TESTS"    "COVID_DEATH_COUNT"    "FoodStoreNum"        
## [10] "healthfacilities_NUM" "totPop"               "adultPop"            
## [13] "malePctg"             "femalePop"            "elderlyPop"          
## [16] "asianPop"             "blackPop"             "hispanicPop"         
## [19] "whitePop"             "geometry"