Week 12 Homework

Task 1 & 2

library(sf)

## Linking to GEOS 3.8.1, GDAL 3.1.4, PROJ 6.3.1

library(ggplot2)
library(ggmap)

## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.

## Please cite ggmap if you use it! See citation("ggmap") for details.

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✓ tibble  3.1.1     ✓ dplyr   1.0.5
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ✓ purrr   0.3.4

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(tidyr)
library(dplyr)

st_read("./ZIP_CODE_040114/ZIP_CODE_040114.shp") -> nycZIP

## Reading layer `ZIP_CODE_040114' from data source `/Users/radleyciego/Documents/Hunter College/Spring 2021/GTECH 78520 -  Data Analysis & Visualization/Homework/Week 12/R-Spatial_II_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp' using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)

read.csv("./tests-by-zcta_2021_04_23.csv") -> nycCOVID

# Join the COVID-19 data to the NYC zipcode area
nycZIP %>% dplyr::mutate(ZIP = as.numeric(ZIPCODE)) %>%
  dplyr::left_join(nycCOVID, by = c('ZIP' = 'MODIFIED_ZCTA')) -> nycZIPcovid

sf::st_read("./nycFoodStore.shp") -> nycfoodstoreSF

## Reading layer `nycFoodStore' from data source `/Users/radleyciego/Documents/Hunter College/Spring 2021/GTECH 78520 -  Data Analysis & Visualization/Homework/Week 12/R-Spatial_II_Lab/nycFoodStore.shp' using driver `ESRI Shapefile'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS:  WGS 84

st_crs(nycfoodstoreSF)

## Coordinate Reference System:
##   User input: WGS 84 
##   wkt:
## GEOGCRS["WGS 84",
##     DATUM["World Geodetic System 1984",
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["latitude",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["longitude",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4326]]

st_crs(nycZIP)

## Coordinate Reference System:
##   User input: NAD83 / New York Long Island (ftUS) 
##   wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4269]],
##     CONVERSION["SPCS83 New York Long Island zone (US Survey feet)",
##         METHOD["Lambert Conic Conformal (2SP)",
##             ID["EPSG",9802]],
##         PARAMETER["Latitude of false origin",40.1666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-74,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",984250,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##     USAGE[
##         SCOPE["unknown"],
##         AREA["USA - New York - SPCS - Long Island"],
##         BBOX[40.47,-74.26,41.3,-71.8]],
##     ID["EPSG",2263]]

sf::st_transform(nycfoodstoreSF, sf::st_crs(2263)) -> nycfoodstoreSFLocal

nycfoodstoreSF %>% sf::st_transform(sf::st_crs(nycZIP))

## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 915179.3 ymin: 124371.4 xmax: 1075512 ymax: 270861.1
## Projected CRS: NAD83 / New York Long Island (ftUS)
## First 10 features:
##    ï__Cnty Lcns_Nm Oprtn_T Estbl_T                           Entty_N
## 1    Bronx  734149   Store     JAC       7 ELEVEN FOOD STORE #37933H
## 2    Bronx  606221   Store     JAC   1001 SAN MIGUEL FOOD CENTER INC
## 3    Bronx  606228   Store     JAC               1029 FOOD PLAZA INC
## 4    Bronx  723375   Store     JAC            1078 DELI GROCERY CORP
## 5    Bronx  724807   Store     JAC       1086 LUNA DELI GROCERY CORP
## 6    Bronx  712943   Store     JAC          109 AJ DELI GROCERY CORP
## 7    Bronx  703060   Store     JAC 10 NEIGHBORHOOD CANDY GROCERY COR
## 8    Bronx  609065   Store     JAC     1105 TINTON DELI GROCERY CORP
## 9    Bronx  722972   Store       A         1150 WEBSTER PHARMACY INC
## 10   Bronx  609621   Store     JAC           1158 GROCERY & DELI INC
##                    DBA_Nam Strt_Nmb          Stret_Nm Add_L_2 Add_L_3  City
## 1                     <NA>      500    BAYCHESTER AVE      NA      NA BRONX
## 2  1001 SAN MIGUEL FD CNTR     1001      SHERIDAN AVE      NA      NA BRONX
## 3          1029 FOOD PLAZA      122        E 181ST ST      NA      NA BRONX
## 4        1078 DELI GROCERY     1078 EAST 165TH STREET      NA      NA BRONX
## 5   1086 LUNA DELI GROCERY     1086       BOSTON ROAD      NA      NA BRONX
## 6      109 AJ DELI GROCERY      109     E TREMONT AVE      NA      NA BRONX
## 7  10 NEIGHBORHOOD CANDY G       10   W. GUN HILL RD.      NA      NA BRONX
## 8    1105 TINTON DELI GRCY     1105        TINTON AVE      NA      NA BRONX
## 9  1150 WEBSTER PHARMACY I     1150    WEBSTER AVENUE      NA      NA BRONX
## 10     1158 GROCERY & DELI     1158   ST LAWRENCE AVE      NA      NA BRONX
##    State Zip_Cod Sqr_Ftg
## 1     NY   10475       0
## 2     NY   10456   1,100
## 3     NY   10453   2,000
## 4     NY   10459   1,200
## 5     NY   10456   1,500
## 6     NY   10453   2,400
## 7     NY   10467   1,000
## 8     NY   10456   1,200
## 9     NY   10456   3,400
## 10    NY   10472     500
##                                                             Locatin
## 1      500 BAYCHESTER AVE\nBRONX, NY 10475\n(40.869156, -73.831875)
## 2       1001 SHERIDAN AVE\nBRONX, NY 10456\n(40.829061, -73.919613)
## 3          122 E 181ST ST\nBRONX, NY 10453\n(40.854755, -73.902853)
## 4  1078 EAST 165TH STREET\nBRONX, NY 10459\n(40.825105, -73.890589)
## 5        1086 BOSTON ROAD\nBRONX, NY 10456\n(40.827096, -73.905123)
## 6       109 E TREMONT AVE\nBRONX, NY 10453\n(40.850537, -73.907137)
## 7        10 W GUN HILL RD\nBRONX, NY 10467\n(40.882869, -73.881552)
## 8         1105 TINTON AVE\nBRONX, NY 10456\n(40.826607, -73.901498)
## 9      1150 WEBSTER AVENUE\nBRONX, NY 10456\n(40.830425, -73.91063)
## 10   1158 ST LAWRENCE AVE\nBRONX, NY 10472\n(40.829105, -73.866678)
##                   Coords                 geometry
## 1  40.869156, -73.831875 POINT (1030750 255979.3)
## 2  40.829061, -73.919613 POINT (1006497 241336.7)
## 3  40.854755, -73.902853 POINT (1011124 250702.7)
## 4  40.825105, -73.890589 POINT (1014531 239904.1)
## 5  40.827096, -73.905123 POINT (1010507 240624.8)
## 6  40.850537, -73.907137 POINT (1009941 249164.7)
## 7  40.882869, -73.881552   POINT (1017003 260953)
## 8  40.826607, -73.901498 POINT (1011511 240447.8)
## 9   40.830425, -73.91063 POINT (1008982 241836.1)
## 10 40.829105, -73.866678 POINT (1021146 241370.6)

nycfoodstoreSFLocal %>% dplyr::filter(stringr::str_detect(Estbl_T, '[AJD]')) %>%
  sf::st_join(nycZIP, ., join = st_contains) %>%
  group_by(ZIPCODE, Estbl_T) %>%
  summarise(FoodStoreNum = n()) %>%
  magrittr::extract('FoodStoreNum') %>%
  plot(breaks = "jenks", main = "Number of Retail Stores")

## `summarise()` has grouped output by 'ZIPCODE'. You can override using the `.groups` argument.

Task 3

read.csv("./NYS_Health_Facility.csv") -> nysHealth

# Filter for NYC counties & facilities, mutate
c("Bronx", "New York", "Richmond", "Queens", "Kings") -> Counties
c("NH", "HOSP", "HCPC") -> Facilities
nysHealth %>%
  filter(Facility.County %in% Counties) %>%
  filter(Short.Description %in% Facilities) %>%
  mutate(Latitude =as.numeric(Facility.Latitude),
       Longitude = as.numeric(Facility.Longitude),
       ZIPCODE = as.numeric(Facility.Zip.Code)) -> nycHealth

## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

# Remove missing values
nycHealth[!is.na(nycHealth$Facility.Longitude),] -> nycHealth

# Remove dirty longitude data
nycHealth[nycHealth$Facility.Longitude != 0.0000,] -> nycHealth

# Join nycHealth to nycZIP
nycZIP %>% 
  dplyr::left_join(nycHealth, by = c('ZIPCODE' = 'Facility.Zip.Code')) -> nycHealth

st_as_sf(nycHealth, coords = c("Facility.Longitude", "Facility.Latitude")) -> nycHealthSF
st_crs(nycHealthSF) <- 2263

nycHealthSF %>%
  group_by(ZIPCODE, Short.Description) %>%
  summarise(HealthFacNum = n()) %>%
  magrittr::extract('HealthFacNum') %>%
  plot(breaks = "jenks", main = "Number of Health Facilities")

## `summarise()` has grouped output by 'ZIPCODE'. You can override using the `.groups` argument.

Task 4 & 5

require(sf);
require(magrittr);

## Loading required package: magrittr

## 
## Attaching package: 'magrittr'

## The following object is masked from 'package:purrr':
## 
##     set_names

## The following object is masked from 'package:tidyr':
## 
##     extract

## The following object is masked from 'package:ggmap':
## 
##     inset

require(tidyverse);

sf::st_read('./geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp', stringsAsFactors = FALSE) -> nycCensus

## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `/Users/radleyciego/Documents/Hunter College/Spring 2021/GTECH 78520 -  Data Analysis & Visualization/Homework/Week 12/R-Spatial_II_Lab/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)

str(nycCensus)

## Classes 'sf' and 'data.frame':   2165 obs. of  12 variables:
##  $ boro_code : chr  "5" "1" "1" "1" ...
##  $ boro_ct201: chr  "5000900" "1009800" "1010000" "1010200" ...
##  $ boro_name : chr  "Staten Island" "Manhattan" "Manhattan" "Manhattan" ...
##  $ cdeligibil: chr  "E" "I" "I" "I" ...
##  $ ct2010    : chr  "000900" "009800" "010000" "010200" ...
##  $ ctlabel   : chr  "9" "98" "100" "102" ...
##  $ ntacode   : chr  "SI22" "MN19" "MN19" "MN17" ...
##  $ ntaname   : chr  "West New Brighton-New Brighton-St. George" "Turtle Bay-East Midtown" "Turtle Bay-East Midtown" "Midtown-Midtown South" ...
##  $ puma      : chr  "3903" "3808" "3808" "3807" ...
##  $ shape_area: num  2497010 1906016 1860938 1860993 1864600 ...
##  $ shape_leng: num  7729 5534 5692 5688 5693 ...
##  $ geometry  :sfc_MULTIPOLYGON of length 2165; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:28, 1:2] -74.1 -74.1 -74.1 -74.1 -74.1 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:11] "boro_code" "boro_ct201" "boro_name" "cdeligibil" ...

nycCensus %<>% dplyr::mutate(cntyFIPS = dplyr::case_when(
  boro_name == 'Bronx' ~ '005',
  boro_name == 'Brooklyn' ~ '047',
  boro_name == 'Manhattan' ~ '061',
  boro_name == 'Queens' ~ '081',
  boro_name == 'Staten Island' ~ '085'),
  tractFIPS = paste(cntyFIPS, ct2010, sep='')
)

acsData <- readLines("./ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>% 
  textConnection() %>%
  read.csv(header=TRUE, quote= "\"") %>%
  dplyr::select(GEO_ID, 
                totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
                malePop = DP05_0002E, femalePop = DP05_0003E,  
                whitePop = DP05_0037E, blackPop = DP05_0038E,
                asianPop = DP05_0067E, hispanicPop = DP05_0071E,
                adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1));

acsData %>%
  magrittr::extract(1:10,)

##                  GEO_ID totPop elderlyPop malePop femalePop whitePop blackPop
## 1  1400000US36005000100   7080         51    6503       577     1773     4239
## 2  1400000US36005000200   4542        950    2264      2278     2165     1279
## 3  1400000US36005000400   5634        710    2807      2827     2623     1699
## 4  1400000US36005001600   5917        989    2365      3552     2406     2434
## 5  1400000US36005001900   2765         76    1363      1402      585     1041
## 6  1400000US36005002000   9409        977    4119      5290     3185     4487
## 7  1400000US36005002300   4600        648    2175      2425      479     2122
## 8  1400000US36005002400    172          0     121        51       69       89
## 9  1400000US36005002500   5887        548    2958      2929      903     1344
## 10 1400000US36005002701   2868        243    1259      1609      243      987
##    asianPop hispanicPop adultPop citizenAdult censusCode
## 1       130        2329     6909         6100  005000100
## 2       119        3367     3582         2952  005000200
## 3       226        3873     4507         4214  005000400
## 4        68        3603     4416         3851  005001600
## 5       130        1413     2008         1787  005001900
## 6        29        5905     6851         6170  005002000
## 7        27        2674     3498         3056  005002300
## 8        14           0      131           42  005002400
## 9        68        4562     4237         2722  005002500
## 10        0        1985     1848         1412  005002701

# Merge (JOIN) ACS data to the census tracts
popData <- merge(nycCensus, acsData, by.x ='tractFIPS', by.y = 'censusCode')

# verify the data
sum(popData$totPop)

## [1] 8443713

st_crs(popData)

## Coordinate Reference System:
##   User input: WGS84(DD) 
##   wkt:
## GEOGCRS["WGS84(DD)",
##     DATUM["WGS84",
##         ELLIPSOID["WGS84",6378137,298.257223563,
##             LENGTHUNIT["metre",1,
##                 ID["EPSG",9001]]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic longitude",east,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic latitude",north,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]]]

popNYC <- sf::st_transform(popData, st_crs(nycZIP))

# this may take a long time to run
popData %>% 
  magrittr::extract('elderlyPop') %>% 
  plot(breaks = 'jenks')

# Now aggregate to the zip code level
covidPopZipNYC <- sf::st_join(nycZIPcovid, 
                              popNYC %>% sf::st_centroid(), # this essentially converts census tracts to points
                              join = st_contains) %>% 
  group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT, TOTAL_COVID_TESTS) %>% # use names(zpNYC) and names(popNYC) to see what we have
  summarise(totPop = sum(totPop),
            malePctg = sum(malePop)/totPop*100, # note the totPop is the newly calculated
            asianPop = sum(asianPop),
            blackPop = sum(blackPop),
            hispanicPop = sum(hispanicPop),
            whitePop = sum(whitePop))

## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x

## `summarise()` has grouped output by 'ZIPCODE', 'PO_NAME', 'POPULATION', 'COUNTY', 'COVID_CASE_COUNT'. You can override using the `.groups` argument.

covidPopZipNYC %>% head()

## Simple feature collection with 6 features and 12 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: 971132.6 ymin: 188447.3 xmax: 998309.7 ymax: 230942.5
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 6 x 13
## # Groups:   ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT [6]
##   ZIPCODE PO_NAME    POPULATION COUNTY  COVID_CASE_COUNT TOTAL_COVID_TES… totPop
##   <chr>   <chr>           <dbl> <chr>              <int>            <int>  <int>
## 1 00083   Central P…         25 New Yo…               NA               NA      3
## 2 10001   New York        22413 New Yo…             1542            20158  19146
## 3 10002   New York        81305 New Yo…             5902            48197  74310
## 4 10003   New York        55878 New Yo…             2803            41076  53487
## 5 10004   New York         2187 New Yo…              247             3599     NA
## 6 10005   New York         8107 New Yo…              413             6102   8809
## # … with 6 more variables: malePctg <dbl>, asianPop <int>, blackPop <int>,
## #   hispanicPop <int>, whitePop <int>, geometry <GEOMETRY [US_survey_foot]>

# Check and verify the data again
sum(covidPopZipNYC$totPop, na.rm = T)

## [1] 8395306

# See where we have those NAs 
covidPopZipNYC %>% dplyr::filter(is.na(totPop))

## Simple feature collection with 68 features and 12 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: 971132.6 ymin: 151085.5 xmax: 1067494 ymax: 263362
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 68 x 13
## # Groups:   ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT [68]
##    ZIPCODE PO_NAME  POPULATION COUNTY   COVID_CASE_COUNT TOTAL_COVID_TES… totPop
##  * <chr>   <chr>         <dbl> <chr>               <int>            <int>  <int>
##  1 10004   New York       2187 New York              247             3599     NA
##  2 10020   New York          0 New York               NA               NA     NA
##  3 10041   New York          0 New York               NA               NA     NA
##  4 10043   New York          0 New York               NA               NA     NA
##  5 10045   New York          0 New York               NA               NA     NA
##  6 10047   New York          0 New York               NA               NA     NA
##  7 10048   New York          0 New York               NA               NA     NA
##  8 10055   New York         12 New York               NA               NA     NA
##  9 10075   New York      25203 New York             1453            18391     NA
## 10 10080   New York          0 New York               NA               NA     NA
## # … with 58 more rows, and 6 more variables: malePctg <dbl>, asianPop <int>,
## #   blackPop <int>, hispanicPop <int>, whitePop <int>,
## #   geometry <GEOMETRY [US_survey_foot]>

plot(covidPopZipNYC["COVID_CASE_COUNT"], breaks='jenks')

plot(covidPopZipNYC["asianPop"], breaks='jenks')

Week 12 Homework

Radley Ciego

5/2/2021

Task 1 & 2

Task 3

Task 4 & 5