R Markdown Lecture 8

Part 1

Join the COVID-19 data to the NYC zip code area data (sf or sp polygons).

# Load packages
library(sf)
## Warning: package 'sf' was built under R version 4.3.3
## Linking to GEOS 3.11.2, GDAL 3.8.2, PROJ 9.3.1; sf_use_s2() is TRUE
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(mapview)
## Warning: package 'mapview' was built under R version 4.3.3
library(readr)
## Warning: package 'readr' was built under R version 4.3.1
library(ggmap)
## Warning: package 'ggmap' was built under R version 4.3.3
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
##   Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service/>
##   OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles/>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
## Warning: package 'stringr' was built under R version 4.3.3
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:ggmap':
## 
##     inset
# Read in data
covid19 <- read.csv("tests-by-zcta_2021_04_23.csv")

nyc_zips_sf <- st_read('ZIP_CODE_040114/ZIP_CODE_040114.shp')
## Reading layer `ZIP_CODE_040114' from data source 
##   `C:\Users\naial\OneDrive\Documents\School\Geog393\Section 3\Session_8\R-Spatial_II_Lab\R-Spatial_II_Lab\ZIP_CODE_040114\ZIP_CODE_040114.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
str(nyc_zips_sf)
## Classes 'sf' and 'data.frame':   263 obs. of  13 variables:
##  $ ZIPCODE   : chr  "11436" "11213" "11212" "11225" ...
##  $ BLDGZIP   : chr  "0" "0" "0" "0" ...
##  $ PO_NAME   : chr  "Jamaica" "Brooklyn" "Brooklyn" "Brooklyn" ...
##  $ POPULATION: num  18681 62426 83866 56527 72280 ...
##  $ AREA      : num  22699295 29631004 41972104 23698630 36868799 ...
##  $ STATE     : chr  "NY" "NY" "NY" "NY" ...
##  $ COUNTY    : chr  "Queens" "Kings" "Kings" "Kings" ...
##  $ ST_FIPS   : chr  "36" "36" "36" "36" ...
##  $ CTY_FIPS  : chr  "081" "047" "047" "047" ...
##  $ URL       : chr  "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
##  $ SHAPE_AREA: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ SHAPE_LEN : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ geometry  :sfc_POLYGON of length 263; first list element: List of 1
##   ..$ : num [1:159, 1:2] 1038098 1038142 1038171 1038280 1038521 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:12] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...
# Join data

nyc_sf_merged <- nyc_zips_sf %>% 
  mutate(ZIP = as.numeric(ZIPCODE)) %>%
  left_join(covid19, by = c('ZIP' = 'MODIFIED_ZCTA'))

names(nyc_sf_merged)
##  [1] "ZIPCODE"           "BLDGZIP"           "PO_NAME"          
##  [4] "POPULATION"        "AREA"              "STATE"            
##  [7] "COUNTY"            "ST_FIPS"           "CTY_FIPS"         
## [10] "URL"               "SHAPE_AREA"        "SHAPE_LEN"        
## [13] "ZIP"               "NEIGHBORHOOD_NAME" "BOROUGH_GROUP"    
## [16] "label"             "lat"               "lon"              
## [19] "COVID_CASE_COUNT"  "COVID_CASE_RATE"   "POP_DENOMINATOR"  
## [22] "COVID_DEATH_COUNT" "COVID_DEATH_RATE"  "PERCENT_POSITIVE" 
## [25] "TOTAL_COVID_TESTS" "geometry"
mapview(nyc_sf_merged, zcol = "POPULATION", layer.name = "Covid Pop")

Part 2

Aggregate the NYC food retails store data (points) to the zip code data, so that we know how many retail stores in each zip code area. Note that not all locations are for food retail. And we need to choose the specific types according to the data.

# Read in food retail store data and convert to points
retail_food <- readr::read_csv("nys_retail_food_store_xy.csv", lazy = FALSE) 
## Rows: 29389 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): County, Operation.Type, Establishment.Type, Entity.Name, DBA.Name,...
## dbl  (4): License.Number, Zip.Code, Y, X
## num  (1): Square.Footage
## lgl  (2): Address.Line.2, Address.Line.3
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
str(retail_food)
## spc_tbl_ [29,389 × 18] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ County            : chr [1:29389] "Albany" "Albany" "Albany" "Albany" ...
##  $ License.Number    : num [1:29389] 733149 704590 727909 720557 15890 ...
##  $ Operation.Type    : chr [1:29389] "Store" "Store" "Store" "Store" ...
##  $ Establishment.Type: chr [1:29389] "A" "JAC" "JAC" "JAC" ...
##  $ Entity.Name       : chr [1:29389] "SPEEDWAY LLC" "1250 SELKIRK INC" "RED-KAP SALES INC" "SAEED SADIQ, SAIKA NOREEN" ...
##  $ DBA.Name          : chr [1:29389] "12110" "1250 SELKIRK" "1667 GENERAL STORE" "19 STREET QUICK STOP" ...
##  $ Street.Number     : chr [1:29389] "719" "1250" "1667" "315" ...
##  $ Street.Name       : chr [1:29389] "NEW LOUDON RD" "RTE 9W & 396" "WESTERN AVENUE" "19TH STREET" ...
##  $ Address.Line.2    : logi [1:29389] NA NA NA NA NA NA ...
##  $ Address.Line.3    : logi [1:29389] NA NA NA NA NA NA ...
##  $ City              : chr [1:29389] "LATHAM" "SELKIRK" "ALBANY" "WATERVLIET" ...
##  $ State             : chr [1:29389] "NY" "NY" "NY" "NY" ...
##  $ Zip.Code          : num [1:29389] 12110 12158 12203 12189 12210 ...
##  $ Square.Footage    : num [1:29389] 300 3000 2000 1200 1800 0 0 200 0 2000 ...
##  $ Location          : chr [1:29389] "719 NEW LOUDON RD\r\nLATHAM, NY 12110\r\n(42.739618, -73.761949)" "1250 RTE 9 W\r\nSELKIRK, NY 12158\r\n(42.547591, -73.8073)" "1667 WESTERN AVENUE\r\nALBANY, NY 12203\r\n(42.686553, -73.854665)" "315 19TH STREET\r\nWATERVLIET, NY 12189\r\n(42.73063, -73.703443)" ...
##  $ Coords            : chr [1:29389] "42.739618, -73.761949" "42.547591, -73.8073" "42.686553, -73.854665" "42.73063, -73.703443" ...
##  $ Y                 : num [1:29389] 42.7 42.5 42.7 42.7 42.7 ...
##  $ X                 : num [1:29389] -73.8 -73.8 -73.9 -73.7 -73.8 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   County = col_character(),
##   ..   License.Number = col_double(),
##   ..   Operation.Type = col_character(),
##   ..   Establishment.Type = col_character(),
##   ..   Entity.Name = col_character(),
##   ..   DBA.Name = col_character(),
##   ..   Street.Number = col_character(),
##   ..   Street.Name = col_character(),
##   ..   Address.Line.2 = col_logical(),
##   ..   Address.Line.3 = col_logical(),
##   ..   City = col_character(),
##   ..   State = col_character(),
##   ..   Zip.Code = col_double(),
##   ..   Square.Footage = col_number(),
##   ..   Location = col_character(),
##   ..   Coords = col_character(),
##   ..   Y = col_double(),
##   ..   X = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
retail_food <- retail_food[!is.na(retail_food$`Y`) & !is.na(retail_food$`X`),]

retail_food <- retail_food %>% 
  filter(County %in% c("Bronx", "Kings", "New York", "Queens", "Richmond"))

retail_food_sf <- st_as_sf(retail_food, 
                               coords = c("X", "Y"), crs = 2831)

#tranform CRS on zipcode data

nyc_sf_2831 <- st_transform(nyc_sf_merged, 2831)

st_crs(retail_food_sf)
## Coordinate Reference System:
##   User input: EPSG:2831 
##   wkt:
## PROJCRS["NAD83(HARN) / New York Long Island",
##     BASEGEOGCRS["NAD83(HARN)",
##         DATUM["NAD83 (High Accuracy Reference Network)",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4152]],
##     CONVERSION["SPCS83 New York Long Island zone (meter)",
##         METHOD["Lambert Conic Conformal (2SP)",
##             ID["EPSG",9802]],
##         PARAMETER["Latitude of false origin",40.1666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-74,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",300000,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["metre",1]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["metre",1]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
##         BBOX[40.47,-74.26,41.3,-71.8]],
##     ID["EPSG",2831]]
st_crs(nyc_sf_2831)
## Coordinate Reference System:
##   User input: EPSG:2831 
##   wkt:
## PROJCRS["NAD83(HARN) / New York Long Island",
##     BASEGEOGCRS["NAD83(HARN)",
##         DATUM["NAD83 (High Accuracy Reference Network)",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4152]],
##     CONVERSION["SPCS83 New York Long Island zone (meter)",
##         METHOD["Lambert Conic Conformal (2SP)",
##             ID["EPSG",9802]],
##         PARAMETER["Latitude of false origin",40.1666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-74,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",300000,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["metre",1]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["metre",1]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
##         BBOX[40.47,-74.26,41.3,-71.8]],
##     ID["EPSG",2831]]
# Filter store type

retail_food_sf_A <- filter(retail_food_sf, Establishment.Type == 'A') 

#Aggregate datasets

store_num_sf <- nyc_sf_2831 %>% 
      mutate(tract_area = st_area(geometry)) %>%
      st_join(retail_food_sf_A) %>%
      group_by(ZIPCODE) %>% 
      summarize(n_stores = n(),
                tract_area = max(tract_area),
                store_num = n_stores/tract_area * 1e6) 

mapview(store_num_sf, zcol='store_num', legend=FALSE)

Part 3

Aggregate the NYC health facilities (points) to the zip code data. Similarly, choose appropriate subtypes such as nursing homes from the facilities.

# Read in health facilities csv
HealthFac <- read.csv("NYS_Health_Facility.csv")

#Convert health fac zip code to character type
HealthFac$Facility.Zip.Code <- as.numeric(as.character(HealthFac$Facility.Zip.Code))
## Warning: NAs introduced by coercion
#Left join zip code data to health data
ZipHealth <- dplyr::left_join(
  nyc_sf_2831%>% dplyr::mutate(ZIPCODE = as.numeric(as.character(ZIPCODE))),
  HealthFac, by= c('ZIPCODE' = 'Facility.Zip.Code'))
## Warning in sf_column %in% names(g): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 2 of `x` matches multiple rows in `y`.
## ℹ Row 32 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
# Join and filter for Nursing Homes
ZipHealth_NH <- ZipHealth %>%
  filter(Short.Description == 'NH') %>%
  tidyr::drop_na(Facility.Latitude, Facility.Longitude) %>%
  group_by(ZIPCODE, Short.Description) %>%
  summarise(Health= n())
## `summarise()` has grouped output by 'ZIPCODE'. You can override using the
## `.groups` argument.
mapview(ZipHealth_NH, zcol = 'Health', layer.name = "Nursing Homes")

Part 4

Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.

#Read in census data
Census1 <- sf::st_read("2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp", stringsAsFactors = FALSE)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\naial\OneDrive\Documents\School\Geog393\Section 3\Session_8\R-Spatial_II_Lab\R-Spatial_II_Lab\2010 Census Tracts\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)
str(Census1)
## Classes 'sf' and 'data.frame':   2165 obs. of  12 variables:
##  $ boro_code : chr  "5" "1" "1" "1" ...
##  $ boro_ct201: chr  "5000900" "1009800" "1010000" "1010200" ...
##  $ boro_name : chr  "Staten Island" "Manhattan" "Manhattan" "Manhattan" ...
##  $ cdeligibil: chr  "E" "I" "I" "I" ...
##  $ ct2010    : chr  "000900" "009800" "010000" "010200" ...
##  $ ctlabel   : chr  "9" "98" "100" "102" ...
##  $ ntacode   : chr  "SI22" "MN19" "MN19" "MN17" ...
##  $ ntaname   : chr  "West New Brighton-New Brighton-St. George" "Turtle Bay-East Midtown" "Turtle Bay-East Midtown" "Midtown-Midtown South" ...
##  $ puma      : chr  "3903" "3808" "3808" "3807" ...
##  $ shape_area: num  2497010 1906016 1860938 1860993 1864600 ...
##  $ shape_leng: num  7729 5534 5692 5688 5693 ...
##  $ geometry  :sfc_MULTIPOLYGON of length 2165; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:28, 1:2] -74.1 -74.1 -74.1 -74.1 -74.1 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:11] "boro_code" "boro_ct201" "boro_name" "cdeligibil" ...
# Concatenating borough code and ct2010 to join with acs "GEO_ID" column
Census <-  Census1 %>% dplyr::mutate(county = dplyr::case_when(
  boro_name == 'Bronx' ~ '005',
  boro_name == 'Brooklyn' ~ '047',
  boro_name == 'Manhattan' ~ '061',
  boro_name == 'Queens' ~ '081',
  boro_name == 'Staten Island' ~ '085'),
  tractFIPS = paste(county,ct2010, sep = '')
)

# Read in acs data + clean data + reassign names based on metadata + assign numeric value
ACS <- read_csv("ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv", show_col_types = FALSE, lazy = FALSE) 


ACS <- ACS[-1,]

ACSMod <- ACS %>%
  mutate(tractFIPS=str_sub(GEO_ID,-9,-1)) %>%
  select(tractFIPS, 
         total = DP05_0001E, 
         elderly = DP05_0024E,
         male = DP05_0002E, 
         female = DP05_0003E,
         white = DP05_0037E, 
         black = DP05_0038E,
         asian = DP05_0067E, 
         hispanic = DP05_0071E,
         adult21 = DP05_0021E, 
         adult18 = DP05_0087E) %>%
  mutate_at(c('total','elderly','male','female','white','black','asian','hispanic','adult21','adult18'),as.numeric)



# Merge datasets
popDemo <- merge(Census, ACSMod, by = 'tractFIPS')

Part 5

Aggregate the ACS census data to zip code area data.

#Transform popDemo data to 2831 crs
popNYC <- sf::st_transform(popDemo, st_crs(nyc_sf_2831))

# Remove na values
sum(popNYC$total, na.rm = T)
## [1] 8443713
# Aggregate data and assign new pctg values of total population per demographic
ACSZip <- sf::st_join(nyc_sf_2831, popNYC) %>% 
  group_by(ZIPCODE) %>%
  summarize(total = sum(total),
            hispanic = sum(hispanic),            
            malePctg = sum(male)/total*100, 
            asianPctg = sum(asian)/total*100,
            blackPctg = sum(black)/total*100,
            hispanicPctg = sum(hispanic)/total*100,
            elderlyPctg=sum(elderly)/total*100) 


mapview(ACSZip, zcol = c('total','hispanic','hispanicPctg'), layer.name = "Hispanic Data (number of)")
str(ACSZip)
## sf [248 × 9] (S3: sf/tbl_df/tbl/data.frame)
##  $ ZIPCODE     : chr [1:248] "00083" "10001" "10002" "10003" ...
##  $ total       : num [1:248] 141819 53108 145548 95677 15179 ...
##  $ hispanic    : num [1:248] 28826 6054 33166 8818 1161 ...
##  $ malePctg    : num [1:248] 45.3 50.9 47.8 49.9 44.5 ...
##  $ asianPctg   : num [1:248] 9.28 23.35 36.44 15.98 23.24 ...
##  $ blackPctg   : num [1:248] 15.25 5.44 9.12 5.26 2.03 ...
##  $ hispanicPctg: num [1:248] 20.33 11.4 22.79 9.22 7.65 ...
##  $ elderlyPctg : num [1:248] 21.99 12.89 19.78 12.76 2.15 ...
##  $ geometry    :sfc_GEOMETRY of length 248; first list element: List of 1
##   ..$ : num [1:216, 1:2] 304286 304277 304268 304245 304206 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA
##   ..- attr(*, "names")= chr [1:8] "ZIPCODE" "total" "hispanic" "malePctg" ...
getwd()
## [1] "C:/Users/naial/OneDrive/Documents/School/Geog393/Section 3/Session_8/R-Spatial_II_Lab/R-Spatial_II_Lab"
st_write(ACSZip, "NYC_Covid.shp", delete_layer = TRUE)
## Warning in abbreviate_shapefile_names(obj): Field names abbreviated for ESRI
## Shapefile driver
## Deleting layer `NYC_Covid' using driver `ESRI Shapefile'
## Writing layer `NYC_Covid' to data source `NYC_Covid.shp' using driver `ESRI Shapefile'
## Writing 248 features with 8 fields and geometry type Unknown (any).