R Week 08 Assignment

R Spatial Lab Assignment # 1

task 1:

nyc_zip <- st_read("./Week_07/Data/R-Spatial_I_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp")

## Reading layer `ZIP_CODE_040114' from data source 
##   `C:\Users\erins\OneDrive - SOHO SOLUTIONS INC\Documents\assignment_4\Week_07\Data\R-Spatial_I_Lab\ZIP_CODE_040114\ZIP_CODE_040114.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)

tests <- readr::read_csv("data/R-Spatial_II_Lab/R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv", lazy = FALSE)

## Rows: 177 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): NEIGHBORHOOD_NAME, BOROUGH_GROUP, label
## dbl (10): MODIFIED_ZCTA, lat, lon, COVID_CASE_COUNT, COVID_CASE_RATE, POP_DE...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

covid_data <- tests %>% mutate(MODIFIED_ZCTA = as.character(MODIFIED_ZCTA))

#perform the actual join
nyc_zip_joined <- nyc_zip %>%
  left_join(covid_data, by = c("ZIPCODE" = "MODIFIED_ZCTA"))

task 2:

food_stores <- st_read('data/R-Spatial_II_Lab/R-Spatial_II_Lab/nycFoodStore.shp')

## Reading layer `nycFoodStore' from data source 
##   `C:\Users\erins\OneDrive - SOHO SOLUTIONS INC\Documents\assignment_4\data\R-Spatial_II_Lab\R-Spatial_II_Lab\nycFoodStore.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS:  WGS 84

nyc_zip <- st_transform(nyc_zip, crs = st_crs(food_stores))

food_retail <- food_stores %>%
  filter(str_detect(Estbl_T, "A"))

# assign each of the food stores to a zip code
food_by_zip <- st_join(food_retail %>% st_transform(st_crs(nyc_zip_joined)), 
                       nyc_zip_joined, join = st_within)

# count the number of food stores per zip
food_count <- food_by_zip %>%
  group_by(ZIPCODE) %>%
  summarise(store_count = n(), .groups = "drop")

# join the store count data back to the zip code polygons
nyc_zip_joined <- nyc_zip_joined %>%
  left_join(st_drop_geometry(food_count), by = "ZIPCODE") 

# get rid of all the NA values so they just say 0
nyc_zip_joined$store_count[is.na(nyc_zip_joined$store_count)] <- 0

task 3:

health <- readr::read_csv("data/R-Spatial_II_Lab/R-Spatial_II_Lab/NYS_Health_Facility.csv", lazy = FALSE)

## Rows: 3990 Columns: 36
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (28): Facility Name, Short Description, Description, Facility Open Date,...
## dbl  (8): Facility ID, Facility Phone Number, Facility Fax Number, Facility ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

health <- health %>%
  filter(!is.na(`Facility Longitude`), !is.na(`Facility Latitude`))  # Fix column reference

# Convert to sf object
facilities_sf <- st_as_sf(health, coords = c("Facility Longitude", "Facility Latitude"), crs = 4326)

# Ensure ZIP code shapefile has the same CRS
nyc_zip_joined <- st_transform(nyc_zip_joined, crs = st_crs(facilities_sf))  # Reproject ZIP codes

# Perform spatial join
facilities_with_zip <- st_join(facilities_sf, nyc_zip_joined, join = st_within)

# count number of facilities per ZIP code
facility_count <- facilities_with_zip %>%
  group_by(ZIPCODE) %>%
  summarise(count = n())

# join count data to ZIP code shapefile
zip_codes_count <- st_join(nyc_zip_joined, facility_count, join = st_within)

hospice_sf <- facilities_sf %>%
  filter(str_detect(`Short Description`, "HOSP-EC"))

hospice_with_zip <- st_join(hospice_sf, nyc_zip_joined, join = st_within)

# group them by zip codes 
hospice_count_by_zip <- hospice_with_zip %>%
  group_by(ZIPCODE) %>%
  summarise(hospice_count = n())

nyc_zip_joined <- st_join(nyc_zip_joined, hospice_count_by_zip, left = TRUE)

mapview(nyc_zip_joined, zcol = "hospice_count")

task 4:

# I kind of struggled with this one and am going to go to office hours this week to get more feedback on how to do this. 

censustracts <- sf::st_read('data/R-Spatial_II_Lab/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp', stringsAsFactors = FALSE)

## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\erins\OneDrive - SOHO SOLUTIONS INC\Documents\assignment_4\data\R-Spatial_II_Lab\R-Spatial_II_Lab\2010 Census Tracts\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)

acsData <- readLines("data/R-Spatial_II_Lab/R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>% 
  textConnection() %>%
  read.csv(header=TRUE, quote= "\"") %>%
  dplyr::select(GEO_ID, 
                totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
                malePop = DP05_0002E, femalePop = DP05_0003E,  
                whitePop = DP05_0037E, blackPop = DP05_0038E,
                asianPop = DP05_0067E, hispanicPop = DP05_0071E,
                adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1));

acsData %>%
  magrittr::extract(1:10,)

##                  GEO_ID totPop elderlyPop malePop femalePop whitePop blackPop
## 1  1400000US36005000100   7080         51    6503       577     1773     4239
## 2  1400000US36005000200   4542        950    2264      2278     2165     1279
## 3  1400000US36005000400   5634        710    2807      2827     2623     1699
## 4  1400000US36005001600   5917        989    2365      3552     2406     2434
## 5  1400000US36005001900   2765         76    1363      1402      585     1041
## 6  1400000US36005002000   9409        977    4119      5290     3185     4487
## 7  1400000US36005002300   4600        648    2175      2425      479     2122
## 8  1400000US36005002400    172          0     121        51       69       89
## 9  1400000US36005002500   5887        548    2958      2929      903     1344
## 10 1400000US36005002701   2868        243    1259      1609      243      987
##    asianPop hispanicPop adultPop citizenAdult censusCode
## 1       130        2329     6909         6100  005000100
## 2       119        3367     3582         2952  005000200
## 3       226        3873     4507         4214  005000400
## 4        68        3603     4416         3851  005001600
## 5       130        1413     2008         1787  005001900
## 6        29        5905     6851         6170  005002000
## 7        27        2674     3498         3056  005002300
## 8        14           0      131           42  005002400
## 9        68        4562     4237         2722  005002500
## 10        0        1985     1848         1412  005002701

censustracts %<>% dplyr::mutate(cntyFIPS = case_when(
  boro_name == 'Bronx' ~ '005',
  boro_name == 'Brooklyn' ~ '047',
  boro_name == 'Manhattan' ~ '061',
  boro_name == 'Queens' ~ '081',
  boro_name == 'Staten Island' ~ '085'),
  tractFIPS = paste(cntyFIPS, ct2010, sep='')
)

str(censustracts)

## Classes 'sf' and 'data.frame':   2165 obs. of  14 variables:
##  $ boro_code : chr  "5" "1" "1" "1" ...
##  $ boro_ct201: chr  "5000900" "1009800" "1010000" "1010200" ...
##  $ boro_name : chr  "Staten Island" "Manhattan" "Manhattan" "Manhattan" ...
##  $ cdeligibil: chr  "E" "I" "I" "I" ...
##  $ ct2010    : chr  "000900" "009800" "010000" "010200" ...
##  $ ctlabel   : chr  "9" "98" "100" "102" ...
##  $ ntacode   : chr  "SI22" "MN19" "MN19" "MN17" ...
##  $ ntaname   : chr  "West New Brighton-New Brighton-St. George" "Turtle Bay-East Midtown" "Turtle Bay-East Midtown" "Midtown-Midtown South" ...
##  $ puma      : chr  "3903" "3808" "3808" "3807" ...
##  $ shape_area: num  2497010 1906016 1860938 1860993 1864600 ...
##  $ shape_leng: num  7729 5534 5692 5688 5693 ...
##  $ geometry  :sfc_MULTIPOLYGON of length 2165; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:28, 1:2] -74.1 -74.1 -74.1 -74.1 -74.1 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  $ cntyFIPS  : chr  "085" "061" "061" "061" ...
##  $ tractFIPS : chr  "085000900" "061009800" "061010000" "061010200" ...
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:13] "boro_code" "boro_ct201" "boro_name" "cdeligibil" ...

popData <- merge(censustracts, acsData, by.x ='tractFIPS', by.y = 'censusCode')
sum(popData$totPop)

## [1] 8443713

popNYC <- sf::st_transform(popData, st_crs(nyc_zip_joined)) 

covidPopZipNYC <- sf::st_join(
   nyc_zip_joined, 
  popNYC %>% sf::st_centroid(),  # Convert census tracts to points for spatial join
  join = st_contains
) %>%
  group_by(ZIPCODE.x) %>%  # Only group by ZIPCODE
  summarise(
    totPop = sum(totPop, na.rm = TRUE), 
    malePctg = sum(malePop, na.rm = TRUE) / totPop * 100,
    asianPop = sum(asianPop, na.rm = TRUE),
    blackPop = sum(blackPop, na.rm = TRUE),
    hispanicPop = sum(hispanicPop, na.rm = TRUE),
    whitePop = sum(whitePop, na.rm = TRUE),
    COVID_CASE_COUNT = sum(COVID_CASE_COUNT, na.rm = TRUE)
  )

## Warning: st_centroid assumes attributes are constant over geometries

covidPopZipNYC %>% dplyr::filter(is.na(totPop))

## Simple feature collection with 0 features and 8 fields
## Bounding box:  xmin: NA ymin: NA xmax: NA ymax: NA
## Geodetic CRS:  WGS 84
## # A tibble: 0 × 9
## # ℹ 9 variables: ZIPCODE.x <chr>, totPop <int>, malePctg <dbl>, asianPop <int>,
## #   blackPop <int>, hispanicPop <int>, whitePop <int>, COVID_CASE_COUNT <dbl>,
## #   geometry <GEOMETRY [°]>

plot(covidPopZipNYC["COVID_CASE_COUNT"], breaks='jenks')

plot(covidPopZipNYC["asianPop"], breaks='jenks')

R Week 08 Assignment

Erin Witt

3/28/2025

Explanation of the template

R Spatial Lab Assignment # 1

task 1:

task 2:

task 3:

task 4: