Explanation of the template

Update the title with your information. Make sure to include identification information so that we know it is your submission.

Also update the author name and date accordingly.

Check out the Source Code from the top-right corner </>Code menu.

In the following R code chunk, load_packages is the code chunk name. include=FALSE suggests that the code chunk will run, but the code itself and its outputs will not be included in the rendered HTML. echo=TRUE in the following code chunk suggests that the code and results from running the code will be included in the rendered HTML.

R Spatial Lab Assignment # 1

task 1:

nyc_zip <- st_read("./Week_07/Data/R-Spatial_I_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp")
## Reading layer `ZIP_CODE_040114' from data source 
##   `C:\Users\erins\OneDrive - SOHO SOLUTIONS INC\Documents\assignment_4\Week_07\Data\R-Spatial_I_Lab\ZIP_CODE_040114\ZIP_CODE_040114.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
tests <- readr::read_csv("data/R-Spatial_II_Lab/R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv", lazy = FALSE)
## Rows: 177 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): NEIGHBORHOOD_NAME, BOROUGH_GROUP, label
## dbl (10): MODIFIED_ZCTA, lat, lon, COVID_CASE_COUNT, COVID_CASE_RATE, POP_DE...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
covid_data <- tests %>% mutate(MODIFIED_ZCTA = as.character(MODIFIED_ZCTA))

#perform the actual join
nyc_zip_joined <- nyc_zip %>%
  left_join(covid_data, by = c("ZIPCODE" = "MODIFIED_ZCTA"))

task 2:

food_stores <- st_read('data/R-Spatial_II_Lab/R-Spatial_II_Lab/nycFoodStore.shp')
## Reading layer `nycFoodStore' from data source 
##   `C:\Users\erins\OneDrive - SOHO SOLUTIONS INC\Documents\assignment_4\data\R-Spatial_II_Lab\R-Spatial_II_Lab\nycFoodStore.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS:  WGS 84
nyc_zip <- st_transform(nyc_zip, crs = st_crs(food_stores))

food_retail <- food_stores %>%
  filter(str_detect(Estbl_T, "A"))

# assign each of the food stores to a zip code
food_by_zip <- st_join(food_retail %>% st_transform(st_crs(nyc_zip_joined)), 
                       nyc_zip_joined, join = st_within)

# count the number of food stores per zip
food_count <- food_by_zip %>%
  group_by(ZIPCODE) %>%
  summarise(store_count = n(), .groups = "drop")

# join the store count data back to the zip code polygons
nyc_zip_joined <- nyc_zip_joined %>%
  left_join(st_drop_geometry(food_count), by = "ZIPCODE") 

# get rid of all the NA values so they just say 0
nyc_zip_joined$store_count[is.na(nyc_zip_joined$store_count)] <- 0

task 3:

health <- readr::read_csv("data/R-Spatial_II_Lab/R-Spatial_II_Lab/NYS_Health_Facility.csv", lazy = FALSE)
## Rows: 3990 Columns: 36
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (28): Facility Name, Short Description, Description, Facility Open Date,...
## dbl  (8): Facility ID, Facility Phone Number, Facility Fax Number, Facility ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
health <- health %>%
  filter(!is.na(`Facility Longitude`), !is.na(`Facility Latitude`))  # Fix column reference

# Convert to sf object
facilities_sf <- st_as_sf(health, coords = c("Facility Longitude", "Facility Latitude"), crs = 4326)

# Ensure ZIP code shapefile has the same CRS
nyc_zip_joined <- st_transform(nyc_zip_joined, crs = st_crs(facilities_sf))  # Reproject ZIP codes

# Perform spatial join
facilities_with_zip <- st_join(facilities_sf, nyc_zip_joined, join = st_within)

# count number of facilities per ZIP code
facility_count <- facilities_with_zip %>%
  group_by(ZIPCODE) %>%
  summarise(count = n())

# join count data to ZIP code shapefile
zip_codes_count <- st_join(nyc_zip_joined, facility_count, join = st_within)

hospice_sf <- facilities_sf %>%
  filter(str_detect(`Short Description`, "HOSP-EC"))

hospice_with_zip <- st_join(hospice_sf, nyc_zip_joined, join = st_within)

# group them by zip codes 
hospice_count_by_zip <- hospice_with_zip %>%
  group_by(ZIPCODE) %>%
  summarise(hospice_count = n())

nyc_zip_joined <- st_join(nyc_zip_joined, hospice_count_by_zip, left = TRUE)

mapview(nyc_zip_joined, zcol = "hospice_count")

task 4:

# I kind of struggled with this one and am going to go to office hours this week to get more feedback on how to do this. 

censustracts <- sf::st_read('data/R-Spatial_II_Lab/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp', stringsAsFactors = FALSE)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\erins\OneDrive - SOHO SOLUTIONS INC\Documents\assignment_4\data\R-Spatial_II_Lab\R-Spatial_II_Lab\2010 Census Tracts\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)
acsData <- readLines("data/R-Spatial_II_Lab/R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>% 
  textConnection() %>%
  read.csv(header=TRUE, quote= "\"") %>%
  dplyr::select(GEO_ID, 
                totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
                malePop = DP05_0002E, femalePop = DP05_0003E,  
                whitePop = DP05_0037E, blackPop = DP05_0038E,
                asianPop = DP05_0067E, hispanicPop = DP05_0071E,
                adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1));

acsData %>%
  magrittr::extract(1:10,)
##                  GEO_ID totPop elderlyPop malePop femalePop whitePop blackPop
## 1  1400000US36005000100   7080         51    6503       577     1773     4239
## 2  1400000US36005000200   4542        950    2264      2278     2165     1279
## 3  1400000US36005000400   5634        710    2807      2827     2623     1699
## 4  1400000US36005001600   5917        989    2365      3552     2406     2434
## 5  1400000US36005001900   2765         76    1363      1402      585     1041
## 6  1400000US36005002000   9409        977    4119      5290     3185     4487
## 7  1400000US36005002300   4600        648    2175      2425      479     2122
## 8  1400000US36005002400    172          0     121        51       69       89
## 9  1400000US36005002500   5887        548    2958      2929      903     1344
## 10 1400000US36005002701   2868        243    1259      1609      243      987
##    asianPop hispanicPop adultPop citizenAdult censusCode
## 1       130        2329     6909         6100  005000100
## 2       119        3367     3582         2952  005000200
## 3       226        3873     4507         4214  005000400
## 4        68        3603     4416         3851  005001600
## 5       130        1413     2008         1787  005001900
## 6        29        5905     6851         6170  005002000
## 7        27        2674     3498         3056  005002300
## 8        14           0      131           42  005002400
## 9        68        4562     4237         2722  005002500
## 10        0        1985     1848         1412  005002701
censustracts %<>% dplyr::mutate(cntyFIPS = case_when(
  boro_name == 'Bronx' ~ '005',
  boro_name == 'Brooklyn' ~ '047',
  boro_name == 'Manhattan' ~ '061',
  boro_name == 'Queens' ~ '081',
  boro_name == 'Staten Island' ~ '085'),
  tractFIPS = paste(cntyFIPS, ct2010, sep='')
)

str(censustracts)
## Classes 'sf' and 'data.frame':   2165 obs. of  14 variables:
##  $ boro_code : chr  "5" "1" "1" "1" ...
##  $ boro_ct201: chr  "5000900" "1009800" "1010000" "1010200" ...
##  $ boro_name : chr  "Staten Island" "Manhattan" "Manhattan" "Manhattan" ...
##  $ cdeligibil: chr  "E" "I" "I" "I" ...
##  $ ct2010    : chr  "000900" "009800" "010000" "010200" ...
##  $ ctlabel   : chr  "9" "98" "100" "102" ...
##  $ ntacode   : chr  "SI22" "MN19" "MN19" "MN17" ...
##  $ ntaname   : chr  "West New Brighton-New Brighton-St. George" "Turtle Bay-East Midtown" "Turtle Bay-East Midtown" "Midtown-Midtown South" ...
##  $ puma      : chr  "3903" "3808" "3808" "3807" ...
##  $ shape_area: num  2497010 1906016 1860938 1860993 1864600 ...
##  $ shape_leng: num  7729 5534 5692 5688 5693 ...
##  $ geometry  :sfc_MULTIPOLYGON of length 2165; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:28, 1:2] -74.1 -74.1 -74.1 -74.1 -74.1 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  $ cntyFIPS  : chr  "085" "061" "061" "061" ...
##  $ tractFIPS : chr  "085000900" "061009800" "061010000" "061010200" ...
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:13] "boro_code" "boro_ct201" "boro_name" "cdeligibil" ...
popData <- merge(censustracts, acsData, by.x ='tractFIPS', by.y = 'censusCode')
sum(popData$totPop)
## [1] 8443713
popNYC <- sf::st_transform(popData, st_crs(nyc_zip_joined)) 

covidPopZipNYC <- sf::st_join(
   nyc_zip_joined, 
  popNYC %>% sf::st_centroid(),  # Convert census tracts to points for spatial join
  join = st_contains
) %>%
  group_by(ZIPCODE.x) %>%  # Only group by ZIPCODE
  summarise(
    totPop = sum(totPop, na.rm = TRUE), 
    malePctg = sum(malePop, na.rm = TRUE) / totPop * 100,
    asianPop = sum(asianPop, na.rm = TRUE),
    blackPop = sum(blackPop, na.rm = TRUE),
    hispanicPop = sum(hispanicPop, na.rm = TRUE),
    whitePop = sum(whitePop, na.rm = TRUE),
    COVID_CASE_COUNT = sum(COVID_CASE_COUNT, na.rm = TRUE)
  )
## Warning: st_centroid assumes attributes are constant over geometries
covidPopZipNYC %>% dplyr::filter(is.na(totPop))
## Simple feature collection with 0 features and 8 fields
## Bounding box:  xmin: NA ymin: NA xmax: NA ymax: NA
## Geodetic CRS:  WGS 84
## # A tibble: 0 × 9
## # ℹ 9 variables: ZIPCODE.x <chr>, totPop <int>, malePctg <dbl>, asianPop <int>,
## #   blackPop <int>, hispanicPop <int>, whitePop <int>, COVID_CASE_COUNT <dbl>,
## #   geometry <GEOMETRY [°]>
plot(covidPopZipNYC["COVID_CASE_COUNT"], breaks='jenks')

plot(covidPopZipNYC["asianPop"], breaks='jenks')