R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

library(sf)
## Warning: package 'sf' was built under R version 4.4.3
## Linking to GEOS 3.13.0, GDAL 3.10.1, PROJ 9.5.1; sf_use_s2() is TRUE
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.5.1
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(mapview)
## Warning: package 'mapview' was built under R version 4.4.3
load("R_spatial_lab7_objects.RData")
tracts <- st_read("geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp")
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\alyss\Documents\R\week8\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)
covid <- read_csv("tests-by-zcta_2021_04_23.csv")
## Rows: 177 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): NEIGHBORHOOD_NAME, BOROUGH_GROUP, label
## dbl (10): MODIFIED_ZCTA, lat, lon, COVID_CASE_COUNT, COVID_CASE_RATE, POP_DE...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
acs <- read_csv("ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv")
## Rows: 2168 Columns: 358
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (358): GEO_ID, NAME, DP05_0031PM, DP05_0032E, DP05_0032M, DP05_0032PE, D...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
names(nyc_zip)
##  [1] "ZIPCODE"    "BLDGZIP"    "PO_NAME"    "POPULATION" "AREA"      
##  [6] "STATE"      "COUNTY"     "ST_FIPS"    "CTY_FIPS"   "URL"       
## [11] "SHAPE_AREA" "SHAPE_LEN"  "geometry"
names(covid)
##  [1] "MODIFIED_ZCTA"     "NEIGHBORHOOD_NAME" "BOROUGH_GROUP"    
##  [4] "label"             "lat"               "lon"              
##  [7] "COVID_CASE_COUNT"  "COVID_CASE_RATE"   "POP_DENOMINATOR"  
## [10] "COVID_DEATH_COUNT" "COVID_DEATH_RATE"  "PERCENT_POSITIVE" 
## [13] "TOTAL_COVID_TESTS"
names(acs)
##   [1] "GEO_ID"      "NAME"        "DP05_0031PM" "DP05_0032E"  "DP05_0032M" 
##   [6] "DP05_0032PE" "DP05_0032PM" "DP05_0033E"  "DP05_0033M"  "DP05_0033PE"
##  [11] "DP05_0033PM" "DP05_0034E"  "DP05_0034M"  "DP05_0034PE" "DP05_0034PM"
##  [16] "DP05_0035E"  "DP05_0035M"  "DP05_0035PE" "DP05_0035PM" "DP05_0036E" 
##  [21] "DP05_0036M"  "DP05_0036PE" "DP05_0036PM" "DP05_0037E"  "DP05_0037M" 
##  [26] "DP05_0037PE" "DP05_0037PM" "DP05_0038E"  "DP05_0038M"  "DP05_0038PE"
##  [31] "DP05_0038PM" "DP05_0039E"  "DP05_0039M"  "DP05_0039PE" "DP05_0039PM"
##  [36] "DP05_0040E"  "DP05_0040M"  "DP05_0040PE" "DP05_0040PM" "DP05_0041E" 
##  [41] "DP05_0041M"  "DP05_0041PE" "DP05_0041PM" "DP05_0042E"  "DP05_0042M" 
##  [46] "DP05_0042PE" "DP05_0042PM" "DP05_0043E"  "DP05_0043M"  "DP05_0043PE"
##  [51] "DP05_0043PM" "DP05_0044E"  "DP05_0044M"  "DP05_0044PE" "DP05_0044PM"
##  [56] "DP05_0045E"  "DP05_0045M"  "DP05_0045PE" "DP05_0045PM" "DP05_0046E" 
##  [61] "DP05_0046M"  "DP05_0046PE" "DP05_0046PM" "DP05_0047E"  "DP05_0047M" 
##  [66] "DP05_0047PE" "DP05_0047PM" "DP05_0048E"  "DP05_0048M"  "DP05_0048PE"
##  [71] "DP05_0048PM" "DP05_0049E"  "DP05_0049M"  "DP05_0049PE" "DP05_0049PM"
##  [76] "DP05_0050E"  "DP05_0050M"  "DP05_0050PE" "DP05_0050PM" "DP05_0051E" 
##  [81] "DP05_0051M"  "DP05_0051PE" "DP05_0051PM" "DP05_0052E"  "DP05_0052M" 
##  [86] "DP05_0052PE" "DP05_0052PM" "DP05_0053E"  "DP05_0053M"  "DP05_0053PE"
##  [91] "DP05_0053PM" "DP05_0054E"  "DP05_0054M"  "DP05_0054PE" "DP05_0054PM"
##  [96] "DP05_0055E"  "DP05_0055M"  "DP05_0055PE" "DP05_0055PM" "DP05_0056E" 
## [101] "DP05_0056M"  "DP05_0056PE" "DP05_0056PM" "DP05_0057E"  "DP05_0057M" 
## [106] "DP05_0057PE" "DP05_0057PM" "DP05_0058E"  "DP05_0058M"  "DP05_0058PE"
## [111] "DP05_0058PM" "DP05_0059E"  "DP05_0059M"  "DP05_0059PE" "DP05_0059PM"
## [116] "DP05_0060E"  "DP05_0060M"  "DP05_0060PE" "DP05_0060PM" "DP05_0061E" 
## [121] "DP05_0061M"  "DP05_0061PE" "DP05_0061PM" "DP05_0062E"  "DP05_0062M" 
## [126] "DP05_0062PE" "DP05_0062PM" "DP05_0063E"  "DP05_0063M"  "DP05_0063PE"
## [131] "DP05_0063PM" "DP05_0064E"  "DP05_0064M"  "DP05_0064PE" "DP05_0064PM"
## [136] "DP05_0065E"  "DP05_0065M"  "DP05_0065PE" "DP05_0065PM" "DP05_0066E" 
## [141] "DP05_0066M"  "DP05_0066PE" "DP05_0066PM" "DP05_0067E"  "DP05_0067M" 
## [146] "DP05_0067PE" "DP05_0067PM" "DP05_0068E"  "DP05_0068M"  "DP05_0068PE"
## [151] "DP05_0068PM" "DP05_0069E"  "DP05_0069M"  "DP05_0069PE" "DP05_0069PM"
## [156] "DP05_0070E"  "DP05_0070M"  "DP05_0070PE" "DP05_0070PM" "DP05_0071E" 
## [161] "DP05_0071M"  "DP05_0071PE" "DP05_0071PM" "DP05_0072E"  "DP05_0072M" 
## [166] "DP05_0072PE" "DP05_0072PM" "DP05_0073E"  "DP05_0073M"  "DP05_0073PE"
## [171] "DP05_0073PM" "DP05_0074E"  "DP05_0074M"  "DP05_0074PE" "DP05_0074PM"
## [176] "DP05_0075E"  "DP05_0075M"  "DP05_0075PE" "DP05_0075PM" "DP05_0076E" 
## [181] "DP05_0076M"  "DP05_0076PE" "DP05_0076PM" "DP05_0077E"  "DP05_0077M" 
## [186] "DP05_0077PE" "DP05_0077PM" "DP05_0078E"  "DP05_0078M"  "DP05_0078PE"
## [191] "DP05_0078PM" "DP05_0079E"  "DP05_0079M"  "DP05_0079PE" "DP05_0079PM"
## [196] "DP05_0080E"  "DP05_0080M"  "DP05_0080PE" "DP05_0080PM" "DP05_0081E" 
## [201] "DP05_0081M"  "DP05_0081PE" "DP05_0081PM" "DP05_0082E"  "DP05_0082M" 
## [206] "DP05_0082PE" "DP05_0082PM" "DP05_0083E"  "DP05_0083M"  "DP05_0083PE"
## [211] "DP05_0083PM" "DP05_0084E"  "DP05_0084M"  "DP05_0084PE" "DP05_0084PM"
## [216] "DP05_0085E"  "DP05_0085M"  "DP05_0085PE" "DP05_0085PM" "DP05_0086E" 
## [221] "DP05_0086M"  "DP05_0086PE" "DP05_0086PM" "DP05_0087E"  "DP05_0087M" 
## [226] "DP05_0087PE" "DP05_0087PM" "DP05_0088E"  "DP05_0088M"  "DP05_0088PE"
## [231] "DP05_0088PM" "DP05_0089E"  "DP05_0089M"  "DP05_0089PE" "DP05_0089PM"
## [236] "DP05_0001E"  "DP05_0001M"  "DP05_0001PE" "DP05_0001PM" "DP05_0002E" 
## [241] "DP05_0002M"  "DP05_0002PE" "DP05_0002PM" "DP05_0003E"  "DP05_0003M" 
## [246] "DP05_0003PE" "DP05_0003PM" "DP05_0004E"  "DP05_0004M"  "DP05_0004PE"
## [251] "DP05_0004PM" "DP05_0005E"  "DP05_0005M"  "DP05_0005PE" "DP05_0005PM"
## [256] "DP05_0006E"  "DP05_0006M"  "DP05_0006PE" "DP05_0006PM" "DP05_0007E" 
## [261] "DP05_0007M"  "DP05_0007PE" "DP05_0007PM" "DP05_0008E"  "DP05_0008M" 
## [266] "DP05_0008PE" "DP05_0008PM" "DP05_0009E"  "DP05_0009M"  "DP05_0009PE"
## [271] "DP05_0009PM" "DP05_0010E"  "DP05_0010M"  "DP05_0010PE" "DP05_0010PM"
## [276] "DP05_0011E"  "DP05_0011M"  "DP05_0011PE" "DP05_0011PM" "DP05_0012E" 
## [281] "DP05_0012M"  "DP05_0012PE" "DP05_0012PM" "DP05_0013E"  "DP05_0013M" 
## [286] "DP05_0013PE" "DP05_0013PM" "DP05_0014E"  "DP05_0014M"  "DP05_0014PE"
## [291] "DP05_0014PM" "DP05_0015E"  "DP05_0015M"  "DP05_0015PE" "DP05_0015PM"
## [296] "DP05_0016E"  "DP05_0016M"  "DP05_0016PE" "DP05_0016PM" "DP05_0017E" 
## [301] "DP05_0017M"  "DP05_0017PE" "DP05_0017PM" "DP05_0018E"  "DP05_0018M" 
## [306] "DP05_0018PE" "DP05_0018PM" "DP05_0019E"  "DP05_0019M"  "DP05_0019PE"
## [311] "DP05_0019PM" "DP05_0020E"  "DP05_0020M"  "DP05_0020PE" "DP05_0020PM"
## [316] "DP05_0021E"  "DP05_0021M"  "DP05_0021PE" "DP05_0021PM" "DP05_0022E" 
## [321] "DP05_0022M"  "DP05_0022PE" "DP05_0022PM" "DP05_0023E"  "DP05_0023M" 
## [326] "DP05_0023PE" "DP05_0023PM" "DP05_0024E"  "DP05_0024M"  "DP05_0024PE"
## [331] "DP05_0024PM" "DP05_0025E"  "DP05_0025M"  "DP05_0025PE" "DP05_0025PM"
## [336] "DP05_0026E"  "DP05_0026M"  "DP05_0026PE" "DP05_0026PM" "DP05_0027E" 
## [341] "DP05_0027M"  "DP05_0027PE" "DP05_0027PM" "DP05_0028E"  "DP05_0028M" 
## [346] "DP05_0028PE" "DP05_0028PM" "DP05_0029E"  "DP05_0029M"  "DP05_0029PE"
## [351] "DP05_0029PM" "DP05_0030E"  "DP05_0030M"  "DP05_0030PE" "DP05_0030PM"
## [356] "DP05_0031E"  "DP05_0031M"  "DP05_0031PE"
#covid join with zip code
nyc_zip2 <- nyc_zip %>%
  mutate(ZIPCODE = as.character(ZIPCODE))

covid2 <- covid %>%
  mutate(MODIFIED_ZCTA = sprintf("%05s", MODIFIED_ZCTA))

zpNYC <- nyc_zip2 %>%
  left_join(covid2, by = c("ZIPCODE" = "MODIFIED_ZCTA"))

names(zpNYC)
##  [1] "ZIPCODE"           "BLDGZIP"           "PO_NAME"          
##  [4] "POPULATION"        "AREA"              "STATE"            
##  [7] "COUNTY"            "ST_FIPS"           "CTY_FIPS"         
## [10] "URL"               "SHAPE_AREA"        "SHAPE_LEN"        
## [13] "NEIGHBORHOOD_NAME" "BOROUGH_GROUP"     "label"            
## [16] "lat"               "lon"               "COVID_CASE_COUNT" 
## [19] "COVID_CASE_RATE"   "POP_DENOMINATOR"   "COVID_DEATH_COUNT"
## [22] "COVID_DEATH_RATE"  "PERCENT_POSITIVE"  "TOTAL_COVID_TESTS"
## [25] "geometry"
mapview(zpNYC, zcol = "COVID_CASE_COUNT")
#food stores with zip code
food_selected <- food_sf %>%
  filter(stringr::str_detect(Establishment.Type, "[AJD]"))

food_zip <- st_join(zpNYC, food_selected, join = st_contains) %>%
  group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT, TOTAL_COVID_TESTS) %>%
  summarise(FoodStoreNum = n(), .groups = "drop")

mapview(food_zip, zcol = "FoodStoreNum")
#nursing home with zip code
nursing_home_sf <- health_sf %>%
  filter(`Short Description` == "NH")

health_zip <- st_join(food_zip, nursing_home_sf, join = st_contains) %>%
  group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT, TOTAL_COVID_TESTS, FoodStoreNum) %>%
  summarise(NursingHomeNum = n(), .groups = "drop")

mapview(health_zip, zcol = "NursingHomeNum")
acsData <- readLines("ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>%
  textConnection() %>%
  read.csv(header = TRUE, quote = "\"") %>%
  dplyr::select(
    GEO_ID,
    totPop = DP05_0001E,
    elderlyPop = DP05_0024E,
    malePop = DP05_0002E,
    femalePop = DP05_0003E,
    whitePop = DP05_0037E,
    blackPop = DP05_0038E,
    asianPop = DP05_0067E,
    hispanicPop = DP05_0071E,
    adultPop = DP05_0021E,
    citizenAdult = DP05_0087E
  ) %>%
  mutate(censusCode = stringr::str_sub(GEO_ID, -9, -1))
#Build tracts with FIDS
tracts2 <- tracts %>%
  mutate(
    cntyFIPS = case_when(
      boro_name == "Bronx" ~ "005",
      boro_name == "Brooklyn" ~ "047",
      boro_name == "Manhattan" ~ "061",
      boro_name == "Queens" ~ "081",
      boro_name == "Staten Island" ~ "085"
    ),
    tractFIPS = paste(cntyFIPS, ct2010, sep = "")
  )

popData <- merge(tracts2, acsData, by.x = "tractFIPS", by.y = "censusCode")

sum(popData$totPop, na.rm = TRUE)
## [1] 8443713
names(popData)
##  [1] "tractFIPS"    "boro_code"    "boro_ct201"   "boro_name"    "cdeligibil"  
##  [6] "ct2010"       "ctlabel"      "ntacode"      "ntaname"      "puma"        
## [11] "shape_area"   "shape_leng"   "cntyFIPS"     "GEO_ID"       "totPop"      
## [16] "elderlyPop"   "malePop"      "femalePop"    "whitePop"     "blackPop"    
## [21] "asianPop"     "hispanicPop"  "adultPop"     "citizenAdult" "geometry"
popNYC <- st_transform(popData, st_crs(health_zip))
#Aggregate ACE data with tracts and zip code 
final_zip_sf <- st_join(
  health_zip,
  popNYC %>% st_centroid(),
  join = st_contains
) %>%
  group_by(
    ZIPCODE, PO_NAME, POPULATION, COUNTY,
    COVID_CASE_COUNT, TOTAL_COVID_TESTS,
    FoodStoreNum, NursingHomeNum
  ) %>%
  summarise(
    totPop = sum(totPop, na.rm = TRUE),
    elderlyPop = sum(elderlyPop, na.rm = TRUE),
    malePop = sum(malePop, na.rm = TRUE),
    femalePop = sum(femalePop, na.rm = TRUE),
    whitePop = sum(whitePop, na.rm = TRUE),
    blackPop = sum(blackPop, na.rm = TRUE),
    asianPop = sum(asianPop, na.rm = TRUE),
    hispanicPop = sum(hispanicPop, na.rm = TRUE),
    adultPop = sum(adultPop, na.rm = TRUE),
    citizenAdult = sum(citizenAdult, na.rm = TRUE),
    malePctg = ifelse(sum(totPop, na.rm = TRUE) > 0,
                      sum(malePop, na.rm = TRUE) / sum(totPop, na.rm = TRUE) * 100,
                      NA_real_),
    .groups = "drop"
  )
## Warning: st_centroid assumes attributes are constant over geometries
names(final_zip_sf)
##  [1] "ZIPCODE"           "PO_NAME"           "POPULATION"       
##  [4] "COUNTY"            "COVID_CASE_COUNT"  "TOTAL_COVID_TESTS"
##  [7] "FoodStoreNum"      "NursingHomeNum"    "totPop"           
## [10] "elderlyPop"        "malePop"           "femalePop"        
## [13] "whitePop"          "blackPop"          "asianPop"         
## [16] "hispanicPop"       "adultPop"          "citizenAdult"     
## [19] "malePctg"          "geometry"
sum(final_zip_sf$totPop, na.rm = TRUE)
## [1] 8446394
mapview(final_zip_sf, zcol = "COVID_CASE_COUNT")
mapview(final_zip_sf, zcol = "FoodStoreNum")
mapview(final_zip_sf, zcol = "NursingHomeNum")
mapview(final_zip_sf, zcol = "elderlyPop")
save(final_zip_sf, file = "R_spatial_lab8_sf.RData")