R Spatial Lab Assignment # 2

Task 1: Join the COVID-19 data to the NYC zip code area data

setwd("~/R-Spatial/R-Spatial_II_Lab")

COVID_df <- read_csv("tests-by-zcta_2020_04_19.csv")
## Rows: 178 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (4): MODZCTA, Positive, Total, zcta_cum.perc_pos
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ZIP_sf <- ZIP_sf %>%
  mutate(ZIPCODE = as.character(ZIPCODE))

COVID_df <- COVID_df %>%
  mutate(MODZCTA = as.character(MODZCTA))

ZIP_covid_sf <- ZIP_sf %>%
  left_join(COVID_df, by = c("ZIPCODE" = "MODZCTA"))

Task 2:Join NYC food retails store data (points) to zip code data

##Filter FOOD Establishment types and join data to task 1 data 

ZIP_covid_food_sf <- ZIP_covid_sf %>%
  left_join(
    NYS_Retail_Food_sf %>%
      filter(`Establishment Type` %in% c("A", "B", "J")) %>%
      st_join(ZIP_covid_sf, join = st_within) %>%
      st_drop_geometry() %>%
      count(ZIPCODE, name = "FoodStoreNum"),
    by = "ZIPCODE"
  ) %>%
  mutate(FoodStoreNum = ifelse(is.na(FoodStoreNum), 0, FoodStoreNum))

Task 3: Aggregate NYC Health Facilities (points) to Zip code data

#Filter HEALTH Facilities, here we will only use hospitals, hospital extensions and nursing homes 
Health_Filtered_sf <- NYS_health_facility_sf %>% 
  dplyr::filter(`Short Description` %in% c("HOSP", "HOSP-EC","NH"))

#Transform CRS to match ZIP codes
Health_Filtered_sf <- st_transform(Health_Filtered_sf, st_crs(ZIP_covid_food_sf))
Health_Filtered_sf <- st_filter(Health_Filtered_sf, ZIP_sf)

#Join data to Task 2 data 
Health_count_by_zip <- Health_Filtered_sf %>%
  st_join(ZIP_covid_food_sf, join = st_within) %>%
  st_drop_geometry() %>%
  count(ZIPCODE, name = "HealthFacilityNum")

ZIP_covid_food_health_sf <- ZIP_covid_food_sf %>%
  left_join(Health_count_by_zip, by = "ZIPCODE") %>%
  mutate(HealthFacilityNum = ifelse(is.na(HealthFacilityNum), 0, HealthFacilityNum))

Task 4: Join Census ACS data to NYC Planning Census Tract Data

#ACS Data and Census 

NYC_Census <- sf::st_read('~/R-Spatial/R-Spatial_II_Lab/2010 Census Tracts', stringsAsFactors = FALSE)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `/Users/mariaosorio/R-Spatial/R-Spatial_II_Lab/2010 Census Tracts' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)
NYC_Census %<>% dplyr::mutate(cntyFIPS = case_when(
  boro_name == 'Bronx' ~ '005',
  boro_name == 'Brooklyn' ~ '047',
  boro_name == 'Manhattan' ~ '061',
  boro_name == 'Queens' ~ '081',
  boro_name == 'Staten Island' ~ '085'),
  tractFIPS = paste(cntyFIPS, ct2010, sep='')
)

ACS_Data<- readLines("~/R-Spatial/R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  extract(-2) %>%
  textConnection() %>%
  read.csv(header = TRUE, quote = "\"") %>%
  select(
    GEO_ID,
    totPop = DP05_0001E,
    elderlyPop = DP05_0024E,
    malePop = DP05_0002E,
    femalePop = DP05_0003E,
    whitePop = DP05_0037E,
    blackPop = DP05_0038E,
    asianPop = DP05_0067E,
    hispanicPop = DP05_0071E,
    adultPop = DP05_0021E,
    citizenAdult = DP05_0087E
  ) %>%
  mutate(censusCode = str_sub(GEO_ID, -9, -1))

ACS_Data %>%
  magrittr::extract(1:10,)
##                  GEO_ID totPop elderlyPop malePop femalePop whitePop blackPop
## 1  1400000US36005000100   7080         51    6503       577     1773     4239
## 2  1400000US36005000200   4542        950    2264      2278     2165     1279
## 3  1400000US36005000400   5634        710    2807      2827     2623     1699
## 4  1400000US36005001600   5917        989    2365      3552     2406     2434
## 5  1400000US36005001900   2765         76    1363      1402      585     1041
## 6  1400000US36005002000   9409        977    4119      5290     3185     4487
## 7  1400000US36005002300   4600        648    2175      2425      479     2122
## 8  1400000US36005002400    172          0     121        51       69       89
## 9  1400000US36005002500   5887        548    2958      2929      903     1344
## 10 1400000US36005002701   2868        243    1259      1609      243      987
##    asianPop hispanicPop adultPop citizenAdult censusCode
## 1       130        2329     6909         6100  005000100
## 2       119        3367     3582         2952  005000200
## 3       226        3873     4507         4214  005000400
## 4        68        3603     4416         3851  005001600
## 5       130        1413     2008         1787  005001900
## 6        29        5905     6851         6170  005002000
## 7        27        2674     3498         3056  005002300
## 8        14           0      131           42  005002400
## 9        68        4562     4237         2722  005002500
## 10        0        1985     1848         1412  005002701
#Join ACS to track 
Population_Data <- merge(NYC_Census, ACS_Data, by.x = "tractFIPS", by.y = "censusCode")

Task 5: Aggregate ACS census data to ZIP code data

#Make sure CRS is same as ZIP 
Population_NYC <- st_transform(Population_Data, st_crs(ZIP_covid_food_health_sf))

COVID_POP_ZIP_NYC <- st_join(
  ZIP_covid_food_health_sf,
  Population_NYC %>% st_centroid(),
  join = st_contains
) %>%
  group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive, Total, FoodStoreNum, HealthFacilityNum) %>%
  summarise(
    totPop = sum(totPop, na.rm = TRUE),
    malePctg = sum(malePop, na.rm = TRUE) / totPop * 100,
    asianPop = sum(asianPop, na.rm = TRUE),
    blackPop = sum(blackPop, na.rm = TRUE),
    hispanicPop = sum(hispanicPop, na.rm = TRUE),
    whitePop = sum(whitePop, na.rm = TRUE)
  )
## Warning: st_centroid assumes attributes are constant over geometries
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by ZIPCODE, PO_NAME, POPULATION, COUNTY,
##   Positive, Total, FoodStoreNum, and HealthFacilityNum.
## ℹ Output is grouped by ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive, Total,
##   and FoodStoreNum.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive, Total,
##   FoodStoreNum, HealthFacilityNum))` for per-operation grouping
##   (`?dplyr::dplyr_by`) instead.
#Check
sum(COVID_POP_ZIP_NYC$totPop, na.rm = TRUE)
## [1] 8446394
COVID_POP_ZIP_NYC <- COVID_POP_ZIP_NYC %>%
      rename(
      Positive_Covid= Positive,
      Total_Tests= Total
    )
str(COVID_POP_ZIP_NYC)
## sf [251 × 15] (S3: sf/grouped_df/tbl_df/tbl/data.frame)
##  $ ZIPCODE          : chr [1:251] "00083" "10001" "10002" "10003" ...
##  $ PO_NAME          : chr [1:251] "Central Park" "New York" "New York" "New York" ...
##  $ POPULATION       : num [1:251] 25 22413 81305 55878 2187 ...
##  $ COUNTY           : chr [1:251] "New York" "New York" "New York" "New York" ...
##  $ Positive_Covid   : num [1:251] NA 260 712 347 24 44 14 40 518 201 ...
##  $ Total_Tests      : num [1:251] NA 571 1358 830 64 ...
##  $ FoodStoreNum     : num [1:251] 0 16 51 27 6 2 2 9 14 14 ...
##  $ HealthFacilityNum: num [1:251] 0 1 4 6 0 0 0 1 2 6 ...
##  $ totPop           : int [1:251] 3 19146 74310 53487 1731 8809 4639 15510 58683 28129 ...
##  $ malePctg         : num [1:251] 0 51.2 48.4 50.3 54.8 ...
##  $ asianPop         : int [1:251] 0 4837 32149 8027 358 1974 1195 6040 8503 4702 ...
##  $ blackPop         : int [1:251] 2 1092 5969 3130 71 185 52 1008 5745 1159 ...
##  $ hispanicPop      : int [1:251] 1 2435 20065 4375 150 504 507 1096 14672 2434 ...
##  $ whitePop         : int [1:251] 1 12485 24033 40503 1207 6419 3130 7912 35952 21438 ...
##  $ geometry         :sfc_GEOMETRY of length 251; first list element: List of 1
##   ..$ : num [1:216, 1:2] 998310 998283 998251 998178 998050 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
##  - attr(*, "groups")= tibble [251 × 8] (S3: tbl_df/tbl/data.frame)
##   ..$ ZIPCODE       : chr [1:251] "00083" "10001" "10002" "10003" ...
##   ..$ PO_NAME       : chr [1:251] "Central Park" "New York" "New York" "New York" ...
##   ..$ POPULATION    : num [1:251] 25 22413 81305 55878 2187 ...
##   ..$ COUNTY        : chr [1:251] "New York" "New York" "New York" "New York" ...
##   ..$ Positive_Covid: num [1:251] NA 260 712 347 24 44 14 40 518 201 ...
##   ..$ Total_Tests   : num [1:251] NA 571 1358 830 64 ...
##   ..$ FoodStoreNum  : num [1:251] 0 16 51 27 6 2 2 9 14 14 ...
##   ..$ .rows         : list<int> [1:251] 
##   .. ..$ : int 1
##   .. ..$ : int 2
##   .. ..$ : int 3
##   .. ..$ : int 4
##   .. ..$ : int 5
##   .. ..$ : int 6
##   .. ..$ : int 7
##   .. ..$ : int 8
##   .. ..$ : int 9
##   .. ..$ : int 10
##   .. ..$ : int 11
##   .. ..$ : int 12
##   .. ..$ : int 13
##   .. ..$ : int 14
##   .. ..$ : int 15
##   .. ..$ : int 16
##   .. ..$ : int 17
##   .. ..$ : int 18
##   .. ..$ : int 19
##   .. ..$ : int 20
##   .. ..$ : int 21
##   .. ..$ : int 22
##   .. ..$ : int 23
##   .. ..$ : int 24
##   .. ..$ : int 25
##   .. ..$ : int 26
##   .. ..$ : int 27
##   .. ..$ : int 28
##   .. ..$ : int 29
##   .. ..$ : int 30
##   .. ..$ : int 31
##   .. ..$ : int 32
##   .. ..$ : int 33
##   .. ..$ : int 34
##   .. ..$ : int 35
##   .. ..$ : int 36
##   .. ..$ : int 37
##   .. ..$ : int 38
##   .. ..$ : int 39
##   .. ..$ : int 40
##   .. ..$ : int 41
##   .. ..$ : int 42
##   .. ..$ : int 43
##   .. ..$ : int 44
##   .. ..$ : int 45
##   .. ..$ : int 46
##   .. ..$ : int 47
##   .. ..$ : int 48
##   .. ..$ : int 49
##   .. ..$ : int 50
##   .. ..$ : int 51
##   .. ..$ : int 52
##   .. ..$ : int 53
##   .. ..$ : int 54
##   .. ..$ : int 55
##   .. ..$ : int 56
##   .. ..$ : int 57
##   .. ..$ : int 58
##   .. ..$ : int 59
##   .. ..$ : int 60
##   .. ..$ : int 61
##   .. ..$ : int 62
##   .. ..$ : int 63
##   .. ..$ : int 64
##   .. ..$ : int 65
##   .. ..$ : int 66
##   .. ..$ : int 67
##   .. ..$ : int 68
##   .. ..$ : int 69
##   .. ..$ : int 70
##   .. ..$ : int 71
##   .. ..$ : int 72
##   .. ..$ : int 73
##   .. ..$ : int 74
##   .. ..$ : int 75
##   .. ..$ : int 76
##   .. ..$ : int 77
##   .. ..$ : int 78
##   .. ..$ : int 79
##   .. ..$ : int 80
##   .. ..$ : int 81
##   .. ..$ : int 82
##   .. ..$ : int 83
##   .. ..$ : int 84
##   .. ..$ : int 85
##   .. ..$ : int 86
##   .. ..$ : int 87
##   .. ..$ : int 88
##   .. ..$ : int 89
##   .. ..$ : int 90
##   .. ..$ : int 91
##   .. ..$ : int 92
##   .. ..$ : int 93
##   .. ..$ : int 94
##   .. ..$ : int 95
##   .. ..$ : int 96
##   .. ..$ : int 97
##   .. ..$ : int 98
##   .. ..$ : int 99
##   .. .. [list output truncated]
##   .. ..@ ptype: int(0) 
##   ..- attr(*, ".drop")= logi TRUE
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:14] "ZIPCODE" "PO_NAME" "POPULATION" "COUNTY" ...
mapview(COVID_POP_ZIP_NYC, zcol = "Positive_Covid")
mapview(COVID_POP_ZIP_NYC, zcol = "hispanicPop")
mapview(COVID_POP_ZIP_NYC, zcol = "HealthFacilityNum")