R Spatial Lab Assignment # 2
Task 1: Join the COVID-19 data to the NYC zip code area data
setwd("~/R-Spatial/R-Spatial_II_Lab")
COVID_df <- read_csv("tests-by-zcta_2020_04_19.csv")
## Rows: 178 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (4): MODZCTA, Positive, Total, zcta_cum.perc_pos
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ZIP_sf <- ZIP_sf %>%
mutate(ZIPCODE = as.character(ZIPCODE))
COVID_df <- COVID_df %>%
mutate(MODZCTA = as.character(MODZCTA))
ZIP_covid_sf <- ZIP_sf %>%
left_join(COVID_df, by = c("ZIPCODE" = "MODZCTA"))
Task 2:Join NYC food retails store data (points) to zip code
data
##Filter FOOD Establishment types and join data to task 1 data
ZIP_covid_food_sf <- ZIP_covid_sf %>%
left_join(
NYS_Retail_Food_sf %>%
filter(`Establishment Type` %in% c("A", "B", "J")) %>%
st_join(ZIP_covid_sf, join = st_within) %>%
st_drop_geometry() %>%
count(ZIPCODE, name = "FoodStoreNum"),
by = "ZIPCODE"
) %>%
mutate(FoodStoreNum = ifelse(is.na(FoodStoreNum), 0, FoodStoreNum))
Task 3: Aggregate NYC Health Facilities (points) to Zip code
data
#Filter HEALTH Facilities, here we will only use hospitals, hospital extensions and nursing homes
Health_Filtered_sf <- NYS_health_facility_sf %>%
dplyr::filter(`Short Description` %in% c("HOSP", "HOSP-EC","NH"))
#Transform CRS to match ZIP codes
Health_Filtered_sf <- st_transform(Health_Filtered_sf, st_crs(ZIP_covid_food_sf))
Health_Filtered_sf <- st_filter(Health_Filtered_sf, ZIP_sf)
#Join data to Task 2 data
Health_count_by_zip <- Health_Filtered_sf %>%
st_join(ZIP_covid_food_sf, join = st_within) %>%
st_drop_geometry() %>%
count(ZIPCODE, name = "HealthFacilityNum")
ZIP_covid_food_health_sf <- ZIP_covid_food_sf %>%
left_join(Health_count_by_zip, by = "ZIPCODE") %>%
mutate(HealthFacilityNum = ifelse(is.na(HealthFacilityNum), 0, HealthFacilityNum))
Task 4: Join Census ACS data to NYC Planning Census Tract Data
#ACS Data and Census
NYC_Census <- sf::st_read('~/R-Spatial/R-Spatial_II_Lab/2010 Census Tracts', stringsAsFactors = FALSE)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `/Users/mariaosorio/R-Spatial/R-Spatial_II_Lab/2010 Census Tracts'
## using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS84(DD)
NYC_Census %<>% dplyr::mutate(cntyFIPS = case_when(
boro_name == 'Bronx' ~ '005',
boro_name == 'Brooklyn' ~ '047',
boro_name == 'Manhattan' ~ '061',
boro_name == 'Queens' ~ '081',
boro_name == 'Staten Island' ~ '085'),
tractFIPS = paste(cntyFIPS, ct2010, sep='')
)
ACS_Data<- readLines("~/R-Spatial/R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
extract(-2) %>%
textConnection() %>%
read.csv(header = TRUE, quote = "\"") %>%
select(
GEO_ID,
totPop = DP05_0001E,
elderlyPop = DP05_0024E,
malePop = DP05_0002E,
femalePop = DP05_0003E,
whitePop = DP05_0037E,
blackPop = DP05_0038E,
asianPop = DP05_0067E,
hispanicPop = DP05_0071E,
adultPop = DP05_0021E,
citizenAdult = DP05_0087E
) %>%
mutate(censusCode = str_sub(GEO_ID, -9, -1))
ACS_Data %>%
magrittr::extract(1:10,)
## GEO_ID totPop elderlyPop malePop femalePop whitePop blackPop
## 1 1400000US36005000100 7080 51 6503 577 1773 4239
## 2 1400000US36005000200 4542 950 2264 2278 2165 1279
## 3 1400000US36005000400 5634 710 2807 2827 2623 1699
## 4 1400000US36005001600 5917 989 2365 3552 2406 2434
## 5 1400000US36005001900 2765 76 1363 1402 585 1041
## 6 1400000US36005002000 9409 977 4119 5290 3185 4487
## 7 1400000US36005002300 4600 648 2175 2425 479 2122
## 8 1400000US36005002400 172 0 121 51 69 89
## 9 1400000US36005002500 5887 548 2958 2929 903 1344
## 10 1400000US36005002701 2868 243 1259 1609 243 987
## asianPop hispanicPop adultPop citizenAdult censusCode
## 1 130 2329 6909 6100 005000100
## 2 119 3367 3582 2952 005000200
## 3 226 3873 4507 4214 005000400
## 4 68 3603 4416 3851 005001600
## 5 130 1413 2008 1787 005001900
## 6 29 5905 6851 6170 005002000
## 7 27 2674 3498 3056 005002300
## 8 14 0 131 42 005002400
## 9 68 4562 4237 2722 005002500
## 10 0 1985 1848 1412 005002701
#Join ACS to track
Population_Data <- merge(NYC_Census, ACS_Data, by.x = "tractFIPS", by.y = "censusCode")
Task 5: Aggregate ACS census data to ZIP code data
#Make sure CRS is same as ZIP
Population_NYC <- st_transform(Population_Data, st_crs(ZIP_covid_food_health_sf))
COVID_POP_ZIP_NYC <- st_join(
ZIP_covid_food_health_sf,
Population_NYC %>% st_centroid(),
join = st_contains
) %>%
group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive, Total, FoodStoreNum, HealthFacilityNum) %>%
summarise(
totPop = sum(totPop, na.rm = TRUE),
malePctg = sum(malePop, na.rm = TRUE) / totPop * 100,
asianPop = sum(asianPop, na.rm = TRUE),
blackPop = sum(blackPop, na.rm = TRUE),
hispanicPop = sum(hispanicPop, na.rm = TRUE),
whitePop = sum(whitePop, na.rm = TRUE)
)
## Warning: st_centroid assumes attributes are constant over geometries
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by ZIPCODE, PO_NAME, POPULATION, COUNTY,
## Positive, Total, FoodStoreNum, and HealthFacilityNum.
## ℹ Output is grouped by ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive, Total,
## and FoodStoreNum.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive, Total,
## FoodStoreNum, HealthFacilityNum))` for per-operation grouping
## (`?dplyr::dplyr_by`) instead.
#Check
sum(COVID_POP_ZIP_NYC$totPop, na.rm = TRUE)
## [1] 8446394
COVID_POP_ZIP_NYC <- COVID_POP_ZIP_NYC %>%
rename(
Positive_Covid= Positive,
Total_Tests= Total
)
str(COVID_POP_ZIP_NYC)
## sf [251 × 15] (S3: sf/grouped_df/tbl_df/tbl/data.frame)
## $ ZIPCODE : chr [1:251] "00083" "10001" "10002" "10003" ...
## $ PO_NAME : chr [1:251] "Central Park" "New York" "New York" "New York" ...
## $ POPULATION : num [1:251] 25 22413 81305 55878 2187 ...
## $ COUNTY : chr [1:251] "New York" "New York" "New York" "New York" ...
## $ Positive_Covid : num [1:251] NA 260 712 347 24 44 14 40 518 201 ...
## $ Total_Tests : num [1:251] NA 571 1358 830 64 ...
## $ FoodStoreNum : num [1:251] 0 16 51 27 6 2 2 9 14 14 ...
## $ HealthFacilityNum: num [1:251] 0 1 4 6 0 0 0 1 2 6 ...
## $ totPop : int [1:251] 3 19146 74310 53487 1731 8809 4639 15510 58683 28129 ...
## $ malePctg : num [1:251] 0 51.2 48.4 50.3 54.8 ...
## $ asianPop : int [1:251] 0 4837 32149 8027 358 1974 1195 6040 8503 4702 ...
## $ blackPop : int [1:251] 2 1092 5969 3130 71 185 52 1008 5745 1159 ...
## $ hispanicPop : int [1:251] 1 2435 20065 4375 150 504 507 1096 14672 2434 ...
## $ whitePop : int [1:251] 1 12485 24033 40503 1207 6419 3130 7912 35952 21438 ...
## $ geometry :sfc_GEOMETRY of length 251; first list element: List of 1
## ..$ : num [1:216, 1:2] 998310 998283 998251 998178 998050 ...
## ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
## - attr(*, "groups")= tibble [251 × 8] (S3: tbl_df/tbl/data.frame)
## ..$ ZIPCODE : chr [1:251] "00083" "10001" "10002" "10003" ...
## ..$ PO_NAME : chr [1:251] "Central Park" "New York" "New York" "New York" ...
## ..$ POPULATION : num [1:251] 25 22413 81305 55878 2187 ...
## ..$ COUNTY : chr [1:251] "New York" "New York" "New York" "New York" ...
## ..$ Positive_Covid: num [1:251] NA 260 712 347 24 44 14 40 518 201 ...
## ..$ Total_Tests : num [1:251] NA 571 1358 830 64 ...
## ..$ FoodStoreNum : num [1:251] 0 16 51 27 6 2 2 9 14 14 ...
## ..$ .rows : list<int> [1:251]
## .. ..$ : int 1
## .. ..$ : int 2
## .. ..$ : int 3
## .. ..$ : int 4
## .. ..$ : int 5
## .. ..$ : int 6
## .. ..$ : int 7
## .. ..$ : int 8
## .. ..$ : int 9
## .. ..$ : int 10
## .. ..$ : int 11
## .. ..$ : int 12
## .. ..$ : int 13
## .. ..$ : int 14
## .. ..$ : int 15
## .. ..$ : int 16
## .. ..$ : int 17
## .. ..$ : int 18
## .. ..$ : int 19
## .. ..$ : int 20
## .. ..$ : int 21
## .. ..$ : int 22
## .. ..$ : int 23
## .. ..$ : int 24
## .. ..$ : int 25
## .. ..$ : int 26
## .. ..$ : int 27
## .. ..$ : int 28
## .. ..$ : int 29
## .. ..$ : int 30
## .. ..$ : int 31
## .. ..$ : int 32
## .. ..$ : int 33
## .. ..$ : int 34
## .. ..$ : int 35
## .. ..$ : int 36
## .. ..$ : int 37
## .. ..$ : int 38
## .. ..$ : int 39
## .. ..$ : int 40
## .. ..$ : int 41
## .. ..$ : int 42
## .. ..$ : int 43
## .. ..$ : int 44
## .. ..$ : int 45
## .. ..$ : int 46
## .. ..$ : int 47
## .. ..$ : int 48
## .. ..$ : int 49
## .. ..$ : int 50
## .. ..$ : int 51
## .. ..$ : int 52
## .. ..$ : int 53
## .. ..$ : int 54
## .. ..$ : int 55
## .. ..$ : int 56
## .. ..$ : int 57
## .. ..$ : int 58
## .. ..$ : int 59
## .. ..$ : int 60
## .. ..$ : int 61
## .. ..$ : int 62
## .. ..$ : int 63
## .. ..$ : int 64
## .. ..$ : int 65
## .. ..$ : int 66
## .. ..$ : int 67
## .. ..$ : int 68
## .. ..$ : int 69
## .. ..$ : int 70
## .. ..$ : int 71
## .. ..$ : int 72
## .. ..$ : int 73
## .. ..$ : int 74
## .. ..$ : int 75
## .. ..$ : int 76
## .. ..$ : int 77
## .. ..$ : int 78
## .. ..$ : int 79
## .. ..$ : int 80
## .. ..$ : int 81
## .. ..$ : int 82
## .. ..$ : int 83
## .. ..$ : int 84
## .. ..$ : int 85
## .. ..$ : int 86
## .. ..$ : int 87
## .. ..$ : int 88
## .. ..$ : int 89
## .. ..$ : int 90
## .. ..$ : int 91
## .. ..$ : int 92
## .. ..$ : int 93
## .. ..$ : int 94
## .. ..$ : int 95
## .. ..$ : int 96
## .. ..$ : int 97
## .. ..$ : int 98
## .. ..$ : int 99
## .. .. [list output truncated]
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:14] "ZIPCODE" "PO_NAME" "POPULATION" "COUNTY" ...
mapview(COVID_POP_ZIP_NYC, zcol = "Positive_Covid")
mapview(COVID_POP_ZIP_NYC, zcol = "hispanicPop")