---
title: "R Week 8 Assignment"
author: "Alexandra Mejia"
date: "3/18/2026"
format:
html:
toc: true
toc-location: left
code-fold: true
code-summary: "Show the code"
code-tools: true
---
## Explanation of the template
Update the title with your information. Make sure to include identification information so that we know it is your submission.
Also update the author name and date accordingly.
Check out the Source Code from the top-right corner `</>Code` menu.
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
In the following R code chunk, `load_packages` is the code chunk name. `include=FALSE` suggests that the code chunk will run, but the code itself and its outputs will not be included in the rendered HTML. `echo=TRUE` in the following code chunk suggests that the code and results from running the code will be included in the rendered HTML.
```{r load_packages, include=FALSE}
require(tidyverse);
require(sf);
require(mapview);
require(magrittr)
```
## Lab 2 Assignment
Reading in files of data:
Reading NYC postal areas
```{r}
library(sf)
zip_code <- st_read("../Week_07/R-Spatial_I_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp")
st_crs(zip_code)
```
Reading NYS health facilities
```{r}
nys_health <- read_csv("R-Spatial_II_Lab/NYS_Health_Facility.csv", show_col_types = FALSE)
nys_health_clean <- nys_health %>%
filter(
!is.na(`Facility Longitude`),
!is.na(`Facility Latitude`),
`Facility Longitude` != 0,
`Facility Latitude` != 0
)
nys_health_sf <- st_as_sf(nys_health_clean, coords = c("Facility Longitude", "Facility Latitude"), crs = 4326)
```
Reading NYS retail food stores
```{r}
nys_food_store <- read_csv("../Week_07/R-Spatial_I_Lab/nys_retail_food_store_xy.csv",
locale = locale(encoding = "Latin1"),
show_col_types = FALSE)
nys_food_store_clean <- nys_food_store %>%
filter(
!is.na(`X`),
!is.na(`Y`),
`X` != 0,
`Y` != 0,
`ï..County` %in% c("Bronx", "Kings", "New York", "Queens", "Richmond")
)
nys_food_store_sf <- st_as_sf(nys_food_store_clean, coords = c("X", "Y"), crs = 4326)
```
Reading the COVID-19 data
```{r}
covid19 <- read_csv("R-Spatial_II_Lab/tests-by-zcta_2020_04_12.csv", show_col_types = FALSE)
```
1. Join the COVID-19 data to the NYC zip code area data
```{r}
nyc_sf_merged <- base::merge(zip_code, covid19, by.x = "ZIPCODE", by.y = "MODZCTA")
dplyr::left_join(zip_code %>%
dplyr::mutate(ZIPCODE=as.numeric(as.character(ZIPCODE))),
covid19,
by = c('ZIPCODE' = 'MODZCTA')) -> nyc_sf_merged
names(nyc_sf_merged)
```
2. Aggregate the NYC food retails store data (points) to the zip code data, so that we know how many retail stores in each zip code area.
```{r}
nys_food_store_sf <- st_transform(nys_food_store_sf, st_crs(nyc_sf_merged))
food_store_count <- nys_food_store_sf %>% dplyr::filter(stringr::str_detect(Establishment.Type, 'JAC')) %>%
st_join(nyc_sf_merged, ., join= st_intersects) %>%
group_by(ZIPCODE) %>%
summarise(FOOD_STORE_COUNT = n()) %>%
st_set_geometry(NULL)
food_store_merged <- dplyr::left_join(nyc_sf_merged, food_store_count, by = "ZIPCODE")
names(food_store_merged)
```
3. Aggregate the NYC health facilities (points) to the zip code data. Similarly, choose appropriate subtypes such as nursing homes from the facilities.
```{r}
nys_health_sf <- st_transform(nys_health_sf, st_crs(food_store_merged))
facilities_count <- nys_health_sf %>% dplyr::filter(`Short Description` == "NH") %>%
sf::st_join(food_store_merged, ., join = st_intersects) %>%
dplyr::group_by(ZIPCODE) %>%
dplyr::summarise(NURSING_HOME_COUNT = n()) %>%
sf::st_set_geometry(NULL)
facilities_merged <- dplyr::left_join(food_store_merged, facilities_count, by = "ZIPCODE")
names(facilities_merged)
```
4.Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.
```{r}
tracts <- st_read("R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp",
stringsAsFactors = FALSE)
acs <- readLines("R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv")
tracts <- tracts %>%
dplyr::mutate(
cntyFIPS = dplyr::case_when(
boro_name == "Bronx" ~ "005",
boro_name == "Brooklyn" ~ "047",
boro_name == "Manhattan" ~ "061",
boro_name == "Queens" ~ "081",
boro_name == "Staten Island" ~ "085"
),
tractFIPS = paste(cntyFIPS, ct2010, sep = "")
)
tracts <- tracts %>%
dplyr::mutate(
cntyFIPS = dplyr::case_when(
boro_name == "Bronx" ~ "005",
boro_name == "Brooklyn" ~ "047",
boro_name == "Manhattan" ~ "061",
boro_name == "Queens" ~ "081",
boro_name == "Staten Island" ~ "085"
),
tractFIPS = paste(cntyFIPS, ct2010, sep = "")
)
acs <- acs %>%
magrittr::extract(-2) %>%
textConnection() %>%
read.csv(header = TRUE, quote = "\"") %>%
dplyr::select(
GEO_ID,
totPop = DP05_0001E,
elderlyPop = DP05_0024E,
malePop = DP05_0002E,
femalePop = DP05_0003E,
whitePop = DP05_0037E,
blackPop = DP05_0038E,
asianPop = DP05_0067E,
hispanicPop = DP05_0071E,
adultPop = DP05_0021E,
citizenAdult = DP05_0087E
) %>%
dplyr::mutate(
censusCode = stringr::str_sub(GEO_ID, -9, -1)
)
popData <- merge(tracts, acs, by.x = "tractFIPS", by.y = "censusCode")
names(popData)
sum(popData$totPop, na.rm = TRUE)
st_crs(popData)
```
5. Aggregate the ACS census data to zip code area data.
```{r}
popNYC <- sf::st_transform(popData, st_crs(facilities_merged))
acsZipNYC <- sf::st_join(facilities_merged,
popNYC %>% sf::st_centroid(),
join = st_contains) %>%
dplyr::group_by(
ZIPCODE, PO_NAME, POPULATION, COUNTY,
Positive, Total, zcta_cum.perc_pos,
FOOD_STORE_COUNT, NURSING_HOME_COUNT
) %>%
dplyr::summarise(
totPop = sum(totPop, na.rm = TRUE),
elderlyPop = sum(elderlyPop, na.rm = TRUE),
malePop = sum(malePop, na.rm = TRUE),
femalePop = sum(femalePop, na.rm = TRUE),
whitePop = sum(whitePop, na.rm = TRUE),
blackPop = sum(blackPop, na.rm = TRUE),
asianPop = sum(asianPop, na.rm = TRUE),
hispanicPop = sum(hispanicPop, na.rm = TRUE),
adultPop = sum(adultPop, na.rm = TRUE),
citizenAdult = sum(citizenAdult, na.rm = TRUE),
malePctg = sum(malePop, na.rm = TRUE) / sum(totPop, na.rm = TRUE) * 100
)
names(acsZipNYC)
sum(acsZipNYC$totPop, na.rm = TRUE)
```