Don’t use a single chunk for the entire assignment. Break it into multiple chunks.
# Location of the GeoPackage that holds the three spatial layers used below.
file_path <- "./nyc_spatial.gpkg"

# Abort before any read is attempted if the GeoPackage is not where we expect;
# the message reports the current working directory to help locate it.
if (!file.exists(file_path)) {
  stop(paste("ERROR: File not found at", getwd(), ". Check the Files pane!"))
}

# Read each layer quietly (no driver/summary banner in the knitted output).
nyc_zips <- sf::st_read(dsn = file_path, layer = "nyc_postal", quiet = TRUE)
food_stores <- sf::st_read(dsn = file_path, layer = "food_stores", quiet = TRUE)
health_fac <- sf::st_read(dsn = file_path, layer = "health_fac", quiet = TRUE)
# Load the ZIP-level COVID-19 testing table that Task 1 joins onto nyc_zips.
covid_path <- "./data/tests-by-zcta_2020_04_19.csv"

# Fail fast when the CSV is missing. Merely printing a note would leave
# `covid_data` undefined, and the script would then stop at the Task 1 join
# with an unrelated "object 'covid_data' not found" error.
if (!file.exists(covid_path)) {
  stop(
    "COVID CSV not found at ", covid_path,
    " (working directory: ", getwd(), ")",
    call. = FALSE
  )
}

covid_data <- read_csv(covid_path)
## Rows: 178 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (4): MODZCTA, Positive, Total, zcta_cum.perc_pos
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Note: I had to use AI to help me write this, since I couldn't figure out why it wouldn't read nyc_spatial.gpkg even though the file is in my working directory :(
# Task 1: attach the COVID-19 testing columns to the ZIP code polygons.
# Both key columns are converted to character first so the join compares
# like with like; every polygon is kept (left join), matched or not.
task1_output <- left_join(
  mutate(nyc_zips, ZIPCODE = as.character(ZIPCODE)),
  mutate(covid_data, MODZCTA = as.character(MODZCTA)),
  by = c("ZIPCODE" = "MODZCTA")
)

# Preview the first rows of the joined layer.
head(task1_output)
## Simple feature collection with 6 features and 15 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 986490.1 ymin: 168910.5 xmax: 1043042 ymax: 189382.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
## ZIPCODE BLDGZIP PO_NAME POPULATION AREA STATE COUNTY ST_FIPS CTY_FIPS
## 1 11436 0 Jamaica 18681 22699295 NY Queens 36 081
## 2 11213 0 Brooklyn 62426 29631004 NY Kings 36 047
## 3 11212 0 Brooklyn 83866 41972104 NY Kings 36 047
## 4 11225 0 Brooklyn 56527 23698630 NY Kings 36 047
## 5 11218 0 Brooklyn 72280 36868799 NY Kings 36 047
## 6 11226 0 Brooklyn 106132 39408598 NY Kings 36 047
## URL SHAPE_AREA SHAPE_LEN Positive Total zcta_cum.perc_pos
## 1 http://www.usps.com/ 0 0 342 567 60.32
## 2 http://www.usps.com/ 0 0 972 1653 58.80
## 3 http://www.usps.com/ 0 0 1086 1793 60.57
## 4 http://www.usps.com/ 0 0 814 1359 59.90
## 5 http://www.usps.com/ 0 0 1163 1967 59.13
## 6 http://www.usps.com/ 0 0 1336 2170 61.57
## geom
## 1 POLYGON ((1038098 188138.4,...
## 2 POLYGON ((1001614 186926.4,...
## 3 POLYGON ((1011174 183696.3,...
## 4 POLYGON ((995908.4 183617.6...
## 5 POLYGON ((991997.1 176307.5...
## 6 POLYGON ((994821.5 177865.7...
# Task 2: count grocery stores per ZIP code and attach the count to the
# ZIP polygons as `store_count`.
grocery_stores <- food_stores %>%
  filter(Entity.Name == "Grocery Store")

# An exact-match filter that selects nothing silently produces a
# store_count of 0 everywhere, so make that situation visible.
# NOTE(review): the previewed result shows store_count == 0 even for large
# Brooklyn ZIP codes, which suggests this filter matches no rows. Confirm
# which column/value of food_stores actually encodes the store type.
if (nrow(grocery_stores) == 0) {
  warning(
    "No rows in food_stores have Entity.Name == \"Grocery Store\"; ",
    "every store_count will be 0. Check the filter column and value.",
    call. = FALSE
  )
}

# Put the points in the same CRS as the polygons before the spatial join.
grocery_stores_fixed <- st_transform(grocery_stores, st_crs(task1_output))
food_joined <- st_join(grocery_stores_fixed, task1_output)

# Points that fall in no ZIP polygon come back with ZIPCODE = NA; they do not
# belong to any ZIP code, so drop them before counting.
food_counts <- food_joined %>%
  st_drop_geometry() %>%
  filter(!is.na(ZIPCODE)) %>%
  count(ZIPCODE, name = "store_count")

# ZIP codes without any matched store get an explicit 0 instead of NA.
task2_output <- task1_output %>%
  left_join(food_counts, by = "ZIPCODE") %>%
  mutate(store_count = replace_na(store_count, 0))
# Task 3: count nursing homes per ZIP code and attach the count to the
# ZIP polygons as `nursing_home_count`.
nursing_homes <- health_fac %>%
  filter(Description == "Residential Health Care Facility")

# An exact-match filter that selects nothing silently produces a
# nursing_home_count of 0 everywhere, so make that situation visible.
# NOTE(review): the previewed result shows nursing_home_count == 0 for every
# displayed ZIP code, which suggests this filter matches no rows. Confirm the
# exact Description values present in health_fac.
if (nrow(nursing_homes) == 0) {
  warning(
    "No rows in health_fac have Description == ",
    "\"Residential Health Care Facility\"; every nursing_home_count will ",
    "be 0. Check the filter column and value.",
    call. = FALSE
  )
}

# Put the points in the same CRS as the polygons before the spatial join.
health_fixed <- st_transform(nursing_homes, st_crs(task2_output))
health_joined <- st_join(health_fixed, task2_output)

# Facilities that fall in no ZIP polygon come back with ZIPCODE = NA; they do
# not belong to any ZIP code, so drop them before counting.
health_counts <- health_joined %>%
  st_drop_geometry() %>%
  filter(!is.na(ZIPCODE)) %>%
  count(ZIPCODE, name = "nursing_home_count")

# ZIP codes without any matched facility get an explicit 0 instead of NA.
task3_output <- task2_output %>%
  left_join(health_counts, by = "ZIPCODE") %>%
  mutate(nursing_home_count = replace_na(nursing_home_count, 0))
# Task 4 input: read the 2010 census tract shapefile, keeping text columns as
# character rather than factor.
nycCensus <- sf::st_read(
  dsn = "data/2010CensusTracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp",
  stringsAsFactors = FALSE
)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\Antonio\Documents\Week8\data\2010CensusTracts\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS84(DD)
# Derive the join key for the ACS table: translate each borough name into its
# three-digit county FIPS code, then append the tract code (ct2010).
# Borough names not listed below yield NA for cntyFIPS.
nycCensus <- dplyr::mutate(
  nycCensus,
  cntyFIPS = dplyr::case_when(
    boro_name == "Bronx" ~ "005",
    boro_name == "Brooklyn" ~ "047",
    boro_name == "Manhattan" ~ "061",
    boro_name == "Queens" ~ "081",
    boro_name == "Staten Island" ~ "085"
  ),
  tractFIPS = paste0(cntyFIPS, ct2010)
)
# Read the ACS DP05 table and keep only the population columns needed later.
# The second line of the file is removed before parsing so that the first line
# alone supplies the column names.
# `read.csv(text = ...)` is used instead of piping through `textConnection()`:
# the explicit connection was never closed, whereas `text =` lets read.csv
# open and close its own connection.
acsData <- read.csv(
  text = readLines("R-Spatial_II_Lab/R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv")[-2],
  header = TRUE,
  quote = "\""
) %>%
  dplyr::select(
    GEO_ID,
    totPop = DP05_0001E, elderlyPop = DP05_0024E,
    malePop = DP05_0002E, femalePop = DP05_0003E,
    whitePop = DP05_0037E, blackPop = DP05_0038E,
    asianPop = DP05_0067E, hispanicPop = DP05_0071E,
    adultPop = DP05_0021E, citizenAdult = DP05_0087E
  ) %>%
  # The last nine characters of GEO_ID are the key matched against tractFIPS.
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9, -1))
# Attach the ACS population columns to the tract polygons; merge() keeps only
# tracts whose tractFIPS has a matching censusCode.
popData <- merge(
  x = nycCensus,
  y = acsData,
  by.x = "tractFIPS",
  by.y = "censusCode"
)

# Reproject the tracts into the CRS of the ZIP layer.
popNYC <- sf::st_transform(popData, crs = sf::st_crs(task3_output))

# Reduce each tract to its centroid point for the spatial join that follows.
tract_points <- sf::st_centroid(popNYC)
## Warning: st_centroid assumes attributes are constant over geometries
# Task 4: spatially join the tract centroids to the ZIP polygons, then total
# the tract populations per ZIP code (missing values are ignored in the sums).
census_by_zip <- sf::st_join(x = tract_points, y = task3_output) %>%
  sf::st_drop_geometry() %>%
  dplyr::group_by(ZIPCODE) %>%
  dplyr::summarise(
    total_pop = sum(totPop, na.rm = TRUE),
    elderly_pop = sum(elderlyPop, na.rm = TRUE),
    white_pop = sum(whitePop, na.rm = TRUE),
    black_pop = sum(blackPop, na.rm = TRUE),
    hispanic_pop = sum(hispanicPop, na.rm = TRUE)
  )

# Attach the per-ZIP population totals to the ZIP polygons; every polygon is
# kept whether or not it has a matching total.
final_nyc_data <- dplyr::left_join(
  task3_output,
  census_by_zip,
  by = "ZIPCODE"
)

# Show the first rows of the final aggregated dataset.
head(final_nyc_data)
## Simple feature collection with 6 features and 22 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 986490.1 ymin: 168910.5 xmax: 1043042 ymax: 189382.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
## ZIPCODE BLDGZIP PO_NAME POPULATION AREA STATE COUNTY ST_FIPS CTY_FIPS
## 1 11436 0 Jamaica 18681 22699295 NY Queens 36 081
## 2 11213 0 Brooklyn 62426 29631004 NY Kings 36 047
## 3 11212 0 Brooklyn 83866 41972104 NY Kings 36 047
## 4 11225 0 Brooklyn 56527 23698630 NY Kings 36 047
## 5 11218 0 Brooklyn 72280 36868799 NY Kings 36 047
## 6 11226 0 Brooklyn 106132 39408598 NY Kings 36 047
## URL SHAPE_AREA SHAPE_LEN Positive Total zcta_cum.perc_pos
## 1 http://www.usps.com/ 0 0 342 567 60.32
## 2 http://www.usps.com/ 0 0 972 1653 58.80
## 3 http://www.usps.com/ 0 0 1086 1793 60.57
## 4 http://www.usps.com/ 0 0 814 1359 59.90
## 5 http://www.usps.com/ 0 0 1163 1967 59.13
## 6 http://www.usps.com/ 0 0 1336 2170 61.57
## store_count nursing_home_count total_pop elderly_pop white_pop black_pop
## 1 0 0 22377 2456 1192 13972
## 2 0 0 66602 7662 16483 43625
## 3 0 0 73069 9518 5278 59541
## 4 0 0 60958 7592 15300 40048
## 5 0 0 67426 8144 40315 5045
## 6 0 0 103729 12278 16173 69058
## hispanic_pop geom
## 1 3226 POLYGON ((1038098 188138.4,...
## 2 6738 POLYGON ((1001614 186926.4,...
## 3 13900 POLYGON ((1011174 183696.3,...
## 4 5293 POLYGON ((995908.4 183617.6...
## 5 11169 POLYGON ((991997.1 176307.5...
## 6 18244 POLYGON ((994821.5 177865.7...