zip_codes <- st_read('../assign7/R-Spatial_I_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp')
## Reading layer `ZIP_CODE_040114' from data source
## `/Users/williamcornejo/Desktop/Desktop - william’s MacBook Air/school/gtech705/gtech78520/assign7/R-Spatial_I_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
retail_df_xy <- read.csv('../assign7/R-Spatial_I_Lab/nys_retail_food_store_xy.csv')
r_sf <- retail_df_xy[!is.na(retail_df_xy$X),]
r_sf <- r_sf[!is.na(r_sf$Y),]
r_sf <- st_as_sf(r_sf, coords = c("X", "Y"), crs="4326")
health_facilities <- read.csv('../assign7/R-Spatial_I_Lab/NYS_Health_Facility 3(in).csv')
hf_df <- health_facilities[!is.na(health_facilities$Facility.Longitude), ]
hf_df <- hf_df[!is.na(hf_df$Facility.Latitude), ]
hf_df <- hf_df[!is.na(hf_df$Facility.Longitude), ]
hf_df <- hf_df[hf_df$Facility.Latitude != 0, ]
hf_df <- hf_df[hf_df$Facility.Longitude != 0, ]
hf_sf <- st_as_sf(hf_df, coords = c("Facility.Longitude", "Facility.Latitude"))
# Read covid data
#covid_1 <- read.csv('R-Spatial_II_Lab/tests-by-zcta_2020_04_12.csv')
#covid_2 <- read.csv('R-Spatial_II_Lab/tests-by-zcta_2020_04_19.csv')
covid_3 <- read.csv('R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv')
# Read zip code data as spatial
zip_codes_1 <- st_transform(zip_codes, 4326)
# Join by zipcode
# Convert zcta to char
covid_3 <- covid_3[!is.na(covid_3$MODIFIED_ZCTA), ]
covid_3_sf <- st_as_sf(covid_3, coords = c("lon", "lat"), crs=4326)
covid_3$MODZCTA_2 <- as.character(covid_3$MODIFIED_ZCTA)
covid_3_sf$MODZCTA_2 <- as.character(covid_3_sf$MODIFIED_ZCTA)
covid_3_join <- base::merge(covid_3, zip_codes_1, by.x='MODZCTA_2', by.y='ZIPCODE')
covid_3_join <- st_as_sf(covid_3_join, coords = c("lon", "lat"), crs=4326)
covid_3_join <- covid_3_join %>%
rename(ZIPCODE = MODZCTA_2)
# Join with just st_join
covid_3_sf <- st_join(zip_codes_1, covid_3_sf)
#mapview(covid_3_join, zcol=c('COVID_CASE_COUNT'))
knitr::include_graphics('Screen Shot 2025-03-26 at 4.59.23 PM.png')
#r_sf$zip_code <- as.character(r_sf$Zip.Code)
nyc_food_store <- st_read('R-Spatial_II_Lab/nycFoodStore.shp')
## Reading layer `nycFoodStore' from data source
## `/Users/williamcornejo/Desktop/Desktop - william’s MacBook Air/school/gtech705/gtech78520/assign8/R-Spatial_II_Lab/nycFoodStore.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS: WGS 84
st_crs(r_sf) <- 4326
r_sf_filt <- r_sf %>%
dplyr::filter(stringr::str_detect(Establishment.Type, '[ABDJ]'))
retail_zips <- sf::st_join(zip_codes_1, r_sf_filt, join= st_contains) %>%
group_by(ZIPCODE) %>%
summarise(FoodStoreNum = n())
covid_3_sf_new <- covid_3_join %>%
left_join(st_drop_geometry(retail_zips), by ="ZIPCODE")
#mapview(covid_3_sf_new, zcol=c('FoodStoreNum'))
#mapview(retail_zips_2)
knitr::include_graphics('Screen Shot 2025-03-27 at 12.47.54 PM.png')
# Adjust CRS of health facilities
st_crs(hf_sf) <- 4326
# Filter for hospitals
hf_sf_hospital <- hf_sf %>%
filter(Short.Description == 'HOSP')
# Join zipcodes sf to hospitals to get count by zip
hf_zip <- sf::st_join(zip_codes_1, hf_sf_hospital, join= st_contains) %>%
group_by(ZIPCODE) %>%
summarise(hfNum = n())
# Ignore, other join based in geometry
#hf_zip_1 <- zip_codes_1 %>%
# st_join(hf_sf_hospital) %>%
# group_by(geometry) %>%
# summarise(hfNum = n())
# join hospital count by zip to sf which will have everything (retail, health and acs)
covid_3_sf_new_1 <- covid_3_sf_new %>%
left_join(st_drop_geometry(hf_zip), by = "ZIPCODE")
mapview(hf_zip['hfNum'])
knitr::include_graphics('Screen Shot 2025-03-28 at 2.51.17 PM.png')
# Some code from key tasks file
acsData <- readLines("R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
magrittr::extract(-2) %>%
textConnection() %>%
read.csv(header=TRUE, quote= "\"") %>%
dplyr::select(GEO_ID,
totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
malePop = DP05_0002E, femalePop = DP05_0003E,
whitePop = DP05_0037E, blackPop = DP05_0038E,
asianPop = DP05_0067E, hispanicPop = DP05_0071E,
adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1));
nycCensus <- sf::st_read('R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp', stringsAsFactors = FALSE)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `/Users/williamcornejo/Desktop/Desktop - william’s MacBook Air/school/gtech705/gtech78520/assign8/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS84(DD)
nycCensus %<>% dplyr::mutate(cntyFIPS = case_when(
boro_name == 'Bronx' ~ '005',
boro_name == 'Brooklyn' ~ '047',
boro_name == 'Manhattan' ~ '061',
boro_name == 'Queens' ~ '081',
boro_name == 'Staten Island' ~ '085'),
tractFIPS = paste(cntyFIPS, ct2010, sep='')
)
# Make new column from nycCensus$boroct201, where we add 1400000US3600 to it
# And name it GEO_ID, which is optional
acsData %>%
magrittr::extract(1:10,)
## GEO_ID totPop elderlyPop malePop femalePop whitePop blackPop
## 1 1400000US36005000100 7080 51 6503 577 1773 4239
## 2 1400000US36005000200 4542 950 2264 2278 2165 1279
## 3 1400000US36005000400 5634 710 2807 2827 2623 1699
## 4 1400000US36005001600 5917 989 2365 3552 2406 2434
## 5 1400000US36005001900 2765 76 1363 1402 585 1041
## 6 1400000US36005002000 9409 977 4119 5290 3185 4487
## 7 1400000US36005002300 4600 648 2175 2425 479 2122
## 8 1400000US36005002400 172 0 121 51 69 89
## 9 1400000US36005002500 5887 548 2958 2929 903 1344
## 10 1400000US36005002701 2868 243 1259 1609 243 987
## asianPop hispanicPop adultPop citizenAdult censusCode
## 1 130 2329 6909 6100 005000100
## 2 119 3367 3582 2952 005000200
## 3 226 3873 4507 4214 005000400
## 4 68 3603 4416 3851 005001600
## 5 130 1413 2008 1787 005001900
## 6 29 5905 6851 6170 005002000
## 7 27 2674 3498 3056 005002300
## 8 14 0 131 42 005002400
## 9 68 4562 4237 2722 005002500
## 10 0 1985 1848 1412 005002701
# Merge (JOIN) ACS data to the census tracts
popData <- merge(nycCensus, acsData, by.x ='tractFIPS', by.y = 'censusCode')
#mapview(popData, zcol=c('totPop'))
knitr::include_graphics('Screen Shot 2025-03-26 at 10.17.40 PM.png')
popData_1 <- st_transform(popData, 4326)
# Ignore
#covid_3_sf_new_f <- covid_3_sf_new_1 %>%
# left_join(
# st_drop_geometry(popData_1), # Ensure no geometry conflict
# by = c("CTY_FIPS" = "cntyFIPS") # Correct join syntax
# )
# Final dataframe with zipcodes, census tracts,
c_final <- st_join(covid_3_sf_new_1, popData_1)
#mapview(c_final['asianPop'])
#plot(covid_3_sf_new_f['totPop'])
colnames(c_final)
## [1] "ZIPCODE" "MODIFIED_ZCTA" "NEIGHBORHOOD_NAME"
## [4] "BOROUGH_GROUP" "label" "COVID_CASE_COUNT"
## [7] "COVID_CASE_RATE" "POP_DENOMINATOR" "COVID_DEATH_COUNT"
## [10] "COVID_DEATH_RATE" "PERCENT_POSITIVE" "TOTAL_COVID_TESTS"
## [13] "BLDGZIP" "PO_NAME" "POPULATION"
## [16] "AREA" "STATE" "COUNTY"
## [19] "ST_FIPS" "CTY_FIPS" "URL"
## [22] "SHAPE_AREA" "SHAPE_LEN" "FoodStoreNum"
## [25] "hfNum" "tractFIPS" "boro_code"
## [28] "boro_ct201" "boro_name" "cdeligibil"
## [31] "ct2010" "ctlabel" "ntacode"
## [34] "ntaname" "puma" "shape_area"
## [37] "shape_leng" "cntyFIPS" "GEO_ID"
## [40] "totPop" "elderlyPop" "malePop"
## [43] "femalePop" "whitePop" "blackPop"
## [46] "asianPop" "hispanicPop" "adultPop"
## [49] "citizenAdult" "geometry"
knitr::include_graphics('Screen Shot 2025-03-28 at 8.44.06 PM.png')
In the end, we should have the confirmed and tested cases of
covid-19, numbers of specific types of food stores, numbers of specific
types of health facilities, and population (total population, elderly,
by race, etc.) at the zip code level. We should also have boroughs,
names, etc. for each zip code area.