| title: “R Week 8 Assignment” author: “Genesis Santos” date: “3/22/2025” output: html_document: |
In the following R code chunk, load_packages is the code
chunk name. include=FALSE suggests that the code chunk will
run, but the code itself and its outputs will not be included in the
rendered HTML. echo=TRUE in the following code chunk
suggests that the code and results from running the code will be
included in the rendered HTML.
##Don’t use a single chunk for the entire assignment. Break it into multiple chunks.
##This task requires an attribute join by matching columns with base::merge or dplyr::left_join. ##Between the two, left_join is recommended. ##Pay attention to the order of the inputs in left_join. ##You can choose one or multiple weeks of COVID test data.
zpNYC <- st_read("ZIP_CODE_040114.shp", quiet = TRUE) %>%
mutate(ZIPCODE = as.character(ZIPCODE))
covid_2021 <- read_csv("tests-by-zcta_2021_04_23.csv", show_col_types = FALSE) %>%
mutate(MODIFIED_ZCTA = as.character(MODIFIED_ZCTA)) %>%
rename(ZIPCODE = MODIFIED_ZCTA)
zpNYC_covid <- zpNYC %>%
left_join(covid_2021, by = "ZIPCODE")
nycFoodStoreSF <- st_read("nycFoodStore.shp", quiet = TRUE) %>%
st_transform(st_crs(zpNYC_covid))
##This task requires a spatial join with sf::st_join. ##It needs to use the result from task 1 as one input so that the output includes information from task 1.
# A = Store
# D = Food Warehouse
# J = Multiple Operations
nycFoodRetail <- nycFoodStoreSF %>%
filter(str_detect(Estbl_T, "[AJD]"))
zpNYC_covid_food <- st_join(
zpNYC_covid,
nycFoodRetail,
join = st_contains,
left = TRUE
) %>%
group_by(
ZIPCODE,
PO_NAME,
POPULATION,
COUNTY,
COVID_CASE_COUNT,
TOTAL_COVID_TESTS
) %>%
summarise(
FoodStoreNum = sum(!is.na(Lcns_Nm)),
.groups = "drop"
)
##Like task 2, this also requires a spatial join. ##Again, use output from task 2 as one input for task 3.
nycHealthFacilities <- read_csv("NYS_Health_Facility.csv", show_col_types = FALSE)
nycNursingHomeSF <- nycHealthFacilities %>%
filter(`Short Description` == "NH") %>%
filter(!is.na(`Facility Latitude`), !is.na(`Facility Longitude`)) %>%
st_as_sf(
coords = c("Facility Longitude", "Facility Latitude"),
crs = 4326,
remove = FALSE
) %>%
st_transform(st_crs(zpNYC_covid_food))
zpNYC_final <- st_join(
zpNYC_covid_food,
nycNursingHomeSF,
join = st_contains,
left = TRUE
) %>%
group_by(
ZIPCODE,
PO_NAME,
POPULATION,
COUNTY,
COVID_CASE_COUNT,
TOTAL_COVID_TESTS,
FoodStoreNum
) %>%
summarise(
NursingHomeNum = sum(!is.na(`Facility ID`)),
.groups = "drop"
)
#This is an attribute join. #It may need to process the data first to derive matching columns. #Check out the sample code from our Dropbox site.
nycCensus <- st_read("geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp", quiet = TRUE) %>%
mutate(
cntyFIPS = case_when(
boro_name == "Bronx" ~ "005",
boro_name == "Brooklyn" ~ "047",
boro_name == "Manhattan" ~ "061",
boro_name == "Queens" ~ "081",
boro_name == "Staten Island" ~ "085"
),
tractFIPS = paste0(cntyFIPS, ct2010)
)
names(nycCensus)
## [1] "boro_code" "boro_ct201" "boro_name" "cdeligibil" "ct2010"
## [6] "ctlabel" "ntacode" "ntaname" "puma" "shape_area"
## [11] "shape_leng" "geometry" "cntyFIPS" "tractFIPS"
head(nycCensus)
## Simple feature collection with 6 features and 13 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.08721 ymin: 40.63981 xmax: -73.96433 ymax: 40.76364
## Geodetic CRS: WGS84(DD)
## boro_code boro_ct201 boro_name cdeligibil ct2010 ctlabel ntacode
## 1 5 5000900 Staten Island E 000900 9 SI22
## 2 1 1009800 Manhattan I 009800 98 MN19
## 3 1 1010000 Manhattan I 010000 100 MN19
## 4 1 1010200 Manhattan I 010200 102 MN17
## 5 1 1010400 Manhattan I 010400 104 MN17
## 6 1 1011300 Manhattan I 011300 113 MN17
## ntaname puma shape_area shape_leng
## 1 West New Brighton-New Brighton-St. George 3903 2497010 7729.017
## 2 Turtle Bay-East Midtown 3808 1906016 5534.200
## 3 Turtle Bay-East Midtown 3808 1860938 5692.169
## 4 Midtown-Midtown South 3807 1860993 5687.802
## 5 Midtown-Midtown South 3807 1864600 5693.036
## 6 Midtown-Midtown South 3807 1890907 5699.861
## geometry cntyFIPS tractFIPS
## 1 MULTIPOLYGON (((-74.07921 4... 085 085000900
## 2 MULTIPOLYGON (((-73.96433 4... 061 061009800
## 3 MULTIPOLYGON (((-73.96802 4... 061 061010000
## 4 MULTIPOLYGON (((-73.97124 4... 061 061010200
## 5 MULTIPOLYGON (((-73.97446 4... 061 061010400
## 6 MULTIPOLYGON (((-73.98412 4... 061 061011300
acs_lines <- readLines("ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv", warn = FALSE)
acs_text <- paste(c(acs_lines[1], acs_lines[-c(1, 2)]), collapse = "\n")
acs_raw <- read_csv(I(acs_text), show_col_types = FALSE)
acsData <- acs_raw %>%
transmute(
GEO_ID,
totPop = as.numeric(DP05_0001E),
elderlyPop = as.numeric(DP05_0024E),
malePop = as.numeric(DP05_0002E),
femalePop = as.numeric(DP05_0003E),
whitePop = as.numeric(DP05_0037E),
blackPop = as.numeric(DP05_0038E),
asianPop = as.numeric(DP05_0067E),
hispanicPop = as.numeric(DP05_0071E),
adultPop = as.numeric(DP05_0021E),
citizenAdult = as.numeric(DP05_0087E),
censusCode = str_sub(GEO_ID, -9, -1)
)
popData <- nycCensus %>%
left_join(acsData, by = c("tractFIPS" = "censusCode"))
class(popData)
## [1] "sf" "data.frame"
inherits(popData, "sf")
## [1] TRUE
###task 5:Aggregate the ACS census data to zip code area data. ##This requires a spatial join (see tips in the sample code). ##Use outputs from task 3 and 4 as inputs.
st_crs(zpNYC_final)
## Coordinate Reference System:
## User input: NAD83 / New York Long Island (ftUS)
## wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
## BASEGEOGCRS["NAD83",
## DATUM["North American Datum 1983",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4269]],
## CONVERSION["SPCS83 New York Long Island zone (US survey foot)",
## METHOD["Lambert Conic Conformal (2SP)",
## ID["EPSG",9802]],
## PARAMETER["Latitude of false origin",40.1666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8821]],
## PARAMETER["Longitude of false origin",-74,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8822]],
## PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8823]],
## PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8824]],
## PARAMETER["Easting at false origin",984250,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8826]],
## PARAMETER["Northing at false origin",0,
## LENGTHUNIT["US survey foot",0.304800609601219],
## ID["EPSG",8827]]],
## CS[Cartesian,2],
## AXIS["easting (X)",east,
## ORDER[1],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## AXIS["northing (Y)",north,
## ORDER[2],
## LENGTHUNIT["US survey foot",0.304800609601219]],
## USAGE[
## SCOPE["Engineering survey, topographic mapping."],
## AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
## BBOX[40.47,-74.26,41.3,-71.8]],
## ID["EPSG",2263]]
st_crs(popData)
## Coordinate Reference System:
## User input: WGS84(DD)
## wkt:
## GEOGCRS["WGS84(DD)",
## DATUM["WGS84",
## ELLIPSOID["WGS84",6378137,298.257223563,
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["longitude",east,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["latitude",north,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]]]
popNYC <- st_transform(popData, st_crs(zpNYC_final))
popNYC_centroid <- st_centroid(popNYC)
## Warning: st_centroid assumes attributes are constant over geometries
zip_joined <- st_join(
zpNYC_final,
popNYC_centroid,
join = st_contains,
left = TRUE
)
zip_grouped <- zip_joined %>%
group_by(
ZIPCODE,
PO_NAME,
POPULATION,
COUNTY,
COVID_CASE_COUNT,
TOTAL_COVID_TESTS,
FoodStoreNum,
NursingHomeNum
)
zipACS <- zip_grouped %>%
summarise(
totPop = sum(totPop, na.rm = TRUE),
elderlyPop = sum(elderlyPop, na.rm = TRUE),
adultPop = sum(adultPop, na.rm = TRUE),
citizenAdult = sum(citizenAdult, na.rm = TRUE),
asianPop = sum(asianPop, na.rm = TRUE),
blackPop = sum(blackPop, na.rm = TRUE),
hispanicPop = sum(hispanicPop, na.rm = TRUE),
whitePop = sum(whitePop, na.rm = TRUE),
malePop = sum(malePop, na.rm = TRUE),
femalePop = sum(femalePop, na.rm = TRUE),
malePctg = ifelse(totPop > 0, malePop / totPop * 100, NA_real_),
femalePctg = ifelse(totPop > 0, femalePop / totPop * 100, NA_real_),
elderlyPctg = ifelse(totPop > 0, elderlyPop / totPop * 100, NA_real_),
.groups = "drop"
)
names(zipACS)
## [1] "ZIPCODE" "PO_NAME" "POPULATION"
## [4] "COUNTY" "COVID_CASE_COUNT" "TOTAL_COVID_TESTS"
## [7] "FoodStoreNum" "NursingHomeNum" "totPop"
## [10] "elderlyPop" "adultPop" "citizenAdult"
## [13] "asianPop" "blackPop" "hispanicPop"
## [16] "whitePop" "malePop" "femalePop"
## [19] "malePctg" "femalePctg" "elderlyPctg"
## [22] "geometry"
head(zipACS)
## Simple feature collection with 6 features and 21 fields
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: 971132.6 ymin: 188447.3 xmax: 998309.7 ymax: 230942.5
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 6 × 22
## ZIPCODE PO_NAME POPULATION COUNTY COVID_CASE_COUNT TOTAL_COVID_TESTS
## <chr> <chr> <dbl> <chr> <dbl> <dbl>
## 1 00083 Central Park 25 New York NA NA
## 2 10001 New York 22413 New York 1542 20158
## 3 10002 New York 81305 New York 5902 48197
## 4 10003 New York 55878 New York 2803 41076
## 5 10004 New York 2187 New York 247 3599
## 6 10005 New York 8107 New York 413 6102
## # ℹ 16 more variables: FoodStoreNum <int>, NursingHomeNum <int>, totPop <dbl>,
## # elderlyPop <dbl>, adultPop <dbl>, citizenAdult <dbl>, asianPop <dbl>,
## # blackPop <dbl>, hispanicPop <dbl>, whitePop <dbl>, malePop <dbl>,
## # femalePop <dbl>, malePctg <dbl>, femalePctg <dbl>, elderlyPctg <dbl>,
## # geometry <GEOMETRY [US_survey_foot]>
sum(zipACS$totPop, na.rm = TRUE)
## [1] 8446394
zipACS %>% filter(is.na(totPop))
## Simple feature collection with 0 features and 21 fields
## Bounding box: xmin: NA ymin: NA xmax: NA ymax: NA
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 0 × 22
## # ℹ 22 variables: ZIPCODE <chr>, PO_NAME <chr>, POPULATION <dbl>, COUNTY <chr>,
## # COVID_CASE_COUNT <dbl>, TOTAL_COVID_TESTS <dbl>, FoodStoreNum <int>,
## # NursingHomeNum <int>, totPop <dbl>, elderlyPop <dbl>, adultPop <dbl>,
## # citizenAdult <dbl>, asianPop <dbl>, blackPop <dbl>, hispanicPop <dbl>,
## # whitePop <dbl>, malePop <dbl>, femalePop <dbl>, malePctg <dbl>,
## # femalePctg <dbl>, elderlyPctg <dbl>, geometry <GEOMETRY [US_survey_foot]>
plot(zipACS["totPop"])
plot(zipACS["asianPop"])
plot(zipACS["COVID_CASE_COUNT"])