This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Print summary statistics for each column of the cars data set.
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
library(sf)
## Linking to GEOS 3.13.1, GDAL 3.11.4, PROJ 9.7.0; sf_use_s2() is TRUE
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(magrittr)
library(stringr)
library(mapview)
#############################################################################
# Load the ZCTA-level COVID-19 testing table. The CSV has no geometry column,
# so st_read() falls back to returning a plain data frame.
covid_2021_04_23 <- sf::st_read(
  dsn = "R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv",
  stringsAsFactors = FALSE
)
## Reading layer `tests-by-zcta_2021_04_23' from data source
## `C:\Users\ranae\Documents\Hunter\Spring 2026\GTECH 78520 - Data Analysis & Viz\R-Spatial\R-Spatial_II_Lab\tests-by-zcta_2021_04_23.csv'
## using driver `CSV'
## Warning: no simple feature geometries present: returning a data.frame or tbl_df
# Read the NYC ZIP code polygons; the COVID-19 table is joined to these below.
nyc_040114 <- sf::st_read(
  dsn = "R-Spatial_I_Lab/ZIP_CODE_040114/ZIP_CODE_040114.shp",
  stringsAsFactors = FALSE
)
## Reading layer `ZIP_CODE_040114' from data source
## `C:\Users\ranae\Documents\Hunter\Spring 2026\GTECH 78520 - Data Analysis & Viz\R-Spatial\R-Spatial_I_Lab\ZIP_CODE_040114\ZIP_CODE_040114.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
# Attach the ZIP code attributes and geometry to each COVID-19 record by
# matching MODIFIED_ZCTA against the polygon layer's ZIPCODE field.
covid_merge <- base::merge(
  x = covid_2021_04_23,
  y = nyc_040114,
  by.x = "MODIFIED_ZCTA",
  by.y = "ZIPCODE"
)
# The merged table is not an sf object yet; promote it so that the spatial
# operations further down can use it.
covid_merge_sf <- sf::st_as_sf(covid_merge)
############################################################################
# Aggregate the NYC retail food store data (points) to the zip code data, so
# that we know how many retail stores are in each zip code area. Note that not
# all locations are food retail, so we need to choose the specific types
# according to the data.
#from week 7
nys_rfs <- read.csv(
  file = "R-Spatial_I_Lab/NYS_Retail_Food_Stores.csv",
  stringsAsFactors = FALSE
)
# The Location column embeds the coordinates in parentheses; find where the
# opening and closing parentheses sit in each string.
leftPos <- stringr::str_locate(nys_rfs$Location, "\\(")[, "start"]
rghtPos <- stringr::str_locate(nys_rfs$Location, "\\)")[, "start"]
# Keep only the text strictly between the parentheses.
nys_rfs$coords <- stringr::str_sub(nys_rfs$Location, leftPos + 1, rghtPos - 1)
# Start/end positions of the ", " that separates the two coordinates.
cmmaPos <- stringr::str_locate(nys_rfs$coords, ", ")
# The first coordinate (the text before the separator) becomes Y; text that is
# not numeric is coerced to NA with a warning.
nys_rfs$Y <- stringr::str_sub(nys_rfs$coords, 1, cmmaPos[,1]-1) %>% as.numeric()
## Warning in stringr::str_sub(nys_rfs$coords, 1, cmmaPos[, 1] - 1) %>%
## as.numeric(): NAs introduced by coercion
# The second coordinate (the text after the ", " separator) becomes X; text
# that is not numeric is coerced to NA with a warning.
nys_rfs$X <- stringr::str_sub(nys_rfs$coords, cmmaPos[,2]+1) %>% as.numeric()
## Warning in stringr::str_sub(nys_rfs$coords, cmmaPos[, 2] + 1) %>% as.numeric():
## NAs introduced by coercion
# Drop the rows with a missing X or Y, build point geometries from the two
# columns, and declare the coordinates as EPSG:4326.
nys_rfs_sf <- nys_rfs %>%
  tidyr::drop_na(X, Y) %>%
  st_as_sf(coords = c("X", "Y")) %>%
  st_set_crs(4326)
# Reproject the store points into the CRS of the ZIP code polygons (week 8).
nys_rfs_food <- nys_rfs_sf %>% st_transform(st_crs(nyc_040114))
# Strip surrounding whitespace so the equality and membership tests below
# compare clean values.
nys_rfs_food[["County"]] <- trimws(nys_rfs_food[["County"]])
nys_rfs_food[["Establishment.Type"]] <-
  trimws(nys_rfs_food[["Establishment.Type"]])
# Keep establishments of type "A" located in the five NYC counties.
nyc_food <- nys_rfs_food %>%
  filter(
    Establishment.Type == "A" &
      County %in% c("New York", "Kings", "Queens", "Bronx", "Richmond")
  )
# Tag every store point with the attributes of the ZIP polygon it lies within.
nyc_food_merge <- nyc_food %>% st_join(nyc_040114, join = st_within)
# Count stores per ZIP code, ignoring points that matched no polygon.
zip_food_count <- nyc_food_merge %>%
  st_drop_geometry() %>%
  filter(!is.na(ZIPCODE)) %>%
  group_by(ZIPCODE) %>%
  summarise(store_count = n())
# Put the counts back on the ZIP polygons; ZIP codes without any matched store
# keep an NA store_count.
nyc_zip_counts <- left_join(nyc_040114, zip_food_count, by = "ZIPCODE")
# Interactive map coloured by store count.
mapview(nyc_zip_counts, zcol = "store_count")
##########################################################################
# Aggregate the NYC health facilities (points) to the zip code data. Similarly,
# choose appropriate subtypes such as nursing homes from the facilities.
# from week 7
# Import the NYS health facility table.
nys_hf <- read.csv(
  file = "R-Spatial_I_Lab/NYS_Health_Facility.csv",
  stringsAsFactors = FALSE
)
# Drop rows lacking a latitude or longitude, build point geometries, and
# declare the coordinates as EPSG:4326 (this only labels the CRS; nothing is
# reprojected at this step).
nys_hf_sf <- nys_hf %>%
  tidyr::drop_na(Facility.Latitude, Facility.Longitude) %>%
  st_as_sf(coords = c("Facility.Longitude", "Facility.Latitude")) %>%
  st_set_crs(4326)
# Checkpoint: inspect the structure of the new sf object.
str(nys_hf_sf)
## Classes 'sf' and 'data.frame': 3848 obs. of 35 variables:
## $ Facility.ID : int 204 620 1156 2589 3455 3853 4249 4473 6230 6482 ...
## $ Facility.Name : chr "Hospice at Lourdes" "Charles T Sitrin Health Care Center Inc" "East Side Nursing Home" "Wellsville Manor Care Center" ...
## $ Short.Description : chr "HSPC" "NH" "NH" "NH" ...
## $ Description : chr "Hospice" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" ...
## $ Facility.Open.Date : chr "06/01/1985" "02/01/1989" "08/01/1979" "02/01/1989" ...
## $ Facility.Address.1 : chr "4102 Old Vestal Road" "2050 Tilden Avenue" "62 Prospect St" "4192A Bolivar Road" ...
## $ Facility.Address.2 : chr "" "" "" "" ...
## $ Facility.City : chr "Vestal" "New Hartford" "Warsaw" "Wellsville" ...
## $ Facility.State : chr "New York" "New York" "New York" "New York" ...
## $ Facility.Zip.Code : chr "13850" "13413" "14569" "14895" ...
## $ Facility.Phone.Number : num 6.08e+09 3.16e+09 5.86e+09 5.86e+09 7.17e+09 ...
## $ Facility.Fax.Number : num NA NA NA NA NA ...
## $ Facility.Website : chr "" "" "" "" ...
## $ Facility.County.Code : int 3 32 60 2 14 29 14 29 7093 29 ...
## $ Facility.County : chr "Broome" "Oneida" "Wyoming" "Allegany" ...
## $ Regional.Office.ID : int 3 3 1 1 1 7 1 7 5 7 ...
## $ Regional.Office : chr "Central New York Regional Office" "Central New York Regional Office" "Western Regional Office - Buffalo" "Western Regional Office - Buffalo" ...
## $ Main.Site.Name : chr "" "" "" "" ...
## $ Main.Site.Facility.ID : int NA NA NA NA NA NA NA NA 1463 NA ...
## $ Operating.Certificate.Number: chr "0301501F" "3227304N" "6027303N" "0228305N" ...
## $ Operator.Name : chr "Our Lady of Lourdes Memorial Hospital Inc" "Charles T Sitrin Health Care Center, Inc" "East Side Nursing Home Inc" "Wellsville Manor LLC" ...
## $ Operator.Address.1 : chr "169 Riverside Drive" "Box 1000 Tilden Avenue" "62 Prospect Street" "4192a Bolivar Road" ...
## $ Operator.Address.2 : chr "" "" "" "" ...
## $ Operator.City : chr "Binghamton" "New Hartford" "Warsaw" "Wellsville" ...
## $ Operator.State : chr "New York" "New York" "New York" "New York" ...
## $ Operator.Zip.Code : chr "13905" "13413" "14569" "14897" ...
## $ Cooperator.Name : chr "" "" "" "" ...
## $ Cooperator.Address : chr "" "" "" "" ...
## $ Cooperator.Address.2 : chr "" "" "" "" ...
## $ Cooperator.City : chr "" "" "" "" ...
## $ Cooperator.State : chr "New York" "New York" "New York" "New York" ...
## $ Cooperator.Zip.Code : int NA NA NA NA NA NA NA NA NA NA ...
## $ Ownership.Type : chr "Not for Profit Corporation" "Not for Profit Corporation" "Business Corporation" "LLC" ...
## $ Facility.Location : chr "(42.097095, -75.975243)" "(43.05497, -75.228828)" "(42.738979, -78.12867)" "(42.126461, -77.967834)" ...
## $ geometry :sfc_POINT of length 3848; first list element: 'XY' num -76 42.1
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:34] "Facility.ID" "Facility.Name" "Short.Description" "Description" ...
# Reproject the facility points into the CRS of the ZIP code polygons (week 8).
nys_hf_sf <- nys_hf_sf %>% st_transform(st_crs(nyc_040114))
# Keep nursing homes (Short.Description "NH") in the five NYC counties.
nyc_nh <- nys_hf_sf %>%
  filter(
    Short.Description == "NH" &
      Facility.County %in% c("New York", "Kings", "Queens", "Bronx", "Richmond")
  )
# Tag every nursing home point with the ZIP polygon it lies within.
nyc_nh_merge <- nyc_nh %>% st_join(nyc_040114, join = st_within)
# Count nursing homes per ZIP code, ignoring points that matched no polygon.
nyc_nh_count <- nyc_nh_merge %>%
  st_drop_geometry() %>%
  filter(!is.na(ZIPCODE)) %>%
  group_by(ZIPCODE) %>%
  summarise(nh_count = n())
# Put the counts back on the ZIP polygons (this overwrites the count table
# with the joined sf layer); ZIP codes without a matched nursing home keep NA.
nyc_nh_count <- left_join(nyc_040114, nyc_nh_count, by = "ZIPCODE")
# Interactive map coloured by nursing home count.
mapview(nyc_nh_count, zcol = "nh_count")
##########################################################################
# Join the Census ACS population, race, and age data to the NYC Planning Census
# Tract Data
# Read the census tract polygons.
nycCensus <- sf::st_read(
  dsn = "R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp",
  stringsAsFactors = FALSE
)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\ranae\Documents\Hunter\Spring 2026\GTECH 78520 - Data Analysis & Viz\R-Spatial\R-Spatial_II_Lab\2010 Census Tracts\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS84(DD)
# Derive each tract's county FIPS code from its borough name, then build the
# tract identifier by appending the ct2010 value to the county code.
nycCensus <- nycCensus %>%
  dplyr::mutate(
    cntyFIPS = case_when(
      boro_name == "Bronx" ~ "005",
      boro_name == "Brooklyn" ~ "047",
      boro_name == "Manhattan" ~ "061",
      boro_name == "Queens" ~ "081",
      boro_name == "Staten Island" ~ "085"
    ),
    tractFIPS = paste0(cntyFIPS, ct2010)
  )
# ACS data: read the raw lines, drop line 2, parse the remainder as CSV, and
# keep/rename the demographic columns used below.
# NOTE(review): line 2 is presumably a descriptive-label row that accompanies
# the column codes in this export -- confirm against the file.
# The lines are handed to read.csv() through `text =` so that read.csv() opens
# and closes its own text connection; a connection created with
# textConnection() inside the pipeline would be left open.
acsData <- readLines(
  "R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv"
) %>%
  magrittr::extract(-2) %>%
  read.csv(text = ., header = TRUE, quote = "\"") %>%
  dplyr::select(
    GEO_ID,
    totPop = DP05_0001E,
    elderlyPop = DP05_0024E,
    malePop = DP05_0002E,
    femalePop = DP05_0003E,
    whitePop = DP05_0037E,
    blackPop = DP05_0038E,
    asianPop = DP05_0067E,
    hispanicPop = DP05_0071E,
    adultPop = DP05_0021E,
    citizenAdult = DP05_0087E
  ) %>%
  # The last nine characters of GEO_ID are the key later matched to tractFIPS.
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9, -1))
# Join the ACS attributes onto the tract polygons (tractFIPS matched against
# censusCode) and reproject the result into the CRS of the ZIP code layer.
popData <- merge(
  x = nycCensus,
  y = acsData,
  by.x = "tractFIPS",
  by.y = "censusCode"
) %>%
  sf::st_transform(crs = st_crs(nyc_040114))
############################################################################
# Aggregate the ACS census data to zip code area data.
# Aggregate the tract-level ACS counts to the ZIP code areas:
#   1. reduce each census tract polygon to its centroid,
#   2. attach every centroid to the ZIP code polygon that contains it,
#   3. sum the population counts per ZIP code area.
# `.groups = "drop"` returns an ungrouped result. Without it the output stays
# grouped by the first five keys, and later verbs such as slice_head() would
# act once per group instead of on the whole table.
covidPopZipNYC <- sf::st_join(
  covid_merge_sf,
  popData %>% sf::st_centroid(),
  join = st_contains
) %>%
  group_by(
    MODIFIED_ZCTA,
    PO_NAME,
    POPULATION,
    COUNTY,
    COVID_CASE_COUNT,
    TOTAL_COVID_TESTS
  ) %>%
  summarise(
    totPop = sum(totPop),
    # totPop on the next line is already the per-group sum computed just above.
    malePctg = sum(malePop) / totPop * 100,
    asianPop = sum(asianPop),
    blackPop = sum(blackPop),
    hispanicPop = sum(hispanicPop),
    whitePop = sum(whitePop),
    .groups = "drop"
  )
## Warning: st_centroid assumes attributes are constant over geometries
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by MODIFIED_ZCTA, PO_NAME, POPULATION,
## COUNTY, COVID_CASE_COUNT, and TOTAL_COVID_TESTS.
## ℹ Output is grouped by MODIFIED_ZCTA, PO_NAME, POPULATION, COUNTY, and
## COVID_CASE_COUNT.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(MODIFIED_ZCTA, PO_NAME, POPULATION, COUNTY,
## COVID_CASE_COUNT, TOTAL_COVID_TESTS))` for per-operation grouping
## (`?dplyr::dplyr_by`) instead.
# First 10 rows. ungroup() guarantees that slice_head() takes 10 rows from the
# whole table; on a grouped table it would instead take up to 10 rows from
# every group.
covidPopZipNYC %>%
  ungroup() %>%
  slice_head(n = 10)
## Simple feature collection with 180 features and 12 fields
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067113 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 180 × 13
## # Groups: MODIFIED_ZCTA, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT [180]
## MODIFIED_ZCTA PO_NAME POPULATION COUNTY COVID_CASE_COUNT TOTAL_COVID_TESTS
## <chr> <chr> <dbl> <chr> <chr> <chr>
## 1 10001 New York 22413 New York 1542 20158
## 2 10002 New York 81305 New York 5902 48197
## 3 10003 New York 55878 New York 2803 41076
## 4 10004 New York 2187 New York 247 3599
## 5 10005 New York 8107 New York 413 6102
## 6 10006 New York 3011 New York 164 2441
## 7 10007 New York 7323 New York 379 6342
## 8 10009 New York 61455 New York 3605 38773
## 9 10010 New York 29881 New York 1686 27864
## 10 10011 New York 50594 New York 2542 35539
## # ℹ 170 more rows
## # ℹ 7 more variables: totPop <int>, malePctg <dbl>, asianPop <int>,
## # blackPop <int>, hispanicPop <int>, whitePop <int>,
## # geometry <GEOMETRY [US_survey_foot]>