This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
require(sf); require(sp); require(rgdal); require(tidyverse)
require(dplyr)
library(ggmap)
library(rstudioapi)
wd <- dirname(getActiveDocumentContext()$path)
setwd(wd)
##Tasks for the first lab are:
## 1. Set up a R project for the R-Spatial section.
## 2. Read the NYC postal areas in Shapefiles into sf and sp objects. As NYC DOH publishes COVID-19 data by zip code,
## we will utilize the postal area data later.
## 3. Read and process the NYC public health services spreadsheet data. Create sf and sp objects from geographic coordinates. clean
## 4. Read and process the NYS retail food stores data. Create sf and sp objects from geographic coordinates for NYC. clean
# 5. Use simple mapping method with a basemap to verify the above datasets in terms of their geometry locations.
#read in as SF
NYC_zip_sf <- st_read("ZIP_CODE_040114/ZIP_CODE_040114.shp")
## Reading layer `ZIP_CODE_040114' from data source `/Volumes/Storage/School/'21 (1) Spring/GTECH 38520 R/Homework/Wk 11_15/Wk 11/ZIP_CODE_040114/ZIP_CODE_040114.shp' using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
#read in as SP
getwd() %>% paste('/ZIP_CODE_040114', sep='') %>%
readOGR("ZIP_CODE_040114") -> NYC_zip_sp
## OGR data source with driver: ESRI Shapefile
## Source: "/Volumes/Storage/School/'21 (1) Spring/GTECH 38520 R/Homework/Wk 11_15/Wk 11/ZIP_CODE_040114", layer: "ZIP_CODE_040114"
## with 263 features
## It has 12 fields
#plot(st_geometry(NYC_zip_sf))
#plot(NYC_zip_sp)
#import NYS health facilities
Health_Fac <- read.csv("NYS_Health_Facility.csv")
NYC_Health_Hosp <- read.csv("Map_of_NYC_Health_and_Hospitals.csv")
# filter for only NYC counties
str(Health_Fac)
## 'data.frame': 3990 obs. of 36 variables:
## $ Facility.ID : int 204 620 654 1156 2589 3455 3853 4249 4473 6230 ...
## $ Facility.Name : chr "Hospice at Lourdes" "Charles T Sitrin Health Care Center Inc" "Central Park Rehabilitation and Nursing Center" "East Side Nursing Home" ...
## $ Short.Description : chr "HSPC" "NH" "NH" "NH" ...
## $ Description : chr "Hospice" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" ...
## $ Facility.Open.Date : chr "06/01/1985" "02/01/1989" "02/01/1989" "08/01/1979" ...
## $ Facility.Address.1 : chr "4102 Old Vestal Road" "2050 Tilden Avenue" "116 Martin Luther King East" "62 Prospect St" ...
## $ Facility.Address.2 : chr "" "" "" "" ...
## $ Facility.City : chr "Vestal" "New Hartford" "Syracuse" "Warsaw" ...
## $ Facility.State : chr "New York" "New York" "New York" "New York" ...
## $ Facility.Zip.Code : chr "13850" "13413" "13205" "14569" ...
## $ Facility.Phone.Number : num 6.08e+09 3.16e+09 3.15e+09 5.86e+09 5.86e+09 ...
## $ Facility.Fax.Number : num NA NA NA NA NA ...
## $ Facility.Website : chr "" "" "" "" ...
## $ Facility.County.Code : int 3 32 33 60 2 14 29 14 29 7093 ...
## $ Facility.County : chr "Broome" "Oneida" "Onondaga" "Wyoming" ...
## $ Regional.Office.ID : int 3 3 3 1 1 1 7 1 7 5 ...
## $ Regional.Office : chr "Central New York Regional Office" "Central New York Regional Office" "Central New York Regional Office" "Western Regional Office - Buffalo" ...
## $ Main.Site.Name : chr "" "" "" "" ...
## $ Main.Site.Facility.ID : int NA NA NA NA NA NA NA NA NA 1463 ...
## $ Operating.Certificate.Number: chr "0301501F" "3227304N" "3301326N" "6027303N" ...
## $ Operator.Name : chr "Our Lady of Lourdes Memorial Hospital Inc" "Charles T Sitrin Health Care Center, Inc" "CPRNC, LLC" "East Side Nursing Home Inc" ...
## $ Operator.Address.1 : chr "169 Riverside Drive" "Box 1000 Tilden Avenue" "116 Martin Luther King East" "62 Prospect Street" ...
## $ Operator.Address.2 : chr "" "" "" "" ...
## $ Operator.City : chr "Binghamton" "New Hartford" "Syracuse" "Warsaw" ...
## $ Operator.State : chr "New York" "New York" "New York" "New York" ...
## $ Operator.Zip.Code : chr "13905" "13413" "13205" "14569" ...
## $ Cooperator.Name : chr "" "" "" "" ...
## $ Cooperator.Address : chr "" "" "" "" ...
## $ Cooperator.Address.2 : chr "" "" "" "" ...
## $ Cooperator.City : chr "" "" "" "" ...
## $ Cooperator.State : chr "New York" "New York" "New York" "New York" ...
## $ Cooperator.Zip.Code : int NA NA NA NA NA NA NA NA NA NA ...
## $ Ownership.Type : chr "Not for Profit Corporation" "Not for Profit Corporation" "LLC" "Business Corporation" ...
## $ Facility.Latitude : num 42.1 43.1 NA 42.7 42.1 ...
## $ Facility.Longitude : num -76 -75.2 NA -78.1 -78 ...
## $ Facility.Location : chr "(42.097095, -75.975243)" "(43.05497, -75.228828)" "" "(42.738979, -78.12867)" ...
Health_Fac %>%
dplyr::select(Facility.ID, Facility.Name, Description,
Facility.Address.1, Facility.Address.2,
Facility.City, Facility.State, Facility.Zip.Code,
Facility.County.Code, Facility.County, Facility.Latitude, Facility.Longitude) %>%
filter(Facility.County %in% c("Kings" ,"Queens" , "New York", "Richmond", "Bronx")) %>%
filter(Facility.Latitude != 'NA') %>%
filter(Facility.Latitude != 0) -> Clean_Health_Fac
#separate NYC_Health_Hosp location column to get coords.
# These are already part of the larger data set, but the code is successful in
# capturing the coordinates and the addresses and putting them in different columns.
NYC_Health_Hosp %>%
separate(Location.1, into = c("Address", "Coords"), sep = "([(])", convert = TRUE) %>%
separate(Coords, into = c("Latitude", "Longitude", "Nothing2"), sep = "([(,)])", convert = TRUE) %>%
dplyr::select(Facility.Type, Borough, Facility.Name, Cross.Streets, Phone, Address, Latitude, Longitude)-> Clean_NYC_Health_Hosp
Health_Fac_sf <- st_as_sf(Clean_Health_Fac, coords = c("Facility.Longitude", "Facility.Latitude"))
rm(Health_Fac)
str(Health_Fac_sf)
## Classes 'sf' and 'data.frame': 1292 obs. of 11 variables:
## $ Facility.ID : int 6230 7257 9006 9970 1217 1820 4326 5574 7579 1653 ...
## $ Facility.Name : chr "NYU Langone Rutherford" "Park Ridge Family Health Center" "FedCare, Inc." "Parkmed NYC, LLC" ...
## $ Description : chr "Hospital Extension Clinic" "Hospital Extension Clinic" "Diagnostic and Treatment Center" "Diagnostic and Treatment Center" ...
## $ Facility.Address.1 : chr "305 Second Ave" "6317 4th Avenue" "344 West 51 Street" "800 Second Avenue, 6th Floor" ...
## $ Facility.Address.2 : chr "" "" "" "" ...
## $ Facility.City : chr "New York" "Brooklyn" "New York" "New York" ...
## $ Facility.State : chr "New York" "New York" "New York" "New York" ...
## $ Facility.Zip.Code : chr "10003" "11220" "10019" "10017" ...
## $ Facility.County.Code: int 7093 7095 7093 7093 7094 7095 7094 7095 7096 7096 ...
## $ Facility.County : chr "New York" "Kings" "New York" "New York" ...
## $ geometry :sfc_POINT of length 1292; first list element: 'XY' num -74 40.7
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA
## ..- attr(*, "names")= chr [1:10] "Facility.ID" "Facility.Name" "Description" "Facility.Address.1" ...
st_crs(Health_Fac_sf)
## Coordinate Reference System: NA
st_crs(Health_Fac_sf) <- 4326
st_crs(Health_Fac_sf)
## Coordinate Reference System:
## User input: EPSG:4326
## wkt:
## GEOGCRS["WGS 84",
## DATUM["World Geodetic System 1984",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["geodetic latitude (Lat)",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["geodetic longitude (Lon)",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## USAGE[
## SCOPE["unknown"],
## AREA["World"],
## BBOX[-90,-180,90,180]],
## ID["EPSG",4326]]
require(ggmap)
Health_Fac_sf %>% st_union() %>% st_bbox() %>% as.vector() %>%
ggmap::get_stamenmap(zoom = 14) -> baseMap
ggmap(baseMap) +
geom_point(aes(x=X, y=Y),
data = Health_Fac_sf %>% st_coordinates() %>%
tibble::as_tibble(),
color = 'brown',
size = 1,
alpha = .5
)
#Output to folder
#st_write(Health_Fac_sf, "Health_Fac_SF", driver = "ESRI Shapefile")
## Create sp file
coordinates(Clean_Health_Fac) <- c("Facility.Longitude", "Facility.Latitude")
class(Clean_Health_Fac)
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"
is.projected(Clean_Health_Fac)
## [1] NA
proj4string(Clean_Health_Fac)
## [1] NA
proj4string(Clean_Health_Fac)<- CRS("+init=epsg:4326")
is.projected(Clean_Health_Fac)
## [1] FALSE
proj4string(Clean_Health_Fac)
## [1] "+proj=longlat +datum=WGS84 +no_defs"
Clean_Health_Fac@bbox %>% as.vector() %>%
ggmap::get_stamenmap(zoom = 11) %>%
ggmap() +
geom_point(aes(x= Facility.Longitude, y= Facility.Latitude),
data = Clean_Health_Fac %>% coordinates() %>%
tibble::as_tibble(),
color = 'blue',
size = 1,
alpha = .2
)
#writeOGR(Clean_Health_Fac, "Health_Fac_SP", "Health_Fac_SP", driver = "ESRI Shapefile")
## NYS Food Store
NYS_Food <- read.csv("NYS_Retail_Food_Stores.csv")
## extract coordinates, filter to 5 boroughs, filter out NAs
# NYS_Food %>%
# filter(Location != 'NA') %>%
# filter(Location != 0) %>%
# filter(County %in% c("Kings", "New York", "Richmond", "Bronx", "Queens")) %>%
# separate(Location, into = c("Address", "Coords"), sep = "([(])") %>%
# separate(Coords, into = c("Latitude", "Longitude"), sep = "([(,)])") %>%
# dplyr::select(County, License.Number, Operation.Type,
# Establishment.Type, Entity.Name, DBA.Name,
# Street.Number, Street.Name, City, State,
# Zip.Code, Square.Footage, Latitude, Longitude) %>%
# filter(Latitude != 'NA') %>%
# filter(Latitude != 0) -> Clean_NYC_Food_Stores
#Clean_NYC_Food_Stores[,c(13, 14)] <- sapply(Clean_NYC_Food_Stores[,c(13, 14)], as.numeric)
# My method is not working because the coordinate values end up as character type.
# When converted to numeric, they lose the sixth decimal place. No longer valid as coordinates?
NYS_Food %>%
filter(County %in% c("Kings", "New York", "Richmond", "Bronx", "Queens")) %>%
tidyr::extract(Location, into = c('Lat', 'Long'), regex = "(\\d+.\\d+),[ ]*([-]\\d+.\\d+)") %>%
dplyr::mutate(Lat = as.numeric(Lat), Long = as.numeric(Long)) %>%
tidyr::drop_na(Lat, Long) -> Clean_NYC_Food_Stores
Clean_NYC_Food_Stores %>%
sf::st_as_sf(coords = c('Long', 'Lat')) %>%
st_set_crs(4326) -> NYC_Food_Stores_sf
str(NYC_Food_Stores_sf)
## Classes 'sf' and 'data.frame': 11301 obs. of 15 variables:
## $ County : chr "Bronx" "Bronx" "Bronx" "Bronx" ...
## $ License.Number : int 734149 606221 606228 723375 724807 712943 703060 609065 722972 609621 ...
## $ Operation.Type : chr "Store" "Store" "Store" "Store" ...
## $ Establishment.Type: chr "JAC " "JAC " "JAC " "JAC " ...
## $ Entity.Name : chr "7 ELEVEN FOOD STORE #37933H " "1001 SAN MIGUEL FOOD CENTER INC " "1029 FOOD PLAZA INC " "1078 DELI GROCERY CORP " ...
## $ DBA.Name : chr " " "1001 SAN MIGUEL FD CNTR" "1029 FOOD PLAZA " "1078 DELI GROCERY " ...
## $ Street.Number : chr "500" "1001" "122" "1078" ...
## $ Street.Name : chr "BAYCHESTER AVE " "SHERIDAN AVE " "E 181ST ST " "EAST 165TH STREET " ...
## $ Address.Line.2 : logi NA NA NA NA NA NA ...
## $ Address.Line.3 : logi NA NA NA NA NA NA ...
## $ City : chr "BRONX " "BRONX " "BRONX " "BRONX " ...
## $ State : chr "NY" "NY" "NY" "NY" ...
## $ Zip.Code : int 10475 10456 10453 10459 10456 10453 10467 10456 10456 10472 ...
## $ Square.Footage : chr "0" "1,100" "2,000" "1,200" ...
## $ geometry :sfc_POINT of length 11301; first list element: 'XY' num -73.8 40.9
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:14] "County" "License.Number" "Operation.Type" "Establishment.Type" ...
require(ggmap)
NYC_Food_Stores_sf %>% st_bbox() %>% as.vector() %>%
ggmap::get_stamenmap(zoom = 13) -> baseMap;
ggmap(baseMap) +
geom_point(aes(x=X, y=Y),
data = NYC_Food_Stores_sf %>% st_coordinates() %>%
tibble::as_tibble(),
color = 'brown',
size = 1,
alpha = .5)
# output to folder
#st_write(NYC_Food_Stores_sf, "NYC_Food_Stores_SF", driver = "ESRI Shapefile")
## Create sp file
coordinates(Clean_NYC_Food_Stores) <- c("Long", "Lat")
class(Clean_NYC_Food_Stores)
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"
is.projected(Clean_NYC_Food_Stores)
## [1] NA
proj4string(Clean_NYC_Food_Stores)
## [1] NA
proj4string(Clean_NYC_Food_Stores)<- CRS("+init=epsg:4326")
is.projected(Clean_NYC_Food_Stores)
## [1] FALSE
proj4string(Clean_NYC_Food_Stores)
## [1] "+proj=longlat +datum=WGS84 +no_defs"
Clean_NYC_Food_Stores@bbox %>% as.vector() %>%
ggmap::get_stamenmap(zoom = 11) %>%
ggmap() +
geom_point(aes(x= Long, y= Lat),
data = Clean_NYC_Food_Stores %>% coordinates() %>%
tibble::as_tibble(),
color = 'green',
size = 1,
alpha = .2
)
#writeOGR(Clean_NYC_Food_Stores, "NYC_Food_Store_SP", "NYC_Food_Store_SP", driver = "ESRI Shapefile")
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.