R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

require(sf); require(sp); require(rgdal); require(tidyverse)
require(dplyr)
library(ggmap)
library(rstudioapi)

wd <- dirname(getActiveDocumentContext()$path)
setwd(wd)


##Tasks for the first lab are:

## 1. Set up a R project for the R-Spatial section.



## 2. Read the NYC postal areas in Shapefiles into sf and sp objects. As NYC DOH publishes COVID-19 data by zip code,
##       we will utilize the postal area data later.

## 3. Read and process the NYC public health services spreadsheet data. Create sf and sp objects from geographic coordinates. clean

## 4. Read and process the NYS retail food stores data. Create sf and sp objects from geographic coordinates for NYC. clean
#  5. Use simple mapping method with a basemap to verify the above datasets in terms of their geometry locations.

#read in as SF
NYC_zip_sf <- st_read("ZIP_CODE_040114/ZIP_CODE_040114.shp")
## Reading layer `ZIP_CODE_040114' from data source `/Volumes/Storage/School/'21 (1) Spring/GTECH 38520 R/Homework/Wk 11_15/Wk 11/ZIP_CODE_040114/ZIP_CODE_040114.shp' using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
#read in as SP
getwd() %>% paste('/ZIP_CODE_040114', sep='') %>% 
readOGR("ZIP_CODE_040114") -> NYC_zip_sp
## OGR data source with driver: ESRI Shapefile 
## Source: "/Volumes/Storage/School/'21 (1) Spring/GTECH 38520 R/Homework/Wk 11_15/Wk 11/ZIP_CODE_040114", layer: "ZIP_CODE_040114"
## with 263 features
## It has 12 fields
#plot(st_geometry(NYC_zip_sf))

#plot(NYC_zip_sp)

#import NYS health facilities
Health_Fac <- read.csv("NYS_Health_Facility.csv")
NYC_Health_Hosp <- read.csv("Map_of_NYC_Health_and_Hospitals.csv")

# filter for only NYC counties
str(Health_Fac)
## 'data.frame':    3990 obs. of  36 variables:
##  $ Facility.ID                 : int  204 620 654 1156 2589 3455 3853 4249 4473 6230 ...
##  $ Facility.Name               : chr  "Hospice at Lourdes" "Charles T Sitrin Health Care Center Inc" "Central Park Rehabilitation and Nursing Center" "East Side Nursing Home" ...
##  $ Short.Description           : chr  "HSPC" "NH" "NH" "NH" ...
##  $ Description                 : chr  "Hospice" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" ...
##  $ Facility.Open.Date          : chr  "06/01/1985" "02/01/1989" "02/01/1989" "08/01/1979" ...
##  $ Facility.Address.1          : chr  "4102 Old Vestal Road" "2050 Tilden Avenue" "116 Martin Luther King East" "62 Prospect St" ...
##  $ Facility.Address.2          : chr  "" "" "" "" ...
##  $ Facility.City               : chr  "Vestal" "New Hartford" "Syracuse" "Warsaw" ...
##  $ Facility.State              : chr  "New York" "New York" "New York" "New York" ...
##  $ Facility.Zip.Code           : chr  "13850" "13413" "13205" "14569" ...
##  $ Facility.Phone.Number       : num  6.08e+09 3.16e+09 3.15e+09 5.86e+09 5.86e+09 ...
##  $ Facility.Fax.Number         : num  NA NA NA NA NA ...
##  $ Facility.Website            : chr  "" "" "" "" ...
##  $ Facility.County.Code        : int  3 32 33 60 2 14 29 14 29 7093 ...
##  $ Facility.County             : chr  "Broome" "Oneida" "Onondaga" "Wyoming" ...
##  $ Regional.Office.ID          : int  3 3 3 1 1 1 7 1 7 5 ...
##  $ Regional.Office             : chr  "Central New York Regional Office" "Central New York Regional Office" "Central New York Regional Office" "Western Regional Office - Buffalo" ...
##  $ Main.Site.Name              : chr  "" "" "" "" ...
##  $ Main.Site.Facility.ID       : int  NA NA NA NA NA NA NA NA NA 1463 ...
##  $ Operating.Certificate.Number: chr  "0301501F" "3227304N" "3301326N" "6027303N" ...
##  $ Operator.Name               : chr  "Our Lady of Lourdes Memorial Hospital Inc" "Charles T Sitrin Health Care Center, Inc" "CPRNC, LLC" "East Side Nursing Home Inc" ...
##  $ Operator.Address.1          : chr  "169 Riverside Drive" "Box 1000 Tilden Avenue" "116 Martin Luther King East" "62 Prospect Street" ...
##  $ Operator.Address.2          : chr  "" "" "" "" ...
##  $ Operator.City               : chr  "Binghamton" "New Hartford" "Syracuse" "Warsaw" ...
##  $ Operator.State              : chr  "New York" "New York" "New York" "New York" ...
##  $ Operator.Zip.Code           : chr  "13905" "13413" "13205" "14569" ...
##  $ Cooperator.Name             : chr  "" "" "" "" ...
##  $ Cooperator.Address          : chr  "" "" "" "" ...
##  $ Cooperator.Address.2        : chr  "" "" "" "" ...
##  $ Cooperator.City             : chr  "" "" "" "" ...
##  $ Cooperator.State            : chr  "New York" "New York" "New York" "New York" ...
##  $ Cooperator.Zip.Code         : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Ownership.Type              : chr  "Not for Profit Corporation" "Not for Profit Corporation" "LLC" "Business Corporation" ...
##  $ Facility.Latitude           : num  42.1 43.1 NA 42.7 42.1 ...
##  $ Facility.Longitude          : num  -76 -75.2 NA -78.1 -78 ...
##  $ Facility.Location           : chr  "(42.097095, -75.975243)" "(43.05497, -75.228828)" "" "(42.738979, -78.12867)" ...
Health_Fac %>% 
  dplyr::select(Facility.ID, Facility.Name, Description, 
         Facility.Address.1, Facility.Address.2,
         Facility.City, Facility.State, Facility.Zip.Code, 
         Facility.County.Code, Facility.County, Facility.Latitude, Facility.Longitude) %>% 
  filter(Facility.County %in% c("Kings" ,"Queens" , "New York", "Richmond", "Bronx")) %>% 
  filter(Facility.Latitude != 'NA') %>% 
  filter(Facility.Latitude != 0) -> Clean_Health_Fac

#separate NYC_Health_Hosp location column to get coords. 
#  These are already part of the larger data set, but the code is successful in 
# capturing the coordinates and the addresses and putting them in different columns. 
NYC_Health_Hosp %>% 
  separate(Location.1, into = c("Address", "Coords"), sep = "([(])", convert = TRUE) %>% 
  separate(Coords, into = c("Latitude", "Longitude", "Nothing2"), sep = "([(,)])", convert = TRUE) %>% 
  dplyr::select(Facility.Type, Borough, Facility.Name, Cross.Streets, Phone, Address, Latitude, Longitude)-> Clean_NYC_Health_Hosp


Health_Fac_sf <- st_as_sf(Clean_Health_Fac, coords = c("Facility.Longitude", "Facility.Latitude"))

rm(Health_Fac)

str(Health_Fac_sf)
## Classes 'sf' and 'data.frame':   1292 obs. of  11 variables:
##  $ Facility.ID         : int  6230 7257 9006 9970 1217 1820 4326 5574 7579 1653 ...
##  $ Facility.Name       : chr  "NYU Langone Rutherford" "Park Ridge Family Health Center" "FedCare, Inc." "Parkmed NYC, LLC" ...
##  $ Description         : chr  "Hospital Extension Clinic" "Hospital Extension Clinic" "Diagnostic and Treatment Center" "Diagnostic and Treatment Center" ...
##  $ Facility.Address.1  : chr  "305 Second Ave" "6317 4th Avenue" "344 West 51 Street" "800 Second Avenue, 6th Floor" ...
##  $ Facility.Address.2  : chr  "" "" "" "" ...
##  $ Facility.City       : chr  "New York" "Brooklyn" "New York" "New York" ...
##  $ Facility.State      : chr  "New York" "New York" "New York" "New York" ...
##  $ Facility.Zip.Code   : chr  "10003" "11220" "10019" "10017" ...
##  $ Facility.County.Code: int  7093 7095 7093 7093 7094 7095 7094 7095 7096 7096 ...
##  $ Facility.County     : chr  "New York" "Kings" "New York" "New York" ...
##  $ geometry            :sfc_POINT of length 1292; first list element:  'XY' num  -74 40.7
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA
##   ..- attr(*, "names")= chr [1:10] "Facility.ID" "Facility.Name" "Description" "Facility.Address.1" ...
st_crs(Health_Fac_sf)
## Coordinate Reference System: NA
st_crs(Health_Fac_sf) <- 4326
st_crs(Health_Fac_sf)
## Coordinate Reference System:
##   User input: EPSG:4326 
##   wkt:
## GEOGCRS["WGS 84",
##     DATUM["World Geodetic System 1984",
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     USAGE[
##         SCOPE["unknown"],
##         AREA["World"],
##         BBOX[-90,-180,90,180]],
##     ID["EPSG",4326]]
require(ggmap)

Health_Fac_sf %>% st_union() %>% st_bbox() %>% as.vector() %>%
  ggmap::get_stamenmap(zoom = 14) -> baseMap

ggmap(baseMap) +
  geom_point(aes(x=X, y=Y), 
             data = Health_Fac_sf %>% st_coordinates() %>% 
               tibble::as_tibble(),
             color = 'brown',
             size = 1,
             alpha = .5
  )

#Output to folder
#st_write(Health_Fac_sf, "Health_Fac_SF", driver = "ESRI Shapefile")


## Create sp file

coordinates(Clean_Health_Fac) <- c("Facility.Longitude", "Facility.Latitude")
class(Clean_Health_Fac)
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"
is.projected(Clean_Health_Fac)
## [1] NA
proj4string(Clean_Health_Fac)
## [1] NA
proj4string(Clean_Health_Fac)<- CRS("+init=epsg:4326")
is.projected(Clean_Health_Fac)
## [1] FALSE
proj4string(Clean_Health_Fac)
## [1] "+proj=longlat +datum=WGS84 +no_defs"
Clean_Health_Fac@bbox %>%  as.vector() %>%
  ggmap::get_stamenmap(zoom = 11) %>%
  ggmap() +
  geom_point(aes(x= Facility.Longitude, y= Facility.Latitude), 
             data = Clean_Health_Fac %>% coordinates() %>% 
               tibble::as_tibble(),
             color = 'blue',
             size = 1,
             alpha = .2
  )

#writeOGR(Clean_Health_Fac, "Health_Fac_SP", "Health_Fac_SP", driver = "ESRI Shapefile")



## NYS Food Store

NYS_Food <- read.csv("NYS_Retail_Food_Stores.csv")

## extract coordinates, filter to 5 boroughs, filter out NAs 

# NYS_Food %>% 
#   filter(Location != 'NA') %>% 
#   filter(Location != 0) %>% 
#   filter(County %in% c("Kings", "New York", "Richmond", "Bronx", "Queens")) %>%
#   separate(Location, into = c("Address", "Coords"), sep = "([(])") %>% 
#   separate(Coords, into = c("Latitude", "Longitude"), sep = "([(,)])") %>% 
#   dplyr::select(County, License.Number, Operation.Type, 
#                 Establishment.Type, Entity.Name, DBA.Name,
#                 Street.Number, Street.Name, City, State, 
#                 Zip.Code, Square.Footage, Latitude, Longitude) %>% 
#   filter(Latitude != 'NA') %>% 
#   filter(Latitude != 0) -> Clean_NYC_Food_Stores

#Clean_NYC_Food_Stores[,c(13, 14)] <- sapply(Clean_NYC_Food_Stores[,c(13, 14)], as.numeric)
# My method is not working because the coordinate values end up as character type. 
# When converted to numeric, they lose the sixth decimal place. No longer valid as coordinates? 

NYS_Food %>%
  filter(County %in% c("Kings", "New York", "Richmond", "Bronx", "Queens")) %>%
  tidyr::extract(Location, into = c('Lat', 'Long'), regex = "(\\d+.\\d+),[ ]*([-]\\d+.\\d+)") %>%
  dplyr::mutate(Lat = as.numeric(Lat), Long = as.numeric(Long)) %>%
  tidyr::drop_na(Lat, Long) -> Clean_NYC_Food_Stores

Clean_NYC_Food_Stores %>% 
  sf::st_as_sf(coords = c('Long', 'Lat')) %>%
  st_set_crs(4326) -> NYC_Food_Stores_sf

str(NYC_Food_Stores_sf)
## Classes 'sf' and 'data.frame':   11301 obs. of  15 variables:
##  $ County            : chr  "Bronx" "Bronx" "Bronx" "Bronx" ...
##  $ License.Number    : int  734149 606221 606228 723375 724807 712943 703060 609065 722972 609621 ...
##  $ Operation.Type    : chr  "Store" "Store" "Store" "Store" ...
##  $ Establishment.Type: chr  "JAC   " "JAC   " "JAC   " "JAC   " ...
##  $ Entity.Name       : chr  "7 ELEVEN FOOD STORE #37933H      " "1001 SAN MIGUEL FOOD CENTER INC  " "1029 FOOD PLAZA INC              " "1078 DELI GROCERY CORP           " ...
##  $ DBA.Name          : chr  "                       " "1001 SAN MIGUEL FD CNTR" "1029 FOOD PLAZA        " "1078 DELI GROCERY      " ...
##  $ Street.Number     : chr  "500" "1001" "122" "1078" ...
##  $ Street.Name       : chr  "BAYCHESTER AVE               " "SHERIDAN AVE                " "E 181ST ST                   " "EAST 165TH STREET           " ...
##  $ Address.Line.2    : logi  NA NA NA NA NA NA ...
##  $ Address.Line.3    : logi  NA NA NA NA NA NA ...
##  $ City              : chr  "BRONX             " "BRONX             " "BRONX             " "BRONX             " ...
##  $ State             : chr  "NY" "NY" "NY" "NY" ...
##  $ Zip.Code          : int  10475 10456 10453 10459 10456 10453 10467 10456 10456 10472 ...
##  $ Square.Footage    : chr  "0" "1,100" "2,000" "1,200" ...
##  $ geometry          :sfc_POINT of length 11301; first list element:  'XY' num  -73.8 40.9
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:14] "County" "License.Number" "Operation.Type" "Establishment.Type" ...
require(ggmap)

NYC_Food_Stores_sf %>% st_bbox() %>% as.vector() %>%
  ggmap::get_stamenmap(zoom = 13) -> baseMap;

ggmap(baseMap) +
  geom_point(aes(x=X, y=Y), 
             data = NYC_Food_Stores_sf %>% st_coordinates() %>% 
               tibble::as_tibble(),
             color = 'brown',
             size = 1,
             alpha = .5)

# output to folder
#st_write(NYC_Food_Stores_sf, "NYC_Food_Stores_SF", driver = "ESRI Shapefile")

## Create sp file

coordinates(Clean_NYC_Food_Stores) <- c("Long", "Lat")
class(Clean_NYC_Food_Stores)
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"
is.projected(Clean_NYC_Food_Stores)
## [1] NA
proj4string(Clean_NYC_Food_Stores)
## [1] NA
proj4string(Clean_NYC_Food_Stores)<- CRS("+init=epsg:4326")
is.projected(Clean_NYC_Food_Stores)
## [1] FALSE
proj4string(Clean_NYC_Food_Stores)
## [1] "+proj=longlat +datum=WGS84 +no_defs"
Clean_NYC_Food_Stores@bbox %>%  as.vector() %>%
  ggmap::get_stamenmap(zoom = 11) %>%
  ggmap() +
  geom_point(aes(x= Long, y= Lat), 
             data = Clean_NYC_Food_Stores %>% coordinates() %>% 
               tibble::as_tibble(),
             color = 'green',
             size = 1,
             alpha = .2
  )

#writeOGR(Clean_NYC_Food_Stores, "NYC_Food_Store_SP", "NYC_Food_Store_SP", driver = "ESRI Shapefile")

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.