R Spatial Lab Assignment # 1.6

Task 1:

First, I set up the R project from the online text book section, “Prerequisites and Preparations”, fixed missing package dependencies for sf by installing units and sf as binary packages. Second, I set the working directory to R-Spatial project folder, containing a data sub folder with all data from “R-spatial-data.zip”

# install.packages("units", type = "binary")
# install.packages("sf", type = "binary", dependencies = TRUE, force = TRUE)

# Working directory 
getwd()
## [1] "C:/Users/moniq/Desktop/SPRING_26/DATA_ANALYSIS_R/Week_7/R-Spatial"

Task 2:

Read the NYC postal areas in Shapefiles into sf objects. I loaded the shapefile ZIP_CODE_040114.shp using st_read() into an sf object

zipcodes_nyc <- st_read("data/nyc/ZIP_CODE_040114/ZIP_CODE_040114.shp")
## Reading layer `ZIP_CODE_040114' from data source 
##   `C:\Users\moniq\Desktop\SPRING_26\DATA_ANALYSIS_R\Week_7\R-Spatial\data\nyc\ZIP_CODE_040114\ZIP_CODE_040114.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
# Looking at data tables
dim(zipcodes_nyc) # 2166 rows and 114 cols
## [1] 263  13
st_crs(zipcodes_nyc) # 4269 NAD83 coord system
## Coordinate Reference System:
##   User input: NAD83 / New York Long Island (ftUS) 
##   wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4269]],
##     CONVERSION["SPCS83 New York Long Island zone (US survey foot)",
##         METHOD["Lambert Conic Conformal (2SP)",
##             ID["EPSG",9802]],
##         PARAMETER["Latitude of false origin",40.1666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-74,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",984250,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
##         BBOX[40.47,-74.26,41.3,-71.8]],
##     ID["EPSG",2263]]

Task 3:

Read and process the NYS health facilities spreadsheet data. Create sf objects from geographic coordinates:

health_df <- read_csv("data/nyc/NYS_Health_Facility.csv",show_col_types = FALSE)
# looking at column names
names(health_df) # looking at the col names
##  [1] "Facility ID"                  "Facility Name"               
##  [3] "Short Description"            "Description"                 
##  [5] "Facility Open Date"           "Facility Address 1"          
##  [7] "Facility Address 2"           "Facility City"               
##  [9] "Facility State"               "Facility Zip Code"           
## [11] "Facility Phone Number"        "Facility Fax Number"         
## [13] "Facility Website"             "Facility County Code"        
## [15] "Facility County"              "Regional Office ID"          
## [17] "Regional Office"              "Main Site Name"              
## [19] "Main Site Facility ID"        "Operating Certificate Number"
## [21] "Operator Name"                "Operator Address 1"          
## [23] "Operator Address 2"           "Operator City"               
## [25] "Operator State"               "Operator Zip Code"           
## [27] "Cooperator Name"              "Cooperator Address"          
## [29] "Cooperator Address 2"         "Cooperator City"             
## [31] "Cooperator State"             "Cooperator Zip Code"         
## [33] "Ownership Type"               "Facility Latitude"           
## [35] "Facility Longitude"           "Facility Location"

Cleaning and processing the health_df data * a. removing any rows where lat and long is missing * b. set health_sf crs to EPSG code 4326 for WGS84 projection * c. setting health_sf to match the same crs as zipcodes_nyc

health_sf <- health_df %>%
  filter(!is.na(`Facility Latitude`), !is.na(`Facility Longitude`)) %>%
  st_as_sf(coords = c("Facility Longitude", "Facility Latitude"), crs = 4326) %>%
  st_transform(st_crs(zipcodes_nyc))   

# Check if health_sf is the same as zipcodes_nyc
st_crs(health_sf) == st_crs(zipcodes_nyc) # True 
## [1] TRUE
# Filter to NYC counties only 
nyc_counties <- c("New York", "Bronx", "Kings", "Queens", "Richmond")

health_sf_nyc <- health_sf %>%
  filter(`Facility County` %in% nyc_counties)

# Another way using st_filter() method
health_sf_nyc2 <- st_filter(health_sf, zipcodes_nyc)

# checking plots overlap correctly after fixing crs
# plot(st_geometry(zipcodes_nyc)) # map plot of loaded shapefile
# plot(st_geometry(health_sf_nyc2), add = TRUE) # healh facilities 

# using mapview and + for both plots 
map1 <- mapview(zipcodes_nyc, layer.name = "NYC Census Tracts", alpha.regions = 0.2) + 
  mapview(health_sf_nyc2, layer.name = "NYS Health Facilities",col.regions = "red")

map1

Task 4:

Read and process the NYS retail food stores data. Create sf objects from geographic coordinates for NYC.

food_df <- read_csv("data/nyc/nys_retail_food_store_xy.csv", show_col_types = FALSE, locale = locale(encoding = "UTF-8"))# added to fix utf-8 issue 
names(food_df) # look at col names for lat and long coords
##  [1] "\xef..County"       "License.Number"     "Operation.Type"    
##  [4] "Establishment.Type" "Entity.Name"        "DBA.Name"          
##  [7] "Street.Number"      "Street.Name"        "Address.Line.2"    
## [10] "Address.Line.3"     "City"               "State"             
## [13] "Zip.Code"           "Square.Footage"     "Location"          
## [16] "Coords"             "Y"                  "X"
nrow(food_df) # 29389 rows
## [1] 29389
names(food_df)[1] <- "County" # renamedd first col "\xef..County"

# Similar to the health data, making sure no row with missing x and y
# setting these coords to 4326 crs, set this new data to same crs as zipcodes_nyc
# using st_filter to boundaries of only zipcodes_nyc shapefile
food_sf_nyc <- food_df %>%
  filter(!is.na(X), !is.na(Y)) %>%
  st_as_sf(coords = c("X", "Y"), crs = 4326) %>%
  st_transform(st_crs(zipcodes_nyc)) %>%
  st_filter(zipcodes_nyc)   

nrow(food_sf_nyc) # 11296 rows after applying the filters
## [1] 11306
# Alternative method provided - extracting coords from Location col
# using stringr to parse the Location text column into X and Y coords
# I used nys_retail_food_store_xy.csv which has X and Y pre-extracted

Task 5:

Use simple mapping method such as mapview with a basemap to verify * The above datasets in terms of their geographic locations. * Using mapview for all three: NYC Census Tracts, NYC Health Facilities and NYC Retail Food Stores

map2 <- mapview(zipcodes_nyc, layer.name = "NYC Census Tracts", alpha.regions = 0.2) + mapview(health_sf_nyc2, layer.name = "NYC Health Facilities", col.regions = "red") + mapview(food_sf_nyc, layer.name = "NYC Retail Food Stores", col.regions = "blue")

map2

Task 6:

Saving the three sf objects in a single GeoPackage file/database.

st_write(zipcodes_nyc, 
         dsn = './data/nyc/nyc_data.gpkg', 
         layer = 'census_tracts',
         delete_layer = TRUE)
## Deleting layer `census_tracts' using driver `GPKG'
## Writing layer `census_tracts' to data source 
##   `./data/nyc/nyc_data.gpkg' using driver `GPKG'
## Writing 263 features with 12 fields and geometry type Polygon.
st_write(health_sf_nyc2, 
         dsn = './data/nyc/nyc_data.gpkg', 
         layer = 'health_facilities',
         delete_layer = TRUE)
## Deleting layer `health_facilities' using driver `GPKG'
## Writing layer `health_facilities' to data source 
##   `./data/nyc/nyc_data.gpkg' using driver `GPKG'
## Writing 1293 features with 34 fields and geometry type Point.
st_write(food_sf_nyc, 
         dsn = './data/nyc/nyc_data.gpkg', 
         layer = 'retail_food_stores',
         delete_layer = TRUE)
## Deleting layer `retail_food_stores' using driver `GPKG'
## Writing layer `retail_food_stores' to data source 
##   `./data/nyc/nyc_data.gpkg' using driver `GPKG'
## Writing 11306 features with 16 fields and geometry type Point.