R Spatial Lab Assignment #1

Task 1: Setting up Project directory

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(sf)
## Linking to GEOS 3.13.1, GDAL 3.11.4, PROJ 9.7.0; sf_use_s2() is TRUE
library(mapview)
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
# Build the path to the project's data folder from the current working
# directory.
wd <- getwd()
data_dir <- file.path(wd, "data")

# Show which files are currently present in the data folder.
list.files(data_dir)
##  [1] "nyc_data.gpkg"                "nyc_spatial_data.RData"      
##  [3] "nyc_spatial_data_week7.RData" "NYS_Health_Facility.csv"     
##  [5] "nys_retail_food_store_xy.csv" "NYS_Retail_Food_Stores.csv"  
##  [7] "ZIP_CODE_040114.dbf"          "ZIP_CODE_040114.prj"         
##  [9] "ZIP_CODE_040114.sbn"          "ZIP_CODE_040114.sbx"         
## [11] "ZIP_CODE_040114.shp"          "ZIP_CODE_040114.shp.xml"     
## [13] "ZIP_CODE_040114.shx"          "ZIP_CODE_040114.zip"
# Extract the contents of the ZIP code archive into the data folder, next to
# the archive itself.
unzip(
  zipfile = file.path(data_dir, "ZIP_CODE_040114.zip"),
  exdir = data_dir
)

Task 2: Read NYC Postal Areas Shapefile

# Read the extracted ZIP code shapefile from the data folder with st_read().
nyc_zip <- file.path(data_dir, "ZIP_CODE_040114.shp") %>%
  st_read()
## Reading layer `ZIP_CODE_040114' from data source 
##   `C:\Users\wildk\OneDrive\Documents\2026 Spring\Visual in R\R-Spatial-Week7\data\ZIP_CODE_040114.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
# Print the classes of nyc_zip to confirm what st_read() returned.
class(nyc_zip)
## [1] "sf"         "data.frame"

Task 3: Process NYS Health Facility Data

# Load the NYS health facility table and standardise its column names with
# janitor::clean_names().
health <- file.path(data_dir, "NYS_Health_Facility.csv") %>%
  read.csv() %>%
  clean_names()

# List the cleaned column names to identify the coordinate fields.
names(health)
##  [1] "facility_id"                  "facility_name"               
##  [3] "short_description"            "description"                 
##  [5] "facility_open_date"           "facility_address_1"          
##  [7] "facility_address_2"           "facility_city"               
##  [9] "facility_state"               "facility_zip_code"           
## [11] "facility_phone_number"        "facility_fax_number"         
## [13] "facility_website"             "facility_county_code"        
## [15] "facility_county"              "regional_office_id"          
## [17] "regional_office"              "main_site_name"              
## [19] "main_site_facility_id"        "operating_certificate_number"
## [21] "operator_name"                "operator_address_1"          
## [23] "operator_address_2"           "operator_city"               
## [25] "operator_state"               "operator_zip_code"           
## [27] "cooperator_name"              "cooperator_address"          
## [29] "cooperator_address_2"         "cooperator_city"             
## [31] "cooperator_state"             "cooperator_zip_code"         
## [33] "ownership_type"               "facility_latitude"           
## [35] "facility_longitude"           "facility_location"
# Keep only facilities that have both coordinates recorded; with its default
# settings st_as_sf() rejects missing values in the coordinate columns.
health_clean <- health %>%
  filter(!is.na(facility_latitude), !is.na(facility_longitude))

# Drop records located at exactly (0, 0) -- presumably placeholder coordinates
# rather than real locations (TODO confirm) -- then build point geometries in
# EPSG:4326 (longitude/latitude). Filtering on the plain numeric columns before
# the conversion avoids extracting the coordinate matrix from the geometry
# twice just to test two values per row.
health_sf <- health_clean %>%
  filter(!(facility_longitude == 0 & facility_latitude == 0)) %>%
  st_as_sf(
    coords = c("facility_longitude", "facility_latitude"),
    crs = 4326
  )

Task 4: Process NYS Retail Food Stores Data

# Load the retail food store table, decoding the file as Latin-1, and
# standardise the column names. The first header is read as "ï..County", which
# clean_names() turns into `i_county`; that name is used below for the county.
food <- file.path(data_dir, "nys_retail_food_store_xy.csv") %>%
  read_csv(locale = locale(encoding = "latin1")) %>%
  clean_names()
## Rows: 29389 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): ï..County, Operation.Type, Establishment.Type, Entity.Name, DBA.Na...
## dbl  (4): License.Number, Zip.Code, Y, X
## num  (1): Square.Footage
## lgl  (2): Address.Line.2, Address.Line.3
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only stores whose county is one of the five NYC counties.
food_nyc <- food %>%
  filter(
    i_county %in% c("Bronx", "Kings", "New York", "Queens", "Richmond")
  )

# List the cleaned column names to identify the coordinate fields.
colnames(food)
##  [1] "i_county"           "license_number"     "operation_type"    
##  [4] "establishment_type" "entity_name"        "dba_name"          
##  [7] "street_number"      "street_name"        "address_line_2"    
## [10] "address_line_3"     "city"               "state"             
## [13] "zip_code"           "square_footage"     "location"          
## [16] "coords"             "y"                  "x"
# Drop stores that are missing either coordinate.
food_nyc <- food_nyc %>%
  filter(!(is.na(x) | is.na(y)))

# Convert the NYC stores to an sf point layer, using x and y as the
# coordinates in EPSG:4326.
food_sf <- food_nyc %>%
  st_as_sf(coords = c("x", "y"), crs = 4326)

Task 5: Verify Locations Using Mapview

# Display each of the three layers with mapview(), one map per layer, so the
# locations can be checked visually.
mapview(nyc_zip)
mapview(health_sf)
mapview(food_sf)

Task 6: Save Spatial Objects

# Store the three spatial layers together in a single .RData file inside the
# data folder.
save(
  list = c("nyc_zip", "health_sf", "food_sf"),
  file = file.path(data_dir, "nyc_spatial_data.RData")
)