R Spatial Lab Assignment #1
Task 1: Setting Up the Project Directory
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(sf)
## Linking to GEOS 3.13.1, GDAL 3.11.4, PROJ 9.7.0; sf_use_s2() is TRUE
library(mapview)
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
# Build the path to the project's data folder from the current working
# directory; every later read goes through `data_dir`.
wd <- getwd()
data_dir <- file.path(wd, "data")
# List the files in that same folder (via `data_dir`, not a separate relative
# literal) so the listing always describes the directory that is read below.
list.files(data_dir)
## [1] "nyc_data.gpkg" "nyc_spatial_data.RData"
## [3] "nyc_spatial_data_week7.RData" "NYS_Health_Facility.csv"
## [5] "nys_retail_food_store_xy.csv" "NYS_Retail_Food_Stores.csv"
## [7] "ZIP_CODE_040114.dbf" "ZIP_CODE_040114.prj"
## [9] "ZIP_CODE_040114.sbn" "ZIP_CODE_040114.sbx"
## [11] "ZIP_CODE_040114.shp" "ZIP_CODE_040114.shp.xml"
## [13] "ZIP_CODE_040114.shx" "ZIP_CODE_040114.zip"
# Extract the zipped ZIP code shapefile components into the data folder.
unzip(
  zipfile = file.path(data_dir, "ZIP_CODE_040114.zip"),
  exdir = data_dir
)
Task 2: Read NYC Postal Areas Shapefile
# Read the ZIP code shapefile from the data folder with sf::st_read().
nyc_zip <- file.path(data_dir, "ZIP_CODE_040114.shp") %>%
  st_read()
## Reading layer `ZIP_CODE_040114' from data source
## `C:\Users\wildk\OneDrive\Documents\2026 Spring\Visual in R\R-Spatial-Week7\data\ZIP_CODE_040114.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
# Print the class vector of nyc_zip to confirm how the shapefile was loaded.
nyc_zip %>% class()
## [1] "sf" "data.frame"
Task 3: Process NYS Health Facility Data
# Load the NYS health facility table and normalise its column names with
# janitor::clean_names().
health <- file.path(data_dir, "NYS_Health_Facility.csv") %>%
  read.csv() %>%
  clean_names()
# Print the cleaned column names to locate the coordinate fields.
names(health)
## [1] "facility_id" "facility_name"
## [3] "short_description" "description"
## [5] "facility_open_date" "facility_address_1"
## [7] "facility_address_2" "facility_city"
## [9] "facility_state" "facility_zip_code"
## [11] "facility_phone_number" "facility_fax_number"
## [13] "facility_website" "facility_county_code"
## [15] "facility_county" "regional_office_id"
## [17] "regional_office" "main_site_name"
## [19] "main_site_facility_id" "operating_certificate_number"
## [21] "operator_name" "operator_address_1"
## [23] "operator_address_2" "operator_city"
## [25] "operator_state" "operator_zip_code"
## [27] "cooperator_name" "cooperator_address"
## [29] "cooperator_address_2" "cooperator_city"
## [31] "cooperator_state" "cooperator_zip_code"
## [33] "ownership_type" "facility_latitude"
## [35] "facility_longitude" "facility_location"
# Keep only facilities that have both a latitude and a longitude recorded.
health_clean <- filter(
  health,
  !(is.na(facility_latitude) | is.na(facility_longitude))
)
# Turn the table into an sf point layer; coordinates are supplied in
# (longitude, latitude) order and tagged with CRS 4326.
health_sf <- health_clean %>%
  st_as_sf(
    coords = c("facility_longitude", "facility_latitude"),
    crs = 4326
  )
# Drop facilities whose point sits exactly at (0, 0): both coordinates zero.
# st_coordinates() is evaluated once; a row is kept when at least one of its
# coordinates is non-zero, i.e. !(X == 0 & Y == 0).
health_sf <- health_sf %>%
  filter(rowSums(st_coordinates(geometry) != 0) > 0)
Task 4: Process NYS Retail Food Stores Data
# Read the retail food store table, decoding the file as Latin-1, then
# normalise its column names with janitor::clean_names().
food <- file.path(data_dir, "nys_retail_food_store_xy.csv") %>%
  read_csv(locale = locale(encoding = "latin1")) %>%
  clean_names()
## Rows: 29389 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): ï..County, Operation.Type, Establishment.Type, Entity.Name, DBA.Na...
## dbl (4): License.Number, Zip.Code, Y, X
## num (1): Square.Footage
## lgl (2): Address.Line.2, Address.Line.3
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Restrict the stores to the five NYC counties. The county column is named
# `i_county` because the raw header `ï..County` is rendered that way by
# clean_names().
food_nyc <- filter(
  food,
  i_county %in% c("Bronx", "Kings", "New York", "Queens", "Richmond")
)
# Print the cleaned column names of the full food store table.
names(food)
## [1] "i_county" "license_number" "operation_type"
## [4] "establishment_type" "entity_name" "dba_name"
## [7] "street_number" "street_name" "address_line_2"
## [10] "address_line_3" "city" "state"
## [13] "zip_code" "square_footage" "location"
## [16] "coords" "y" "x"
# Keep only NYC stores that have both an x and a y coordinate.
food_nyc <- filter(food_nyc, !(is.na(x) | is.na(y)))
# Turn the stores into an sf point layer; coordinates are supplied in (x, y)
# order and tagged with CRS 4326.
food_sf <- food_nyc %>%
  st_as_sf(coords = c("x", "y"), crs = 4326)
Task 5: Verify Locations Using Mapview
# Render the ZIP code polygons with mapview to check their placement visually.
mapview(nyc_zip)
# Render the health facility points with mapview to check their placement.
# NOTE(review): food_sf is not displayed in the portion of the file visible
# here — confirm whether it is mapped elsewhere.
mapview(health_sf)