Instead of using the rstudioapi, I used the
here package to access or point to all the directories when
loading or writing files.
zip_data <- st_read(dsn = here("Module 9/Data/ZIP_CODE_040114.shp")) %>%
janitor::clean_names()
Reading the New York data of health facilities.
Since both latitude and longitude were character vectors, I first parsed them as numbers and then filtered data with errors (longitude higher than 0). Then I used these columns to as the coordinates to create the geometries. Finally, I assigned the CRS to WGS84.
d_health <- d_ny_health %>%
mutate(facility_longitude = as.numeric(facility_longitude),
facility_latitude = as.numeric(facility_latitude)) %>%
filter(!is.na(facility_location) & facility_longitude < 0)
## Creating the sf objects
shp_health <- st_as_sf(x = d_health, coords = c("facility_longitude",
"facility_latitude"),
remove = F)
st_crs(shp_health) <- 4326; st_crs(shp_health)
## Coordinate Reference System:
## User input: EPSG:4326
## wkt:
## GEOGCRS["WGS 84",
## ENSEMBLE["World Geodetic System 1984 ensemble",
## MEMBER["World Geodetic System 1984 (Transit)"],
## MEMBER["World Geodetic System 1984 (G730)"],
## MEMBER["World Geodetic System 1984 (G873)"],
## MEMBER["World Geodetic System 1984 (G1150)"],
## MEMBER["World Geodetic System 1984 (G1674)"],
## MEMBER["World Geodetic System 1984 (G1762)"],
## MEMBER["World Geodetic System 1984 (G2139)"],
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]],
## ENSEMBLEACCURACY[2.0]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["geodetic latitude (Lat)",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["geodetic longitude (Lon)",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## USAGE[
## SCOPE["Horizontal component of 3D system."],
## AREA["World."],
## BBOX[-90,-180,90,180]],
## ID["EPSG",4326]]
I read the food stores data using the same method.
d_ny_food <- read_csv(file = here("Module 9/Data/NYS_Retail_Food_Stores.csv")) %>%
janitor::clean_names()
## Rows: 29389 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): County, License Number, Operation Type, Establishment Type, Entity...
## dbl (1): Zip Code
## lgl (2): Address Line 2, Address Line 3
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
For this data frame I separated the character vector using the line breaks that were naturally dividing the information. Then, I removed empty columns.
d_ny_food <- d_ny_food %>%
separate(col = location,sep = "\n", into = c("street", "county",
"coords")) %>%
separate(col = coords, into = c("lat", "long"), sep = ",") %>%
select(-address_line_2, -address_line_3, -street, -county) %>%
mutate(lat = stringr::str_remove_all(string = lat, pattern = "\\("),
long = stringr::str_remove_all(string = long, pattern = "\\)")
)
d_food <- d_ny_food %>%
mutate(lat = as.numeric(lat),
long = stringr::str_remove_all(string = long, # removed the blank space
pattern = "[:blank:]")) %>%
mutate(long = as.numeric(long)) %>%
tidyr::drop_na(long)
As with health facilities, I created the sf objects using the
st_as_sf function and set the CRS to WGS84.
shp_food <- st_as_sf(x = d_food, coords = c("long","lat"),
remove = F)
st_crs(shp_food) <- 4326; st_crs(shp_food)
## Coordinate Reference System:
## User input: EPSG:4326
## wkt:
## GEOGCRS["WGS 84",
## ENSEMBLE["World Geodetic System 1984 ensemble",
## MEMBER["World Geodetic System 1984 (Transit)"],
## MEMBER["World Geodetic System 1984 (G730)"],
## MEMBER["World Geodetic System 1984 (G873)"],
## MEMBER["World Geodetic System 1984 (G1150)"],
## MEMBER["World Geodetic System 1984 (G1674)"],
## MEMBER["World Geodetic System 1984 (G1762)"],
## MEMBER["World Geodetic System 1984 (G2139)"],
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]],
## ENSEMBLEACCURACY[2.0]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["geodetic latitude (Lat)",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["geodetic longitude (Lon)",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## USAGE[
## SCOPE["Horizontal component of 3D system."],
## AREA["World."],
## BBOX[-90,-180,90,180]],
## ID["EPSG",4326]]
Since both data is located in New York state, I used the extent of health facilities to retrieve a basemap.
basemap <- shp_health %>%
st_bbox() %>%
as.vector() %>%
ggmap::get_stamenmap(zoom = 11, messaging = FALSE)
The basemap confirms that that the points are correctly located in New York.
ggmap(basemap) +
geom_point(data = shp_health, aes(x = facility_longitude,
y = facility_latitude,
fill = ownership_type),
shape = 21,
size = 1.5,
alpha = 0.7) +
labs(title = "Health Facilities in NY", x = "Longitude", y = "Latitude") +
scale_fill_brewer(palette = "Spectral", aesthetics = "fill") +
theme_light()
The basemap confirms that that the points are correctly located in New York.
ggmap(basemap) +
geom_point(data = shp_food, aes(x = long,
y = lat,
fill = square_footage),
shape = 21,
size = 1.2,
alpha = 0.5) +
labs(title = "Store Area in New York", subtitle = "Square footage",
x = "Longitude", y = "Latitude") +
scale_fill_viridis_c(option = "cividis", aesthetics = "fill",
direction = -1, begin = 0.2) +
theme_light()
I used st_write to save both objects into the same
GEOpackage.
st_write(obj = shp_health,
dsn = here("Module 9/Data/week_11_hmk.gpkg"),
layer='health_facilities',
delete_layer = TRUE)
## Deleting layer `health_facilities' using driver `GPKG'
## Writing layer `health_facilities' to data source
## `/run/media/gonzalo/Windows-SSD/Users/mhgon/Documents/Academia/Posgrado/Doctorado/Clases/4. Semestre/Data analysis/Spatial_section/Module 9/Data/week_11_hmk.gpkg' using driver `GPKG'
## Writing 3843 features with 36 fields and geometry type Point.
st_write(shp_food,
dsn = here("Module 9/Data/week_11_hmk.gpkg"),
layer='retail_stores',
delete_layer = TRUE)
## Deleting layer `retail_stores' using driver `GPKG'
## Writing layer `retail_stores' to data source
## `/run/media/gonzalo/Windows-SSD/Users/mhgon/Documents/Academia/Posgrado/Doctorado/Clases/4. Semestre/Data analysis/Spatial_section/Module 9/Data/week_11_hmk.gpkg' using driver `GPKG'
## Writing 23974 features with 13 fields and geometry type Point.