#load in the libraries
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.4.3, PROJ 7.2.1; sf_use_s2() is TRUE
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(spData)
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(urbnmapr)
library(ggplot2)
We’ll start by loading in shapefile data.
setwd("~/Binghamton/geog380/NYS_Place_Points_SHP")
place_shape <- st_read("NYS_Place_Points.shp")
## Reading layer `NYS_Place_Points' from data source
## `C:\Users\mhaller\Documents\Binghamton\geog380\NYS_Place_Points_SHP\NYS_Place_Points.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 4571 features and 8 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 108752.5 ymin: 4484510 xmax: 756007.4 ymax: 4984062
## Projected CRS: NAD83 / UTM zone 18N
place_shape <- place_shape %>% st_transform("EPSG:32116")
#load in NYS county shape file and set crs
counties <- get_urbn_map("counties", sf = TRUE)
#filter the data to get just NYS
counties <- counties %>%
filter(state_abbv == "NY") %>%
st_transform("EPSG:32116")
## old-style crs object detected; please recreate object with a recent sf::st_crs()
#Let's map them now!
ggplot() +
geom_sf(data = counties,
mapping = aes(), color = "lightgray")+
geom_sf(data = place_shape,
mapping = aes(), color = "green")+
theme_minimal()
#get a point count by county
#Use st_intersects to create a new variable that counts the number of points in each polygon
counties$count <- lengths(st_intersects(counties, place_shape))
#map it
ggplot() +
geom_sf(data = counties,
mapping = aes(fill = count))+
theme_minimal()
Now, let’s convert a smaller spatial unit to a larger spatial unit. Suprisingly, we can do this with the summarise() command.
#Load in Covid data with region information - the data doesn't matter, we just need the region groups
setwd("~/Binghamton/geog380")
covid <- read.csv("covid_data_ny.csv")
head(covid)
#merge the dataframes
library(tidyr)
counties1 <- counties %>%
separate(county_name, c("county_name", "county"), sep = " County") %>%
select(-county)
counties1 <- counties1 %>%
left_join(covid, by = c("county_name" = "County"))
#I don't actually care about Covid cases - summarise covid cases to get our spatial data grouped by region
#If we can summarise the data by any variable at the region level, R will automatically create a region polygon
regions <- counties1 %>% group_by(region) %>%
summarize(cases = sum(new_positive, na.rm = TRUE))
#now we'll map it!
ggplot() +
geom_sf(data = regions,
mapping = aes(fill = region))+
theme_minimal()+
labs(fill = "Region", title = "Regions of NYS")+
theme(plot.title = element_text(hjust = 0.5))