#load in the libraries
library(sf)     
## Linking to GEOS 3.9.1, GDAL 3.4.3, PROJ 7.2.1; sf_use_s2() is TRUE
library(dplyr)   
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(spData) 
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(urbnmapr)
library(ggplot2)

We’ll start by loading in shapefile data.

setwd("~/Binghamton/geog380/NYS_Place_Points_SHP")
place_shape <- st_read("NYS_Place_Points.shp")
## Reading layer `NYS_Place_Points' from data source 
##   `C:\Users\mhaller\Documents\Binghamton\geog380\NYS_Place_Points_SHP\NYS_Place_Points.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 4571 features and 8 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 108752.5 ymin: 4484510 xmax: 756007.4 ymax: 4984062
## Projected CRS: NAD83 / UTM zone 18N
place_shape <- place_shape %>% st_transform("EPSG:32116")
#load in NYS county shape file and set crs
counties <- get_urbn_map("counties", sf = TRUE)

#filter the data to get just NYS
counties <- counties %>% 
  filter(state_abbv == "NY") %>% 
  st_transform("EPSG:32116")
## old-style crs object detected; please recreate object with a recent sf::st_crs()
#Let's map them now!
ggplot() +
  geom_sf(data = counties,
    mapping = aes(), color = "lightgray")+
  geom_sf(data = place_shape,
          mapping = aes(), color = "green")+
  theme_minimal()

#get a point count by county

#Use st_intersects to create a new variable that counts the number of points in each polygon
counties$count <- lengths(st_intersects(counties, place_shape))

#map it
ggplot() +
  geom_sf(data = counties,
    mapping = aes(fill = count))+
  theme_minimal()

Now, let’s convert a smaller spatial unit to a larger spatial unit. Suprisingly, we can do this with the summarise() command.

#Load in Covid data with region information - the data doesn't matter, we just need the region groups
setwd("~/Binghamton/geog380")
covid <- read.csv("covid_data_ny.csv")

head(covid)
#merge the dataframes
library(tidyr)
counties1 <- counties %>% 
  separate(county_name, c("county_name", "county"), sep = " County") %>% 
  select(-county)

counties1 <- counties1 %>%
  left_join(covid, by = c("county_name" = "County"))

#I don't actually care about Covid cases - summarise covid cases to get our spatial data grouped by region
#If we can summarise the data by any variable at the region level, R will automatically create a region polygon
regions <- counties1 %>% group_by(region) %>%
  summarize(cases = sum(new_positive, na.rm = TRUE))

#now we'll map it!
ggplot() +
  geom_sf(data = regions,
    mapping = aes(fill = region))+
  theme_minimal()+
  labs(fill = "Region", title = "Regions of NYS")+
  theme(plot.title = element_text(hjust = 0.5))