Resources

Intro to Spatial Data in R:

https://cengel.github.io/R-spatial/intro.html

Intro to Spatial Autocrrelation Analysis in R

https://rpubs.com/quarcs-lab/spatial-autocorrelation

Load Vector Spatial Data

## The sp package is used to download shapefiles 

#Insert the folder location that has the shapefile into st_read
sf <- st_read("folder_path")

## If you only have a tabular file with coordinates, you need to convert it into a sf object 

# Create coords vector that specifies that data frame's columns for longitude and latitude

df <- read.csv("file_path")
sf <- st_as_sf(df, coords = c("POINT_X", "POINT Y"))

## Attach a Coordinate System to the SF
st_crs(sf) # Will come out as NA

st_crs(sf) <- 4326 #insert the EPSF code for the projection, in this example was are using the code for WGS84

## Save the created shapefile to a location on your hard drive

st_write(sf, "folder_location", driver = "ESRI Shapefile")
## If we have a dataframe with values attached to the points we can create a SpatialPointsDataFrame object using the sp package

## Need to specify coordinates
coordinates(df) <- c("POINT_X", "POINT_Y")
class(df)

## Add spatial reference

# The rgdal package is needed to save the sp object as a shapefile with a coordinate system 

proj4string(df) <- CRS("+init=epsg:4326") # this is WGS84
is.projected(df) 

Visualize SF Object

# To vilsualize SF object we can use ggplot2 for basic mapping or the leaflet package for web mapping

ggplot(sf, aes(name_of_column_with_x_value, name_of_column_with_y_value)) +
  geom_point()

# OR

ggplot(sf) + 
  geom_sf(aes(fill=vaule_column)) # sf must be sf_object

K and L Plots

Setting Up Spatstat Object

## The spatstat package requires that your point data be transformed into ppp object

# need to create a window for the boundary of the point
win <- as.owin(sf_boundary) # insert boundary (e.g, census tract, country) of the points, note: boundary must be a sf object

class(win) #check it worked

# Get the coordinates of the point data
coords <- st_coordinates(sf_points)
head(coords)

p <- ppp(coords[,1], coords[,2], window=win)
par(mai=c(0,0,0,0))
plot(p) # plot the ppp object

K-plot

# Calculate the K-function
K <- Kest(p)

# Plot the K-function
plot(K, main = "K-function for Points")

L-Plot

# Calculate the L-function
 L <- Lest(p)

# Plot the L-Function
 plot(L, main="L-Function for Points")

GLobal Moran’s I

# Need coordiantes of your points
coords <- st_coordinates(points)

# Create a spatial weights matrix, when conducting a GLobal Moran's I on points we will need to use distance-based weights

dists <- dnearneigh(coords, 0, 100) # create a neighbor list with 100-meter radius
# The distance is up to your discretion, you could find the average distance between points and use that as the radius


weights <- nb2listw(dists, style = "B") # points are on a binary, 1 if they are within your radius, 0 if not

moran.test(x = points$numeric_vector, listw=weights, zero.policy=T)
# x must be a numeric variable in the points data frame

Practice Using Real Data

require(sf)
require(geojsonsf)
## Loading required package: geojsonsf
require(tigris)
## Loading required package: tigris
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
require(dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:nlme':
## 
##     collapse
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
url <- "https://services6.arcgis.com/DZHaqZm9cxOD4CWM/arcgis/rest/services/Remediation_Sites/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson"

rem_sites <- geojsonsf::geojson_sf(url)
## Warning in readLines(con): incomplete final line found on
## 'https://services6.arcgis.com/DZHaqZm9cxOD4CWM/arcgis/rest/services/Remediation_Sites/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson'
rem_sites <- st_centroid(rem_sites)
plot(rem_sites)

# NY Counties
ny_counties <- counties(state = "NY", cb = TRUE, resolution = "20m") %>% 
  st_as_sf()
## Retrieving data for the year 2021
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |                                                                      |   1%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |======================================================================| 100%
# Long Island Boundary
li_boundary <- ny_counties %>% 
  filter(NAME %in% c('Suffolk', 'Nassau'))

rem_sites <- st_make_valid(rem_sites)

li_boundary <- st_transform(li_boundary, st_crs(rem_sites))

li_rem_sites <- st_intersection(li_boundary, rem_sites)
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
plot(li_rem_sites)
## Warning: plotting the first 9 out of 21 attributes; use max.plot = 21 to plot
## all

# plot the county boundaries and the superfund centroids
ggplot() +
  geom_sf(data = li_boundary, fill = NA, color = "black") +
  geom_sf(data = li_rem_sites, color = "red", size = .1) +
  ggtitle("Remediation Sites in Long Island with Centroids")

Create PPP Object

# Check that the layers needed have been projected
st_crs(rem_sites)
## Coordinate Reference System:
##   User input: 4326 
##   wkt:
## GEOGCS["WGS 84",
##       DATUM["WGS_1984",
##         SPHEROID["WGS 84",6378137,298.257223563,
##           AUTHORITY["EPSG","7030"]],
##         AUTHORITY["EPSG","6326"]],
##       PRIMEM["Greenwich",0,
##         AUTHORITY["EPSG","8901"]],
##       UNIT["degree",0.0174532925199433,
##         AUTHORITY["EPSG","9122"]],
##       AXIS["Latitude",NORTH],
##       AXIS["Longitude",EAST],
##     AUTHORITY["EPSG","4326"]]
st_crs(li_boundary) # They have not been projected
## Coordinate Reference System:
##   User input: 4326 
##   wkt:
## GEOGCS["WGS 84",
##       DATUM["WGS_1984",
##         SPHEROID["WGS 84",6378137,298.257223563,
##           AUTHORITY["EPSG","7030"]],
##         AUTHORITY["EPSG","6326"]],
##       PRIMEM["Greenwich",0,
##         AUTHORITY["EPSG","8901"]],
##       UNIT["degree",0.0174532925199433,
##         AUTHORITY["EPSG","9122"]],
##       AXIS["Latitude",NORTH],
##       AXIS["Longitude",EAST],
##     AUTHORITY["EPSG","4326"]]
# In order to convert into a ppp object, we will need to project both of the sp objects

# Define the target CRS (UTM Zone 18N)
target_crs <- "+proj=utm +zone=18 +datum=WGS84"

# Project the 'rem_sites' layer
rem_sites_proj <- st_transform(li_rem_sites, crs = target_crs)

# Project the 'li_boundary' layer
li_boundary_proj <- st_transform(li_boundary, crs = target_crs)

# Create spatial window
liOwin <- as.owin(sf::st_as_sf(li_boundary_proj))
class(liOwin)
## [1] "owin"
# Get coordinates
coords <- st_coordinates(rem_sites_proj)

# Create a ppp object
p_rem_sites <- ppp(coords[,1], coords[,2], window = liOwin)

# Plot the subset of valid points
plot(p_rem_sites, main = "Remediation Centroid Points in Long Island")

# Create a kernel density estimate
kde <- density.ppp(p_rem_sites, sigma = 2000, kernel = "gaussian") # adjust sigma as needed

# Plot the kernel density estimate
par(mar = c(1, 1, 1, 1))
plot(kde, main = "Kernel Density Estimate of Remediation Centroid Points in Long Island")

Create K and L Plots

## Calculate the K-function
K <- Kest(p_rem_sites)

# Plot the K-function
plot(K, main = "K-function for Long Island Remediation Points")

## Calculate the L-Function
L <- Lest(p_rem_sites)

# Plot the K-function
plot(L, main = "L-function for Long Island Remediation Points")

# Need coordinates of your points
coords <- st_coordinates(rem_sites_proj)

# Create a spatial weights matrix, when conducting a Global Moran's I on points we will need to use distance-based weights

# In this example we need to use knearneigh, as the data does not work for a simple dnearneigh

knn <- knearneigh(coords, k=4)
plot(st_geometry(rem_sites_proj), border="grey")
plot(knn2nb(knn), coords, add=TRUE)
title(main="K nearest neighbours, k = 4")

# Create weights

nb <- knn2nb(knn)
weights <- nb2listw(nb, style = "B")

# Perform Moran's I test
moran.test(x = rem_sites_proj$OBJECTID, listw = weights, zero.policy = TRUE)
## 
##  Moran I test under randomisation
## 
## data:  rem_sites_proj$OBJECTID  
## weights: weights    
## 
## Moran I statistic standard deviate = 2.9998, p-value = 0.001351
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.118922752      -0.003952569       0.001677776
# Our Moran's I equals 0.058, which suggests some spatial autocorrelation, although relatively week. In additon, as the p-value of 0.063 is above the 0.05, we fail to reject the null hypothesis that 

# Before we complete the example, lets expand the number of neighbors to see if we get different results

knn_reduce <- knearneigh(coords, k=2)
plot(st_geometry(rem_sites_proj), border="grey")
plot(knn2nb(knn_reduce), coords, add=TRUE)
title(main="K nearest neighbours, k = 2")

# Create weights

nb_reduce <- knn2nb(knn_reduce)
weights_reduce <- nb2listw(nb_reduce, style = "B")

# Perform Moran's I test
moran.test(x = rem_sites_proj$OBJECTID, listw = weights_reduce, zero.policy = TRUE)
## 
##  Moran I test under randomisation
## 
## data:  rem_sites_proj$OBJECTID  
## weights: weights_reduce    
## 
## Moran I statistic standard deviate = 2.7265, p-value = 0.003201
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.153387134      -0.003952569       0.003330211
## We end up with a lower Moran's I (0.033) and a higher p-value... now lets see our results if we double the number of neighbors 

knn_add <- knearneigh(coords, k=8)
plot(st_geometry(rem_sites_proj), border="grey")
plot(knn2nb(knn_add), coords, add=TRUE)
title(main="K nearest neighbours, k = 8")

# Create weights

nb_add <- knn2nb(knn_add)
weights_add <- nb2listw(nb_add, style = "B")

# Perform Moran's I test
moran_add = moran.test(x = rem_sites_proj$OBJECTID, listw = weights_add, zero.policy = TRUE)
moran_add
## 
##  Moran I test under randomisation
## 
## data:  rem_sites_proj$OBJECTID  
## weights: weights_add    
## 
## Moran I statistic standard deviate = 3.4046, p-value = 0.0003313
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.0938867204     -0.0039525692      0.0008258467
## We end up with a higher Moran's I (0.085) and a significant p-value

## Changing the neighbors significantly alters our results. Neighbor value should be based of prior research on the data.