## The sf package is used to read and write shapefiles
library(sf)
# Insert the path to the shapefile into st_read
sf <- st_read("folder_path")
## If you only have a tabular file with coordinates, you need to convert it into an sf object
# Create a coords vector that specifies the data frame's columns for longitude and latitude
df <- read.csv("file_path")
sf <- st_as_sf(df, coords = c("POINT_X", "POINT_Y"))
## Attach a Coordinate System to the SF
st_crs(sf) # Will come out as NA
st_crs(sf) <- 4326 # insert the EPSG code for the projection; in this example we are using the code for WGS84
## Save the created shapefile to a location on your hard drive
st_write(sf, "folder_location", driver = "ESRI Shapefile")
## If we have a data frame with values attached to the points, we can create a SpatialPointsDataFrame object using the sp package
library(sp)
## Need to specify the coordinate columns
coordinates(df) <- c("POINT_X", "POINT_Y")
class(df)
## Add spatial reference
# The rgdal package is needed to save the sp object as a shapefile with a coordinate system
proj4string(df) <- CRS("+init=epsg:4326") # this is WGS84
is.projected(df)
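## The comment above references rgdal for writing the sp object out with its coordinate system; a minimal sketch is below. Note that rgdal has since been retired, so sf::st_write() is the current route; "points_layer" is a hypothetical layer name.
library(rgdal) # retired package; shown only because the workflow above references it
writeOGR(df, dsn = "folder_location", layer = "points_layer", driver = "ESRI Shapefile") # dsn is the output folder; layer name is a placeholder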
# To visualize an sf object we can use ggplot2 for basic mapping or the leaflet package for web mapping
ggplot(sf, aes(name_of_column_with_x_value, name_of_column_with_y_value)) +
geom_point()
# OR
ggplot(sf) +
geom_sf(aes(fill = value_column)) # sf must be an sf object
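# The note above also mentions the leaflet package for web mapping; a minimal sketch, assuming sf is a point layer in WGS84 (EPSG:4326), which leaflet expects:
library(leaflet)
leaflet(sf) %>% # sf assumed to contain point geometries in EPSG:4326
addTiles() %>% # add the default OpenStreetMap basemap
addCircleMarkers(radius = 3) # draw each point as a small circle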
K Plot = Summarizes the distribution of distances between points across all distance bands
K values above K expected: clustering of points at a given distance band
K Values below K expected: dispersion of points at a given distance band
L Plot = Helps determine small differences between K and K-expected
L values above zero : clustering of points at a given distance band
L values below zero : dispersion of points at a given distance band (i.e., inhibition)
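To see these interpretations in action, here is a small simulated sketch: rpoispp() generates a completely random (Poisson) pattern and rThomas() a clustered one, so their K plots should track and exceed the theoretical curve, respectively.
library(spatstat)
set.seed(1)
win_sim <- owin(c(0, 1), c(0, 1)) # unit-square window
csr <- rpoispp(lambda = 100, win = win_sim) # complete spatial randomness
clus <- rThomas(kappa = 10, scale = 0.05, mu = 10, win = win_sim) # clustered (Thomas) process
plot(Kest(csr), main = "K: random pattern") # observed K tracks the theoretical curve
plot(Kest(clus), main = "K: clustered pattern") # observed K rises above the theoretical curve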
## The spatstat package requires that your point data be transformed into a ppp object
# need to create a window for the boundary of the points
win <- as.owin(sf_boundary) # insert the boundary (e.g., census tract, county) of the points; note: the boundary must be an sf object
class(win) #check it worked
# Get the coordinates of the point data
coords <- st_coordinates(sf_points)
head(coords)
p <- ppp(coords[,1], coords[,2], window=win)
par(mai=c(0,0,0,0))
plot(p) # plot the ppp object
# Calculate the K-function
K <- Kest(p)
# Plot the K-function
plot(K, main = "K-function for Points")
# Calculate the L-function
L <- Lest(p)
# Plot the L-Function
plot(L, main="L-Function for Points")
# Need coordinates of your points
coords <- st_coordinates(points)
# Create a spatial weights matrix; when conducting a Global Moran's I on points we will need to use distance-based weights
library(spdep)
dists <- dnearneigh(coords, 0, 100) # create a neighbor list with a 100 m radius (distances are in the units of your CRS, so project the points first)
# The distance is at your discretion; you could find the average nearest-neighbor distance between points and use that as the radius (see the sketch below)
weights <- nb2listw(dists, style = "B") # binary weights: 1 if a point is within the radius, 0 if not
moran.test(x = points$numeric_vector, listw=weights, zero.policy=T)
# x must be a numeric variable in the points data frame
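# One way to choose the dnearneigh radius mentioned above, as a sketch: compute each point's nearest-neighbor distance with spdep and use the mean (or the maximum, which guarantees every point gets at least one neighbor) as the cutoff.
k1 <- knn2nb(knearneigh(coords, k = 1)) # each point's single nearest neighbor
nn_dists <- unlist(nbdists(k1, coords)) # nearest-neighbor distances in map units
summary(nn_dists)
radius <- max(nn_dists) # ensures no point is left without a neighbor
dists <- dnearneigh(coords, 0, radius)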
require(sf)
require(geojsonsf)
## Loading required package: geojsonsf
require(tigris)
## Loading required package: tigris
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:nlme':
##
## collapse
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
url <- "https://services6.arcgis.com/DZHaqZm9cxOD4CWM/arcgis/rest/services/Remediation_Sites/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson"
rem_sites <- geojsonsf::geojson_sf(url)
## Warning in readLines(con): incomplete final line found on
## 'https://services6.arcgis.com/DZHaqZm9cxOD4CWM/arcgis/rest/services/Remediation_Sites/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson'
rem_sites <- st_centroid(rem_sites)
plot(rem_sites)
# NY Counties
ny_counties <- counties(state = "NY", cb = TRUE, resolution = "20m") %>%
st_as_sf()
## Retrieving data for the year 2021
# Long Island Boundary
li_boundary <- ny_counties %>%
filter(NAME %in% c('Suffolk', 'Nassau'))
rem_sites <- st_make_valid(rem_sites)
li_boundary <- st_transform(li_boundary, st_crs(rem_sites))
li_rem_sites <- st_intersection(li_boundary, rem_sites)
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
plot(li_rem_sites)
## Warning: plotting the first 9 out of 21 attributes; use max.plot = 21 to plot
## all
# plot the county boundaries and the remediation site centroids
ggplot() +
geom_sf(data = li_boundary, fill = NA, color = "black") +
geom_sf(data = li_rem_sites, color = "red", size = .1) +
ggtitle("Remediation Sites in Long Island with Centroids")
# Check that the layers needed have been projected
st_crs(rem_sites)
## Coordinate Reference System:
## User input: 4326
## wkt:
## GEOGCS["WGS 84",
## DATUM["WGS_1984",
## SPHEROID["WGS 84",6378137,298.257223563,
## AUTHORITY["EPSG","7030"]],
## AUTHORITY["EPSG","6326"]],
## PRIMEM["Greenwich",0,
## AUTHORITY["EPSG","8901"]],
## UNIT["degree",0.0174532925199433,
## AUTHORITY["EPSG","9122"]],
## AXIS["Latitude",NORTH],
## AXIS["Longitude",EAST],
## AUTHORITY["EPSG","4326"]]
st_crs(li_boundary) # They have not been projected
## Coordinate Reference System:
## User input: 4326
## wkt:
## GEOGCS["WGS 84",
## DATUM["WGS_1984",
## SPHEROID["WGS 84",6378137,298.257223563,
## AUTHORITY["EPSG","7030"]],
## AUTHORITY["EPSG","6326"]],
## PRIMEM["Greenwich",0,
## AUTHORITY["EPSG","8901"]],
## UNIT["degree",0.0174532925199433,
## AUTHORITY["EPSG","9122"]],
## AXIS["Latitude",NORTH],
## AXIS["Longitude",EAST],
## AUTHORITY["EPSG","4326"]]
# In order to convert into a ppp object, we will need to project both of the sf objects
# Define the target CRS (UTM Zone 18N)
target_crs <- "+proj=utm +zone=18 +datum=WGS84"
# Project the 'li_rem_sites' layer (the Long Island remediation sites)
rem_sites_proj <- st_transform(li_rem_sites, crs = target_crs)
# Project the 'li_boundary' layer
li_boundary_proj <- st_transform(li_boundary, crs = target_crs)
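# As an aside, the same projection can be referenced by its EPSG code (32618, WGS 84 / UTM zone 18N) instead of a hand-written PROJ string; an equivalent sketch:
rem_sites_proj <- st_transform(li_rem_sites, crs = 32618) # WGS 84 / UTM zone 18N
li_boundary_proj <- st_transform(li_boundary, crs = 32618)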
# Create spatial window
liOwin <- as.owin(sf::st_as_sf(li_boundary_proj))
class(liOwin)
## [1] "owin"
# Get coordinates
coords <- st_coordinates(rem_sites_proj)
# Create a ppp object
p_rem_sites <- ppp(coords[,1], coords[,2], window = liOwin)
# Plot the subset of valid points
plot(p_rem_sites, main = "Remediation Centroid Points in Long Island")
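# ppp() drops any points that fall outside the window; a quick sketch to check how many were kept, using spatstat's inside.owin():
ok <- inside.owin(coords[, 1], coords[, 2], liOwin) # TRUE for points inside the Long Island window
table(ok) # counts of retained vs dropped points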
# Create a kernel density estimate
kde <- density.ppp(p_rem_sites, sigma = 2000, kernel = "gaussian") # adjust sigma as needed
# Plot the kernel density estimate
par(mar = c(1, 1, 1, 1))
plot(kde, main = "Kernel Density Estimate of Remediation Centroid Points in Long Island")
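# The sigma of 2000 m above is a judgment call; spatstat also provides data-driven bandwidth selectors such as bw.diggle(). A sketch:
sigma_hat <- bw.diggle(p_rem_sites) # cross-validated bandwidth (Diggle's method)
kde_auto <- density.ppp(p_rem_sites, sigma = sigma_hat, kernel = "gaussian")
plot(kde_auto, main = "KDE with data-driven bandwidth")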
## Calculate the K-function
K <- Kest(p_rem_sites)
# Plot the K-function
plot(K, main = "K-function for Long Island Remediation Points")
## Calculate the L-Function
L <- Lest(p_rem_sites)
# Plot the L-function
plot(L, main = "L-function for Long Island Remediation Points")
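# To judge whether departures of K or L from the Poisson expectation are larger than chance, spatstat's envelope() wraps the observed curve in simulation envelopes under complete spatial randomness; a sketch with 99 simulations:
env_L <- envelope(p_rem_sites, Lest, nsim = 99) # CSR simulation envelopes for the L-function
plot(env_L, main = "L-function with CSR envelopes") # observed curve outside the shaded band suggests clustering or dispersion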
# Need coordinates of your points
coords <- st_coordinates(rem_sites_proj)
# Create a spatial weights matrix, when conducting a Global Moran's I on points we will need to use distance-based weights
# In this example we use knearneigh, since a fixed-distance dnearneigh does not produce a usable neighbor set for these data
knn <- knearneigh(coords, k=4)
plot(st_geometry(rem_sites_proj), border="grey")
plot(knn2nb(knn), coords, add=TRUE)
title(main="K nearest neighbours, k = 4")
# Create weights
nb <- knn2nb(knn)
weights <- nb2listw(nb, style = "B")
# Perform Moran's I test
moran.test(x = rem_sites_proj$OBJECTID, listw = weights, zero.policy = TRUE)
##
## Moran I test under randomisation
##
## data: rem_sites_proj$OBJECTID
## weights: weights
##
## Moran I statistic standard deviate = 2.9998, p-value = 0.001351
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.118922752 -0.003952569 0.001677776
# Our Moran's I equals 0.119, which suggests some positive spatial autocorrelation, although relatively weak. In addition, as the p-value of 0.0014 is below 0.05, we reject the null hypothesis of no spatial autocorrelation
# Before we complete the example, let's reduce the number of neighbors to see if we get different results
knn_reduce <- knearneigh(coords, k=2)
plot(st_geometry(rem_sites_proj), border="grey")
plot(knn2nb(knn_reduce), coords, add=TRUE)
title(main="K nearest neighbours, k = 2")
# Create weights
nb_reduce <- knn2nb(knn_reduce)
weights_reduce <- nb2listw(nb_reduce, style = "B")
# Perform Moran's I test
moran.test(x = rem_sites_proj$OBJECTID, listw = weights_reduce, zero.policy = TRUE)
##
## Moran I test under randomisation
##
## data: rem_sites_proj$OBJECTID
## weights: weights_reduce
##
## Moran I statistic standard deviate = 2.7265, p-value = 0.003201
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.153387134 -0.003952569 0.003330211
## We end up with a higher Moran's I (0.153) and a slightly higher p-value... now let's see our results if we double the original number of neighbors
knn_add <- knearneigh(coords, k=8)
plot(st_geometry(rem_sites_proj), border="grey")
plot(knn2nb(knn_add), coords, add=TRUE)
title(main="K nearest neighbours, k = 8")
# Create weights
nb_add <- knn2nb(knn_add)
weights_add <- nb2listw(nb_add, style = "B")
# Perform Moran's I test
moran_add = moran.test(x = rem_sites_proj$OBJECTID, listw = weights_add, zero.policy = TRUE)
moran_add
##
## Moran I test under randomisation
##
## data: rem_sites_proj$OBJECTID
## weights: weights_add
##
## Moran I statistic standard deviate = 3.4046, p-value = 0.0003313
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.0938867204 -0.0039525692 0.0008258467
## We end up with a lower Moran's I (0.094) and a smaller, still significant p-value
## Changing the number of neighbors substantially alters our results. The choice of neighbors should be based on prior research and knowledge of the data.
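## To make that sensitivity explicit, a small sketch that loops over several k values and collects the Moran's I statistic and p-value (same binary weights as above):
sapply(c(2, 4, 8, 16), function(k) {
  w <- nb2listw(knn2nb(knearneigh(coords, k = k)), style = "B")
  m <- moran.test(rem_sites_proj$OBJECTID, listw = w, zero.policy = TRUE)
  c(k = k, I = unname(m$estimate["Moran I statistic"]), p = m$p.value)
})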