“The first law of geography: Everything is related to everything else, but near things are more related than distant things.” Waldo R. Tobler
Step 1: Load packages & data
library(sp)
library(spdep)
library(tmap)
load(url("http://github.com/mgimond/Spatial/raw/master/Data/moransI.RData"))
Step 2: Examine the data
class(s1)
str(s1, max.level = 2)
s1@data
tmap_mode("view")
tm_basemap("Stamen.TonerLite") +
tm_shape(s1) +
tm_polygons(style = "quantile", col = "Income", alpha = 0.5) +
tm_legend(outside = TRUE, text.size = .8)
Step 3: Define neighboring polygons
queen=TRUEqueen=FALSE?poly2nb
nb <- poly2nb(s1, queen=TRUE)
nb lists contiguous neighbors for each polygon.nb itself is a list, meaning that [ will return a sublist, and [[ will return a vector of neigboring polygon IDs.class(nb)
str(nb)
summary(nb)
nb[1]
class(nb[1])
nb[[1]]
class(nb[[1]])
xy <- sp::coordinates(s1) # s1 is an sp object, so we use sp::coordinates
plot(s1, border='grey', lwd=1)
plot(nb, xy, col='red', lwd=1, add=TRUE)
s1 is an sp object, and s1$NAME returns a vector of County names.s1, subset s1$NAME by the vector of its neigboring counties.s1$NAME
s1$NAME[1]
class(s1$NAME[1])
s1$NAME[nb[[1]]]
## [1] Somerset Piscataquis Penobscot Washington
## 16 Levels: Androscoggin Aroostook Cumberland Franklin Hancock ... York
Step 4: For each polygon, compute the (weighted) average values of its neighbors
nb2listw returns a list of weights for individual countiesstyle="W" returns weights, which are row-standardized (i.e., for each county, the weights of its neighboring counties add up to one.)zero.policy=TRUE allows counties with no neighbor (i.e., isolated polygon)?nb2listw
lw <- nb2listw(nb, style="W", zero.policy=TRUE)
lwclass(lw)
str(lw)
lw$weights
class(lw$weights)
str(lw$weights)
lw$weights[1]
lw$weights[[1]]
s1$Income value for each polygon (i.e., County)?lag.listw
Inc.lag <- lag.listw(lw, s1$Income)
Step 5: Compute the Moran’s I statistic: the hard way
# Create a regression model
M <- lm(Inc.lag ~ s1$Income)
# Plot the data
plot(Inc.lag ~ s1$Income, pch=20, asp=1, las=1)
abline(M, col = "red")
coef(M)[2]
n <- 599L # Define the number of simulations
I.r <- vector(length=n) # Create an empty vector
for (i in seq_along(I.r)){
# Randomly shuffle income values
x <- sample(s1$Income, replace=FALSE)
# Compute new set of lagged values
x.lag <- lag.listw(lw, x)
# Compute the regression slope and store its value
M.r <- lm(x.lag ~ x)
I.r[i] <- coef(M.r)[2]
}
# Plot the histogram of simulated Moran's I values
# then add our observed Moran's I value to the plot
hist(I.r, main=NULL, xlab="Moran's I", las=1)
abline(v=coef(M)[2], col="red")
N.greater <- sum(coef(M)[2] > I.r)
p <- min(N.greater + 1, n + 1 - N.greater) / (n + 1)
p # your value might be slightly different from the printed value below b/c of the random simulation
## [1] 0.02666667
Step 6: Computing the Moran’s I statistic: the easy way
s1$Income and the weights list lw, use spdep::moran.testmoran.test(s1$Income,lw)
##
## Moran I test under randomisation
##
## data: s1$Income
## weights: lw
##
## Moran I statistic standard deviate = 2.2472, p-value = 0.01231
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.28281108 -0.06666667 0.02418480
moran.test function is not computed from an MC simulation but analytically instead.moran.mc function.MC<- moran.mc(s1$Income, lw, nsim=599)
# View results (including p-value)
MC
##
## Monte-Carlo simulation of Moran I
##
## data: s1$Income
## weights: lw
## number of simulations + 1: 600
##
## statistic = 0.28281, observed rank = 589, p-value = 0.01833
## alternative hypothesis: greater
# Plot the distribution (note that this is a density plot instead of a histogram)
plot(MC, main="", las=1)
Step 7: Moran’s I as a function of a distance band
spdep works on sp objects, whose syntax differs from those of sf objects.sp::coordinates instead of sf::st_centroidcoo <- sp::coordinates(s1)
dnearneigh takes on three parameters: the coordinate values coo, the radius for the inner radius of the annulus band, and the radius for the outer annulus bandS.dist <- dnearneigh(coo, 0, 50000) # find neighbors within 50 km
str(S.dist)
plot(s1, border='grey', lwd=1)
plot(S.dist, coo, col='red', lwd=1, add=TRUE)
lw <- nb2listw(S.dist, style="W", zero.policy=T)
MI <- moran.mc(s1$Income, lw, nsim=599, zero.policy=T)
plot(MI, main = "", las = 1)
MI
##
## Monte-Carlo simulation of Moran I
##
## data: s1$Income
## weights: lw
## number of simulations + 1: 600
##
## statistic = 0.31361, observed rank = 598, p-value = 0.003333
## alternative hypothesis: greater