library(tidyverse)
library(rgdal)
library(maptools)
library(sp)
library(raster)
library(sf)
library(tmap)
library(spatstat)
library(spdep)
library(RColorBrewer)
Question 1. Cafes in the downtown of Boston and San Francisco
# read in input objects
sanf_cbd_cafes_ppp <- read_rds("sanf_cbd_cafes_ppp.rds") # planar point pattern object
sanf_innercity_owin <- read_rds("sanf_innercity_owin.rds") # 5 miles from the cbd window object
sanf_pctyoung <- read_rds("sanf_pctyoung.rds") # % 25-34 college graduates im object
# Average distance to the nearest neighbor
ann.p <- mean(nndist(sanf_cbd_cafes_ppp, k = 1))
ann.p
## [1] 82.781
# Monte Carlo: Null hypothesis is complete spatial random
n <- 599L # Number of simulations
ann.r <- vector(length = n) # Create an empty object to be used to store simulated ANN values
for (i in 1:n){
rand.p <- rpoint(n=sanf_cbd_cafes_ppp$n, win=sanf_innercity_owin) # Generate random point locations
ann.r[i] <- mean(nndist(rand.p, k=1)) # Tally the ANN values
}
hist(ann.r, main=NULL, las=1, breaks=40, col="bisque", xlim=range(ann.p, ann.r))
abline(v=ann.p, col="blue")

N.greater <- sum(ann.r > ann.p)
p <- min(N.greater + 1, n + 1 - N.greater)/(n+1)
p
## [1] 0.001666667
# Monte Carlo: Null hypothesis is complete spatial random, after taking % 25-34 college graduates into account
n <- 599L
ann.r <- vector(length=n)
for (i in 1:n){
rand.p <- rpoint(n=sanf_cbd_cafes_ppp$n, f=sanf_pctyoung) # f defines the probability density of the points
ann.r[i] <- mean(nndist(rand.p, k=1))
}
hist(ann.r, main=NULL, las=1, breaks=40, col="bisque", xlim=range(ann.p, ann.r))
abline(v=ann.p, col="blue")

N.greater <- sum(ann.r > ann.p)
p <- min(N.greater + 1, n + 1 - N.greater)/(n+1)
p
## [1] 0.001666667
Question 2. Residential segregation by race, income, and educational attainment in Fulton County, GA
# three variables are available: % non-Hispanic Whte, % college graduates, & log(median household income)
# these variables are already standardized (i.e., mean 0 standard deviation 1)
fulton2010 <- read_rds("fulton2010.rds") %>% as('Spatial')
# plot neighbor links
nb <- poly2nb(fulton2010, queen = TRUE)
xy <- sp::coordinates(fulton2010)
plot(fulton2010, border = "grey", lwd=1)
plot(nb, xy, col="red", lwd=0.1, add=TRUE)

# compute spatial weights
lw <- nb2listw(nb, style = "W", zero.policy = TRUE)
By race
# compute the spatially weighted mean for the neighbors of each tract
nhw.lag <- lag.listw(lw, fulton2010$pct_nhw)
# run regression and plot
M <- lm(nhw.lag ~ fulton2010$pct_nhw)
plot(nhw.lag ~ fulton2010$pct_nhw, pch = 20, asp = 1, las = 1)
abline(M, col = "red")

coef(M)[2]
## fulton2010$pct_nhw
## 0.8192557
# run Monte Carlo simulation
# this helps determine the statistical significance of the computed Moran's I
n <- 599L # Define the number of simulations
I.r <- vector(length=n) # Create an empty vector
for (i in seq_along(I.r)){
# Randomly shuffle income values
x <- sample(fulton2010$pct_nhw, replace=FALSE)
# Compute new set of lagged values
x.lag <- lag.listw(lw, x)
# Compute the regression slope and store its value
M.r <- lm(x.lag ~ x)
I.r[i] <- coef(M.r)[2]
}
hist(I.r, main=NULL, xlab="Moran's I", las=1)
abline(v=coef(M)[2], col="red")

N.greater <- sum(coef(M)[2] > I.r)
p <- min(N.greater + 1, n + 1 - N.greater) / (n + 1)
p # your value might be slightly different from the printed value below b/c of the random simulation
## [1] 0.001666667
moran.test(fulton2010$pct_nhw, lw)
##
## Moran I test under randomisation
##
## data: fulton2010$pct_nhw
## weights: lw
##
## Moran I statistic standard deviate = 19.354, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.819255661 -0.004926108 0.001813372
MC<- moran.mc(fulton2010$pct_nhw, lw, nsim = 599)
MC
##
## Monte-Carlo simulation of Moran I
##
## data: fulton2010$pct_nhw
## weights: lw
## number of simulations + 1: 600
##
## statistic = 0.81926, observed rank = 600, p-value = 0.001667
## alternative hypothesis: greater
plot(MC, main="", las=1)

By educational attainment
# compute the spatially weighted mean for the neighbors of each tract
coll.lag <- lag.listw(lw, fulton2010$pct_coll)
# run regression and plot
M <- lm(coll.lag ~ fulton2010$pct_coll)
plot(coll.lag ~ fulton2010$pct_coll, pch = 20, asp = 1, las = 1)
abline(M, col = "red")

coef(M)[2]
## fulton2010$pct_coll
## 0.7614924
# run Monte Carlo simulation
# this helps determine the statistical significance of the computed Moran's I
n <- 599L # Define the number of simulations
I.r <- vector(length=n) # Create an empty vector
for (i in seq_along(I.r)){
# Randomly shuffle income values
x <- sample(fulton2010$pct_coll, replace=FALSE)
# Compute new set of lagged values
x.lag <- lag.listw(lw, x)
# Compute the regression slope and store its value
M.r <- lm(x.lag ~ x)
I.r[i] <- coef(M.r)[2]
}
hist(I.r, main=NULL, xlab="Moran's I", las=1)
abline(v=coef(M)[2], col="red")

N.greater <- sum(coef(M)[2] > I.r)
p <- min(N.greater + 1, n + 1 - N.greater) / (n + 1)
p # your value might be slightly different from the printed value below b/c of the random simulation
## [1] 0.001666667
moran.test(fulton2010$pct_coll, lw)
##
## Moran I test under randomisation
##
## data: fulton2010$pct_coll
## weights: lw
##
## Moran I statistic standard deviate = 18.008, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.761492428 -0.004926108 0.001811385
MC<- moran.mc(fulton2010$pct_coll, lw, nsim = 599)
MC
##
## Monte-Carlo simulation of Moran I
##
## data: fulton2010$pct_coll
## weights: lw
## number of simulations + 1: 600
##
## statistic = 0.76149, observed rank = 600, p-value = 0.001667
## alternative hypothesis: greater
plot(MC, main="", las=1)

Question 3. The count of property crime in the City of Chicago, IL
chi.poly <- maptools::readShapePoly('foreclosures.shp')
# the median home value of owner-occupied units for each tract
chi.poly@data$oomedval_lg <- log(chi.poly@data$oomedval+1)
# recode NA to zero
chi.poly@data$property2 <- ifelse(is.na(chi.poly@data$property) == FALSE, chi.poly@data$property, 0)
tmap_mode("view")
tm_basemap("Stamen.TonerLite", alpha = 0.25) +
tm_shape(chi.poly) +
tm_fill(col = "property2", alpha = 0.75, style = "quantile", n = 10,
palette = "OrRd") +
tm_borders(col = "white", lwd = 0.5)
chi.ols2 <-lm(property2 ~ est_fcs_rt + oomedval_lg, data=chi.poly@data)
summary(chi.ols2)
##
## Call:
## lm(formula = property2 ~ est_fcs_rt + oomedval_lg, data = chi.poly@data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -862.09 -206.17 -76.24 119.80 2929.92
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 151.900 64.531 2.354 0.0188 *
## est_fcs_rt 18.699 2.999 6.234 6.99e-10 ***
## oomedval_lg 12.157 5.396 2.253 0.0245 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 336.1 on 894 degrees of freedom
## Multiple R-squared: 0.04997, Adjusted R-squared: 0.04784
## F-statistic: 23.51 on 2 and 894 DF, p-value: 1.12e-10
# create a list of neighbors for each polygon
list.queen <- poly2nb(chi.poly, queen=TRUE)
# next, supplement the neighbor list with spatial weights
W <- nb2listw(list.queen, style="W", zero.policy=TRUE) # "W" row-standardize weights
# W
# plot(W,coordinates(chi.poly))
moran.lm2<-lm.morantest(chi.ols2, W, alternative="two.sided")
print(moran.lm2)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = property2 ~ est_fcs_rt + oomedval_lg, data =
## chi.poly@data)
## weights: W
##
## Moran I statistic standard deviate = 18.6, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
## Observed Moran I Expectation Variance
## 0.3391759402 -0.0020608813 0.0003365739
LM2 <- lm.LMtests(chi.ols2, W, test="all")
print(LM2)
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = property2 ~ est_fcs_rt + oomedval_lg, data =
## chi.poly@data)
## weights: W
##
## LMerr = 337.22, df = 1, p-value < 2.2e-16
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = property2 ~ est_fcs_rt + oomedval_lg, data =
## chi.poly@data)
## weights: W
##
## LMlag = 339.61, df = 1, p-value < 2.2e-16
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = property2 ~ est_fcs_rt + oomedval_lg, data =
## chi.poly@data)
## weights: W
##
## RLMerr = 2.156, df = 1, p-value = 0.142
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = property2 ~ est_fcs_rt + oomedval_lg, data =
## chi.poly@data)
## weights: W
##
## RLMlag = 4.5498, df = 1, p-value = 0.03292
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = property2 ~ est_fcs_rt + oomedval_lg, data =
## chi.poly@data)
## weights: W
##
## SARMA = 341.77, df = 2, p-value < 2.2e-16
- Since the p-value of
RLMlag is smaller than that of RLMerr, we proceed with a spatial lag model.
chi.sar2 <- lagsarlm(property2 ~ est_fcs_rt + oomedval_lg, data=chi.poly@data, W)
summary(chi.sar2)
##
## Call:
## lagsarlm(formula = property2 ~ est_fcs_rt + oomedval_lg, data = chi.poly@data,
## listw = W)
##
## Residuals:
## Min 1Q Median 3Q Max
## -814.611 -162.156 -54.249 102.418 2343.951
##
## Type: lag
## Coefficients: (numerical Hessian approximate standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -62.5149 58.8557 -1.0622 0.2881573
## est_fcs_rt 10.2921 2.6471 3.8880 0.0001011
## oomedval_lg 14.3355 4.7757 3.0017 0.0026846
##
## Rho: 0.56477, LR test value: 202.48, p-value: < 2.22e-16
## Approximate (numerical Hessian) standard error: 0.034758
## z-value: 16.249, p-value: < 2.22e-16
## Wald statistic: 264.03, p-value: < 2.22e-16
##
## Log likelihood: -6388.386 for lag model
## ML residual variance (sigma squared): 84726, (sigma: 291.08)
## Number of observations: 897
## Number of parameters estimated: 5
## AIC: 12787, (AIC for lm: 12987)
- Save residuals from the OLS and spatial lag models.
chi.poly@data$chi.ols2.res <- resid(chi.ols2) #residuals ols
chi.poly@data$chi.sar2.res <- resid(chi.sar2) #residual sar
sp::spplot(
chi.poly,
"chi.ols2.res",
at = seq(min(chi.poly@data$chi.ols2.res,na.rm=TRUE), max(chi.poly@data$chi.ols2.res,na.rm=TRUE), length=12),
col.regions = rev(brewer.pal(11,"RdBu"))
)

sp::spplot(
chi.poly,
"chi.sar2.res",
at = seq(min(chi.poly@data$chi.sar2.res,na.rm=TRUE), max(chi.poly@data$chi.sar2.res,na.rm=TRUE), length=12),
col.regions = rev(brewer.pal(11,"RdBu"))
)
