library(tidyverse)
library(rgdal)
library(maptools)
library(sp)
library(raster)
library(sf)
library(tmap)
library(spatstat)
library(spdep)
library(RColorBrewer)

Question 1. Cafes in the downtown of Boston and San Francisco

# read in input objects 
sanf_cbd_cafes_ppp <- read_rds("sanf_cbd_cafes_ppp.rds") # planar point pattern object 
sanf_innercity_owin <- read_rds("sanf_innercity_owin.rds") # 5 miles from the cbd window object  
sanf_pctyoung <- read_rds("sanf_pctyoung.rds") # % 25-34 college graduates im object 
# Average distance to the nearest neighbor 
ann.p <- mean(nndist(sanf_cbd_cafes_ppp, k = 1))
ann.p
## [1] 82.781
# Monte Carlo: Null hypothesis is complete spatial random 
n <- 599L # Number of simulations
ann.r <- vector(length = n) # Create an empty object to be used to store simulated ANN values

for (i in 1:n){
  rand.p   <- rpoint(n=sanf_cbd_cafes_ppp$n, win=sanf_innercity_owin) # Generate random point locations
  ann.r[i] <- mean(nndist(rand.p, k=1))  # Tally the ANN values
}

hist(ann.r, main=NULL, las=1, breaks=40, col="bisque", xlim=range(ann.p, ann.r))
abline(v=ann.p, col="blue")

N.greater <- sum(ann.r > ann.p)
p <- min(N.greater + 1, n + 1 - N.greater)/(n+1)
p
## [1] 0.001666667
# Monte Carlo: Null hypothesis is complete spatial random, after taking % 25-34 college graduates into account 
n <- 599L
ann.r <- vector(length=n)

for (i in 1:n){
  rand.p   <- rpoint(n=sanf_cbd_cafes_ppp$n, f=sanf_pctyoung) # f defines the probability density of the points
  ann.r[i] <- mean(nndist(rand.p, k=1))
}

hist(ann.r, main=NULL, las=1, breaks=40, col="bisque", xlim=range(ann.p, ann.r))
abline(v=ann.p, col="blue")

N.greater <- sum(ann.r > ann.p)
p <- min(N.greater + 1, n + 1 - N.greater)/(n+1)
p
## [1] 0.001666667

Question 2. Residential segregation by race, income, and educational attainment in Fulton County, GA

# three variables are available: % non-Hispanic Whte, % college graduates, & log(median household income)
# these variables are already standardized (i.e., mean 0 standard deviation 1)
fulton2010 <- read_rds("fulton2010.rds") %>% as('Spatial')

# plot neighbor links 
nb <- poly2nb(fulton2010, queen = TRUE)
xy <- sp::coordinates(fulton2010)
plot(fulton2010, border = "grey", lwd=1)
plot(nb, xy, col="red", lwd=0.1, add=TRUE)

# compute spatial weights 
lw <- nb2listw(nb, style = "W", zero.policy = TRUE)
By race
# compute the spatially weighted mean for the neighbors of each tract 
nhw.lag <- lag.listw(lw, fulton2010$pct_nhw)
# run regression and plot 
M <- lm(nhw.lag ~ fulton2010$pct_nhw)
plot(nhw.lag ~ fulton2010$pct_nhw, pch = 20, asp = 1, las = 1)
abline(M, col = "red")

coef(M)[2]
## fulton2010$pct_nhw 
##          0.8192557
# run Monte Carlo simulation 
# this helps determine the statistical significance of the computed Moran's I 
n <- 599L   # Define the number of simulations
I.r <- vector(length=n)  # Create an empty vector

for (i in seq_along(I.r)){
  # Randomly shuffle income values
  x <- sample(fulton2010$pct_nhw, replace=FALSE)
  # Compute new set of lagged values
  x.lag <- lag.listw(lw, x)
  # Compute the regression slope and store its value
  M.r    <- lm(x.lag ~ x)
  I.r[i] <- coef(M.r)[2]
}

hist(I.r, main=NULL, xlab="Moran's I", las=1)
abline(v=coef(M)[2], col="red")

N.greater <- sum(coef(M)[2] > I.r)
p <- min(N.greater + 1, n + 1 - N.greater) / (n + 1)
p # your value might be slightly different from the printed value below b/c of the random simulation 
## [1] 0.001666667
moran.test(fulton2010$pct_nhw, lw)
## 
##  Moran I test under randomisation
## 
## data:  fulton2010$pct_nhw  
## weights: lw    
## 
## Moran I statistic standard deviate = 19.354, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.819255661      -0.004926108       0.001813372
MC<- moran.mc(fulton2010$pct_nhw, lw, nsim = 599)
MC
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  fulton2010$pct_nhw 
## weights: lw  
## number of simulations + 1: 600 
## 
## statistic = 0.81926, observed rank = 600, p-value = 0.001667
## alternative hypothesis: greater
plot(MC, main="", las=1)

By educational attainment
# compute the spatially weighted mean for the neighbors of each tract 
coll.lag <- lag.listw(lw, fulton2010$pct_coll)
# run regression and plot 
M <- lm(coll.lag ~ fulton2010$pct_coll)
plot(coll.lag ~ fulton2010$pct_coll, pch = 20, asp = 1, las = 1)
abline(M, col = "red")

coef(M)[2]
## fulton2010$pct_coll 
##           0.7614924
# run Monte Carlo simulation 
# this helps determine the statistical significance of the computed Moran's I 
n <- 599L   # Define the number of simulations
I.r <- vector(length=n)  # Create an empty vector

for (i in seq_along(I.r)){
  # Randomly shuffle income values
  x <- sample(fulton2010$pct_coll, replace=FALSE)
  # Compute new set of lagged values
  x.lag <- lag.listw(lw, x)
  # Compute the regression slope and store its value
  M.r    <- lm(x.lag ~ x)
  I.r[i] <- coef(M.r)[2]
}

hist(I.r, main=NULL, xlab="Moran's I", las=1)
abline(v=coef(M)[2], col="red")

N.greater <- sum(coef(M)[2] > I.r)
p <- min(N.greater + 1, n + 1 - N.greater) / (n + 1)
p # your value might be slightly different from the printed value below b/c of the random simulation 
## [1] 0.001666667
moran.test(fulton2010$pct_coll, lw)
## 
##  Moran I test under randomisation
## 
## data:  fulton2010$pct_coll  
## weights: lw    
## 
## Moran I statistic standard deviate = 18.008, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.761492428      -0.004926108       0.001811385
MC<- moran.mc(fulton2010$pct_coll, lw, nsim = 599)
MC
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  fulton2010$pct_coll 
## weights: lw  
## number of simulations + 1: 600 
## 
## statistic = 0.76149, observed rank = 600, p-value = 0.001667
## alternative hypothesis: greater
plot(MC, main="", las=1)

Question 3. The count of property crime in the City of Chicago, IL

chi.poly <- maptools::readShapePoly('foreclosures.shp')
# the median home value of owner-occupied units for each tract  
chi.poly@data$oomedval_lg <- log(chi.poly@data$oomedval+1) 
# recode NA to zero 
chi.poly@data$property2 <- ifelse(is.na(chi.poly@data$property) == FALSE, chi.poly@data$property, 0) 
tmap_mode("view")
tm_basemap("Stamen.TonerLite", alpha = 0.25) + 
  tm_shape(chi.poly) + 
  tm_fill(col = "property2", alpha = 0.75, style = "quantile", n = 10, 
          palette = "OrRd") + 
  tm_borders(col = "white", lwd = 0.5)
chi.ols2 <-lm(property2 ~ est_fcs_rt + oomedval_lg, data=chi.poly@data)
summary(chi.ols2)
## 
## Call:
## lm(formula = property2 ~ est_fcs_rt + oomedval_lg, data = chi.poly@data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -862.09 -206.17  -76.24  119.80 2929.92 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  151.900     64.531   2.354   0.0188 *  
## est_fcs_rt    18.699      2.999   6.234 6.99e-10 ***
## oomedval_lg   12.157      5.396   2.253   0.0245 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 336.1 on 894 degrees of freedom
## Multiple R-squared:  0.04997,    Adjusted R-squared:  0.04784 
## F-statistic: 23.51 on 2 and 894 DF,  p-value: 1.12e-10
# create a list of neighbors for each polygon 
list.queen <- poly2nb(chi.poly, queen=TRUE)
# next, supplement the neighbor list with spatial weights 
W <- nb2listw(list.queen, style="W", zero.policy=TRUE) # "W" row-standardize weights 
# W
# plot(W,coordinates(chi.poly))
moran.lm2<-lm.morantest(chi.ols2, W, alternative="two.sided")
print(moran.lm2)
## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = property2 ~ est_fcs_rt + oomedval_lg, data =
## chi.poly@data)
## weights: W
## 
## Moran I statistic standard deviate = 18.6, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
## Observed Moran I      Expectation         Variance 
##     0.3391759402    -0.0020608813     0.0003365739
LM2 <- lm.LMtests(chi.ols2, W, test="all")
print(LM2)
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = property2 ~ est_fcs_rt + oomedval_lg, data =
## chi.poly@data)
## weights: W
## 
## LMerr = 337.22, df = 1, p-value < 2.2e-16
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = property2 ~ est_fcs_rt + oomedval_lg, data =
## chi.poly@data)
## weights: W
## 
## LMlag = 339.61, df = 1, p-value < 2.2e-16
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = property2 ~ est_fcs_rt + oomedval_lg, data =
## chi.poly@data)
## weights: W
## 
## RLMerr = 2.156, df = 1, p-value = 0.142
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = property2 ~ est_fcs_rt + oomedval_lg, data =
## chi.poly@data)
## weights: W
## 
## RLMlag = 4.5498, df = 1, p-value = 0.03292
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = property2 ~ est_fcs_rt + oomedval_lg, data =
## chi.poly@data)
## weights: W
## 
## SARMA = 341.77, df = 2, p-value < 2.2e-16
chi.sar2 <- lagsarlm(property2 ~ est_fcs_rt + oomedval_lg, data=chi.poly@data, W)
summary(chi.sar2)
## 
## Call:
## lagsarlm(formula = property2 ~ est_fcs_rt + oomedval_lg, data = chi.poly@data, 
##     listw = W)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -814.611 -162.156  -54.249  102.418 2343.951 
## 
## Type: lag 
## Coefficients: (numerical Hessian approximate standard errors) 
##             Estimate Std. Error z value  Pr(>|z|)
## (Intercept) -62.5149    58.8557 -1.0622 0.2881573
## est_fcs_rt   10.2921     2.6471  3.8880 0.0001011
## oomedval_lg  14.3355     4.7757  3.0017 0.0026846
## 
## Rho: 0.56477, LR test value: 202.48, p-value: < 2.22e-16
## Approximate (numerical Hessian) standard error: 0.034758
##     z-value: 16.249, p-value: < 2.22e-16
## Wald statistic: 264.03, p-value: < 2.22e-16
## 
## Log likelihood: -6388.386 for lag model
## ML residual variance (sigma squared): 84726, (sigma: 291.08)
## Number of observations: 897 
## Number of parameters estimated: 5 
## AIC: 12787, (AIC for lm: 12987)
chi.poly@data$chi.ols2.res <- resid(chi.ols2) #residuals ols
chi.poly@data$chi.sar2.res <- resid(chi.sar2) #residual sar
sp::spplot(
  chi.poly, 
  "chi.ols2.res", 
  at = seq(min(chi.poly@data$chi.ols2.res,na.rm=TRUE), max(chi.poly@data$chi.ols2.res,na.rm=TRUE), length=12),
  col.regions = rev(brewer.pal(11,"RdBu"))
)

sp::spplot(
  chi.poly, 
  "chi.sar2.res", 
  at = seq(min(chi.poly@data$chi.sar2.res,na.rm=TRUE), max(chi.poly@data$chi.sar2.res,na.rm=TRUE), length=12),
  col.regions = rev(brewer.pal(11,"RdBu"))
)