library(tidyverse)
library(rgdal)
library(maptools)
library(sp)
library(raster)
library(sf)
library(tmap)
library(spatstat)
library(spdep)

Question 1. Cafes in the downtown of Boston and San Francisco

We want to determine whether the location of cafes in Boston and San Francisco follows Complete Spatial Random before and after taking into account percent 25-34 college grduates. Below, you will see complete scripts for Boston. For more detail, review Hypothesis testing.

# read in input objects 
boston_cbd_cafes_ppp <- read_rds("boston_cbd_cafes_ppp.rds") # planar point pattern object 
boston_innercity_owin <- read_rds("boston_innercity_owin.rds") # 5 miles from the cbd window object  
pctyoung <- read_rds("boston_pctyoung.rds") # % 25-34 college graduates im object 

# Average distance to the nearest neighbor 
ann.p <- mean(nndist(boston_cbd_cafes_ppp, k = 1))
ann.p
## [1] 125.6082
# Monte Carlo: Null hypothesis is complete spatial random 
n <- 599L # Number of simulations
ann.r <- vector(length = n) # Create an empty object to be used to store simulated ANN values

for (i in 1:n){
  rand.p   <- rpoint(n=boston_cbd_cafes_ppp$n, win=boston_innercity_owin) # Generate random point locations
  ann.r[i] <- mean(nndist(rand.p, k=1))  # Tally the ANN values
}

hist(ann.r, main=NULL, las=1, breaks=40, col="bisque", xlim=range(ann.p, ann.r))
abline(v=ann.p, col="blue")

N.greater <- sum(ann.r > ann.p)
p <- min(N.greater + 1, n + 1 - N.greater)/(n+1)
p
## [1] 0.001666667
# Monte Carlo: Null hypothesis is complete spatial random, after taking % 25-34 college graduates into account 
n <- 599L
ann.r <- vector(length=n)

for (i in 1:n){
  rand.p   <- rpoint(n=boston_cbd_cafes_ppp$n, f=pctyoung) # f defines the probability density of the points
  ann.r[i] <- mean(nndist(rand.p, k=1))
}

hist(ann.r, main=NULL, las=1, breaks=40, col="bisque", xlim=range(ann.p, ann.r))
abline(v=ann.p, col="blue")

N.greater <- sum(ann.r > ann.p)
p <- min(N.greater + 1, n + 1 - N.greater)/(n+1)
p
## [1] 0.001666667

All input files are available on Canvas. Conduct the same analysis for San Francisco, and determine whether the location of cafes follow Complete Spatial Random (CSR) before and after taking into account percent 25-34 college graduates.

# read in input objects 
sanf_cbd_cafes_ppp <- read_rds("sanf_cbd_cafes_ppp.rds") # planar point pattern object 
sanf_innercity_owin <- read_rds("sanf_innercity_owin.rds") # 5 miles from the cbd window object  
sanf_pctyoung <- read_rds("sanf_pctyoung.rds") # % 25-34 college graduates im object 

Question 2. Residential segregation by race, income, and educational attainment in Fulton County, GA

For this question, we compute global Moran’s I measures to quantify the extent to which residential segregation is present in Fulton County, GA. The US Census American Community Survey 2006-2010 5-year estimates are already processed and avaiable for your analysis.

Below, you will see complete scripts that compute the global Moran’s I statistic for log(median household income), both in hard (i.e., manual) and easy ways. For more information, refer to Spatial Autocorrelation.

# three variables are available: % non-Hispanic Whte, % college graduates, & log(median household income)
# these variables are already standardized (i.e., mean 0 standard deviation 1)
fulton2010 <- read_rds("fulton2010.rds") %>% as('Spatial')

# plot neighbor links 
nb <- poly2nb(fulton2010, queen = TRUE)
xy <- sp::coordinates(fulton2010)
plot(fulton2010, border = "grey", lwd=1)
plot(nb, xy, col="red", lwd=0.1, add=TRUE)

# compute spatial weights 
lw <- nb2listw(nb, style = "W", zero.policy = TRUE)
# compute the spatially weighted mean for the neighbors of each tract 
Inc.lag <- lag.listw(lw, fulton2010$ln_medinc)
# run regression and plot 
M <- lm(Inc.lag ~ fulton2010$ln_medinc)
plot(Inc.lag ~ fulton2010$ln_medinc, pch = 20, asp = 1, las = 1)
abline(M, col = "red")

coef(M)[2]
## fulton2010$ln_medinc 
##            0.2475017
# run Monte Carlo simulation 
# this helps determine the statistical significance of the computed Moran's I 
n <- 599L   # Define the number of simulations
I.r <- vector(length=n)  # Create an empty vector

for (i in seq_along(I.r)){
  # Randomly shuffle income values
  x <- sample(fulton2010$ln_medinc, replace=FALSE)
  # Compute new set of lagged values
  x.lag <- lag.listw(lw, x)
  # Compute the regression slope and store its value
  M.r    <- lm(x.lag ~ x)
  I.r[i] <- coef(M.r)[2]
}

hist(I.r, main=NULL, xlab="Moran's I", las=1)
abline(v=coef(M)[2], col="red")

N.greater <- sum(coef(M)[2] > I.r)
p <- min(N.greater + 1, n + 1 - N.greater) / (n + 1)
p # your value might be slightly different from the printed value below b/c of the random simulation 
## [1] 0.003333333
moran.test(fulton2010$ln_medinc, lw)
## 
##  Moran I test under randomisation
## 
## data:  fulton2010$ln_medinc  
## weights: lw    
## 
## Moran I statistic standard deviate = 6.8605, p-value = 3.431e-12
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.247501715      -0.004926108       0.001353821
MC<- moran.mc(fulton2010$ln_medinc, lw, nsim = 599)
MC
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  fulton2010$ln_medinc 
## weights: lw  
## number of simulations + 1: 600 
## 
## statistic = 0.2475, observed rank = 600, p-value = 0.001667
## alternative hypothesis: greater
plot(MC, main="", las=1)

Do the same task on % non-Hispanic White and % college graduates.

Question 3. The count of property crime in the City of Chicago, IL

  1. Complete the tutorial of Spatial Regression.
  2. After you could successfully replicate two spatial regression models on violent crimes, create the following two variables, oomedval_lg and property2.
  3. Now, estimate OLS and spatial models with a new specification below.
  4. Create a map by tmap functions, which displays the distribution of property crimes (property2) in Chicago.
  5. Use the same definition for the identification of neighbors (e.g., queen=TRUE) and the same spatial weight matrix as those in the tutorial.
  6. Determine which spatial model is more appropriate by lm.LMtests.
  7. Create two maps of Chicago with spplot, one for the OLS residuals and the other for the residuals from the spatial model of your choice. Discuss their differences.
# the median home value of owner-occupied units for each tract  
chi.poly@data$oomedval_lg <- log(chi.poly@data$oomedval+1) 
# recode NA to zero 
chi.poly@data$property2 <- ifelse(is.na(chi.poly@data$property) == FALSE, chi.poly@data$property, 0) 
chi.ols2 <-lm(property2 ~ est_fcs_rt + oomedval_lg, data=chi.poly@data)
summary(chi.ols2)