library(tidyverse)
library(rgdal)
library(maptools)
library(sp)
library(raster)
library(sf)
library(tmap)
library(spatstat)
library(spdep)
We want to determine whether the location of cafes in Boston and San Francisco follows Complete Spatial Random before and after taking into account percent 25-34 college grduates. Below, you will see complete scripts for Boston. For more detail, review Hypothesis testing.
# read in input objects
boston_cbd_cafes_ppp <- read_rds("boston_cbd_cafes_ppp.rds") # planar point pattern object
boston_innercity_owin <- read_rds("boston_innercity_owin.rds") # 5 miles from the cbd window object
pctyoung <- read_rds("boston_pctyoung.rds") # % 25-34 college graduates im object
# Average distance to the nearest neighbor
ann.p <- mean(nndist(boston_cbd_cafes_ppp, k = 1))
ann.p
## [1] 125.6082
# Monte Carlo: Null hypothesis is complete spatial random
n <- 599L # Number of simulations
ann.r <- vector(length = n) # Create an empty object to be used to store simulated ANN values
for (i in 1:n){
rand.p <- rpoint(n=boston_cbd_cafes_ppp$n, win=boston_innercity_owin) # Generate random point locations
ann.r[i] <- mean(nndist(rand.p, k=1)) # Tally the ANN values
}
hist(ann.r, main=NULL, las=1, breaks=40, col="bisque", xlim=range(ann.p, ann.r))
abline(v=ann.p, col="blue")
N.greater <- sum(ann.r > ann.p)
p <- min(N.greater + 1, n + 1 - N.greater)/(n+1)
p
## [1] 0.001666667
# Monte Carlo: Null hypothesis is complete spatial random, after taking % 25-34 college graduates into account
n <- 599L
ann.r <- vector(length=n)
for (i in 1:n){
rand.p <- rpoint(n=boston_cbd_cafes_ppp$n, f=pctyoung) # f defines the probability density of the points
ann.r[i] <- mean(nndist(rand.p, k=1))
}
hist(ann.r, main=NULL, las=1, breaks=40, col="bisque", xlim=range(ann.p, ann.r))
abline(v=ann.p, col="blue")
N.greater <- sum(ann.r > ann.p)
p <- min(N.greater + 1, n + 1 - N.greater)/(n+1)
p
## [1] 0.001666667
All input files are available on Canvas. Conduct the same analysis for San Francisco, and determine whether the location of cafes follow Complete Spatial Random (CSR) before and after taking into account percent 25-34 college graduates.
# read in input objects
sanf_cbd_cafes_ppp <- read_rds("sanf_cbd_cafes_ppp.rds") # planar point pattern object
sanf_innercity_owin <- read_rds("sanf_innercity_owin.rds") # 5 miles from the cbd window object
sanf_pctyoung <- read_rds("sanf_pctyoung.rds") # % 25-34 college graduates im object
For this question, we compute global Moran’s I measures to quantify the extent to which residential segregation is present in Fulton County, GA. The US Census American Community Survey 2006-2010 5-year estimates are already processed and avaiable for your analysis.
Below, you will see complete scripts that compute the global Moran’s I statistic for log(median household income), both in hard (i.e., manual) and easy ways. For more information, refer to Spatial Autocorrelation.
# three variables are available: % non-Hispanic Whte, % college graduates, & log(median household income)
# these variables are already standardized (i.e., mean 0 standard deviation 1)
fulton2010 <- read_rds("fulton2010.rds") %>% as('Spatial')
# plot neighbor links
nb <- poly2nb(fulton2010, queen = TRUE)
xy <- sp::coordinates(fulton2010)
plot(fulton2010, border = "grey", lwd=1)
plot(nb, xy, col="red", lwd=0.1, add=TRUE)
# compute spatial weights
lw <- nb2listw(nb, style = "W", zero.policy = TRUE)
# compute the spatially weighted mean for the neighbors of each tract
Inc.lag <- lag.listw(lw, fulton2010$ln_medinc)
# run regression and plot
M <- lm(Inc.lag ~ fulton2010$ln_medinc)
plot(Inc.lag ~ fulton2010$ln_medinc, pch = 20, asp = 1, las = 1)
abline(M, col = "red")
coef(M)[2]
## fulton2010$ln_medinc
## 0.2475017
# run Monte Carlo simulation
# this helps determine the statistical significance of the computed Moran's I
n <- 599L # Define the number of simulations
I.r <- vector(length=n) # Create an empty vector
for (i in seq_along(I.r)){
# Randomly shuffle income values
x <- sample(fulton2010$ln_medinc, replace=FALSE)
# Compute new set of lagged values
x.lag <- lag.listw(lw, x)
# Compute the regression slope and store its value
M.r <- lm(x.lag ~ x)
I.r[i] <- coef(M.r)[2]
}
hist(I.r, main=NULL, xlab="Moran's I", las=1)
abline(v=coef(M)[2], col="red")
N.greater <- sum(coef(M)[2] > I.r)
p <- min(N.greater + 1, n + 1 - N.greater) / (n + 1)
p # your value might be slightly different from the printed value below b/c of the random simulation
## [1] 0.003333333
moran.test(fulton2010$ln_medinc, lw)
##
## Moran I test under randomisation
##
## data: fulton2010$ln_medinc
## weights: lw
##
## Moran I statistic standard deviate = 6.8605, p-value = 3.431e-12
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.247501715 -0.004926108 0.001353821
MC<- moran.mc(fulton2010$ln_medinc, lw, nsim = 599)
MC
##
## Monte-Carlo simulation of Moran I
##
## data: fulton2010$ln_medinc
## weights: lw
## number of simulations + 1: 600
##
## statistic = 0.2475, observed rank = 600, p-value = 0.001667
## alternative hypothesis: greater
plot(MC, main="", las=1)
Do the same task on % non-Hispanic White and % college graduates.
oomedval_lg and property2.tmap functions, which displays the distribution of property crimes (property2) in Chicago.queen=TRUE) and the same spatial weight matrix as those in the tutorial.lm.LMtests.spplot, one for the OLS residuals and the other for the residuals from the spatial model of your choice. Discuss their differences.# the median home value of owner-occupied units for each tract
chi.poly@data$oomedval_lg <- log(chi.poly@data$oomedval+1)
# recode NA to zero
chi.poly@data$property2 <- ifelse(is.na(chi.poly@data$property) == FALSE, chi.poly@data$property, 0)
chi.ols2 <-lm(property2 ~ est_fcs_rt + oomedval_lg, data=chi.poly@data)
summary(chi.ols2)