Lecture: Spatial Autocorrelation and Regression; part 2

Key Concepts covered in the lecture include (Same as before):
1. Regression requires spatial dependence (no autocorrelation!)
2. Moran's I Significane Testing through simulation
3. Moran's Scatterplot
4. Local Indicators of Spatial Autocorrelation (LISA)
5. Correlogram
6. Semivariograms
7. The Spatial Lag Regression Model

The in-class exercise focuses on creating a spatial autoregressive model.

Part 1: Create a map of PPOV

library(RColorBrewer)
library(classInt)

## Loading required package: class

## Loading required package: e1071

library(spdep)

## Loading required package: sp

## Loading required package: boot

## Loading required package: Matrix

## Loading required package: lattice

## Attaching package: 'lattice'

## The following object(s) are masked from 'package:boot':
## 
## melanoma

## Loading required package: MASS

## Loading required package: nlme

## Loading required package: maptools

## Loading required package: foreign

## Loading required package: grid

## Checking rgeos availability: TRUE

## Loading required package: deldir

## deldir 0.0-21

## Loading required package: coda

## Loading required package: splines

load("H:/Quant/inClassExercises/InClassExerciseData/soco.rda")

breaks <- classIntervals(soco$PPOV, n = 3, style = "quantile")
labels1 <- c("Low Poverty", "medium poverty", "positive residuals")
colors1 <- brewer.pal(3, "Reds")
plot(soco, col = colors1)
mtext("Percent Poverty", cex = 1.5, side = 3, line = 1)
legend("topleft", legend = labels1, fill = colors1, bty = "n")

plot of chunk unnamed-chunk-1

Part 2: Create a non-spatial model for predicting PPOV and do some diagnostics

# Create a model using
names(soco)

##  [1] "CNTY_ST"  "STUSAB"   "FIPS"     "YCOORD"   "XCOORD"   "SQYCORD" 
##  [7] "SQXCORD"  "XYCOORD"  "PPOV"     "PHSP"     "PFHH"     "PWKCO"   
## [13] "PHSLS"    "PUNEM"    "PUDEM"    "PEXTR"    "PPSRV"    "PMSRV"   
## [19] "PNDMFG"   "PNHSPW"   "PMNRTY"   "LO_POV"   "PFRN"     "PNAT"    
## [25] "PBLK"     "P65UP"    "PDSABL"   "METRO"    "PERPOV"   "OTMIG"   
## [31] "SQOTMIG"  "BINMIG"   "INCARC"   "BINCARC"  "SQRTPPOV" "SQRTUNEM"
## [37] "SQRTPFHH" "LOGHSPLS" "PHSPLUS"


soco_OLS <- lm(PPOV ~ PFHH + PUNEM + PBLK + P65UP, data = soco)  #Create a non-spatial OLS model
summary(soco_OLS)  #All variables are significant

## 
## Call:
## lm(formula = PPOV ~ PFHH + PUNEM + PBLK + P65UP, data = soco)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.26570 -0.03711 -0.00785  0.03294  0.28733 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.05544    0.00891   -6.22  6.6e-10 ***
## PFHH         0.45748    0.04941    9.26  < 2e-16 ***
## PUNEM        2.18109    0.07334   29.74  < 2e-16 ***
## PBLK        -0.05919    0.01878   -3.15   0.0017 ** 
## P65UP        0.38023    0.04276    8.89  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.0576 on 1382 degrees of freedom
## Multiple R-squared: 0.603,   Adjusted R-squared: 0.601 
## F-statistic:  524 on 4 and 1382 DF,  p-value: <2e-16


# Map the residuals
soco$resid <- resid(soco_OLS)  #Create a new field for the residual values

breaks <- c(min(soco$resid), 0, max(soco$resid))
labels <- c("negative residuals", "positive residuals")
np <- findInterval(soco$resid, breaks)
colors <- c("red", "blue")
# dev.off()
plot(soco, col = colors[np], )
mtext("OLS Residuals", cex = 1.5, side = 3, line = 1)
legend("topleft", legend = labels, fill = colors, bty = "n")

plot of chunk unnamed-chunk-2



# Is there autocorrelation in the residuals?
soco_nbq <- poly2nb(soco)  #Queens neighbors
soco_nbq_w <- nb2listw(soco_nbq)  #row standardized weights matrix

moran.mc(soco$resid, soco_nbq_w, nsim = 999)  #simulated Moran's I.

## 
##  Monte-Carlo simulation of Moran's I
## 
## data:  soco$resid 
## weights: soco_nbq_w  
## number of simulations + 1: 1000 
##  
## statistic = 0.3609, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater

Part 3: Fit a sptial model

soco_LAG <- lagsarlm(PPOV ~ PFHH + PUNEM + PBLK + P65UP, data = soco, soco_nbq_w)  #creates a spatial lag model (SAR)
summary(soco_LAG)

## 
## Call:lagsarlm(formula = PPOV ~ PFHH + PUNEM + PBLK + P65UP, data = soco, 
##     listw = soco_nbq_w)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2457653 -0.0284360 -0.0028762  0.0262169  0.2374894 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##              Estimate Std. Error  z value  Pr(>|z|)
## (Intercept) -0.100260   0.007375 -13.5946 < 2.2e-16
## PFHH         0.429404   0.040246  10.6695 < 2.2e-16
## PUNEM        1.354637   0.065959  20.5374 < 2.2e-16
## PBLK        -0.069046   0.015335  -4.5025 6.716e-06
## P65UP        0.291192   0.035210   8.2701 2.220e-16
## 
## Rho: 0.5172, LR test value: 491.5, p-value: < 2.22e-16
## Asymptotic standard error: 0.02118
##     z-value: 24.42, p-value: < 2.22e-16
## Wald statistic: 596.3, p-value: < 2.22e-16
## 
## Log likelihood: 2239 for lag model
## ML residual variance (sigma squared): 0.002192, (sigma: 0.04682)
## Number of observations: 1387 
## Number of parameters estimated: 7 
## AIC: -4464, (AIC for lm: -3974)
## LM test for residual autocorrelation
## test value: 4.85, p-value: 0.027651


# Check the residuals for autocorrelation
moran.mc(soco_LAG$resid, soco_nbq_w, nsim = 999)  #yay! the residuals aren't significantly autocorrelated!

## 
##  Monte-Carlo simulation of Moran's I
## 
## data:  soco_LAG$resid 
## weights: soco_nbq_w  
## number of simulations + 1: 1000 
##  
## statistic = 0.0272, observed rank = 966, p-value = 0.034
## alternative hypothesis: greater


# Compare the spatial and non-spatial model
anova(soco_LAG, soco_OLS)

##          Model df   AIC logLik Test L.Ratio p-value
## soco_LAG     1  7 -4464   2239    1                
## soco_OLS     2  6 -3974   1993    2     491       0