Key Concepts covered in the lecture include (Same as before):
1. Regression requires spatial dependence (no autocorrelation!)
2. Moran's I Significane Testing through simulation
3. Moran's Scatterplot
4. Local Indicators of Spatial Autocorrelation (LISA)
5. Correlogram
6. Semivariograms
7. The Spatial Lag Regression Model
The in-class exercise focuses on creating a spatial autoregressive model.
Part 1: Create a map of PPOV
library(RColorBrewer)
library(classInt)
## Loading required package: class
## Loading required package: e1071
library(spdep)
## Loading required package: sp
## Loading required package: boot
## Loading required package: Matrix
## Loading required package: lattice
## Attaching package: 'lattice'
## The following object(s) are masked from 'package:boot':
##
## melanoma
## Loading required package: MASS
## Loading required package: nlme
## Loading required package: maptools
## Loading required package: foreign
## Loading required package: grid
## Checking rgeos availability: TRUE
## Loading required package: deldir
## deldir 0.0-21
## Loading required package: coda
## Loading required package: splines
load("H:/Quant/inClassExercises/InClassExerciseData/soco.rda")
breaks <- classIntervals(soco$PPOV, n = 3, style = "quantile")
labels1 <- c("Low Poverty", "medium poverty", "positive residuals")
colors1 <- brewer.pal(3, "Reds")
plot(soco, col = colors1)
mtext("Percent Poverty", cex = 1.5, side = 3, line = 1)
legend("topleft", legend = labels1, fill = colors1, bty = "n")
Part 2: Create a non-spatial model for predicting PPOV and do some diagnostics
# Create a model using
names(soco)
## [1] "CNTY_ST" "STUSAB" "FIPS" "YCOORD" "XCOORD" "SQYCORD"
## [7] "SQXCORD" "XYCOORD" "PPOV" "PHSP" "PFHH" "PWKCO"
## [13] "PHSLS" "PUNEM" "PUDEM" "PEXTR" "PPSRV" "PMSRV"
## [19] "PNDMFG" "PNHSPW" "PMNRTY" "LO_POV" "PFRN" "PNAT"
## [25] "PBLK" "P65UP" "PDSABL" "METRO" "PERPOV" "OTMIG"
## [31] "SQOTMIG" "BINMIG" "INCARC" "BINCARC" "SQRTPPOV" "SQRTUNEM"
## [37] "SQRTPFHH" "LOGHSPLS" "PHSPLUS"
soco_OLS <- lm(PPOV ~ PFHH + PUNEM + PBLK + P65UP, data = soco) #Create a non-spatial OLS model
summary(soco_OLS) #All variables are significant
##
## Call:
## lm(formula = PPOV ~ PFHH + PUNEM + PBLK + P65UP, data = soco)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.26570 -0.03711 -0.00785 0.03294 0.28733
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.05544 0.00891 -6.22 6.6e-10 ***
## PFHH 0.45748 0.04941 9.26 < 2e-16 ***
## PUNEM 2.18109 0.07334 29.74 < 2e-16 ***
## PBLK -0.05919 0.01878 -3.15 0.0017 **
## P65UP 0.38023 0.04276 8.89 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0576 on 1382 degrees of freedom
## Multiple R-squared: 0.603, Adjusted R-squared: 0.601
## F-statistic: 524 on 4 and 1382 DF, p-value: <2e-16
# Map the residuals
soco$resid <- resid(soco_OLS) #Create a new field for the residual values
breaks <- c(min(soco$resid), 0, max(soco$resid))
labels <- c("negative residuals", "positive residuals")
np <- findInterval(soco$resid, breaks)
colors <- c("red", "blue")
# dev.off()
plot(soco, col = colors[np], )
mtext("OLS Residuals", cex = 1.5, side = 3, line = 1)
legend("topleft", legend = labels, fill = colors, bty = "n")
# Is there autocorrelation in the residuals?
soco_nbq <- poly2nb(soco) #Queens neighbors
soco_nbq_w <- nb2listw(soco_nbq) #row standardized weights matrix
moran.mc(soco$resid, soco_nbq_w, nsim = 999) #simulated Moran's I.
##
## Monte-Carlo simulation of Moran's I
##
## data: soco$resid
## weights: soco_nbq_w
## number of simulations + 1: 1000
##
## statistic = 0.3609, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater
Part 3: Fit a sptial model
soco_LAG <- lagsarlm(PPOV ~ PFHH + PUNEM + PBLK + P65UP, data = soco, soco_nbq_w) #creates a spatial lag model (SAR)
summary(soco_LAG)
##
## Call:lagsarlm(formula = PPOV ~ PFHH + PUNEM + PBLK + P65UP, data = soco,
## listw = soco_nbq_w)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2457653 -0.0284360 -0.0028762 0.0262169 0.2374894
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.100260 0.007375 -13.5946 < 2.2e-16
## PFHH 0.429404 0.040246 10.6695 < 2.2e-16
## PUNEM 1.354637 0.065959 20.5374 < 2.2e-16
## PBLK -0.069046 0.015335 -4.5025 6.716e-06
## P65UP 0.291192 0.035210 8.2701 2.220e-16
##
## Rho: 0.5172, LR test value: 491.5, p-value: < 2.22e-16
## Asymptotic standard error: 0.02118
## z-value: 24.42, p-value: < 2.22e-16
## Wald statistic: 596.3, p-value: < 2.22e-16
##
## Log likelihood: 2239 for lag model
## ML residual variance (sigma squared): 0.002192, (sigma: 0.04682)
## Number of observations: 1387
## Number of parameters estimated: 7
## AIC: -4464, (AIC for lm: -3974)
## LM test for residual autocorrelation
## test value: 4.85, p-value: 0.027651
# Check the residuals for autocorrelation
moran.mc(soco_LAG$resid, soco_nbq_w, nsim = 999) #yay! the residuals aren't significantly autocorrelated!
##
## Monte-Carlo simulation of Moran's I
##
## data: soco_LAG$resid
## weights: soco_nbq_w
## number of simulations + 1: 1000
##
## statistic = 0.0272, observed rank = 966, p-value = 0.034
## alternative hypothesis: greater
# Compare the spatial and non-spatial model
anova(soco_LAG, soco_OLS)
## Model df AIC logLik Test L.Ratio p-value
## soco_LAG 1 7 -4464 2239 1
## soco_OLS 2 6 -3974 1993 2 491 0