Spatial lag and spatial error model

load("C:\\Users\\QINGHUAN\\Dropbox\\GEOG 5023 - Spring 2013\\Data\\soco.rda")
library(sp)
## Warning: package 'sp' was built under R version 2.15.3
library(maptools)
## Warning: package 'maptools' was built under R version 2.15.3
## Loading required package: foreign
## Loading required package: grid
## Loading required package: lattice
## Checking rgeos availability: TRUE
library(rgdal)
## Warning: package 'rgdal' was built under R version 2.15.3
## rgdal: version: 0.8-8, (SVN revision 463) Geospatial Data Abstraction
## Library extensions to R successfully loaded Loaded GDAL runtime: GDAL
## 1.9.2, released 2012/10/08 Path to GDAL shared files:
## C:/Users/QINGHUAN/Documents/R/win-library/2.15/rgdal/gdal GDAL does not
## use iconv for recoding strings. Loaded PROJ.4 runtime: Rel. 4.7.1, 23
## September 2009, [PJ_VERSION: 470] Path to PROJ.4 shared files:
## C:/Users/QINGHUAN/Documents/R/win-library/2.15/rgdal/proj
library(spdep)
## Warning: package 'spdep' was built under R version 2.15.3
## Loading required package: boot
## Warning: package 'boot' was built under R version 2.15.3
## Attaching package: 'boot'
## The following object(s) are masked from 'package:lattice':
## 
## melanoma
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 2.15.3
## Loading required package: MASS
## Warning: package 'MASS' was built under R version 2.15.3
## Loading required package: nlme
## Warning: package 'nlme' was built under R version 2.15.3
## Loading required package: deldir
## deldir 0.0-21
## Loading required package: coda
## Warning: package 'coda' was built under R version 2.15.3
## Loading required package: splines
library(classInt)
## Warning: package 'classInt' was built under R version 2.15.3
## Loading required package: class
## Warning: package 'class' was built under R version 2.15.3
## Loading required package: e1071
## Warning: package 'e1071' was built under R version 2.15.3
library(RColorBrewer)

# create neighbors' lists
soco_nbq <- poly2nb(soco)  #Queen's neighborhood
soco_nbq_w <- nb2listw(soco_nbq)  #row standardization
soco_LAG <- lagsarlm(PPOV ~ PFHH + PUNEM + PBLK + P65UP, data = soco, soco_nbq_w)
lm1 <- lm(PPOV ~ PFHH + PUNEM + PBLK + P65UP, data = soco)
anova(soco_LAG, lm1)  #LRT test;lag model has lower AIC, so it's better
##          Model df   AIC logLik Test L.Ratio p-value
## soco_LAG     1  7 -4464   2239    1                
## lm1          2  6 -3974   1993    2     491       0
# there's a significant difference between the models, we accept this as
# evidence that the spatial lag is warranted
summary(soco_LAG)
## 
## Call:lagsarlm(formula = PPOV ~ PFHH + PUNEM + PBLK + P65UP, data = soco, 
##     listw = soco_nbq_w)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2457653 -0.0284360 -0.0028762  0.0262169  0.2374894 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##              Estimate Std. Error  z value  Pr(>|z|)
## (Intercept) -0.100260   0.007375 -13.5946 < 2.2e-16
## PFHH         0.429404   0.040246  10.6695 < 2.2e-16
## PUNEM        1.354637   0.065959  20.5374 < 2.2e-16
## PBLK        -0.069046   0.015335  -4.5025 6.716e-06
## P65UP        0.291192   0.035210   8.2701 2.220e-16
## 
## Rho: 0.5172, LR test value: 491.5, p-value: < 2.22e-16
## Asymptotic standard error: 0.02118
##     z-value: 24.42, p-value: < 2.22e-16
## Wald statistic: 596.3, p-value: < 2.22e-16
## 
## Log likelihood: 2239 for lag model
## ML residual variance (sigma squared): 0.002192, (sigma: 0.04682)
## Number of observations: 1387 
## Number of parameters estimated: 7 
## AIC: -4464, (AIC for lm: -3974)
## LM test for residual autocorrelation
## test value: 4.85, p-value: 0.027651

# Exploring equilibrium effects What would happen to poverty in the
# Southeast if the unemployment rate rose from 6% to 75% in the Jefferson
# County,Alabama?
soco.new <- soco  #copy the data frame(so we don't mess up the original)
# Change the unemployment rate
soco.new@data[soco.new@data$CNTY_ST == "Jefferson County AL", "PUNEM"] <- 0.75
# The original predicted values
orig.pred <- as.data.frame(predict(soco_LAG))
# The predicted values with the new unemployment rate in Alabama
new.pred <- as.data.frame(predict(soco_LAG, newdata = soco.new, listw = soco_nbq_w))
# The difference between the predicted values
jCoAL_effect <- new.pred$fit - orig.pred$fit
# create a data frame to hold the county names and the effect of the
# changes to Jefferson Cty,AL.
el <- data.frame(name = soco$CNTY_ST, diff_in_pred_PPOV = jCoAL_effect)
# also save the results into the spatial data frame so that we can map
# them
soco.new$jee <- el$diff_in_pred_PPOV
# sort counties by absolute value of the change in predicted PPOV
el <- el[rev(order(abs(el$diff_in_pred_PPOV))), ]
el[1:10, ]  #show the top 10 counties
##                      name diff_in_pred_PPOV
## 1050   El Paso County  TX          -0.08629
## 1197    Sutton County  TX          -0.08108
## 437        Lee County  KY          -0.07285
## 1212 Val Verde County  TX          -0.06353
## 385  Breathitt County  KY          -0.06268
## 1113    Kimble County  TX          -0.06169
## 1103  Jim Hogg County  TX          -0.06045
## 831      Texas County  OK          -0.06034
## 491      Wolfe County  KY          -0.05917
## 1042   Dickens County  TX          -0.05778

# Mapping these changes
breaks <- c(min(soco.new$jee), -0.03, 0.03, max(soco.new$jee))
labels <- c("Negative effect(lower than -.03)", "No Effect(effect -.03 to .03", 
    "Positive effect(greater than .03)")
np <- findInterval(soco.new$jee, breaks)
colors <- c("red", "yellow", "blue")
# Draw map
plot(soco.new, col = colors[np])
mtext("Effects of a change in Jefferson County,AL(set PUNEM=.75)\n on predicted values in a spatial lag model", 
    side = 3, line = 1)
legend("topleft", legend = labels, fill = colors, bty = "n")

plot of chunk unnamed-chunk-2


# Map the magnitude of the changes caused by messing with Jefferson
# County,AL
pal5 <- brewer.pal(6, "Spectral")
cats5 <- classIntervals(soco.new$jee, n = 5, style = "jenks")
colors5 <- findColours(cats5, pal5)
plot(soco.new, col = colors5)
legend("topleft", legend = round(cats5$brks, 2), fill = pal5, bty = "n")
mtext("Effects of a change in Jefferson County,AL(set PUNEM=.75)\n on predicted values in a spatial lag model", 
    side = 3, line = 1)

plot of chunk unnamed-chunk-2


# These maps and effects for individual counties may be useful,they only
# tell us about a change in one place. The impacts() function gives us
# something like OLS regression coefficients for a spatial lag model. it
# tells us the direct(local),indirect(spill-over),and total effect of a
# unit change in each of the predictor variables. the changes reported by
# impacts are the global average impact.
impacts(soco_LAG, listw = soco_nbq_w)
## Impact measures (lag, exact):
##         Direct Indirect   Total
## PFHH   0.45695  0.43243  0.8894
## PUNEM  1.44154  1.36419  2.8057
## PBLK  -0.07348 -0.06953 -0.1430
## P65UP  0.30987  0.29325  0.6031
# The output tells us that a 100% increase in unemployment leads to a 280%
# increase in childhood poverty rate

# try a BP test
bptest.sarlm(soco_LAG)
## 
##  studentized Breusch-Pagan test
## 
## data:  
## BP = 50.46, df = 4, p-value = 2.901e-10
# null:error term is homoscedasticity; reject null and accept
# alt:heteroscedasticity run a spatial error model
soco_err <- errorsarlm(PPOV ~ PFHH + PUNEM + PBLK + P65UP, data = soco, listw = soco_nbq_w)
summary(soco_err)
## 
## Call:
## errorsarlm(formula = PPOV ~ PFHH + PUNEM + PBLK + P65UP, data = soco, 
##     listw = soco_nbq_w)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.136985 -0.028201 -0.004545  0.023940  0.221906 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##              Estimate Std. Error z value  Pr(>|z|)
## (Intercept) 0.0070874  0.0095400  0.7429    0.4575
## PFHH        0.3649742  0.0405868  8.9924 < 2.2e-16
## PUNEM       1.1472020  0.0711971 16.1131 < 2.2e-16
## PBLK        0.0849301  0.0206069  4.1215 3.765e-05
## P65UP       0.3516854  0.0431300  8.1541 4.441e-16
## 
## Lambda: 0.7466, LR test value: 526.3, p-value: < 2.22e-16
## Asymptotic standard error: 0.02174
##     z-value: 34.34, p-value: < 2.22e-16
## Wald statistic: 1179, p-value: < 2.22e-16
## 
## Log likelihood: 2256 for error model
## ML residual variance (sigma squared): 0.001972, (sigma: 0.04440)
## Number of observations: 1387 
## Number of parameters estimated: 7 
## AIC: -4498, (AIC for lm: -3974)