Exercise 9 - Spatial Lag Material

Load Data & Libraries

library(classInt)
## Loading required package: class
## Loading required package: e1071
library(spdep)
## Loading required package: sp
## Loading required package: boot
## Loading required package: Matrix
## Loading required package: lattice
## Attaching package: 'lattice'
## The following object(s) are masked from 'package:boot':
## 
## melanoma
## Loading required package: MASS
## Loading required package: nlme
## Loading required package: maptools
## Loading required package: foreign
## Loading required package: grid
## Checking rgeos availability: FALSE Note: when rgeos is not available,
## polygon geometry computations in maptools depend on gpclib, which has a
## restricted licence. It is disabled by default; to enable gpclib, type
## gpclibPermit()
## Loading required package: deldir
## deldir 0.0-21
## Loading required package: coda
## Loading required package: splines
library(RColorBrewer)
library(gstat)

poverty <- readShapePoly("/Users/telekineticturtle/Desktop/Colorado 13/Quant Methods/Data/poverty/poverty.shp")

Create a Variogram

# Variogram shows how the variance changes with lag distance from location
# of focus.
plot(variogram(poverty$PROPOV ~ 1, locations = coordinates(poverty), data = poverty, 
    cloud = F), type = "b", pch = 16, main = "Variogram of % Child Poverty")

plot of chunk unnamed-chunk-2

Weights for Spatial Autocorrelation

coords <- coordinates(poverty)  # coordinates function extracts coordinates
FIPS <- row.names(as(poverty, "data.frame"))
pov_kn6 <- knn2nb(knearneigh(coords, k = 6), row.names = FIPS)
pov_kn6_wb <- nb2listw(pov_kn6, style = "B")
pov_kn6_mor <- moran.test(poverty$PROPOV, listw = pov_kn6_wb, zero.policy = T)
pov_kn6_mor  # 0.611 = Moran's I -- high spatial autocorrelation.
## 
##  Moran's I test under randomisation
## 
## data:  poverty$PROPOV  
## weights: pov_kn6_wb  
##  
## Moran I statistic standard deviate = 60.92, p-value < 2.2e-16
## alternative hypothesis: greater 
## sample estimates:
## Moran I statistic       Expectation          Variance 
##         0.6107716        -0.0003220         0.0001006

## Test of significance using Monte Carlo Simulation:
povMoranSim <- moran.mc(poverty$PROPOV, listw = pov_kn6_wb, nsim = 999)
povMoranSim  # The real observation is significantly different from modeled Moran's I statistics.
## 
##  Monte-Carlo simulation of Moran's I
## 
## data:  poverty$PROPOV 
## weights: pov_kn6_wb  
## number of simulations + 1: 1000 
##  
## statistic = 0.6108, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater
names(povMoranSim)  # povMoranSim$res is the Moran's I result
## [1] "statistic"   "parameter"   "p.value"     "alternative" "method"     
## [6] "data.name"   "res"
# Plot the Moran's I Simulation Results
hist(povMoranSim$res, breaks = 50, main = "Histogram of Moran's I Simulations", 
    xlab = "Moran's I Stat", ylab = "Frequency")

plot of chunk unnamed-chunk-4

# So we can confidently say that this Moran's I statistic is the real
# deal. There is spatial autocorrelation!

Models

lm1 <- lm(PROPOV ~ PROAUNEM + PROFEMHH + PROHSLS + PROBLCK + PROHISP, data = poverty)
summary(lm1)
## 
## Call:
## lm(formula = PROPOV ~ PROAUNEM + PROFEMHH + PROHSLS + PROBLCK + 
##     PROHISP, data = poverty)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.1937 -0.0429 -0.0072  0.0336  0.5298 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.14124    0.00807  -17.50   <2e-16 ***
## PROAUNEM     1.24605    0.04646   26.82   <2e-16 ***
## PROFEMHH     0.35913    0.03066   11.71   <2e-16 ***
## PROHSLS      0.30492    0.01138   26.79   <2e-16 ***
## PROBLCK      0.11967    0.01226    9.76   <2e-16 ***
## PROHISP      0.22029    0.01044   21.09   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.0624 on 3101 degrees of freedom
## Multiple R-squared: 0.643,   Adjusted R-squared: 0.642 
## F-statistic: 1.12e+03 on 5 and 3101 DF,  p-value: <2e-16
laglm1 <- lagsarlm(PROPOV ~ PROAUNEM + PROFEMHH + PROHSLS + PROBLCK + PROHISP, 
    data = poverty, pov_kn6_wb)
summary(laglm1)
## 
## Call:
## lagsarlm(formula = PROPOV ~ PROAUNEM + PROFEMHH + PROHSLS + PROBLCK + 
##     PROHISP, data = poverty, listw = pov_kn6_wb)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.1849654 -0.0328936 -0.0069813  0.0255106  0.5020163 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##               Estimate Std. Error  z value Pr(>|z|)
## (Intercept) -0.1628554  0.0065446 -24.8839   <2e-16
## PROAUNEM     0.7590961  0.0403112  18.8309   <2e-16
## PROFEMHH     0.4388358  0.0248662  17.6479   <2e-16
## PROHSLS      0.2196769  0.0095882  22.9112   <2e-16
## PROBLCK      0.0015337  0.0103833   0.1477   0.8826
## PROHISP      0.1241931  0.0088771  13.9904   <2e-16
## 
## Rho: 0.08705, LR test value: 1147, p-value: < 2.22e-16
## Asymptotic standard error: 0.002274
##     z-value: 38.27, p-value: < 2.22e-16
## Wald statistic: 1465, p-value: < 2.22e-16
## 
## Log likelihood: 4788 for lag model
## ML residual variance (sigma squared): 0.002557, (sigma: 0.05057)
## Number of observations: 3107 
## Number of parameters estimated: 8 
## AIC: -9559, (AIC for lm: -8414)
## LM test for residual autocorrelation
## test value: 341.7, p-value: < 2.22e-16

What if scenarios…

poverty.new <- poverty  #copy the data frame (so we don't mess up the original)

# Find a county name:
names(poverty.new)
##  [1] "NAME"     "FIPS"     "MET"      "PROPOV"   "WGHT"     "PROBLCK" 
##  [7] "PROHISP"  "SOUTH"    "PROHSLS"  "PROWKCO"  "PROFEMHH" "PROEXTR" 
## [13] "PRONMAN"  "PROMSERV" "PROPSERV" "PROAUNEM" "PROMUNDR" "XCOORD"  
## [19] "YCOORD"   "X2"       "Y2"
head(poverty.new$NAME)
## [1] Lake of the Woods Ferry             Stevens           Okanogan         
## [5] Pend Oreille      Boundary         
## 1801 Levels: Abbeville Acadia Accomack Ada Adair Adams Addison ... Ziebach

# Change the unemployment rate
poverty.new@data[poverty.new@data$NAME == "Addison", "PROAUNEM"] <- 0.75

# The original predicted values
orig.pred <- as.data.frame(predict(laglm1))

# The predicted values with the new unemployment rate in Alabama
new.pred <- as.data.frame(predict(laglm1, newdata = poverty.new, listw = pov_kn6_wb))

# the difference between the predicted values
jCoAL_effect <- new.pred$fit - orig.pred$fit
el <- data.frame(name = poverty$NAME, diff_in_pred_PROPOV = jCoAL_effect)  # To simplify things, placing the differences in a new data frame.
poverty.new$jee <- el$diff_in_pred_PROPOV  # And placing them in the origianl, as well

# sort counties by absolute value of the change in predicted PPOV
el <- el[rev(order(abs(el$diff_in_pred_PROPOV))), ]
el[1:10, ]  #show the top 10 counties
##        name diff_in_pred_PROPOV
## 352 Addison              0.5924
## 584  Thomas             -0.1300
## 582  Hooker             -0.1205
## 632   Logan             -0.1160
## 496  Cherry             -0.1145
## 708 Lincoln             -0.1135
## 458 Bennett             -0.1080
## 420   Tripp             -0.1066
## 459    Todd             -0.1048
## 580   Grant             -0.1044

Map effects of that change to the data

# Make breaks to show positive, negative, and netural effects:
povbreaks <- c(min(poverty.new$jee), -0.03, 0.03, max(poverty.new$jee))
# Create labels for those classes:
povlabels <- c("Negative effect (greater than .03)", "No Effect (effect -.03 to .03)", 
    "Positive effect (greater than .03)")
# Assign colors to the classes:
pov_np <- findInterval(poverty.new$jee, povbreaks)
povcolors <- c("red", "yellow", "blue")

# Draw Map
plot(poverty.new, col = povcolors[pov_np])
mtext("Effects of a change in Addison County, VT (set unemployment rate = 0.75)\n on predicted values in a spatial lag model", 
    side = 3, line = 1)
legend("topleft", legend = povlabels, fill = povcolors, bty = "n")

plot of chunk unnamed-chunk-7

Magnitude of Change Map

pal5 <- brewer.pal(6, "Spectral")
cats5 <- classIntervals(poverty.new$jee, n = 5, style = "jenks")
colors5 <- findColours(cats5, pal5)
plot(poverty.new, col = colors5)
legend("topleft", legend = round(cats5$brks, 2), fill = pal5, bty = "n")
mtext("Effects of a change in Addison County, VT (set unemployment rate = 0.75)\n on predicted values in a spatial lag model", 
    side = 3, line = 1)

plot of chunk unnamed-chunk-8