Lecture: Spatial Regression 2

Key Concepts covered in the lecture include (Same as before):
1. 2 (+1) types of spatial regression models
2. Methods of model selection
3. OLS and Autocorrelation in the Residuals
4. Spillover Effects
5. Hypothesis Tests for Spatial Models
6. Spatial Regimes Model
7. Spatial Expansion Model
8. Geographically Weighted Regression
9. Local vs. Global Models

The in-class exercise focuses on .

Part 1: Fit a spatial lag model

library(spdep)

## Loading required package: sp

## Loading required package: boot

## Loading required package: Matrix

## Loading required package: lattice

## Attaching package: 'lattice'

## The following object(s) are masked from 'package:boot':
## 
## melanoma

## Loading required package: MASS

## Loading required package: nlme

## Loading required package: maptools

## Loading required package: foreign

## Loading required package: grid

## Checking rgeos availability: TRUE

## Loading required package: deldir

## deldir 0.0-21

## Loading required package: coda

## Loading required package: splines


pov <- readShapePoly("E:/Quant/inClassExercises/InClassExerciseData/poverty/poverty.shp")

pov_ols <- lm(PROPOV ~ PROBLCK + PROFEMHH + PROAUNEM, data = pov)  #create a regular regression model
summary(pov_ols)  #test if the parameters are significant

## 
## Call:
## lm(formula = PROPOV ~ PROBLCK + PROFEMHH + PROAUNEM, data = pov)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.2325 -0.0503 -0.0114  0.0389  0.5194 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.0492     0.0043   11.44  < 2e-16 ***
## PROBLCK       0.1782     0.0135   13.21  < 2e-16 ***
## PROFEMHH      0.1537     0.0340    4.52  6.4e-06 ***
## PROAUNEM      1.8843     0.0486   38.75  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.0727 on 3103 degrees of freedom
## Multiple R-squared: 0.515,   Adjusted R-squared: 0.515 
## F-statistic: 1.1e+03 on 3 and 3103 DF,  p-value: <2e-16


# Test for autocorrelation within the residuals.

# must create a weights matrix first...

nbq <- poly2nb(pov)  #Queens neighbors
nbq_w <- nb2listw(nbq, zero.policy = T)  #row standardized weights matrix; need to set zero.polity to true because we have island counties.

lm.morantest(pov_ols, nbq_w, zero.policy = T)  #test for autocorrelation in the residuals.

## 
##  Global Moran's I for regression residuals
## 
## data:  
## model: lm(formula = PROPOV ~ PROBLCK + PROFEMHH + PROAUNEM, data =
## pov)
## weights: nbq_w
##  
## Moran I statistic standard deviate = 53.31, p-value < 2.2e-16
## alternative hypothesis: greater 
## sample estimates:
## Observed Moran's I        Expectation           Variance 
##          0.5729832         -0.0008190          0.0001159

# Because we have autocorrelation in the residuals, the OLS regression
# model is biased.  We should try a spatial model...

# Create a spatial lag model:

pvo_lag <- lagsarlm(PROPOV ~ PROBLCK + PROFEMHH + PROAUNEM, data = pov, nbq_w, 
    zero.policy = T)  #spatial lag model; nearly the same as the regular lm.
summary(pvo_lag)

## 
## Call:
## lagsarlm(formula = PROPOV ~ PROBLCK + PROFEMHH + PROAUNEM, data = pov, 
##     listw = nbq_w, zero.policy = T)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2008754 -0.0363275 -0.0068699  0.0302317  0.4902326 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##               Estimate Std. Error z value  Pr(>|z|)
## (Intercept) -0.0355682  0.0034907 -10.189 < 2.2e-16
## PROBLCK      0.0337991  0.0107539   3.143  0.001673
## PROFEMHH     0.3119127  0.0258525  12.065 < 2.2e-16
## PROAUNEM     1.0723986  0.0428918  25.002 < 2.2e-16
## 
## Rho: 0.5957, LR test value: 1461, p-value: < 2.22e-16
## Asymptotic standard error: 0.01363
##     z-value: 43.69, p-value: < 2.22e-16
## Wald statistic: 1909, p-value: < 2.22e-16
## 
## Log likelihood: 4471 for lag model
## ML residual variance (sigma squared): 0.003053, (sigma: 0.05525)
## Number of observations: 3107 
## Number of parameters estimated: 6 
## AIC: -8929, (AIC for lm: -7470)
## LM test for residual autocorrelation
## test value: 164.1, p-value: < 2.22e-16


# Compare the models using a Likelihood Ratio Test.  Does accounting for
# autocorrelation improve the model?
anova(pvo_lag, pov_ols)  #This is the LRT.  The summary for the lagsarlm() gives us this too, but ANOVA gives us goodness of fit statistics.

##         Model df   AIC logLik Test L.Ratio p-value
## pvo_lag     1  6 -8929   4471    1                
## pov_ols     2  5 -7470   3740    2    1461       0

Part 2: Change a predictor variable for Boulder County. How does this change alter the results of the model?

# Copy the spatial data frame.  We don't want to corrupt our original
# data.
pov_new <- pov  #copies the pov data to a new object.

# Edit the poverty value for Boulder County by reassigning it.
pov_new@data[pov_new@data$NAME == "Boulder", "PROPOV"] <- 0.7  #Change the proportion of the population that is unemployed from 0.09 to 0.7.

# Predict poverty for the original dataset and the altered dataset using
# the model above
orig_predict <- as.data.frame(predict(pvo_lag))  #The predicted values of the spatial model on the original data
alter_predict <- as.data.frame(predict(pvo_lag, newdata = pov_new, listw = nbq_w, 
    zero.policy = T))  #the predicted values of the spatial model using the altered data.  Looking at this will show us the compelxity of the interconnectedness of the data.  #I'm still getting NAs in the lagged values.  I'm not sure why...  It takes too long to run this line to keep trying to figure this out right now.  I think the zero.policy may be in the wrong spot...

jCoAL_effect <- alter_predict$fit - orig_predict$fit  #This calculates the differences between the modeled orignal data and altered data.  It givesa  value for every county.

# Let's take a closer look at the counties that chagned the most...
el <- data.frame(name = pov$NAME, diff_in_pred_PPOV = jCoAL_effect)  #Creates a dataframe to hold the counties and the effects of changing Boulder County's unemployment.
pov_new$jee <- el$diff_in_pred_PPOV  #Store these residuals to the copied dataset.  Now we can map them.

el <- el[rev(order(abs(el$diff_in_pred_PPOV))), ]  #sorts the change value by magnitude (absolute value)
el[1:10, ]  #shows the top 10 couties that had the greatest change.  Why isn't Boulder present in this list?

##         name diff_in_pred_PPOV
## 584   Thomas           -0.1583
## 632    Logan           -0.1564
## 517    Brown           -0.1416
## 582   Hooker           -0.1378
## 459     Todd           -0.1332
## 2181    Hall           -0.1320
## 435  Gregory           -0.1320
## 2767  Sutton           -0.1293
## 495     Boyd           -0.1275
## 420    Tripp           -0.1272

# Somehow we need to attach the state to these values...

# impacts(pov_lag, listw=nbq_w)#allows us to see another way that the data
# is altered.

Part 3: Map the differences in povery among counties

# Map the overall effects after changing Boulder County's data:

breaks1 <- c(min(pov_new$jee), -0.03, 0.03, max(pov_new$jee))
labels1 <- c("Negative effect (greater than .03)", "No Effect (effect -.03 to .03)", 
    "Positive effect (greater than .03)")

np1 <- findInterval(pov_new$jee, breaks1)
colors1 <- c("red", "yellow", "blue")

# Draw Map
plot(pov_new, col = colors1[np1])
mtext("Effects of a change in Boulder County, (set perc unemployment = .7)\n on predicted values in a spatial lag model", 
    side = 3, line = 1)
legend("topleft", legend = labels1, fill = colors1, bty = "n")

plot of chunk unnamed-chunk-3


#############################################################

# Look at the magnitude of change...
library(RColorBrewer)
library(classInt)

## Loading required package: class

## Loading required package: e1071


pal5 <- brewer.pal(5, "RdYlBu")
cats5 <- classIntervals(pov_new$jee, n = 5, style = "jenks")  #This is taking a lot longer than expected....
colors5 <- findColours(cats5, pal5)
plot(pov_new, col = colors5)
legend("topleft", legend = round(cats5$brks, 2), fill = pal5, bty = "n")
mtext("Effects of a change in Boulder County (set PROPOV = .7)\n on predicted values in a spatial lag model", 
    side = 3, line = 1)

plot of chunk unnamed-chunk-3