Key Concepts covered in the lecture include (Same as before):
1. 2 (+1) types of spatial regression models
2. Methods of model selection
3. OLS and Autocorrelation in the Residuals
4. Spillover Effects
5. Hypothesis Tests for Spatial Models
6. Spatial Regimes Model
7. Spatial Expansion Model
8. Geographically Weighted Regression
9. Local vs. Global Models
The in-class exercise focuses on .
Part 1: Fit a spatial lag model
library(spdep)
## Loading required package: sp
## Loading required package: boot
## Loading required package: Matrix
## Loading required package: lattice
## Attaching package: 'lattice'
## The following object(s) are masked from 'package:boot':
##
## melanoma
## Loading required package: MASS
## Loading required package: nlme
## Loading required package: maptools
## Loading required package: foreign
## Loading required package: grid
## Checking rgeos availability: TRUE
## Loading required package: deldir
## deldir 0.0-21
## Loading required package: coda
## Loading required package: splines
pov <- readShapePoly("E:/Quant/inClassExercises/InClassExerciseData/poverty/poverty.shp")
pov_ols <- lm(PROPOV ~ PROBLCK + PROFEMHH + PROAUNEM, data = pov) #create a regular regression model
summary(pov_ols) #test if the parameters are significant
##
## Call:
## lm(formula = PROPOV ~ PROBLCK + PROFEMHH + PROAUNEM, data = pov)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2325 -0.0503 -0.0114 0.0389 0.5194
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0492 0.0043 11.44 < 2e-16 ***
## PROBLCK 0.1782 0.0135 13.21 < 2e-16 ***
## PROFEMHH 0.1537 0.0340 4.52 6.4e-06 ***
## PROAUNEM 1.8843 0.0486 38.75 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0727 on 3103 degrees of freedom
## Multiple R-squared: 0.515, Adjusted R-squared: 0.515
## F-statistic: 1.1e+03 on 3 and 3103 DF, p-value: <2e-16
# Test for autocorrelation within the residuals.
# must create a weights matrix first...
nbq <- poly2nb(pov) #Queens neighbors
nbq_w <- nb2listw(nbq, zero.policy = T) #row standardized weights matrix; need to set zero.polity to true because we have island counties.
lm.morantest(pov_ols, nbq_w, zero.policy = T) #test for autocorrelation in the residuals.
##
## Global Moran's I for regression residuals
##
## data:
## model: lm(formula = PROPOV ~ PROBLCK + PROFEMHH + PROAUNEM, data =
## pov)
## weights: nbq_w
##
## Moran I statistic standard deviate = 53.31, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran's I Expectation Variance
## 0.5729832 -0.0008190 0.0001159
# Because we have autocorrelation in the residuals, the OLS regression
# model is biased. We should try a spatial model...
# Create a spatial lag model:
pvo_lag <- lagsarlm(PROPOV ~ PROBLCK + PROFEMHH + PROAUNEM, data = pov, nbq_w,
zero.policy = T) #spatial lag model; nearly the same as the regular lm.
summary(pvo_lag)
##
## Call:
## lagsarlm(formula = PROPOV ~ PROBLCK + PROFEMHH + PROAUNEM, data = pov,
## listw = nbq_w, zero.policy = T)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2008754 -0.0363275 -0.0068699 0.0302317 0.4902326
##
## Type: lag
## Regions with no neighbours included:
## 35 689 709 880
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.0355682 0.0034907 -10.189 < 2.2e-16
## PROBLCK 0.0337991 0.0107539 3.143 0.001673
## PROFEMHH 0.3119127 0.0258525 12.065 < 2.2e-16
## PROAUNEM 1.0723986 0.0428918 25.002 < 2.2e-16
##
## Rho: 0.5957, LR test value: 1461, p-value: < 2.22e-16
## Asymptotic standard error: 0.01363
## z-value: 43.69, p-value: < 2.22e-16
## Wald statistic: 1909, p-value: < 2.22e-16
##
## Log likelihood: 4471 for lag model
## ML residual variance (sigma squared): 0.003053, (sigma: 0.05525)
## Number of observations: 3107
## Number of parameters estimated: 6
## AIC: -8929, (AIC for lm: -7470)
## LM test for residual autocorrelation
## test value: 164.1, p-value: < 2.22e-16
# Compare the models using a Likelihood Ratio Test. Does accounting for
# autocorrelation improve the model?
anova(pvo_lag, pov_ols) #This is the LRT. The summary for the lagsarlm() gives us this too, but ANOVA gives us goodness of fit statistics.
## Model df AIC logLik Test L.Ratio p-value
## pvo_lag 1 6 -8929 4471 1
## pov_ols 2 5 -7470 3740 2 1461 0
Part 2: Change a predictor variable for Boulder County. How does this change alter the results of the model?
# Copy the spatial data frame. We don't want to corrupt our original
# data.
pov_new <- pov #copies the pov data to a new object.
# Edit the poverty value for Boulder County by reassigning it.
pov_new@data[pov_new@data$NAME == "Boulder", "PROPOV"] <- 0.7 #Change the proportion of the population that is unemployed from 0.09 to 0.7.
# Predict poverty for the original dataset and the altered dataset using
# the model above
orig_predict <- as.data.frame(predict(pvo_lag)) #The predicted values of the spatial model on the original data
alter_predict <- as.data.frame(predict(pvo_lag, newdata = pov_new, listw = nbq_w,
zero.policy = T)) #the predicted values of the spatial model using the altered data. Looking at this will show us the compelxity of the interconnectedness of the data. #I'm still getting NAs in the lagged values. I'm not sure why... It takes too long to run this line to keep trying to figure this out right now. I think the zero.policy may be in the wrong spot...
jCoAL_effect <- alter_predict$fit - orig_predict$fit #This calculates the differences between the modeled orignal data and altered data. It givesa value for every county.
# Let's take a closer look at the counties that chagned the most...
el <- data.frame(name = pov$NAME, diff_in_pred_PPOV = jCoAL_effect) #Creates a dataframe to hold the counties and the effects of changing Boulder County's unemployment.
pov_new$jee <- el$diff_in_pred_PPOV #Store these residuals to the copied dataset. Now we can map them.
el <- el[rev(order(abs(el$diff_in_pred_PPOV))), ] #sorts the change value by magnitude (absolute value)
el[1:10, ] #shows the top 10 couties that had the greatest change. Why isn't Boulder present in this list?
## name diff_in_pred_PPOV
## 584 Thomas -0.1583
## 632 Logan -0.1564
## 517 Brown -0.1416
## 582 Hooker -0.1378
## 459 Todd -0.1332
## 2181 Hall -0.1320
## 435 Gregory -0.1320
## 2767 Sutton -0.1293
## 495 Boyd -0.1275
## 420 Tripp -0.1272
# Somehow we need to attach the state to these values...
# impacts(pov_lag, listw=nbq_w)#allows us to see another way that the data
# is altered.
Part 3: Map the differences in povery among counties
# Map the overall effects after changing Boulder County's data:
breaks1 <- c(min(pov_new$jee), -0.03, 0.03, max(pov_new$jee))
labels1 <- c("Negative effect (greater than .03)", "No Effect (effect -.03 to .03)",
"Positive effect (greater than .03)")
np1 <- findInterval(pov_new$jee, breaks1)
colors1 <- c("red", "yellow", "blue")
# Draw Map
plot(pov_new, col = colors1[np1])
mtext("Effects of a change in Boulder County, (set perc unemployment = .7)\n on predicted values in a spatial lag model",
side = 3, line = 1)
legend("topleft", legend = labels1, fill = colors1, bty = "n")
#############################################################
# Look at the magnitude of change...
library(RColorBrewer)
library(classInt)
## Loading required package: class
## Loading required package: e1071
pal5 <- brewer.pal(5, "RdYlBu")
cats5 <- classIntervals(pov_new$jee, n = 5, style = "jenks") #This is taking a lot longer than expected....
colors5 <- findColours(cats5, pal5)
plot(pov_new, col = colors5)
legend("topleft", legend = round(cats5$brks, 2), fill = pal5, bty = "n")
mtext("Effects of a change in Boulder County (set PROPOV = .7)\n on predicted values in a spatial lag model",
side = 3, line = 1)