Lab 3: Spatial Regression

Purpose:
decide which county level characteristics best predict county child poverty rates

Research Topics for this lab:
1. assess whether there is a spatial structure to child poverty in the US
2. decide what form that spatial structure takes and what additional information this gives you about child proverty in the US

Research Questions for this lab:
1. how do different measures of county-level employment affect child poverty?
2. Do racial composition, family composition, or overall county-level education affect poverty? Do any of these factors modify the effects of employment?
3. What spatial structure, if any, do these data exhibit? What does that tell you about child poverty in the US?

# library(maptools) install.packages('RANN') library(RANN)

# Add data
library(spdep)

## Loading required package: sp

## Loading required package: boot

## Loading required package: Matrix

## Loading required package: lattice

## Attaching package: 'lattice'

## The following object(s) are masked from 'package:boot':
## 
## melanoma

## Loading required package: MASS

## Loading required package: nlme

## Loading required package: maptools

## Loading required package: foreign

## Loading required package: grid

## Checking rgeos availability: TRUE

## Loading required package: deldir

## deldir 0.0-21

## Loading required package: coda

## Loading required package: splines

library(gstat)  #for variograms

## Loading required package: spacetime

## Loading required package: xts

## Loading required package: zoo

## Attaching package: 'zoo'

## The following object(s) are masked from 'package:base':
## 
## as.Date, as.Date.numeric

poverty <- readShapePoly("H:\\Quant\\Assignments\\Lab03\\poverty\\poverty.shp")
names(poverty)

##  [1] "POLY_ID"  "NAME"     "FIPS"     "MET"      "PROPOV"   "WGHT"    
##  [7] "PROBLCK"  "PROHISP"  "SOUTH"    "PROHSLS"  "PROWKCO"  "PROFEMHH"
## [13] "PROEXTR"  "PRONMAN"  "PROMSERV" "PROPSERV" "PROAUNEM" "PROMUNDR"
## [19] "XCOORD"   "YCOORD"   "X2"       "Y2"



# Research Topic 1: What is the spatial structure?

# Create a queen's weights matrix
nbq <- poly2nb(poverty)  #queen
nbqw <- nb2listw(nbq, style = "W", zero.policy = T)  #row standardized queens weights
moran.test(poverty$PROPOV, listw = nbqw, zero.policy = T)

## 
##  Moran's I test under randomisation
## 
## data:  poverty$PROPOV  
## weights: nbqw  
##  
## Moran I statistic standard deviate = 57.52, p-value < 2.2e-16
## alternative hypothesis: greater 
## sample estimates:
## Moran I statistic       Expectation          Variance 
##         0.6194394        -0.0003224         0.0001161

# This suggests that Moran's I for Poverty is significant and is 0.619 for
# a queens weights matrix

QueenMoran <- moran.mc(poverty$PROPOV, listw = nbqw, nsim = 999, zero.policy = T)
hist(QueenMoran$res, breaks = 50)

plot of chunk unnamed-chunk-1

# Our calculated moran's statistic is clearly an outlier.

# Create a distance weights matrix Create a variogram to identify an
# appropriate distance
coords <- coordinates(poverty)  #create a points dataframe for the polygon centroids
plot(variogram(poverty$PROPOV ~ 1, location = coords, data = poverty, cloud = F), 
    type = "b")

plot of chunk unnamed-chunk-1


nbd <- dnearneigh(coords, d1 = 0, d2 = 10, row.names = poverty$FIPS)  #Create distance based weights matrix, based on Decimal Degrees?
nbdw <- nb2listw(nbd, style = "W", zero.policy = T)  #row standardized queens weights
moran.test(poverty$PROPOV, listw = nbdw, zero.policy = T)

## 
##  Moran's I test under randomisation
## 
## data:  poverty$PROPOV  
## weights: nbdw  
##  
## Moran I statistic standard deviate = 135.8, p-value < 2.2e-16
## alternative hypothesis: greater 
## sample estimates:
## Moran I statistic       Expectation          Variance 
##         1.032e-01        -3.220e-04         5.812e-07

# The Moran's I is significant at 0.103.  Because this value is so much
# lower (and takes much longer to run) than the queen's contiguity, we
# will use that instead.

DistMoran <- moran.mc(poverty$PROPOV, listw = nbdw, nsim = 999, zero.policy = T)
hist(DistMoran$res, breaks = 50)

plot of chunk unnamed-chunk-1


# Create a k-nearest neighbor weights matrix
install.packages("RANN")

## Installing package(s) into 'C:/Program Files/RStudio/R/library' (as 'lib'
## is unspecified)

## Error: trying to use CRAN without setting a mirror

library(RANN)

## Error: there is no package called 'RANN'

FIPS <- row.names(as(poverty, "data.frame"))
nbk4 <- knn2nb(knearneigh(coords, k = 4), row.names = FIPS)  #Creates k-nearest neighbors using 4 neighbors

## Warning: there is no package called 'RANN'

nbk4w <- nb2listw(nbk4, style = "W")  #row standardized
moran.test(poverty$PROPOV, listw = nbk4w)

## 
##  Moran's I test under randomisation
## 
## data:  poverty$PROPOV  
## weights: nbk4w  
##  
## Moran I statistic standard deviate = 51.59, p-value < 2.2e-16
## alternative hypothesis: greater 
## sample estimates:
## Moran I statistic       Expectation          Variance 
##         0.6275627        -0.0003220         0.0001481


kMoran <- moran.mc(poverty$PROPOV, listw = nbk4w, nsim = 999)
hist(kMoran$res, breaks = 50)

plot of chunk unnamed-chunk-1


# Look at the sptial structure... Moran's Scatter plot
poverty$scPROPOV <- scale(poverty$PROPOV)
poverty$lag_scPROPOV <- lag.listw(nbqw, poverty$scPROPOV, zero.policy = T)
plot(x = poverty$scPROPOV, y = poverty$lag_scPROPOV, main = "Moran Scatterplot of Poverty", 
    xlab = "Scaled Proportion of Poverty", ylab = "Lagged Poverty")
abline(h = 0, v = 0)
abline(lm(poverty$lag_scPROPOV ~ poverty$scPROPOV), lty = 3, lwd = 4, col = "red")

plot of chunk unnamed-chunk-1


# LISA MAP
locm <- localmoran(poverty$PROPOV, nbqw, zero.policy = T)  #Calculate the local Morann
# LISA map rendered in GEODA There are actually a few significant
# negatively autocorrelated counties.

####################################

# RQ1: measures of county-level employment on poverty

# Look at effects of UNemployed on poverty:
moran.test(poverty$PROAUNEM, listw = nbqw, zero.policy = T)  #What is the Moran's I of unemployment?

## 
##  Moran's I test under randomisation
## 
## data:  poverty$PROAUNEM  
## weights: nbqw  
##  
## Moran I statistic standard deviate = 50.26, p-value < 2.2e-16
## alternative hypothesis: greater 
## sample estimates:
## Moran I statistic       Expectation          Variance 
##         0.5409738        -0.0003224         0.0001160


unemp_ols <- lm(PROPOV ~ PROAUNEM, data = poverty)
shapiro.test(unemp_ols$residuals)  #reject null hypothesis.  The residuals are not normally distributed

## 
##  Shapiro-Wilk normality test
## 
## data:  unemp_ols$residuals 
## W = 0.9615, p-value < 2.2e-16

# test the autocorrelation in the model:
lm.morantest(unemp_ols, nbqw, zero.policy = T)

## 
##  Global Moran's I for regression residuals
## 
## data:  
## model: lm(formula = PROPOV ~ PROAUNEM, data = poverty)
## weights: nbqw
##  
## Moran I statistic standard deviate = 53.22, p-value < 2.2e-16
## alternative hypothesis: greater 
## sample estimates:
## Observed Moran's I        Expectation           Variance 
##          0.5728252         -0.0004965          0.0001160


unemp_sar <- lagsarlm(PROPOV ~ PROAUNEM, data = poverty, nbqw, zero.policy = T)  #Create a spatial lag model
summary(unemp_sar)

## 
## Call:lagsarlm(formula = PROPOV ~ PROAUNEM, data = poverty, listw = nbqw, 
##     zero.policy = T)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2346634 -0.0385288 -0.0065934  0.0312486  0.4758167 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##               Estimate Std. Error z value  Pr(>|z|)
## (Intercept) -0.0102097  0.0027982 -3.6487 0.0002636
## PROAUNEM     1.3546656  0.0432994 31.2860 < 2.2e-16
## 
## Rho: 0.6274, LR test value: 1589, p-value: < 2.22e-16
## Asymptotic standard error: 0.01387
##     z-value: 45.25, p-value: < 2.22e-16
## Wald statistic: 2047, p-value: < 2.22e-16
## 
## Log likelihood: 4264 for lag model
## ML residual variance (sigma squared): 0.003452, (sigma: 0.05875)
## Number of observations: 3107 
## Number of parameters estimated: 4 
## AIC: -8520, (AIC for lm: -6934)
## LM test for residual autocorrelation
## test value: 45.48, p-value: 1.5427e-11

# variables are significant AIC-8520.3 from -6933.5
anova(unemp_sar, unemp_ols)

##           Model df   AIC logLik Test L.Ratio p-value
## unemp_sar     1  4 -8520   4264    1                
## unemp_ols     2  3 -6934   3470    2    1589       0

# LRT, p-value = 0.  Therefore, accounting for autocorrelation is better.

# Look at the effects changing this may may alter the data...
pov_edit <- poverty  #copy the data

pov_edit@data[pov_edit@data$NAME == "Saunders", "PROAUNEM"] <- 0.3  #edit the data

# Predict original and edited data based on the SAR model...
origUN_predict <- as.data.frame(predict(unemp_sar))
un_predict <- as.data.frame(predict(unemp_sar, newdata = pov_edit, listw = nbqw, 
    zero.policy = T))
unempEffects <- un_predict$fit - origUN_predict$fit
pov_edit$UN_EDIT <- unempEffects
summary(unempEffects)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.1530 -0.0250  0.0037  0.0011  0.0314  0.4030


# Make a map...
library(RColorBrewer)
library(classInt)

## Loading required package: class

## Loading required package: e1071

colorPal_unemp <- brewer.pal(5, "PuOr")
cat_unemp <- classIntervals(pov_edit$UN_EDIT, n = 4, style = "jenks")
colors_unemp <- findColours(cat_unemp, colorPal_unemp)
plot(pov_edit, col = colors_unemp)
legend("bottomright", legend = round(cat_unemp$brks, 2), fill = colorPal_unemp, 
    bty = "n")
mtext("Effects of a change in Saunders County \n Changed unemployment to .3 from .03)", 
    side = 3, line = 1)

plot of chunk unnamed-chunk-1




#################################### Look at effects of UNDERemployed on
#################################### poverty:
moran.test(poverty$PROMUNDR, listw = nbqw, zero.policy = T)  #What is the Moran's I of unemployment?

## 
##  Moran's I test under randomisation
## 
## data:  poverty$PROMUNDR  
## weights: nbqw  
##  
## Moran I statistic standard deviate = 25.31, p-value < 2.2e-16
## alternative hypothesis: greater 
## sample estimates:
## Moran I statistic       Expectation          Variance 
##         0.2722758        -0.0003224         0.0001160


under_ols <- lm(PROPOV ~ PROMUNDR, data = poverty)
shapiro.test(under_ols$residuals)  #the residuals are not normally distributed

## 
##  Shapiro-Wilk normality test
## 
## data:  under_ols$residuals 
## W = 0.9591, p-value < 2.2e-16

# test the autocorrealtion in the model:
lm.morantest(under_ols, nbqw, zero.policy = T)  #There is significant autocorrelation in the residuals

## 
##  Global Moran's I for regression residuals
## 
## data:  
## model: lm(formula = PROPOV ~ PROMUNDR, data = poverty)
## weights: nbqw
##  
## Moran I statistic standard deviate = 53.09, p-value < 2.2e-16
## alternative hypothesis: greater 
## sample estimates:
## Observed Moran's I        Expectation           Variance 
##          0.5715729         -0.0004099          0.0001161


under_sar <- lagsarlm(PROPOV ~ PROMUNDR, data = poverty, nbqw, zero.policy = T)  #Create a spatial lag model
summary(under_sar)  #underemployment is significant

## 
## Call:lagsarlm(formula = PROPOV ~ PROMUNDR, data = poverty, listw = nbqw, 
##     zero.policy = T)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.291661 -0.040326 -0.005151  0.032460  0.407659 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##               Estimate Std. Error z value  Pr(>|z|)
## (Intercept) -0.0601893  0.0056757 -10.605 < 2.2e-16
## PROMUNDR     0.4816380  0.0230648  20.882 < 2.2e-16
## 
## Rho: 0.7345, LR test value: 2033, p-value: < 2.22e-16
## Asymptotic standard error: 0.01362
##     z-value: 53.93, p-value: < 2.22e-16
## Wald statistic: 2908, p-value: < 2.22e-16
## 
## Log likelihood: 3920 for lag model
## ML residual variance (sigma squared): 0.004132, (sigma: 0.06428)
## Number of observations: 3107 
## Number of parameters estimated: 4 
## AIC: -7832, (AIC for lm: -5801)
## LM test for residual autocorrelation
## test value: 83.49, p-value: < 2.22e-16


anova(under_sar, under_ols)  #Is adding autocorrelation a good idea?

##           Model df   AIC logLik Test L.Ratio p-value
## under_sar     1  4 -7832   3920    1                
## under_ols     2  3 -5801   2903    2    2033       0

# LRT p-value = 0. AIC -7831.9 from 5800.7 in OLS, so autocorrelation
# helps.

# Look at the effects changing this may may alter the data...
pov_edit@data[pov_edit@data$NAME == "Iredell", "PROMUNDR"] <- 0.66  #edit the data

# Predict original and edited data based on the SAR model...
origUNDER_predict <- as.data.frame(predict(under_sar))
under_predict <- as.data.frame(predict(under_sar, newdata = pov_edit, listw = nbqw, 
    zero.policy = T))
underEffects <- under_predict$fit - origUNDER_predict$fit
pov_edit$UNDER_EDIT <- underEffects
summary(underEffects)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.22700 -0.02990  0.01270  0.00102  0.03850  0.28100


# Make a map...
colorPal_under <- brewer.pal(5, "PuOr")
cat_under <- classIntervals(pov_edit$UNDER_EDIT, n = 4, style = "jenks")
colors_under <- findColours(cat_under, colorPal_under)
plot(pov_edit, col = colors_under)
legend("bottomright", legend = round(cat_under$brks, 2), fill = colorPal_under, 
    bty = "n")
mtext("Effects of a change in Iredel County \n Changed under-employment to .66 from .18)", 
    side = 3, line = 1)

plot of chunk unnamed-chunk-1



####################################

# Do racial composition, family composition, or overall county-level
# education affect poverty?  Do any of these factors modify the effects of
# employment?

# Use variables PROBLCK, PROHISP, PROHSLS (HS education), PROFEMHH (female
# HoH)

# Create a model to describe poverty...
full <- lm(PROPOV ~ PROBLCK + PROHISP + PROHSLS + PROFEMHH, data = poverty)
summary(full)

## 
## Call:
## lm(formula = PROPOV ~ PROBLCK + PROHISP + PROHSLS + PROFEMHH, 
##     data = poverty)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.2189 -0.0467 -0.0096  0.0366  0.4946 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.19804    0.00864  -22.92   <2e-16 ***
## PROBLCK      0.03541    0.01315    2.69   0.0071 ** 
## PROHISP      0.26220    0.01146   22.88   <2e-16 ***
## PROHSLS      0.43280    0.01147   37.74   <2e-16 ***
## PROFEMHH     0.76587    0.02958   25.89   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.0692 on 3102 degrees of freedom
## Multiple R-squared: 0.56,    Adjusted R-squared: 0.559 
## F-statistic:  986 on 4 and 3102 DF,  p-value: <2e-16

# test the autocorrelation in the model:
lm.morantest(full, nbqw, zero.policy = T)  #There is autocorrelation in the residuals, an OLS is not appropriate.

## 
##  Global Moran's I for regression residuals
## 
## data:  
## model: lm(formula = PROPOV ~ PROBLCK + PROHISP + PROHSLS +
## PROFEMHH, data = poverty)
## weights: nbqw
##  
## Moran I statistic standard deviate = 50.81, p-value < 2.2e-16
## alternative hypothesis: greater 
## sample estimates:
## Observed Moran's I        Expectation           Variance 
##          0.5455412         -0.0010798          0.0001157


sar <- lagsarlm(PROPOV ~ PROBLCK + PROHISP + PROHSLS + PROFEMHH, data = poverty, 
    nbqw, zero.policy = T)  #Create a spatial lag model
summary(sar)  #all parameteres are significant using a Z test; but black has a negative effect...  Remove it.

## 
## Call:lagsarlm(formula = PROPOV ~ PROBLCK + PROHISP + PROHSLS + PROFEMHH, 
##     data = poverty, listw = nbqw, zero.policy = T)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2007318 -0.0357017 -0.0064884  0.0272765  0.4789625 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##               Estimate Std. Error  z value  Pr(>|z|)
## (Intercept) -0.1952138  0.0066822 -29.2140 < 2.2e-16
## PROBLCK     -0.0561234  0.0103739  -5.4101 6.301e-08
## PROHISP      0.1285638  0.0094156  13.6543 < 2.2e-16
## PROHSLS      0.2801848  0.0097127  28.8473 < 2.2e-16
## PROFEMHH     0.6811782  0.0231529  29.4209 < 2.2e-16
## 
## Rho: 0.5746, LR test value: 1426, p-value: < 2.22e-16
## Asymptotic standard error: 0.01345
##     z-value: 42.71, p-value: < 2.22e-16
## Wald statistic: 1824, p-value: < 2.22e-16
## 
## Log likelihood: 4603 for lag model
## ML residual variance (sigma squared): 0.00282, (sigma: 0.05311)
## Number of observations: 3107 
## Number of parameters estimated: 7 
## AIC: -9192, (AIC for lm: -7768)
## LM test for residual autocorrelation
## test value: 122.5, p-value: < 2.22e-16

anova(sar, full)

##      Model df   AIC logLik Test L.Ratio p-value
## sar      1  7 -9192   4603    1                
## full     2  6 -7768   3890    2    1426       0

# sar is better using LRT; p-value = 0; LR-statistic = 1426.3 AIC improved
# from -7767.9 to -9192.2

# REMOVE PROBLCK FROM THE MODEL...
sar1 <- lagsarlm(PROPOV ~ PROHISP + PROHSLS + PROFEMHH, data = poverty, nbqw, 
    zero.policy = T)  #Create a spatial lag model
summary(sar1)

## 
## Call:
## lagsarlm(formula = PROPOV ~ PROHISP + PROHSLS + PROFEMHH, data = poverty, 
##     listw = nbqw, zero.policy = T)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2011347 -0.0356693 -0.0067749  0.0271671  0.4741361 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##               Estimate Std. Error z value  Pr(>|z|)
## (Intercept) -0.1802129  0.0062303 -28.925 < 2.2e-16
## PROHISP      0.1405236  0.0091660  15.331 < 2.2e-16
## PROHSLS      0.2742083  0.0097192  28.213 < 2.2e-16
## PROFEMHH     0.5938867  0.0177966  33.371 < 2.2e-16
## 
## Rho: 0.5602, LR test value: 1404, p-value: < 2.22e-16
## Asymptotic standard error: 0.01313
##     z-value: 42.67, p-value: < 2.22e-16
## Wald statistic: 1821, p-value: < 2.22e-16
## 
## Log likelihood: 4588 for lag model
## ML residual variance (sigma squared): 0.002859, (sigma: 0.05347)
## Number of observations: 3107 
## Number of parameters estimated: 6 
## AIC: -9165, (AIC for lm: -7763)
## LM test for residual autocorrelation
## test value: 172.4, p-value: < 2.22e-16

# all parameters are significant. There is still residual autocorrelation
# (LM test 172.43; p-value < 0.001)
bptest.sarlm(sar1)  #test for residual heteroscedacity...

## 
##  studentized Breusch-Pagan test
## 
## data:  
## BP = 22.43, df = 3, p-value = 5.316e-05


# One theory is interrelated institutional environemtns based on gender,
# class, or race. Such issues include economic dislocaations, faltering
# economies, high unemployment. So, lets use rate to interact with some of
# these to reduce the residual autocorrelation in the spatial model.

# Create a spatial model that interacts race with those who work in the
# same county in which they live
sar2 <- lagsarlm(PROPOV ~ PROHISP * PROWKCO + PROHSLS + PROFEMHH, data = poverty, 
    nbqw, zero.policy = T)  #Create a spatial lag model
summary(sar2)

## 
## Call:lagsarlm(formula = PROPOV ~ PROHISP * PROWKCO + PROHSLS + PROFEMHH, 
##     data = poverty, listw = nbqw, zero.policy = T)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2013555 -0.0336625 -0.0064195  0.0265836  0.4734179 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##                   Estimate Std. Error  z value  Pr(>|z|)
## (Intercept)     -0.2441695  0.0082094 -29.7427 < 2.2e-16
## PROHISP          0.2852650  0.0570535   5.0000 5.734e-07
## PROWKCO          0.0697996  0.0060878  11.4655 < 2.2e-16
## PROHSLS          0.3033402  0.0098326  30.8503 < 2.2e-16
## PROFEMHH         0.5839334  0.0171635  34.0218 < 2.2e-16
## PROHISP:PROWKCO -0.1926975  0.0688914  -2.7971  0.005156
## 
## Rho: 0.5451, LR test value: 1358, p-value: < 2.22e-16
## Asymptotic standard error: 0.01331
##     z-value: 40.96, p-value: < 2.22e-16
## Wald statistic: 1678, p-value: < 2.22e-16
## 
## Log likelihood: 4653 for lag model
## ML residual variance (sigma squared): 0.002753, (sigma: 0.05247)
## Number of observations: 3107 
## Number of parameters estimated: 8 
## AIC: -9290, (AIC for lm: -7934)
## LM test for residual autocorrelation
## test value: 77.38, p-value: < 2.22e-16

bptest.sarlm(sar2)  #test for residual heteroscedacity...  Still have residual autocorrelation...

## 
##  studentized Breusch-Pagan test
## 
## data:  
## BP = 37.39, df = 5, p-value = 5.013e-07


# Create a spatial model that interacts race with unemployment
sar3 <- lagsarlm(PROPOV ~ PROHISP * PROAUNEM + PROHSLS + PROFEMHH, data = poverty, 
    nbqw, zero.policy = T)  #Create a spatial lag model
summary(sar3)

## 
## Call:lagsarlm(formula = PROPOV ~ PROHISP * PROAUNEM + PROHSLS + PROFEMHH, 
##     data = poverty, listw = nbqw, zero.policy = T)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2040711 -0.0330819 -0.0072416  0.0258053  0.5037044 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##                    Estimate Std. Error  z value  Pr(>|z|)
## (Intercept)      -0.1641275  0.0059552 -27.5603 < 2.2e-16
## PROHISP           0.1459301  0.0212789   6.8580 6.984e-12
## PROAUNEM          0.7645064  0.0396893  19.2623 < 2.2e-16
## PROHSLS           0.2230297  0.0094104  23.7003 < 2.2e-16
## PROFEMHH          0.4548139  0.0175374  25.9340 < 2.2e-16
## PROHISP:PROAUNEM -0.2785385  0.1969625  -1.4142    0.1573
## 
## Rho: 0.5049, LR test value: 1212, p-value: < 2.22e-16
## Asymptotic standard error: 0.01318
##     z-value: 38.31, p-value: < 2.22e-16
## Wald statistic: 1468, p-value: < 2.22e-16
## 
## Log likelihood: 4778 for lag model
## ML residual variance (sigma squared): 0.002565, (sigma: 0.05065)
## Number of observations: 3107 
## Number of parameters estimated: 8 
## AIC: -9541, (AIC for lm: -8331)
## LM test for residual autocorrelation
## test value: 250.3, p-value: < 2.22e-16

bptest.sarlm(sar3)

## 
##  studentized Breusch-Pagan test
## 
## data:  
## BP = 42.86, df = 5, p-value = 3.953e-08


# Which model is the best?
anova(sar2, sar3)

##      Model df   AIC logLik
## sar2     1  8 -9290   4653
## sar3     2  8 -9541   4778

# Sar 2 is the best model and tells us which characteristics best
# describes poverty