X Wang - Lab 3

GEOG 5023: Quantitative Methods in Geography

library(classInt)

## Loading required package: class

## Loading required package: e1071

library(spdep)

## Loading required package: sp

## Loading required package: boot

## Loading required package: Matrix

## Loading required package: lattice

## Attaching package: 'lattice'

## The following object(s) are masked from 'package:boot':
## 
## melanoma

## Loading required package: MASS

## Loading required package: nlme

## Loading required package: maptools

## Loading required package: foreign

## Loading required package: grid

## Checking rgeos availability: FALSE Note: when rgeos is not available,
## polygon geometry computations in maptools depend on gpclib, which has a
## restricted licence. It is disabled by default; to enable gpclib, type
## gpclibPermit()

## Loading required package: deldir

## deldir 0.0-21

## Loading required package: coda

## Loading required package: splines

library(RColorBrewer)
library(gstat)
library(maptools)

# Load shapefiles
pov <- readShapePoly("/Users/xiwang/Dropbox/GEOG 5023 - offline/Labs/Lab 3/poverty/poverty.shp")
class(pov)

## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"

plot(pov, axes = T)

plot of chunk unnamed-chunk-1

Variogram

plot(variogram(pov$PROPOV ~ 1, locations = coordinates(pov), data = pov, cloud = F), 
    type = "b", main = "Variogram of PROPOV")  # The variable shows significant spatial autocorrelation (range is approximately at d=13); the nugget effect is also substantial (d=1)

plot of chunk unnamed-chunk-2

Neighbor Definitions and Weights Matrices

# Queen's contiguity
coords <- coordinates(pov)
pov_nbq <- poly2nb(pov)
plot(pov)
plot(pov_nbq, coords, add = T)

plot of chunk unnamed-chunk-3


# 2 nearest neighbors
pov_kn2 <- knn2nb(knearneigh(coords, k = 2))  # closest 2 neighbors
plot(pov)
plot(pov_kn2, coords, add = T)

plot of chunk unnamed-chunk-3


# Queen's contiguity makes the most sense in this context, since a
# county's child poverty rate can be affected by all adjoining counties.
# Row standardization of Queen's contiguity
pov_nbq_w <- nb2listw(pov_nbq, zero.policy = T)

Spatial Autocorrelation in the PROPOV Variable

# Moran's I
moran_PROPOV <- moran.test(pov$PROPOV, pov_nbq_w, zero.policy = T)
moran_PROPOV

## 
##  Moran's I test under randomisation
## 
## data:  pov$PROPOV  
## weights: pov_nbq_w  
##  
## Moran I statistic standard deviate = 57.52, p-value < 2.2e-16
## alternative hypothesis: greater 
## sample estimates:
## Moran I statistic       Expectation          Variance 
##         0.6194394        -0.0003224         0.0001161

moran_PROPOV_sig <- moran.mc(pov$PROPOV, listw = pov_nbq_w, zero.policy = T, 
    nsim = 999)  # The null hypothesis is that autocorrelation is random
moran_PROPOV_sig  # Moran's I = 0.6202, observed rank = 1000, p-value = 0.001, the PROPOV variable shows that spatial auto covariance is significant,therefore it is not due to chance

## 
##  Monte-Carlo simulation of Moran's I
## 
## data:  pov$PROPOV 
## weights: pov_nbq_w  
## number of simulations + 1: 1000 
##  
## statistic = 0.6202, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater

The variogram and Moran's I indicate that there is spatial autocorrelation in childhood poverty (PROPOV), but this spatial pattern can be caused by the spatial patterning of the covariates of poverty. A full analysis involves fitting an OLS model and comparing that model to spatial models.

OLS and Spatial Model Comparisons

# Fit the regression models with all parameters of interest, including
# those that deal with racial composition, family composition,
# county-level education, county-level employment--the structural
# components of poverty.

# Run standard OLS model
pov_lm <- lm(pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS + pov$PROWKCO + 
    pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR)
summary(pov_lm)

## 
## Call:
## lm(formula = pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS + 
##     pov$PROWKCO + pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.2119 -0.0349 -0.0055  0.0283  0.5196 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.33493    0.00951  -35.23  < 2e-16 ***
## pov$PROBLCK   0.19209    0.01112   17.28  < 2e-16 ***
## pov$PROHISP   0.20029    0.00922   21.73  < 2e-16 ***
## pov$PROHSLS   0.37548    0.01022   36.74  < 2e-16 ***
## pov$PROWKCO   0.08579    0.00621   13.83  < 2e-16 ***
## pov$PROFEMHH  0.21378    0.02759    7.75  1.3e-14 ***
## pov$PROAUNEM  0.86978    0.04384   19.84  < 2e-16 ***
## pov$PROMUNDR  0.52760    0.02165   24.37  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.0546 on 3099 degrees of freedom
## Multiple R-squared: 0.726,   Adjusted R-squared: 0.726 
## F-statistic: 1.18e+03 on 7 and 3099 DF,  p-value: <2e-16

# Look at residuals
par(mfrow = c(2, 2))
plot(pov_lm)  # The residuals don't have a normal distrution around 0 and don't appear to have a mean of 0; the model appears to systematically underpredict the data

pov$lmresid <- residuals(pov_lm)
lm.morantest(pov_lm, pov_nbq_w, zero.policy = T)  # Moran's I = 0.4160, p-value = 2.2e-16 => the spatial autocovariance in the residuals is significant

## 
##  Global Moran's I for regression residuals
## 
## data:  
## model: lm(formula = pov$PROPOV ~ pov$PROBLCK + pov$PROHISP +
## pov$PROHSLS + pov$PROWKCO + pov$PROFEMHH + pov$PROAUNEM +
## pov$PROMUNDR)
## weights: pov_nbq_w
##  
## Moran I statistic standard deviate = 38.83, p-value < 2.2e-16
## alternative hypothesis: greater 
## sample estimates:
## Observed Moran's I        Expectation           Variance 
##          0.4159612         -0.0013925          0.0001155


# Compare against spatial lag model
pov_lag <- lagsarlm(pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS + pov$PROWKCO + 
    pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR, listw = pov_nbq_w, zero.policy = T)
summary(pov_lag)  # p-value for all predictors are significant, Rho: 0.4224, AIC: -10115, p-value for LM test for resid autocorrelation: < 2.22e-16

## 
## Call:
## lagsarlm(formula = pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS + 
##     pov$PROWKCO + pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR, 
##     listw = pov_nbq_w, zero.policy = T)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.1890572 -0.0290433 -0.0052518  0.0221836  0.4997118 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##                Estimate Std. Error  z value  Pr(>|z|)
## (Intercept)  -0.2957657  0.0082125 -36.0139 < 2.2e-16
## pov$PROBLCK   0.0740629  0.0100108   7.3983 1.379e-13
## pov$PROHISP   0.1230418  0.0082666  14.8842 < 2.2e-16
## pov$PROHSLS   0.2854499  0.0093095  30.6621 < 2.2e-16
## pov$PROWKCO   0.0526405  0.0053097   9.9139 < 2.2e-16
## pov$PROFEMHH  0.3330523  0.0235532  14.1404 < 2.2e-16
## pov$PROAUNEM  0.5470438  0.0391371  13.9776 < 2.2e-16
## pov$PROMUNDR  0.4021852  0.0187461  21.4543 < 2.2e-16
## 
## Rho: 0.4224, LR test value: 876.5, p-value: < 2.22e-16
## Asymptotic standard error: 0.01355
##     z-value: 31.16, p-value: < 2.22e-16
## Wald statistic: 971, p-value: < 2.22e-16
## 
## Log likelihood: 5067 for lag model
## ML residual variance (sigma squared): 0.002166, (sigma: 0.04654)
## Number of observations: 3107 
## Number of parameters estimated: 10 
## AIC: -10115, (AIC for lm: -9240)
## LM test for residual autocorrelation
## test value: 182.8, p-value: < 2.22e-16

# Look at residuals
pov$lagresid <- residuals(pov_lag)
moran.test(pov$lagresid, listw = pov_nbq_w, zero.policy = T)  # Moran's I = 0.1203, p-value < 2.2e-16 => the spatial autocovariance in the residuals is still significant, but the degree is quite a bit less than the OLS model

## 
##  Moran's I test under randomisation
## 
## data:  pov$lagresid  
## weights: pov_nbq_w  
##  
## Moran I statistic standard deviate = 11.2, p-value < 2.2e-16
## alternative hypothesis: greater 
## sample estimates:
## Moran I statistic       Expectation          Variance 
##         0.1202535        -0.0003224         0.0001159

The Moran's I test on the residuals indicate that there is spatial lag in our variable of interest, so the OLS model will be biased and inefficient.

The results of the lag model indicate that each of the predictors and the spatial lag are significant. The output includes a z-test based on the standard error of Moran's I 0.4224/0.013555 = 31.161. Furthermore, the AIC for the lag model (-10115) is lower than its OLS counterpart (-9240.4), and the LR test indicates there is significant difference between the two models due to the spatial term. Proceed with spatial regression analysis.

Univariate Spatial Regression

County-level employment measures as predictors

# Proportion of persons employed in the county they live in
povlag_pwkco <- lagsarlm(pov$PROPOV ~ pov$PROWKCO, listw = pov_nbq_w, zero.policy = T)
summary(povlag_pwkco)  # p-value for predictor: 2.068e-08, Rho: 0.77559, AIC: -7437.5, p-value for LM test for resid autocorrelation: < 2.22e-16

## 
## Call:lagsarlm(formula = pov$PROPOV ~ pov$PROWKCO, listw = pov_nbq_w, 
##     zero.policy = T)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.211545 -0.043946 -0.010089  0.032930  0.462267 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##              Estimate Std. Error z value  Pr(>|z|)
## (Intercept) 0.0198214  0.0056991  3.4780 0.0005051
## pov$PROWKCO 0.0395218  0.0070496  5.6062 2.068e-08
## 
## Rho: 0.7756, LR test value: 2162, p-value: < 2.22e-16
## Asymptotic standard error: 0.01352
##     z-value: 57.38, p-value: < 2.22e-16
## Wald statistic: 3293, p-value: < 2.22e-16
## 
## Log likelihood: 3723 for lag model
## ML residual variance (sigma squared): 0.004598, (sigma: 0.06781)
## Number of observations: 3107 
## Number of parameters estimated: 4 
## AIC: -7438, (AIC for lm: -5277)
## LM test for residual autocorrelation
## test value: 150.6, p-value: < 2.22e-16

impacts(povlag_pwkco, listw = pov_nbq_w)  # direct: 0.0475, indirect: 0.1284, total: 0.1759

## Impact measures (lag, exact):
##              Direct Indirect  Total
## pov$PROWKCO 0.04751   0.1284 0.1759


# Proportion of workers aged 16+ who are unemployed
povlag_paunem <- lagsarlm(pov$PROPOV ~ pov$PROAUNEM, listw = pov_nbq_w, zero.policy = T)
summary(povlag_paunem)  # p-value for predictor: < 2.2e-16, Rho: 0.62739, AIC: -8520.3, p-value for LM test for resid autocorrelation: 1.5427e-11

## 
## Call:lagsarlm(formula = pov$PROPOV ~ pov$PROAUNEM, listw = pov_nbq_w, 
##     zero.policy = T)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2346634 -0.0385288 -0.0065934  0.0312486  0.4758167 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##                Estimate Std. Error z value  Pr(>|z|)
## (Intercept)  -0.0102097  0.0027982 -3.6487 0.0002636
## pov$PROAUNEM  1.3546656  0.0432994 31.2860 < 2.2e-16
## 
## Rho: 0.6274, LR test value: 1589, p-value: < 2.22e-16
## Asymptotic standard error: 0.01387
##     z-value: 45.25, p-value: < 2.22e-16
## Wald statistic: 2047, p-value: < 2.22e-16
## 
## Log likelihood: 4264 for lag model
## ML residual variance (sigma squared): 0.003452, (sigma: 0.05875)
## Number of observations: 3107 
## Number of parameters estimated: 4 
## AIC: -8520, (AIC for lm: -6934)
## LM test for residual autocorrelation
## test value: 45.48, p-value: 1.5427e-11

impacts(povlag_paunem, listw = pov_nbq_w)  # direct: 1.4952, indirect: 2.1375, total: 3.6327

## Impact measures (lag, exact):
##              Direct Indirect Total
## pov$PROAUNEM  1.495    2.137 3.633


# Proportion of male workers aged 16+ who are underemployed
povlag_pmundr <- lagsarlm(pov$PROPOV ~ pov$PROMUNDR, listw = pov_nbq_w, zero.policy = T)
summary(povlag_pmundr)  # p-value for predictor: < 2.2e-16, Rho: 0.73454, AIC: -7831.9, p-value for LM test for resid autocorrelation: < 2.22e-16

## 
## Call:lagsarlm(formula = pov$PROPOV ~ pov$PROMUNDR, listw = pov_nbq_w, 
##     zero.policy = T)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.291661 -0.040326 -0.005151  0.032460  0.407659 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##                Estimate Std. Error z value  Pr(>|z|)
## (Intercept)  -0.0601893  0.0056757 -10.605 < 2.2e-16
## pov$PROMUNDR  0.4816380  0.0230648  20.882 < 2.2e-16
## 
## Rho: 0.7345, LR test value: 2033, p-value: < 2.22e-16
## Asymptotic standard error: 0.01362
##     z-value: 53.93, p-value: < 2.22e-16
## Wald statistic: 2908, p-value: < 2.22e-16
## 
## Log likelihood: 3920 for lag model
## ML residual variance (sigma squared): 0.004132, (sigma: 0.06428)
## Number of observations: 3107 
## Number of parameters estimated: 4 
## AIC: -7832, (AIC for lm: -5801)
## LM test for residual autocorrelation
## test value: 83.49, p-value: < 2.22e-16

impacts(povlag_pmundr, listw = pov_nbq_w)  # direct: 0.5623, indirect: 1.2504, total: 1.8126

## Impact measures (lag, exact):
##              Direct Indirect Total
## pov$PROMUNDR 0.5623     1.25 1.813

Other predictors of child poverty

# Racial Composition Proportion non-Hispanic black
povlag_pblck <- lagsarlm(pov$PROPOV ~ pov$PROBLCK, listw = pov_nbq_w, zero.policy = T)
summary(povlag_pblck)  # p-value for predictor: < 2.2e-16, Rho: 0.71985, AIC: -7723.5, p-value for LM test for resid autocorrelation: 0.92641

## 
## Call:lagsarlm(formula = pov$PROPOV ~ pov$PROBLCK, listw = pov_nbq_w, 
##     zero.policy = T)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2160498 -0.0417652 -0.0065088  0.0334262  0.4739797 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##              Estimate Std. Error z value  Pr(>|z|)
## (Intercept) 0.0467795  0.0029247  15.995 < 2.2e-16
## pov$PROBLCK 0.1559274  0.0103291  15.096 < 2.2e-16
## 
## Rho: 0.7198, LR test value: 1846, p-value: < 2.22e-16
## Asymptotic standard error: 0.01455
##     z-value: 49.47, p-value: < 2.22e-16
## Wald statistic: 2448, p-value: < 2.22e-16
## 
## Log likelihood: 3866 for lag model
## ML residual variance (sigma squared): 0.004307, (sigma: 0.06563)
## Number of observations: 3107 
## Number of parameters estimated: 4 
## AIC: -7724, (AIC for lm: -5879)
## LM test for residual autocorrelation
## test value: 0.008531, p-value: 0.92641

impacts(povlag_pblck, listw = pov_nbq_w)  # direct: 0.1804, indirect: 0.3757, total: 0.5561

## Impact measures (lag, exact):
##             Direct Indirect  Total
## pov$PROBLCK 0.1804   0.3757 0.5561


# Proportion Hispanic
povlag_phisp <- lagsarlm(pov$PROPOV ~ pov$PROHISP, listw = pov_nbq_w, zero.policy = T)
summary(povlag_phisp)  # p-value for predictor: 6.661e-16, Rho: 0.76003, AIC: -7479.1, p-value for LM test for resid autocorrelation: < 2.22e-16

## 
## Call:lagsarlm(formula = pov$PROPOV ~ pov$PROHISP, listw = pov_nbq_w, 
##     zero.policy = T)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2132604 -0.0435487 -0.0074707  0.0333935  0.4655743 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##             Estimate Std. Error z value  Pr(>|z|)
## (Intercept) 0.047391   0.003105 15.2627 < 2.2e-16
## pov$PROHISP 0.096385   0.011910  8.0927 6.661e-16
## 
## Rho: 0.76, LR test value: 2032, p-value: < 2.22e-16
## Asymptotic standard error: 0.01406
##     z-value: 54.07, p-value: < 2.22e-16
## Wald statistic: 2924, p-value: < 2.22e-16
## 
## Log likelihood: 3744 for lag model
## ML residual variance (sigma squared): 0.004573, (sigma: 0.06763)
## Number of observations: 3107 
## Number of parameters estimated: 4 
## AIC: -7479, (AIC for lm: -5450)
## LM test for residual autocorrelation
## test value: 97.77, p-value: < 2.22e-16

impacts(povlag_phisp, listw = pov_nbq_w)  # direct: 0.1145, indirect: 0.2867, total: 0.4013

## Impact measures (lag, exact):
##             Direct Indirect  Total
## pov$PROHISP 0.1145   0.2867 0.4013


# Family Composition Proportion female-headed families with children
povlag_pfemhh <- lagsarlm(pov$PROPOV ~ pov$PROFEMHH, listw = pov_nbq_w, zero.policy = T)
summary(povlag_pfemhh)  # p-value for predictor: < 2.2e-16, Rho: 0.70847, AIC: -8269.9, p-value for LM test for resid autocorrelation: 3.3551e-05

## 
## Call:lagsarlm(formula = pov$PROPOV ~ pov$PROFEMHH, listw = pov_nbq_w, 
##     zero.policy = T)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2226634 -0.0396259 -0.0069339  0.0313656  0.4450728 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##                Estimate Std. Error z value  Pr(>|z|)
## (Intercept)  -0.0233532  0.0030557 -7.6425 2.132e-14
## pov$PROFEMHH  0.5587807  0.0202177 27.6383 < 2.2e-16
## 
## Rho: 0.7085, LR test value: 2050, p-value: < 2.22e-16
## Asymptotic standard error: 0.01237
##     z-value: 57.28, p-value: < 2.22e-16
## Wald statistic: 3280, p-value: < 2.22e-16
## 
## Log likelihood: 4139 for lag model
## ML residual variance (sigma squared): 0.00363, (sigma: 0.06025)
## Number of observations: 3107 
## Number of parameters estimated: 4 
## AIC: -8270, (AIC for lm: -6222)
## LM test for residual autocorrelation
## test value: 17.2, p-value: 3.3551e-05

impacts(povlag_pfemhh, listw = pov_nbq_w)  # direct: 0.6420, indirect: 1.2730, total: 1.9149

## Impact measures (lag, exact):
##              Direct Indirect Total
## pov$PROFEMHH  0.642    1.273 1.915


# Education Level Proportion with a high school education or less
povlag_phsls <- lagsarlm(pov$PROPOV ~ pov$PROHSLS, listw = pov_nbq_w, zero.policy = T)
summary(povlag_phsls)  # p-value for predictor: < 2.2e-16, Rho: 0.70362, AIC: -7885.7, p-value for LM test for resid autocorrelation: 4.5994e-07

## 
## Call:lagsarlm(formula = pov$PROPOV ~ pov$PROHSLS, listw = pov_nbq_w, 
##     zero.policy = T)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2058386 -0.0416047 -0.0084195  0.0312799  0.4236786 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##               Estimate Std. Error z value  Pr(>|z|)
## (Intercept) -0.0952330  0.0067933 -14.019 < 2.2e-16
## pov$PROHSLS  0.2462637  0.0115701  21.284 < 2.2e-16
## 
## Rho: 0.7036, LR test value: 1815, p-value: < 2.22e-16
## Asymptotic standard error: 0.01401
##     z-value: 50.21, p-value: < 2.22e-16
## Wald statistic: 2521, p-value: < 2.22e-16
## 
## Log likelihood: 3947 for lag model
## ML residual variance (sigma squared): 0.004116, (sigma: 0.06416)
## Number of observations: 3107 
## Number of parameters estimated: 4 
## AIC: -7886, (AIC for lm: -6072)
## LM test for residual autocorrelation
## test value: 25.43, p-value: 4.5994e-07

impacts(povlag_phsls, listw = pov_nbq_w)  # direct: 0.2821, indirect: 0.5480, total: 0.8302

## Impact measures (lag, exact):
##             Direct Indirect  Total
## pov$PROHSLS 0.2821    0.548 0.8302

The summary of the full lag model (pov_lag) shows that all the included predictors contribute significantly. The model AIC is -10115, and the p-value for LM test for resid autocorrelation: < 2.22e-16. Though the AIC is low compared to univariate models, the model complexity motivates exploration of other multivariate spatial models. As well, the presence of spatial autocorrelation in the residuals indicates the possibility of model misspecification.

Multivariate Spatial Regression

Full Lag Model (pov_lag)

# Look at relative contribution of variables
impacts(pov_lag, listw = pov_nbq_w)  # total - problck: 0.12815509; prohisp:0.21290593, prohsls: 0.49392951; prowkco: 0.09108666; profemhh: 0.57629861; proaunem: 0.94657964; promundr: 0.69592303

## Impact measures (lag, exact):
##               Direct Indirect   Total
## pov$PROBLCK  0.07689  0.05126 0.12816
## pov$PROHISP  0.12775  0.08516 0.21291
## pov$PROHSLS  0.29636  0.19756 0.49393
## pov$PROWKCO  0.05465  0.03643 0.09109
## pov$PROFEMHH 0.34579  0.23051 0.57630
## pov$PROAUNEM 0.56796  0.37862 0.94658
## pov$PROMUNDR 0.41756  0.27836 0.69592

# Use B-P test to see if there is heteroscedacity in the residuals
bptest.sarlm(pov_lag)  # BP = 58.0829, df = 7, p-value = 3.64e-10

## 
##  studentized Breusch-Pagan test
## 
## data:  
## BP = 58.08, df = 7, p-value = 3.64e-10

Employment Interaction Model

# Looking at the three measures of employment--PROWKCO, PROAUNEM, and
# PROMUNDR--PROAUNEM is better than the other two measures at predicting
# PROPOV. It's model AIC is the lowest (indicating the best fit of the
# three) and a unit of change results in the highest average, overall
# change in child poverty (increase of 3.63). Using this measure also
# makes sense because it is the most direct measure of general employment
# levels of county residents. So I will interact the other factors with
# this measure of employment.
povlag_paunem_int <- lagsarlm(pov$PROPOV ~ pov$PROBLCK * pov$PROAUNEM + pov$PROHISP * 
    pov$PROAUNEM + pov$PROFEMHH * pov$PROAUNEM + pov$PROHSLS * pov$PROAUNEM, 
    listw = pov_nbq_w, zero.policy = T)
summary(povlag_paunem_int)  # only p-value for predictor pov$PROBLCK is not significant , Rho: 0.49285, AIC: -9800.3, p-value for LM test for resid autocorrelation: < 2.22e-16

## 
## Call:lagsarlm(formula = pov$PROPOV ~ pov$PROBLCK * pov$PROAUNEM + 
##     pov$PROHISP * pov$PROAUNEM + pov$PROFEMHH * pov$PROAUNEM + 
##     pov$PROHSLS * pov$PROAUNEM, listw = pov_nbq_w, zero.policy = T)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2186812 -0.0315493 -0.0049033  0.0256617  0.4543385 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##                            Estimate Std. Error z value  Pr(>|z|)
## (Intercept)               -0.025321   0.013264 -1.9090  0.056268
## pov$PROBLCK                0.037217   0.020365  1.8275  0.067626
## pov$PROAUNEM              -1.559133   0.208514 -7.4774 7.594e-14
## pov$PROHISP                0.170382   0.020823  8.1824 2.220e-16
## pov$PROFEMHH               0.103479   0.038796  2.6673  0.007647
## pov$PROHSLS                0.098882   0.019417  5.0924 3.535e-07
## pov$PROBLCK:pov$PROAUNEM  -0.561492   0.207573 -2.7050  0.006830
## pov$PROAUNEM:pov$PROHISP  -0.426794   0.191461 -2.2291  0.025805
## pov$PROAUNEM:pov$PROFEMHH  5.020064   0.401243 12.5113 < 2.2e-16
## pov$PROAUNEM:pov$PROHSLS   2.192811   0.291402  7.5250 5.262e-14
## 
## Rho: 0.4929, LR test value: 1132, p-value: < 2.22e-16
## Asymptotic standard error: 0.01387
##     z-value: 35.55, p-value: < 2.22e-16
## Wald statistic: 1264, p-value: < 2.22e-16
## 
## Log likelihood: 4912 for lag model
## ML residual variance (sigma squared): 0.00236, (sigma: 0.04858)
## Number of observations: 3107 
## Number of parameters estimated: 12 
## AIC: -9800, (AIC for lm: -8670)
## LM test for residual autocorrelation
## test value: 113, p-value: < 2.22e-16

impacts(povlag_paunem_int, listw = pov_nbq_w)  # total - problck: 0.07333926; paunem: -3.07238282; phisp: 0.33575055; pfemhh: 0.20391222; phsls: 0.19485311; pblck:paunem: -1.10645967; phisp:paunem: -0.84102726; pfemhh:paunem: 9.89239443; phsls:paunem: 4.32109041

## Impact measures (lag, exact):
##                             Direct Indirect    Total
## pov$PROBLCK                0.03927  0.03406  0.07334
## pov$PROAUNEM              -1.64532 -1.42706 -3.07238
## pov$PROHISP                0.17980  0.15595  0.33575
## pov$PROFEMHH               0.10920  0.09471  0.20391
## pov$PROHSLS                0.10435  0.09051  0.19485
## pov$PROBLCK:pov$PROAUNEM  -0.59253 -0.51393 -1.10646
## pov$PROAUNEM:pov$PROHISP  -0.45039 -0.39064 -0.84103
## pov$PROAUNEM:pov$PROFEMHH  5.29757  4.59483  9.89239
## pov$PROAUNEM:pov$PROHSLS   2.31403  2.00706  4.32109

# The effects of race (both black and hispanic) on employment decreases
# child poverty; the effects of female-headed households with children
# dramatically increases child poverty; the effects of education level
# also substantially increases child poverty

Prediction Models

# Reducing the Full Model To try to reduce model complexity from the full
# model (pov_lag), remove variables PROBLCK, PROHISP, and PROWKCO since
# their univariate models had the 3 highest AICs. As well in both the
# univariate and multivariate models, these variables had the least
# average, total impacts on child poverty per unit change.
povlag_r1 <- lagsarlm(pov$PROPOV ~ pov$PROHSLS + pov$PROFEMHH + pov$PROAUNEM + 
    pov$PROMUNDR, listw = pov_nbq_w, zero.policy = T)
summary(povlag_r1)  # p-values for all predictors are significant , Rho: 0.50543, AIC: -9789.7, p-value for LM test for resid autocorrelation: < 2.22e-16

## 
## Call:lagsarlm(formula = pov$PROPOV ~ pov$PROHSLS + pov$PROFEMHH + 
##     pov$PROAUNEM + pov$PROMUNDR, listw = pov_nbq_w, zero.policy = T)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2089551 -0.0303175 -0.0064557  0.0234504  0.4989275 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##                Estimate Std. Error z value  Pr(>|z|)
## (Intercept)  -0.2614752  0.0076461 -34.197 < 2.2e-16
## pov$PROHSLS   0.2563777  0.0095067  26.968 < 2.2e-16
## pov$PROFEMHH  0.4427462  0.0165534  26.747 < 2.2e-16
## pov$PROAUNEM  0.5147227  0.0391130  13.160 < 2.2e-16
## pov$PROMUNDR  0.4092532  0.0189675  21.576 < 2.2e-16
## 
## Rho: 0.5054, LR test value: 1323, p-value: < 2.22e-16
## Asymptotic standard error: 0.0128
##     z-value: 39.49, p-value: < 2.22e-16
## Wald statistic: 1559, p-value: < 2.22e-16
## 
## Log likelihood: 4902 for lag model
## ML residual variance (sigma squared): 0.002369, (sigma: 0.04867)
## Number of observations: 3107 
## Number of parameters estimated: 7 
## AIC: -9790, (AIC for lm: -8468)
## LM test for residual autocorrelation
## test value: 233.1, p-value: < 2.22e-16


# The AIC for the reduced model povlag_r1 is higher than the full model,
# and since AIC accounts for model complexity, try reducing the full model
# by only one variable. Remove only PROWKCO since it had the lowest AIC
# and the least average, total impact on child poverty per unit change.
povlag_r2 <- pov_lag <- lagsarlm(pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS + 
    pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR, listw = pov_nbq_w, zero.policy = T)
summary(povlag_r2)  # p-values for all predictors are significant , Rho: 0.4446, AIC: -10022, p-value for LM test for resid autocorrelation: < 2.22e-16

## 
## Call:
## lagsarlm(formula = pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS + 
##     pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR, listw = pov_nbq_w, 
##     zero.policy = T)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2085243 -0.0292183 -0.0058704  0.0226068  0.5015325 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##                Estimate Std. Error  z value  Pr(>|z|)
## (Intercept)  -0.2632126  0.0076277 -34.5074 < 2.2e-16
## pov$PROBLCK   0.0468098  0.0098170   4.7683 1.858e-06
## pov$PROHISP   0.1285531  0.0083371  15.4194 < 2.2e-16
## pov$PROHSLS   0.2714768  0.0093173  29.1368 < 2.2e-16
## pov$PROFEMHH  0.3884725  0.0232611  16.7005 < 2.2e-16
## pov$PROAUNEM  0.4949083  0.0393869  12.5653 < 2.2e-16
## pov$PROMUNDR  0.4296969  0.0186474  23.0433 < 2.2e-16
## 
## Rho: 0.4446, LR test value: 967.9, p-value: < 2.22e-16
## Asymptotic standard error: 0.01351
##     z-value: 32.92, p-value: < 2.22e-16
## Wald statistic: 1084, p-value: < 2.22e-16
## 
## Log likelihood: 5020 for lag model
## ML residual variance (sigma squared): 0.002224, (sigma: 0.04716)
## Number of observations: 3107 
## Number of parameters estimated: 9 
## AIC: -10022, (AIC for lm: -9056)
## LM test for residual autocorrelation
## test value: 236.7, p-value: < 2.22e-16

# Though the AIC for povlag_r2 is lower than for povlag_r1, it's still not
# as low as for the full model, so keep all variables

# Adding Interaction Terms to Full Model (pov_lag) From the above
# interaction model, the interaction between PROFEMHH and PROAUNEM
# resulted in the highest average, overall change (in absolute magnitude)
# in child poverty. So will try adding it.
povlag_int1 <- lagsarlm(pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS + 
    pov$PROWKCO + pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR + pov$PROFEMHH:pov$PROAUNEM, 
    listw = pov_nbq_w, zero.policy = T)
summary(povlag_int1)  # p-values for all predictors are significant except PROFEMHH, Rho: 0.42133, AIC: -10312, p-value for LM test for resid autocorrelation: < 2.22e-16

## 
## Call:
## lagsarlm(formula = pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS + 
##     pov$PROWKCO + pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR + 
##     pov$PROFEMHH:pov$PROAUNEM, listw = pov_nbq_w, zero.policy = T)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2591271 -0.0275824 -0.0042479  0.0217976  0.4682655 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##                             Estimate Std. Error  z value  Pr(>|z|)
## (Intercept)               -0.2470659  0.0086960 -28.4116 < 2.2e-16
## pov$PROBLCK                0.0581724  0.0098448   5.9089 3.443e-09
## pov$PROHISP                0.1285256  0.0080326  16.0005 < 2.2e-16
## pov$PROHSLS                0.2910373  0.0090235  32.2531 < 2.2e-16
## pov$PROWKCO                0.0488753  0.0051706   9.4525 < 2.2e-16
## pov$PROFEMHH               0.0446918  0.0303889   1.4707  0.141383
## pov$PROAUNEM              -0.1640939  0.0630142  -2.6041  0.009212
## pov$PROMUNDR               0.3918907  0.0182102  21.5204 < 2.2e-16
## pov$PROFEMHH:pov$PROAUNEM  4.1347944  0.2892762  14.2936 < 2.2e-16
## 
## Rho: 0.4213, LR test value: 916, p-value: < 2.22e-16
## Asymptotic standard error: 0.01338
##     z-value: 31.49, p-value: < 2.22e-16
## Wald statistic: 991.5, p-value: < 2.22e-16
## 
## Log likelihood: 5167 for lag model
## ML residual variance (sigma squared): 0.002032, (sigma: 0.04508)
## Number of observations: 3107 
## Number of parameters estimated: 11 
## AIC: -10312, (AIC for lm: -9398)
## LM test for residual autocorrelation
## test value: 77.06, p-value: < 2.22e-16


# The addition of the interaction term gave a lower AIC than the full
# model, so will keep. However, the interaction of PROFEMHH and PROAUNEM
# made the p-value for PROFEMHH not significant (0.141383), so try
# removing that variable.
povlag_int1r1 <- lagsarlm(pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS + 
    pov$PROWKCO + pov$PROAUNEM + pov$PROMUNDR + pov$PROFEMHH:pov$PROAUNEM, listw = pov_nbq_w, 
    zero.policy = T)
summary(povlag_int1r1)  # p-values for all predictors are significant, Rho: 0.41892, AIC: -10311, p-value for LM test for resid autocorrelation: < 2.22e-16

## 
## Call:
## lagsarlm(formula = pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS + 
##     pov$PROWKCO + pov$PROAUNEM + pov$PROMUNDR + pov$PROFEMHH:pov$PROAUNEM, 
##     listw = pov_nbq_w, zero.policy = T)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2733968 -0.0273964 -0.0040589  0.0220878  0.4653314 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##                             Estimate Std. Error  z value  Pr(>|z|)
## (Intercept)               -0.2416375  0.0077882 -31.0259 < 2.2e-16
## pov$PROBLCK                0.0653355  0.0087489   7.4679 8.149e-14
## pov$PROHISP                0.1294840  0.0080298  16.1254 < 2.2e-16
## pov$PROHSLS                0.2896905  0.0089426  32.3943 < 2.2e-16
## pov$PROWKCO                0.0500122  0.0051330   9.7433 < 2.2e-16
## pov$PROAUNEM              -0.1928506  0.0594772  -3.2424  0.001185
## pov$PROMUNDR               0.3926519  0.0182233  21.5468 < 2.2e-16
## pov$PROAUNEM:pov$PROFEMHH  4.4122347  0.2175192  20.2844 < 2.2e-16
## 
## Rho: 0.4189, LR test value: 920.1, p-value: < 2.22e-16
## Asymptotic standard error: 0.01341
##     z-value: 31.24, p-value: < 2.22e-16
## Wald statistic: 975.8, p-value: < 2.22e-16
## 
## Log likelihood: 5166 for lag model
## ML residual variance (sigma squared): 0.002035, (sigma: 0.04511)
## Number of observations: 3107 
## Number of parameters estimated: 10 
## AIC: -10311, (AIC for lm: -9393)
## LM test for residual autocorrelation
## test value: 70.46, p-value: < 2.22e-16


# The AIC is one digit higher, so hard to make a call between fit and
# parsimony. Will try two models with the same addition of the another
# interaction term, one with and one without PROFEMHH. The interaction
# between PROHSLS and PROAUNEM resulted in the next highest overall change
# (in absolute magnitude) in child poverty.  Additional interaction
# without PROFEMHH
povlag_int2r1:lagsarlm(pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS + 
    pov$PROWKCO + pov$PROAUNEM + pov$PROMUNDR + pov$PROFEMHH:pov$PROAUNEM + 
    pov$PROHSLS:pov$PROAUNEM, listw = pov_nbq_w, zero.policy = T)

## Error: object 'povlag_int2r1' not found

summary(povlag_int2)  # p-values for all predictors are significant, Rho: 0.40207, AIC: -10410, p-value for LM test for resid autocorrelation: < 2.22e-16

## Error: error in evaluating the argument 'object' in selecting a method for
## function 'summary': Error: object 'povlag_int2' not found


# Additional interaction with PROFEMHH
povlag_int2 <- lagsarlm(pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS + 
    pov$PROWKCO + pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR + pov$PROFEMHH:pov$PROAUNEM + 
    pov$PROHSLS:pov$PROAUNEM, listw = pov_nbq_w, zero.policy = T)
summary(povlag_int2)  # p-values for all predictors are significant, Rho: 0.40616, AIC: -10416, p-value for LM test for resid autocorrelation: < 2.22e-16

## 
## Call:
## lagsarlm(formula = pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS + 
##     pov$PROWKCO + pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR + 
##     pov$PROFEMHH:pov$PROAUNEM + pov$PROHSLS:pov$PROAUNEM, listw = pov_nbq_w, 
##     zero.policy = T)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2519462 -0.0273324 -0.0043355  0.0217578  0.4524466 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##                             Estimate Std. Error  z value  Pr(>|z|)
## (Intercept)               -0.1478067  0.0127634 -11.5805 < 2.2e-16
## pov$PROBLCK                0.0586698  0.0097103   6.0420 1.522e-09
## pov$PROHISP                0.1334341  0.0079267  16.8336 < 2.2e-16
## pov$PROHSLS                0.1304208  0.0177426   7.3507 1.972e-13
## pov$PROWKCO                0.0561714  0.0051297  10.9502 < 2.2e-16
## pov$PROFEMHH               0.0865359  0.0302382   2.8618  0.004212
## pov$PROAUNEM              -2.0405327  0.1898170 -10.7500 < 2.2e-16
## pov$PROMUNDR               0.3964526  0.0179210  22.1222 < 2.2e-16
## pov$PROFEMHH:pov$PROAUNEM  3.8632962  0.2860585  13.5053 < 2.2e-16
## pov$PROHSLS:pov$PROAUNEM   2.7748436  0.2661432  10.4261 < 2.2e-16
## 
## Rho: 0.4062, LR test value: 868.6, p-value: < 2.22e-16
## Asymptotic standard error: 0.01334
##     z-value: 30.45, p-value: < 2.22e-16
## Wald statistic: 927.5, p-value: < 2.22e-16
## 
## Log likelihood: 5220 for lag model
## ML residual variance (sigma squared): 0.001969, (sigma: 0.04437)
## Number of observations: 3107 
## Number of parameters estimated: 12 
## AIC: -10416, (AIC for lm: -9550)
## LM test for residual autocorrelation
## test value: 68.08, p-value: < 2.22e-16


# The 2nd interaction lowers the AIC overall, so keep it. The inclusion of
# the PROFEMHH variable also consistently lowers the AIC, so keep it. Will
# add the interaction term between PROBLCK and PROAUNEM that resulted in
# the 3rd highest overall change (in absolute magnitude).
povlag_int3 <- lagsarlm(pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS + 
    pov$PROWKCO + pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR + pov$PROFEMHH:pov$PROAUNEM + 
    pov$PROHSLS:pov$PROAUNEM + pov$PROBLCK:pov$PROAUNEM, listw = pov_nbq_w, 
    zero.policy = T)
summary(povlag_int3)  # p-values for all predictors are significant except PROFEMHH, Rho: 0.40934, AIC: -10427, p-value for LM test for resid autocorrelation: 2.3315e-15

## 
## Call:
## lagsarlm(formula = pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS + 
##     pov$PROWKCO + pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR + 
##     pov$PROFEMHH:pov$PROAUNEM + pov$PROHSLS:pov$PROAUNEM + pov$PROBLCK:pov$PROAUNEM, 
##     listw = pov_nbq_w, zero.policy = T)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2008267 -0.0271235 -0.0040221  0.0211449  0.4494534 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##                             Estimate Std. Error  z value  Pr(>|z|)
## (Intercept)               -0.1411917  0.0128801 -10.9620 < 2.2e-16
## pov$PROBLCK                0.1152612  0.0188776   6.1057 1.024e-09
## pov$PROHISP                0.1316647  0.0079344  16.5942 < 2.2e-16
## pov$PROHSLS                0.1271430  0.0177283   7.1717 7.405e-13
## pov$PROWKCO                0.0572919  0.0051292  11.1696 < 2.2e-16
## pov$PROFEMHH               0.0222566  0.0354591   0.6277 0.5302210
## pov$PROAUNEM              -2.1241696  0.1909754 -11.1227 < 2.2e-16
## pov$PROMUNDR               0.3958445  0.0178842  22.1337 < 2.2e-16
## pov$PROFEMHH:pov$PROAUNEM  4.6502927  0.3647565  12.7490 < 2.2e-16
## pov$PROHSLS:pov$PROAUNEM   2.7863313  0.2656661  10.4881 < 2.2e-16
## pov$PROBLCK:pov$PROAUNEM  -0.6597251  0.1887224  -3.4957 0.0004727
## 
## Rho: 0.4093, LR test value: 879.7, p-value: < 2.22e-16
## Asymptotic standard error: 0.01339
##     z-value: 30.56, p-value: < 2.22e-16
## Wald statistic: 934, p-value: < 2.22e-16
## 
## Log likelihood: 5226 for lag model
## ML residual variance (sigma squared): 0.00196, (sigma: 0.04427)
## Number of observations: 3107 
## Number of parameters estimated: 13 
## AIC: -10427, (AIC for lm: -9549)
## LM test for residual autocorrelation
## test value: 62.75, p-value: 2.3315e-15


# The additional term lowers the AIC, so keep it. Try removing PROFEMHH
# again since it's p-value is not significant.
povlag_int3r1 <- lagsarlm(pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS + 
    pov$PROWKCO + pov$PROAUNEM + pov$PROMUNDR + pov$PROFEMHH:pov$PROAUNEM + 
    pov$PROHSLS:pov$PROAUNEM + pov$PROBLCK:pov$PROAUNEM, listw = pov_nbq_w, 
    zero.policy = T)
summary(povlag_int3r1)  # # p-values for all predictors are significant, Rho: 0.40886, AIC: -10428, p-value for LM test for resid autocorrelation: 8.8818e-15

## 
## Call:
## lagsarlm(formula = pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS + 
##     pov$PROWKCO + pov$PROAUNEM + pov$PROMUNDR + pov$PROFEMHH:pov$PROAUNEM + 
##     pov$PROHSLS:pov$PROAUNEM + pov$PROBLCK:pov$PROAUNEM, listw = pov_nbq_w, 
##     zero.policy = T)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2012355 -0.0270264 -0.0038699  0.0209987  0.4482308 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##                             Estimate Std. Error  z value  Pr(>|z|)
## (Intercept)               -0.1392979  0.0125171 -11.1286 < 2.2e-16
## pov$PROBLCK                0.1230821  0.0143511   8.5765 < 2.2e-16
## pov$PROHISP                0.1318116  0.0079368  16.6076 < 2.2e-16
## pov$PROHSLS                0.1274321  0.0177256   7.1892 6.519e-13
## pov$PROWKCO                0.0577547  0.0050890  11.3490 < 2.2e-16
## pov$PROAUNEM              -2.1296776  0.1908409 -11.1594 < 2.2e-16
## pov$PROMUNDR               0.3960316  0.0178874  22.1403 < 2.2e-16
## pov$PROAUNEM:pov$PROFEMHH  4.8245736  0.2332749  20.6819 < 2.2e-16
## pov$PROHSLS:pov$PROAUNEM   2.7688312  0.2638865  10.4925 < 2.2e-16
## pov$PROBLCK:pov$PROAUNEM  -0.7210186  0.1605918  -4.4898 7.130e-06
## 
## Rho: 0.4089, LR test value: 880.6, p-value: < 2.22e-16
## Asymptotic standard error: 0.0134
##     z-value: 30.52, p-value: < 2.22e-16
## Wald statistic: 931.2, p-value: < 2.22e-16
## 
## Log likelihood: 5226 for lag model
## ML residual variance (sigma squared): 0.001961, (sigma: 0.04428)
## Number of observations: 3107 
## Number of parameters estimated: 12 
## AIC: -10428, (AIC for lm: -9550)
## LM test for residual autocorrelation
## test value: 60.14, p-value: 8.8818e-15


# The removal of PROFEMHH resulted in an AIC that is one digit lower, so
# keep it for parsimony and fit. Try adding the last interaction term from
# the employment interaction model (povlag_paunem_int).
povlag_int4r1 <- lagsarlm(pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS + 
    pov$PROWKCO + pov$PROAUNEM + pov$PROMUNDR + pov$PROFEMHH:pov$PROAUNEM + 
    pov$PROHSLS:pov$PROAUNEM + pov$PROBLCK:pov$PROAUNEM + pov$PROHISP:pov$PROAUNEM, 
    listw = pov_nbq_w, zero.policy = T)
summary(povlag_int4r1)  # # p-values for all predictors are significant, Rho: 0.40332, AIC: -10449, p-value for LM test for resid autocorrelation: 3.042e-13

## 
## Call:
## lagsarlm(formula = pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS + 
##     pov$PROWKCO + pov$PROAUNEM + pov$PROMUNDR + pov$PROFEMHH:pov$PROAUNEM + 
##     pov$PROHSLS:pov$PROAUNEM + pov$PROBLCK:pov$PROAUNEM + pov$PROHISP:pov$PROAUNEM, 
##     listw = pov_nbq_w, zero.policy = T)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.198791 -0.026433 -0.003855  0.020973  0.451232 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##                             Estimate Std. Error  z value  Pr(>|z|)
## (Intercept)               -0.1385233  0.0124769 -11.1024 < 2.2e-16
## pov$PROBLCK                0.1298194  0.0143765   9.0300 < 2.2e-16
## pov$PROHISP                0.2155045  0.0190383  11.3195 < 2.2e-16
## pov$PROHSLS                0.1197528  0.0177349   6.7524 1.455e-11
## pov$PROWKCO                0.0571659  0.0050768  11.2602 < 2.2e-16
## pov$PROAUNEM              -2.1976671  0.1906772 -11.5256 < 2.2e-16
## pov$PROMUNDR               0.4051790  0.0179033  22.6316 < 2.2e-16
## pov$PROAUNEM:pov$PROFEMHH  4.8029827  0.2326680  20.6431 < 2.2e-16
## pov$PROHSLS:pov$PROAUNEM   2.9369203  0.2651546  11.0763 < 2.2e-16
## pov$PROBLCK:pov$PROAUNEM  -0.7867199  0.1607577  -4.8938 9.889e-07
## pov$PROHISP:pov$PROAUNEM  -0.8416600  0.1745634  -4.8215 1.425e-06
## 
## Rho: 0.4033, LR test value: 857.2, p-value: < 2.22e-16
## Asymptotic standard error: 0.01341
##     z-value: 30.08, p-value: < 2.22e-16
## Wald statistic: 904.5, p-value: < 2.22e-16
## 
## Log likelihood: 5238 for lag model
## ML residual variance (sigma squared): 0.001948, (sigma: 0.04414)
## Number of observations: 3107 
## Number of parameters estimated: 13 
## AIC: -10449, (AIC for lm: -9594)
## LM test for residual autocorrelation
## test value: 53.18, p-value: 3.042e-13


# Just for kicks, since the inclusion of PROFEMHH has been on the fence,
# add it back in to see how it affects the AIC.
povlag_int4 <- lagsarlm(pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS + 
    pov$PROWKCO + pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR + pov$PROFEMHH:pov$PROAUNEM + 
    pov$PROHSLS:pov$PROAUNEM + pov$PROBLCK:pov$PROAUNEM + pov$PROHISP:pov$PROAUNEM, 
    listw = pov_nbq_w, zero.policy = T)
summary(povlag_int4)  # # p-values for all predictors are significant, Rho: 0.40362, AIC: -10447, p-value for LM test for resid autocorrelation: 1.4133e-13

## 
## Call:
## lagsarlm(formula = pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS + 
##     pov$PROWKCO + pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR + 
##     pov$PROFEMHH:pov$PROAUNEM + pov$PROHSLS:pov$PROAUNEM + pov$PROBLCK:pov$PROAUNEM + 
##     pov$PROHISP:pov$PROAUNEM, listw = pov_nbq_w, zero.policy = T)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.1985629 -0.0265292 -0.0039071  0.0210961  0.4519332 
## 
## Type: lag 
## Regions with no neighbours included:
##  35 689 709 880 
## Coefficients: (asymptotic standard errors) 
##                             Estimate Std. Error  z value  Pr(>|z|)
## (Intercept)               -0.1396315  0.0128418 -10.8732 < 2.2e-16
## pov$PROBLCK                0.1252276  0.0189231   6.6177 3.648e-11
## pov$PROHISP                0.2150670  0.0190539  11.2873 < 2.2e-16
## pov$PROHSLS                0.1196164  0.0177369   6.7439 1.542e-11
## pov$PROWKCO                0.0568983  0.0051166  11.1204 < 2.2e-16
## pov$PROFEMHH               0.0129866  0.0353863   0.3670    0.7136
## pov$PROAUNEM              -2.1941670  0.1908506 -11.4968 < 2.2e-16
## pov$PROMUNDR               0.4050312  0.0179018  22.6252 < 2.2e-16
## pov$PROFEMHH:pov$PROAUNEM  4.7013822  0.3638585  12.9209 < 2.2e-16
## pov$PROHSLS:pov$PROAUNEM   2.9464238  0.2667677  11.0449 < 2.2e-16
## pov$PROBLCK:pov$PROAUNEM  -0.7506799  0.1891346  -3.9690 7.217e-05
## pov$PROHISP:pov$PROAUNEM  -0.8381232  0.1747459  -4.7962 1.617e-06
## 
## Rho: 0.4036, LR test value: 855.1, p-value: < 2.22e-16
## Asymptotic standard error: 0.01341
##     z-value: 30.1, p-value: < 2.22e-16
## Wald statistic: 905.9, p-value: < 2.22e-16
## 
## Log likelihood: 5238 for lag model
## ML residual variance (sigma squared): 0.001948, (sigma: 0.04413)
## Number of observations: 3107 
## Number of parameters estimated: 14 
## AIC: -10447, (AIC for lm: -9594)
## LM test for residual autocorrelation
## test value: 54.69, p-value: 1.4133e-13

# The AIC with PROFEMHH is lower, so povlag_int4r1 appears as the best
# predictio model.

Best Prediction Model

# Look at residuals
pov$resid_plagint4r1 <- residuals(povlag_int4r1)
moran.test(pov$resid_plagint4r1, listw = pov_nbq_w, zero.policy = T)  # Moran's I = 0.0655996436, p-value = 4.604e-10 => the spatial autocovariance in the residuals is still significant, but close to 0, and quite a bit less than the full lag model (Moran's I = 0.1203) and its OLS counterpart (Moran's I = 0.4160), indicating that this model is reducing model misspecification

## 
##  Moran's I test under randomisation
## 
## data:  pov$resid_plagint4r1  
## weights: pov_nbq_w  
##  
## Moran I statistic standard deviate = 6.123, p-value = 4.604e-10
## alternative hypothesis: greater 
## sample estimates:
## Moran I statistic       Expectation          Variance 
##         0.0655996        -0.0003224         0.0001159

# Use B-P test to see if there is heteroscedacity in the residuals
bptest.sarlm(povlag_int4r1)  # BP = 89.3419, df = 10, p-value = 7.216e-15

## 
##  studentized Breusch-Pagan test
## 
## data:  
## BP = 89.34, df = 10, p-value = 7.216e-15


# Examine spatial structure via predictors' direct, indirect, and overall
# average impacts
impacts(povlag_int4r1, listw = pov_nbq_w)  # total - problck: 0.21745794; prohisp:0.36098729, prohsls: 0.20059557; prowkco: 0.09575737; proaunem: -3.68126815; promundr: 0.67870720, profemhh:proaunem: 8.04538002; prohsls:proaunem: 4.91957631; problck:proaunem: -1.31781872; prohisp:proaunem: -1.40984781

## Impact measures (lag, exact):
##                             Direct Indirect    Total
## pov$PROBLCK                0.13428  0.08318  0.21746
## pov$PROHISP                0.22291  0.13808  0.36099
## pov$PROHSLS                0.12387  0.07673  0.20060
## pov$PROWKCO                0.05913  0.03663  0.09576
## pov$PROAUNEM              -2.27316 -1.40810 -3.68127
## pov$PROMUNDR               0.41910  0.25961  0.67871
## pov$PROAUNEM:pov$PROFEMHH  4.96798  3.07740  8.04538
## pov$PROHSLS:pov$PROAUNEM   3.03781  1.88176  4.91958
## pov$PROBLCK:pov$PROAUNEM  -0.81375 -0.50407 -1.31782
## pov$PROHISP:pov$PROAUNEM  -0.87057 -0.53927 -1.40985


# Examine spatial structure via a change in a predictor Looking at the
# contribution of predictors, the single predictor that most affects the
# overall average change in PROPOV is PROAUNEM, so will make a change in
# this variable to see how it affects child poverty Copy original data
# frame
pov.new <- pov
# Look at the structure of the data frame
str(pov.new@data)

## 'data.frame':    3107 obs. of  25 variables:
##  $ POLY_ID         : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ NAME            : Factor w/ 1801 levels "Abbeville","Acadia",..: 902 564 1542 1180 1242 177 948 569 628 1615 ...
##  $ FIPS            : Factor w/ 3107 levels "01001","01003",..: 1317 2928 2951 2942 2944 526 1589 1577 1580 1613 ...
##  $ MET             : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PROPOV          : num  0.117 0.305 0.229 0.286 0.283 ...
##  $ WGHT            : num  118 414 1690 1908 530 ...
##  $ PROBLCK         : num  0 0.00111 0.00288 0.00087 0.00269 ...
##  $ PROHISP         : num  0.00491 0.01716 0.0168 0.08414 0.01155 ...
##  $ SOUTH           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PROHSLS         : num  0.615 0.643 0.59 0.628 0.613 ...
##  $ PROWKCO         : num  0.77 0.842 0.773 0.93 0.685 ...
##  $ PROFEMHH        : num  0.116 0.163 0.153 0.186 0.177 ...
##  $ PROEXTR         : num  0.103 0.32 0.159 0.258 0.163 ...
##  $ PRONMAN         : num  0.07947 0.00915 0.01476 0.01269 0.05104 ...
##  $ PROMSERV        : num  0.1211 0.0466 0.0773 0.0688 0.0602 ...
##  $ PROPSERV        : num  0.253 0.338 0.305 0.283 0.26 ...
##  $ PROAUNEM        : num  0.0389 0.1681 0.107 0.102 0.1499 ...
##  $ PROMUNDR        : num  0.189 0.296 0.265 0.266 0.269 ...
##  $ XCOORD          : num  -94.9 -118.5 -117.9 -119.9 -117.3 ...
##  $ YCOORD          : num  48.9 48.4 48.4 48.5 48.5 ...
##  $ X2              : num  9003 14038 13903 14366 13767 ...
##  $ Y2              : num  2388 2344 2342 2349 2354 ...
##  $ lmresid         : num  -0.00376 -0.0144 0.00629 0.01977 0.0162 ...
##  $ lagresid        : num  -0.02241 0.00766 -0.00206 0.0338 0.04153 ...
##  $ resid_plagint4r1: num  -0.0248 0.0241 0.0192 0.0416 0.059 ...
##  - attr(*, "data_types")= chr  "N" "C" "C" "N" ...

# Look at unemployment rate in Keith County, NE (relatively sparsely
# populated county in the middle of the U.S.)
pov.new@data[grepl("Keith", pov.new@data$NAME), c("NAME", "PROAUNEM")]

##      NAME PROAUNEM
## 710 Keith   0.0308

# Change the unemployment rate in Keith County, NE from 3% to 70%
pov.new@data[pov.new@data$NAME == "Keith", "PROAUNEM"] <- 0.7
# The original predicted values
orig.pred <- as.data.frame(predict(povlag_int4r1))
# The predicted values with the new unemployment rate in Nebraska
new.pred <- as.data.frame(predict(povlag_int4r1, newdata = pov.new, listw = pov_nbq_w, 
    zero.policy = T))
# The difference between the predicted values
kCoNE_effect <- new.pred$fit - orig.pred$fit
abc <- data.frame(name = pov$NAME, diff_in_pred_PROPOV = kCoNE_effect)
pov.new$KCONECHG <- abc$diff_in_pred_PROPOV
# Sort counties by absolute value of the change in predicted PROPOV
abc <- abc[rev(order(abs(abc$diff_in_pred_PROPOV))), ]
abc[1:10, ]  # Show the top 10 counties

##          name diff_in_pred_PROPOV
## 459      Todd            -0.07961
## 582    Hooker            -0.07849
## 632     Logan            -0.07606
## 584    Thomas            -0.07545
## 435   Gregory            -0.05978
## 375     Jones            -0.05958
## 420     Tripp            -0.05770
## 517     Brown            -0.05762
## 2181     Hall            -0.05401
## 399  Mellette            -0.05279


# Map Keith County cascade effects
pal5 <- brewer.pal(6, "BrBG")
cats5 <- classIntervals(pov.new$KCONECHG, n = 5, style = "jenks")
colors5 <- findColours(cats5, pal5)
plot(pov.new, col = colors5)
legend("bottomleft", legend = round(cats5$brks, 2), fill = pal5, bty = "n", 
    cex = 0.7)
mtext("Effects of a change in Keith County, NE (set PROAUNEM=.70)\n on predicted values in a spatial lag model", 
    side = 3, line = 1)

plot of chunk unnamed-chunk-11