library(classInt)
## Loading required package: class
## Loading required package: e1071
library(spdep)
## Loading required package: sp
## Loading required package: boot
## Loading required package: Matrix
## Loading required package: lattice
## Attaching package: 'lattice'
## The following object(s) are masked from 'package:boot':
##
## melanoma
## Loading required package: MASS
## Loading required package: nlme
## Loading required package: maptools
## Loading required package: foreign
## Loading required package: grid
## Checking rgeos availability: FALSE Note: when rgeos is not available,
## polygon geometry computations in maptools depend on gpclib, which has a
## restricted licence. It is disabled by default; to enable gpclib, type
## gpclibPermit()
## Loading required package: deldir
## deldir 0.0-21
## Loading required package: coda
## Loading required package: splines
library(RColorBrewer)
library(gstat)
library(maptools)
# Load shapefiles
pov <- readShapePoly("/Users/xiwang/Dropbox/GEOG 5023 - offline/Labs/Lab 3/poverty/poverty.shp")
class(pov)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
plot(pov, axes = T)
plot(variogram(pov$PROPOV ~ 1, locations = coordinates(pov), data = pov, cloud = F),
type = "b", main = "Variogram of PROPOV") # The variable shows significant spatial autocorrelation (range is approximately at d=13); the nugget effect is also substantial (d=1)
# Queen's contiguity
coords <- coordinates(pov)
pov_nbq <- poly2nb(pov)
plot(pov)
plot(pov_nbq, coords, add = T)
# 2 nearest neighbors
pov_kn2 <- knn2nb(knearneigh(coords, k = 2)) # closest 2 neighbors
plot(pov)
plot(pov_kn2, coords, add = T)
# Queen's contiguity makes the most sense in this context, since a
# county's child poverty rate can be affected by all adjoining counties.
# Row standardization of Queen's contiguity
pov_nbq_w <- nb2listw(pov_nbq, zero.policy = T)
# Moran's I
moran_PROPOV <- moran.test(pov$PROPOV, pov_nbq_w, zero.policy = T)
moran_PROPOV
##
## Moran's I test under randomisation
##
## data: pov$PROPOV
## weights: pov_nbq_w
##
## Moran I statistic standard deviate = 57.52, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.6194394 -0.0003224 0.0001161
moran_PROPOV_sig <- moran.mc(pov$PROPOV, listw = pov_nbq_w, zero.policy = T,
nsim = 999) # The null hypothesis is that autocorrelation is random
moran_PROPOV_sig # Moran's I = 0.6202, observed rank = 1000, p-value = 0.001, the PROPOV variable shows that spatial auto covariance is significant,therefore it is not due to chance
##
## Monte-Carlo simulation of Moran's I
##
## data: pov$PROPOV
## weights: pov_nbq_w
## number of simulations + 1: 1000
##
## statistic = 0.6202, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater
The variogram and Moran's I indicate that there is spatial autocorrelation in childhood poverty (PROPOV), but this spatial pattern can be caused by the spatial patterning of the covariates of poverty. A full analysis involves fitting an OLS model and comparing that model to spatial models.
# Fit the regression models with all parameters of interest, including
# those that deal with racial composition, family composition,
# county-level education, county-level employment--the structural
# components of poverty.
# Run standard OLS model
pov_lm <- lm(pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS + pov$PROWKCO +
pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR)
summary(pov_lm)
##
## Call:
## lm(formula = pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS +
## pov$PROWKCO + pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2119 -0.0349 -0.0055 0.0283 0.5196
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.33493 0.00951 -35.23 < 2e-16 ***
## pov$PROBLCK 0.19209 0.01112 17.28 < 2e-16 ***
## pov$PROHISP 0.20029 0.00922 21.73 < 2e-16 ***
## pov$PROHSLS 0.37548 0.01022 36.74 < 2e-16 ***
## pov$PROWKCO 0.08579 0.00621 13.83 < 2e-16 ***
## pov$PROFEMHH 0.21378 0.02759 7.75 1.3e-14 ***
## pov$PROAUNEM 0.86978 0.04384 19.84 < 2e-16 ***
## pov$PROMUNDR 0.52760 0.02165 24.37 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0546 on 3099 degrees of freedom
## Multiple R-squared: 0.726, Adjusted R-squared: 0.726
## F-statistic: 1.18e+03 on 7 and 3099 DF, p-value: <2e-16
# Look at residuals
par(mfrow = c(2, 2))
plot(pov_lm) # The residuals don't have a normal distrution around 0 and don't appear to have a mean of 0; the model appears to systematically underpredict the data
pov$lmresid <- residuals(pov_lm)
lm.morantest(pov_lm, pov_nbq_w, zero.policy = T) # Moran's I = 0.4160, p-value = 2.2e-16 => the spatial autocovariance in the residuals is significant
##
## Global Moran's I for regression residuals
##
## data:
## model: lm(formula = pov$PROPOV ~ pov$PROBLCK + pov$PROHISP +
## pov$PROHSLS + pov$PROWKCO + pov$PROFEMHH + pov$PROAUNEM +
## pov$PROMUNDR)
## weights: pov_nbq_w
##
## Moran I statistic standard deviate = 38.83, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran's I Expectation Variance
## 0.4159612 -0.0013925 0.0001155
# Compare against spatial lag model
pov_lag <- lagsarlm(pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS + pov$PROWKCO +
pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR, listw = pov_nbq_w, zero.policy = T)
summary(pov_lag) # p-value for all predictors are significant, Rho: 0.4224, AIC: -10115, p-value for LM test for resid autocorrelation: < 2.22e-16
##
## Call:
## lagsarlm(formula = pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS +
## pov$PROWKCO + pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR,
## listw = pov_nbq_w, zero.policy = T)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.1890572 -0.0290433 -0.0052518 0.0221836 0.4997118
##
## Type: lag
## Regions with no neighbours included:
## 35 689 709 880
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2957657 0.0082125 -36.0139 < 2.2e-16
## pov$PROBLCK 0.0740629 0.0100108 7.3983 1.379e-13
## pov$PROHISP 0.1230418 0.0082666 14.8842 < 2.2e-16
## pov$PROHSLS 0.2854499 0.0093095 30.6621 < 2.2e-16
## pov$PROWKCO 0.0526405 0.0053097 9.9139 < 2.2e-16
## pov$PROFEMHH 0.3330523 0.0235532 14.1404 < 2.2e-16
## pov$PROAUNEM 0.5470438 0.0391371 13.9776 < 2.2e-16
## pov$PROMUNDR 0.4021852 0.0187461 21.4543 < 2.2e-16
##
## Rho: 0.4224, LR test value: 876.5, p-value: < 2.22e-16
## Asymptotic standard error: 0.01355
## z-value: 31.16, p-value: < 2.22e-16
## Wald statistic: 971, p-value: < 2.22e-16
##
## Log likelihood: 5067 for lag model
## ML residual variance (sigma squared): 0.002166, (sigma: 0.04654)
## Number of observations: 3107
## Number of parameters estimated: 10
## AIC: -10115, (AIC for lm: -9240)
## LM test for residual autocorrelation
## test value: 182.8, p-value: < 2.22e-16
# Look at residuals
pov$lagresid <- residuals(pov_lag)
moran.test(pov$lagresid, listw = pov_nbq_w, zero.policy = T) # Moran's I = 0.1203, p-value < 2.2e-16 => the spatial autocovariance in the residuals is still significant, but the degree is quite a bit less than the OLS model
##
## Moran's I test under randomisation
##
## data: pov$lagresid
## weights: pov_nbq_w
##
## Moran I statistic standard deviate = 11.2, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.1202535 -0.0003224 0.0001159
The Moran's I test on the residuals indicate that there is spatial lag in our variable of interest, so the OLS model will be biased and inefficient.
The results of the lag model indicate that each of the predictors and the spatial lag are significant. The output includes a z-test based on the standard error of Moran's I 0.4224/0.013555 = 31.161. Furthermore, the AIC for the lag model (-10115) is lower than its OLS counterpart (-9240.4), and the LR test indicates there is significant difference between the two models due to the spatial term. Proceed with spatial regression analysis.
# Proportion of persons employed in the county they live in
povlag_pwkco <- lagsarlm(pov$PROPOV ~ pov$PROWKCO, listw = pov_nbq_w, zero.policy = T)
summary(povlag_pwkco) # p-value for predictor: 2.068e-08, Rho: 0.77559, AIC: -7437.5, p-value for LM test for resid autocorrelation: < 2.22e-16
##
## Call:lagsarlm(formula = pov$PROPOV ~ pov$PROWKCO, listw = pov_nbq_w,
## zero.policy = T)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.211545 -0.043946 -0.010089 0.032930 0.462267
##
## Type: lag
## Regions with no neighbours included:
## 35 689 709 880
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.0198214 0.0056991 3.4780 0.0005051
## pov$PROWKCO 0.0395218 0.0070496 5.6062 2.068e-08
##
## Rho: 0.7756, LR test value: 2162, p-value: < 2.22e-16
## Asymptotic standard error: 0.01352
## z-value: 57.38, p-value: < 2.22e-16
## Wald statistic: 3293, p-value: < 2.22e-16
##
## Log likelihood: 3723 for lag model
## ML residual variance (sigma squared): 0.004598, (sigma: 0.06781)
## Number of observations: 3107
## Number of parameters estimated: 4
## AIC: -7438, (AIC for lm: -5277)
## LM test for residual autocorrelation
## test value: 150.6, p-value: < 2.22e-16
impacts(povlag_pwkco, listw = pov_nbq_w) # direct: 0.0475, indirect: 0.1284, total: 0.1759
## Impact measures (lag, exact):
## Direct Indirect Total
## pov$PROWKCO 0.04751 0.1284 0.1759
# Proportion of workers aged 16+ who are unemployed
povlag_paunem <- lagsarlm(pov$PROPOV ~ pov$PROAUNEM, listw = pov_nbq_w, zero.policy = T)
summary(povlag_paunem) # p-value for predictor: < 2.2e-16, Rho: 0.62739, AIC: -8520.3, p-value for LM test for resid autocorrelation: 1.5427e-11
##
## Call:lagsarlm(formula = pov$PROPOV ~ pov$PROAUNEM, listw = pov_nbq_w,
## zero.policy = T)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2346634 -0.0385288 -0.0065934 0.0312486 0.4758167
##
## Type: lag
## Regions with no neighbours included:
## 35 689 709 880
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.0102097 0.0027982 -3.6487 0.0002636
## pov$PROAUNEM 1.3546656 0.0432994 31.2860 < 2.2e-16
##
## Rho: 0.6274, LR test value: 1589, p-value: < 2.22e-16
## Asymptotic standard error: 0.01387
## z-value: 45.25, p-value: < 2.22e-16
## Wald statistic: 2047, p-value: < 2.22e-16
##
## Log likelihood: 4264 for lag model
## ML residual variance (sigma squared): 0.003452, (sigma: 0.05875)
## Number of observations: 3107
## Number of parameters estimated: 4
## AIC: -8520, (AIC for lm: -6934)
## LM test for residual autocorrelation
## test value: 45.48, p-value: 1.5427e-11
impacts(povlag_paunem, listw = pov_nbq_w) # direct: 1.4952, indirect: 2.1375, total: 3.6327
## Impact measures (lag, exact):
## Direct Indirect Total
## pov$PROAUNEM 1.495 2.137 3.633
# Proportion of male workers aged 16+ who are underemployed
povlag_pmundr <- lagsarlm(pov$PROPOV ~ pov$PROMUNDR, listw = pov_nbq_w, zero.policy = T)
summary(povlag_pmundr) # p-value for predictor: < 2.2e-16, Rho: 0.73454, AIC: -7831.9, p-value for LM test for resid autocorrelation: < 2.22e-16
##
## Call:lagsarlm(formula = pov$PROPOV ~ pov$PROMUNDR, listw = pov_nbq_w,
## zero.policy = T)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.291661 -0.040326 -0.005151 0.032460 0.407659
##
## Type: lag
## Regions with no neighbours included:
## 35 689 709 880
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.0601893 0.0056757 -10.605 < 2.2e-16
## pov$PROMUNDR 0.4816380 0.0230648 20.882 < 2.2e-16
##
## Rho: 0.7345, LR test value: 2033, p-value: < 2.22e-16
## Asymptotic standard error: 0.01362
## z-value: 53.93, p-value: < 2.22e-16
## Wald statistic: 2908, p-value: < 2.22e-16
##
## Log likelihood: 3920 for lag model
## ML residual variance (sigma squared): 0.004132, (sigma: 0.06428)
## Number of observations: 3107
## Number of parameters estimated: 4
## AIC: -7832, (AIC for lm: -5801)
## LM test for residual autocorrelation
## test value: 83.49, p-value: < 2.22e-16
impacts(povlag_pmundr, listw = pov_nbq_w) # direct: 0.5623, indirect: 1.2504, total: 1.8126
## Impact measures (lag, exact):
## Direct Indirect Total
## pov$PROMUNDR 0.5623 1.25 1.813
# Racial Composition Proportion non-Hispanic black
povlag_pblck <- lagsarlm(pov$PROPOV ~ pov$PROBLCK, listw = pov_nbq_w, zero.policy = T)
summary(povlag_pblck) # p-value for predictor: < 2.2e-16, Rho: 0.71985, AIC: -7723.5, p-value for LM test for resid autocorrelation: 0.92641
##
## Call:lagsarlm(formula = pov$PROPOV ~ pov$PROBLCK, listw = pov_nbq_w,
## zero.policy = T)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2160498 -0.0417652 -0.0065088 0.0334262 0.4739797
##
## Type: lag
## Regions with no neighbours included:
## 35 689 709 880
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.0467795 0.0029247 15.995 < 2.2e-16
## pov$PROBLCK 0.1559274 0.0103291 15.096 < 2.2e-16
##
## Rho: 0.7198, LR test value: 1846, p-value: < 2.22e-16
## Asymptotic standard error: 0.01455
## z-value: 49.47, p-value: < 2.22e-16
## Wald statistic: 2448, p-value: < 2.22e-16
##
## Log likelihood: 3866 for lag model
## ML residual variance (sigma squared): 0.004307, (sigma: 0.06563)
## Number of observations: 3107
## Number of parameters estimated: 4
## AIC: -7724, (AIC for lm: -5879)
## LM test for residual autocorrelation
## test value: 0.008531, p-value: 0.92641
impacts(povlag_pblck, listw = pov_nbq_w) # direct: 0.1804, indirect: 0.3757, total: 0.5561
## Impact measures (lag, exact):
## Direct Indirect Total
## pov$PROBLCK 0.1804 0.3757 0.5561
# Proportion Hispanic
povlag_phisp <- lagsarlm(pov$PROPOV ~ pov$PROHISP, listw = pov_nbq_w, zero.policy = T)
summary(povlag_phisp) # p-value for predictor: 6.661e-16, Rho: 0.76003, AIC: -7479.1, p-value for LM test for resid autocorrelation: < 2.22e-16
##
## Call:lagsarlm(formula = pov$PROPOV ~ pov$PROHISP, listw = pov_nbq_w,
## zero.policy = T)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2132604 -0.0435487 -0.0074707 0.0333935 0.4655743
##
## Type: lag
## Regions with no neighbours included:
## 35 689 709 880
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.047391 0.003105 15.2627 < 2.2e-16
## pov$PROHISP 0.096385 0.011910 8.0927 6.661e-16
##
## Rho: 0.76, LR test value: 2032, p-value: < 2.22e-16
## Asymptotic standard error: 0.01406
## z-value: 54.07, p-value: < 2.22e-16
## Wald statistic: 2924, p-value: < 2.22e-16
##
## Log likelihood: 3744 for lag model
## ML residual variance (sigma squared): 0.004573, (sigma: 0.06763)
## Number of observations: 3107
## Number of parameters estimated: 4
## AIC: -7479, (AIC for lm: -5450)
## LM test for residual autocorrelation
## test value: 97.77, p-value: < 2.22e-16
impacts(povlag_phisp, listw = pov_nbq_w) # direct: 0.1145, indirect: 0.2867, total: 0.4013
## Impact measures (lag, exact):
## Direct Indirect Total
## pov$PROHISP 0.1145 0.2867 0.4013
# Family Composition Proportion female-headed families with children
povlag_pfemhh <- lagsarlm(pov$PROPOV ~ pov$PROFEMHH, listw = pov_nbq_w, zero.policy = T)
summary(povlag_pfemhh) # p-value for predictor: < 2.2e-16, Rho: 0.70847, AIC: -8269.9, p-value for LM test for resid autocorrelation: 3.3551e-05
##
## Call:lagsarlm(formula = pov$PROPOV ~ pov$PROFEMHH, listw = pov_nbq_w,
## zero.policy = T)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2226634 -0.0396259 -0.0069339 0.0313656 0.4450728
##
## Type: lag
## Regions with no neighbours included:
## 35 689 709 880
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.0233532 0.0030557 -7.6425 2.132e-14
## pov$PROFEMHH 0.5587807 0.0202177 27.6383 < 2.2e-16
##
## Rho: 0.7085, LR test value: 2050, p-value: < 2.22e-16
## Asymptotic standard error: 0.01237
## z-value: 57.28, p-value: < 2.22e-16
## Wald statistic: 3280, p-value: < 2.22e-16
##
## Log likelihood: 4139 for lag model
## ML residual variance (sigma squared): 0.00363, (sigma: 0.06025)
## Number of observations: 3107
## Number of parameters estimated: 4
## AIC: -8270, (AIC for lm: -6222)
## LM test for residual autocorrelation
## test value: 17.2, p-value: 3.3551e-05
impacts(povlag_pfemhh, listw = pov_nbq_w) # direct: 0.6420, indirect: 1.2730, total: 1.9149
## Impact measures (lag, exact):
## Direct Indirect Total
## pov$PROFEMHH 0.642 1.273 1.915
# Education Level Proportion with a high school education or less
povlag_phsls <- lagsarlm(pov$PROPOV ~ pov$PROHSLS, listw = pov_nbq_w, zero.policy = T)
summary(povlag_phsls) # p-value for predictor: < 2.2e-16, Rho: 0.70362, AIC: -7885.7, p-value for LM test for resid autocorrelation: 4.5994e-07
##
## Call:lagsarlm(formula = pov$PROPOV ~ pov$PROHSLS, listw = pov_nbq_w,
## zero.policy = T)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2058386 -0.0416047 -0.0084195 0.0312799 0.4236786
##
## Type: lag
## Regions with no neighbours included:
## 35 689 709 880
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.0952330 0.0067933 -14.019 < 2.2e-16
## pov$PROHSLS 0.2462637 0.0115701 21.284 < 2.2e-16
##
## Rho: 0.7036, LR test value: 1815, p-value: < 2.22e-16
## Asymptotic standard error: 0.01401
## z-value: 50.21, p-value: < 2.22e-16
## Wald statistic: 2521, p-value: < 2.22e-16
##
## Log likelihood: 3947 for lag model
## ML residual variance (sigma squared): 0.004116, (sigma: 0.06416)
## Number of observations: 3107
## Number of parameters estimated: 4
## AIC: -7886, (AIC for lm: -6072)
## LM test for residual autocorrelation
## test value: 25.43, p-value: 4.5994e-07
impacts(povlag_phsls, listw = pov_nbq_w) # direct: 0.2821, indirect: 0.5480, total: 0.8302
## Impact measures (lag, exact):
## Direct Indirect Total
## pov$PROHSLS 0.2821 0.548 0.8302
The summary of the full lag model (pov_lag) shows that all the included predictors contribute significantly. The model AIC is -10115, and the p-value for LM test for resid autocorrelation: < 2.22e-16. Though the AIC is low compared to univariate models, the model complexity motivates exploration of other multivariate spatial models. As well, the presence of spatial autocorrelation in the residuals indicates the possibility of model misspecification.
# Look at relative contribution of variables
impacts(pov_lag, listw = pov_nbq_w) # total - problck: 0.12815509; prohisp:0.21290593, prohsls: 0.49392951; prowkco: 0.09108666; profemhh: 0.57629861; proaunem: 0.94657964; promundr: 0.69592303
## Impact measures (lag, exact):
## Direct Indirect Total
## pov$PROBLCK 0.07689 0.05126 0.12816
## pov$PROHISP 0.12775 0.08516 0.21291
## pov$PROHSLS 0.29636 0.19756 0.49393
## pov$PROWKCO 0.05465 0.03643 0.09109
## pov$PROFEMHH 0.34579 0.23051 0.57630
## pov$PROAUNEM 0.56796 0.37862 0.94658
## pov$PROMUNDR 0.41756 0.27836 0.69592
# Use B-P test to see if there is heteroscedacity in the residuals
bptest.sarlm(pov_lag) # BP = 58.0829, df = 7, p-value = 3.64e-10
##
## studentized Breusch-Pagan test
##
## data:
## BP = 58.08, df = 7, p-value = 3.64e-10
# Looking at the three measures of employment--PROWKCO, PROAUNEM, and
# PROMUNDR--PROAUNEM is better than the other two measures at predicting
# PROPOV. It's model AIC is the lowest (indicating the best fit of the
# three) and a unit of change results in the highest average, overall
# change in child poverty (increase of 3.63). Using this measure also
# makes sense because it is the most direct measure of general employment
# levels of county residents. So I will interact the other factors with
# this measure of employment.
povlag_paunem_int <- lagsarlm(pov$PROPOV ~ pov$PROBLCK * pov$PROAUNEM + pov$PROHISP *
pov$PROAUNEM + pov$PROFEMHH * pov$PROAUNEM + pov$PROHSLS * pov$PROAUNEM,
listw = pov_nbq_w, zero.policy = T)
summary(povlag_paunem_int) # only p-value for predictor pov$PROBLCK is not significant , Rho: 0.49285, AIC: -9800.3, p-value for LM test for resid autocorrelation: < 2.22e-16
##
## Call:lagsarlm(formula = pov$PROPOV ~ pov$PROBLCK * pov$PROAUNEM +
## pov$PROHISP * pov$PROAUNEM + pov$PROFEMHH * pov$PROAUNEM +
## pov$PROHSLS * pov$PROAUNEM, listw = pov_nbq_w, zero.policy = T)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2186812 -0.0315493 -0.0049033 0.0256617 0.4543385
##
## Type: lag
## Regions with no neighbours included:
## 35 689 709 880
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.025321 0.013264 -1.9090 0.056268
## pov$PROBLCK 0.037217 0.020365 1.8275 0.067626
## pov$PROAUNEM -1.559133 0.208514 -7.4774 7.594e-14
## pov$PROHISP 0.170382 0.020823 8.1824 2.220e-16
## pov$PROFEMHH 0.103479 0.038796 2.6673 0.007647
## pov$PROHSLS 0.098882 0.019417 5.0924 3.535e-07
## pov$PROBLCK:pov$PROAUNEM -0.561492 0.207573 -2.7050 0.006830
## pov$PROAUNEM:pov$PROHISP -0.426794 0.191461 -2.2291 0.025805
## pov$PROAUNEM:pov$PROFEMHH 5.020064 0.401243 12.5113 < 2.2e-16
## pov$PROAUNEM:pov$PROHSLS 2.192811 0.291402 7.5250 5.262e-14
##
## Rho: 0.4929, LR test value: 1132, p-value: < 2.22e-16
## Asymptotic standard error: 0.01387
## z-value: 35.55, p-value: < 2.22e-16
## Wald statistic: 1264, p-value: < 2.22e-16
##
## Log likelihood: 4912 for lag model
## ML residual variance (sigma squared): 0.00236, (sigma: 0.04858)
## Number of observations: 3107
## Number of parameters estimated: 12
## AIC: -9800, (AIC for lm: -8670)
## LM test for residual autocorrelation
## test value: 113, p-value: < 2.22e-16
impacts(povlag_paunem_int, listw = pov_nbq_w) # total - problck: 0.07333926; paunem: -3.07238282; phisp: 0.33575055; pfemhh: 0.20391222; phsls: 0.19485311; pblck:paunem: -1.10645967; phisp:paunem: -0.84102726; pfemhh:paunem: 9.89239443; phsls:paunem: 4.32109041
## Impact measures (lag, exact):
## Direct Indirect Total
## pov$PROBLCK 0.03927 0.03406 0.07334
## pov$PROAUNEM -1.64532 -1.42706 -3.07238
## pov$PROHISP 0.17980 0.15595 0.33575
## pov$PROFEMHH 0.10920 0.09471 0.20391
## pov$PROHSLS 0.10435 0.09051 0.19485
## pov$PROBLCK:pov$PROAUNEM -0.59253 -0.51393 -1.10646
## pov$PROAUNEM:pov$PROHISP -0.45039 -0.39064 -0.84103
## pov$PROAUNEM:pov$PROFEMHH 5.29757 4.59483 9.89239
## pov$PROAUNEM:pov$PROHSLS 2.31403 2.00706 4.32109
# The effects of race (both black and hispanic) on employment decreases
# child poverty; the effects of female-headed households with children
# dramatically increases child poverty; the effects of education level
# also substantially increases child poverty
# Reducing the Full Model To try to reduce model complexity from the full
# model (pov_lag), remove variables PROBLCK, PROHISP, and PROWKCO since
# their univariate models had the 3 highest AICs. As well in both the
# univariate and multivariate models, these variables had the least
# average, total impacts on child poverty per unit change.
povlag_r1 <- lagsarlm(pov$PROPOV ~ pov$PROHSLS + pov$PROFEMHH + pov$PROAUNEM +
pov$PROMUNDR, listw = pov_nbq_w, zero.policy = T)
summary(povlag_r1) # p-values for all predictors are significant , Rho: 0.50543, AIC: -9789.7, p-value for LM test for resid autocorrelation: < 2.22e-16
##
## Call:lagsarlm(formula = pov$PROPOV ~ pov$PROHSLS + pov$PROFEMHH +
## pov$PROAUNEM + pov$PROMUNDR, listw = pov_nbq_w, zero.policy = T)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2089551 -0.0303175 -0.0064557 0.0234504 0.4989275
##
## Type: lag
## Regions with no neighbours included:
## 35 689 709 880
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2614752 0.0076461 -34.197 < 2.2e-16
## pov$PROHSLS 0.2563777 0.0095067 26.968 < 2.2e-16
## pov$PROFEMHH 0.4427462 0.0165534 26.747 < 2.2e-16
## pov$PROAUNEM 0.5147227 0.0391130 13.160 < 2.2e-16
## pov$PROMUNDR 0.4092532 0.0189675 21.576 < 2.2e-16
##
## Rho: 0.5054, LR test value: 1323, p-value: < 2.22e-16
## Asymptotic standard error: 0.0128
## z-value: 39.49, p-value: < 2.22e-16
## Wald statistic: 1559, p-value: < 2.22e-16
##
## Log likelihood: 4902 for lag model
## ML residual variance (sigma squared): 0.002369, (sigma: 0.04867)
## Number of observations: 3107
## Number of parameters estimated: 7
## AIC: -9790, (AIC for lm: -8468)
## LM test for residual autocorrelation
## test value: 233.1, p-value: < 2.22e-16
# The AIC for the reduced model povlag_r1 is higher than the full model,
# and since AIC accounts for model complexity, try reducing the full model
# by only one variable. Remove only PROWKCO since it had the lowest AIC
# and the least average, total impact on child poverty per unit change.
povlag_r2 <- pov_lag <- lagsarlm(pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS +
pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR, listw = pov_nbq_w, zero.policy = T)
summary(povlag_r2) # p-values for all predictors are significant , Rho: 0.4446, AIC: -10022, p-value for LM test for resid autocorrelation: < 2.22e-16
##
## Call:
## lagsarlm(formula = pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS +
## pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR, listw = pov_nbq_w,
## zero.policy = T)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2085243 -0.0292183 -0.0058704 0.0226068 0.5015325
##
## Type: lag
## Regions with no neighbours included:
## 35 689 709 880
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2632126 0.0076277 -34.5074 < 2.2e-16
## pov$PROBLCK 0.0468098 0.0098170 4.7683 1.858e-06
## pov$PROHISP 0.1285531 0.0083371 15.4194 < 2.2e-16
## pov$PROHSLS 0.2714768 0.0093173 29.1368 < 2.2e-16
## pov$PROFEMHH 0.3884725 0.0232611 16.7005 < 2.2e-16
## pov$PROAUNEM 0.4949083 0.0393869 12.5653 < 2.2e-16
## pov$PROMUNDR 0.4296969 0.0186474 23.0433 < 2.2e-16
##
## Rho: 0.4446, LR test value: 967.9, p-value: < 2.22e-16
## Asymptotic standard error: 0.01351
## z-value: 32.92, p-value: < 2.22e-16
## Wald statistic: 1084, p-value: < 2.22e-16
##
## Log likelihood: 5020 for lag model
## ML residual variance (sigma squared): 0.002224, (sigma: 0.04716)
## Number of observations: 3107
## Number of parameters estimated: 9
## AIC: -10022, (AIC for lm: -9056)
## LM test for residual autocorrelation
## test value: 236.7, p-value: < 2.22e-16
# Though the AIC for povlag_r2 is lower than for povlag_r1, it's still not
# as low as for the full model, so keep all variables
# Adding Interaction Terms to Full Model (pov_lag) From the above
# interaction model, the interaction between PROFEMHH and PROAUNEM
# resulted in the highest average, overall change (in absolute magnitude)
# in child poverty. So will try adding it.
povlag_int1 <- lagsarlm(pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS +
pov$PROWKCO + pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR + pov$PROFEMHH:pov$PROAUNEM,
listw = pov_nbq_w, zero.policy = T)
summary(povlag_int1) # p-values for all predictors are significant except PROFEMHH, Rho: 0.42133, AIC: -10312, p-value for LM test for resid autocorrelation: < 2.22e-16
##
## Call:
## lagsarlm(formula = pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS +
## pov$PROWKCO + pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR +
## pov$PROFEMHH:pov$PROAUNEM, listw = pov_nbq_w, zero.policy = T)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2591271 -0.0275824 -0.0042479 0.0217976 0.4682655
##
## Type: lag
## Regions with no neighbours included:
## 35 689 709 880
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2470659 0.0086960 -28.4116 < 2.2e-16
## pov$PROBLCK 0.0581724 0.0098448 5.9089 3.443e-09
## pov$PROHISP 0.1285256 0.0080326 16.0005 < 2.2e-16
## pov$PROHSLS 0.2910373 0.0090235 32.2531 < 2.2e-16
## pov$PROWKCO 0.0488753 0.0051706 9.4525 < 2.2e-16
## pov$PROFEMHH 0.0446918 0.0303889 1.4707 0.141383
## pov$PROAUNEM -0.1640939 0.0630142 -2.6041 0.009212
## pov$PROMUNDR 0.3918907 0.0182102 21.5204 < 2.2e-16
## pov$PROFEMHH:pov$PROAUNEM 4.1347944 0.2892762 14.2936 < 2.2e-16
##
## Rho: 0.4213, LR test value: 916, p-value: < 2.22e-16
## Asymptotic standard error: 0.01338
## z-value: 31.49, p-value: < 2.22e-16
## Wald statistic: 991.5, p-value: < 2.22e-16
##
## Log likelihood: 5167 for lag model
## ML residual variance (sigma squared): 0.002032, (sigma: 0.04508)
## Number of observations: 3107
## Number of parameters estimated: 11
## AIC: -10312, (AIC for lm: -9398)
## LM test for residual autocorrelation
## test value: 77.06, p-value: < 2.22e-16
# The addition of the interaction term gave a lower AIC than the full
# model, so will keep. However, the interaction of PROFEMHH and PROAUNEM
# made the p-value for PROFEMHH not significant (0.141383), so try
# removing that variable.
povlag_int1r1 <- lagsarlm(pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS +
pov$PROWKCO + pov$PROAUNEM + pov$PROMUNDR + pov$PROFEMHH:pov$PROAUNEM, listw = pov_nbq_w,
zero.policy = T)
summary(povlag_int1r1) # p-values for all predictors are significant, Rho: 0.41892, AIC: -10311, p-value for LM test for resid autocorrelation: < 2.22e-16
##
## Call:
## lagsarlm(formula = pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS +
## pov$PROWKCO + pov$PROAUNEM + pov$PROMUNDR + pov$PROFEMHH:pov$PROAUNEM,
## listw = pov_nbq_w, zero.policy = T)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2733968 -0.0273964 -0.0040589 0.0220878 0.4653314
##
## Type: lag
## Regions with no neighbours included:
## 35 689 709 880
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2416375 0.0077882 -31.0259 < 2.2e-16
## pov$PROBLCK 0.0653355 0.0087489 7.4679 8.149e-14
## pov$PROHISP 0.1294840 0.0080298 16.1254 < 2.2e-16
## pov$PROHSLS 0.2896905 0.0089426 32.3943 < 2.2e-16
## pov$PROWKCO 0.0500122 0.0051330 9.7433 < 2.2e-16
## pov$PROAUNEM -0.1928506 0.0594772 -3.2424 0.001185
## pov$PROMUNDR 0.3926519 0.0182233 21.5468 < 2.2e-16
## pov$PROAUNEM:pov$PROFEMHH 4.4122347 0.2175192 20.2844 < 2.2e-16
##
## Rho: 0.4189, LR test value: 920.1, p-value: < 2.22e-16
## Asymptotic standard error: 0.01341
## z-value: 31.24, p-value: < 2.22e-16
## Wald statistic: 975.8, p-value: < 2.22e-16
##
## Log likelihood: 5166 for lag model
## ML residual variance (sigma squared): 0.002035, (sigma: 0.04511)
## Number of observations: 3107
## Number of parameters estimated: 10
## AIC: -10311, (AIC for lm: -9393)
## LM test for residual autocorrelation
## test value: 70.46, p-value: < 2.22e-16
# The AIC is one digit higher, so hard to make a call between fit and
# parsimony. Will try two models with the same addition of the another
# interaction term, one with and one without PROFEMHH. The interaction
# between PROHSLS and PROAUNEM resulted in the next highest overall change
# (in absolute magnitude) in child poverty. Additional interaction
# without PROFEMHH
povlag_int2r1:lagsarlm(pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS +
pov$PROWKCO + pov$PROAUNEM + pov$PROMUNDR + pov$PROFEMHH:pov$PROAUNEM +
pov$PROHSLS:pov$PROAUNEM, listw = pov_nbq_w, zero.policy = T)
## Error: object 'povlag_int2r1' not found
summary(povlag_int2) # p-values for all predictors are significant, Rho: 0.40207, AIC: -10410, p-value for LM test for resid autocorrelation: < 2.22e-16
## Error: error in evaluating the argument 'object' in selecting a method for
## function 'summary': Error: object 'povlag_int2' not found
# Additional interaction with PROFEMHH
povlag_int2 <- lagsarlm(pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS +
pov$PROWKCO + pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR + pov$PROFEMHH:pov$PROAUNEM +
pov$PROHSLS:pov$PROAUNEM, listw = pov_nbq_w, zero.policy = T)
summary(povlag_int2) # p-values for all predictors are significant, Rho: 0.40616, AIC: -10416, p-value for LM test for resid autocorrelation: < 2.22e-16
##
## Call:
## lagsarlm(formula = pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS +
## pov$PROWKCO + pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR +
## pov$PROFEMHH:pov$PROAUNEM + pov$PROHSLS:pov$PROAUNEM, listw = pov_nbq_w,
## zero.policy = T)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2519462 -0.0273324 -0.0043355 0.0217578 0.4524466
##
## Type: lag
## Regions with no neighbours included:
## 35 689 709 880
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1478067 0.0127634 -11.5805 < 2.2e-16
## pov$PROBLCK 0.0586698 0.0097103 6.0420 1.522e-09
## pov$PROHISP 0.1334341 0.0079267 16.8336 < 2.2e-16
## pov$PROHSLS 0.1304208 0.0177426 7.3507 1.972e-13
## pov$PROWKCO 0.0561714 0.0051297 10.9502 < 2.2e-16
## pov$PROFEMHH 0.0865359 0.0302382 2.8618 0.004212
## pov$PROAUNEM -2.0405327 0.1898170 -10.7500 < 2.2e-16
## pov$PROMUNDR 0.3964526 0.0179210 22.1222 < 2.2e-16
## pov$PROFEMHH:pov$PROAUNEM 3.8632962 0.2860585 13.5053 < 2.2e-16
## pov$PROHSLS:pov$PROAUNEM 2.7748436 0.2661432 10.4261 < 2.2e-16
##
## Rho: 0.4062, LR test value: 868.6, p-value: < 2.22e-16
## Asymptotic standard error: 0.01334
## z-value: 30.45, p-value: < 2.22e-16
## Wald statistic: 927.5, p-value: < 2.22e-16
##
## Log likelihood: 5220 for lag model
## ML residual variance (sigma squared): 0.001969, (sigma: 0.04437)
## Number of observations: 3107
## Number of parameters estimated: 12
## AIC: -10416, (AIC for lm: -9550)
## LM test for residual autocorrelation
## test value: 68.08, p-value: < 2.22e-16
# The 2nd interaction lowers the AIC overall, so keep it. The inclusion of
# the PROFEMHH variable also consistently lowers the AIC, so keep it. Will
# add the interaction term between PROBLCK and PROAUNEM that resulted in
# the 3rd highest overall change (in absolute magnitude).
povlag_int3 <- lagsarlm(pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS +
pov$PROWKCO + pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR + pov$PROFEMHH:pov$PROAUNEM +
pov$PROHSLS:pov$PROAUNEM + pov$PROBLCK:pov$PROAUNEM, listw = pov_nbq_w,
zero.policy = T)
summary(povlag_int3) # p-values for all predictors are significant except PROFEMHH, Rho: 0.40934, AIC: -10427, p-value for LM test for resid autocorrelation: 2.3315e-15
##
## Call:
## lagsarlm(formula = pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS +
## pov$PROWKCO + pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR +
## pov$PROFEMHH:pov$PROAUNEM + pov$PROHSLS:pov$PROAUNEM + pov$PROBLCK:pov$PROAUNEM,
## listw = pov_nbq_w, zero.policy = T)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2008267 -0.0271235 -0.0040221 0.0211449 0.4494534
##
## Type: lag
## Regions with no neighbours included:
## 35 689 709 880
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1411917 0.0128801 -10.9620 < 2.2e-16
## pov$PROBLCK 0.1152612 0.0188776 6.1057 1.024e-09
## pov$PROHISP 0.1316647 0.0079344 16.5942 < 2.2e-16
## pov$PROHSLS 0.1271430 0.0177283 7.1717 7.405e-13
## pov$PROWKCO 0.0572919 0.0051292 11.1696 < 2.2e-16
## pov$PROFEMHH 0.0222566 0.0354591 0.6277 0.5302210
## pov$PROAUNEM -2.1241696 0.1909754 -11.1227 < 2.2e-16
## pov$PROMUNDR 0.3958445 0.0178842 22.1337 < 2.2e-16
## pov$PROFEMHH:pov$PROAUNEM 4.6502927 0.3647565 12.7490 < 2.2e-16
## pov$PROHSLS:pov$PROAUNEM 2.7863313 0.2656661 10.4881 < 2.2e-16
## pov$PROBLCK:pov$PROAUNEM -0.6597251 0.1887224 -3.4957 0.0004727
##
## Rho: 0.4093, LR test value: 879.7, p-value: < 2.22e-16
## Asymptotic standard error: 0.01339
## z-value: 30.56, p-value: < 2.22e-16
## Wald statistic: 934, p-value: < 2.22e-16
##
## Log likelihood: 5226 for lag model
## ML residual variance (sigma squared): 0.00196, (sigma: 0.04427)
## Number of observations: 3107
## Number of parameters estimated: 13
## AIC: -10427, (AIC for lm: -9549)
## LM test for residual autocorrelation
## test value: 62.75, p-value: 2.3315e-15
# The additional term lowers the AIC, so keep it. Try removing PROFEMHH
# again since it's p-value is not significant.
povlag_int3r1 <- lagsarlm(pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS +
pov$PROWKCO + pov$PROAUNEM + pov$PROMUNDR + pov$PROFEMHH:pov$PROAUNEM +
pov$PROHSLS:pov$PROAUNEM + pov$PROBLCK:pov$PROAUNEM, listw = pov_nbq_w,
zero.policy = T)
summary(povlag_int3r1) # # p-values for all predictors are significant, Rho: 0.40886, AIC: -10428, p-value for LM test for resid autocorrelation: 8.8818e-15
##
## Call:
## lagsarlm(formula = pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS +
## pov$PROWKCO + pov$PROAUNEM + pov$PROMUNDR + pov$PROFEMHH:pov$PROAUNEM +
## pov$PROHSLS:pov$PROAUNEM + pov$PROBLCK:pov$PROAUNEM, listw = pov_nbq_w,
## zero.policy = T)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2012355 -0.0270264 -0.0038699 0.0209987 0.4482308
##
## Type: lag
## Regions with no neighbours included:
## 35 689 709 880
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1392979 0.0125171 -11.1286 < 2.2e-16
## pov$PROBLCK 0.1230821 0.0143511 8.5765 < 2.2e-16
## pov$PROHISP 0.1318116 0.0079368 16.6076 < 2.2e-16
## pov$PROHSLS 0.1274321 0.0177256 7.1892 6.519e-13
## pov$PROWKCO 0.0577547 0.0050890 11.3490 < 2.2e-16
## pov$PROAUNEM -2.1296776 0.1908409 -11.1594 < 2.2e-16
## pov$PROMUNDR 0.3960316 0.0178874 22.1403 < 2.2e-16
## pov$PROAUNEM:pov$PROFEMHH 4.8245736 0.2332749 20.6819 < 2.2e-16
## pov$PROHSLS:pov$PROAUNEM 2.7688312 0.2638865 10.4925 < 2.2e-16
## pov$PROBLCK:pov$PROAUNEM -0.7210186 0.1605918 -4.4898 7.130e-06
##
## Rho: 0.4089, LR test value: 880.6, p-value: < 2.22e-16
## Asymptotic standard error: 0.0134
## z-value: 30.52, p-value: < 2.22e-16
## Wald statistic: 931.2, p-value: < 2.22e-16
##
## Log likelihood: 5226 for lag model
## ML residual variance (sigma squared): 0.001961, (sigma: 0.04428)
## Number of observations: 3107
## Number of parameters estimated: 12
## AIC: -10428, (AIC for lm: -9550)
## LM test for residual autocorrelation
## test value: 60.14, p-value: 8.8818e-15
# The removal of PROFEMHH resulted in an AIC that is one digit lower, so
# keep it for parsimony and fit. Try adding the last interaction term from
# the employment interaction model (povlag_paunem_int).
povlag_int4r1 <- lagsarlm(pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS +
pov$PROWKCO + pov$PROAUNEM + pov$PROMUNDR + pov$PROFEMHH:pov$PROAUNEM +
pov$PROHSLS:pov$PROAUNEM + pov$PROBLCK:pov$PROAUNEM + pov$PROHISP:pov$PROAUNEM,
listw = pov_nbq_w, zero.policy = T)
summary(povlag_int4r1) # # p-values for all predictors are significant, Rho: 0.40332, AIC: -10449, p-value for LM test for resid autocorrelation: 3.042e-13
##
## Call:
## lagsarlm(formula = pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS +
## pov$PROWKCO + pov$PROAUNEM + pov$PROMUNDR + pov$PROFEMHH:pov$PROAUNEM +
## pov$PROHSLS:pov$PROAUNEM + pov$PROBLCK:pov$PROAUNEM + pov$PROHISP:pov$PROAUNEM,
## listw = pov_nbq_w, zero.policy = T)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.198791 -0.026433 -0.003855 0.020973 0.451232
##
## Type: lag
## Regions with no neighbours included:
## 35 689 709 880
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1385233 0.0124769 -11.1024 < 2.2e-16
## pov$PROBLCK 0.1298194 0.0143765 9.0300 < 2.2e-16
## pov$PROHISP 0.2155045 0.0190383 11.3195 < 2.2e-16
## pov$PROHSLS 0.1197528 0.0177349 6.7524 1.455e-11
## pov$PROWKCO 0.0571659 0.0050768 11.2602 < 2.2e-16
## pov$PROAUNEM -2.1976671 0.1906772 -11.5256 < 2.2e-16
## pov$PROMUNDR 0.4051790 0.0179033 22.6316 < 2.2e-16
## pov$PROAUNEM:pov$PROFEMHH 4.8029827 0.2326680 20.6431 < 2.2e-16
## pov$PROHSLS:pov$PROAUNEM 2.9369203 0.2651546 11.0763 < 2.2e-16
## pov$PROBLCK:pov$PROAUNEM -0.7867199 0.1607577 -4.8938 9.889e-07
## pov$PROHISP:pov$PROAUNEM -0.8416600 0.1745634 -4.8215 1.425e-06
##
## Rho: 0.4033, LR test value: 857.2, p-value: < 2.22e-16
## Asymptotic standard error: 0.01341
## z-value: 30.08, p-value: < 2.22e-16
## Wald statistic: 904.5, p-value: < 2.22e-16
##
## Log likelihood: 5238 for lag model
## ML residual variance (sigma squared): 0.001948, (sigma: 0.04414)
## Number of observations: 3107
## Number of parameters estimated: 13
## AIC: -10449, (AIC for lm: -9594)
## LM test for residual autocorrelation
## test value: 53.18, p-value: 3.042e-13
# Just for kicks, since the inclusion of PROFEMHH has been on the fence,
# add it back in to see how it affects the AIC.
povlag_int4 <- lagsarlm(pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS +
pov$PROWKCO + pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR + pov$PROFEMHH:pov$PROAUNEM +
pov$PROHSLS:pov$PROAUNEM + pov$PROBLCK:pov$PROAUNEM + pov$PROHISP:pov$PROAUNEM,
listw = pov_nbq_w, zero.policy = T)
summary(povlag_int4) # # p-values for all predictors are significant, Rho: 0.40362, AIC: -10447, p-value for LM test for resid autocorrelation: 1.4133e-13
##
## Call:
## lagsarlm(formula = pov$PROPOV ~ pov$PROBLCK + pov$PROHISP + pov$PROHSLS +
## pov$PROWKCO + pov$PROFEMHH + pov$PROAUNEM + pov$PROMUNDR +
## pov$PROFEMHH:pov$PROAUNEM + pov$PROHSLS:pov$PROAUNEM + pov$PROBLCK:pov$PROAUNEM +
## pov$PROHISP:pov$PROAUNEM, listw = pov_nbq_w, zero.policy = T)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.1985629 -0.0265292 -0.0039071 0.0210961 0.4519332
##
## Type: lag
## Regions with no neighbours included:
## 35 689 709 880
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1396315 0.0128418 -10.8732 < 2.2e-16
## pov$PROBLCK 0.1252276 0.0189231 6.6177 3.648e-11
## pov$PROHISP 0.2150670 0.0190539 11.2873 < 2.2e-16
## pov$PROHSLS 0.1196164 0.0177369 6.7439 1.542e-11
## pov$PROWKCO 0.0568983 0.0051166 11.1204 < 2.2e-16
## pov$PROFEMHH 0.0129866 0.0353863 0.3670 0.7136
## pov$PROAUNEM -2.1941670 0.1908506 -11.4968 < 2.2e-16
## pov$PROMUNDR 0.4050312 0.0179018 22.6252 < 2.2e-16
## pov$PROFEMHH:pov$PROAUNEM 4.7013822 0.3638585 12.9209 < 2.2e-16
## pov$PROHSLS:pov$PROAUNEM 2.9464238 0.2667677 11.0449 < 2.2e-16
## pov$PROBLCK:pov$PROAUNEM -0.7506799 0.1891346 -3.9690 7.217e-05
## pov$PROHISP:pov$PROAUNEM -0.8381232 0.1747459 -4.7962 1.617e-06
##
## Rho: 0.4036, LR test value: 855.1, p-value: < 2.22e-16
## Asymptotic standard error: 0.01341
## z-value: 30.1, p-value: < 2.22e-16
## Wald statistic: 905.9, p-value: < 2.22e-16
##
## Log likelihood: 5238 for lag model
## ML residual variance (sigma squared): 0.001948, (sigma: 0.04413)
## Number of observations: 3107
## Number of parameters estimated: 14
## AIC: -10447, (AIC for lm: -9594)
## LM test for residual autocorrelation
## test value: 54.69, p-value: 1.4133e-13
# The AIC with PROFEMHH is lower, so povlag_int4r1 appears as the best
# predictio model.
# Look at residuals
pov$resid_plagint4r1 <- residuals(povlag_int4r1)
moran.test(pov$resid_plagint4r1, listw = pov_nbq_w, zero.policy = T) # Moran's I = 0.0655996436, p-value = 4.604e-10 => the spatial autocovariance in the residuals is still significant, but close to 0, and quite a bit less than the full lag model (Moran's I = 0.1203) and its OLS counterpart (Moran's I = 0.4160), indicating that this model is reducing model misspecification
##
## Moran's I test under randomisation
##
## data: pov$resid_plagint4r1
## weights: pov_nbq_w
##
## Moran I statistic standard deviate = 6.123, p-value = 4.604e-10
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.0655996 -0.0003224 0.0001159
# Use B-P test to see if there is heteroscedacity in the residuals
bptest.sarlm(povlag_int4r1) # BP = 89.3419, df = 10, p-value = 7.216e-15
##
## studentized Breusch-Pagan test
##
## data:
## BP = 89.34, df = 10, p-value = 7.216e-15
# Examine spatial structure via predictors' direct, indirect, and overall
# average impacts
impacts(povlag_int4r1, listw = pov_nbq_w) # total - problck: 0.21745794; prohisp:0.36098729, prohsls: 0.20059557; prowkco: 0.09575737; proaunem: -3.68126815; promundr: 0.67870720, profemhh:proaunem: 8.04538002; prohsls:proaunem: 4.91957631; problck:proaunem: -1.31781872; prohisp:proaunem: -1.40984781
## Impact measures (lag, exact):
## Direct Indirect Total
## pov$PROBLCK 0.13428 0.08318 0.21746
## pov$PROHISP 0.22291 0.13808 0.36099
## pov$PROHSLS 0.12387 0.07673 0.20060
## pov$PROWKCO 0.05913 0.03663 0.09576
## pov$PROAUNEM -2.27316 -1.40810 -3.68127
## pov$PROMUNDR 0.41910 0.25961 0.67871
## pov$PROAUNEM:pov$PROFEMHH 4.96798 3.07740 8.04538
## pov$PROHSLS:pov$PROAUNEM 3.03781 1.88176 4.91958
## pov$PROBLCK:pov$PROAUNEM -0.81375 -0.50407 -1.31782
## pov$PROHISP:pov$PROAUNEM -0.87057 -0.53927 -1.40985
# Examine spatial structure via a change in a predictor Looking at the
# contribution of predictors, the single predictor that most affects the
# overall average change in PROPOV is PROAUNEM, so will make a change in
# this variable to see how it affects child poverty Copy original data
# frame
pov.new <- pov
# Look at the structure of the data frame
str(pov.new@data)
## 'data.frame': 3107 obs. of 25 variables:
## $ POLY_ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ NAME : Factor w/ 1801 levels "Abbeville","Acadia",..: 902 564 1542 1180 1242 177 948 569 628 1615 ...
## $ FIPS : Factor w/ 3107 levels "01001","01003",..: 1317 2928 2951 2942 2944 526 1589 1577 1580 1613 ...
## $ MET : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PROPOV : num 0.117 0.305 0.229 0.286 0.283 ...
## $ WGHT : num 118 414 1690 1908 530 ...
## $ PROBLCK : num 0 0.00111 0.00288 0.00087 0.00269 ...
## $ PROHISP : num 0.00491 0.01716 0.0168 0.08414 0.01155 ...
## $ SOUTH : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PROHSLS : num 0.615 0.643 0.59 0.628 0.613 ...
## $ PROWKCO : num 0.77 0.842 0.773 0.93 0.685 ...
## $ PROFEMHH : num 0.116 0.163 0.153 0.186 0.177 ...
## $ PROEXTR : num 0.103 0.32 0.159 0.258 0.163 ...
## $ PRONMAN : num 0.07947 0.00915 0.01476 0.01269 0.05104 ...
## $ PROMSERV : num 0.1211 0.0466 0.0773 0.0688 0.0602 ...
## $ PROPSERV : num 0.253 0.338 0.305 0.283 0.26 ...
## $ PROAUNEM : num 0.0389 0.1681 0.107 0.102 0.1499 ...
## $ PROMUNDR : num 0.189 0.296 0.265 0.266 0.269 ...
## $ XCOORD : num -94.9 -118.5 -117.9 -119.9 -117.3 ...
## $ YCOORD : num 48.9 48.4 48.4 48.5 48.5 ...
## $ X2 : num 9003 14038 13903 14366 13767 ...
## $ Y2 : num 2388 2344 2342 2349 2354 ...
## $ lmresid : num -0.00376 -0.0144 0.00629 0.01977 0.0162 ...
## $ lagresid : num -0.02241 0.00766 -0.00206 0.0338 0.04153 ...
## $ resid_plagint4r1: num -0.0248 0.0241 0.0192 0.0416 0.059 ...
## - attr(*, "data_types")= chr "N" "C" "C" "N" ...
# Look at unemployment rate in Keith County, NE (relatively sparsely
# populated county in the middle of the U.S.)
pov.new@data[grepl("Keith", pov.new@data$NAME), c("NAME", "PROAUNEM")]
## NAME PROAUNEM
## 710 Keith 0.0308
# Change the unemployment rate in Keith County, NE from 3% to 70%
pov.new@data[pov.new@data$NAME == "Keith", "PROAUNEM"] <- 0.7
# The original predicted values
orig.pred <- as.data.frame(predict(povlag_int4r1))
# The predicted values with the new unemployment rate in Nebraska
new.pred <- as.data.frame(predict(povlag_int4r1, newdata = pov.new, listw = pov_nbq_w,
zero.policy = T))
# The difference between the predicted values
kCoNE_effect <- new.pred$fit - orig.pred$fit
abc <- data.frame(name = pov$NAME, diff_in_pred_PROPOV = kCoNE_effect)
pov.new$KCONECHG <- abc$diff_in_pred_PROPOV
# Sort counties by absolute value of the change in predicted PROPOV
abc <- abc[rev(order(abs(abc$diff_in_pred_PROPOV))), ]
abc[1:10, ] # Show the top 10 counties
## name diff_in_pred_PROPOV
## 459 Todd -0.07961
## 582 Hooker -0.07849
## 632 Logan -0.07606
## 584 Thomas -0.07545
## 435 Gregory -0.05978
## 375 Jones -0.05958
## 420 Tripp -0.05770
## 517 Brown -0.05762
## 2181 Hall -0.05401
## 399 Mellette -0.05279
# Map Keith County cascade effects
pal5 <- brewer.pal(6, "BrBG")
cats5 <- classIntervals(pov.new$KCONECHG, n = 5, style = "jenks")
colors5 <- findColours(cats5, pal5)
plot(pov.new, col = colors5)
legend("bottomleft", legend = round(cats5$brks, 2), fill = pal5, bty = "n",
cex = 0.7)
mtext("Effects of a change in Keith County, NE (set PROAUNEM=.70)\n on predicted values in a spatial lag model",
side = 3, line = 1)