In this homework I use public use data about index of marginalization (IM), percentage of population of 12 years old or more illiterate (ANALF), population of 12 years old or more without elementary education (SPRIM), percentage of population that earns up to two minimun wages (PO2SM) and number of homicides (scale(homicides)) by municipality in Mexico (year 2015).
# Upload Mexican municipalities and marginalization variables
mxm<-st_read(dsn = "C:/Users/gpe637/Desktop/UTSA/Mexico shapefiles", layer = "mpiosMX")
## Reading layer `mpiosMX' from data source `C:\Users\gpe637\Desktop\UTSA\Mexico shapefiles' using driver `ESRI Shapefile'
## Simple feature collection with 2463 features and 4 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 911292 ymin: 319149.1 xmax: 4082997 ymax: 2349615
## epsg (SRID): NA
## proj4string: +proj=lcc +lat_1=17.5 +lat_2=29.5 +lat_0=12 +lon_0=-102 +x_0=2500000 +y_0=0 +ellps=GRS80 +units=m +no_defs
im<-read_csv("C:/Users/gpe637/Desktop/UTSA/Spring 2019/Spatial Demography/im2015.csv")
## Parsed with column specification:
## cols(
## CVEGEO = col_character(),
## POB_TOT = col_integer(),
## ANALF = col_double(),
## SPRIM = col_double(),
## OVSDE = col_double(),
## OVSEE = col_double(),
## OVSAE = col_double(),
## VHAC = col_double(),
## OVPT = col_double(),
## `PL<5000` = col_double(),
## PO2SM = col_double(),
## IM = col_double(),
## GM = col_character(),
## LUG_NAC = col_integer(),
## LUGAR_EST = col_integer(),
## homicides = col_integer()
## )
mxim<-geo_join(mxm, im, by_sp="CVEGEO", by_df="CVEGEO", how ="inner")
## Warning: Column `CVEGEO` joining factor and character vector, coercing into
## character vector
# Spatial weights matrix
mxim_q<-poly2nb(as(mxim, "Spatial"), queen = T)
summary(mxim_q)
## Neighbour list object:
## Number of regions: 2457
## Number of nonzero links: 14378
## Percentage nonzero weights: 0.2381706
## Average number of links: 5.851852
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 19
## 8 64 205 388 519 467 321 230 141 52 26 15 5 4 3 3 1 1
## 20 22
## 1 3
## 8 least connected regions:
## 521 914 1037 1224 1488 2245 2363 2440 with 1 link
## 3 most connected regions:
## 948 1014 1166 with 22 links
mximwq<-nb2listw(mxim_q, style = "W") # Weight matrix (queen)
mxim_r<-poly2nb(as(mxim, "Spatial"), queen = F)
summary(mxim_r)
## Neighbour list object:
## Number of regions: 2457
## Number of nonzero links: 14162
## Percentage nonzero weights: 0.2345926
## Average number of links: 5.76394
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 19
## 8 70 218 412 512 461 324 224 120 52 23 12 6 4 4 1 1 1
## 20 21 22
## 2 1 1
## 8 least connected regions:
## 521 914 1037 1224 1488 2245 2363 2440 with 1 link
## 1 most connected region:
## 948 with 22 links
mximwr<-nb2listw(mxim_r, style = "W") # Weight matrix (rook)
mxim_nn5<-knearneigh(x=coordinates(as(mxim, "Spatial")), k=5)
mxim_n5<-knn2nb(knn = mxim_nn5)
mxim_nn5<-nb2listw(mxim_n5, style = "W") # Weight matrix (k =5)
I fit IM against ANALF, SPRIM, PO2SM and homicides first with an OLS model. Then I test for spatial autocorrelation of the residuals with three different adjancency matrix structures, i.e. queen, rook and k-5 neighbors.
# OLS
mOLS<-lm(IM~ANALF+SPRIM+PO2SM+scale(homicides), data = mxim)
summary(mOLS)
##
## Call:
## lm(formula = IM ~ ANALF + SPRIM + PO2SM + scale(homicides), data = mxim)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4717 -0.2172 -0.0486 0.1518 3.8737
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.1589650 0.0310101 -69.621 <2e-16 ***
## ANALF 0.0574205 0.0019315 29.729 <2e-16 ***
## SPRIM 0.0220986 0.0014750 14.982 <2e-16 ***
## PO2SM 0.0151084 0.0005962 25.342 <2e-16 ***
## scale(homicides) -0.0144659 0.0079317 -1.824 0.0683 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3788 on 2452 degrees of freedom
## Multiple R-squared: 0.8567, Adjusted R-squared: 0.8565
## F-statistic: 3665 on 4 and 2452 DF, p-value: < 2.2e-16
mxim$resid<-rstudent(mOLS)
#Moran's I on residuals from OLS model
MIq<-lm.morantest(mOLS, listw = mximwq)
MIr<-lm.morantest(mOLS, listw = mximwr)
MIk5<-lm.morantest(mOLS, listw = mxim_nn5)
MIq; MIr; MIk5
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = IM ~ ANALF + SPRIM + PO2SM + scale(homicides),
## data = mxim)
## weights: mximwq
##
## Moran I statistic standard deviate = 35.039, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.425436767 -0.001165284 0.000148236
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = IM ~ ANALF + SPRIM + PO2SM + scale(homicides),
## data = mxim)
## weights: mximwr
##
## Moran I statistic standard deviate = 34.875, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.426689448 -0.001167192 0.000150508
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = IM ~ ANALF + SPRIM + PO2SM + scale(homicides),
## data = mxim)
## weights: mxim_nn5
##
## Moran I statistic standard deviate = 34.083, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.4071609036 -0.0011960527 0.0001435494
I will continue with the rook adjacency matrix because shows a highest Moran’s I than the queen adjacency matrix and k-neighbors (k=5)
mxim2<-st_as_sf(mxim)
mxim2%>%
mutate(rquant=cut(resid, breaks = quantile(mxim$resid, p=seq(0,1,length.out = 6)), include.lowest = T))%>%
ggplot(aes(color=rquant, fill=rquant))+geom_sf()+
scale_fill_viridis_d()+
scale_color_viridis_d()+geom_sf(data=mxim2, fill=NA, color="black")+ggtitle("Mexico", "OLS Model Residuals")

ggplot(mxim, aes(fill=GM))+geom_sf()+ggtitle("Mexico", "Marginalization level by municipality in 2015")

ggplot(mxim, aes(color=homicides, fill=homicides))+geom_sf()+ggtitle("Mexico", "Number of homicides by municipality in 2015")

Alternative SAR models
By lookinf at the different LMTs we should go with the Spatial Error Model since both regular and robust LMT showed larger values for that model.
lm.LMtests(model = mOLS, listw=mximwr, test = c("LMerr", "LMlag", "RLMerr", "RLMlag"))
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = IM ~ ANALF + SPRIM + PO2SM + scale(homicides),
## data = mxim)
## weights: mximwr
##
## LMerr = 1202.1, df = 1, p-value < 2.2e-16
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = IM ~ ANALF + SPRIM + PO2SM + scale(homicides),
## data = mxim)
## weights: mximwr
##
## LMlag = 364.41, df = 1, p-value < 2.2e-16
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = IM ~ ANALF + SPRIM + PO2SM + scale(homicides),
## data = mxim)
## weights: mximwr
##
## RLMerr = 840.8, df = 1, p-value < 2.2e-16
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = IM ~ ANALF + SPRIM + PO2SM + scale(homicides),
## data = mxim)
## weights: mximwr
##
## RLMlag = 3.1118, df = 1, p-value = 0.07773
Spatial Error Model
m.error<-errorsarlm(IM~ANALF+SPRIM+PO2SM+scale(homicides), data=mxim, listw=mximwr)
summary(m.error)
##
## Call:errorsarlm(formula = IM ~ ANALF + SPRIM + PO2SM + scale(homicides),
## data = mxim, listw = mximwr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.354363 -0.160916 -0.035264 0.124270 3.494154
##
## Type: error
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.23540082 0.03967384 -56.3445 < 2.2e-16
## ANALF 0.04708395 0.00213966 22.0054 < 2.2e-16
## SPRIM 0.02732608 0.00158819 17.2058 < 2.2e-16
## PO2SM 0.01553940 0.00063769 24.3682 < 2.2e-16
## scale(homicides) -0.01848298 0.00640594 -2.8853 0.003911
##
## Lambda: 0.68877, LR test value: 905.94, p-value: < 2.22e-16
## Asymptotic standard error: 0.018716
## z-value: 36.801, p-value: < 2.22e-16
## Wald statistic: 1354.3, p-value: < 2.22e-16
##
## Log likelihood: -646.0425 for error model
## ML residual variance (sigma squared): 0.089028, (sigma: 0.29838)
## Number of observations: 2457
## Number of parameters estimated: 7
## AIC: 1306.1, (AIC for lm: 2210)
Spatial Lag Model
m.lag<-lagsarlm(IM~ANALF+SPRIM+PO2SM+scale(homicides), data=mxim, listw=mximwr, type="lag")
summary(m.lag)
##
## Call:lagsarlm(formula = IM ~ ANALF + SPRIM + PO2SM + scale(homicides),
## data = mxim, listw = mximwr, type = "lag")
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.302308 -0.206467 -0.034038 0.151152 3.960301
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.74391001 0.03886538 -44.8705 < 2e-16
## ANALF 0.04330824 0.00193540 22.3769 < 2e-16
## SPRIM 0.02028328 0.00138204 14.6763 < 2e-16
## PO2SM 0.01162123 0.00059367 19.5753 < 2e-16
## scale(homicides) -0.01906466 0.00736620 -2.5881 0.00965
##
## Rho: 0.26807, LR test value: 327.28, p-value: < 2.22e-16
## Asymptotic standard error: 0.015013
## z-value: 17.856, p-value: < 2.22e-16
## Wald statistic: 318.85, p-value: < 2.22e-16
##
## Log likelihood: -935.3741 for lag model
## ML residual variance (sigma squared): 0.12373, (sigma: 0.35175)
## Number of observations: 2457
## Number of parameters estimated: 7
## AIC: 1884.7, (AIC for lm: 2210)
## LM test for residual autocorrelation
## test value: 546.48, p-value: < 2.22e-16