Poisson model
mhpois<-glm(homicides~offset(log(pop.x))+ GM + fahomicides + SPRIM + PO2SM, data = mxmx, family = poisson)
summary(mhpois)
##
## Call:
## glm(formula = homicides ~ offset(log(pop.x)) + GM + fahomicides +
## SPRIM + PO2SM, family = poisson, data = mxmx)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.3422 -1.5259 -0.7231 0.4720 16.8240
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.472e+00 7.773e-02 -108.995 < 2e-16 ***
## GMBajo -6.068e-02 4.037e-02 -1.503 0.1328
## GMMedio -7.723e-02 3.590e-02 -2.151 0.0315 *
## GMMuy alto 3.219e-01 4.305e-02 7.478 7.53e-14 ***
## GMMuy bajo -3.710e-01 5.401e-02 -6.869 6.47e-12 ***
## fahomicides 3.015e-03 3.878e-05 77.745 < 2e-16 ***
## SPRIM 1.944e-02 1.777e-03 10.943 < 2e-16 ***
## PO2SM -1.506e-02 8.854e-04 -17.013 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 16205 on 2303 degrees of freedom
## Residual deviance: 11162 on 2296 degrees of freedom
## AIC: 16162
##
## Number of Fisher Scoring iterations: 6
exp(coef(mhpois))
## (Intercept) GMBajo GMMedio GMMuy alto GMMuy bajo
## 0.0002091835 0.9411262363 0.9256770951 1.3798134378 0.6900588506
## fahomicides SPRIM PO2SM
## 1.0030194766 1.0196341443 0.9850486650
Interested to observe if this model presents overdispersion, we follow the handout like this:
scale<-sqrt(mhpois$deviance/mhpois$df.residual)
scale
## [1] 2.204889
1-pchisq(mhpois$deviance, df=mhpois$df.residual)
## [1] 0
We observed overdispersion which indicates a Poisson model is not the best. So we turn to a quasi-poisson and negative binomial distributions.
quasi-poisson
mhqpois<-glm(homicides~offset(log(pop.x)) + GM + fahomicides + SPRIM + PO2SM, data = mxmx, family = quasipoisson)
summary(mhqpois)
##
## Call:
## glm(formula = homicides ~ offset(log(pop.x)) + GM + fahomicides +
## SPRIM + PO2SM, family = quasipoisson, data = mxmx)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.3422 -1.5259 -0.7231 0.4720 16.8240
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.4722988 0.2329677 -36.367 < 2e-16 ***
## GMBajo -0.0606780 0.1209783 -0.502 0.616024
## GMMedio -0.0772298 0.1075960 -0.718 0.472968
## GMMuy alto 0.3219483 0.1290269 2.495 0.012658 *
## GMMuy bajo -0.3709784 0.1618672 -2.292 0.022003 *
## fahomicides 0.0030149 0.0001162 25.940 < 2e-16 ***
## SPRIM 0.0194439 0.0053254 3.651 0.000267 ***
## PO2SM -0.0150642 0.0026537 -5.677 1.55e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 8.982544)
##
## Null deviance: 16205 on 2303 degrees of freedom
## Residual deviance: 11162 on 2296 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 6
Negative binomial
mhnb<-glm.nb(homicides~offset(log(pop.x)) + GM + fahomicides + SPRIM + PO2SM, data = mxmx)
## Warning: glm.fit: algorithm did not converge
## Warning in glm.nb(homicides ~ offset(log(pop.x)) + GM + fahomicides + SPRIM
## + : alternation limit reached
summary(mhnb)
##
## Call:
## glm.nb(formula = homicides ~ offset(log(pop.x)) + GM + fahomicides +
## SPRIM + PO2SM, data = mxmx, init.theta = 1.241328118, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -5.9435 -0.9509 -0.4480 0.2566 9.4084
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.6007312 0.2138309 -40.222 < 2e-16 ***
## GMBajo -0.0958806 0.1029660 -0.931 0.351757
## GMMedio -0.0120057 0.0813111 -0.148 0.882618
## GMMuy alto 0.3570396 0.0986977 3.618 0.000297 ***
## GMMuy bajo -0.4362342 0.1434943 -3.040 0.002365 **
## fahomicides 0.0220249 0.0007622 28.896 < 2e-16 ***
## SPRIM 0.0232791 0.0042797 5.439 5.35e-08 ***
## PO2SM -0.0176527 0.0024699 -7.147 8.87e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1.2413) family taken to be 1)
##
## Null deviance: 2858.9 on 2303 degrees of freedom
## Residual deviance: 2469.0 on 2296 degrees of freedom
## AIC: 9985.8
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1.2413
## Std. Err.: 0.0606
## Warning while fitting theta: alternation limit reached
##
## 2 x log-likelihood: -9967.8270
exp(coef(mhnb))
## (Intercept) GMBajo GMMedio GMMuy alto GMMuy bajo
## 0.0001839712 0.9085724710 0.9880661093 1.4290924233 0.6464663099
## fahomicides SPRIM PO2SM
## 1.0222692550 1.0235521652 0.9825021521
Binomial count model
mhbin<-glm(cbind(homicides, pop.x) ~ GM + fahomicides + SPRIM + PO2SM, family=binomial, data = mxmx)
summary(mhbin)
##
## Call:
## glm(formula = cbind(homicides, pop.x) ~ GM + fahomicides + SPRIM +
## PO2SM, family = binomial, data = mxmx)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.3415 -1.5258 -0.7230 0.4719 16.8169
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.4723823 0.0777363 -108.989 < 2e-16 ***
## GMBajo -0.0606462 0.0403679 -1.502 0.1330
## GMMedio -0.0772174 0.0359026 -2.151 0.0315 *
## GMMuy alto 0.3219201 0.0430512 7.478 7.57e-14 ***
## GMMuy bajo -0.3709481 0.0540114 -6.868 6.51e-12 ***
## fahomicides 0.0030151 0.0000388 77.716 < 2e-16 ***
## SPRIM 0.0194441 0.0017770 10.942 < 2e-16 ***
## PO2SM -0.0150632 0.0008855 -17.011 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 16200 on 2303 degrees of freedom
## Residual deviance: 11158 on 2296 degrees of freedom
## AIC: 16157
##
## Number of Fisher Scoring iterations: 5
exp(coef(mhbin))
## (Intercept) GMBajo GMMedio GMMuy alto GMMuy bajo fahomicides
## 0.000209166 0.941156146 0.925688620 1.379774561 0.690079784 1.003019681
## SPRIM PO2SM
## 1.019634397 0.985049713
Filtering the models for spatial autocorrelation
Spatial weights matrix (queen and rook)
mxmx_q<-poly2nb(as(mxmx, "Spatial"), queen = T)
summary(mxmx_q)
## Neighbour list object:
## Number of regions: 2304
## Number of nonzero links: 13082
## Percentage nonzero weights: 0.2464389
## Average number of links: 5.677951
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## 13 74 225 401 469 429 277 202 120 42 23 13 4 4 1 1 1 1
## 20 21 22
## 1 1 2
## 13 least connected regions:
## 68 95 463 834 866 937 1099 1235 1264 1344 2100 2213 2287 with 1 link
## 2 most connected regions:
## 864 1047 with 22 links
mxmxwq<-nb2listw(mxmx_q, style = "W") # Weight matrix (queen)
mxmx_r<-poly2nb(as(mxmx, "Spatial"), queen = F)
summary(mxmx_r)
## Neighbour list object:
## Number of regions: 2304
## Number of nonzero links: 12890
## Percentage nonzero weights: 0.242822
## Average number of links: 5.594618
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 17 18 20
## 13 81 236 426 461 423 271 199 102 46 20 10 6 2 2 1 1 3
## 22
## 1
## 13 least connected regions:
## 68 95 463 834 866 937 1099 1235 1264 1344 2100 2213 2287 with 1 link
## 1 most connected region:
## 864 with 22 links
mxmxwr<-nb2listw(mxmx_r, style = "W") # Weight matrix (rook)
Models residuals autocorrelation (showing just the residuals Moran’s I for the negative binomial and the relative risk models)
lm.morantest(mhpoise, listw = mxmxwq)
##
## Global Moran I for regression residuals
##
## data:
## model: glm(formula = homicides ~ offset(log(Eh)) + GM +
## fahomicides + SPRIM + PO2SM, family = poisson, data = mxmx)
## weights: mxmxwq
##
## Moran I statistic standard deviate = 25.6, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.3298557418 -0.0003204983 0.0001663434
lm.morantest(mhpoise, listw = mxmxwr)
##
## Global Moran I for regression residuals
##
## data:
## model: glm(formula = homicides ~ offset(log(Eh)) + GM +
## fahomicides + SPRIM + PO2SM, family = poisson, data = mxmx)
## weights: mxmxwr
##
## Moran I statistic standard deviate = 25.443, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.3303107145 -0.0003203137 0.0001688692
lm.morantest(mhnb, listw = mxmxwq)
##
## Global Moran I for regression residuals
##
## data:
## model: glm.nb(formula = homicides ~ offset(log(pop.x)) + GM +
## fahomicides + SPRIM + PO2SM, data = mxmx, init.theta =
## 1.241328118, link = log)
## weights: mxmxwq
##
## Moran I statistic standard deviate = 20.5, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.2608176155 -0.0019851943 0.0001643368
lm.morantest(mhnb, listw = mxmxwr)
##
## Global Moran I for regression residuals
##
## data:
## model: glm.nb(formula = homicides ~ offset(log(pop.x)) + GM +
## fahomicides + SPRIM + PO2SM, data = mxmx, init.theta =
## 1.241328118, link = log)
## weights: mxmxwr
##
## Moran I statistic standard deviate = 20.435, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.2619731169 -0.0019857742 0.0001668439
Moran eigenvectors (just showing for the relative risk model)
me.mhpoise<-spdep::ME(mhpoise, family=poisson, data = mxmx, listw = mxmxwq, verbose = T, alpha = 0.2)
## eV[,209], I: 0.1126283 ZI: NA, pr(ZI): 0.01
## eV[,244], I: 0.09657619 ZI: NA, pr(ZI): 0.01
## eV[,73], I: 0.08616103 ZI: NA, pr(ZI): 0.01
## eV[,62], I: 0.0755501 ZI: NA, pr(ZI): 0.01
## eV[,291], I: 0.06000866 ZI: NA, pr(ZI): 0.01
## eV[,1654], I: 0.0512278 ZI: NA, pr(ZI): 0.01
## eV[,29], I: 0.04507192 ZI: NA, pr(ZI): 0.01
## eV[,1759], I: 0.03922331 ZI: NA, pr(ZI): 0.01
## eV[,776], I: 0.03423445 ZI: NA, pr(ZI): 0.01
## eV[,98], I: 0.02954958 ZI: NA, pr(ZI): 0.01
## eV[,1062], I: 0.02537175 ZI: NA, pr(ZI): 0.04
## eV[,302], I: 0.02145699 ZI: NA, pr(ZI): 0.01
## eV[,1155], I: 0.01787133 ZI: NA, pr(ZI): 0.06
## eV[,357], I: 0.01409356 ZI: NA, pr(ZI): 0.08
## eV[,2053], I: 0.01075624 ZI: NA, pr(ZI): 0.19
## eV[,160], I: 0.007696783 ZI: NA, pr(ZI): 0.21
me.mhpoise
## Eigenvector ZI pr(ZI)
## 0 NA NA 0.01
## 1 209 NA 0.01
## 2 244 NA 0.01
## 3 73 NA 0.01
## 4 62 NA 0.01
## 5 291 NA 0.01
## 6 1654 NA 0.01
## 7 29 NA 0.01
## 8 1759 NA 0.01
## 9 776 NA 0.01
## 10 98 NA 0.01
## 11 1062 NA 0.04
## 12 302 NA 0.01
## 13 1155 NA 0.06
## 14 357 NA 0.08
## 15 2053 NA 0.19
## 16 160 NA 0.21
fitsh<-data.frame(fitted(me.mhpoise))
mxmx$me209<-fitsh$vec209
mxmx$me244<-fitsh$vec244
mxmx$me73<-fitsh$vec73
Plots
mxmx%>%
ggplot() + geom_sf(aes(fill=me209, color=me209))+scale_color_viridis_c()+scale_fill_viridis_c(na.value = "grey50") + ggtitle("Spatial distribution of eigenvector 209", "Mexican municipalities, 2015")

mxmx%>%
ggplot()+geom_sf(aes(fill=me244, color=me244))+scale_color_viridis_c()+scale_fill_viridis_c(na.value = "grey50") + ggtitle("Spatial distribution of eigenvector 244", "Mexican municipalities, 2015")

mxmx%>%
ggplot()+geom_sf(aes(fill=me73, color=me73))+scale_color_viridis_c()+scale_fill_viridis_c(na.value = "grey50") + ggtitle("Spatial distribution of eigenvector 73", "Mexican municipalities, 2015")

lm.morantest(clean.poise, listw = mxmxwq)
##
## Global Moran I for regression residuals
##
## data:
## model: glm(formula = homicides ~ offset(log(pop.x)) + GM +
## fahomicides + SPRIM + PO2SM + fitted(me.mhpoise), family =
## poisson, data = mxmx)
## weights: mxmxwq
##
## Moran I statistic standard deviate = 21.521, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.2783541041 -0.0006700785 0.0001680985
Did we get a better result after the filtering?
AIC(clean.poise)
## [1] 15043.23
AIC(mhpois)
## [1] 16161.57
Más o menos