Example using colombian data for municipalities. The idea is to find the best predictor for GINI.
The response variable in this case is Gausian (Normal), but can be binomial, Gaussian, multinomial, Poisson or zero-inflated Poisson (ZIP). Spatial autocorrelation is acounted and modelled by a set of random effects that are assigned as a conditional autoregressive (CAR) prior distribution.
See more examples in: https://r-spatial.github.io/spdep/articles/CO69.html
library (sf)## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
library (readxl) # read excel data
library (tidyverse) # Data handling## -- Attaching packages ----------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0 v purrr 0.2.5
## v tibble 2.0.1 v dplyr 0.7.8
## v tidyr 0.8.2 v stringr 1.3.1
## v readr 1.3.1 v forcats 0.3.0
## -- Conflicts -------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library (tmap)
library (spdep)## Loading required package: sp
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
##
## expand
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
Read a shp file for colombian municipalities and a excel file with the economic data for each municipalitie.
## Reading layer `municipios' from data source `C:\Users\diego.lizcano\Box Sync\CodigoR\Andres\shp\municipios.shp' using driver `ESRI Shapefile'
## Simple feature collection with 1128 features and 8 fields
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: -233755.1 ymin: -469226.2 xmax: 1410054 ymax: 1490018
## epsg (SRID): 32618
## proj4string: +proj=utm +zone=18 +datum=WGS84 +units=m +no_defs
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.
# merge both data sets
Full_data <- merge(col_map ,
regre_data,
by="ID")
# see ig GINI is Normal
hist(Full_data$GINI, main="GINI")#plot GINI Map
brks <- c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)
tm_shape(Full_data) + tm_fill("GINI", breaks=brks, midpoint=0.5, palette="RdBu") + tm_layout(legend.outside=TRUE)# drop Municipios sin GINI
outl <- which(is.na( Full_data$GINI))
as.character(Full_data$NOM_MUNICI[outl])## [1] "MEDELLÍN"
## [2] "BOGOTÁ, D.C."
## [3] "NOROSI"
## [4] "SAN JOSÉ DE URÉ"
## [5] "TUCHÍN"
## [6] "BAJO BAUDÓ (Pizarro)"
## [7] "CARMEN DEL DARIÉN (Curbaradó)"
## [8] "CERTEGUI"
## [9] "CONDOTO"
## [10] "EL LITORAL DEL SAN JUAN"
## [11] "MEDIO ATRATO (Beté)"
## [12] "MEDIO BAUDÓ (Boca de Pepé)"
## [13] "RÍO IRÓ (Santa Rita)"
## [14] "RÍO QUITO (Paimadó)"
## [15] "SIPÍ"
## [16] "UNIÓN PANAMERICANA (Ánimas)"
## [17] "LA TOLA"
## [18] "CALI"
## [19] "EL ENCANTO (Cor. Departamental)"
## [20] "LA CHORRERA (Cor. Departamental)"
## [21] "LA PEDRERA (Cor. Departamental)"
## [22] "LA VICTORIA (Pacoa) (Cor. Departamental)"
## [23] "MIRITÍ-PARANÁ (Campoamor) (Cor. Departamental)"
## [24] "PUERTO ALEGRÍA (Cor. Departamental)"
## [25] "PUERTO ARICA (Cor. Departamental)"
## [26] "SANTANDER (Araracuara) (Cor. Departamental)"
## [27] "TARAPACÁ (Cor. Departamental)"
## [28] "BARRANCO MINA (Cor. Departamental)"
## [29] "MAPIRIPANA (Cor. Departamental)"
## [30] "SAN FELIPE (Cor. Departamental)"
## [31] "PUERTO COLOMBIA (Cor. Departamental)"
## [32] "LA GUADALUPE (Cor. Departamental)"
## [33] "CACAHUAL (Cor. Departamental)"
## [34] "PANÁ PANÁ (Campo Alegre) (Cor. Departamental)"
## [35] "MORICHAL (Morichal Nuevo) (Cor. Departamental)"
## [36] "MIRAFLORES"
## [37] "MITÚ"
## [38] "CARURÚ"
## [39] "PACOA (Cor. Departamental)"
## [40] "TARAIRA"
## [41] "PAPUNAUA (Cor. Departamental)"
## [42] "YAVARATÉ (Cor. Departamental)"
W.4 <- Full_data[-outl, -outl]
nb_Full_data_2 <- poly2nb(W.4)
nb_B_2 <- nb2listw(nb_Full_data_2, style="B", zero.policy=TRUE)
Full_data_2 <- Full_data[!(1:length(Full_data$ID) %in% outl),]
# Convert to vecinos y lista
nb_Full_data <- poly2nb(Full_data)
nb_B <- nb2listw(nb_Full_data, style="B", zero.policy=TRUE)
#B <- as(nb_B, "CsparseMatrix")
# plot vecinos
# plot(st_geometry(Full_data), border="grey")
# plot(nb_B, st_centroid(st_geometry(Full_data), of_largest_polygon), add=TRUE, col="blue")
########################################
#################### Moran test
########################################
form1 <- GINI~ Ha_Coca
model1 <- lm(formula=form1, data=Full_data_2)
W.list <- nb_B_2
resid.model <- residuals(model1)
moran.mc(x=resid.model, listw=W.list, nsim=1000)##
## Monte-Carlo simulation of Moran I
##
## data: resid.model
## weights: W.list
## number of simulations + 1: 1001
##
## statistic = 0.40006, observed rank = 1001, p-value = 0.000999
## alternative hypothesis: greater
# The Moran's I test has a p-value much less than 0.05, which suggests that the residuals contain
# substantial positive spatial autocorrelation.
########### Model 1
car_mod1 <- spautolm(GINI ~ 1, data=Full_data, listw=nb_B, family="CAR")
summary(car_mod1)##
## Call: spautolm(formula = GINI ~ 1, data = Full_data, listw = nb_B,
## family = "CAR")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.3549860 -0.0497422 0.0006531 0.0499323 0.3521475
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.6647926 0.0067888 97.925 < 2.2e-16
##
## Lambda: 0.15054 LR test value: 362.38 p-value: < 2.22e-16
## Numerical Hessian standard error of lambda: 0.00094668
##
## Log likelihood: 1108.107
## ML residual variance (sigma squared): 0.0066992, (sigma: 0.081848)
## Number of observations: 1082
## Number of parameters estimated: 3
## AIC: -2210.2
Full_data_2$fit_mod1 <- fitted.values(car_mod1)
tm_shape(Full_data_2) + tm_fill("fit_mod1")########### Model 2
car_mod2 <- spautolm(GINI ~ Ha_Coca -1, data=Full_data, listw=nb_B, family="CAR")
summary(car_mod2)##
## Call:
## spautolm(formula = GINI ~ Ha_Coca - 1, data = Full_data, listw = nb_B,
## family = "CAR")
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.620596 -0.041186 0.097477 0.225282 0.653650
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## Ha_Coca 8.5797e-05 2.3892e-05 3.5911 0.0003293
##
## Lambda: 0.15121 LR test value: 2044.6 p-value: < 2.22e-16
## Numerical Hessian standard error of lambda: 3.2371e-05
##
## Log likelihood: -110.8306
## ML residual variance (sigma squared): 0.063412, (sigma: 0.25182)
## Number of observations: 1082
## Number of parameters estimated: 3
## AIC: 227.66
Full_data_2$fit_mod2 <- fitted.values(car_mod2)
tm_shape(Full_data_2) + tm_fill("fit_mod2")