CAR Regresion

Regresion model with CAR

Example using colombian data for municipalities. The idea is to find the best predictor for GINI.

The response variable in this case is Gausian (Normal), but can be binomial, Gaussian, multinomial, Poisson or zero-inflated Poisson (ZIP). Spatial autocorrelation is acounted and modelled by a set of random effects that are assigned as a conditional autoregressive (CAR) prior distribution.

See more examples in: https://r-spatial.github.io/spdep/articles/CO69.html

library (sf)

## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3

library (readxl) # read excel data
library (tidyverse) # Data handling

## -- Attaching packages ----------------------- tidyverse 1.2.1 --

## v ggplot2 3.1.0     v purrr   0.2.5
## v tibble  2.0.1     v dplyr   0.7.8
## v tidyr   0.8.2     v stringr 1.3.1
## v readr   1.3.1     v forcats 0.3.0

## -- Conflicts -------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library (tmap)
library (spdep)

## Loading required package: sp

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following object is masked from 'package:tidyr':
## 
##     expand

## Loading required package: spData

## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`

Read Data

Read a shp file for colombian municipalities and a excel file with the economic data for each municipalitie.

## Reading layer `municipios' from data source `C:\Users\diego.lizcano\Box Sync\CodigoR\Andres\shp\municipios.shp' using driver `ESRI Shapefile'
## Simple feature collection with 1128 features and 8 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: -233755.1 ymin: -469226.2 xmax: 1410054 ymax: 1490018
## epsg (SRID):    32618
## proj4string:    +proj=utm +zone=18 +datum=WGS84 +units=m +no_defs

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# merge both data sets
Full_data <- merge(col_map ,
                   regre_data,
                   by="ID")

# see ig GINI is Normal
hist(Full_data$GINI, main="GINI")

#plot GINI Map
brks <- c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)

tm_shape(Full_data) + tm_fill("GINI", breaks=brks, midpoint=0.5, palette="RdBu") + tm_layout(legend.outside=TRUE)

# drop Municipios sin GINI
outl <- which(is.na( Full_data$GINI))
as.character(Full_data$NOM_MUNICI[outl])

##  [1] "MEDELLÍN"                                      
##  [2] "BOGOTÁ, D.C."                                  
##  [3] "NOROSI"                                        
##  [4] "SAN JOSÉ DE URÉ"                               
##  [5] "TUCHÍN"                                        
##  [6] "BAJO BAUDÓ  (Pizarro)"                         
##  [7] "CARMEN DEL DARIÉN  (Curbaradó)"                
##  [8] "CERTEGUI"                                      
##  [9] "CONDOTO"                                       
## [10] "EL LITORAL DEL SAN JUAN"                       
## [11] "MEDIO ATRATO (Beté)"                           
## [12] "MEDIO BAUDÓ (Boca de Pepé)"                    
## [13] "RÍO IRÓ (Santa Rita)"                          
## [14] "RÍO QUITO (Paimadó)"                           
## [15] "SIPÍ"                                          
## [16] "UNIÓN PANAMERICANA (Ánimas)"                   
## [17] "LA TOLA"                                       
## [18] "CALI"                                          
## [19] "EL ENCANTO (Cor. Departamental)"               
## [20] "LA CHORRERA (Cor. Departamental)"              
## [21] "LA PEDRERA (Cor. Departamental)"               
## [22] "LA VICTORIA (Pacoa) (Cor. Departamental)"      
## [23] "MIRITÍ-PARANÁ (Campoamor) (Cor. Departamental)"
## [24] "PUERTO ALEGRÍA (Cor. Departamental)"           
## [25] "PUERTO ARICA (Cor. Departamental)"             
## [26] "SANTANDER (Araracuara) (Cor. Departamental)"   
## [27] "TARAPACÁ (Cor. Departamental)"                 
## [28] "BARRANCO MINA (Cor. Departamental)"            
## [29] "MAPIRIPANA (Cor. Departamental)"               
## [30] "SAN FELIPE (Cor. Departamental)"               
## [31] "PUERTO COLOMBIA (Cor. Departamental)"          
## [32] "LA GUADALUPE (Cor. Departamental)"             
## [33] "CACAHUAL (Cor. Departamental)"                 
## [34] "PANÁ PANÁ (Campo Alegre) (Cor. Departamental)" 
## [35] "MORICHAL (Morichal Nuevo) (Cor. Departamental)"
## [36] "MIRAFLORES"                                    
## [37] "MITÚ"                                          
## [38] "CARURÚ"                                        
## [39] "PACOA (Cor. Departamental)"                    
## [40] "TARAIRA"                                       
## [41] "PAPUNAUA (Cor. Departamental)"                 
## [42] "YAVARATÉ (Cor. Departamental)"

W.4 <- Full_data[-outl, -outl]
nb_Full_data_2 <- poly2nb(W.4)
nb_B_2 <- nb2listw(nb_Full_data_2, style="B", zero.policy=TRUE)
Full_data_2 <- Full_data[!(1:length(Full_data$ID) %in% outl),]

# Convert to vecinos y lista 
nb_Full_data <- poly2nb(Full_data)
nb_B <- nb2listw(nb_Full_data, style="B", zero.policy=TRUE)
#B <- as(nb_B, "CsparseMatrix")

# plot vecinos
# plot(st_geometry(Full_data), border="grey")
# plot(nb_B, st_centroid(st_geometry(Full_data), of_largest_polygon), add=TRUE, col="blue")

######################################## 
#################### Moran test
######################################## 
form1 <- GINI~ Ha_Coca 

model1 <- lm(formula=form1, data=Full_data_2)
W.list <- nb_B_2
resid.model <- residuals(model1)
moran.mc(x=resid.model, listw=W.list, nsim=1000)

## 
##  Monte-Carlo simulation of Moran I
## 
## data:  resid.model 
## weights: W.list  
## number of simulations + 1: 1001 
## 
## statistic = 0.40006, observed rank = 1001, p-value = 0.000999
## alternative hypothesis: greater

# The Moran's I test has a p-value much less than 0.05, which suggests that the residuals contain
# substantial positive spatial autocorrelation.


########### Model 1
car_mod1 <- spautolm(GINI ~ 1, data=Full_data, listw=nb_B,  family="CAR")

summary(car_mod1)

## 
## Call: spautolm(formula = GINI ~ 1, data = Full_data, listw = nb_B, 
##     family = "CAR")
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.3549860 -0.0497422  0.0006531  0.0499323  0.3521475 
## 
## Coefficients: 
##              Estimate Std. Error z value  Pr(>|z|)
## (Intercept) 0.6647926  0.0067888  97.925 < 2.2e-16
## 
## Lambda: 0.15054 LR test value: 362.38 p-value: < 2.22e-16 
## Numerical Hessian standard error of lambda: 0.00094668 
## 
## Log likelihood: 1108.107 
## ML residual variance (sigma squared): 0.0066992, (sigma: 0.081848)
## Number of observations: 1082 
## Number of parameters estimated: 3 
## AIC: -2210.2

Full_data_2$fit_mod1 <- fitted.values(car_mod1)
tm_shape(Full_data_2) + tm_fill("fit_mod1")

########### Model 2
car_mod2 <- spautolm(GINI ~ Ha_Coca -1, data=Full_data, listw=nb_B,  family="CAR")

summary(car_mod2)

## 
## Call: 
## spautolm(formula = GINI ~ Ha_Coca - 1, data = Full_data, listw = nb_B, 
##     family = "CAR")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.620596 -0.041186  0.097477  0.225282  0.653650 
## 
## Coefficients: 
##           Estimate Std. Error z value  Pr(>|z|)
## Ha_Coca 8.5797e-05 2.3892e-05  3.5911 0.0003293
## 
## Lambda: 0.15121 LR test value: 2044.6 p-value: < 2.22e-16 
## Numerical Hessian standard error of lambda: 3.2371e-05 
## 
## Log likelihood: -110.8306 
## ML residual variance (sigma squared): 0.063412, (sigma: 0.25182)
## Number of observations: 1082 
## Number of parameters estimated: 3 
## AIC: 227.66

Full_data_2$fit_mod2 <- fitted.values(car_mod2)
tm_shape(Full_data_2) + tm_fill("fit_mod2")

CAR Regresion

Diego J. Lizcano

21 de febrero de 2019

Regresion model with CAR

Read Data