Para este análisis se utilizó una base datos de homicidios de la ciudad de Cali, la cual contiene la georeferenciación de los casos ocurridos durante el año 2017 en el area metropolitana de la Ciudad. Cali es una ciudad con más de 2 millones de habitantes de habitantes, cuya tasa para este año correspondió con 70 casos por cada 100mil habitantes. Los resultados que se muestran a continuación corresponden a los casos ocurridos en el mes de diciembre.
#cargando librerias
require(rgdal)
require(maptools)
require(sp)
require(raster)
require(stpp)
require(KernSmooth)
library(sf) # Development version in the ppp branch
library(spatstat)
#leyendo shape
comunas=st_read(dsn="C:/Users/delia/Desktop/Doctorado/Estadistica_Espacial/Ejercicio_NoParamétrico/Comunas/Comunas/bcs_lim_comunas_WGS84.shp")
## Reading layer `bcs_lim_comunas_WGS84' from data source `C:\Users\delia\Desktop\Doctorado\Estadistica_Espacial\Ejercicio_NoParamétrico\Comunas\Comunas\bcs_lim_comunas_WGS84.shp' using driver `ESRI Shapefile'
## Simple feature collection with 22 features and 5 fields
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: -76.59076 ymin: 3.331819 xmax: -76.46125 ymax: 3.505871
## geographic CRS: WGS 84
homicidios=st_read(dsn="C:/Users/delia/Desktop/Doctorado/Estadistica_Espacial/Ejercicio_NoParamétrico/Homicidios2017/Homicidios2017/2017_0_WGS84.shp")
## Reading layer `2017_0_WGS84' from data source `C:\Users\delia\Desktop\Doctorado\Estadistica_Espacial\Ejercicio_NoParamétrico\Homicidios2017\Homicidios2017\2017_0_WGS84.shp' using driver `ESRI Shapefile'
## Simple feature collection with 1242 features and 47 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: -1.797693e+308 ymin: -1.797693e+308 xmax: -76.46154 ymax: 3.50128
## geographic CRS: WGS 84
homicidios=st_transform(homicidios, crs = 6345)
comunas=st_transform(comunas, crs = 6345)
#creando la ventana del patron puntual espacial
comunas_win=as.owin(comunas)
#Creando un objeto de clase ppp (patron puntual)
fin=homicidios$mes=="DEC"
homicidios2=st_coordinates(homicidios[fin,])
homicidiospp <- ppp(x=homicidios2[,1],y=homicidios2[,2],window=comunas_win)
homicidiospp2=ripras(homicidiospp)
plot(homicidiospp2)
homicidiospp <- ppp(x=homicidios2[,1],y=homicidios2[,2],window=homicidiospp2)
plot(homicidiospp)
#eliminando duplicados para tener un proceso puntual simple
homicidiospp <- unique(homicidiospp)
plot(homicidiospp)
Primero se ajustaron modelos estacionarios como el Matern, LGCP y el modelo Cox, los cuales mostraron los siguientes resultados:
fitM <- kppm(homicidiospp ~ 1, "MatClust")
fitM
## Stationary cluster point process model
## Fitted to point pattern dataset 'homicidiospp'
## Fitted by minimum contrast
## Summary statistic: K-function
##
## Uniform intensity: 8.203719e-07
##
## Cluster model: Matern cluster process
## Fitted cluster parameters:
## kappa scale
## 1.269264e-07 1.270648e+03
## Mean cluster size: 6.463365 points
En el caso del modelo Matern se observa una intensidad estimada de los padres igual a \(\hat{k}=1.27e-07\), un radio del cluster de \(R=1.27e+03\) y el número de hijos por cada cluster \(\hat{\mu}=6.46\). De esta forma, la intensidad estimada del proceso es igual a \(\hat{k}\hat{\mu}=8.26e-07\)
fitT<- kppm(homicidiospp ~ 1, "LGCP")
fitT
## Stationary Cox point process model
## Fitted to point pattern dataset 'homicidiospp'
## Fitted by minimum contrast
## Summary statistic: K-function
##
## Uniform intensity: 8.203719e-07
##
## Cox model: log-Gaussian Cox process
## Covariance model: exponential
## Fitted covariance parameters:
## var scale
## 1.201492 1143.599323
## Fitted mean of log of random intensity: -14.61425
par(mfrow=c(1,2))
plot(fitT,pause=F)
Para el modelo LGCP se observa una intensidad estimada de los padres igual a \(\hat{k}=1.25e-07\), una dispersión de los puntos de \(\hat{\sigma}=6.73e+02\) y el número de hijos por cada cluster \(\hat{\mu}=6.56\).
Por otro lado, se realizó la comparación con el ajuste de un modelo poisson en cuanto a la varianza estimada para el \(log\lambda\), asi mismo, respecto al intervalo de confianza, observando que el modelo poisson presenta intervalos más estrechos, sin embargo, esto no lo hace mejor dado que el modelo cluster está reflejando una mayor incertidumbre dada la correlación que presentan los datos.
vcov(fitM)
## (Intercept)
## (Intercept) 0.06274571
confint(fitM)
## 2.5 % 97.5 %
## (Intercept) -14.50446 -13.52255
vcov(ppm(homicidiospp))
## log(lambda)
## log(lambda) 0.009345794
confint(ppm(homicidiospp))
## 2.5 % 97.5 %
## log(lambda) -14.20322 -13.82426
Para el modelo cox homogéneo, se obtuvo lo siguiente:
fitL <- kppm(homicidiospp ~ 1, "LGCP")
fitL
## Stationary Cox point process model
## Fitted to point pattern dataset 'homicidiospp'
## Fitted by minimum contrast
## Summary statistic: K-function
##
## Uniform intensity: 8.203719e-07
##
## Cox model: log-Gaussian Cox process
## Covariance model: exponential
## Fitted covariance parameters:
## var scale
## 1.201492 1143.599323
## Fitted mean of log of random intensity: -14.61425
Se ajustaron los tres modelos anteriores teniendo en cuenta como covariable los parques en la ciudad, la cual es un variable proxi de la condición económica de la población.
parques=st_read("C:/Users/delia/Desktop/Doctorado/Estadistica_Espacial/amb_eep_aeie_ecoparques/amb_eep_aeie_ecoparques.shp")
## Reading layer `amb_eep_aeie_ecoparques' from data source `C:\Users\delia\Desktop\Doctorado\Estadistica_Espacial\amb_eep_aeie_ecoparques\amb_eep_aeie_ecoparques.shp' using driver `ESRI Shapefile'
## Simple feature collection with 11 features and 2 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 1054055 ymin: 860103.1 xmax: 1066737 ymax: 878825.4
## projected CRS: MAGNA-SIRGAS / Cali urban grid
parques = st_transform(parques, crs = 6345)
parques_tabla=st_coordinates(parques)[,1:2]
plot(parques_tabla)
points(homicidiospp,add=T,col="red")
intensidad_homicidios <- density.ppp(homicidiospp,diggle=TRUE,sigma=bw.diggle(homicidiospp))
require(raster)
intensidad=rasterFromXYZ(data.frame(intensidad_homicidios)[,1:3])
imagen_parques=distanceFromPoints(object = intensidad,xy
= parques_tabla)
plot(imagen_parques)
points(homicidiospp,add=T,col="red")
covariables=list()
covariables[[1]]=as.im(imagen_parques)
names(covariables)="parques"
fitM2=kppm(homicidiospp~parques,"MatClust", method = c("clik2"), data=covariables)
summary(fitM2)
## Inhomogeneous cluster point process model
## Fitted to point pattern dataset 'homicidiospp'
## Fitted by maximum second order composite likelihood
## rmax = 3463.23573572794
## weight function: Indicator(distance <= 1731.61786786397)
## Converged successfully after 111 function evaluations
##
## ----------- TREND MODEL -----
## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation)
## Model was fitted using glm()
## Algorithm converged
## Call:
## ppm.ppp(Q = X, trend = trend, rename.intercept = FALSE, covariates = covariates,
## covfunargs = covfunargs, use.gam = use.gam, forcefit = TRUE,
## nd = nd, eps = eps)
## Edge correction: "border"
## [border correction distance r = 0 ]
## --------------------------------------------------------------------------------
## Quadrature scheme (Berman-Turner) = data + dummy + weights
##
## Data pattern:
## Planar point pattern: 107 points
## Average intensity 8.2e-07 points per square unit
## Window: polygonal boundary
## single connected closed polygon with 12 vertices
## enclosing rectangle: [1663363.2, 1677216.2] x [379174.1, 393610] units
## (13850 x 14440 units)
## Window area = 130459000 square units
## Fraction of frame area: 0.652
##
## Dummy quadrature points:
## 32 x 32 grid of dummy points, plus 4 corner points
## dummy spacing: 432.9045 x 451.1222 units
##
## Original dummy parameters: =
## Planar point pattern: 717 points
## Average intensity 5.5e-06 points per square unit
## Window: polygonal boundary
## single connected closed polygon with 12 vertices
## enclosing rectangle: [1663363.2, 1677216.2] x [379174.1, 393610] units
## (13850 x 14440 units)
## Window area = 130459000 square units
## Fraction of frame area: 0.652
## Quadrature weights:
## (counting weights based on 32 x 32 array of rectangular tiles)
## All weights:
## range: [8990, 195000] total: 1.3e+08
## Weights on data points:
## range: [39100, 97600] total: 9210000
## Weights on dummy points:
## range: [8990, 195000] total: 1.21e+08
## --------------------------------------------------------------------------------
## FITTED MODEL:
##
## Nonstationary Poisson process
##
## ---- Intensity: ----
##
## Log intensity: ~parques
## Model depends on external covariate 'parques'
## Covariates provided:
## parques: im
##
## Fitted trend coefficients:
## (Intercept) parques
## -1.402594e+01 7.910550e-06
##
## Estimate S.E. CI95.lo CI95.hi Ztest
## (Intercept) -1.402594e+01 1.676938e-01 -1.435461e+01 -1.369727e+01 ***
## parques 7.910550e-06 8.692699e-05 -1.624632e-04 1.782843e-04
## Zval
## (Intercept) -83.64018220
## parques 0.09100223
##
## ----------- gory details -----
##
## Fitted regular parameters (theta):
## (Intercept) parques
## -1.402594e+01 7.910550e-06
##
## Fitted exp(theta):
## (Intercept) parques
## 8.102371e-07 1.000008e+00
##
## ----------- CLUSTER MODEL -----------
## Model: Matern cluster process
##
## Fitted cluster parameters:
## kappa scale
## 8.234685e-07 5.552285e+02
## Mean cluster size: [pixel image]
##
## Final standard error and CI
## (allowing for correlation of cluster process):
## Estimate S.E. CI95.lo CI95.hi Ztest
## (Intercept) -1.402594e+01 0.2324009543 -1.448144e+01 -1.357044e+01 ***
## parques 7.910550e-06 0.0001202838 -2.278413e-04 2.436624e-04
## Zval
## (Intercept) -60.35232916
## parques 0.06576572
De acuerdo a los resultados del modelo anterior, se observa que la covariable no es significativa.
El modelo LGCP:
fitT2=kppm(homicidiospp~parques,"LGCP", method = c("clik2"), data=covariables)
summary(fitT2)
## Inhomogeneous Cox point process model
## Fitted to point pattern dataset 'homicidiospp'
## Fitted by maximum second order composite likelihood
## rmax = 3463.23573572794
## weight function: Indicator(distance <= 1731.61786786397)
## Converged successfully after 55 function evaluations
##
## ----------- TREND MODEL -----
## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation)
## Model was fitted using glm()
## Algorithm converged
## Call:
## ppm.ppp(Q = X, trend = trend, rename.intercept = FALSE, covariates = covariates,
## covfunargs = covfunargs, use.gam = use.gam, forcefit = TRUE,
## nd = nd, eps = eps)
## Edge correction: "border"
## [border correction distance r = 0 ]
## --------------------------------------------------------------------------------
## Quadrature scheme (Berman-Turner) = data + dummy + weights
##
## Data pattern:
## Planar point pattern: 107 points
## Average intensity 8.2e-07 points per square unit
## Window: polygonal boundary
## single connected closed polygon with 12 vertices
## enclosing rectangle: [1663363.2, 1677216.2] x [379174.1, 393610] units
## (13850 x 14440 units)
## Window area = 130459000 square units
## Fraction of frame area: 0.652
##
## Dummy quadrature points:
## 32 x 32 grid of dummy points, plus 4 corner points
## dummy spacing: 432.9045 x 451.1222 units
##
## Original dummy parameters: =
## Planar point pattern: 717 points
## Average intensity 5.5e-06 points per square unit
## Window: polygonal boundary
## single connected closed polygon with 12 vertices
## enclosing rectangle: [1663363.2, 1677216.2] x [379174.1, 393610] units
## (13850 x 14440 units)
## Window area = 130459000 square units
## Fraction of frame area: 0.652
## Quadrature weights:
## (counting weights based on 32 x 32 array of rectangular tiles)
## All weights:
## range: [8990, 195000] total: 1.3e+08
## Weights on data points:
## range: [39100, 97600] total: 9210000
## Weights on dummy points:
## range: [8990, 195000] total: 1.21e+08
## --------------------------------------------------------------------------------
## FITTED MODEL:
##
## Nonstationary Poisson process
##
## ---- Intensity: ----
##
## Log intensity: ~parques
## Model depends on external covariate 'parques'
## Covariates provided:
## parques: im
##
## Fitted trend coefficients:
## (Intercept) parques
## -1.402594e+01 7.910550e-06
##
## Estimate S.E. CI95.lo CI95.hi Ztest
## (Intercept) -1.402594e+01 1.676938e-01 -1.435461e+01 -1.369727e+01 ***
## parques 7.910550e-06 8.692699e-05 -1.624632e-04 1.782843e-04
## Zval
## (Intercept) -83.64018220
## parques 0.09100223
##
## ----------- gory details -----
##
## Fitted regular parameters (theta):
## (Intercept) parques
## -1.402594e+01 7.910550e-06
##
## Fitted exp(theta):
## (Intercept) parques
## 8.102371e-07 1.000008e+00
##
## ----------- COX MODEL -----------
## Model: log-Gaussian Cox process
##
## Covariance model: exponential
## Fitted covariance parameters:
## var scale
## 1.08105 785.91622
## Fitted mean of log of random intensity: [pixel image]
##
## Final standard error and CI
## (allowing for correlation of Cox process):
## Estimate S.E. CI95.lo CI95.hi Ztest
## (Intercept) -1.402594e+01 0.3217711106 -1.465660e+01 -1.339528e+01 ***
## parques 7.910550e-06 0.0001611127 -3.078645e-04 3.236856e-04
## Zval
## (Intercept) -43.58980166
## parques 0.04909949
par(mfrow=c(1,2))
plot(fitT,pause=F)
fitC2=kppm(homicidiospp~parques,"LGCP", method = c("clik2"), data=covariables)
summary(fitC2)
## Inhomogeneous Cox point process model
## Fitted to point pattern dataset 'homicidiospp'
## Fitted by maximum second order composite likelihood
## rmax = 3463.23573572794
## weight function: Indicator(distance <= 1731.61786786397)
## Converged successfully after 55 function evaluations
##
## ----------- TREND MODEL -----
## Point process model
## Fitting method: maximum likelihood (Berman-Turner approximation)
## Model was fitted using glm()
## Algorithm converged
## Call:
## ppm.ppp(Q = X, trend = trend, rename.intercept = FALSE, covariates = covariates,
## covfunargs = covfunargs, use.gam = use.gam, forcefit = TRUE,
## nd = nd, eps = eps)
## Edge correction: "border"
## [border correction distance r = 0 ]
## --------------------------------------------------------------------------------
## Quadrature scheme (Berman-Turner) = data + dummy + weights
##
## Data pattern:
## Planar point pattern: 107 points
## Average intensity 8.2e-07 points per square unit
## Window: polygonal boundary
## single connected closed polygon with 12 vertices
## enclosing rectangle: [1663363.2, 1677216.2] x [379174.1, 393610] units
## (13850 x 14440 units)
## Window area = 130459000 square units
## Fraction of frame area: 0.652
##
## Dummy quadrature points:
## 32 x 32 grid of dummy points, plus 4 corner points
## dummy spacing: 432.9045 x 451.1222 units
##
## Original dummy parameters: =
## Planar point pattern: 717 points
## Average intensity 5.5e-06 points per square unit
## Window: polygonal boundary
## single connected closed polygon with 12 vertices
## enclosing rectangle: [1663363.2, 1677216.2] x [379174.1, 393610] units
## (13850 x 14440 units)
## Window area = 130459000 square units
## Fraction of frame area: 0.652
## Quadrature weights:
## (counting weights based on 32 x 32 array of rectangular tiles)
## All weights:
## range: [8990, 195000] total: 1.3e+08
## Weights on data points:
## range: [39100, 97600] total: 9210000
## Weights on dummy points:
## range: [8990, 195000] total: 1.21e+08
## --------------------------------------------------------------------------------
## FITTED MODEL:
##
## Nonstationary Poisson process
##
## ---- Intensity: ----
##
## Log intensity: ~parques
## Model depends on external covariate 'parques'
## Covariates provided:
## parques: im
##
## Fitted trend coefficients:
## (Intercept) parques
## -1.402594e+01 7.910550e-06
##
## Estimate S.E. CI95.lo CI95.hi Ztest
## (Intercept) -1.402594e+01 1.676938e-01 -1.435461e+01 -1.369727e+01 ***
## parques 7.910550e-06 8.692699e-05 -1.624632e-04 1.782843e-04
## Zval
## (Intercept) -83.64018220
## parques 0.09100223
##
## ----------- gory details -----
##
## Fitted regular parameters (theta):
## (Intercept) parques
## -1.402594e+01 7.910550e-06
##
## Fitted exp(theta):
## (Intercept) parques
## 8.102371e-07 1.000008e+00
##
## ----------- COX MODEL -----------
## Model: log-Gaussian Cox process
##
## Covariance model: exponential
## Fitted covariance parameters:
## var scale
## 1.08105 785.91622
## Fitted mean of log of random intensity: [pixel image]
##
## Final standard error and CI
## (allowing for correlation of Cox process):
## Estimate S.E. CI95.lo CI95.hi Ztest
## (Intercept) -1.402594e+01 0.3217711106 -1.465660e+01 -1.339528e+01 ***
## parques 7.910550e-06 0.0001611127 -3.078645e-04 3.236856e-04
## Zval
## (Intercept) -43.58980166
## parques 0.04909949
Al comparar por el criterio de AIC cuál modelo presenta mejor ajuste, se observa que el COX reporta menor AIC.
AIC(fitM2)
## [1] -13662.27
AIC(fitT2)
## [1] -13651.82
AIC(fitC2)
## [1] -13651.82
plot(simulate(fitC2, nsim=4))
## Generating 4 simulations... 1, 2, 3, 4.
## Done.
envLT <- envelope(fitC2, Lest, nsim=39)
## Generating 39 simulated realisations of fitted cluster model ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39.
##
## Done.
plot(envLT)
library(spatstat)
LGCP=rLGCP("gauss", mu = -14.61, var = 1.20 ,scale = 1143.60, ,nsim =100 , win = homicidiospp$window)
## ....................................................................................................
plot(LGCP[[1]])
Inicialmente se realizó una simulación calculando el valor promedio por cada pixel de las diferentes simulaciones realizadas del modelo LGCP.
inten_sim=list()
for(i in 1:100){
inten_sim[[i]] = density.ppp(LGCP[[i]],diggle=TRUE,sigma=bw.scott(LGCP[[i]]))
inten_sim[[i]] =rasterFromXYZ(data.frame(inten_sim[[i]])[1:3])
}
inten_sim=stack(inten_sim)
names(inten_sim)=paste("simulación",1:100)
plot(inten_sim[[1:8]])
promedio_sim=mean(inten_sim[[1:100]])
plot(promedio_sim)
intensidad_homicidios1 <- density.ppp(homicidiospp,diggle=TRUE,sigma=bw.ppl(homicidiospp))
intensidad_observada=rasterFromXYZ(data.frame(intensidad_homicidios1)[1:3])
resultado=stack(promedio_sim,intensidad_observada)
names(resultado)=c("promedio_simulaciones","intensidad_observada")
plot(resultado)
Luego se realizó otra simulación usando la función eval.im para realizar operaciones entre imagénes. Donde se observa una mayor consistencia entre el patrón y la intensidad estimada.
library(spatstat)
LGCP=rLGCP("gauss", mu = -14.61, var = 1.20 ,scale = 1143.60, ,nsim =100 , win = homicidiospp$window)
## ....................................................................................................
plot(LGCP[[1]])
inten_sim1=list()
for(i in 1:100){
inten_sim1[[i]] = density.ppp(LGCP[[i]],diggle=TRUE,sigma=bw.scott(LGCP[[i]]))
}
intensidad_homicidios1 <- density.ppp(homicidiospp,diggle=TRUE,sigma=bw.ppl(homicidiospp))
prom_im=function(A,B){
return(eval.im(A+B)/2)
}
c=prom_im(inten_sim1[[1]],inten_sim1[[2]])
for(i in 3:100){
c=prom_im(c,inten_sim1[[i]])
}
c
## real-valued pixel image
## 128 x 128 pixel array (ny, nx)
## enclosing rectangle: [1663400, 1677200] x [379170, 393610] units
plot(c)