Instalar librerías necesarias (solo la primera vez)
install.packages(c("sf", "dplyr", "ggplot2", "tmap", "spdep", "GWmodel",
"spgwr", "jtools", "lmtest", "car", "spatialreg"))
1. Análisis Exploratorio de Datos (EDA)
library(sf)
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1; sf_use_s2() is TRUE
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tmap)
library(spdep)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(GWmodel)
## Loading required package: robustbase
## Loading required package: sp
## Loading required package: Rcpp
## Welcome to GWmodel version 2.4-2.
library(spgwr)
## NOTE: This package does not constitute approval of GWR
## as a method of spatial analysis; see example(gwr)
library(jtools)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(spatialreg)
## Loading required package: Matrix
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
## set.coresOption, set.mcOption, set.VerboseOption,
## set.ZeroPolicyOption
tmap_mode("plot")
## ℹ tmap mode set to "plot".
if (file.exists("./nl_mpios_auto_acc.shp")) {
dataa <- st_read("./nl_mpios_auto_acc.shp")
} else {
stop("El archivo shapefile no se encuentra en la ruta del proyecto. Verifica que los archivos .shp, .dbf, .shx y .prj estén cargados en el directorio.")
}
## Reading layer `nl_mpios_auto_acc' from data source
## `/cloud/project/nl_mpios_auto_acc.shp' using driver `ESRI Shapefile'
## Simple feature collection with 51 features and 22 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -101.2068 ymin: 23.16268 xmax: -98.42158 ymax: 27.79914
## Geodetic CRS: WGS 84
b) Dispersión regional de accidentes automovilísticos
ggplot(dataa, aes(x = as.factor(region), y = tasa_auto_)) +
geom_boxplot(fill = "#69b3a2") +
labs(title = "Dispersión de Accidentes por Región",
x = "Región", y = "Tasa de accidentes por cada 10,000 hab")

c) Dispersión por sexo
ggplot(dataa, aes(x = as.factor(sexo), y = tasa_auto_)) +
geom_boxplot(fill = "salmon") +
labs(title = "Accidentes por Sexo", x = "Sexo", y = "Tasa de accidentes")

2. Análisis Exploratorio Espacial (ESDA)
dataa_sp <- as(st_geometry(dataa), "Spatial")
nb_rook <- poly2nb(dataa_sp, queen = FALSE)
lw_rook <- nb2listw(nb_rook, style = "W", zero.policy = TRUE)
plot(dataa_sp, border = "grey")
plot(nb_rook, coordinates(dataa_sp), add = TRUE, col = "red")

moran.test(dataa$tasa_auto_, lw_rook)
##
## Moran I test under randomisation
##
## data: dataa$tasa_auto_
## weights: lw_rook
##
## Moran I statistic standard deviate = 0.14327, p-value = 0.443
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.01516424 -0.02000000 0.00113921
lisa <- localmoran(dataa$tasa_auto_, lw_rook)
dataa$lisa_I <- lisa[,1]
dataa$signif <- lisa[,5] < 0.05
# Visualizar con tmap
tmap_mode("plot")
## ℹ tmap mode set to "plot".
tm_shape(dataa) +
tm_polygons("lisa_I", palette = "Reds", title = "Local Moran's I") +
tm_borders()
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).[v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'[cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Reds" is named
## "brewer.reds"Multiple palettes called "reds" found: "brewer.reds", "matplotlib.reds". The first one, "brewer.reds", is returned.

subset(dataa, signif == TRUE) %>% select(geometry, lisa_I)
## Simple feature collection with 6 features and 1 field
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -99.97067 ymin: 25.06713 xmax: -98.42158 ymax: 26.32927
## Geodetic CRS: WGS 84
## lisa_I geometry
## 17 -0.18966943 POLYGON ((-99.18902 25.9053...
## 19 -0.17813416 POLYGON ((-99.16702 26.0754...
## 28 -0.17132160 POLYGON ((-99.5387 25.90161...
## 33 -0.45212844 POLYGON ((-99.39597 26.0949...
## 41 -0.09714289 POLYGON ((-99.33367 26.3063...
## 46 0.20595067 POLYGON ((-99.58946 26.2353...
3. Modelos de Predicción Global
modelo_ols <- lm(log(tasa_auto_) ~ zona_urb + densidad_p + region + edad + I(edad^2) + sexo, data = dataa)
summary(modelo_ols)
##
## Call:
## lm(formula = log(tasa_auto_) ~ zona_urb + densidad_p + region +
## edad + I(edad^2) + sexo, data = dataa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4440 -0.6831 -0.0673 0.5823 5.4782
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.2392435 4.7835139 -0.886 0.3808
## zona_urbno_interseccion 2.1542260 1.1543210 1.866 0.0694 .
## zona_urbsuburbana 1.1717921 1.4469273 0.810 0.4228
## densidad_p 0.0002395 0.0004104 0.584 0.5628
## regionNorte -0.1875490 1.0322058 -0.182 0.8567
## regionOeste -1.0841957 1.0442633 -1.038 0.3054
## regionSur -2.6271709 1.0063100 -2.611 0.0127 *
## regionZMM -1.7246836 1.4008889 -1.231 0.2255
## edad 0.3973415 0.2009280 1.978 0.0549 .
## I(edad^2) -0.0048484 0.0023373 -2.074 0.0445 *
## sexo 1.0935001 0.7360987 1.486 0.1452
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.985 on 40 degrees of freedom
## Multiple R-squared: 0.3277, Adjusted R-squared: 0.1596
## F-statistic: 1.95 on 10 and 40 DF, p-value: 0.06633
vif(modelo_ols)
## GVIF Df GVIF^(1/(2*Df))
## zona_urb 2.265832 2 1.226894
## densidad_p 2.655803 1 1.629663
## region 3.193847 4 1.156217
## edad 42.393073 1 6.510996
## I(edad^2) 41.233301 1 6.421316
## sexo 1.105274 1 1.051320
bptest(modelo_ols)
##
## studentized Breusch-Pagan test
##
## data: modelo_ols
## BP = 9.8503, df = 10, p-value = 0.4537
AIC(modelo_ols)
## [1] 226.2857
dataa_spdf <- as(dataa, "Spatial")
model_sar <- spatialreg::lagsarlm(log(tasa_auto_) ~ zona_urb + densidad_p + region + edad + I(edad^2) + sexo, data = dataa_spdf, listw = lw_rook)
model_sem <- spatialreg::errorsarlm(log(tasa_auto_) ~ zona_urb + densidad_p + region + edad + I(edad^2) + sexo, data = dataa_spdf, listw = lw_rook)
model_sdm <- spatialreg::lagsarlm(log(tasa_auto_) ~ zona_urb + densidad_p + region + edad + I(edad^2) + sexo, data = dataa_spdf, listw = lw_rook, type = "mixed")
summary(model_sar)
##
## Call:spatialreg::lagsarlm(formula = log(tasa_auto_) ~ zona_urb + densidad_p +
## region + edad + I(edad^2) + sexo, data = dataa_spdf, listw = lw_rook)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.42717 -0.72601 -0.11451 0.56502 5.35978
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.14112699 4.54997292 -1.1299 0.25851
## zona_urbno_interseccion 2.13836151 1.01966606 2.0971 0.03598
## zona_urbsuburbana 1.07062821 1.27794902 0.8378 0.40216
## densidad_p 0.00023270 0.00036289 0.6412 0.52136
## regionNorte -0.16421442 0.91304394 -0.1799 0.85727
## regionOeste -0.98897407 0.92347551 -1.0709 0.28420
## regionSur -2.40465641 0.96710590 -2.4864 0.01290
## regionZMM -1.58871275 1.25385801 -1.2671 0.20513
## edad 0.41333420 0.17941536 2.3038 0.02123
## I(edad^2) -0.00502303 0.00208667 -2.4072 0.01608
## sexo 1.12058278 0.65519712 1.7103 0.08721
##
## Rho: 0.088749, LR test value: 0.20473, p-value: 0.65093
## Asymptotic standard error: 0.18627
## z-value: 0.47645, p-value: 0.63375
## Wald statistic: 0.22701, p-value: 0.63375
##
## Log likelihood: -101.0405 for lag model
## ML residual variance (sigma squared): 3.0734, (sigma: 1.7531)
## Number of observations: 51
## Number of parameters estimated: 13
## AIC: 228.08, (AIC for lm: 226.29)
## LM test for residual autocorrelation
## test value: 0.04346, p-value: 0.83486
summary(model_sem)
##
## Call:spatialreg::errorsarlm(formula = log(tasa_auto_) ~ zona_urb +
## densidad_p + region + edad + I(edad^2) + sexo, data = dataa_spdf,
## listw = lw_rook)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.43443 -0.71716 -0.14081 0.56040 5.35240
##
## Type: error
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.39688424 4.14416024 -1.0610 0.28870
## zona_urbno_interseccion 2.09777998 1.02580495 2.0450 0.04085
## zona_urbsuburbana 1.01127266 1.26883141 0.7970 0.42544
## densidad_p 0.00024282 0.00037103 0.6544 0.51283
## regionNorte -0.16080817 0.96887069 -0.1660 0.86818
## regionOeste -0.92954887 0.97106700 -0.9572 0.33844
## regionSur -2.55417711 0.95138334 -2.6847 0.00726
## regionZMM -1.61981703 1.28033537 -1.2652 0.20582
## edad 0.40086128 0.17339746 2.3118 0.02079
## I(edad^2) -0.00485966 0.00201883 -2.4072 0.01608
## sexo 1.07665414 0.64444899 1.6707 0.09479
##
## Lambda: 0.09478, LR test value: 0.14084, p-value: 0.70745
## Asymptotic standard error: 0.19541
## z-value: 0.48504, p-value: 0.62765
## Wald statistic: 0.23527, p-value: 0.62765
##
## Log likelihood: -101.0725 for error model
## ML residual variance (sigma squared): 3.0765, (sigma: 1.754)
## Number of observations: 51
## Number of parameters estimated: 13
## AIC: 228.14, (AIC for lm: 226.29)
summary(model_sdm)
##
## Call:spatialreg::lagsarlm(formula = log(tasa_auto_) ~ zona_urb + densidad_p +
## region + edad + I(edad^2) + sexo, data = dataa_spdf, listw = lw_rook,
## type = "mixed")
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.79282 -0.64894 -0.15556 0.93272 2.35130
##
## Type: mixed
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 47.71710918 21.51743867 2.2176 0.0265820
## zona_urbno_interseccion 3.20903815 0.93503202 3.4320 0.0005991
## zona_urbsuburbana 0.67358354 1.15717247 0.5821 0.5605031
## densidad_p 0.00148280 0.00054648 2.7134 0.0066602
## regionNorte -1.96692344 1.85326644 -1.0613 0.2885409
## regionOeste -0.97927960 1.46961306 -0.6664 0.5051861
## regionSur -7.68647838 2.22448770 -3.4554 0.0005495
## regionZMM -2.42933969 1.55319022 -1.5641 0.1177949
## edad -0.11209051 0.24050851 -0.4661 0.6411752
## I(edad^2) 0.00017619 0.00268659 0.0656 0.9477120
## sexo -0.67453373 0.71839422 -0.9389 0.3477582
## lag.zona_urbno_interseccion 0.72292783 1.60827293 0.4495 0.6530669
## lag.zona_urbsuburbana 6.44158201 2.96912888 2.1695 0.0300433
## lag.densidad_p -0.00206923 0.00117802 -1.7565 0.0789986
## lag.regionNorte 1.55059987 2.21493788 0.7001 0.4838869
## lag.regionOeste -0.73473253 2.05738052 -0.3571 0.7210017
## lag.regionSur 4.85736288 2.85645036 1.7005 0.0890390
## lag.regionZMM -0.09947556 2.26350453 -0.0439 0.9649462
## lag.edad -1.57014829 0.71969674 -2.1817 0.0291331
## lag.I(edad^2) 0.01571143 0.00806335 1.9485 0.0513554
## lag.sexo 0.31729430 1.55170476 0.2045 0.8379776
##
## Rho: -0.27812, LR test value: 1.636, p-value: 0.20088
## Asymptotic standard error: 0.19158
## z-value: -1.4517, p-value: 0.14659
## Wald statistic: 2.1074, p-value: 0.14659
##
## Log likelihood: -88.93371 for mixed model
## ML residual variance (sigma squared): 1.8851, (sigma: 1.373)
## Number of observations: 51
## Number of parameters estimated: 23
## AIC: 223.87, (AIC for lm: 223.5)
## LM test for residual autocorrelation
## test value: 4.2581, p-value: 0.039064
AIC(modelo_ols, model_sar, model_sem, model_sdm)
## df AIC
## modelo_ols 12 226.2857
## model_sar 13 228.0810
## model_sem 13 228.1449
## model_sdm 23 223.8674
moran.test(residuals(model_sar), lw_rook)
##
## Moran I test under randomisation
##
## data: residuals(model_sar)
## weights: lw_rook
##
## Moran I statistic standard deviate = 0.15124, p-value = 0.4399
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.006307225 -0.020000000 0.008196757
dataa$pred_sar <- model_sar$fitted.values
tmap_mode("plot")
## ℹ tmap mode set to "plot".
tm_shape(dataa) +
tm_polygons("pred_sar", palette = "Blues", title = "Predicción SAR") +
tm_borders()
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).[v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'[cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Blues" is named
## "brewer.blues"Multiple palettes called "blues" found: "brewer.blues", "matplotlib.blues". The first one, "brewer.blues", is returned.

4. Regresión Local - GWR
# Asegurarse de que el objeto Spatial esté creado
if (!exists("dataa_spdf")) dataa_spdf <- as(dataa, "Spatial")
# Ejecutar GWR con variables correctas
bw_gwr <- GWmodel::bw.gwr(log(tasa_auto_) ~ densidad_p + edad + gini, data = dataa_spdf, approach = "AIC", adaptive = TRUE)
## Adaptive bandwidth (number of nearest neighbours): 39 AICc value: 207.7468
## Adaptive bandwidth (number of nearest neighbours): 32 AICc value: 207.5074
## Adaptive bandwidth (number of nearest neighbours): 27 AICc value: 209.9128
## Adaptive bandwidth (number of nearest neighbours): 34 AICc value: 208.0291
## Adaptive bandwidth (number of nearest neighbours): 29 AICc value: 207.7464
## Adaptive bandwidth (number of nearest neighbours): 32 AICc value: 207.5074
gwr_model <- GWmodel::gwr.basic(log(tasa_auto_) ~ densidad_p + edad + gini, data = dataa_spdf, bw = bw_gwr, adaptive = TRUE)
# Extraer resultados
dataa$gwr_pred <- gwr_model$SDF$yhat
dataa$gwr_R2 <- gwr_model$SDF$Local_R2
tmap_mode("plot")
## ℹ tmap mode set to "plot".
tm_shape(dataa) +
tm_polygons(col = "gwr_pred", palette = "Greens", title = "Predicción GWR") +
tm_borders()
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).[v3->v4] `tm_polygons()`: use 'fill' for the fill color of polygons/symbols
## (instead of 'col'), and 'col' for the outlines (instead of 'border.col').[v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'[cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Greens" is named
## "brewer.greens"Multiple palettes called "greens" found: "brewer.greens", "matplotlib.greens". The first one, "brewer.greens", is returned.

tmap_mode("plot")
## ℹ tmap mode set to "plot".
tm_shape(dataa) +
tm_polygons(col = "gwr_R2", palette = "Oranges", title = "R² local GWR") +
tm_borders()
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).[v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'[cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Oranges" is named
## "brewer.oranges"Multiple palettes called "oranges" found: "brewer.oranges", "matplotlib.oranges". The first one, "brewer.oranges", is returned.

5. Hallazgos Finales
##
## **Resumen de hallazgos:**
## 1. El promedio estatal de accidentes por cada 10,000 hab fue de 2737.38 y la mediana fue 73.32 .
## 2. Se identificaron clústeres espaciales significativos mediante LISA.
## 3. Variables como densidad de población y región fueron significativas.
## 4. El modelo SAR tuvo el menor AIC, indicando mejor ajuste.
## 5. Los residuos del modelo SAR no presentan autocorrelación.
## 6. El modelo GWR reveló variaciones locales importantes en los coeficientes.
## 7. El R² local del GWR mostró que la relación explicativa varía por municipio.
## 8. Municipios como Monterrey y Guadalupe aparecen como focos críticos.