Instalar librerías necesarias (solo la primera vez)

install.packages(c("sf", "dplyr", "ggplot2", "tmap", "spdep", "GWmodel", 
                   "spgwr", "jtools", "lmtest", "car", "spatialreg"))

1. Análisis Exploratorio de Datos (EDA)

library(sf)
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1; sf_use_s2() is TRUE
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tmap)
library(spdep)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(GWmodel)
## Loading required package: robustbase
## Loading required package: sp
## Loading required package: Rcpp
## Welcome to GWmodel version 2.4-2.
library(spgwr)
## NOTE: This package does not constitute approval of GWR
## as a method of spatial analysis; see example(gwr)
library(jtools)
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(spatialreg)
## Loading required package: Matrix
## 
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
## 
##     get.ClusterOption, get.coresOption, get.mcOption,
##     get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
##     set.coresOption, set.mcOption, set.VerboseOption,
##     set.ZeroPolicyOption
tmap_mode("plot")
## ℹ tmap mode set to "plot".
if (file.exists("./nl_mpios_auto_acc.shp")) {
  dataa <- st_read("./nl_mpios_auto_acc.shp")
} else {
  stop("El archivo shapefile no se encuentra en la ruta del proyecto. Verifica que los archivos .shp, .dbf, .shx y .prj estén cargados en el directorio.")
}
## Reading layer `nl_mpios_auto_acc' from data source 
##   `/cloud/project/nl_mpios_auto_acc.shp' using driver `ESRI Shapefile'
## Simple feature collection with 51 features and 22 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -101.2068 ymin: 23.16268 xmax: -98.42158 ymax: 27.79914
## Geodetic CRS:  WGS 84

a) Promedio y mediana de accidentes a nivel estatal y por región

mean_acc <- mean(dataa$tasa_auto_)
median_acc <- median(dataa$tasa_auto_)
mean_acc
## [1] 2737.383
median_acc
## [1] 73.32
dataa %>% 
  group_by(region) %>% 
  summarise(promedio = mean(tasa_auto_), mediana = median(tasa_auto_)) %>% 
  as_tibble()
## # A tibble: 5 × 4
##   region promedio mediana                                               geometry
##   <chr>     <dbl>   <dbl>                                         <GEOMETRY [°]>
## 1 Este      413.    325.  POLYGON ((-98.58157 25.72242, -98.58181 25.74087, -98…
## 2 Norte    2872.    167.  POLYGON ((-99.40973 26.31697, -99.4123 26.32066, -99.…
## 3 Oeste    9095.     66.7 MULTIPOLYGON (((-99.80249 25.99574, -99.8028 25.99588…
## 4 Sur        78.1    28.1 POLYGON ((-100.0582 25.3615, -100.0586 25.36193, -100…
## 5 ZMM       168.     84.6 POLYGON ((-99.9929 25.68308, -99.99316 25.68445, -99.…

b) Dispersión regional de accidentes automovilísticos

ggplot(dataa, aes(x = as.factor(region), y = tasa_auto_)) +
  geom_boxplot(fill = "#69b3a2") +
  labs(title = "Dispersión de Accidentes por Región",
       x = "Región", y = "Tasa de accidentes por cada 10,000 hab")

c) Dispersión por sexo

ggplot(dataa, aes(x = as.factor(sexo), y = tasa_auto_)) +
  geom_boxplot(fill = "salmon") +
  labs(title = "Accidentes por Sexo", x = "Sexo", y = "Tasa de accidentes")

2. Análisis Exploratorio Espacial (ESDA)

dataa_sp <- as(st_geometry(dataa), "Spatial")
nb_rook <- poly2nb(dataa_sp, queen = FALSE)
lw_rook <- nb2listw(nb_rook, style = "W", zero.policy = TRUE)
plot(dataa_sp, border = "grey")
plot(nb_rook, coordinates(dataa_sp), add = TRUE, col = "red")

moran.test(dataa$tasa_auto_, lw_rook)
## 
##  Moran I test under randomisation
## 
## data:  dataa$tasa_auto_  
## weights: lw_rook    
## 
## Moran I statistic standard deviate = 0.14327, p-value = 0.443
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       -0.01516424       -0.02000000        0.00113921
lisa <- localmoran(dataa$tasa_auto_, lw_rook)
dataa$lisa_I <- lisa[,1]
dataa$signif <- lisa[,5] < 0.05

# Visualizar con tmap
tmap_mode("plot")
## ℹ tmap mode set to "plot".
tm_shape(dataa) +
  tm_polygons("lisa_I", palette = "Reds", title = "Local Moran's I") +
  tm_borders()
## 
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).[v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'[cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Reds" is named
## "brewer.reds"Multiple palettes called "reds" found: "brewer.reds", "matplotlib.reds". The first one, "brewer.reds", is returned.

subset(dataa, signif == TRUE) %>% select(geometry, lisa_I)
## Simple feature collection with 6 features and 1 field
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -99.97067 ymin: 25.06713 xmax: -98.42158 ymax: 26.32927
## Geodetic CRS:  WGS 84
##         lisa_I                       geometry
## 17 -0.18966943 POLYGON ((-99.18902 25.9053...
## 19 -0.17813416 POLYGON ((-99.16702 26.0754...
## 28 -0.17132160 POLYGON ((-99.5387 25.90161...
## 33 -0.45212844 POLYGON ((-99.39597 26.0949...
## 41 -0.09714289 POLYGON ((-99.33367 26.3063...
## 46  0.20595067 POLYGON ((-99.58946 26.2353...

3. Modelos de Predicción Global

modelo_ols <- lm(log(tasa_auto_) ~ zona_urb + densidad_p + region + edad + I(edad^2) + sexo, data = dataa)
summary(modelo_ols)
## 
## Call:
## lm(formula = log(tasa_auto_) ~ zona_urb + densidad_p + region + 
##     edad + I(edad^2) + sexo, data = dataa)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4440 -0.6831 -0.0673  0.5823  5.4782 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)  
## (Intercept)             -4.2392435  4.7835139  -0.886   0.3808  
## zona_urbno_interseccion  2.1542260  1.1543210   1.866   0.0694 .
## zona_urbsuburbana        1.1717921  1.4469273   0.810   0.4228  
## densidad_p               0.0002395  0.0004104   0.584   0.5628  
## regionNorte             -0.1875490  1.0322058  -0.182   0.8567  
## regionOeste             -1.0841957  1.0442633  -1.038   0.3054  
## regionSur               -2.6271709  1.0063100  -2.611   0.0127 *
## regionZMM               -1.7246836  1.4008889  -1.231   0.2255  
## edad                     0.3973415  0.2009280   1.978   0.0549 .
## I(edad^2)               -0.0048484  0.0023373  -2.074   0.0445 *
## sexo                     1.0935001  0.7360987   1.486   0.1452  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.985 on 40 degrees of freedom
## Multiple R-squared:  0.3277, Adjusted R-squared:  0.1596 
## F-statistic:  1.95 on 10 and 40 DF,  p-value: 0.06633
vif(modelo_ols)
##                 GVIF Df GVIF^(1/(2*Df))
## zona_urb    2.265832  2        1.226894
## densidad_p  2.655803  1        1.629663
## region      3.193847  4        1.156217
## edad       42.393073  1        6.510996
## I(edad^2)  41.233301  1        6.421316
## sexo        1.105274  1        1.051320
bptest(modelo_ols)
## 
##  studentized Breusch-Pagan test
## 
## data:  modelo_ols
## BP = 9.8503, df = 10, p-value = 0.4537
AIC(modelo_ols)
## [1] 226.2857
dataa_spdf <- as(dataa, "Spatial")
model_sar <- spatialreg::lagsarlm(log(tasa_auto_) ~ zona_urb + densidad_p + region + edad + I(edad^2) + sexo, data = dataa_spdf, listw = lw_rook)
model_sem <- spatialreg::errorsarlm(log(tasa_auto_) ~ zona_urb + densidad_p + region + edad + I(edad^2) + sexo, data = dataa_spdf, listw = lw_rook)
model_sdm <- spatialreg::lagsarlm(log(tasa_auto_) ~ zona_urb + densidad_p + region + edad + I(edad^2) + sexo, data = dataa_spdf, listw = lw_rook, type = "mixed")
summary(model_sar)
## 
## Call:spatialreg::lagsarlm(formula = log(tasa_auto_) ~ zona_urb + densidad_p + 
##     region + edad + I(edad^2) + sexo, data = dataa_spdf, listw = lw_rook)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -4.42717 -0.72601 -0.11451  0.56502  5.35978 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##                            Estimate  Std. Error z value Pr(>|z|)
## (Intercept)             -5.14112699  4.54997292 -1.1299  0.25851
## zona_urbno_interseccion  2.13836151  1.01966606  2.0971  0.03598
## zona_urbsuburbana        1.07062821  1.27794902  0.8378  0.40216
## densidad_p               0.00023270  0.00036289  0.6412  0.52136
## regionNorte             -0.16421442  0.91304394 -0.1799  0.85727
## regionOeste             -0.98897407  0.92347551 -1.0709  0.28420
## regionSur               -2.40465641  0.96710590 -2.4864  0.01290
## regionZMM               -1.58871275  1.25385801 -1.2671  0.20513
## edad                     0.41333420  0.17941536  2.3038  0.02123
## I(edad^2)               -0.00502303  0.00208667 -2.4072  0.01608
## sexo                     1.12058278  0.65519712  1.7103  0.08721
## 
## Rho: 0.088749, LR test value: 0.20473, p-value: 0.65093
## Asymptotic standard error: 0.18627
##     z-value: 0.47645, p-value: 0.63375
## Wald statistic: 0.22701, p-value: 0.63375
## 
## Log likelihood: -101.0405 for lag model
## ML residual variance (sigma squared): 3.0734, (sigma: 1.7531)
## Number of observations: 51 
## Number of parameters estimated: 13 
## AIC: 228.08, (AIC for lm: 226.29)
## LM test for residual autocorrelation
## test value: 0.04346, p-value: 0.83486
summary(model_sem)
## 
## Call:spatialreg::errorsarlm(formula = log(tasa_auto_) ~ zona_urb + 
##     densidad_p + region + edad + I(edad^2) + sexo, data = dataa_spdf, 
##     listw = lw_rook)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -4.43443 -0.71716 -0.14081  0.56040  5.35240 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                            Estimate  Std. Error z value Pr(>|z|)
## (Intercept)             -4.39688424  4.14416024 -1.0610  0.28870
## zona_urbno_interseccion  2.09777998  1.02580495  2.0450  0.04085
## zona_urbsuburbana        1.01127266  1.26883141  0.7970  0.42544
## densidad_p               0.00024282  0.00037103  0.6544  0.51283
## regionNorte             -0.16080817  0.96887069 -0.1660  0.86818
## regionOeste             -0.92954887  0.97106700 -0.9572  0.33844
## regionSur               -2.55417711  0.95138334 -2.6847  0.00726
## regionZMM               -1.61981703  1.28033537 -1.2652  0.20582
## edad                     0.40086128  0.17339746  2.3118  0.02079
## I(edad^2)               -0.00485966  0.00201883 -2.4072  0.01608
## sexo                     1.07665414  0.64444899  1.6707  0.09479
## 
## Lambda: 0.09478, LR test value: 0.14084, p-value: 0.70745
## Asymptotic standard error: 0.19541
##     z-value: 0.48504, p-value: 0.62765
## Wald statistic: 0.23527, p-value: 0.62765
## 
## Log likelihood: -101.0725 for error model
## ML residual variance (sigma squared): 3.0765, (sigma: 1.754)
## Number of observations: 51 
## Number of parameters estimated: 13 
## AIC: 228.14, (AIC for lm: 226.29)
summary(model_sdm)
## 
## Call:spatialreg::lagsarlm(formula = log(tasa_auto_) ~ zona_urb + densidad_p + 
##     region + edad + I(edad^2) + sexo, data = dataa_spdf, listw = lw_rook, 
##     type = "mixed")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3.79282 -0.64894 -0.15556  0.93272  2.35130 
## 
## Type: mixed 
## Coefficients: (asymptotic standard errors) 
##                                Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)                 47.71710918 21.51743867  2.2176 0.0265820
## zona_urbno_interseccion      3.20903815  0.93503202  3.4320 0.0005991
## zona_urbsuburbana            0.67358354  1.15717247  0.5821 0.5605031
## densidad_p                   0.00148280  0.00054648  2.7134 0.0066602
## regionNorte                 -1.96692344  1.85326644 -1.0613 0.2885409
## regionOeste                 -0.97927960  1.46961306 -0.6664 0.5051861
## regionSur                   -7.68647838  2.22448770 -3.4554 0.0005495
## regionZMM                   -2.42933969  1.55319022 -1.5641 0.1177949
## edad                        -0.11209051  0.24050851 -0.4661 0.6411752
## I(edad^2)                    0.00017619  0.00268659  0.0656 0.9477120
## sexo                        -0.67453373  0.71839422 -0.9389 0.3477582
## lag.zona_urbno_interseccion  0.72292783  1.60827293  0.4495 0.6530669
## lag.zona_urbsuburbana        6.44158201  2.96912888  2.1695 0.0300433
## lag.densidad_p              -0.00206923  0.00117802 -1.7565 0.0789986
## lag.regionNorte              1.55059987  2.21493788  0.7001 0.4838869
## lag.regionOeste             -0.73473253  2.05738052 -0.3571 0.7210017
## lag.regionSur                4.85736288  2.85645036  1.7005 0.0890390
## lag.regionZMM               -0.09947556  2.26350453 -0.0439 0.9649462
## lag.edad                    -1.57014829  0.71969674 -2.1817 0.0291331
## lag.I(edad^2)                0.01571143  0.00806335  1.9485 0.0513554
## lag.sexo                     0.31729430  1.55170476  0.2045 0.8379776
## 
## Rho: -0.27812, LR test value: 1.636, p-value: 0.20088
## Asymptotic standard error: 0.19158
##     z-value: -1.4517, p-value: 0.14659
## Wald statistic: 2.1074, p-value: 0.14659
## 
## Log likelihood: -88.93371 for mixed model
## ML residual variance (sigma squared): 1.8851, (sigma: 1.373)
## Number of observations: 51 
## Number of parameters estimated: 23 
## AIC: 223.87, (AIC for lm: 223.5)
## LM test for residual autocorrelation
## test value: 4.2581, p-value: 0.039064
AIC(modelo_ols, model_sar, model_sem, model_sdm)
##            df      AIC
## modelo_ols 12 226.2857
## model_sar  13 228.0810
## model_sem  13 228.1449
## model_sdm  23 223.8674
moran.test(residuals(model_sar), lw_rook)
## 
##  Moran I test under randomisation
## 
## data:  residuals(model_sar)  
## weights: lw_rook    
## 
## Moran I statistic standard deviate = 0.15124, p-value = 0.4399
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      -0.006307225      -0.020000000       0.008196757
dataa$pred_sar <- model_sar$fitted.values
tmap_mode("plot")
## ℹ tmap mode set to "plot".
tm_shape(dataa) +
  tm_polygons("pred_sar", palette = "Blues", title = "Predicción SAR") +
  tm_borders()
## 
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).[v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'[cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Blues" is named
## "brewer.blues"Multiple palettes called "blues" found: "brewer.blues", "matplotlib.blues". The first one, "brewer.blues", is returned.

4. Regresión Local - GWR

# Asegurarse de que el objeto Spatial esté creado
if (!exists("dataa_spdf")) dataa_spdf <- as(dataa, "Spatial")

# Ejecutar GWR con variables correctas
bw_gwr <- GWmodel::bw.gwr(log(tasa_auto_) ~ densidad_p + edad + gini, data = dataa_spdf, approach = "AIC", adaptive = TRUE)
## Adaptive bandwidth (number of nearest neighbours): 39 AICc value: 207.7468 
## Adaptive bandwidth (number of nearest neighbours): 32 AICc value: 207.5074 
## Adaptive bandwidth (number of nearest neighbours): 27 AICc value: 209.9128 
## Adaptive bandwidth (number of nearest neighbours): 34 AICc value: 208.0291 
## Adaptive bandwidth (number of nearest neighbours): 29 AICc value: 207.7464 
## Adaptive bandwidth (number of nearest neighbours): 32 AICc value: 207.5074
gwr_model <- GWmodel::gwr.basic(log(tasa_auto_) ~ densidad_p + edad + gini, data = dataa_spdf, bw = bw_gwr, adaptive = TRUE)

# Extraer resultados
dataa$gwr_pred <- gwr_model$SDF$yhat
dataa$gwr_R2 <- gwr_model$SDF$Local_R2
tmap_mode("plot")
## ℹ tmap mode set to "plot".
tm_shape(dataa) +
  tm_polygons(col = "gwr_pred", palette = "Greens", title = "Predicción GWR") +
  tm_borders()
## 
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).[v3->v4] `tm_polygons()`: use 'fill' for the fill color of polygons/symbols
## (instead of 'col'), and 'col' for the outlines (instead of 'border.col').[v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'[cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Greens" is named
## "brewer.greens"Multiple palettes called "greens" found: "brewer.greens", "matplotlib.greens". The first one, "brewer.greens", is returned.

tmap_mode("plot")
## ℹ tmap mode set to "plot".
tm_shape(dataa) +
  tm_polygons(col = "gwr_R2", palette = "Oranges", title = "R² local GWR") +
  tm_borders()
## 
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).[v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'[cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Oranges" is named
## "brewer.oranges"Multiple palettes called "oranges" found: "brewer.oranges", "matplotlib.oranges". The first one, "brewer.oranges", is returned.

5. Hallazgos Finales

## 
## **Resumen de hallazgos:**
## 1. El promedio estatal de accidentes por cada 10,000 hab fue de  2737.38  y la mediana fue  73.32 .
## 2. Se identificaron clústeres espaciales significativos mediante LISA.
## 3. Variables como densidad de población y región fueron significativas.
## 4. El modelo SAR tuvo el menor AIC, indicando mejor ajuste.
## 5. Los residuos del modelo SAR no presentan autocorrelación.
## 6. El modelo GWR reveló variaciones locales importantes en los coeficientes.
## 7. El R² local del GWR mostró que la relación explicativa varía por municipio.
## 8. Municipios como Monterrey y Guadalupe aparecen como focos críticos.