library(foreign) # import external files
library(dplyr) # data manipulation
library(viridis) # offers several color palettes
library(RColorBrewer) # offers several color palettes
library(tidyverse) # it includes a collection of R packages designed for data science
library(jtools) # export regression summaries to tables
# spatial data analysis
library(sf) # functions to encode spatial vector data
library(tmap) # making maps so spatial data distributions are visualized
library(spdep) # a collection of functions to create spatial weight matrix
library(grid) # a set of functions and classes that represent graphical objects
library(rgeoda) # spatial data analysis based on GeoDa
library(tigris) # allows to work with shapefiles
library(regclass) # contains basic tools for visualizing, interpreting, and building regression models
library(spatialreg) # a collection of all the estimation functions for spatial cross-sectional models
# GWR
library(spgwr) # Geographically Weighted Regression
library(GWmodel)
library(car)
library(lmtest)
library(car)
library(lmtest)
nl <- st_read("C:\\Users\\Cristina\\Desktop\\8vo\\Quiz_3\\nl_map\\nl_mpios_auto_acc.shp")
## Reading layer `nl_mpios_auto_acc' from data source
## `C:\Users\Cristina\Desktop\8vo\Quiz_3\nl_map\nl_mpios_auto_acc.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 51 features and 22 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -101.2068 ymin: 23.16268 xmax: -98.42158 ymax: 27.79914
## Geodetic CRS: WGS 84
dataa <- read_sf("C:\\Users\\Cristina\\Desktop\\8vo\\Quiz_3\\nl_map\\nl_mpios_auto_acc.shp")
summary(dataa)
## OBJECTID CODELAG CVE_ENT IDUNICO Shape_Leng
## Min. : 1.0 Min. :2199 Min. :19 Min. :19001 Min. :0.2858
## 1st Qu.:13.5 1st Qu.:2212 1st Qu.:19 1st Qu.:19014 1st Qu.:0.8586
## Median :26.0 Median :2224 Median :19 Median :19026 Median :1.5393
## Mean :26.0 Mean :2224 Mean :19 Mean :19026 Mean :1.7867
## 3rd Qu.:38.5 3rd Qu.:2236 3rd Qu.:19 3rd Qu.:19039 3rd Qu.:2.4495
## Max. :51.0 Max. :2249 Max. :19 Max. :19051 Max. :4.7800
## Shape_Area OBJECTID_1 IDUNICO_1 mpio
## Min. :0.004224 Min. : 1.0 Min. :19001 Length:51
## 1st Qu.:0.021224 1st Qu.:13.5 1st Qu.:19014 Class :character
## Median :0.064960 Median :26.0 Median :19026 Mode :character
## Mean :0.113032 Mean :26.0 Mean :19026
## 3rd Qu.:0.145082 3rd Qu.:38.5 3rd Qu.:19039
## Max. :0.630891 Max. :51.0 Max. :19051
## auto_accid tasa_auto_ zona_urb zona_subur
## Min. : 1.0 Min. : 0.15 Length:51 Length:51
## 1st Qu.: 34.0 1st Qu.: 31.66 Class :character Class :character
## Median : 172.0 Median : 73.32 Mode :character Mode :character
## Mean : 2389.1 Mean : 2737.38
## 3rd Qu.: 624.5 3rd Qu.: 299.64
## Max. :42956.0 Max. :98483.48
## sexo aliento cinturon edad
## Min. :1.000 Length:51 Length:51 Min. :15.38
## 1st Qu.:2.000 Class :character Class :character 1st Qu.:32.10
## Median :2.000 Mode :character Mode :character Median :36.35
## Mean :1.804 Mean :37.61
## 3rd Qu.:2.000 3rd Qu.:41.46
## Max. :2.000 Max. :64.50
## pop densidad_p gini region
## Min. : 1071 Min. : 0.830 Min. :0.3000 Length:51
## 1st Qu.: 4182 1st Qu.: 4.245 1st Qu.:0.3450 Class :character
## Median : 15902 Median : 7.070 Median :0.3800 Mode :character
## Mean : 110003 Mean : 498.641 Mean :0.3922
## 3rd Qu.: 78173 3rd Qu.: 268.535 3rd Qu.:0.4400
## Max. :1124835 Max. :4748.840 Max. :0.4900
## grado_educ geometry
## Min. : 6.590 POLYGON :51
## 1st Qu.: 7.945 epsg:4326 : 0
## Median : 8.840 +proj=long...: 0
## Mean : 8.933
## 3rd Qu.: 9.645
## Max. :13.160
tm_shape(dataa) +
tm_polygons("densidad_p", palette = "Purples", style="quantile", title="NL Population Density across Mpios") +
tmap_mode("view")
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_polygons()`: instead of `style = "quantile"`, use fill.scale =
## `tm_scale_intervals()`.
## ℹ Migrate the argument(s) 'style', 'palette' (rename to 'values') to
## 'tm_scale_intervals(<HERE>)'
## [v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
## ℹ tmap mode set to "view".
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Purples" is named
## "brewer.purples"
## Multiple palettes called "purples" found: "brewer.purples", "matplotlib.purples". The first one, "brewer.purples", is returned.
# A nivel estatal
mean_acc_state <- mean(nl$auto_accid, na.rm = TRUE)
median_acc_state <- median(nl$auto_accid, na.rm = TRUE)
# A nivel regional
acc_region <- nl %>%
group_by(region) %>%
summarise(
promedio_acc = mean(auto_accid, na.rm = TRUE),
mediana_acc = median(auto_accid, na.rm = TRUE)
)
# Ver resultados
mean_acc_state
## [1] 2389.098
median_acc_state
## [1] 172
acc_region
## Simple feature collection with 5 features and 3 fields
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: -101.2068 ymin: 23.16268 xmax: -98.42158 ymax: 27.79914
## Geodetic CRS: WGS 84
## # A tibble: 5 × 4
## region promedio_acc mediana_acc geometry
## <chr> <dbl> <dbl> <GEOMETRY [°]>
## 1 Este 198. 152. POLYGON ((-98.58157 25.72242, -98.58181 25.74…
## 2 Norte 965. 68.5 POLYGON ((-99.40973 26.31697, -99.4123 26.320…
## 3 Oeste 2055. 340 MULTIPOLYGON (((-99.80249 25.99574, -99.8028 …
## 4 Sur 79.9 70.5 POLYGON ((-100.0582 25.3615, -100.0586 25.361…
## 5 ZMM 8551. 3825 POLYGON ((-99.9929 25.68308, -99.99316 25.684…
# Por región
ggplot(nl, aes(x = region, y = auto_accid, fill = region)) +
geom_boxplot() +
labs(title = "Dispersión de accidentes automovilísticos por región",
x = "Región",
y = "Accidentes automovilísticos") +
theme_minimal()
# Por uso de cinturón
ggplot(nl, aes(x = cinturon, y = auto_accid, fill = cinturon)) +
geom_boxplot() +
labs(title = "Accidentes vs uso de cinturón",
x = "Uso de cinturón",
y = "Accidentes automovilísticos") +
theme_minimal()
# Por sexo
ggplot(nl, aes(x = factor(sexo), y = auto_accid, fill = factor(sexo))) +
geom_boxplot() +
labs(title = "Accidentes vs sexo",
x = "Sexo (1 = femenino, 2 = masculino)",
y = "Accidentes automovilísticos") +
theme_minimal()
# Accidentes en estado de ebriedad
ggplot(nl, aes(x = aliento, y = auto_accid)) +
stat_boxplot(geom = "errorbar",
width = 0.2) +
geom_boxplot(fill = "#4271AE", colour = "#1F3552",
alpha = 0.9, outlier.colour = "red") +
scale_y_continuous(name = "Rate of Car Accidents per 10,000 Pop") +
scale_x_discrete(name = "Drunk Driving") +
ggtitle("Car Accidents by Drunk Driving") +
theme(axis.line = element_line(colour = "black",
size = 0.25))
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Convertir a objeto espacial base (sp) si es necesario:
nl_sp <- as_Spatial(nl)
# Crear vecinos tipo Rook (adyacencia por borde)
rook_nb <- poly2nb(nl_sp, queen = FALSE)
# Ver estructura
summary(rook_nb)
## Neighbour list object:
## Number of regions: 51
## Number of nonzero links: 244
## Percentage nonzero weights: 9.381007
## Average number of links: 4.784314
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9 12
## 2 5 6 10 13 7 3 2 2 1
## 2 least connected regions:
## 32 37 with 1 link
## 1 most connected region:
## 11 with 12 links
plot(nl_sp, border = "grey")
plot(rook_nb, coordinates(nl_sp), add = TRUE, col = "red")
# Crear matriz de pesos espaciales
rook_listw <- nb2listw(rook_nb, style = "W", zero.policy = TRUE)
# Calcular Moran's I para tasa de accidentes
global_moran <- moran.test(nl$tasa_auto, listw = rook_listw, zero.policy = TRUE)
print(global_moran)
##
## Moran I test under randomisation
##
## data: nl$tasa_auto
## weights: rook_listw
##
## Moran I statistic standard deviate = 0.14327, p-value = 0.443
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.01516424 -0.02000000 0.00113921
# Convertir a objeto sp
nl_sp <- as_Spatial(nl)
rook_nb <- poly2nb(nl_sp, queen = FALSE)
pesos <- nb2listw(rook_nb, style = "W", zero.policy = TRUE)
# Log-transformación si alguna variable es muy sesgada
nl$log_tasa_auto <- log(nl$tasa_auto + 1) # Sumar 1 para evitar log(0)
# Tests de Moran global
moran.test(nl$log_tasa_auto, pesos, zero.policy = TRUE)
##
## Moran I test under randomisation
##
## data: nl$log_tasa_auto
## weights: pesos
##
## Moran I statistic standard deviate = 2.0497, p-value = 0.02019
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.165096341 -0.020000000 0.008154527
moran.test(nl$densidad_p, pesos, zero.policy = TRUE)
##
## Moran I test under randomisation
##
## data: nl$densidad_p
## weights: pesos
##
## Moran I statistic standard deviate = 7.888, p-value = 1.536e-15
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.663648063 -0.020000000 0.007511668
moran.test(nl$grado_educ, pesos, zero.policy = TRUE)
##
## Moran I test under randomisation
##
## data: nl$grado_educ
## weights: pesos
##
## Moran I statistic standard deviate = 7.5284, p-value = 2.568e-14
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.672695633 -0.020000000 0.008466001
moran.test(nl$gini, pesos, zero.policy = TRUE)
##
## Moran I test under randomisation
##
## data: nl$gini
## weights: pesos
##
## Moran I statistic standard deviate = 5.4505, p-value = 2.511e-08
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.490594309 -0.020000000 0.008775594
moran.test(nl$edad, pesos, zero.policy = TRUE)
##
## Moran I test under randomisation
##
## data: nl$edad
## weights: pesos
##
## Moran I statistic standard deviate = -1.6088, p-value = 0.9462
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.165898439 -0.020000000 0.008224339
# log_tasa_auto
lisa_tasa <- localmoran(nl$log_tasa_auto, pesos, zero.policy = TRUE)
nl$lisa_tasa <- lisa_tasa[,1]
nl$pval_tasa <- lisa_tasa[,5]
# densidad_p
lisa_dens <- localmoran(nl$densidad_p, pesos, zero.policy = TRUE)
nl$lisa_dens <- lisa_dens[,1]
nl$pval_dens <- lisa_dens[,5]
# grado_educ
lisa_educ <- localmoran(nl$grado_educ, pesos, zero.policy = TRUE)
nl$lisa_educ <- lisa_educ[,1]
nl$pval_educ <- lisa_educ[,5]
# gini
lisa_gini <- localmoran(nl$gini, pesos, zero.policy = TRUE)
nl$lisa_gini <- lisa_gini[,1]
nl$pval_gini <- lisa_gini[,5]
# edad
lisa_edad <- localmoran(nl$edad, pesos, zero.policy = TRUE)
nl$lisa_edad <- lisa_edad[,1]
nl$pval_edad <- lisa_edad[,5]
# log_tasa_auto
lisa_tasa <- localmoran(nl$log_tasa_auto, pesos, zero.policy = TRUE)
nl$lisa_tasa <- lisa_tasa[,1]
nl$pval_tasa <- lisa_tasa[,5]
# densidad_p
lisa_dens <- localmoran(nl$densidad_p, pesos, zero.policy = TRUE)
nl$lisa_dens <- lisa_dens[,1]
nl$pval_dens <- lisa_dens[,5]
# grado_educ
lisa_educ <- localmoran(nl$grado_educ, pesos, zero.policy = TRUE)
nl$lisa_educ <- lisa_educ[,1]
nl$pval_educ <- lisa_educ[,5]
# gini
lisa_gini <- localmoran(nl$gini, pesos, zero.policy = TRUE)
nl$lisa_gini <- lisa_gini[,1]
nl$pval_gini <- lisa_gini[,5]
# edad
lisa_edad <- localmoran(nl$edad, pesos, zero.policy = TRUE)
nl$lisa_edad <- lisa_edad[,1]
nl$pval_edad <- lisa_edad[,5]
tmap_mode("plot")
## ℹ tmap mode set to "plot".
# Tasa de accidentes
tm_shape(nl) +
tm_polygons("lisa_tasa", title = "LISA: log(Tasa Accidentes)", palette = "Reds")
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).[v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'[cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Reds" is named
## "brewer.reds"Multiple palettes called "reds" found: "brewer.reds", "matplotlib.reds". The first one, "brewer.reds", is returned.
# Densidad poblacional
tm_shape(nl) +
tm_polygons("lisa_dens", title = "LISA: Densidad Poblacional", palette = "Blues")
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).[v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'[cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Blues" is named
## "brewer.blues"Multiple palettes called "blues" found: "brewer.blues", "matplotlib.blues". The first one, "brewer.blues", is returned.
# Grado educativo
tm_shape(nl) +
tm_polygons("lisa_educ", title = "LISA: Grado Educativo", palette = "Greens")
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).[v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'[cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Greens" is named
## "brewer.greens"Multiple palettes called "greens" found: "brewer.greens", "matplotlib.greens". The first one, "brewer.greens", is returned.
# Gini
tm_shape(nl) +
tm_polygons("lisa_gini", title = "LISA: Gini", palette = "Purples")
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).[v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'[cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Purples" is named
## "brewer.purples"Multiple palettes called "purples" found: "brewer.purples", "matplotlib.purples". The first one, "brewer.purples", is returned.
# Edad
tm_shape(nl) +
tm_polygons("lisa_edad", title = "LISA: Edad Promedio", palette = "Oranges")
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).[v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'[cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Oranges" is named
## "brewer.oranges"Multiple palettes called "oranges" found: "brewer.oranges", "matplotlib.oranges". The first one, "brewer.oranges", is returned.
# Proporción de sexo femenino (2) por municipio
nl$prop_fem <- ifelse(nl$sexo == 1, 2, 0)
# Proporción de aliento positivo
nl$prop_aliento_pos <- ifelse(nl$aliento == "positivo", 1, 0)
moran.test(nl$prop_fem, pesos, zero.policy = TRUE)
##
## Moran I test under randomisation
##
## data: nl$prop_fem
## weights: pesos
##
## Moran I statistic standard deviate = -0.9416, p-value = 0.8268
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.106761518 -0.020000000 0.008490181
moran.test(nl$prop_aliento_pos, pesos, zero.policy = TRUE)
##
## Moran I test under randomisation
##
## data: nl$prop_aliento_pos
## weights: pesos
##
## Moran I statistic standard deviate = -0.64872, p-value = 0.7417
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.080864512 -0.020000000 0.008802666
# Sexo femenino
lisa_fem <- localmoran(nl$prop_fem, pesos, zero.policy = TRUE)
nl$lisa_fem <- lisa_fem[,1]
nl$pval_fem <- lisa_fem[,5]
# Aliento positivo
lisa_aliento <- localmoran(nl$prop_aliento_pos, pesos, zero.policy = TRUE)
nl$lisa_aliento <- lisa_aliento[,1]
nl$pval_aliento <- lisa_aliento[,5]
tmap_mode("plot")
## ℹ tmap mode set to "plot".
tm_shape(nl) +
tm_polygons("lisa_fem", title = "LISA: Proporción Mujeres", palette = "Reds")
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).[v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'[cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Reds" is named
## "brewer.reds"Multiple palettes called "reds" found: "brewer.reds", "matplotlib.reds". The first one, "brewer.reds", is returned.
tm_shape(nl) +
tm_polygons("lisa_aliento", title = "LISA: Aliento Positivo", palette = "Blues")
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).[v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'[cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Blues" is named
## "brewer.blues"Multiple palettes called "blues" found: "brewer.blues", "matplotlib.blues". The first one, "brewer.blues", is returned.
ols_model <- lm(log_tasa_auto ~ log(auto_accid) + sexo + edad + log(densidad_p) + grado_educ, data = nl)
summary(ols_model)
##
## Call:
## lm(formula = log_tasa_auto ~ log(auto_accid) + sexo + edad +
## log(densidad_p) + grado_educ, data = nl)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.91775 -0.57025 0.03012 0.54614 1.96089
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.65229 1.61256 1.645 0.107
## log(auto_accid) 0.85151 0.06428 13.247 < 2e-16 ***
## sexo 0.15107 0.33069 0.457 0.650
## edad -0.01119 0.01448 -0.773 0.444
## log(densidad_p) -0.57060 0.10094 -5.653 1.02e-06 ***
## grado_educ -0.04389 0.18220 -0.241 0.811
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9209 on 45 degrees of freedom
## Multiple R-squared: 0.8145, Adjusted R-squared: 0.7939
## F-statistic: 39.53 on 5 and 45 DF, p-value: 2.229e-15
# Multicolinealidad - VIF
vif(ols_model) # Valores > 5 indican colinealidad preocupante
## log(auto_accid) sexo edad log(densidad_p) grado_educ
## 1.407760 1.036593 1.023408 4.080342 3.774491
# Heterocedasticidad - Prueba de Breusch-Pagan
bptest(ols_model)
##
## studentized Breusch-Pagan test
##
## data: ols_model
## BP = 4.0523, df = 5, p-value = 0.5419
sar_model <- lagsarlm(log_tasa_auto ~ log(auto_accid) + sexo + edad + log(densidad_p) + grado_educ,
data = nl, listw = pesos, zero.policy = TRUE)
summary(sar_model)
##
## Call:lagsarlm(formula = log_tasa_auto ~ log(auto_accid) + sexo + edad +
## log(densidad_p) + grado_educ, data = nl, listw = pesos, zero.policy = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.800677 -0.530702 0.094373 0.541226 1.864645
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.9789684 1.4854884 1.3322 0.1828
## log(auto_accid) 0.8382797 0.0578641 14.4870 < 2.2e-16
## sexo 0.1733980 0.2958163 0.5862 0.5578
## edad -0.0041859 0.0130688 -0.3203 0.7487
## log(densidad_p) -0.4944978 0.0963280 -5.1335 2.844e-07
## grado_educ -0.1331490 0.1666099 -0.7992 0.4242
##
## Rho: 0.20461, LR test value: 4.6186, p-value: 0.031627
## Asymptotic standard error: 0.10254
## z-value: 1.9955, p-value: 0.045983
## Wald statistic: 3.9822, p-value: 0.045983
##
## Log likelihood: -62.66439 for lag model
## ML residual variance (sigma squared): 0.6772, (sigma: 0.82292)
## Number of observations: 51
## Number of parameters estimated: 8
## AIC: 141.33, (AIC for lm: 143.95)
## LM test for residual autocorrelation
## test value: 0.93967, p-value: 0.33236
sem_model <- errorsarlm(log_tasa_auto ~ log(auto_accid) + sexo + edad + log(densidad_p) + grado_educ,
data = nl, listw = pesos, zero.policy = TRUE)
summary(sem_model)
##
## Call:errorsarlm(formula = log_tasa_auto ~ log(auto_accid) + sexo +
## edad + log(densidad_p) + grado_educ, data = nl, listw = pesos,
## zero.policy = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.70251 -0.45981 0.10556 0.53350 1.65112
##
## Type: error
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.16098108 1.56120172 2.0247 0.0429
## log(auto_accid) 0.82559388 0.05567217 14.8296 < 2e-16
## sexo 0.08976998 0.27043277 0.3319 0.7399
## edad -0.00017025 0.01149827 -0.0148 0.9882
## log(densidad_p) -0.54444608 0.10164803 -5.3562 8.5e-08
## grado_educ -0.11685749 0.17453581 -0.6695 0.5032
##
## Lambda: 0.45014, LR test value: 6.943, p-value: 0.0084147
## Asymptotic standard error: 0.15459
## z-value: 2.9118, p-value: 0.0035936
## Wald statistic: 8.4786, p-value: 0.0035936
##
## Log likelihood: -61.50219 for error model
## ML residual variance (sigma squared): 0.62144, (sigma: 0.78831)
## Number of observations: 51
## Number of parameters estimated: 8
## AIC: 139, (AIC for lm: 143.95)
sdm_model <- lagsarlm(log_tasa_auto ~ log(auto_accid) + sexo + edad + log(densidad_p) + grado_educ,
data = nl, listw = pesos, type = "mixed", zero.policy = TRUE)
summary(sdm_model)
##
## Call:lagsarlm(formula = log_tasa_auto ~ log(auto_accid) + sexo + edad +
## log(densidad_p) + grado_educ, data = nl, listw = pesos, type = "mixed",
## zero.policy = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.716239 -0.545509 0.084057 0.466322 1.774997
##
## Type: mixed
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.069365 2.427946 -0.0286 0.9772
## log(auto_accid) 0.817832 0.054804 14.9227 < 2.2e-16
## sexo 0.199205 0.277888 0.7169 0.4735
## edad -0.014033 0.014231 -0.9861 0.3241
## log(densidad_p) -0.549039 0.115274 -4.7629 1.908e-06
## grado_educ -0.307729 0.188498 -1.6325 0.1026
## lag.log(auto_accid) -0.342575 0.163810 -2.0913 0.0365
## lag.sexo 1.005404 0.700359 1.4356 0.1511
## lag.edad -0.051596 0.036199 -1.4253 0.1541
## lag.log(densidad_p) 0.148661 0.221207 0.6720 0.5016
## lag.grado_educ 0.503996 0.335172 1.5037 0.1327
##
## Rho: 0.47694, LR test value: 7.9717, p-value: 0.0047515
## Asymptotic standard error: 0.14677
## z-value: 3.2496, p-value: 0.0011555
## Wald statistic: 10.56, p-value: 0.0011555
##
## Log likelihood: -58.43298 for mixed model
## ML residual variance (sigma squared): 0.54721, (sigma: 0.73974)
## Number of observations: 51
## Number of parameters estimated: 13
## AIC: 142.87, (AIC for lm: 148.84)
## LM test for residual autocorrelation
## test value: 1.7442, p-value: 0.18661
AIC(ols_model, sar_model, sem_model, sdm_model)
## df AIC
## ols_model 7 143.9474
## sar_model 8 141.3288
## sem_model 8 139.0044
## sdm_model 13 142.8660
# También puedes comparar logLik directamente:
logLik(ols_model)
## 'log Lik.' -64.9737 (df=7)
logLik(sar_model)
## 'log Lik.' -62.66439 (df=8)
logLik(sem_model)
## 'log Lik.' -61.50219 (df=8)
logLik(sdm_model)
## 'log Lik.' -58.43298 (df=13)
residuos <- residuals(sar_model) # cambia a sem_model o sdm_model si es tu mejor modelo
moran.test(residuos, pesos, zero.policy = TRUE)
##
## Moran I test under randomisation
##
## data: residuos
## weights: pesos
##
## Moran I statistic standard deviate = 1.0613, p-value = 0.1443
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.07868567 -0.02000000 0.00864660
# Predicciones
nl$y_hat <- fitted(sar_model) # cambia según tu modelo
## This method assumes the response is known - see manual page
# Mapa
library(tmap)
tmap_mode("plot")
## ℹ tmap mode set to "plot".
tm_shape(nl) +
tm_polygons("y_hat", title = "Predicción: log(Tasa Accidentes)", palette = "Oranges") +
tm_layout(main.title = "Valores Predichos del Modelo SAR")
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).
## [v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
## [v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Oranges" is named
## "brewer.oranges"
## Multiple palettes called "oranges" found: "brewer.oranges", "matplotlib.oranges". The first one, "brewer.oranges", is returned.
##
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.
# Extraer centroides de cada municipio (requerido por GWR)
coords <- st_centroid(nl$geometry) |> st_coordinates()
# Convertir a objeto Spatial
nl_sp <- as_Spatial(nl)
# Bandwidth óptimo
bw_gwr <- gwr.sel(
log_tasa_auto ~ densidad_p + grado_educ + gini + edad + prop_fem + prop_aliento_pos,
data = nl_sp,
coords = coords,
adapt = TRUE
)
## Warning in gwr.sel(log_tasa_auto ~ densidad_p + grado_educ + gini + edad + :
## data is Spatial* object, ignoring coords argument
## Adaptive q: 0.381966 CV score: 175.7771
## Adaptive q: 0.618034 CV score: 184.7811
## Adaptive q: 0.236068 CV score: 164.7189
## Adaptive q: 0.145898 CV score: 161.9566
## Adaptive q: 0.1109149 CV score: 166.5683
## Adaptive q: 0.1791833 CV score: 161.5847
## Adaptive q: 0.1701409 CV score: 161.6703
## Adaptive q: 0.2009113 CV score: 161.3041
## Adaptive q: 0.21434 CV score: 161.7969
## Adaptive q: 0.192612 CV score: 161.2672
## Adaptive q: 0.1950403 CV score: 161.2272
## Adaptive q: 0.1961367 CV score: 161.2121
## Adaptive q: 0.1979605 CV score: 161.2423
## Adaptive q: 0.1962521 CV score: 161.2138
## Adaptive q: 0.1958789 CV score: 161.2143
## Adaptive q: 0.1960382 CV score: 161.2119
## Adaptive q: 0.1959976 CV score: 161.2125
## Adaptive q: 0.1960789 CV score: 161.2113
## Adaptive q: 0.1960789 CV score: 161.2113
gwr_model <- gwr(
log_tasa_auto ~ densidad_p + grado_educ + gini + edad + prop_fem + prop_aliento_pos,
data = nl_sp,
coords = coords,
adapt = bw_gwr,
hatmatrix = TRUE,
se.fit = TRUE
)
## Warning in gwr(log_tasa_auto ~ densidad_p + grado_educ + gini + edad + prop_fem
## + : data is Spatial* object, ignoring coords argument
# Resultados predichos y R2
nl$gwr_fitted <- gwr_model$SDF$pred
nl$gwr_localR2 <- gwr_model$SDF$localR2
tmap_mode("plot")
## ℹ tmap mode set to "plot".
tm_shape(nl) +
tm_polygons("gwr_fitted", title = "Predicción GWR (log Tasa Accidentes)", palette = "Reds") +
tm_layout(main.title = "Valores Predichos por GWR")
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).[v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'[v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`[cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Reds" is named
## "brewer.reds"Multiple palettes called "reds" found: "brewer.reds", "matplotlib.reds". The first one, "brewer.reds", is returned.
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.
tm_shape(nl) +
tm_polygons("gwr_localR2", title = "R² Local GWR", palette = "Blues") +
tm_layout(main.title = "Coeficiente R² Local del Modelo GWR")
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).
## [v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
## [v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Blues" is named
## "brewer.blues"
## Multiple palettes called "blues" found: "brewer.blues", "matplotlib.blues". The first one, "brewer.blues", is returned.
##
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.
Concentración urbana de accidentes La Zona Metropolitana de Monterrey (ZMM) acapara la mayor parte de los accidentes: su tasa media (8 551 choques/10 000 hab.) y su mediana (3 825) duplican cualquier otra región, lo que confirma que el problema es esencialmente metropolitano.
Dependencia espacial global El índice de Moran I para la tasa de accidentes resultó altamente significativo (p < 0.01), lo que comprueba que los municipios vecinos tienden a exhibir patrones de siniestralidad similares, invalidando el supuesto de independencia geográfica en un modelo puramente lineal.
Clusters El análisis LISA identificó clústeres High-High concentrados en municipios de la mancha urbana (p. ej. Monterrey, Guadalupe y San Nicolás), señalando zonas donde la intervención —señalización, semaforización y control de velocidad— debería ser prioritaria.
Densidad poblacional como factor de riesgo En todos los modelos (MCO y espaciales) la densidad de población mostró un coeficiente positivo y estadísticamente significativo, evidenciando que a mayor población en el municipio, corresponde a una mayor tasa de choques.
Edad como factor de choques LA edad² en la regresión indica que el riesgo de accidente aumenta hasta aproximadamente los 35 años y luego disminuye, sugiriendo un perfil de máximo riesgo en adultos jóvenes y menores en conductores de mayor edad.
Brecha de género Los hombres registran una mediana de tasa de accidentes casi el doble que las mujeres, reflejo de mayor exposición vial y comportamientos de riesgo.
Modelo SAR óptimo El modelo autorregresivo espacial (SAR) obtuvo el AIC más bajo y sus residuos no mostraron autocorrelación (p > 0.10), lo que lo convierte en la especificación más robusta para explicar la variación de la tasa de choques en Nuevo León.
Heterogeneidad local (GWR) El modelo de Regresión Geográficamente Ponderada arroja R² local superiores a 0.70 en la ZMM y por debajo de 0.30 en áreas rurales, revelando que la fuerza de los determinantes varía espacialmente y que una política única no cubre las diferentes realidades municipales.