#install.packages("kable")
library(kableExtra)
library(readr)
library(foreign)
library(dplyr)
library(spdep)
library(tigris)
library(rgeoda)
library(RColorBrewer)
library(viridis)
library(ggplot2)
library(tmap)
library(sf)
library(sp)
library(spatialreg)
library(stargazer)state_data <- read_csv("/Users/marianaaleal/Desktop/TEC 2025/_Planeación estratégica basada en analítica prescriptiva /mx_spatial_data 2/state_data.csv")## Rows: 32 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): state, region_a
## dbl (15): year, state_id, crime_rate, unemployment, employment, business_act...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
mx_mpio_map <- st_read ("/Users/marianaaleal/Desktop/TEC 2025/_Planeación estratégica basada en analítica prescriptiva /mx_spatial_data 2/mx_maps/mx_states/mexlatlong.shp")## Reading layer `mexlatlong' from data source
## `/Users/marianaaleal/Desktop/TEC 2025/_Planeación estratégica basada en analítica prescriptiva /mx_spatial_data 2/mx_maps/mx_states/mexlatlong.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 32 features and 19 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -118.4042 ymin: 14.55055 xmax: -86.73862 ymax: 32.71846
## Geodetic CRS: WGS 84
mx_state_map <- read_sf("/Users/marianaaleal/Desktop/TEC 2025/_Planeación estratégica basada en analítica prescriptiva /mx_spatial_data 2/mx_maps/mx_states/mexlatlong.shp")
# Unir datos espaciales con atributos
state_geodata <- geo_join(mx_state_map, state_data, 'OBJECTID', 'state_id', how='inner')
# Crear matriz de pesos espaciales (Queen contiguity)
swm <- poly2nb(state_geodata, queen=TRUE)
sswm <- nb2listw(swm, style="W", zero.policy = TRUE)model_a <- lm(new_fdi_real_mxn ~ business_activity + real_ave_month_income +
crime_rate + pop_density, data = state_geodata)
summary(model_a)##
## Call:
## lm(formula = new_fdi_real_mxn ~ business_activity + real_ave_month_income +
## crime_rate + pop_density, data = state_geodata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7828 -2765 -1509 2227 15337
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3624.640 7599.694 -0.477 0.63724
## business_activity 3370.887 1392.454 2.421 0.02248 *
## real_ave_month_income 2.976 1.030 2.891 0.00749 **
## crime_rate -21.131 40.513 -0.522 0.60621
## pop_density 5.046 0.964 5.235 1.62e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5661 on 27 degrees of freedom
## Multiple R-squared: 0.636, Adjusted R-squared: 0.5821
## F-statistic: 11.79 on 4 and 27 DF, p-value: 1.139e-05
model_b <- lagsarlm(new_fdi_real_mxn ~ business_activity + real_ave_month_income +
crime_rate + pop_density, data = state_geodata, listw = sswm)## Warning in lagsarlm(new_fdi_real_mxn ~ business_activity + real_ave_month_income + : inversion of asymptotic covariance matrix failed for tol.solve = 2.22044604925031e-16
## reciprocal condition number = 1.52226e-16 - using numerical Hessian.
## Warning in sqrt(fdHess[1, 1]): NaNs produced
##
## Call:
## lagsarlm(formula = new_fdi_real_mxn ~ business_activity + real_ave_month_income +
## crime_rate + pop_density, data = state_geodata, listw = sswm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7826.6 -2763.1 -1507.6 2228.7 15342.7
##
## Type: lag
## Coefficients: (numerical Hessian approximate standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3623.07679 6689.59182 -0.5416 0.588095
## business_activity 3369.83699 1212.39716 2.7795 0.005445
## real_ave_month_income 2.97432 0.94483 3.1480 0.001644
## crime_rate -21.16385 37.42496 -0.5655 0.571733
## pop_density 5.04674 0.88046 5.7319 9.93e-09
##
## Rho: 0.0012113, LR test value: 3.1466e-05, p-value: 0.99552
## Approximate (numerical Hessian) standard error: NaN
## z-value: NaN, p-value: NA
## Wald statistic: NaN, p-value: NA
##
## Log likelihood: -319.213 for lag model
## ML residual variance (sigma squared): 27043000, (sigma: 5200.3)
## Number of observations: 32
## Number of parameters estimated: 7
## AIC: 652.43, (AIC for lm: 650.43)
model_d <- lagsarlm(new_fdi_real_mxn ~ business_activity + real_ave_month_income +
crime_rate + pop_density, data = state_geodata, listw = sswm,
type="mixed")## Warning in lagsarlm(new_fdi_real_mxn ~ business_activity + real_ave_month_income + : inversion of asymptotic covariance matrix failed for tol.solve = 2.22044604925031e-16
## reciprocal condition number = 1.24974e-16 - using numerical Hessian.
##
## Call:
## lagsarlm(formula = new_fdi_real_mxn ~ business_activity + real_ave_month_income +
## crime_rate + pop_density, data = state_geodata, listw = sswm,
## type = "mixed")
##
## Residuals:
## Min 1Q Median 3Q Max
## -7833.42 -3449.28 -946.26 866.04 15595.72
##
## Type: mixed
## Coefficients: (numerical Hessian approximate standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 19271.50104 18989.17177 1.0149 0.310169
## business_activity 4136.43237 1457.13475 2.8387 0.004529
## real_ave_month_income 2.24282 1.26897 1.7674 0.077155
## crime_rate -6.47716 38.91552 -0.1664 0.867809
## pop_density 5.09533 0.91888 5.5451 2.937e-08
## lag.business_activity 3204.80817 1997.63277 1.6043 0.108647
## lag.real_ave_month_income -1.88452 2.85487 -0.6601 0.509186
## lag.crime_rate -27.26650 75.66662 -0.3604 0.718585
## lag.pop_density -2.45497 3.54551 -0.6924 0.488676
##
## Rho: -0.11587, LR test value: 0.20004, p-value: 0.65469
## Approximate (numerical Hessian) standard error: 0.26111
## z-value: -0.44375, p-value: 0.65722
## Wald statistic: 0.19692, p-value: 0.65722
##
## Log likelihood: -317.8006 for mixed model
## ML residual variance (sigma squared): 24676000, (sigma: 4967.5)
## Number of observations: 32
## Number of parameters estimated: 11
## AIC: 657.6, (AIC for lm: 655.8)
model_c <- errorsarlm(new_fdi_real_mxn ~ business_activity + real_ave_month_income +
crime_rate + pop_density, data = state_geodata, listw = sswm)
summary(model_c)##
## Call:
## errorsarlm(formula = new_fdi_real_mxn ~ business_activity + real_ave_month_income +
## crime_rate + pop_density, data = state_geodata, listw = sswm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6726.2 -2928.1 -1522.1 2268.7 15020.8
##
## Type: error
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1863.70235 6983.95264 -0.2669 0.789581
## business_activity 3711.79847 1294.07093 2.8683 0.004127
## real_ave_month_income 2.78731 0.92486 3.0138 0.002580
## crime_rate -22.30404 36.42968 -0.6122 0.540373
## pop_density 4.92393 0.86188 5.7130 1.11e-08
##
## Lambda: -0.1835, LR test value: 0.48067, p-value: 0.48812
## Asymptotic standard error: 0.24432
## z-value: -0.75108, p-value: 0.45261
## Wald statistic: 0.56412, p-value: 0.45261
##
## Log likelihood: -318.9727 for error model
## ML residual variance (sigma squared): 26419000, (sigma: 5140)
## Number of observations: 32
## Number of parameters estimated: 7
## AIC: 651.95, (AIC for lm: 650.43)
# Calcular AIC y Moran's I para residuos
model_a_aic <- AIC(model_a)
model_b_aic <- AIC(model_b)
model_c_aic <- AIC(model_c)
model_d_aic <- AIC(model_d)
# Moran's I para residuos
moran_a <- moran.test(residuals(model_a), sswm)$estimate[1]
moran_b <- moran.test(residuals(model_b), sswm)$estimate[1]
moran_c <- moran.test(residuals(model_c), sswm)$estimate[1]
moran_d <- moran.test(residuals(model_d), sswm)$estimate[1]
# Crear tabla comparativa
comparison_table <- data.frame(
Modelo = c("OLS", "SAR", "SDM", "SEM"),
AIC = c(model_a_aic, model_b_aic, model_d_aic, model_c_aic),
Moran_I = c(moran_a, moran_b, moran_d, moran_c),
Significance_of_Moran = c(
ifelse(moran.test(residuals(model_a), sswm)$p.value < 0.05, "Sí", "No"),
ifelse(moran.test(residuals(model_b), sswm)$p.value < 0.05, "Sí", "No"),
ifelse(moran.test(residuals(model_d), sswm)$p.value < 0.05, "Sí", "No"),
ifelse(moran.test(residuals(model_c), sswm)$p.value < 0.05, "Sí", "No")
)
)
kable(comparison_table, caption = "Comparación de Modelos")| Modelo | AIC | Moran_I | Significance_of_Moran |
|---|---|---|---|
| OLS | 650.4260 | -0.0809842 | No |
| SAR | 652.4260 | -0.0813015 | No |
| SDM | 657.6011 | 0.0111285 | No |
| SEM | 651.9453 | -0.0063982 | No |
Probé varios modelos para predecir y el modelo que mejor funcionó fue el OLS, que es el modelo tradicional sin efectos espaciales (es decir, no considera la ubicación geográfica directamente).
Resumen en bullets:
El modelo OLS funcionó mejor que los modelos complejos que tratan de tener en cuenta el lugar geográfico.