#install.packages("foreign")
library(foreign) # import external files
#install.packages("dplyr")
library(dplyr) # data manipulation
#install.packages("spdep")
library(spdep) # a collection of functions to create spatial weight matrix
#install.packages("tigris")
library(tigris) # allows to work with shapefiles
#install.packages("rgeoda")
library(rgeoda) # spatial data analysis based on GeoDa
#install.packages("RColorBrewer")
library(RColorBrewer) # offers several color palettes
#install.packages("viridis")
library(viridis) # offers several color palettes
#install.packages("ggplot2")
library(ggplot2) # to create plots and graphics from dataset
#install.packages("tmap")
library(tmap) # making maps so spatial data distributions are visualized
#install.packages("sf")
library(sf) # functions to encode spatial vector data
#install.packages("sp")
library(sp) # classes and methods for spatial data
#install.packages("remotes")
library(remotes)
#install.packages("spatialreg")
library(spatialreg)
#install.packages("stargazer")
library(stargazer)
#install.packages("spdep")
library(spdep)
mx_state <- read.csv("/Users/gilmenchaca/Documents/OTROS/TEC/SEMESTRE 8/PLANEACION ESTRATEGICA/Modulo 1 - DAVID/ACT EXTRA/state_data.csv")
#mx_mpio <- read.csv("D:\\Tec21\\AD3003B\\FJ2024\\mpio_data.csv")
### importing shapefile
# Mexico's states (32)
mx_state_map <- sf::st_read("/Users/gilmenchaca/Documents/OTROS/TEC/SEMESTRE 8/PLANEACION ESTRATEGICA/Modulo 1 - DAVID/ACT EXTRA/mx_states/mexlatlong.shp")
## Reading layer `mexlatlong' from data source
## `/Users/gilmenchaca/Documents/OTROS/TEC/SEMESTRE 8/PLANEACION ESTRATEGICA/Modulo 1 - DAVID/ACT EXTRA/mx_states/mexlatlong.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 32 features and 19 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -118.4042 ymin: 14.55055 xmax: -86.73862 ymax: 32.71846
## Geodetic CRS: WGS 84
#mx_state_map <- read_sf("D:\\Tec21\\AD3003B\\FJ2024\\mx_maps\\mx_states\\mexlatlong.shp")
# merging dataset
state_geodata <- geo_join(mx_state_map,mx_state,'OBJECTID','state_id',how='inner')
## Warning in geo_join(mx_state_map, mx_state, "OBJECTID", "state_id", how =
## "inner"): We recommend using the dplyr::*_join() family of functions instead.
tm_shape(mx_state_map) +
tm_polygons(col = "lightgray") +
tm_compass(position=c("left","bottom")) +
tm_layout(main.title = "Mexico's States") +
tm_text("ADMIN_NAME", size = "AREA")
## [v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`
# Mexico's States - Distribution of FDI Inflows
fdi <- tm_shape(state_geodata) +
tm_polygons(col = "new_fdi_real_mxn", palette="Blues", style="quantile", n=8, title="MXN FDI Inflows") +
tm_layout(main.title= 'New FDI Inflows', title.position = c('right', 'top'), legend.position= c("left", "bottom"), title.size = 1)
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_polygons()`: instead of `style = "quantile"`, use fill.scale =
## `tm_scale_intervals()`.
## ℹ Migrate the argument(s) 'style', 'n', 'palette' (rename to 'values') to
## 'tm_scale_intervals(<HERE>)'
## [v3->v4] `tm_polygons()`: use 'fill' for the fill color of polygons/symbols
## (instead of 'col'), and 'col' for the outlines (instead of 'border.col').
## [v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
## [v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`
income <- tm_shape(state_geodata) +
tm_polygons(col = "real_ave_month_income", palette="BuGn", style="quantile", n=8, title="Average Monthly Income") +
tm_layout(main.title= '2018 Average Monthly Income', title.position = c('right', 'top'), legend.position= c("left", "bottom"), title.size = 1)
## [v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
## [v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`
crime <- tm_shape(state_geodata) +
tm_polygons(col = "crime_rate", palette="OrRd", style="quantile", n=8, title="Crime Rate") +
tm_layout(main.title= 'Crime Rate 100,000 Pop', title.position = c('right', 'top'), legend.position= c("left", "bottom"), title.size = 1)
## [v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
## [v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`
pop_den <- tm_shape(state_geodata) +
tm_polygons(col = "pop_density", palette="-viridis", style="quantile", n=8, title="Pop Density") +
tm_layout(main.title= 'Population Density', title.position = c('right', 'top'), legend.position= c("left", "bottom"), title.size = 1)
## [v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
## [v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`
# Panel of Maps
tmap_arrange(fdi, income, crime, pop_den, ncol = 2)
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Blues" is named
## "brewer.blues"
## Multiple palettes called "blues" found: "brewer.blues", "matplotlib.blues". The first one, "brewer.blues", is returned.
##
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "BuGn" is named
## "brewer.bu_gn"
## Multiple palettes called "bu_gn" found: "brewer.bu_gn", "matplotlib.bu_gn". The first one, "brewer.bu_gn", is returned.
##
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "OrRd" is named
## "brewer.or_rd"
## Multiple palettes called "or_rd" found: "brewer.or_rd", "matplotlib.or_rd". The first one, "brewer.or_rd", is returned.
### Spatial Connectivity Matrix
swm <- poly2nb(mx_state_map, queen=T)
summary(swm) # The average number of neighbors is 4.31
## Neighbour list object:
## Number of regions: 32
## Number of nonzero links: 138
## Percentage nonzero weights: 13.47656
## Average number of links: 4.3125
## Link number distribution:
##
## 1 2 3 4 5 6 7 8 9
## 1 6 6 6 5 2 3 2 1
## 1 least connected region:
## 31 with 1 link
## 1 most connected region:
## 8 with 9 links
sswm <- nb2listw(swm, style="W", zero.policy = TRUE)
mx_state_map_a <- as(mx_state_map, "Spatial")
mx_state_map_centroid <- coordinates(mx_state_map_a)
plot(mx_state_map_a,border="blue",axes=FALSE,las=1, main="Mexico's States Queen SWM")
plot(mx_state_map_a,col="grey",border=grey(0.9),axes=T,add=T)
plot(sswm,coords=mx_state_map_centroid,pch=19,cex=0.1,col="red",add=T)
# Global Moran's I
moran.test(state_geodata$new_fdi_real_mxn, sswm) # Global Moran's I is 0.11 but not statistically significant (p-value > 10%).
##
## Moran I test under randomisation
##
## data: state_geodata$new_fdi_real_mxn
## weights: sswm
##
## Moran I statistic standard deviate = 1.2015, p-value = 0.1148
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.10886112 -0.03225806 0.01379607
moran.test(state_geodata$crime_rate, sswm) # Global Moran's I is 0.02 but not statistically significant (p-value > 10%).
##
## Moran I test under randomisation
##
## data: state_geodata$crime_rate
## weights: sswm
##
## Moran I statistic standard deviate = 0.4018, p-value = 0.3439
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.01660134 -0.03225806 0.01478702
moran.test(state_geodata$real_ave_month_income, sswm) # Global Moran's I is 0.50 and statistically significant (p-value < 1%).
##
## Moran I test under randomisation
##
## data: state_geodata$real_ave_month_income
## weights: sswm
##
## Moran I statistic standard deviate = 4.3774, p-value = 6.005e-06
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.49717426 -0.03225806 0.01462819
table <- data.frame(Variable = c("Crime", "New FDI Inflows", "Average Monthly Income"), GM = c(0.02, 0.10, 0.50), Significance = c("NS","NS","***"))
table
## Variable GM Significance
## 1 Crime 0.02 NS
## 2 New FDI Inflows 0.10 NS
## 3 Average Monthly Income 0.50 ***
# Estimate Non-Spatial and Spatial Regression Model
# non - spatial regression model
model_a <- lm(new_fdi_real_mxn ~ business_activity + real_ave_month_income + crime_rate + pop_density, data = state_geodata)
summary(model_a)
##
## Call:
## lm(formula = new_fdi_real_mxn ~ business_activity + real_ave_month_income +
## crime_rate + pop_density, data = state_geodata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7828 -2765 -1509 2227 15337
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3624.640 7599.694 -0.477 0.63724
## business_activity 3370.887 1392.454 2.421 0.02248 *
## real_ave_month_income 2.976 1.030 2.891 0.00749 **
## crime_rate -21.131 40.513 -0.522 0.60621
## pop_density 5.046 0.964 5.235 1.62e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5661 on 27 degrees of freedom
## Multiple R-squared: 0.636, Adjusted R-squared: 0.5821
## F-statistic: 11.79 on 4 and 27 DF, p-value: 1.139e-05
AIC(model_a)
## [1] 650.426
# SAR - Spatial Autoregressive Model
model_b <- lagsarlm(new_fdi_real_mxn ~ business_activity + real_ave_month_income + crime_rate + pop_density, data = state_geodata, listw = sswm)
## Warning in lagsarlm(new_fdi_real_mxn ~ business_activity + real_ave_month_income + : inversion of asymptotic covariance matrix failed for tol.solve = 2.22044604925031e-16
## reciprocal condition number = 1.52226e-16 - using numerical Hessian.
## Warning in sqrt(fdHess[1, 1]): NaNs produced
summary(model_b)
##
## Call:
## lagsarlm(formula = new_fdi_real_mxn ~ business_activity + real_ave_month_income +
## crime_rate + pop_density, data = state_geodata, listw = sswm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7826.6 -2763.1 -1507.6 2228.7 15342.7
##
## Type: lag
## Coefficients: (numerical Hessian approximate standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3623.07679 6689.59182 -0.5416 0.588095
## business_activity 3369.83699 1212.39716 2.7795 0.005445
## real_ave_month_income 2.97432 0.94483 3.1480 0.001644
## crime_rate -21.16385 37.42496 -0.5655 0.571733
## pop_density 5.04674 0.88046 5.7319 9.93e-09
##
## Rho: 0.0012113, LR test value: 3.1466e-05, p-value: 0.99552
## Approximate (numerical Hessian) standard error: NaN
## z-value: NaN, p-value: NA
## Wald statistic: NaN, p-value: NA
##
## Log likelihood: -319.213 for lag model
## ML residual variance (sigma squared): 27043000, (sigma: 5200.3)
## Number of observations: 32
## Number of parameters estimated: 7
## AIC: 652.43, (AIC for lm: 650.43)
# SEM - Spatial Error Model
model_c <- errorsarlm(new_fdi_real_mxn ~ business_activity + real_ave_month_income + crime_rate + pop_density, data = state_geodata, listw = sswm)
summary(model_c)
##
## Call:
## errorsarlm(formula = new_fdi_real_mxn ~ business_activity + real_ave_month_income +
## crime_rate + pop_density, data = state_geodata, listw = sswm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6726.2 -2928.1 -1522.1 2268.7 15020.8
##
## Type: error
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1863.70235 6983.95264 -0.2669 0.789581
## business_activity 3711.79847 1294.07093 2.8683 0.004127
## real_ave_month_income 2.78731 0.92486 3.0138 0.002580
## crime_rate -22.30404 36.42968 -0.6122 0.540373
## pop_density 4.92393 0.86188 5.7130 1.11e-08
##
## Lambda: -0.1835, LR test value: 0.48067, p-value: 0.48812
## Asymptotic standard error: 0.24432
## z-value: -0.75108, p-value: 0.45261
## Wald statistic: 0.56412, p-value: 0.45261
##
## Log likelihood: -318.9727 for error model
## ML residual variance (sigma squared): 26419000, (sigma: 5140)
## Number of observations: 32
## Number of parameters estimated: 7
## AIC: 651.95, (AIC for lm: 650.43)
# Spatial Durbin Model
model_d <- lagsarlm(new_fdi_real_mxn ~ business_activity + real_ave_month_income + crime_rate + pop_density, data = state_geodata, listw = sswm, type="mixed")
## Warning in lagsarlm(new_fdi_real_mxn ~ business_activity + real_ave_month_income + : inversion of asymptotic covariance matrix failed for tol.solve = 2.22044604925031e-16
## reciprocal condition number = 1.24974e-16 - using numerical Hessian.
summary(model_d)
##
## Call:
## lagsarlm(formula = new_fdi_real_mxn ~ business_activity + real_ave_month_income +
## crime_rate + pop_density, data = state_geodata, listw = sswm,
## type = "mixed")
##
## Residuals:
## Min 1Q Median 3Q Max
## -7833.42 -3449.28 -946.26 866.04 15595.72
##
## Type: mixed
## Coefficients: (numerical Hessian approximate standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 19271.50104 18989.17177 1.0149 0.310169
## business_activity 4136.43237 1457.13475 2.8387 0.004529
## real_ave_month_income 2.24282 1.26897 1.7674 0.077155
## crime_rate -6.47716 38.91552 -0.1664 0.867809
## pop_density 5.09533 0.91888 5.5451 2.937e-08
## lag.business_activity 3204.80817 1997.63277 1.6043 0.108647
## lag.real_ave_month_income -1.88452 2.85487 -0.6601 0.509186
## lag.crime_rate -27.26650 75.66662 -0.3604 0.718585
## lag.pop_density -2.45497 3.54551 -0.6924 0.488676
##
## Rho: -0.11587, LR test value: 0.20004, p-value: 0.65469
## Approximate (numerical Hessian) standard error: 0.26111
## z-value: -0.44375, p-value: 0.65722
## Wald statistic: 0.19692, p-value: 0.65722
##
## Log likelihood: -317.8006 for mixed model
## ML residual variance (sigma squared): 24676000, (sigma: 4967.5)
## Number of observations: 32
## Number of parameters estimated: 11
## AIC: 657.6, (AIC for lm: 655.8)
stargazer(model_a, model_b, model_c, model_d, type = "text", title="Estimated Regression Results")
##
## Estimated Regression Results
## =============================================================================================
## Dependent variable:
## -------------------------------------------------------------------
## new_fdi_real_mxn
## OLS spatial spatial spatial
## autoregressive error autoregressive
## (1) (2) (3) (4)
## ---------------------------------------------------------------------------------------------
## business_activity 3,370.887** 3,369.837*** 3,711.798*** 4,136.432***
## (1,392.454) (1,212.397) (1,294.071) (1,457.135)
##
## real_ave_month_income 2.976*** 2.974*** 2.787*** 2.243*
## (1.029) (0.945) (0.925) (1.269)
##
## crime_rate -21.131 -21.164 -22.304 -6.477
## (40.513) (37.425) (36.430) (38.916)
##
## pop_density 5.046*** 5.047*** 4.924*** 5.095***
## (0.964) (0.880) (0.862) (0.919)
##
## lag.business_activity 3,204.808
## (1,997.633)
##
## lag.real_ave_month_income -1.885
## (2.855)
##
## lag.crime_rate -27.266
## (75.667)
##
## lag.pop_density -2.455
## (3.546)
##
## Constant -3,624.640 -3,623.077 -1,863.702 19,271.500
## (7,599.694) (6,689.592) (6,983.953) (18,989.170)
##
## ---------------------------------------------------------------------------------------------
## Observations 32 32 32 32
## R2 0.636
## Adjusted R2 0.582
## Log Likelihood -319.213 -318.973 -317.801
## sigma2 27,042,860.000 26,419,476.000 24,675,602.000
## Akaike Inf. Crit. 652.426 651.945 657.601
## Residual Std. Error 5,661.346 (df = 27)
## F Statistic 11.794*** (df = 4; 27)
## Wald Test (df = 1) 0.564 0.197
## LR Test (df = 1) 0.00003 0.481 0.200
## =============================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
aic_modelo_a <- AIC(model_a)
aic_modelo_b <- AIC(model_b)
aic_modelo_c <- AIC(model_c)
aic_modelo_d <- AIC(model_d)
aic_table <- data.frame(
Modelo = c("Modelo A", "Modelo B", "Modelo C", "Modelo D"),
AIC = c(aic_modelo_a, aic_modelo_b, aic_modelo_c, aic_modelo_d)
)
print(aic_table)
## Modelo AIC
## 1 Modelo A 650.4260
## 2 Modelo B 652.4260
## 3 Modelo C 651.9453
## 4 Modelo D 657.6011
nb <- poly2nb(mx_state_map)
lw <- nb2listw(nb, style = "W")
library(spdep)
# Calcular residuos
res_a <- residuals(model_a)
res_b <- residuals(model_b)
res_c <- residuals(model_c)
res_d <- residuals(model_d)
# Moran's I para cada modelo
moran_a <- moran.test(res_a, lw)
moran_b <- moran.test(res_b, lw)
moran_c <- moran.test(res_c, lw)
moran_d <- moran.test(res_d, lw)
moran_table <- data.frame(
Modelo = c("No Espacial", "SAR", "SDM", "SEM"),
Moran_I = c(
moran_a$estimate["Moran I statistic"],
moran_b$estimate["Moran I statistic"],
moran_c$estimate["Moran I statistic"],
moran_d$estimate["Moran I statistic"]
),
p_value = c(
moran_a$p.value,
moran_b$p.value,
moran_c$p.value,
moran_d$p.value
)
)
print(moran_table)
## Modelo Moran_I p_value
## 1 No Espacial -0.080984184 0.6581896
## 2 SAR -0.081301499 0.6591739
## 3 SDM -0.006398185 0.4146177
## 4 SEM 0.011128458 0.3565223
Modela A es obtuvo un AIC statistic de 650.4260 lo cual es el menor de todos los modelos. Adicionalmente el Modelo A obtuvo un valor Moran_I de -0.081, esto indica que no hay presencia de autocorrelacion espacial siginficativa en los resiudales. Por estas razones el modelo A muestra el mejor ajuste de los datos.