#install.packages("foreign")
library(foreign)                  # import external files

#install.packages("dplyr")
library(dplyr)                    # data manipulation 

#install.packages("spdep")
library(spdep)                    # a collection of functions to create spatial weight matrix 

#install.packages("tigris")
library(tigris)                   # allows to work with shapefiles

#install.packages("rgeoda")
library(rgeoda)                   # spatial data analysis based on GeoDa

#install.packages("RColorBrewer")
library(RColorBrewer)             # offers several color palettes 

#install.packages("viridis")
library(viridis)                  # offers several color palettes

#install.packages("ggplot2")
library(ggplot2)                  # to create plots and graphics from dataset 

#install.packages("tmap")
library(tmap)                     # making maps so spatial data distributions are visualized

#install.packages("sf")
library(sf)                       # functions to encode spatial vector data 

#install.packages("sp")
library(sp)                       # classes and methods for spatial data 

#install.packages("remotes")
library(remotes)

#install.packages("spatialreg")
library(spatialreg)

#install.packages("stargazer")
library(stargazer)

#install.packages("spdep")
library(spdep)
mx_state <- read.csv("/Users/gilmenchaca/Documents/OTROS/TEC/SEMESTRE 8/PLANEACION ESTRATEGICA/Modulo 1 - DAVID/ACT EXTRA/state_data.csv") 
#mx_mpio  <- read.csv("D:\\Tec21\\AD3003B\\FJ2024\\mpio_data.csv")

### importing shapefile 

# Mexico's states (32) 
mx_state_map <- sf::st_read("/Users/gilmenchaca/Documents/OTROS/TEC/SEMESTRE 8/PLANEACION ESTRATEGICA/Modulo 1 - DAVID/ACT EXTRA/mx_states/mexlatlong.shp")
## Reading layer `mexlatlong' from data source 
##   `/Users/gilmenchaca/Documents/OTROS/TEC/SEMESTRE 8/PLANEACION ESTRATEGICA/Modulo 1 - DAVID/ACT EXTRA/mx_states/mexlatlong.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 32 features and 19 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -118.4042 ymin: 14.55055 xmax: -86.73862 ymax: 32.71846
## Geodetic CRS:  WGS 84
#mx_state_map <- read_sf("D:\\Tec21\\AD3003B\\FJ2024\\mx_maps\\mx_states\\mexlatlong.shp")
# merging dataset 
state_geodata <- geo_join(mx_state_map,mx_state,'OBJECTID','state_id',how='inner')
## Warning in geo_join(mx_state_map, mx_state, "OBJECTID", "state_id", how =
## "inner"): We recommend using the dplyr::*_join() family of functions instead.
tm_shape(mx_state_map) +
  tm_polygons(col = "lightgray") +
  tm_compass(position=c("left","bottom")) +
  tm_layout(main.title = "Mexico's States") +
  tm_text("ADMIN_NAME", size = "AREA")
## [v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`

# Mexico's States - Distribution of FDI Inflows 
fdi <- tm_shape(state_geodata) + 
  tm_polygons(col = "new_fdi_real_mxn", palette="Blues", style="quantile", n=8, title="MXN FDI Inflows") +
  tm_layout(main.title= 'New FDI Inflows',  title.position = c('right', 'top'), legend.position= c("left", "bottom"), title.size = 1)
## 
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_polygons()`: instead of `style = "quantile"`, use fill.scale =
## `tm_scale_intervals()`.
## ℹ Migrate the argument(s) 'style', 'n', 'palette' (rename to 'values') to
##   'tm_scale_intervals(<HERE>)'
## [v3->v4] `tm_polygons()`: use 'fill' for the fill color of polygons/symbols
## (instead of 'col'), and 'col' for the outlines (instead of 'border.col').
## [v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
## [v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`
income <- tm_shape(state_geodata) + 
  tm_polygons(col = "real_ave_month_income", palette="BuGn", style="quantile", n=8, title="Average Monthly Income") +
  tm_layout(main.title= '2018 Average Monthly Income',  title.position = c('right', 'top'), legend.position= c("left", "bottom"), title.size = 1)
## [v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
## [v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`
crime <- tm_shape(state_geodata) + 
  tm_polygons(col = "crime_rate", palette="OrRd", style="quantile", n=8, title="Crime Rate") +
  tm_layout(main.title= 'Crime Rate 100,000 Pop',  title.position = c('right', 'top'), legend.position= c("left", "bottom"), title.size = 1)
## [v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
## [v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`
pop_den <- tm_shape(state_geodata) + 
  tm_polygons(col = "pop_density", palette="-viridis", style="quantile", n=8, title="Pop Density") +
  tm_layout(main.title= 'Population Density',  title.position = c('right', 'top'), legend.position= c("left", "bottom"), title.size = 1)
## [v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
## [v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`
# Panel of Maps 
tmap_arrange(fdi, income, crime, pop_den, ncol = 2)
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Blues" is named
## "brewer.blues"
## Multiple palettes called "blues" found: "brewer.blues", "matplotlib.blues". The first one, "brewer.blues", is returned.
## 
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "BuGn" is named
## "brewer.bu_gn"
## Multiple palettes called "bu_gn" found: "brewer.bu_gn", "matplotlib.bu_gn". The first one, "brewer.bu_gn", is returned.
## 
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "OrRd" is named
## "brewer.or_rd"
## Multiple palettes called "or_rd" found: "brewer.or_rd", "matplotlib.or_rd". The first one, "brewer.or_rd", is returned.

### Spatial Connectivity Matrix 
swm  <- poly2nb(mx_state_map, queen=T)

summary(swm) # The average number of neighbors is 4.31
## Neighbour list object:
## Number of regions: 32 
## Number of nonzero links: 138 
## Percentage nonzero weights: 13.47656 
## Average number of links: 4.3125 
## Link number distribution:
## 
## 1 2 3 4 5 6 7 8 9 
## 1 6 6 6 5 2 3 2 1 
## 1 least connected region:
## 31 with 1 link
## 1 most connected region:
## 8 with 9 links
sswm <- nb2listw(swm, style="W", zero.policy = TRUE)

mx_state_map_a <- as(mx_state_map, "Spatial")
mx_state_map_centroid <- coordinates(mx_state_map_a) 
plot(mx_state_map_a,border="blue",axes=FALSE,las=1, main="Mexico's States Queen SWM")
plot(mx_state_map_a,col="grey",border=grey(0.9),axes=T,add=T) 
plot(sswm,coords=mx_state_map_centroid,pch=19,cex=0.1,col="red",add=T) 

# Global Moran's I 
moran.test(state_geodata$new_fdi_real_mxn, sswm)            # Global Moran's I is 0.11 but not statistically significant (p-value > 10%). 
## 
##  Moran I test under randomisation
## 
## data:  state_geodata$new_fdi_real_mxn  
## weights: sswm    
## 
## Moran I statistic standard deviate = 1.2015, p-value = 0.1148
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##        0.10886112       -0.03225806        0.01379607
moran.test(state_geodata$crime_rate, sswm)                  # Global Moran's I is 0.02 but not statistically significant (p-value > 10%). 
## 
##  Moran I test under randomisation
## 
## data:  state_geodata$crime_rate  
## weights: sswm    
## 
## Moran I statistic standard deviate = 0.4018, p-value = 0.3439
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##        0.01660134       -0.03225806        0.01478702
moran.test(state_geodata$real_ave_month_income, sswm)       # Global Moran's I is 0.50 and statistically significant (p-value < 1%). 
## 
##  Moran I test under randomisation
## 
## data:  state_geodata$real_ave_month_income  
## weights: sswm    
## 
## Moran I statistic standard deviate = 4.3774, p-value = 6.005e-06
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##        0.49717426       -0.03225806        0.01462819
table <- data.frame(Variable = c("Crime", "New FDI Inflows", "Average Monthly Income"), GM = c(0.02, 0.10, 0.50), Significance = c("NS","NS","***"))
table
##                 Variable   GM Significance
## 1                  Crime 0.02           NS
## 2        New FDI Inflows 0.10           NS
## 3 Average Monthly Income 0.50          ***
# Estimate Non-Spatial and Spatial Regression Model 


# non - spatial regression model 
model_a <- lm(new_fdi_real_mxn ~ business_activity + real_ave_month_income + crime_rate + pop_density, data = state_geodata)
summary(model_a)
## 
## Call:
## lm(formula = new_fdi_real_mxn ~ business_activity + real_ave_month_income + 
##     crime_rate + pop_density, data = state_geodata)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -7828  -2765  -1509   2227  15337 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -3624.640   7599.694  -0.477  0.63724    
## business_activity      3370.887   1392.454   2.421  0.02248 *  
## real_ave_month_income     2.976      1.030   2.891  0.00749 ** 
## crime_rate              -21.131     40.513  -0.522  0.60621    
## pop_density               5.046      0.964   5.235 1.62e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5661 on 27 degrees of freedom
## Multiple R-squared:  0.636,  Adjusted R-squared:  0.5821 
## F-statistic: 11.79 on 4 and 27 DF,  p-value: 1.139e-05
AIC(model_a)
## [1] 650.426
# SAR - Spatial Autoregressive Model 
model_b <- lagsarlm(new_fdi_real_mxn ~ business_activity + real_ave_month_income + crime_rate + pop_density, data = state_geodata, listw = sswm)
## Warning in lagsarlm(new_fdi_real_mxn ~ business_activity + real_ave_month_income + : inversion of asymptotic covariance matrix failed for tol.solve = 2.22044604925031e-16 
##   reciprocal condition number = 1.52226e-16 - using numerical Hessian.
## Warning in sqrt(fdHess[1, 1]): NaNs produced
summary(model_b)
## 
## Call:
## lagsarlm(formula = new_fdi_real_mxn ~ business_activity + real_ave_month_income + 
##     crime_rate + pop_density, data = state_geodata, listw = sswm)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7826.6 -2763.1 -1507.6  2228.7 15342.7 
## 
## Type: lag 
## Coefficients: (numerical Hessian approximate standard errors) 
##                          Estimate  Std. Error z value Pr(>|z|)
## (Intercept)           -3623.07679  6689.59182 -0.5416 0.588095
## business_activity      3369.83699  1212.39716  2.7795 0.005445
## real_ave_month_income     2.97432     0.94483  3.1480 0.001644
## crime_rate              -21.16385    37.42496 -0.5655 0.571733
## pop_density               5.04674     0.88046  5.7319 9.93e-09
## 
## Rho: 0.0012113, LR test value: 3.1466e-05, p-value: 0.99552
## Approximate (numerical Hessian) standard error: NaN
##     z-value: NaN, p-value: NA
## Wald statistic: NaN, p-value: NA
## 
## Log likelihood: -319.213 for lag model
## ML residual variance (sigma squared): 27043000, (sigma: 5200.3)
## Number of observations: 32 
## Number of parameters estimated: 7 
## AIC: 652.43, (AIC for lm: 650.43)
# SEM - Spatial Error Model 
model_c <- errorsarlm(new_fdi_real_mxn ~ business_activity + real_ave_month_income + crime_rate + pop_density, data = state_geodata, listw = sswm) 
summary(model_c)
## 
## Call:
## errorsarlm(formula = new_fdi_real_mxn ~ business_activity + real_ave_month_income + 
##     crime_rate + pop_density, data = state_geodata, listw = sswm)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6726.2 -2928.1 -1522.1  2268.7 15020.8 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                          Estimate  Std. Error z value Pr(>|z|)
## (Intercept)           -1863.70235  6983.95264 -0.2669 0.789581
## business_activity      3711.79847  1294.07093  2.8683 0.004127
## real_ave_month_income     2.78731     0.92486  3.0138 0.002580
## crime_rate              -22.30404    36.42968 -0.6122 0.540373
## pop_density               4.92393     0.86188  5.7130 1.11e-08
## 
## Lambda: -0.1835, LR test value: 0.48067, p-value: 0.48812
## Asymptotic standard error: 0.24432
##     z-value: -0.75108, p-value: 0.45261
## Wald statistic: 0.56412, p-value: 0.45261
## 
## Log likelihood: -318.9727 for error model
## ML residual variance (sigma squared): 26419000, (sigma: 5140)
## Number of observations: 32 
## Number of parameters estimated: 7 
## AIC: 651.95, (AIC for lm: 650.43)
# Spatial Durbin Model
model_d <- lagsarlm(new_fdi_real_mxn ~ business_activity + real_ave_month_income + crime_rate + pop_density, data = state_geodata, listw = sswm, type="mixed") 
## Warning in lagsarlm(new_fdi_real_mxn ~ business_activity + real_ave_month_income + : inversion of asymptotic covariance matrix failed for tol.solve = 2.22044604925031e-16 
##   reciprocal condition number = 1.24974e-16 - using numerical Hessian.
summary(model_d)
## 
## Call:
## lagsarlm(formula = new_fdi_real_mxn ~ business_activity + real_ave_month_income + 
##     crime_rate + pop_density, data = state_geodata, listw = sswm, 
##     type = "mixed")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -7833.42 -3449.28  -946.26   866.04 15595.72 
## 
## Type: mixed 
## Coefficients: (numerical Hessian approximate standard errors) 
##                              Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)               19271.50104 18989.17177  1.0149  0.310169
## business_activity          4136.43237  1457.13475  2.8387  0.004529
## real_ave_month_income         2.24282     1.26897  1.7674  0.077155
## crime_rate                   -6.47716    38.91552 -0.1664  0.867809
## pop_density                   5.09533     0.91888  5.5451 2.937e-08
## lag.business_activity      3204.80817  1997.63277  1.6043  0.108647
## lag.real_ave_month_income    -1.88452     2.85487 -0.6601  0.509186
## lag.crime_rate              -27.26650    75.66662 -0.3604  0.718585
## lag.pop_density              -2.45497     3.54551 -0.6924  0.488676
## 
## Rho: -0.11587, LR test value: 0.20004, p-value: 0.65469
## Approximate (numerical Hessian) standard error: 0.26111
##     z-value: -0.44375, p-value: 0.65722
## Wald statistic: 0.19692, p-value: 0.65722
## 
## Log likelihood: -317.8006 for mixed model
## ML residual variance (sigma squared): 24676000, (sigma: 4967.5)
## Number of observations: 32 
## Number of parameters estimated: 11 
## AIC: 657.6, (AIC for lm: 655.8)
stargazer(model_a, model_b, model_c, model_d, type = "text", title="Estimated Regression Results")
## 
## Estimated Regression Results
## =============================================================================================
##                                                   Dependent variable:                        
##                           -------------------------------------------------------------------
##                                                    new_fdi_real_mxn                          
##                                    OLS              spatial        spatial        spatial    
##                                                  autoregressive     error      autoregressive
##                                    (1)                (2)            (3)            (4)      
## ---------------------------------------------------------------------------------------------
## business_activity              3,370.887**        3,369.837***   3,711.798***   4,136.432*** 
##                                (1,392.454)        (1,212.397)    (1,294.071)    (1,457.135)  
##                                                                                              
## real_ave_month_income            2.976***           2.974***       2.787***        2.243*    
##                                  (1.029)            (0.945)        (0.925)        (1.269)    
##                                                                                              
## crime_rate                       -21.131            -21.164        -22.304         -6.477    
##                                  (40.513)           (37.425)       (36.430)       (38.916)   
##                                                                                              
## pop_density                      5.046***           5.047***       4.924***       5.095***   
##                                  (0.964)            (0.880)        (0.862)        (0.919)    
##                                                                                              
## lag.business_activity                                                            3,204.808   
##                                                                                 (1,997.633)  
##                                                                                              
## lag.real_ave_month_income                                                          -1.885    
##                                                                                   (2.855)    
##                                                                                              
## lag.crime_rate                                                                    -27.266    
##                                                                                   (75.667)   
##                                                                                              
## lag.pop_density                                                                    -2.455    
##                                                                                   (3.546)    
##                                                                                              
## Constant                        -3,624.640         -3,623.077     -1,863.702     19,271.500  
##                                (7,599.694)        (6,689.592)    (6,983.953)    (18,989.170) 
##                                                                                              
## ---------------------------------------------------------------------------------------------
## Observations                        32                 32             32             32      
## R2                                0.636                                                      
## Adjusted R2                       0.582                                                      
## Log Likelihood                                      -319.213       -318.973       -317.801   
## sigma2                                           27,042,860.000 26,419,476.000 24,675,602.000
## Akaike Inf. Crit.                                   652.426        651.945        657.601    
## Residual Std. Error        5,661.346 (df = 27)                                               
## F Statistic               11.794*** (df = 4; 27)                                             
## Wald Test (df = 1)                                                  0.564          0.197     
## LR Test (df = 1)                                    0.00003         0.481          0.200     
## =============================================================================================
## Note:                                                             *p<0.1; **p<0.05; ***p<0.01
aic_modelo_a <- AIC(model_a)
aic_modelo_b <- AIC(model_b)
aic_modelo_c <- AIC(model_c)
aic_modelo_d <- AIC(model_d)

aic_table <- data.frame(
  Modelo = c("Modelo A", "Modelo B", "Modelo C", "Modelo D"),
  AIC = c(aic_modelo_a, aic_modelo_b, aic_modelo_c, aic_modelo_d)
)

print(aic_table)
##     Modelo      AIC
## 1 Modelo A 650.4260
## 2 Modelo B 652.4260
## 3 Modelo C 651.9453
## 4 Modelo D 657.6011
nb <- poly2nb(mx_state_map)
lw <- nb2listw(nb, style = "W")

library(spdep)

# Calcular residuos
res_a <- residuals(model_a)
res_b <- residuals(model_b)
res_c <- residuals(model_c)
res_d <- residuals(model_d)

# Moran's I para cada modelo
moran_a <- moran.test(res_a, lw)
moran_b <- moran.test(res_b, lw)
moran_c <- moran.test(res_c, lw)
moran_d <- moran.test(res_d, lw)

moran_table <- data.frame(
  Modelo = c("No Espacial", "SAR", "SDM", "SEM"),
  Moran_I = c(
    moran_a$estimate["Moran I statistic"],
    moran_b$estimate["Moran I statistic"],
    moran_c$estimate["Moran I statistic"],
    moran_d$estimate["Moran I statistic"]
  ),
  p_value = c(
    moran_a$p.value,
    moran_b$p.value,
    moran_c$p.value,
    moran_d$p.value
  )
)

print(moran_table)
##        Modelo      Moran_I   p_value
## 1 No Espacial -0.080984184 0.6581896
## 2         SAR -0.081301499 0.6591739
## 3         SDM -0.006398185 0.4146177
## 4         SEM  0.011128458 0.3565223

Modela A es obtuvo un AIC statistic de 650.4260 lo cual es el menor de todos los modelos. Adicionalmente el Modelo A obtuvo un valor Moran_I de -0.081, esto indica que no hay presencia de autocorrelacion espacial siginficativa en los resiudales. Por estas razones el modelo A muestra el mejor ajuste de los datos.