# REGRESIÓN ESPACIAL DE DATOS PANEL ------------------------------------
library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plm)
## 
## Attaching package: 'plm'
## The following objects are masked from 'package:dplyr':
## 
##     between, lag, lead
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(spdep)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(spatialreg)
## Loading required package: Matrix
## 
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
## 
##     get.ClusterOption, get.coresOption, get.mcOption,
##     get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
##     set.coresOption, set.mcOption, set.VerboseOption,
##     set.ZeroPolicyOption
library(tmap)
## Breaking News: tmap 3.x is retiring. Please test v4, e.g. with
## remotes::install_github('r-tmap/tmap')
# CARGAR BASES DE DATOS ---------------------------------------------------
tourism_data <- read_excel("/Users/gabrielmedina/Downloads/mx_spatial_data-2/tourism_state_data.xlsx")
## New names:
## • `region` -> `region...17`
## • `region` -> `region...18`
# Limpiar base y seleccionar variables relevantes
tourism_data <- tourism_data %>%
  select(state, year, state_id, tourism_gdp, crime_rate, college_education, business_activity, pop_density) %>%
  na.omit()

# CONVERTIR A DATOS PANEL --------------------------------------------------
tourism_panel <- pdata.frame(tourism_data, index = c("state", "year"))

# MODELOS NO ESPACIALES PANEL ----------------------------------------------
modelo_fe <- plm(tourism_gdp ~ crime_rate + college_education + business_activity + pop_density,
                 data = tourism_panel, model = "within")

modelo_re <- plm(tourism_gdp ~ crime_rate + college_education + business_activity + pop_density,
                 data = tourism_panel, model = "random")

summary(modelo_fe)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = tourism_gdp ~ crime_rate + college_education + 
##     business_activity + pop_density, data = tourism_panel, model = "within")
## 
## Balanced Panel: n = 32, T = 17, N = 544
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -86572.08  -3990.83    733.98   3822.82  56076.26 
## 
## Coefficients:
##                    Estimate Std. Error  t-value  Pr(>|t|)    
## crime_rate           15.221     32.744   0.4648  0.642244    
## college_education 79018.017  15940.012   4.9572 9.762e-07 ***
## business_activity  3600.493   1160.019   3.1038  0.002017 ** 
## pop_density        -428.222     31.956 -13.4002 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    9.186e+10
## Residual Sum of Squares: 6.6951e+10
## R-Squared:      0.27116
## Adj. R-Squared: 0.22094
## F-statistic: 47.2494 on 4 and 508 DF, p-value: < 2.22e-16
summary(modelo_re)
## Oneway (individual) effect Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = tourism_gdp ~ crime_rate + college_education + 
##     business_activity + pop_density, data = tourism_panel, model = "random")
## 
## Balanced Panel: n = 32, T = 17, N = 544
## 
## Effects:
##                     var   std.dev share
## idiosyncratic 1.318e+08 1.148e+04 0.082
## individual    1.481e+09 3.848e+04 0.918
## theta: 0.9278
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -125426.6   -4319.9   -1321.6    2549.8   89791.6 
## 
## Coefficients:
##                      Estimate  Std. Error z-value  Pr(>|z|)    
## (Intercept)        54621.1323   9657.9292  5.6556 1.553e-08 ***
## crime_rate            36.2374     38.7142  0.9360    0.3493    
## college_education -25805.9812  16935.6307 -1.5238    0.1276    
## business_activity   3200.3552   1364.4656  2.3455    0.0190 *  
## pop_density           40.6260      7.6315  5.3235 1.018e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    1.0641e+11
## Residual Sum of Squares: 1.001e+11
## R-Squared:      0.059322
## Adj. R-Squared: 0.052341
## Chisq: 33.9907 on 4 DF, p-value: 7.4848e-07
# PRUEBA DE HAUSMAN --------------------------------------------------------
hausman_test <- phtest(modelo_fe, modelo_re)
print(hausman_test)
## 
##  Hausman Test
## 
## data:  tourism_gdp ~ crime_rate + college_education + business_activity +  ...
## chisq = 228.25, df = 4, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
# CARGAR SHAPEFILE ---------------------------------------------------------
mx_map <- st_read("/Users/gabrielmedina/Downloads/mx_spatial_data/mx_maps/mx_states/mexlatlong.shp")
## Reading layer `mexlatlong' from data source 
##   `/Users/gabrielmedina/Downloads/mx_spatial_data/mx_maps/mx_states/mexlatlong.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 32 features and 19 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -118.4042 ymin: 14.55055 xmax: -86.73862 ymax: 32.71846
## Geodetic CRS:  WGS 84
# MATRIZ DE CONECTIVIDAD ---------------------------------------------------
nb <- poly2nb(mx_map)
listw <- nb2listw(nb, style = "W")
plot(mx_map$geometry, border = "gray")
plot(nb, st_coordinates(st_centroid(mx_map)), col = "red", add = TRUE)
## Warning: st_centroid assumes attributes are constant over geometries

# MODELOS ESPACIALES PANEL --------------------------------------------------
library(splm)


# Conversión a panel para splm
panel_matrix <- pdata.frame(tourism_data, index = c("state", "year"))

# Modelo SAR panel
modelo_sar_panel <- spml(tourism_gdp ~ crime_rate + college_education + business_activity + pop_density,
                         data = panel_matrix, listw = listw,
                         model = "within", spatial.error = "none", lag = TRUE)
summary(modelo_sar_panel)
## Spatial panel fixed effects lag model
##  
## 
## Call:
## spml(formula = tourism_gdp ~ crime_rate + college_education + 
##     business_activity + pop_density, data = panel_matrix, listw = listw, 
##     model = "within", lag = TRUE, spatial.error = "none")
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -85025.1  -3413.7    431.2   3998.0  55137.6 
## 
## Spatial autoregressive coefficient:
##        Estimate Std. Error t-value  Pr(>|t|)    
## lambda 0.179589   0.047682  3.7664 0.0001656 ***
## 
## Coefficients:
##                    Estimate Std. Error  t-value  Pr(>|t|)    
## crime_rate           14.925     31.086   0.4801  0.631145    
## college_education 90692.249  15181.100   5.9740 2.315e-09 ***
## business_activity  3072.900   1109.971   2.7685  0.005632 ** 
## pop_density        -442.673     30.357 -14.5822 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Modelo SEM panel
modelo_sem_panel <- spml(tourism_gdp ~ crime_rate + college_education + business_activity + pop_density,
                         data = panel_matrix, listw = listw,
                         model = "within", spatial.error = "b", lag = FALSE)
summary(modelo_sem_panel)
## Spatial panel fixed effects error model
##  
## 
## Call:
## spml(formula = tourism_gdp ~ crime_rate + college_education + 
##     business_activity + pop_density, data = panel_matrix, listw = listw, 
##     model = "within", lag = FALSE, spatial.error = "b")
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -86335.87  -3713.48    782.93   3928.97  55092.41 
## 
## Spatial error parameter:
##     Estimate Std. Error t-value Pr(>|t|)    
## rho 0.215122   0.052829  4.0721 4.66e-05 ***
## 
## Coefficients:
##                    Estimate Std. Error  t-value  Pr(>|t|)    
## crime_rate           19.083     30.478   0.6261  0.531221    
## college_education 87905.986  17202.201   5.1102 3.219e-07 ***
## business_activity  3645.519   1300.960   2.8022  0.005076 ** 
## pop_density        -434.654     29.230 -14.8699 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# PRESENCIA DE AUTOCORRELACIÓN ESPACIAL EN LOS RESIDUALES ------------------
# Seleccionar dos años: 2006 y 2022
resid_2006 <- residuals(modelo_sem_panel)[panel_matrix$year == 2006]
resid_2022 <- residuals(modelo_sem_panel)[panel_matrix$year == 2022]

# Calcular Moran's I para cada año
moran.test(resid_2006, listw)
## 
##  Moran I test under randomisation
## 
## data:  resid_2006  
## weights: listw    
## 
## Moran I statistic standard deviate = 0.1474, p-value = 0.4414
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       -0.01435573       -0.03225806        0.01475019
moran.test(resid_2022, listw)
## 
##  Moran I test under randomisation
## 
## data:  resid_2022  
## weights: listw    
## 
## Moran I statistic standard deviate = -0.20254, p-value = 0.5803
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       -0.05560199       -0.03225806        0.01328388
# VISUALIZACIÓN DE RESULTADOS ----------------------------------------------
# Agregar predicciones del mejor modelo para 2006 y 2022
panel_matrix$predicted <- fitted(modelo_sem_panel)
pred_2006 <- panel_matrix %>% filter(year == 2006)
pred_2022 <- panel_matrix %>% filter(year == 2022)

# Unir con shapefile para mapear
map_2006 <- left_join(mx_map, pred_2006, by = c("OBJECTID" = "state_id"))
map_2022 <- left_join(mx_map, pred_2022, by = c("OBJECTID" = "state_id"))

# Mapas
mapa_2006 <- tm_shape(map_2006) +
  tm_polygons("predicted", title = "Pred. Tourism GDP (2006)", palette = "Blues") +
  tm_layout(main.title = "Predicción 2006")

mapa_2022 <- tm_shape(map_2022) +
  tm_polygons("predicted", title = "Pred. Tourism GDP (2022)", palette = "Greens") +
  tm_layout(main.title = "Predicción 2022")

tmap_arrange(mapa_2006, mapa_2022)