# REGRESIÓN ESPACIAL DE DATOS PANEL ------------------------------------
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plm)
##
## Attaching package: 'plm'
## The following objects are masked from 'package:dplyr':
##
## between, lag, lead
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(spdep)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(spatialreg)
## Loading required package: Matrix
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
## set.coresOption, set.mcOption, set.VerboseOption,
## set.ZeroPolicyOption
library(tmap)
## Breaking News: tmap 3.x is retiring. Please test v4, e.g. with
## remotes::install_github('r-tmap/tmap')
# CARGAR BASES DE DATOS ---------------------------------------------------
tourism_data <- read_excel("/Users/gabrielmedina/Downloads/mx_spatial_data-2/tourism_state_data.xlsx")
## New names:
## • `region` -> `region...17`
## • `region` -> `region...18`
# Limpiar base y seleccionar variables relevantes
tourism_data <- tourism_data %>%
select(state, year, state_id, tourism_gdp, crime_rate, college_education, business_activity, pop_density) %>%
na.omit()
# CONVERTIR A DATOS PANEL --------------------------------------------------
tourism_panel <- pdata.frame(tourism_data, index = c("state", "year"))
# MODELOS NO ESPACIALES PANEL ----------------------------------------------
modelo_fe <- plm(tourism_gdp ~ crime_rate + college_education + business_activity + pop_density,
data = tourism_panel, model = "within")
modelo_re <- plm(tourism_gdp ~ crime_rate + college_education + business_activity + pop_density,
data = tourism_panel, model = "random")
summary(modelo_fe)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = tourism_gdp ~ crime_rate + college_education +
## business_activity + pop_density, data = tourism_panel, model = "within")
##
## Balanced Panel: n = 32, T = 17, N = 544
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -86572.08 -3990.83 733.98 3822.82 56076.26
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## crime_rate 15.221 32.744 0.4648 0.642244
## college_education 79018.017 15940.012 4.9572 9.762e-07 ***
## business_activity 3600.493 1160.019 3.1038 0.002017 **
## pop_density -428.222 31.956 -13.4002 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 9.186e+10
## Residual Sum of Squares: 6.6951e+10
## R-Squared: 0.27116
## Adj. R-Squared: 0.22094
## F-statistic: 47.2494 on 4 and 508 DF, p-value: < 2.22e-16
summary(modelo_re)
## Oneway (individual) effect Random Effect Model
## (Swamy-Arora's transformation)
##
## Call:
## plm(formula = tourism_gdp ~ crime_rate + college_education +
## business_activity + pop_density, data = tourism_panel, model = "random")
##
## Balanced Panel: n = 32, T = 17, N = 544
##
## Effects:
## var std.dev share
## idiosyncratic 1.318e+08 1.148e+04 0.082
## individual 1.481e+09 3.848e+04 0.918
## theta: 0.9278
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -125426.6 -4319.9 -1321.6 2549.8 89791.6
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 54621.1323 9657.9292 5.6556 1.553e-08 ***
## crime_rate 36.2374 38.7142 0.9360 0.3493
## college_education -25805.9812 16935.6307 -1.5238 0.1276
## business_activity 3200.3552 1364.4656 2.3455 0.0190 *
## pop_density 40.6260 7.6315 5.3235 1.018e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1.0641e+11
## Residual Sum of Squares: 1.001e+11
## R-Squared: 0.059322
## Adj. R-Squared: 0.052341
## Chisq: 33.9907 on 4 DF, p-value: 7.4848e-07
# PRUEBA DE HAUSMAN --------------------------------------------------------
hausman_test <- phtest(modelo_fe, modelo_re)
print(hausman_test)
##
## Hausman Test
##
## data: tourism_gdp ~ crime_rate + college_education + business_activity + ...
## chisq = 228.25, df = 4, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
# CARGAR SHAPEFILE ---------------------------------------------------------
mx_map <- st_read("/Users/gabrielmedina/Downloads/mx_spatial_data/mx_maps/mx_states/mexlatlong.shp")
## Reading layer `mexlatlong' from data source
## `/Users/gabrielmedina/Downloads/mx_spatial_data/mx_maps/mx_states/mexlatlong.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 32 features and 19 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -118.4042 ymin: 14.55055 xmax: -86.73862 ymax: 32.71846
## Geodetic CRS: WGS 84
# MATRIZ DE CONECTIVIDAD ---------------------------------------------------
nb <- poly2nb(mx_map)
listw <- nb2listw(nb, style = "W")
plot(mx_map$geometry, border = "gray")
plot(nb, st_coordinates(st_centroid(mx_map)), col = "red", add = TRUE)
## Warning: st_centroid assumes attributes are constant over geometries

# MODELOS ESPACIALES PANEL --------------------------------------------------
library(splm)
# Conversión a panel para splm
panel_matrix <- pdata.frame(tourism_data, index = c("state", "year"))
# Modelo SAR panel
modelo_sar_panel <- spml(tourism_gdp ~ crime_rate + college_education + business_activity + pop_density,
data = panel_matrix, listw = listw,
model = "within", spatial.error = "none", lag = TRUE)
summary(modelo_sar_panel)
## Spatial panel fixed effects lag model
##
##
## Call:
## spml(formula = tourism_gdp ~ crime_rate + college_education +
## business_activity + pop_density, data = panel_matrix, listw = listw,
## model = "within", lag = TRUE, spatial.error = "none")
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -85025.1 -3413.7 431.2 3998.0 55137.6
##
## Spatial autoregressive coefficient:
## Estimate Std. Error t-value Pr(>|t|)
## lambda 0.179589 0.047682 3.7664 0.0001656 ***
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## crime_rate 14.925 31.086 0.4801 0.631145
## college_education 90692.249 15181.100 5.9740 2.315e-09 ***
## business_activity 3072.900 1109.971 2.7685 0.005632 **
## pop_density -442.673 30.357 -14.5822 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Modelo SEM panel
modelo_sem_panel <- spml(tourism_gdp ~ crime_rate + college_education + business_activity + pop_density,
data = panel_matrix, listw = listw,
model = "within", spatial.error = "b", lag = FALSE)
summary(modelo_sem_panel)
## Spatial panel fixed effects error model
##
##
## Call:
## spml(formula = tourism_gdp ~ crime_rate + college_education +
## business_activity + pop_density, data = panel_matrix, listw = listw,
## model = "within", lag = FALSE, spatial.error = "b")
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -86335.87 -3713.48 782.93 3928.97 55092.41
##
## Spatial error parameter:
## Estimate Std. Error t-value Pr(>|t|)
## rho 0.215122 0.052829 4.0721 4.66e-05 ***
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## crime_rate 19.083 30.478 0.6261 0.531221
## college_education 87905.986 17202.201 5.1102 3.219e-07 ***
## business_activity 3645.519 1300.960 2.8022 0.005076 **
## pop_density -434.654 29.230 -14.8699 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# PRESENCIA DE AUTOCORRELACIÓN ESPACIAL EN LOS RESIDUALES ------------------
# Seleccionar dos años: 2006 y 2022
resid_2006 <- residuals(modelo_sem_panel)[panel_matrix$year == 2006]
resid_2022 <- residuals(modelo_sem_panel)[panel_matrix$year == 2022]
# Calcular Moran's I para cada año
moran.test(resid_2006, listw)
##
## Moran I test under randomisation
##
## data: resid_2006
## weights: listw
##
## Moran I statistic standard deviate = 0.1474, p-value = 0.4414
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.01435573 -0.03225806 0.01475019
moran.test(resid_2022, listw)
##
## Moran I test under randomisation
##
## data: resid_2022
## weights: listw
##
## Moran I statistic standard deviate = -0.20254, p-value = 0.5803
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.05560199 -0.03225806 0.01328388
# VISUALIZACIÓN DE RESULTADOS ----------------------------------------------
# Agregar predicciones del mejor modelo para 2006 y 2022
panel_matrix$predicted <- fitted(modelo_sem_panel)
pred_2006 <- panel_matrix %>% filter(year == 2006)
pred_2022 <- panel_matrix %>% filter(year == 2022)
# Unir con shapefile para mapear
map_2006 <- left_join(mx_map, pred_2006, by = c("OBJECTID" = "state_id"))
map_2022 <- left_join(mx_map, pred_2022, by = c("OBJECTID" = "state_id"))
# Mapas
mapa_2006 <- tm_shape(map_2006) +
tm_polygons("predicted", title = "Pred. Tourism GDP (2006)", palette = "Blues") +
tm_layout(main.title = "Predicción 2006")
mapa_2022 <- tm_shape(map_2022) +
tm_polygons("predicted", title = "Pred. Tourism GDP (2022)", palette = "Greens") +
tm_layout(main.title = "Predicción 2022")
tmap_arrange(mapa_2006, mapa_2022)
