#install.packages("foreign")
library(foreign) # import external files
#install.packages("dplyr")
library(dplyr) # data manipulation
#install.packages("spdep")
library(spdep) # a collection of functions to create spatial weight matrix
#install.packages("tigris")
library(tigris) # allows to work with shapefiles
#install.packages("rgeoda")
library(rgeoda) # spatial data analysis based on GeoDa
#install.packages("RColorBrewer")
library(RColorBrewer) # offers several color palettes
#install.packages("viridis")
library(viridis) # offers several color palettes
#install.packages("ggplot2")
library(ggplot2) # to create plots and graphics from dataset
#install.packages("tmap")
library(tmap) # making maps so spatial data distributions are visualized
#install.packages("sf")
library(sf) # functions to encode spatial vector data
#install.packages("sp")
library(sp) # classes and methods for spatial data
#install.packages("remotes")
library(remotes)
#install.packages("spatialreg")
library(spatialreg)
#install.packages("stargazer")
library(stargazer)
#install.packages("spdep")
library(spdep)
#install.packages("readxl")
library(readxl)
#install.packages("car")
library(car)
# Read the tourism workbook. Its two columns both named `region` are renamed
# by readxl on import (region...17 / region...18, see the message below).
tourism_state_data <- readxl::read_excel(
  path = "/Users/gilmenchaca/Documents/OTROS/TEC/SEMESTRE 8/PLANEACION ESTRATEGICA/Modulo 1 - DAVID/ACT 2/tourism_state_data.xlsx"
)
## New names:
## • `region` -> `region...17`
## • `region` -> `region...18`
# Read the cuartos_ocupados_extranjeros workbook (one row per entry in
# `Estados`; it is later joined to the tourism table on that column).
cuartos_ocupados_extranjeros <- readxl::read_excel(
  path = "/Users/gilmenchaca/Documents/OTROS/TEC/SEMESTRE 8/PLANEACION ESTRATEGICA/Modulo 1 - DAVID/ACT 2/cuartos_ocupados_extranjeros.xlsx"
)
# Read the state polygons shapefile as an sf object; it is the input for the
# contiguity-based spatial weights built further down.
mx_state_map <- sf::st_read(
  dsn = "/Users/gilmenchaca/Documents/OTROS/TEC/SEMESTRE 8/PLANEACION ESTRATEGICA/Modulo 1 - DAVID/ACT EXTRA/mx_states/mexlatlong.shp"
)
## Reading layer `mexlatlong' from data source
## `/Users/gilmenchaca/Documents/OTROS/TEC/SEMESTRE 8/PLANEACION ESTRATEGICA/Modulo 1 - DAVID/ACT EXTRA/mx_states/mexlatlong.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 32 features and 19 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -118.4042 ymin: 14.55055 xmax: -86.73862 ymax: 32.71846
## Geodetic CRS: WGS 84
#state_geodata <- geo_join(mx_state_map,tourism_state_data,'OBJECTID','state_id',how='inner')
# Per-column descriptive statistics of the tourism table as loaded.
tourism_state_data %>% summary()
## state year state_id tourism_gdp
## Length:544 Min. :2006 Min. : 888 Min. : 6240
## Class :character 1st Qu.:2010 1st Qu.:1047 1st Qu.: 22685
## Mode :character Median :2014 Median :1081 Median : 32482
## Mean :2014 Mean :1219 Mean : 56520
## 3rd Qu.:2018 3rd Qu.:1118 3rd Qu.: 59014
## Max. :2022 Max. :2357 Max. :472642
## crime_rate college_education unemployment employment
## Min. : 1.710 Min. :0.08751 Min. :0.01000 Min. :0.8900
## 1st Qu.: 8.107 1st Qu.:0.16703 1st Qu.:0.03000 1st Qu.:0.9500
## Median : 13.880 Median :0.20304 Median :0.04000 Median :0.9700
## Mean : 22.163 Mean :0.21106 Mean :0.04251 Mean :0.9639
## 3rd Qu.: 26.314 3rd Qu.:0.25085 3rd Qu.:0.05000 3rd Qu.:0.9754
## Max. :181.510 Max. :0.43761 Max. :0.10000 Max. :0.9928
## business_activity real_wage pop_density good_governance
## Min. :-2.980 Min. :239.3 Min. : 7.74 Min. : 0.000
## 1st Qu.:-2.260 1st Qu.:282.5 1st Qu.: 39.56 1st Qu.: 0.180
## Median :-2.070 Median :306.2 Median : 61.77 Median : 0.500
## Mean :-1.757 Mean :314.9 Mean : 299.46 Mean : 2.362
## 3rd Qu.:-1.768 3rd Qu.:335.4 3rd Qu.: 150.46 3rd Qu.: 1.350
## Max. : 2.470 Max. :481.7 Max. :6211.45 Max. :200.020
## ratio_public_investment exchange_rate inpc border_distance
## Min. :0.000000 Min. :10.85 Min. : 62.69 Min. : 8.83
## 1st Qu.:0.000000 1st Qu.:12.87 1st Qu.: 74.93 1st Qu.: 613.26
## Median :0.000000 Median :14.51 Median : 87.19 Median : 751.64
## Mean :0.005736 Mean :15.91 Mean : 89.08 Mean : 704.92
## 3rd Qu.:0.010000 3rd Qu.:19.47 3rd Qu.:103.02 3rd Qu.: 875.76
## Max. :0.067644 Max. :20.52 Max. :126.48 Max. :1252.66
## region...17 region...18
## Length:544 Min. :1.000
## Class :character 1st Qu.:2.000
## Mode :character Median :3.000
## Mean :3.188
## 3rd Qu.:4.250
## Max. :5.000
# Per-column descriptive statistics of the cuartos_ocupados_extranjeros table.
cuartos_ocupados_extranjeros %>% summary()
## Estados cuartos_ocupados_extranjeros state_id
## Length:32 Min. : 12.1 Min. : 888
## Class :character 1st Qu.: 8364.8 1st Qu.:1047
## Mode :character Median : 19851.7 Median :1081
## Mean : 113430.3 Mean :1219
## 3rd Qu.: 40242.9 3rd Qu.:1118
## Max. :2105822.9 Max. :2357
#state_geodata <- state_geodata %>% filter(year == 2022)
# Keep only the 2022 rows of the tourism table.
tourism_state_data <- tourism_state_data %>% filter(year == 2022)
#tourism_state_data <- tourism_state_data %>% select(-business_activity,-good_governance,-exchange_rate,-inpc,-region...17,-region...18)
# Attach cuartos_ocupados_extranjeros to each state by matching the state name
# (`state` on the left, `Estados` on the right). Both tables also carry a
# `state_id` column, so the result holds `state_id.x` and `state_id.y`.
df_merged <- left_join(
  tourism_state_data,
  cuartos_ocupados_extranjeros,
  by = c("state" = "Estados")
)
# A left join duplicates rows when a key repeats on the right-hand side; the
# models below need exactly one row per state, so fail loudly instead.
if (nrow(df_merged) != nrow(tourism_state_data)) {
  stop(
    "Join on state name changed the row count from ",
    nrow(tourism_state_data), " to ", nrow(df_merged),
    "; check for duplicated names in `Estados`.",
    call. = FALSE
  )
}
# Unmatched state names (spelling/accent differences) would silently leave NA
# in the dependent variable; surface them here rather than in the model fits.
if (anyNA(df_merged$cuartos_ocupados_extranjeros)) {
  stop(
    "Missing cuartos_ocupados_extranjeros after the join for: ",
    paste(
      df_merged$state[is.na(df_merged$cuartos_ocupados_extranjeros)],
      collapse = ", "
    ),
    call. = FALSE
  )
}
# Multicollinearity check: variance inflation factors from an auxiliary OLS
# fit that includes every candidate regressor in df_merged.
car::vif(
  lm(
    formula = cuartos_ocupados_extranjeros ~
      tourism_gdp + crime_rate + college_education + unemployment +
      real_wage + pop_density + business_activity + good_governance +
      ratio_public_investment + border_distance,
    data = df_merged
  )
)
## tourism_gdp crime_rate college_education
## 4.026260 1.259822 1.882481
## unemployment real_wage pop_density
## 1.815590 2.309100 5.095886
## business_activity good_governance ratio_public_investment
## 2.267063 20.608949 17.959626
## border_distance
## 1.929597
# Non-spatial baseline: OLS on the reduced regressor set (pop_density,
# business_activity, good_governance and ratio_public_investment left out).
model_no_espacial <- lm(
  formula = cuartos_ocupados_extranjeros ~
    tourism_gdp + crime_rate + college_education +
    unemployment + real_wage + border_distance,
  data = df_merged
)
# Se justifica el uso de modelos espaciales debido a la presencia de
# autocorrelación espacial en los residuales. Esto se comprueba al realizar la
# prueba de Moran (I de Moran): si el p-value de la prueba es menor a 0.05, hay
# presencia de autocorrelación espacial en los residuales. En este caso,
# significa que falta contemplar la estructura espacial para poder realizar un
# análisis preciso.
# Neighbour list from the state polygons using queen contiguity.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change the contiguity rule.
swm2 <- poly2nb(mx_state_map, queen = TRUE)
# Convert the neighbour list to a weights list with style "W";
# zero.policy = TRUE lets regions without neighbours through.
# NOTE(review): the weights follow the row order of mx_state_map, while the
# spatial models are fitted on df_merged with this listw - confirm both
# objects list the states in the same order.
sswm_3 <- nb2listw(swm2, style = "W", zero.policy = TRUE)
# SAR - spatial lag model: same regressors as the OLS baseline plus a
# spatially lagged dependent variable weighted by sswm_3.
# NOTE(review): the recorded warning below reports that the asymptotic
# covariance matrix could not be inverted, so standard errors come from a
# numerical Hessian; this may point at badly scaled variables - confirm.
model_b <- lagsarlm(
  cuartos_ocupados_extranjeros ~
    tourism_gdp + crime_rate + college_education +
    unemployment + real_wage + border_distance,
  data = df_merged,
  listw = sswm_3
)
## Warning in lagsarlm(cuartos_ocupados_extranjeros ~ tourism_gdp + crime_rate + : inversion of asymptotic covariance matrix failed for tol.solve = 2.22044604925031e-16
## reciprocal condition number = 6.59236e-23 - using numerical Hessian.
summary(model_b)
##
## Call:lagsarlm(formula = cuartos_ocupados_extranjeros ~ tourism_gdp +
## crime_rate + college_education + unemployment + real_wage +
## border_distance, data = df_merged, listw = sswm_3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -448473 -96770 -33801 41923 1404548
##
## Type: lag
## Coefficients: (numerical Hessian approximate standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.7885e+05 6.6184e+05 -0.5724 0.56704
## tourism_gdp 1.3325e+00 9.4021e-01 1.4172 0.15642
## crime_rate 6.6834e+02 2.1431e+03 0.3119 0.75515
## college_education 2.5751e+06 1.2522e+06 2.0564 0.03974
## unemployment -5.6091e+06 7.0178e+06 -0.7993 0.42413
## real_wage -1.3373e+03 1.7618e+03 -0.7591 0.44781
## border_distance 4.1993e+02 2.3154e+02 1.8136 0.06974
##
## Rho: 0.014468, LR test value: 0.002506, p-value: 0.96007
## Approximate (numerical Hessian) standard error: 0.28644
## z-value: 0.050509, p-value: 0.95972
## Wald statistic: 0.0025511, p-value: 0.95972
##
## Log likelihood: -449.059 for lag model
## ML residual variance (sigma squared): 9.0467e+10, (sigma: 300780)
## Number of observations: 32
## Number of parameters estimated: 9
## AIC: 916.12, (AIC for lm: 914.12)
# SEM - spatial error model: same regressors as the OLS baseline, with the
# spatial dependence placed in the error term through sswm_3.
# NOTE(review): the recorded warning below shows the same failed inversion of
# the asymptotic covariance matrix as the lag model - confirm variable scaling.
model_c <- errorsarlm(
  cuartos_ocupados_extranjeros ~
    tourism_gdp + crime_rate + college_education +
    unemployment + real_wage + border_distance,
  data = df_merged,
  listw = sswm_3
)
## Warning in errorsarlm(cuartos_ocupados_extranjeros ~ tourism_gdp + crime_rate + : inversion of asymptotic covariance matrix failed for tol.solve = 2.22044604925031e-16
## reciprocal condition number = 1.09664e-22 - using numerical Hessian.
summary(model_c)
##
## Call:errorsarlm(formula = cuartos_ocupados_extranjeros ~ tourism_gdp +
## crime_rate + college_education + unemployment + real_wage +
## border_distance, data = df_merged, listw = sswm_3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -449387 -95693 -34214 41027 1404392
##
## Type: error
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.7451e+05 6.5942e+05 -0.5679 0.57008
## tourism_gdp 1.3324e+00 9.3800e-01 1.4204 0.15548
## crime_rate 6.6850e+02 2.1211e+03 0.3152 0.75264
## college_education 2.5736e+06 1.2349e+06 2.0840 0.03716
## unemployment -5.5653e+06 6.9977e+06 -0.7953 0.42644
## real_wage -1.3420e+03 1.7498e+03 -0.7670 0.44310
## border_distance 4.1717e+02 2.3085e+02 1.8072 0.07074
##
## Lambda: 0.012365, LR test value: 0.001706, p-value: 0.96705
## Approximate (numerical Hessian) standard error: 0.36102
## z-value: 0.034251, p-value: 0.97268
## Wald statistic: 0.0011731, p-value: 0.97268
##
## Log likelihood: -449.0594 for error model
## ML residual variance (sigma squared): 9.0471e+10, (sigma: 300780)
## Number of observations: 32
## Number of parameters estimated: 9
## AIC: 916.12, (AIC for lm: 914.12)
# Spatial Durbin model: lagsarlm with type = "mixed", which adds spatially
# lagged copies of the regressors (the `lag.*` terms in the summary).
# NOTE(review): the recorded output reports "NaNs produced" and a NaN standard
# error for rho, so inference on rho is unavailable for this fit - confirm
# whether rescaling the variables resolves it.
model_d <- lagsarlm(
  cuartos_ocupados_extranjeros ~
    tourism_gdp + crime_rate + college_education +
    unemployment + real_wage + border_distance,
  data = df_merged,
  listw = sswm_3,
  type = "mixed"
)
## Warning in lagsarlm(cuartos_ocupados_extranjeros ~ tourism_gdp + crime_rate + : inversion of asymptotic covariance matrix failed for tol.solve = 2.22044604925031e-16
## reciprocal condition number = 7.83223e-23 - using numerical Hessian.
## Warning in sqrt(fdHess[1, 1]): NaNs produced
summary(model_d)
##
## Call:lagsarlm(formula = cuartos_ocupados_extranjeros ~ tourism_gdp +
## crime_rate + college_education + unemployment + real_wage +
## border_distance, data = df_merged, listw = sswm_3, type = "mixed")
##
## Residuals:
## Min 1Q Median 3Q Max
## -404842 -176223 13577 122039 1131527
##
## Type: mixed
## Coefficients: (numerical Hessian approximate standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.8240e+06 2.0099e+06 -1.4050 0.16002
## tourism_gdp 2.9678e-01 1.1365e+00 0.2611 0.79399
## crime_rate 1.5453e+03 2.1033e+03 0.7347 0.46253
## college_education 3.6550e+06 1.5085e+06 2.4229 0.01540
## unemployment -1.3625e+07 9.2743e+06 -1.4691 0.14181
## real_wage -1.4897e+02 1.8423e+03 -0.0809 0.93555
## border_distance 4.0559e+02 2.3225e+02 1.7463 0.08076
## lag.tourism_gdp -3.1767e+00 4.4035e+00 -0.7214 0.47066
## lag.crime_rate 5.5732e+03 5.6611e+03 0.9845 0.32488
## lag.college_education -3.0558e+06 3.5018e+06 -0.8726 0.38286
## lag.unemployment -1.1043e+07 1.6000e+07 -0.6902 0.49008
## lag.real_wage 6.4213e+03 4.1208e+03 1.5583 0.11917
## lag.border_distance 1.4517e+03 7.8870e+02 1.8406 0.06568
##
## Rho: -0.0064265, LR test value: 0.00045514, p-value: 0.98298
## Approximate (numerical Hessian) standard error: NaN
## z-value: NaN, p-value: NA
## Wald statistic: NaN, p-value: NA
##
## Log likelihood: -445.9949 for mixed model
## ML residual variance (sigma squared): 7.4703e+10, (sigma: 273320)
## Number of observations: 32
## Number of parameters estimated: 15
## AIC: 921.99, (AIC for lm: 919.99)
# Side-by-side text table of the four fits, in order: OLS baseline, spatial
# lag (model_b), spatial error (model_c) and spatial Durbin (model_d).
# NOTE(review): in the recorded table both lagsarlm fits are headed "spatial
# autoregressive"; custom column labels could tell them apart - confirm.
stargazer(
  model_no_espacial, model_b, model_c, model_d,
  type = "text",
  title = "Estimated Regression Results"
)
##
## Estimated Regression Results
## ====================================================================================================
## Dependent variable:
## ------------------------------------------------------------------------------
## cuartos_ocupados_extranjeros
## OLS spatial spatial spatial
## autoregressive error autoregressive
## (1) (2) (3) (4)
## ----------------------------------------------------------------------------------------------------
## tourism_gdp 1.331 1.332 1.332 0.297
## (1.062) (0.940) (0.938) (1.137)
##
## crime_rate 666.812 668.343 668.499 1,545.294
## (2,399.367) (2,143.132) (2,121.131) (2,103.342)
##
## college_education 2,568,106.000* 2,575,130.000** 2,573,569.000** 3,654,980.000**
## (1,398,712.000) (1,252,245.000) (1,234,901.000) (1,508,530.000)
##
## unemployment -5,628,747.000 -5,609,113.000 -5,565,277.000 -13,624,595.000
## (7,922,488.000) (7,017,782.000) (6,997,669.000) (9,274,268.000)
##
## real_wage -1,326.911 -1,337.308 -1,342.035 -148.971
## (1,980.680) (1,761.756) (1,749.787) (1,842.267)
##
## border_distance 419.818 419.928* 417.173* 405.585*
## (261.350) (231.544) (230.845) (232.255)
##
## lag.tourism_gdp -3.177
## (4.403)
##
## lag.crime_rate 5,573.231
## (5,661.143)
##
## lag.college_education -3,055,787.000
## (3,501,793.000)
##
## lag.unemployment -11,042,647.000
## (15,999,718.000)
##
## lag.real_wage 6,421.326
## (4,120.779)
##
## lag.border_distance 1,451.703*
## (788.702)
##
## Constant -378,582.800 -378,846.600 -374,505.900 -2,823,979.000
## (745,394.400) (661,841.900) (659,421.000) (2,009,926.000)
##
## ----------------------------------------------------------------------------------------------------
## Observations 32 32 32 32
## R2 0.326
## Adjusted R2 0.165
## Log Likelihood -449.059 -449.059 -445.995
## sigma2 90,467,414,954.000 90,470,970,850.000 74,703,399,509.000
## Akaike Inf. Crit. 916.118 916.119 921.990
## Residual Std. Error 340,313.800 (df = 25)
## F Statistic 2.018 (df = 6; 25)
## Wald Test (df = 1) 0.003 0.001
## LR Test (df = 1) 0.003 0.002 0.0005
## ====================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01