The following research is conducted by Maria R. Koldasheva and Nikolai A. Popov.
library(tidyverse)
library(dplyr)
library(stargazer)
# geo
library(sp)
library(spdep)
library(rgdal)
library(maptools)
library(rgeos)
library(sf)
library(spatialreg)
# mass retype()
library(hablar)
# multicollinearity test omcdiag imcdiag
library(mctest)
library(GGally)
#tables
library(xtable)
library(texreg)
# Theil index specification
Theil_2018 <- log(GRP_2018) ~ Div_2018 + Edu_2018 + Pop_2018 + Inv_2018 + FDI_2018 + log(Imp_2018)
# WM and IM specification
Margins_2018 <- log(GRP_2018) ~ EM_2018 + IM_2018 + Edu_2018 + Pop_2018 + Inv_2018 + FDI_2018 + log(Imp_2018)
#downloading spatial cross-sectional data
Russia_shape_2018 = readOGR(
'C:/Users/Kolya/Documents/New_studies/Diploma/Geo/Data_geo/Rus_regions_2018.shp')
## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\Kolya\Documents\New_studies\Diploma\Geo\Data_geo\Rus_regions_2018.shp", layer: "Rus_regions_2018"
## with 78 features
## It has 16 fields
## Integer64 fields read as strings: Edu_2018
# change columns type (which was changed unintentionally by QGIS)
Russia_shape_2018$Edu_2018 <- as.numeric(Russia_shape_2018$Edu_2018 )
#Descriptive statistics
Russia_shape_2018@data %>% glimpse()
## Rows: 78
## Columns: 16
## $ region <chr> "Республика Адыгея", "Алтайский край", "Амурская область", "…
## $ GRPL_2018 <dbl> 200763.6, 191088.3, 310611.9, 544107.3, 361074.8, 309524.4, …
## $ InvL_2018 <dbl> 0.1899902, 0.1610921, 0.6433209, 0.2838402, 0.3313535, 0.187…
## $ FDIL_2018 <dbl> 0.0244560199, 0.0048135624, 0.0918287926, 0.0344199371, 0.00…
## $ DivL_2018 <dbl> 0.23205912, 1.89692465, 0.14187907, 0.41500809, 1.91762571, …
## $ EML_2018 <dbl> 3.4829567, 1.6372636, 3.9298785, 3.3955224, 1.6257074, 1.760…
## $ IML_2018 <dbl> 21.435542, 18.728317, 23.504197, 31.265937, 19.608514, 36.56…
## $ Edu_2018 <dbl> 291, 216, 191, 162, 294, 248, 308, 211, 227, 266, 281, 550, …
## $ GRP_2018 <dbl> 210326.8, 196543.3, 333585.8, 603355.3, 455456.1, 347702.7, …
## $ Inv_2018 <dbl> 0.2550824, 0.1848265, 0.7516793, 0.2291233, 0.1903964, 0.154…
## $ FDI_2018 <dbl> 0.001042692, 0.007551503, 0.053901555, 0.002167261, 0.003455…
## $ Pop_2018 <dbl> 1.0105704, 0.9929956, 0.9947223, 0.9919102, 0.9941918, 0.996…
## $ Imp_2018 <dbl> 1400723790, 6951216909, 4525812109, 6904760397, 26005271280,…
## $ EM_2018 <dbl> 3.3816654, 1.5766154, 3.6571892, 3.2307966, 1.6123266, 1.692…
## $ IM_2018 <dbl> 16.9659373, 20.4793791, 32.6429345, 30.0139353, 27.2501188, …
## $ Div_2018 <dbl> 0.24349315, 2.33436715, 0.28093926, 0.48234165, 2.91849684, …
proj4string(Russia_shape_2018) <- CRS("+init=epsg:4326")
Russia.centroids<-gCentroid(Russia_shape_2018,byid=TRUE)
weights <- knn2nb(knearneigh(Russia.centroids, k=8,longlat = TRUE),row.names=Russia_shape_2018$region)
Russia.weights <- nb2listw(weights)
#H0 - variable is randomly distributed (no-autocrrelation)
columns = names(Russia_shape_2018@data)[2:length(names(Russia_shape_2018@data))]
Russia_sp_dat <- Russia_shape_2018@data
for(i in columns){
res <- moran.test(Russia_sp_dat[[i]], Russia.weights)
# print(res)
if(res$p.value < 0.05){
print(paste(i,': Spatial correlation detected'))
}
}
## [1] "GRPL_2018 : Spatial correlation detected"
## [1] "InvL_2018 : Spatial correlation detected"
## [1] "FDIL_2018 : Spatial correlation detected"
## [1] "EML_2018 : Spatial correlation detected"
## [1] "IML_2018 : Spatial correlation detected"
## [1] "GRP_2018 : Spatial correlation detected"
## [1] "Inv_2018 : Spatial correlation detected"
## [1] "Pop_2018 : Spatial correlation detected"
## [1] "Imp_2018 : Spatial correlation detected"
## [1] "EM_2018 : Spatial correlation detected"
## [1] "IM_2018 : Spatial correlation detected"
# 6/9 variables appear to be spatially correlated (lags are not important for this analysis - they will be used only later in dynamic models)
# GRP_2018, EM_2018, IM_2018, Inv_2018, Pop_2018, Imp_2018. Two of those - two independent variables, and 1- depedendent one.
for (g in columns){
moran.plot(Russia_shape_2018@data[[g]], Russia.weights, xlab=g)
}
#graphically, all results of tests are correct
attach(Russia_shape_2018@data) #strange ritual
reg_Theil_2018 <- lm(Theil_2018, data = Russia_shape_2018@data)
summary(reg_Theil_2018) #%>% xtable()
##
## Call:
## lm(formula = Theil_2018, data = Russia_shape_2018@data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5639 -0.2595 -0.1242 0.1738 1.4420
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.1016926 8.5683957 1.762 0.082291 .
## Div_2018 -0.0145501 0.0318961 -0.456 0.649659
## Edu_2018 0.0002724 0.0006671 0.408 0.684314
## Pop_2018 -6.8542134 8.4648465 -0.810 0.420803
## Inv_2018 0.5133863 0.6299200 0.815 0.417796
## FDI_2018 2.0064298 0.9104057 2.204 0.030776 *
## log(Imp_2018) 0.1856968 0.0533985 3.478 0.000868 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4418 on 71 degrees of freedom
## Multiple R-squared: 0.3548, Adjusted R-squared: 0.3002
## F-statistic: 6.506 on 6 and 71 DF, p-value: 1.673e-05
#H0 - residuals are randomly distributed (no-autocorrelation)
lm.morantest(reg_Theil_2018,Russia.weights)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = Theil_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
##
## Moran I statistic standard deviate = 4.371, p-value = 6.183e-06
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.191888053 -0.017918857 0.002303925
moran.test(reg_Theil_2018$residuals, Russia.weights)
##
## Moran I test under randomisation
##
## data: reg_Theil_2018$residuals
## weights: Russia.weights
##
## Moran I statistic standard deviate = 4.1832, p-value = 1.437e-05
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.191888053 -0.012987013 0.002398565
moran.plot(reg_Theil_2018$residuals, Russia.weights)
# residuals are correlated according to the both test and graph
lm.LMtests(reg_Theil_2018, listw = Russia.weights, test="all", zero.policy=TRUE)
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Theil_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
##
## LMerr = 12.94, df = 1, p-value = 0.0003217
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Theil_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
##
## LMlag = 10.612, df = 1, p-value = 0.001123
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Theil_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
##
## RLMerr = 2.7102, df = 1, p-value = 0.09971
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Theil_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
##
## RLMlag = 0.38264, df = 1, p-value = 0.5362
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Theil_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
##
## SARMA = 13.322, df = 2, p-value = 0.00128
# Analitics
# LMerr = 12.94, df = 1, p-value = 0.0003217 => significant
# LMlag = 10.612, df = 1, p-value = 0.001123 => significant
# conclusion => moving to Robust LM Diagnostics
# RLMerr = 2.7102, df = 1, p-value = 0.09971
# RLMlag = 0.38264, df = 1, p-value = 0.5362
# p-value: RLMerr < RLMlag => RLMerr is robust
# #SEM - spatial error model - we need it in our case (RLMerr is robust)
# y = X beta + rho W1 y + u
# u = lambda W2 u + e
# SAR - spatial autoregressive model
# y = X beta + rho W1 y + u
#building an actual model
SEM_Theil_2018 <- errorsarlm(Theil_2018, listw = Russia.weights, zero.policy=TRUE)
sum_Theil_2018 <- summary(SEM_Theil_2018)
# sum_Theil_2018$Coef[, c(1,4)] %>% xtable() # for getting a latex table
# print("NOX")
# print(sum_b$Coef[c(11),])
sum_Theil_2018$Coef[, c(1,4)]
## Estimate Pr(>|z|)
## (Intercept) 6.7718256871 0.3809453767
## Div_2018 -0.0102227293 0.6958316415
## Edu_2018 0.0003887803 0.4827255754
## Pop_2018 1.8851380482 0.8093991903
## Inv_2018 0.0202629801 0.9703711832
## FDI_2018 1.4514549611 0.0562834950
## log(Imp_2018) 0.1762802040 0.0001474848
#comparing our spatial model with the original one
AIC(SEM_Theil_2018)
## [1] 92.96259
AIC(reg_Theil_2018)
## [1] 102.5796
logLik(SEM_Theil_2018)
## 'log Lik.' -37.4813 (df=9)
logLik(reg_Theil_2018)
## 'log Lik.' -43.28981 (df=8)
#AIC - the lower, the better the model
#log-likelihood - the higher, the better the model
#SEM is better than OLS in this case
attach(Russia_shape_2018@data) #strange ritual
## Следующие объекты скрыты от Russia_shape_2018@data (pos = 3):
##
## Div_2018, DivL_2018, Edu_2018, EM_2018, EML_2018, FDI_2018,
## FDIL_2018, GRP_2018, GRPL_2018, IM_2018, IML_2018, Imp_2018,
## Inv_2018, InvL_2018, Pop_2018, region
reg_Margins_2018 <- lm(Margins_2018, data = Russia_shape_2018@data)
summary(reg_Margins_2018) #%>% xtable()
##
## Call:
## lm(formula = Margins_2018, data = Russia_shape_2018@data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.62690 -0.17128 -0.02921 0.14315 0.98139
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.800e+01 5.802e+00 3.103 0.00277 **
## EM_2018 2.188e-01 4.664e-02 4.690 1.31e-05 ***
## IM_2018 1.800e-02 4.020e-03 4.477 2.86e-05 ***
## Edu_2018 6.504e-04 4.581e-04 1.420 0.16013
## Pop_2018 -1.289e+01 5.993e+00 -2.150 0.03500 *
## Inv_2018 -1.231e-01 4.693e-01 -0.262 0.79385
## FDI_2018 7.532e-01 6.751e-01 1.116 0.26838
## log(Imp_2018) 2.787e-01 5.170e-02 5.390 8.99e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3276 on 70 degrees of freedom
## Multiple R-squared: 0.6502, Adjusted R-squared: 0.6152
## F-statistic: 18.58 on 7 and 70 DF, p-value: 9.456e-14
#H0 - residuals are randomly distributed (no-autocorrelation)
lm.morantest(reg_Margins_2018,Russia.weights)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = Margins_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
##
## Moran I statistic standard deviate = 1.9185, p-value = 0.02752
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.063190983 -0.024065263 0.002068579
moran.test(reg_Margins_2018$residuals, Russia.weights)
##
## Moran I test under randomisation
##
## data: reg_Margins_2018$residuals
## weights: Russia.weights
##
## Moran I statistic standard deviate = 1.5356, p-value = 0.06232
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.063190983 -0.012987013 0.002461049
moran.plot(reg_Margins_2018$residuals, Russia.weights)
# residuals are correlated according to the both test and graph
lm.LMtests(reg_Margins_2018, listw = Russia.weights, test="all", zero.policy=TRUE)
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Margins_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
##
## LMerr = 1.4033, df = 1, p-value = 0.2362
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Margins_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
##
## LMlag = 11.087, df = 1, p-value = 0.0008693
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Margins_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
##
## RLMerr = 1.9221, df = 1, p-value = 0.1656
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Margins_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
##
## RLMlag = 11.606, df = 1, p-value = 0.0006574
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Margins_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
##
## SARMA = 13.009, df = 2, p-value = 0.001497
# Analitics
# LMerr = 1.4033, df = 1, p-value = 0.2362 => insignificant
# LMlag = 11.087, df = 1, p-value = 0.0008693 => significant
# conclusion => SAR
# SEM - spatial error model
# y = X beta + rho W1 y + u
# u = lambda W2 u + e
# SAR - spatial autoregressive model - we need it in our case (LMlag is significant)
# y = X beta + rho W1 y + u
#building an actual model
SAR_Margins_2018 <- lagsarlm(Margins_2018, listw = Russia.weights, zero.policy=TRUE)
sum_Margins_2018 <- summary(SAR_Margins_2018)
# sum_Margins_2018$Coef[, c(1,4)] %>% xtable() # for getting a latex table
# print("NOX")
# print(sum_b$Coef[c(11),])
sum_Margins_2018$Coef[, c(1,4)]
## Estimate Pr(>|z|)
## (Intercept) 6.6395153792 2.714358e-01
## EM_2018 0.1999276688 1.276680e-06
## IM_2018 0.0172481103 1.481835e-06
## Edu_2018 0.0007743503 5.586459e-02
## Pop_2018 -6.0189626785 2.831617e-01
## Inv_2018 -0.2512608378 5.463222e-01
## FDI_2018 0.5045494346 4.011729e-01
## log(Imp_2018) 0.2509808088 4.315525e-08
#comparing our spatial model with the original one
AIC(SAR_Margins_2018)
## [1] 49.57632
AIC(reg_Margins_2018)
## [1] 56.83275
logLik(SAR_Margins_2018)
## 'log Lik.' -14.78816 (df=10)
logLik(reg_Margins_2018)
## 'log Lik.' -19.41637 (df=9)
#AIC - the lower, the better the model
#log-likelihood - the higher, the better the model
#SAR is better than OLS in this case
stargazer(SEM_Theil_2018, SAR_Margins_2018, type = "text", omit = c('Constant'), keep.stat = c("n"), title = 'Cross-sectional Spatial models',
add.lines = list(c('model', 'SEM', 'SAR')), header=FALSE, dep.var.labels = NULL,
model.names = FALSE) #ci = T, font.size = "tiny",
##
## Cross-sectional Spatial models
## ==========================================
## Dependent variable:
## ----------------------------
## log(GRP_2018)
## (1) (2)
## ------------------------------------------
## Div_2018 -0.010
## (0.026)
##
## EM_2018 0.200***
## (0.041)
##
## IM_2018 0.017***
## (0.004)
##
## Edu_2018 0.0004 0.001*
## (0.001) (0.0004)
##
## Pop_2018 1.885 -6.019
## (7.816) (5.608)
##
## Inv_2018 0.020 -0.251
## (0.546) (0.416)
##
## FDI_2018 1.451* 0.505
## (0.760) (0.601)
##
## log(Imp_2018) 0.176*** 0.251***
## (0.046) (0.046)
##
## ------------------------------------------
## model SEM SAR
## Observations 78 78
## ==========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
#downloading spatial cross-sectional data
Russia_shape_2018_robust = readOGR(
'C:/Users/Kolya/Documents/New_studies/Diploma/Geo/Data_geo/Rus_regions_2018_robust.shp')
## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\Kolya\Documents\New_studies\Diploma\Geo\Data_geo\Rus_regions_2018_robust.shp", layer: "Rus_regions_2018_robust"
## with 76 features
## It has 16 fields
## Integer64 fields read as strings: Edu_2018
# change columns_robust type (which was changed unintentionally by QGIS)
Russia_shape_2018_robust$Edu_2018 <- as.numeric(Russia_shape_2018_robust$Edu_2018 )
#Descriptive statistics
Russia_shape_2018_robust@data %>% glimpse()
## Rows: 76
## Columns: 16
## $ region <chr> "Республика Адыгея", "Алтайский край", "Амурская область", "…
## $ GRPL_2018 <dbl> 200763.6, 191088.3, 310611.9, 544107.3, 361074.8, 309524.4, …
## $ InvL_2018 <dbl> 0.1899902, 0.1610921, 0.6433209, 0.2838402, 0.3313535, 0.187…
## $ FDIL_2018 <dbl> 0.0244560199, 0.0048135624, 0.0918287926, 0.0344199371, 0.00…
## $ DivL_2018 <dbl> 0.23205912, 1.89692465, 0.14187907, 0.41500809, 1.91762571, …
## $ EML_2018 <dbl> 3.482957, 1.637264, 3.929879, 3.395522, 1.625707, 1.760371, …
## $ IML_2018 <dbl> 21.435542, 18.728317, 23.504197, 31.265937, 19.608514, 36.56…
## $ Edu_2018 <dbl> 291, 216, 191, 162, 294, 248, 308, 211, 227, 266, 281, 171, …
## $ GRP_2018 <dbl> 210326.8, 196543.3, 333585.8, 603355.3, 455456.1, 347702.7, …
## $ Inv_2018 <dbl> 0.2550824, 0.1848265, 0.7516793, 0.2291233, 0.1903964, 0.154…
## $ FDI_2018 <dbl> 0.001042692, 0.007551503, 0.053901555, 0.002167261, 0.003455…
## $ Pop_2018 <dbl> 1.0105704, 0.9929956, 0.9947223, 0.9919102, 0.9941918, 0.996…
## $ Imp_2018 <dbl> 1400723790, 6951216909, 4525812109, 6904760397, 26005271280,…
## $ EM_2018 <dbl> 3.381665, 1.576615, 3.657189, 3.230797, 1.612327, 1.692044, …
## $ IM_2018 <dbl> 16.9659373, 20.4793791, 32.6429345, 30.0139353, 27.2501188, …
## $ Div_2018 <dbl> 0.24349315, 2.33436715, 0.28093926, 0.48234165, 2.91849684, …
proj4string(Russia_shape_2018_robust) <- CRS("+init=epsg:4326")
Russia.centroids_robust<-gCentroid(Russia_shape_2018_robust,byid=TRUE)
weights_robust <- knn2nb(knearneigh(Russia.centroids_robust, k=8,longlat = TRUE),row.names=Russia_shape_2018_robust$region)
Russia.weights_robust <- nb2listw(weights_robust)
#H0 - variable is randomly distributed (no-autocrrelation)
columns_robust = names(Russia_shape_2018_robust@data)[2:length(names(Russia_shape_2018_robust@data))]
Russia_sp_dat <- Russia_shape_2018_robust@data
for(i in columns_robust){
res <- moran.test(Russia_sp_dat[[i]], Russia.weights_robust)
# print(res)
if(res$p.value < 0.05){
print(paste(i,': Spatial correlation detected'))
}
}
## [1] "GRPL_2018 : Spatial correlation detected"
## [1] "InvL_2018 : Spatial correlation detected"
## [1] "FDIL_2018 : Spatial correlation detected"
## [1] "DivL_2018 : Spatial correlation detected"
## [1] "EML_2018 : Spatial correlation detected"
## [1] "IML_2018 : Spatial correlation detected"
## [1] "Edu_2018 : Spatial correlation detected"
## [1] "GRP_2018 : Spatial correlation detected"
## [1] "Inv_2018 : Spatial correlation detected"
## [1] "Pop_2018 : Spatial correlation detected"
## [1] "EM_2018 : Spatial correlation detected"
## [1] "IM_2018 : Spatial correlation detected"
# 6/9 variables appear to be spatially correlated
#GRP_2018, EM_2018, IM_2018, Edu_2018, Inv_2018, Pop_2018. Two of those - two independent variables, and 1- depedendent one.
for (g in columns_robust){
moran.plot(Russia_shape_2018_robust@data[[g]], Russia.weights_robust, xlab=g)
}
#graphically, all results of tests are correct
attach(Russia_shape_2018_robust@data) #strange ritual
## Следующие объекты скрыты от Russia_shape_2018@data (pos = 3):
##
## Div_2018, DivL_2018, Edu_2018, EM_2018, EML_2018, FDI_2018,
## FDIL_2018, GRP_2018, GRPL_2018, IM_2018, IML_2018, Imp_2018,
## Inv_2018, InvL_2018, Pop_2018, region
## Следующие объекты скрыты от Russia_shape_2018@data (pos = 4):
##
## Div_2018, DivL_2018, Edu_2018, EM_2018, EML_2018, FDI_2018,
## FDIL_2018, GRP_2018, GRPL_2018, IM_2018, IML_2018, Imp_2018,
## Inv_2018, InvL_2018, Pop_2018, region
reg_Theil_robust_2018 <- lm(Theil_2018, data = Russia_shape_2018_robust@data)
summary(reg_Theil_robust_2018) #%>% xtable()
##
## Call:
## lm(formula = Theil_2018, data = Russia_shape_2018_robust@data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6064 -0.2549 -0.1139 0.1627 1.4397
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.440e+01 8.658e+00 1.663 0.100886
## Div_2018 -4.504e-02 4.120e-02 -1.093 0.278054
## Edu_2018 -6.517e-05 7.342e-04 -0.089 0.929535
## Pop_2018 -6.586e+00 8.524e+00 -0.773 0.442366
## Inv_2018 4.327e-01 6.363e-01 0.680 0.498737
## FDI_2018 1.621e+00 9.711e-01 1.670 0.099531 .
## log(Imp_2018) 2.121e-01 5.838e-02 3.633 0.000535 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4436 on 69 degrees of freedom
## Multiple R-squared: 0.3039, Adjusted R-squared: 0.2434
## F-statistic: 5.021 on 6 and 69 DF, p-value: 0.0002541
#H0 - residuals are randomly distributed (no-autocorrelation)
lm.morantest(reg_Theil_robust_2018,Russia.weights_robust)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = Theil_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
##
## Moran I statistic standard deviate = 4.9636, p-value = 3.459e-07
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.216739733 -0.020910814 0.002292331
moran.test(reg_Theil_robust_2018$residuals, Russia.weights_robust)
##
## Moran I test under randomisation
##
## data: reg_Theil_robust_2018$residuals
## weights: Russia.weights_robust
##
## Moran I statistic standard deviate = 4.636, p-value = 1.776e-06
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.216739733 -0.013333333 0.002462886
moran.plot(reg_Theil_robust_2018$residuals, Russia.weights_robust)
# residuals are correlated according to the both test and graph
lm.LMtests(reg_Theil_robust_2018, listw = Russia.weights_robust, test="all", zero.policy=TRUE)
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Theil_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
##
## LMerr = 16.02, df = 1, p-value = 6.269e-05
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Theil_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
##
## LMlag = 14.836, df = 1, p-value = 0.0001173
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Theil_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
##
## RLMerr = 1.9779, df = 1, p-value = 0.1596
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Theil_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
##
## RLMlag = 0.794, df = 1, p-value = 0.3729
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Theil_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
##
## SARMA = 16.814, df = 2, p-value = 0.0002233
# Analitics
# LMerr = 16.02, df = 1, p-value = 6.269e-05 => significant
# LMlag = 14.836, df = 1, p-value = 0.0001173 => significant
# conclusion => moving to Robust LM Diagnostics
# RLMerr = 1.9779, df = 1, p-value = 0.1596
# RLMlag = 0.794, df = 1, p-value = 0.3729
# p-value: RLMerr < RLMlag => RLMerr is robust
# #SEM - spatial error model - we need it in our case (RLMerr is robust)
# y = X beta + rho W1 y + u
# u = lambda W2 u + e
# SAR - spatial autoregressive model
# y = X beta + rho W1 y + u
#building an actual model
SEM_Theil_robust_2018 <- errorsarlm(Theil_2018, listw = Russia.weights_robust, zero.policy=TRUE)
sum_Theil_robust_2018 <- summary(SEM_Theil_robust_2018)
# sum_Theil_robust_2018$Coef[, c(1,4)] %>% xtable() # for getting a latex table
# print("NOX")
# print(sum_b$Coef[c(11),])
sum_Theil_robust_2018$Coef[, c(1,4)]
## Estimate Pr(>|z|)
## (Intercept) 6.261629e+00 4.168137e-01
## Div_2018 -5.319917e-02 1.325870e-01
## Edu_2018 -8.200708e-05 8.974239e-01
## Pop_2018 1.649626e+00 8.322620e-01
## Inv_2018 -2.110823e-02 9.688815e-01
## FDI_2018 1.099042e+00 1.607322e-01
## log(Imp_2018) 2.181055e-01 2.714074e-05
#comparing our spatial model with the original one
AIC(SEM_Theil_robust_2018)
## [1] 90.16144
AIC(reg_Theil_robust_2018)
## [1] 100.7762
logLik(SEM_Theil_robust_2018)
## 'log Lik.' -36.08072 (df=9)
logLik(reg_Theil_robust_2018)
## 'log Lik.' -42.38809 (df=8)
#AIC - the lower, the better the model
#log-likelihood - the higher, the better the model
#SAR is better than OLS in this case
attach(Russia_shape_2018_robust@data) #strange ritual
## Следующие объекты скрыты от Russia_shape_2018_robust@data (pos = 3):
##
## Div_2018, DivL_2018, Edu_2018, EM_2018, EML_2018, FDI_2018,
## FDIL_2018, GRP_2018, GRPL_2018, IM_2018, IML_2018, Imp_2018,
## Inv_2018, InvL_2018, Pop_2018, region
## Следующие объекты скрыты от Russia_shape_2018@data (pos = 4):
##
## Div_2018, DivL_2018, Edu_2018, EM_2018, EML_2018, FDI_2018,
## FDIL_2018, GRP_2018, GRPL_2018, IM_2018, IML_2018, Imp_2018,
## Inv_2018, InvL_2018, Pop_2018, region
## Следующие объекты скрыты от Russia_shape_2018@data (pos = 5):
##
## Div_2018, DivL_2018, Edu_2018, EM_2018, EML_2018, FDI_2018,
## FDIL_2018, GRP_2018, GRPL_2018, IM_2018, IML_2018, Imp_2018,
## Inv_2018, InvL_2018, Pop_2018, region
reg_Margins_robust_2018 <- lm(Margins_2018, data = Russia_shape_2018_robust@data)
summary(reg_Margins_robust_2018) #%>% xtable()
##
## Call:
## lm(formula = Margins_2018, data = Russia_shape_2018_robust@data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.62733 -0.17678 -0.04112 0.15177 0.98194
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.848e+01 6.050e+00 3.054 0.00322 **
## EM_2018 2.155e-01 4.829e-02 4.462 3.13e-05 ***
## IM_2018 1.817e-02 4.107e-03 4.424 3.59e-05 ***
## Edu_2018 5.480e-04 5.554e-04 0.987 0.32730
## Pop_2018 -1.324e+01 6.166e+00 -2.147 0.03534 *
## Inv_2018 -1.140e-01 4.772e-01 -0.239 0.81200
## FDI_2018 6.634e-01 7.403e-01 0.896 0.37335
## log(Imp_2018) 2.745e-01 5.386e-02 5.096 2.96e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3321 on 68 degrees of freedom
## Multiple R-squared: 0.6154, Adjusted R-squared: 0.5758
## F-statistic: 15.54 on 7 and 68 DF, p-value: 5.544e-12
#H0 - residuals are randomly distributed (no-autocorrelation)
lm.morantest(reg_Margins_robust_2018,Russia.weights_robust)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = Margins_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
##
## Moran I statistic standard deviate = 1.9215, p-value = 0.02734
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 0.062773875 -0.025356811 0.002103697
moran.test(reg_Margins_robust_2018$residuals, Russia.weights_robust)
##
## Moran I test under randomisation
##
## data: reg_Margins_robust_2018$residuals
## weights: Russia.weights_robust
##
## Moran I statistic standard deviate = 1.5129, p-value = 0.06515
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.062773875 -0.013333333 0.002530626
moran.plot(reg_Margins_robust_2018$residuals, Russia.weights_robust)
# residuals are correlated according to the both test and graph
lm.LMtests(reg_Margins_robust_2018, listw = Russia.weights_robust, test="all", zero.policy=TRUE)
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Margins_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
##
## LMerr = 1.3438, df = 1, p-value = 0.2464
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Margins_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
##
## LMlag = 10.807, df = 1, p-value = 0.001011
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Margins_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
##
## RLMerr = 2.1706, df = 1, p-value = 0.1407
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Margins_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
##
## RLMlag = 11.634, df = 1, p-value = 0.0006476
##
##
## Lagrange multiplier diagnostics for spatial dependence
##
## data:
## model: lm(formula = Margins_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
##
## SARMA = 12.978, df = 2, p-value = 0.00152
# Analitics
# LMerr = 1.3438, df = 1, p-value = 0.2464 => insignificant
# LMlag = 10.807, df = 1, p-value = 0.001011 => significant
# conclusion => SAR
# #SEM - spatial error model
# y = X beta + rho W1 y + u
# u = lambda W2 u + e
# SAR - spatial autoregressive model - we need it in our case
# y = X beta + rho W1 y + u
#building an actual model
SAR_Margins_robust_2018 <- lagsarlm(Margins_2018, listw = Russia.weights_robust, zero.policy=TRUE)
sum_Margins_robust_2018 <- summary(SAR_Margins_robust_2018)
# sum_Margins_robust_2018$Coef[, c(1,4)] %>% xtable() # for getting a latex table
# print("NOX")
# print(sum_b$Coef[c(11),])
sum_Margins_robust_2018$Coef[, c(1,4)]
## Estimate Pr(>|z|)
## (Intercept) 7.0693883156 2.545338e-01
## EM_2018 0.1963046425 4.260337e-06
## IM_2018 0.0164624395 8.667365e-06
## Edu_2018 0.0005853796 2.326976e-01
## Pop_2018 -6.5002580292 2.556823e-01
## Inv_2018 -0.2245906605 5.949596e-01
## FDI_2018 0.5043277554 4.411717e-01
## log(Imp_2018) 0.2536240230 9.598595e-08
#comparing our spatial model with the original one
AIC(SAR_Margins_robust_2018)
## [1] 50.71042
AIC(reg_Margins_robust_2018)
## [1] 57.68435
logLik(SAR_Margins_robust_2018)
## 'log Lik.' -15.35521 (df=10)
logLik(reg_Margins_robust_2018)
## 'log Lik.' -19.84217 (df=9)
#AIC - the lower, the better the model
#log-likelihood - the higher, the better the model
#SAR is better than OLS in this case
stargazer(SEM_Theil_robust_2018, SAR_Margins_robust_2018, type = "text", omit = c('Constant'), keep.stat = c("n"), title = 'Cross-sectional Spatial models',
add.lines = list(c('model', 'SEM', 'SAR')), header=FALSE, dep.var.labels = NULL,
model.names = FALSE) #ci = T, font.size = "tiny",
##
## Cross-sectional Spatial models
## ==========================================
## Dependent variable:
## ----------------------------
## log(GRP_2018)
## (1) (2)
## ------------------------------------------
## Div_2018 -0.053
## (0.035)
##
## EM_2018 0.196***
## (0.043)
##
## IM_2018 0.016***
## (0.004)
##
## Edu_2018 -0.0001 0.001
## (0.001) (0.0005)
##
## Pop_2018 1.650 -6.500
## (7.789) (5.719)
##
## Inv_2018 -0.021 -0.225
## (0.541) (0.422)
##
## FDI_2018 1.099 0.504
## (0.784) (0.655)
##
## log(Imp_2018) 0.218*** 0.254***
## (0.052) (0.048)
##
## ------------------------------------------
## model SEM SAR
## Observations 76 76
## ==========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
stargazer(SEM_Theil_2018, SEM_Theil_robust_2018,
SAR_Margins_2018, SAR_Margins_robust_2018,
type = "text", omit = c('Constant'), keep.stat = c("n"), title = 'Cross-sectional Spatial models',
add.lines = list(c('model', 'SEM', 'SEM', 'SAR', 'SAR')), header=FALSE, dep.var.labels = NULL,
model.names = FALSE, digits = 4) #ci = T, font.size = "tiny",
##
## Cross-sectional Spatial models
## =====================================================
## Dependent variable:
## ---------------------------------------
## log(GRP_2018)
## (1) (2) (3) (4)
## -----------------------------------------------------
## Div_2018 -0.0102 -0.0532
## (0.0261) (0.0354)
##
## EM_2018 0.1999*** 0.1963***
## (0.0413) (0.0427)
##
## IM_2018 0.0172*** 0.0165***
## (0.0036) (0.0037)
##
## Edu_2018 0.0004 -0.0001 0.0008* 0.0006
## (0.0006) (0.0006) (0.0004) (0.0005)
##
## Pop_2018 1.8851 1.6496 -6.0190 -6.5003
## (7.8156) (7.7886) (5.6082) (5.7188)
##
## Inv_2018 0.0203 -0.0211 -0.2513 -0.2246
## (0.5455) (0.5411) (0.4165) (0.4224)
##
## FDI_2018 1.4515* 1.0990 0.5045 0.5043
## (0.7604) (0.7836) (0.6010) (0.6548)
##
## log(Imp_2018) 0.1763*** 0.2181*** 0.2510*** 0.2536***
## (0.0464) (0.0520) (0.0458) (0.0475)
##
## -----------------------------------------------------
## model SEM SEM SAR SAR
## Observations 78 76 78 76
## =====================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
Pretty robust results (betas of independent variables).