The following research is conducted by Maria R. Koldasheva and Nikolai A. Popov.

Libraries

library(tidyverse)
library(dplyr)
library(stargazer)
# geo
library(sp)
library(spdep)
library(rgdal)
library(maptools)
library(rgeos)
library(sf)
library(spatialreg)

# mass retype()
library(hablar)

# multicollinearity test omcdiag imcdiag
library(mctest)

library(GGally)

#tables
library(xtable)
library(texreg)

Cross-sectional specifications

# Theil index specification
Theil_2018 <- log(GRP_2018) ~ Div_2018 +  Edu_2018 + Pop_2018 + Inv_2018 + FDI_2018 + log(Imp_2018)

# WM and IM specification
Margins_2018 <- log(GRP_2018) ~ EM_2018 + IM_2018 + Edu_2018 + Pop_2018 + Inv_2018 + FDI_2018 + log(Imp_2018)

Spatial Analysis

Spatial data

#downloading spatial cross-sectional data
Russia_shape_2018 = readOGR(
  'C:/Users/Kolya/Documents/New_studies/Diploma/Geo/Data_geo/Rus_regions_2018.shp')
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\Kolya\Documents\New_studies\Diploma\Geo\Data_geo\Rus_regions_2018.shp", layer: "Rus_regions_2018"
## with 78 features
## It has 16 fields
## Integer64 fields read as strings:  Edu_2018

Data Transformation

# change columns type (which was changed unintentionally by QGIS)
Russia_shape_2018$Edu_2018  <- as.numeric(Russia_shape_2018$Edu_2018 )

#Descriptive statistics
Russia_shape_2018@data %>% glimpse()
## Rows: 78
## Columns: 16
## $ region    <chr> "Республика Адыгея", "Алтайский край", "Амурская область", "…
## $ GRPL_2018 <dbl> 200763.6, 191088.3, 310611.9, 544107.3, 361074.8, 309524.4, …
## $ InvL_2018 <dbl> 0.1899902, 0.1610921, 0.6433209, 0.2838402, 0.3313535, 0.187…
## $ FDIL_2018 <dbl> 0.0244560199, 0.0048135624, 0.0918287926, 0.0344199371, 0.00…
## $ DivL_2018 <dbl> 0.23205912, 1.89692465, 0.14187907, 0.41500809, 1.91762571, …
## $ EML_2018  <dbl> 3.4829567, 1.6372636, 3.9298785, 3.3955224, 1.6257074, 1.760…
## $ IML_2018  <dbl> 21.435542, 18.728317, 23.504197, 31.265937, 19.608514, 36.56…
## $ Edu_2018  <dbl> 291, 216, 191, 162, 294, 248, 308, 211, 227, 266, 281, 550, …
## $ GRP_2018  <dbl> 210326.8, 196543.3, 333585.8, 603355.3, 455456.1, 347702.7, …
## $ Inv_2018  <dbl> 0.2550824, 0.1848265, 0.7516793, 0.2291233, 0.1903964, 0.154…
## $ FDI_2018  <dbl> 0.001042692, 0.007551503, 0.053901555, 0.002167261, 0.003455…
## $ Pop_2018  <dbl> 1.0105704, 0.9929956, 0.9947223, 0.9919102, 0.9941918, 0.996…
## $ Imp_2018  <dbl> 1400723790, 6951216909, 4525812109, 6904760397, 26005271280,…
## $ EM_2018   <dbl> 3.3816654, 1.5766154, 3.6571892, 3.2307966, 1.6123266, 1.692…
## $ IM_2018   <dbl> 16.9659373, 20.4793791, 32.6429345, 30.0139353, 27.2501188, …
## $ Div_2018  <dbl> 0.24349315, 2.33436715, 0.28093926, 0.48234165, 2.91849684, …

Projections and neighbors

proj4string(Russia_shape_2018) <- CRS("+init=epsg:4326")

Russia.centroids<-gCentroid(Russia_shape_2018,byid=TRUE)

weights <- knn2nb(knearneigh(Russia.centroids, k=8,longlat = TRUE),row.names=Russia_shape_2018$region)
Russia.weights <- nb2listw(weights)

Spatial correlation: Moran tests

#H0 - variable is randomly distributed (no-autocrrelation)
columns  = names(Russia_shape_2018@data)[2:length(names(Russia_shape_2018@data))]
Russia_sp_dat <- Russia_shape_2018@data
for(i in columns){
    res <- moran.test(Russia_sp_dat[[i]], Russia.weights)
    # print(res)
    if(res$p.value < 0.05){
      print(paste(i,': Spatial correlation detected'))
    }
}
## [1] "GRPL_2018 : Spatial correlation detected"
## [1] "InvL_2018 : Spatial correlation detected"
## [1] "FDIL_2018 : Spatial correlation detected"
## [1] "EML_2018 : Spatial correlation detected"
## [1] "IML_2018 : Spatial correlation detected"
## [1] "GRP_2018 : Spatial correlation detected"
## [1] "Inv_2018 : Spatial correlation detected"
## [1] "Pop_2018 : Spatial correlation detected"
## [1] "Imp_2018 : Spatial correlation detected"
## [1] "EM_2018 : Spatial correlation detected"
## [1] "IM_2018 : Spatial correlation detected"
# 6/9 variables appear to be spatially correlated (lags are not important for this analysis - they will be used only later in dynamic models)
# GRP_2018, EM_2018, IM_2018, Inv_2018, Pop_2018, Imp_2018. Two of those - two independent variables, and 1- depedendent one.

Moran plots

for (g in columns){
  moran.plot(Russia_shape_2018@data[[g]], Russia.weights, xlab=g)
}

#graphically, all results of tests are correct

Naive spatial models, Theil specification

Global Moran test for residuals, Theil

attach(Russia_shape_2018@data) #strange ritual

reg_Theil_2018 <- lm(Theil_2018, data = Russia_shape_2018@data)
summary(reg_Theil_2018) #%>% xtable()
## 
## Call:
## lm(formula = Theil_2018, data = Russia_shape_2018@data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5639 -0.2595 -0.1242  0.1738  1.4420 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   15.1016926  8.5683957   1.762 0.082291 .  
## Div_2018      -0.0145501  0.0318961  -0.456 0.649659    
## Edu_2018       0.0002724  0.0006671   0.408 0.684314    
## Pop_2018      -6.8542134  8.4648465  -0.810 0.420803    
## Inv_2018       0.5133863  0.6299200   0.815 0.417796    
## FDI_2018       2.0064298  0.9104057   2.204 0.030776 *  
## log(Imp_2018)  0.1856968  0.0533985   3.478 0.000868 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4418 on 71 degrees of freedom
## Multiple R-squared:  0.3548, Adjusted R-squared:  0.3002 
## F-statistic: 6.506 on 6 and 71 DF,  p-value: 1.673e-05
#H0 - residuals are randomly distributed (no-autocorrelation)
lm.morantest(reg_Theil_2018,Russia.weights)
## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## Moran I statistic standard deviate = 4.371, p-value = 6.183e-06
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      0.191888053     -0.017918857      0.002303925
moran.test(reg_Theil_2018$residuals, Russia.weights)
## 
##  Moran I test under randomisation
## 
## data:  reg_Theil_2018$residuals  
## weights: Russia.weights    
## 
## Moran I statistic standard deviate = 4.1832, p-value = 1.437e-05
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.191888053      -0.012987013       0.002398565
moran.plot(reg_Theil_2018$residuals, Russia.weights)

# residuals are correlated according to the both test and graph

Lagrange multiplier test, Theil

lm.LMtests(reg_Theil_2018, listw = Russia.weights, test="all", zero.policy=TRUE)
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## LMerr = 12.94, df = 1, p-value = 0.0003217
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## LMlag = 10.612, df = 1, p-value = 0.001123
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## RLMerr = 2.7102, df = 1, p-value = 0.09971
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## RLMlag = 0.38264, df = 1, p-value = 0.5362
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## SARMA = 13.322, df = 2, p-value = 0.00128
# Analitics
# LMerr = 12.94, df = 1, p-value = 0.0003217 => significant
# LMlag = 10.612, df = 1, p-value = 0.001123 => significant
# conclusion => moving to Robust LM Diagnostics

# RLMerr = 2.7102, df = 1, p-value = 0.09971
# RLMlag = 0.38264, df = 1, p-value = 0.5362
# p-value: RLMerr < RLMlag => RLMerr is robust

# #SEM - spatial error model  - we need it in our case (RLMerr is robust)
# y = X beta + rho W1 y + u
# u = lambda W2 u + e

# SAR - spatial autoregressive model
# y = X beta + rho W1 y + u

SEM, Theil

#building an actual model
SEM_Theil_2018 <- errorsarlm(Theil_2018, listw = Russia.weights, zero.policy=TRUE)
sum_Theil_2018 <- summary(SEM_Theil_2018)

# sum_Theil_2018$Coef[, c(1,4)] %>% xtable() # for getting a latex table
# print("NOX")
# print(sum_b$Coef[c(11),])

sum_Theil_2018$Coef[, c(1,4)]
##                    Estimate     Pr(>|z|)
## (Intercept)    6.7718256871 0.3809453767
## Div_2018      -0.0102227293 0.6958316415
## Edu_2018       0.0003887803 0.4827255754
## Pop_2018       1.8851380482 0.8093991903
## Inv_2018       0.0202629801 0.9703711832
## FDI_2018       1.4514549611 0.0562834950
## log(Imp_2018)  0.1762802040 0.0001474848
#comparing our spatial model with the original one

AIC(SEM_Theil_2018)
## [1] 92.96259
AIC(reg_Theil_2018)
## [1] 102.5796
logLik(SEM_Theil_2018)
## 'log Lik.' -37.4813 (df=9)
logLik(reg_Theil_2018)
## 'log Lik.' -43.28981 (df=8)
#AIC - the lower, the better the model
#log-likelihood  - the higher, the better the model

#SEM is better than OLS in this case

Naive spatial models, Margins specification

Global Moran test for residuals, Margins

attach(Russia_shape_2018@data) #strange ritual
## Следующие объекты скрыты от Russia_shape_2018@data (pos = 3):
## 
##     Div_2018, DivL_2018, Edu_2018, EM_2018, EML_2018, FDI_2018,
##     FDIL_2018, GRP_2018, GRPL_2018, IM_2018, IML_2018, Imp_2018,
##     Inv_2018, InvL_2018, Pop_2018, region
reg_Margins_2018 <- lm(Margins_2018, data = Russia_shape_2018@data)
summary(reg_Margins_2018) #%>% xtable()
## 
## Call:
## lm(formula = Margins_2018, data = Russia_shape_2018@data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.62690 -0.17128 -0.02921  0.14315  0.98139 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.800e+01  5.802e+00   3.103  0.00277 ** 
## EM_2018        2.188e-01  4.664e-02   4.690 1.31e-05 ***
## IM_2018        1.800e-02  4.020e-03   4.477 2.86e-05 ***
## Edu_2018       6.504e-04  4.581e-04   1.420  0.16013    
## Pop_2018      -1.289e+01  5.993e+00  -2.150  0.03500 *  
## Inv_2018      -1.231e-01  4.693e-01  -0.262  0.79385    
## FDI_2018       7.532e-01  6.751e-01   1.116  0.26838    
## log(Imp_2018)  2.787e-01  5.170e-02   5.390 8.99e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3276 on 70 degrees of freedom
## Multiple R-squared:  0.6502, Adjusted R-squared:  0.6152 
## F-statistic: 18.58 on 7 and 70 DF,  p-value: 9.456e-14
#H0 - residuals are randomly distributed (no-autocorrelation)
lm.morantest(reg_Margins_2018,Russia.weights)
## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## Moran I statistic standard deviate = 1.9185, p-value = 0.02752
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      0.063190983     -0.024065263      0.002068579
moran.test(reg_Margins_2018$residuals, Russia.weights)
## 
##  Moran I test under randomisation
## 
## data:  reg_Margins_2018$residuals  
## weights: Russia.weights    
## 
## Moran I statistic standard deviate = 1.5356, p-value = 0.06232
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.063190983      -0.012987013       0.002461049
moran.plot(reg_Margins_2018$residuals, Russia.weights)

# residuals are correlated according to the both test and graph

Lagrange multiplier test, Margins

lm.LMtests(reg_Margins_2018, listw = Russia.weights, test="all", zero.policy=TRUE)
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## LMerr = 1.4033, df = 1, p-value = 0.2362
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## LMlag = 11.087, df = 1, p-value = 0.0008693
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## RLMerr = 1.9221, df = 1, p-value = 0.1656
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## RLMlag = 11.606, df = 1, p-value = 0.0006574
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## SARMA = 13.009, df = 2, p-value = 0.001497
# Analitics
# LMerr = 1.4033, df = 1, p-value = 0.2362 => insignificant
# LMlag = 11.087, df = 1, p-value = 0.0008693 => significant
# conclusion => SAR

# SEM - spatial error model
# y = X beta + rho W1 y + u
# u = lambda W2 u + e

# SAR - spatial autoregressive model - we need it in our case (LMlag is significant)
# y = X beta + rho W1 y + u

SAR, Margins

#building an actual model
SAR_Margins_2018 <- lagsarlm(Margins_2018, listw = Russia.weights, zero.policy=TRUE)
sum_Margins_2018 <- summary(SAR_Margins_2018)

# sum_Margins_2018$Coef[, c(1,4)] %>% xtable() # for getting a latex table
# print("NOX")
# print(sum_b$Coef[c(11),])

sum_Margins_2018$Coef[, c(1,4)]
##                    Estimate     Pr(>|z|)
## (Intercept)    6.6395153792 2.714358e-01
## EM_2018        0.1999276688 1.276680e-06
## IM_2018        0.0172481103 1.481835e-06
## Edu_2018       0.0007743503 5.586459e-02
## Pop_2018      -6.0189626785 2.831617e-01
## Inv_2018      -0.2512608378 5.463222e-01
## FDI_2018       0.5045494346 4.011729e-01
## log(Imp_2018)  0.2509808088 4.315525e-08
#comparing our spatial model with the original one

AIC(SAR_Margins_2018)
## [1] 49.57632
AIC(reg_Margins_2018)
## [1] 56.83275
logLik(SAR_Margins_2018)
## 'log Lik.' -14.78816 (df=10)
logLik(reg_Margins_2018)
## 'log Lik.' -19.41637 (df=9)
#AIC - the lower, the better the model
#log-likelihood  - the higher, the better the model

#SAR is better than OLS in this case

United table

stargazer(SEM_Theil_2018, SAR_Margins_2018, type = "text", omit = c('Constant'), keep.stat = c("n"), title = 'Cross-sectional Spatial models',
          
add.lines = list(c('model', 'SEM', 'SAR')), header=FALSE, dep.var.labels = NULL, 
model.names = FALSE) #ci = T, font.size = "tiny", 
## 
## Cross-sectional Spatial models
## ==========================================
##                   Dependent variable:     
##               ----------------------------
##                      log(GRP_2018)        
##                    (1)            (2)     
## ------------------------------------------
## Div_2018          -0.010                  
##                  (0.026)                  
##                                           
## EM_2018                        0.200***   
##                                 (0.041)   
##                                           
## IM_2018                        0.017***   
##                                 (0.004)   
##                                           
## Edu_2018          0.0004        0.001*    
##                  (0.001)       (0.0004)   
##                                           
## Pop_2018          1.885         -6.019    
##                  (7.816)        (5.608)   
##                                           
## Inv_2018          0.020         -0.251    
##                  (0.546)        (0.416)   
##                                           
## FDI_2018          1.451*         0.505    
##                  (0.760)        (0.601)   
##                                           
## log(Imp_2018)    0.176***      0.251***   
##                  (0.046)        (0.046)   
##                                           
## ------------------------------------------
## model              SEM            SAR     
## Observations        78            78      
## ==========================================
## Note:          *p<0.1; **p<0.05; ***p<0.01

SAME CODE, BUT FOR ROBUST DATASET (NO MSK, SPB)

Spatial Analysis, robust

Spatial data

#downloading spatial cross-sectional data
Russia_shape_2018_robust = readOGR(
  'C:/Users/Kolya/Documents/New_studies/Diploma/Geo/Data_geo/Rus_regions_2018_robust.shp')
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\Kolya\Documents\New_studies\Diploma\Geo\Data_geo\Rus_regions_2018_robust.shp", layer: "Rus_regions_2018_robust"
## with 76 features
## It has 16 fields
## Integer64 fields read as strings:  Edu_2018

Data Transformation, robust

# change columns_robust type (which was changed unintentionally by QGIS)
Russia_shape_2018_robust$Edu_2018  <- as.numeric(Russia_shape_2018_robust$Edu_2018 )

#Descriptive statistics
Russia_shape_2018_robust@data %>% glimpse()
## Rows: 76
## Columns: 16
## $ region    <chr> "Республика Адыгея", "Алтайский край", "Амурская область", "…
## $ GRPL_2018 <dbl> 200763.6, 191088.3, 310611.9, 544107.3, 361074.8, 309524.4, …
## $ InvL_2018 <dbl> 0.1899902, 0.1610921, 0.6433209, 0.2838402, 0.3313535, 0.187…
## $ FDIL_2018 <dbl> 0.0244560199, 0.0048135624, 0.0918287926, 0.0344199371, 0.00…
## $ DivL_2018 <dbl> 0.23205912, 1.89692465, 0.14187907, 0.41500809, 1.91762571, …
## $ EML_2018  <dbl> 3.482957, 1.637264, 3.929879, 3.395522, 1.625707, 1.760371, …
## $ IML_2018  <dbl> 21.435542, 18.728317, 23.504197, 31.265937, 19.608514, 36.56…
## $ Edu_2018  <dbl> 291, 216, 191, 162, 294, 248, 308, 211, 227, 266, 281, 171, …
## $ GRP_2018  <dbl> 210326.8, 196543.3, 333585.8, 603355.3, 455456.1, 347702.7, …
## $ Inv_2018  <dbl> 0.2550824, 0.1848265, 0.7516793, 0.2291233, 0.1903964, 0.154…
## $ FDI_2018  <dbl> 0.001042692, 0.007551503, 0.053901555, 0.002167261, 0.003455…
## $ Pop_2018  <dbl> 1.0105704, 0.9929956, 0.9947223, 0.9919102, 0.9941918, 0.996…
## $ Imp_2018  <dbl> 1400723790, 6951216909, 4525812109, 6904760397, 26005271280,…
## $ EM_2018   <dbl> 3.381665, 1.576615, 3.657189, 3.230797, 1.612327, 1.692044, …
## $ IM_2018   <dbl> 16.9659373, 20.4793791, 32.6429345, 30.0139353, 27.2501188, …
## $ Div_2018  <dbl> 0.24349315, 2.33436715, 0.28093926, 0.48234165, 2.91849684, …

Projections and neighbors, robust

proj4string(Russia_shape_2018_robust) <- CRS("+init=epsg:4326")

Russia.centroids_robust<-gCentroid(Russia_shape_2018_robust,byid=TRUE)

weights_robust <- knn2nb(knearneigh(Russia.centroids_robust, k=8,longlat = TRUE),row.names=Russia_shape_2018_robust$region)
Russia.weights_robust <- nb2listw(weights_robust)

Spatial correlation: Moran tests, robust

#H0 - variable is randomly distributed (no-autocrrelation)
columns_robust  = names(Russia_shape_2018_robust@data)[2:length(names(Russia_shape_2018_robust@data))]
Russia_sp_dat <- Russia_shape_2018_robust@data
for(i in columns_robust){
    res <- moran.test(Russia_sp_dat[[i]], Russia.weights_robust)
    # print(res)
    if(res$p.value < 0.05){
      print(paste(i,': Spatial correlation detected'))
    }
}
## [1] "GRPL_2018 : Spatial correlation detected"
## [1] "InvL_2018 : Spatial correlation detected"
## [1] "FDIL_2018 : Spatial correlation detected"
## [1] "DivL_2018 : Spatial correlation detected"
## [1] "EML_2018 : Spatial correlation detected"
## [1] "IML_2018 : Spatial correlation detected"
## [1] "Edu_2018 : Spatial correlation detected"
## [1] "GRP_2018 : Spatial correlation detected"
## [1] "Inv_2018 : Spatial correlation detected"
## [1] "Pop_2018 : Spatial correlation detected"
## [1] "EM_2018 : Spatial correlation detected"
## [1] "IM_2018 : Spatial correlation detected"
# 6/9 variables appear to be spatially correlated
#GRP_2018, EM_2018, IM_2018, Edu_2018, Inv_2018, Pop_2018. Two of those - two independent variables, and 1- depedendent one.

Moran plots, robust

for (g in columns_robust){
  moran.plot(Russia_shape_2018_robust@data[[g]], Russia.weights_robust, xlab=g)
}

#graphically, all results of tests are correct

Naive spatial models, Theil specification, robust

Global Moran test for residuals, Theil, robust

attach(Russia_shape_2018_robust@data) #strange ritual
## Следующие объекты скрыты от Russia_shape_2018@data (pos = 3):
## 
##     Div_2018, DivL_2018, Edu_2018, EM_2018, EML_2018, FDI_2018,
##     FDIL_2018, GRP_2018, GRPL_2018, IM_2018, IML_2018, Imp_2018,
##     Inv_2018, InvL_2018, Pop_2018, region
## Следующие объекты скрыты от Russia_shape_2018@data (pos = 4):
## 
##     Div_2018, DivL_2018, Edu_2018, EM_2018, EML_2018, FDI_2018,
##     FDIL_2018, GRP_2018, GRPL_2018, IM_2018, IML_2018, Imp_2018,
##     Inv_2018, InvL_2018, Pop_2018, region
reg_Theil_robust_2018 <- lm(Theil_2018, data = Russia_shape_2018_robust@data)
summary(reg_Theil_robust_2018) #%>% xtable()
## 
## Call:
## lm(formula = Theil_2018, data = Russia_shape_2018_robust@data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6064 -0.2549 -0.1139  0.1627  1.4397 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.440e+01  8.658e+00   1.663 0.100886    
## Div_2018      -4.504e-02  4.120e-02  -1.093 0.278054    
## Edu_2018      -6.517e-05  7.342e-04  -0.089 0.929535    
## Pop_2018      -6.586e+00  8.524e+00  -0.773 0.442366    
## Inv_2018       4.327e-01  6.363e-01   0.680 0.498737    
## FDI_2018       1.621e+00  9.711e-01   1.670 0.099531 .  
## log(Imp_2018)  2.121e-01  5.838e-02   3.633 0.000535 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4436 on 69 degrees of freedom
## Multiple R-squared:  0.3039, Adjusted R-squared:  0.2434 
## F-statistic: 5.021 on 6 and 69 DF,  p-value: 0.0002541
#H0 - residuals are randomly distributed (no-autocorrelation)
lm.morantest(reg_Theil_robust_2018,Russia.weights_robust)
## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## Moran I statistic standard deviate = 4.9636, p-value = 3.459e-07
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      0.216739733     -0.020910814      0.002292331
moran.test(reg_Theil_robust_2018$residuals, Russia.weights_robust)
## 
##  Moran I test under randomisation
## 
## data:  reg_Theil_robust_2018$residuals  
## weights: Russia.weights_robust    
## 
## Moran I statistic standard deviate = 4.636, p-value = 1.776e-06
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.216739733      -0.013333333       0.002462886
moran.plot(reg_Theil_robust_2018$residuals, Russia.weights_robust)

# residuals are correlated according to the both test and graph

Lagrange multiplier test, Theil, robust

lm.LMtests(reg_Theil_robust_2018, listw = Russia.weights_robust, test="all", zero.policy=TRUE)
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## LMerr = 16.02, df = 1, p-value = 6.269e-05
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## LMlag = 14.836, df = 1, p-value = 0.0001173
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## RLMerr = 1.9779, df = 1, p-value = 0.1596
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## RLMlag = 0.794, df = 1, p-value = 0.3729
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## SARMA = 16.814, df = 2, p-value = 0.0002233
# Analitics
# LMerr = 16.02, df = 1, p-value = 6.269e-05 => significant
# LMlag = 14.836, df = 1, p-value = 0.0001173 => significant
# conclusion => moving to Robust LM Diagnostics

# RLMerr = 1.9779, df = 1, p-value = 0.1596
# RLMlag = 0.794, df = 1, p-value = 0.3729
# p-value: RLMerr < RLMlag => RLMerr is robust

# #SEM - spatial error model - we need it in our case (RLMerr is robust)
# y = X beta + rho W1 y + u
# u = lambda W2 u + e

# SAR - spatial autoregressive model
# y = X beta + rho W1 y + u

SEM, Theil, robust

#building an actual model
SEM_Theil_robust_2018 <- errorsarlm(Theil_2018, listw = Russia.weights_robust, zero.policy=TRUE)
sum_Theil_robust_2018 <- summary(SEM_Theil_robust_2018)

# sum_Theil_robust_2018$Coef[, c(1,4)] %>% xtable() # for getting a latex table
# print("NOX")
# print(sum_b$Coef[c(11),])

sum_Theil_robust_2018$Coef[, c(1,4)]
##                    Estimate     Pr(>|z|)
## (Intercept)    6.261629e+00 4.168137e-01
## Div_2018      -5.319917e-02 1.325870e-01
## Edu_2018      -8.200708e-05 8.974239e-01
## Pop_2018       1.649626e+00 8.322620e-01
## Inv_2018      -2.110823e-02 9.688815e-01
## FDI_2018       1.099042e+00 1.607322e-01
## log(Imp_2018)  2.181055e-01 2.714074e-05
#comparing our spatial model with the original one

AIC(SEM_Theil_robust_2018)
## [1] 90.16144
AIC(reg_Theil_robust_2018)
## [1] 100.7762
logLik(SEM_Theil_robust_2018)
## 'log Lik.' -36.08072 (df=9)
logLik(reg_Theil_robust_2018)
## 'log Lik.' -42.38809 (df=8)
#AIC - the lower, the better the model
#log-likelihood  - the higher, the better the model

#SAR is better than OLS in this case

Naive spatial models, Margins specification, robust

Global Moran test for residuals, Margins, robust

attach(Russia_shape_2018_robust@data) #strange ritual
## Следующие объекты скрыты от Russia_shape_2018_robust@data (pos = 3):
## 
##     Div_2018, DivL_2018, Edu_2018, EM_2018, EML_2018, FDI_2018,
##     FDIL_2018, GRP_2018, GRPL_2018, IM_2018, IML_2018, Imp_2018,
##     Inv_2018, InvL_2018, Pop_2018, region
## Следующие объекты скрыты от Russia_shape_2018@data (pos = 4):
## 
##     Div_2018, DivL_2018, Edu_2018, EM_2018, EML_2018, FDI_2018,
##     FDIL_2018, GRP_2018, GRPL_2018, IM_2018, IML_2018, Imp_2018,
##     Inv_2018, InvL_2018, Pop_2018, region
## Следующие объекты скрыты от Russia_shape_2018@data (pos = 5):
## 
##     Div_2018, DivL_2018, Edu_2018, EM_2018, EML_2018, FDI_2018,
##     FDIL_2018, GRP_2018, GRPL_2018, IM_2018, IML_2018, Imp_2018,
##     Inv_2018, InvL_2018, Pop_2018, region
reg_Margins_robust_2018 <- lm(Margins_2018, data = Russia_shape_2018_robust@data)
summary(reg_Margins_robust_2018) #%>% xtable()
## 
## Call:
## lm(formula = Margins_2018, data = Russia_shape_2018_robust@data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.62733 -0.17678 -0.04112  0.15177  0.98194 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.848e+01  6.050e+00   3.054  0.00322 ** 
## EM_2018        2.155e-01  4.829e-02   4.462 3.13e-05 ***
## IM_2018        1.817e-02  4.107e-03   4.424 3.59e-05 ***
## Edu_2018       5.480e-04  5.554e-04   0.987  0.32730    
## Pop_2018      -1.324e+01  6.166e+00  -2.147  0.03534 *  
## Inv_2018      -1.140e-01  4.772e-01  -0.239  0.81200    
## FDI_2018       6.634e-01  7.403e-01   0.896  0.37335    
## log(Imp_2018)  2.745e-01  5.386e-02   5.096 2.96e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3321 on 68 degrees of freedom
## Multiple R-squared:  0.6154, Adjusted R-squared:  0.5758 
## F-statistic: 15.54 on 7 and 68 DF,  p-value: 5.544e-12
#H0 - residuals are randomly distributed (no-autocorrelation)
lm.morantest(reg_Margins_robust_2018,Russia.weights_robust)
## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## Moran I statistic standard deviate = 1.9215, p-value = 0.02734
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      0.062773875     -0.025356811      0.002103697
moran.test(reg_Margins_robust_2018$residuals, Russia.weights_robust)
## 
##  Moran I test under randomisation
## 
## data:  reg_Margins_robust_2018$residuals  
## weights: Russia.weights_robust    
## 
## Moran I statistic standard deviate = 1.5129, p-value = 0.06515
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.062773875      -0.013333333       0.002530626
moran.plot(reg_Margins_robust_2018$residuals, Russia.weights_robust)

# residuals are correlated according to the both test and graph

Lagrange multiplier test, Margins, robust

lm.LMtests(reg_Margins_robust_2018, listw = Russia.weights_robust, test="all", zero.policy=TRUE)
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## LMerr = 1.3438, df = 1, p-value = 0.2464
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## LMlag = 10.807, df = 1, p-value = 0.001011
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## RLMerr = 2.1706, df = 1, p-value = 0.1407
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## RLMlag = 11.634, df = 1, p-value = 0.0006476
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## SARMA = 12.978, df = 2, p-value = 0.00152
# Analitics
# LMerr = 1.3438, df = 1, p-value = 0.2464 => insignificant
# LMlag = 10.807, df = 1, p-value = 0.001011 => significant
# conclusion => SAR

# #SEM - spatial error model
# y = X beta + rho W1 y + u
# u = lambda W2 u + e

# SAR - spatial autoregressive model - we need it in our case
# y = X beta + rho W1 y + u

SAR, Margins, robust

#building an actual model
SAR_Margins_robust_2018 <- lagsarlm(Margins_2018, listw = Russia.weights_robust, zero.policy=TRUE)
sum_Margins_robust_2018 <- summary(SAR_Margins_robust_2018)

# sum_Margins_robust_2018$Coef[, c(1,4)] %>% xtable() # for getting a latex table
# print("NOX")
# print(sum_b$Coef[c(11),])

sum_Margins_robust_2018$Coef[, c(1,4)]
##                    Estimate     Pr(>|z|)
## (Intercept)    7.0693883156 2.545338e-01
## EM_2018        0.1963046425 4.260337e-06
## IM_2018        0.0164624395 8.667365e-06
## Edu_2018       0.0005853796 2.326976e-01
## Pop_2018      -6.5002580292 2.556823e-01
## Inv_2018      -0.2245906605 5.949596e-01
## FDI_2018       0.5043277554 4.411717e-01
## log(Imp_2018)  0.2536240230 9.598595e-08
#comparing our spatial model with the original one

AIC(SAR_Margins_robust_2018)
## [1] 50.71042
AIC(reg_Margins_robust_2018)
## [1] 57.68435
logLik(SAR_Margins_robust_2018)
## 'log Lik.' -15.35521 (df=10)
logLik(reg_Margins_robust_2018)
## 'log Lik.' -19.84217 (df=9)
#AIC - the lower, the better the model
#log-likelihood  - the higher, the better the model

#SAR is better than OLS in this case

United table, robust

stargazer(SEM_Theil_robust_2018, SAR_Margins_robust_2018, type = "text", omit = c('Constant'), keep.stat = c("n"), title = 'Cross-sectional Spatial models',
          
add.lines = list(c('model', 'SEM', 'SAR')), header=FALSE, dep.var.labels = NULL, 
model.names = FALSE) #ci = T, font.size = "tiny", 
## 
## Cross-sectional Spatial models
## ==========================================
##                   Dependent variable:     
##               ----------------------------
##                      log(GRP_2018)        
##                    (1)            (2)     
## ------------------------------------------
## Div_2018          -0.053                  
##                  (0.035)                  
##                                           
## EM_2018                        0.196***   
##                                 (0.043)   
##                                           
## IM_2018                        0.016***   
##                                 (0.004)   
##                                           
## Edu_2018         -0.0001         0.001    
##                  (0.001)       (0.0005)   
##                                           
## Pop_2018          1.650         -6.500    
##                  (7.789)        (5.719)   
##                                           
## Inv_2018          -0.021        -0.225    
##                  (0.541)        (0.422)   
##                                           
## FDI_2018          1.099          0.504    
##                  (0.784)        (0.655)   
##                                           
## log(Imp_2018)    0.218***      0.254***   
##                  (0.052)        (0.048)   
##                                           
## ------------------------------------------
## model              SEM            SAR     
## Observations        76            76      
## ==========================================
## Note:          *p<0.1; **p<0.05; ***p<0.01

Final table

stargazer(SEM_Theil_2018, SEM_Theil_robust_2018,
          SAR_Margins_2018, SAR_Margins_robust_2018,
          type = "text", omit = c('Constant'), keep.stat = c("n"), title = 'Cross-sectional Spatial models',
          
add.lines = list(c('model', 'SEM', 'SEM', 'SAR', 'SAR')), header=FALSE, dep.var.labels = NULL, 
model.names = FALSE, digits = 4) #ci = T, font.size = "tiny", 
## 
## Cross-sectional Spatial models
## =====================================================
##                         Dependent variable:          
##               ---------------------------------------
##                            log(GRP_2018)             
##                  (1)       (2)       (3)       (4)   
## -----------------------------------------------------
## Div_2018       -0.0102   -0.0532                     
##               (0.0261)  (0.0354)                     
##                                                      
## EM_2018                           0.1999*** 0.1963***
##                                   (0.0413)  (0.0427) 
##                                                      
## IM_2018                           0.0172*** 0.0165***
##                                   (0.0036)  (0.0037) 
##                                                      
## Edu_2018       0.0004    -0.0001   0.0008*   0.0006  
##               (0.0006)  (0.0006)  (0.0004)  (0.0005) 
##                                                      
## Pop_2018       1.8851    1.6496    -6.0190   -6.5003 
##               (7.8156)  (7.7886)  (5.6082)  (5.7188) 
##                                                      
## Inv_2018       0.0203    -0.0211   -0.2513   -0.2246 
##               (0.5455)  (0.5411)  (0.4165)  (0.4224) 
##                                                      
## FDI_2018       1.4515*   1.0990    0.5045    0.5043  
##               (0.7604)  (0.7836)  (0.6010)  (0.6548) 
##                                                      
## log(Imp_2018) 0.1763*** 0.2181*** 0.2510*** 0.2536***
##               (0.0464)  (0.0520)  (0.0458)  (0.0475) 
##                                                      
## -----------------------------------------------------
## model            SEM       SEM       SAR       SAR   
## Observations     78        76        78        76    
## =====================================================
## Note:                     *p<0.1; **p<0.05; ***p<0.01

Pretty robust results (betas of independent variables).