The following research is conducted by Maria R. Koldasheva and Nikolai A. Popov.

Libraries

library(tidyverse)
library(dplyr)
library(stargazer)
# geo
library(sp)
library(spdep)
library(rgdal)
library(maptools)
library(rgeos)
library(sf)
library(spatialreg)

# mass retype()
library(hablar)

# multicollinearity test omcdiag imcdiag
library(mctest)

library(GGally)

#tables
library(xtable)
library(texreg)

Cross-sectional specifications

# Theil index specification
Theil_2018 <- log(GRP_2018) ~ Div_2018 +  Edu_2018 + Pop_2018 + Inv_2018 + FDI_2018 + log(Imp_2018)

# WM and IM specification
Margins_2018 <- log(GRP_2018) ~ EM_2018 + IM_2018 + Edu_2018 + Pop_2018 + Inv_2018 + FDI_2018 + log(Imp_2018)

Spatial Analysis

Spatial data

#downloading spatial cross-sectional data
Russia_shape_2018 = readOGR(
  'C:/Users/Kolya/Documents/New_studies/Diploma/Geo/Data_geo/Rus_regions_2018.shp')

## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\Kolya\Documents\New_studies\Diploma\Geo\Data_geo\Rus_regions_2018.shp", layer: "Rus_regions_2018"
## with 78 features
## It has 16 fields
## Integer64 fields read as strings:  Edu_2018

Data Transformation

# change columns type (which was changed unintentionally by QGIS)
Russia_shape_2018$Edu_2018  <- as.numeric(Russia_shape_2018$Edu_2018 )

#Descriptive statistics
Russia_shape_2018@data %>% glimpse()

## Rows: 78
## Columns: 16
## $ region    <chr> "Республика Адыгея", "Алтайский край", "Амурская область", "…
## $ GRPL_2018 <dbl> 200763.6, 191088.3, 310611.9, 544107.3, 361074.8, 309524.4, …
## $ InvL_2018 <dbl> 0.1899902, 0.1610921, 0.6433209, 0.2838402, 0.3313535, 0.187…
## $ FDIL_2018 <dbl> 0.0244560199, 0.0048135624, 0.0918287926, 0.0344199371, 0.00…
## $ DivL_2018 <dbl> 0.23205912, 1.89692465, 0.14187907, 0.41500809, 1.91762571, …
## $ EML_2018  <dbl> 3.4829567, 1.6372636, 3.9298785, 3.3955224, 1.6257074, 1.760…
## $ IML_2018  <dbl> 21.435542, 18.728317, 23.504197, 31.265937, 19.608514, 36.56…
## $ Edu_2018  <dbl> 291, 216, 191, 162, 294, 248, 308, 211, 227, 266, 281, 550, …
## $ GRP_2018  <dbl> 210326.8, 196543.3, 333585.8, 603355.3, 455456.1, 347702.7, …
## $ Inv_2018  <dbl> 0.2550824, 0.1848265, 0.7516793, 0.2291233, 0.1903964, 0.154…
## $ FDI_2018  <dbl> 0.001042692, 0.007551503, 0.053901555, 0.002167261, 0.003455…
## $ Pop_2018  <dbl> 1.0105704, 0.9929956, 0.9947223, 0.9919102, 0.9941918, 0.996…
## $ Imp_2018  <dbl> 1400723790, 6951216909, 4525812109, 6904760397, 26005271280,…
## $ EM_2018   <dbl> 3.3816654, 1.5766154, 3.6571892, 3.2307966, 1.6123266, 1.692…
## $ IM_2018   <dbl> 16.9659373, 20.4793791, 32.6429345, 30.0139353, 27.2501188, …
## $ Div_2018  <dbl> 0.24349315, 2.33436715, 0.28093926, 0.48234165, 2.91849684, …

Projections and neighbors

proj4string(Russia_shape_2018) <- CRS("+init=epsg:4326")

Russia.centroids<-gCentroid(Russia_shape_2018,byid=TRUE)

weights <- knn2nb(knearneigh(Russia.centroids, k=8,longlat = TRUE),row.names=Russia_shape_2018$region)
Russia.weights <- nb2listw(weights)

Spatial correlation: Moran tests

#H0 - variable is randomly distributed (no-autocrrelation)
columns  = names(Russia_shape_2018@data)[2:length(names(Russia_shape_2018@data))]
Russia_sp_dat <- Russia_shape_2018@data
for(i in columns){
    res <- moran.test(Russia_sp_dat[[i]], Russia.weights)
    # print(res)
    if(res$p.value < 0.05){
      print(paste(i,': Spatial correlation detected'))
    }
}

## [1] "GRPL_2018 : Spatial correlation detected"
## [1] "InvL_2018 : Spatial correlation detected"
## [1] "FDIL_2018 : Spatial correlation detected"
## [1] "EML_2018 : Spatial correlation detected"
## [1] "IML_2018 : Spatial correlation detected"
## [1] "GRP_2018 : Spatial correlation detected"
## [1] "Inv_2018 : Spatial correlation detected"
## [1] "Pop_2018 : Spatial correlation detected"
## [1] "Imp_2018 : Spatial correlation detected"
## [1] "EM_2018 : Spatial correlation detected"
## [1] "IM_2018 : Spatial correlation detected"

# 6/9 variables appear to be spatially correlated (lags are not important for this analysis - they will be used only later in dynamic models)
# GRP_2018, EM_2018, IM_2018, Inv_2018, Pop_2018, Imp_2018. Two of those - two independent variables, and 1- depedendent one.

Moran plots

for (g in columns){
  moran.plot(Russia_shape_2018@data[[g]], Russia.weights, xlab=g)
}

#graphically, all results of tests are correct

Naive spatial models, Theil specification

Global Moran test for residuals, Theil

attach(Russia_shape_2018@data) #strange ritual

reg_Theil_2018 <- lm(Theil_2018, data = Russia_shape_2018@data)
summary(reg_Theil_2018) #%>% xtable()

## 
## Call:
## lm(formula = Theil_2018, data = Russia_shape_2018@data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5639 -0.2595 -0.1242  0.1738  1.4420 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   15.1016926  8.5683957   1.762 0.082291 .  
## Div_2018      -0.0145501  0.0318961  -0.456 0.649659    
## Edu_2018       0.0002724  0.0006671   0.408 0.684314    
## Pop_2018      -6.8542134  8.4648465  -0.810 0.420803    
## Inv_2018       0.5133863  0.6299200   0.815 0.417796    
## FDI_2018       2.0064298  0.9104057   2.204 0.030776 *  
## log(Imp_2018)  0.1856968  0.0533985   3.478 0.000868 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4418 on 71 degrees of freedom
## Multiple R-squared:  0.3548, Adjusted R-squared:  0.3002 
## F-statistic: 6.506 on 6 and 71 DF,  p-value: 1.673e-05

#H0 - residuals are randomly distributed (no-autocorrelation)
lm.morantest(reg_Theil_2018,Russia.weights)

## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## Moran I statistic standard deviate = 4.371, p-value = 6.183e-06
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      0.191888053     -0.017918857      0.002303925

moran.test(reg_Theil_2018$residuals, Russia.weights)

## 
##  Moran I test under randomisation
## 
## data:  reg_Theil_2018$residuals  
## weights: Russia.weights    
## 
## Moran I statistic standard deviate = 4.1832, p-value = 1.437e-05
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.191888053      -0.012987013       0.002398565

moran.plot(reg_Theil_2018$residuals, Russia.weights)

# residuals are correlated according to the both test and graph

Lagrange multiplier test, Theil

lm.LMtests(reg_Theil_2018, listw = Russia.weights, test="all", zero.policy=TRUE)

## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## LMerr = 12.94, df = 1, p-value = 0.0003217
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## LMlag = 10.612, df = 1, p-value = 0.001123
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## RLMerr = 2.7102, df = 1, p-value = 0.09971
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## RLMlag = 0.38264, df = 1, p-value = 0.5362
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## SARMA = 13.322, df = 2, p-value = 0.00128

# Analitics
# LMerr = 12.94, df = 1, p-value = 0.0003217 => significant
# LMlag = 10.612, df = 1, p-value = 0.001123 => significant
# conclusion => moving to Robust LM Diagnostics

# RLMerr = 2.7102, df = 1, p-value = 0.09971
# RLMlag = 0.38264, df = 1, p-value = 0.5362
# p-value: RLMerr < RLMlag => RLMerr is robust

# #SEM - spatial error model  - we need it in our case (RLMerr is robust)
# y = X beta + rho W1 y + u
# u = lambda W2 u + e

# SAR - spatial autoregressive model
# y = X beta + rho W1 y + u

SEM, Theil

#building an actual model
SEM_Theil_2018 <- errorsarlm(Theil_2018, listw = Russia.weights, zero.policy=TRUE)
sum_Theil_2018 <- summary(SEM_Theil_2018)

# sum_Theil_2018$Coef[, c(1,4)] %>% xtable() # for getting a latex table
# print("NOX")
# print(sum_b$Coef[c(11),])

sum_Theil_2018$Coef[, c(1,4)]

##                    Estimate     Pr(>|z|)
## (Intercept)    6.7718256871 0.3809453767
## Div_2018      -0.0102227293 0.6958316415
## Edu_2018       0.0003887803 0.4827255754
## Pop_2018       1.8851380482 0.8093991903
## Inv_2018       0.0202629801 0.9703711832
## FDI_2018       1.4514549611 0.0562834950
## log(Imp_2018)  0.1762802040 0.0001474848

#comparing our spatial model with the original one

AIC(SEM_Theil_2018)

## [1] 92.96259

AIC(reg_Theil_2018)

## [1] 102.5796

logLik(SEM_Theil_2018)

## 'log Lik.' -37.4813 (df=9)

logLik(reg_Theil_2018)

## 'log Lik.' -43.28981 (df=8)

#AIC - the lower, the better the model
#log-likelihood  - the higher, the better the model

#SEM is better than OLS in this case

Naive spatial models, Margins specification

Global Moran test for residuals, Margins

attach(Russia_shape_2018@data) #strange ritual

## Следующие объекты скрыты от Russia_shape_2018@data (pos = 3):
## 
##     Div_2018, DivL_2018, Edu_2018, EM_2018, EML_2018, FDI_2018,
##     FDIL_2018, GRP_2018, GRPL_2018, IM_2018, IML_2018, Imp_2018,
##     Inv_2018, InvL_2018, Pop_2018, region

reg_Margins_2018 <- lm(Margins_2018, data = Russia_shape_2018@data)
summary(reg_Margins_2018) #%>% xtable()

## 
## Call:
## lm(formula = Margins_2018, data = Russia_shape_2018@data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.62690 -0.17128 -0.02921  0.14315  0.98139 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.800e+01  5.802e+00   3.103  0.00277 ** 
## EM_2018        2.188e-01  4.664e-02   4.690 1.31e-05 ***
## IM_2018        1.800e-02  4.020e-03   4.477 2.86e-05 ***
## Edu_2018       6.504e-04  4.581e-04   1.420  0.16013    
## Pop_2018      -1.289e+01  5.993e+00  -2.150  0.03500 *  
## Inv_2018      -1.231e-01  4.693e-01  -0.262  0.79385    
## FDI_2018       7.532e-01  6.751e-01   1.116  0.26838    
## log(Imp_2018)  2.787e-01  5.170e-02   5.390 8.99e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3276 on 70 degrees of freedom
## Multiple R-squared:  0.6502, Adjusted R-squared:  0.6152 
## F-statistic: 18.58 on 7 and 70 DF,  p-value: 9.456e-14

#H0 - residuals are randomly distributed (no-autocorrelation)
lm.morantest(reg_Margins_2018,Russia.weights)

## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## Moran I statistic standard deviate = 1.9185, p-value = 0.02752
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      0.063190983     -0.024065263      0.002068579

moran.test(reg_Margins_2018$residuals, Russia.weights)

## 
##  Moran I test under randomisation
## 
## data:  reg_Margins_2018$residuals  
## weights: Russia.weights    
## 
## Moran I statistic standard deviate = 1.5356, p-value = 0.06232
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.063190983      -0.012987013       0.002461049

moran.plot(reg_Margins_2018$residuals, Russia.weights)

# residuals are correlated according to the both test and graph

Lagrange multiplier test, Margins

lm.LMtests(reg_Margins_2018, listw = Russia.weights, test="all", zero.policy=TRUE)

## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## LMerr = 1.4033, df = 1, p-value = 0.2362
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## LMlag = 11.087, df = 1, p-value = 0.0008693
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## RLMerr = 1.9221, df = 1, p-value = 0.1656
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## RLMlag = 11.606, df = 1, p-value = 0.0006574
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018@data)
## weights: Russia.weights
## 
## SARMA = 13.009, df = 2, p-value = 0.001497

# Analitics
# LMerr = 1.4033, df = 1, p-value = 0.2362 => insignificant
# LMlag = 11.087, df = 1, p-value = 0.0008693 => significant
# conclusion => SAR

# SEM - spatial error model
# y = X beta + rho W1 y + u
# u = lambda W2 u + e

# SAR - spatial autoregressive model - we need it in our case (LMlag is significant)
# y = X beta + rho W1 y + u

SAR, Margins

#building an actual model
SAR_Margins_2018 <- lagsarlm(Margins_2018, listw = Russia.weights, zero.policy=TRUE)
sum_Margins_2018 <- summary(SAR_Margins_2018)

# sum_Margins_2018$Coef[, c(1,4)] %>% xtable() # for getting a latex table
# print("NOX")
# print(sum_b$Coef[c(11),])

sum_Margins_2018$Coef[, c(1,4)]

##                    Estimate     Pr(>|z|)
## (Intercept)    6.6395153792 2.714358e-01
## EM_2018        0.1999276688 1.276680e-06
## IM_2018        0.0172481103 1.481835e-06
## Edu_2018       0.0007743503 5.586459e-02
## Pop_2018      -6.0189626785 2.831617e-01
## Inv_2018      -0.2512608378 5.463222e-01
## FDI_2018       0.5045494346 4.011729e-01
## log(Imp_2018)  0.2509808088 4.315525e-08

#comparing our spatial model with the original one

AIC(SAR_Margins_2018)

## [1] 49.57632

AIC(reg_Margins_2018)

## [1] 56.83275

logLik(SAR_Margins_2018)

## 'log Lik.' -14.78816 (df=10)

logLik(reg_Margins_2018)

## 'log Lik.' -19.41637 (df=9)

#AIC - the lower, the better the model
#log-likelihood  - the higher, the better the model

#SAR is better than OLS in this case

United table

stargazer(SEM_Theil_2018, SAR_Margins_2018, type = "text", omit = c('Constant'), keep.stat = c("n"), title = 'Cross-sectional Spatial models',
          
add.lines = list(c('model', 'SEM', 'SAR')), header=FALSE, dep.var.labels = NULL, 
model.names = FALSE) #ci = T, font.size = "tiny",

## 
## Cross-sectional Spatial models
## ==========================================
##                   Dependent variable:     
##               ----------------------------
##                      log(GRP_2018)        
##                    (1)            (2)     
## ------------------------------------------
## Div_2018          -0.010                  
##                  (0.026)                  
##                                           
## EM_2018                        0.200***   
##                                 (0.041)   
##                                           
## IM_2018                        0.017***   
##                                 (0.004)   
##                                           
## Edu_2018          0.0004        0.001*    
##                  (0.001)       (0.0004)   
##                                           
## Pop_2018          1.885         -6.019    
##                  (7.816)        (5.608)   
##                                           
## Inv_2018          0.020         -0.251    
##                  (0.546)        (0.416)   
##                                           
## FDI_2018          1.451*         0.505    
##                  (0.760)        (0.601)   
##                                           
## log(Imp_2018)    0.176***      0.251***   
##                  (0.046)        (0.046)   
##                                           
## ------------------------------------------
## model              SEM            SAR     
## Observations        78            78      
## ==========================================
## Note:          *p<0.1; **p<0.05; ***p<0.01

SAME CODE, BUT FOR ROBUST DATASET (NO MSK, SPB)

Spatial Analysis, robust

Spatial data

#downloading spatial cross-sectional data
Russia_shape_2018_robust = readOGR(
  'C:/Users/Kolya/Documents/New_studies/Diploma/Geo/Data_geo/Rus_regions_2018_robust.shp')

## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\Kolya\Documents\New_studies\Diploma\Geo\Data_geo\Rus_regions_2018_robust.shp", layer: "Rus_regions_2018_robust"
## with 76 features
## It has 16 fields
## Integer64 fields read as strings:  Edu_2018

Data Transformation, robust

# change columns_robust type (which was changed unintentionally by QGIS)
Russia_shape_2018_robust$Edu_2018  <- as.numeric(Russia_shape_2018_robust$Edu_2018 )

#Descriptive statistics
Russia_shape_2018_robust@data %>% glimpse()

## Rows: 76
## Columns: 16
## $ region    <chr> "Республика Адыгея", "Алтайский край", "Амурская область", "…
## $ GRPL_2018 <dbl> 200763.6, 191088.3, 310611.9, 544107.3, 361074.8, 309524.4, …
## $ InvL_2018 <dbl> 0.1899902, 0.1610921, 0.6433209, 0.2838402, 0.3313535, 0.187…
## $ FDIL_2018 <dbl> 0.0244560199, 0.0048135624, 0.0918287926, 0.0344199371, 0.00…
## $ DivL_2018 <dbl> 0.23205912, 1.89692465, 0.14187907, 0.41500809, 1.91762571, …
## $ EML_2018  <dbl> 3.482957, 1.637264, 3.929879, 3.395522, 1.625707, 1.760371, …
## $ IML_2018  <dbl> 21.435542, 18.728317, 23.504197, 31.265937, 19.608514, 36.56…
## $ Edu_2018  <dbl> 291, 216, 191, 162, 294, 248, 308, 211, 227, 266, 281, 171, …
## $ GRP_2018  <dbl> 210326.8, 196543.3, 333585.8, 603355.3, 455456.1, 347702.7, …
## $ Inv_2018  <dbl> 0.2550824, 0.1848265, 0.7516793, 0.2291233, 0.1903964, 0.154…
## $ FDI_2018  <dbl> 0.001042692, 0.007551503, 0.053901555, 0.002167261, 0.003455…
## $ Pop_2018  <dbl> 1.0105704, 0.9929956, 0.9947223, 0.9919102, 0.9941918, 0.996…
## $ Imp_2018  <dbl> 1400723790, 6951216909, 4525812109, 6904760397, 26005271280,…
## $ EM_2018   <dbl> 3.381665, 1.576615, 3.657189, 3.230797, 1.612327, 1.692044, …
## $ IM_2018   <dbl> 16.9659373, 20.4793791, 32.6429345, 30.0139353, 27.2501188, …
## $ Div_2018  <dbl> 0.24349315, 2.33436715, 0.28093926, 0.48234165, 2.91849684, …

Projections and neighbors, robust

proj4string(Russia_shape_2018_robust) <- CRS("+init=epsg:4326")

Russia.centroids_robust<-gCentroid(Russia_shape_2018_robust,byid=TRUE)

weights_robust <- knn2nb(knearneigh(Russia.centroids_robust, k=8,longlat = TRUE),row.names=Russia_shape_2018_robust$region)
Russia.weights_robust <- nb2listw(weights_robust)

Spatial correlation: Moran tests, robust

#H0 - variable is randomly distributed (no-autocrrelation)
columns_robust  = names(Russia_shape_2018_robust@data)[2:length(names(Russia_shape_2018_robust@data))]
Russia_sp_dat <- Russia_shape_2018_robust@data
for(i in columns_robust){
    res <- moran.test(Russia_sp_dat[[i]], Russia.weights_robust)
    # print(res)
    if(res$p.value < 0.05){
      print(paste(i,': Spatial correlation detected'))
    }
}

## [1] "GRPL_2018 : Spatial correlation detected"
## [1] "InvL_2018 : Spatial correlation detected"
## [1] "FDIL_2018 : Spatial correlation detected"
## [1] "DivL_2018 : Spatial correlation detected"
## [1] "EML_2018 : Spatial correlation detected"
## [1] "IML_2018 : Spatial correlation detected"
## [1] "Edu_2018 : Spatial correlation detected"
## [1] "GRP_2018 : Spatial correlation detected"
## [1] "Inv_2018 : Spatial correlation detected"
## [1] "Pop_2018 : Spatial correlation detected"
## [1] "EM_2018 : Spatial correlation detected"
## [1] "IM_2018 : Spatial correlation detected"

# 6/9 variables appear to be spatially correlated
#GRP_2018, EM_2018, IM_2018, Edu_2018, Inv_2018, Pop_2018. Two of those - two independent variables, and 1- depedendent one.

Moran plots, robust

for (g in columns_robust){
  moran.plot(Russia_shape_2018_robust@data[[g]], Russia.weights_robust, xlab=g)
}

#graphically, all results of tests are correct

Naive spatial models, Theil specification, robust

Global Moran test for residuals, Theil, robust

attach(Russia_shape_2018_robust@data) #strange ritual

## Следующие объекты скрыты от Russia_shape_2018@data (pos = 3):
## 
##     Div_2018, DivL_2018, Edu_2018, EM_2018, EML_2018, FDI_2018,
##     FDIL_2018, GRP_2018, GRPL_2018, IM_2018, IML_2018, Imp_2018,
##     Inv_2018, InvL_2018, Pop_2018, region

## Следующие объекты скрыты от Russia_shape_2018@data (pos = 4):
## 
##     Div_2018, DivL_2018, Edu_2018, EM_2018, EML_2018, FDI_2018,
##     FDIL_2018, GRP_2018, GRPL_2018, IM_2018, IML_2018, Imp_2018,
##     Inv_2018, InvL_2018, Pop_2018, region

reg_Theil_robust_2018 <- lm(Theil_2018, data = Russia_shape_2018_robust@data)
summary(reg_Theil_robust_2018) #%>% xtable()

## 
## Call:
## lm(formula = Theil_2018, data = Russia_shape_2018_robust@data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6064 -0.2549 -0.1139  0.1627  1.4397 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.440e+01  8.658e+00   1.663 0.100886    
## Div_2018      -4.504e-02  4.120e-02  -1.093 0.278054    
## Edu_2018      -6.517e-05  7.342e-04  -0.089 0.929535    
## Pop_2018      -6.586e+00  8.524e+00  -0.773 0.442366    
## Inv_2018       4.327e-01  6.363e-01   0.680 0.498737    
## FDI_2018       1.621e+00  9.711e-01   1.670 0.099531 .  
## log(Imp_2018)  2.121e-01  5.838e-02   3.633 0.000535 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4436 on 69 degrees of freedom
## Multiple R-squared:  0.3039, Adjusted R-squared:  0.2434 
## F-statistic: 5.021 on 6 and 69 DF,  p-value: 0.0002541

#H0 - residuals are randomly distributed (no-autocorrelation)
lm.morantest(reg_Theil_robust_2018,Russia.weights_robust)

## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## Moran I statistic standard deviate = 4.9636, p-value = 3.459e-07
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      0.216739733     -0.020910814      0.002292331

moran.test(reg_Theil_robust_2018$residuals, Russia.weights_robust)

## 
##  Moran I test under randomisation
## 
## data:  reg_Theil_robust_2018$residuals  
## weights: Russia.weights_robust    
## 
## Moran I statistic standard deviate = 4.636, p-value = 1.776e-06
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.216739733      -0.013333333       0.002462886

moran.plot(reg_Theil_robust_2018$residuals, Russia.weights_robust)

# residuals are correlated according to the both test and graph

Lagrange multiplier test, Theil, robust

lm.LMtests(reg_Theil_robust_2018, listw = Russia.weights_robust, test="all", zero.policy=TRUE)

## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## LMerr = 16.02, df = 1, p-value = 6.269e-05
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## LMlag = 14.836, df = 1, p-value = 0.0001173
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## RLMerr = 1.9779, df = 1, p-value = 0.1596
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## RLMlag = 0.794, df = 1, p-value = 0.3729
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Theil_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## SARMA = 16.814, df = 2, p-value = 0.0002233

# Analitics
# LMerr = 16.02, df = 1, p-value = 6.269e-05 => significant
# LMlag = 14.836, df = 1, p-value = 0.0001173 => significant
# conclusion => moving to Robust LM Diagnostics

# RLMerr = 1.9779, df = 1, p-value = 0.1596
# RLMlag = 0.794, df = 1, p-value = 0.3729
# p-value: RLMerr < RLMlag => RLMerr is robust

# #SEM - spatial error model - we need it in our case (RLMerr is robust)
# y = X beta + rho W1 y + u
# u = lambda W2 u + e

# SAR - spatial autoregressive model
# y = X beta + rho W1 y + u

SEM, Theil, robust

#building an actual model
SEM_Theil_robust_2018 <- errorsarlm(Theil_2018, listw = Russia.weights_robust, zero.policy=TRUE)
sum_Theil_robust_2018 <- summary(SEM_Theil_robust_2018)

# sum_Theil_robust_2018$Coef[, c(1,4)] %>% xtable() # for getting a latex table
# print("NOX")
# print(sum_b$Coef[c(11),])

sum_Theil_robust_2018$Coef[, c(1,4)]

##                    Estimate     Pr(>|z|)
## (Intercept)    6.261629e+00 4.168137e-01
## Div_2018      -5.319917e-02 1.325870e-01
## Edu_2018      -8.200708e-05 8.974239e-01
## Pop_2018       1.649626e+00 8.322620e-01
## Inv_2018      -2.110823e-02 9.688815e-01
## FDI_2018       1.099042e+00 1.607322e-01
## log(Imp_2018)  2.181055e-01 2.714074e-05

#comparing our spatial model with the original one

AIC(SEM_Theil_robust_2018)

## [1] 90.16144

AIC(reg_Theil_robust_2018)

## [1] 100.7762

logLik(SEM_Theil_robust_2018)

## 'log Lik.' -36.08072 (df=9)

logLik(reg_Theil_robust_2018)

## 'log Lik.' -42.38809 (df=8)

#AIC - the lower, the better the model
#log-likelihood  - the higher, the better the model

#SAR is better than OLS in this case

Naive spatial models, Margins specification, robust

Global Moran test for residuals, Margins, robust

attach(Russia_shape_2018_robust@data) #strange ritual

## Следующие объекты скрыты от Russia_shape_2018_robust@data (pos = 3):
## 
##     Div_2018, DivL_2018, Edu_2018, EM_2018, EML_2018, FDI_2018,
##     FDIL_2018, GRP_2018, GRPL_2018, IM_2018, IML_2018, Imp_2018,
##     Inv_2018, InvL_2018, Pop_2018, region

## Следующие объекты скрыты от Russia_shape_2018@data (pos = 4):
## 
##     Div_2018, DivL_2018, Edu_2018, EM_2018, EML_2018, FDI_2018,
##     FDIL_2018, GRP_2018, GRPL_2018, IM_2018, IML_2018, Imp_2018,
##     Inv_2018, InvL_2018, Pop_2018, region

## Следующие объекты скрыты от Russia_shape_2018@data (pos = 5):
## 
##     Div_2018, DivL_2018, Edu_2018, EM_2018, EML_2018, FDI_2018,
##     FDIL_2018, GRP_2018, GRPL_2018, IM_2018, IML_2018, Imp_2018,
##     Inv_2018, InvL_2018, Pop_2018, region

reg_Margins_robust_2018 <- lm(Margins_2018, data = Russia_shape_2018_robust@data)
summary(reg_Margins_robust_2018) #%>% xtable()

## 
## Call:
## lm(formula = Margins_2018, data = Russia_shape_2018_robust@data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.62733 -0.17678 -0.04112  0.15177  0.98194 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.848e+01  6.050e+00   3.054  0.00322 ** 
## EM_2018        2.155e-01  4.829e-02   4.462 3.13e-05 ***
## IM_2018        1.817e-02  4.107e-03   4.424 3.59e-05 ***
## Edu_2018       5.480e-04  5.554e-04   0.987  0.32730    
## Pop_2018      -1.324e+01  6.166e+00  -2.147  0.03534 *  
## Inv_2018      -1.140e-01  4.772e-01  -0.239  0.81200    
## FDI_2018       6.634e-01  7.403e-01   0.896  0.37335    
## log(Imp_2018)  2.745e-01  5.386e-02   5.096 2.96e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3321 on 68 degrees of freedom
## Multiple R-squared:  0.6154, Adjusted R-squared:  0.5758 
## F-statistic: 15.54 on 7 and 68 DF,  p-value: 5.544e-12

#H0 - residuals are randomly distributed (no-autocorrelation)
lm.morantest(reg_Margins_robust_2018,Russia.weights_robust)

## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## Moran I statistic standard deviate = 1.9215, p-value = 0.02734
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      0.062773875     -0.025356811      0.002103697

moran.test(reg_Margins_robust_2018$residuals, Russia.weights_robust)

## 
##  Moran I test under randomisation
## 
## data:  reg_Margins_robust_2018$residuals  
## weights: Russia.weights_robust    
## 
## Moran I statistic standard deviate = 1.5129, p-value = 0.06515
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.062773875      -0.013333333       0.002530626

moran.plot(reg_Margins_robust_2018$residuals, Russia.weights_robust)

# residuals are correlated according to the both test and graph

Lagrange multiplier test, Margins, robust

lm.LMtests(reg_Margins_robust_2018, listw = Russia.weights_robust, test="all", zero.policy=TRUE)

## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## LMerr = 1.3438, df = 1, p-value = 0.2464
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## LMlag = 10.807, df = 1, p-value = 0.001011
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## RLMerr = 2.1706, df = 1, p-value = 0.1407
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## RLMlag = 11.634, df = 1, p-value = 0.0006476
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = Margins_2018, data = Russia_shape_2018_robust@data)
## weights: Russia.weights_robust
## 
## SARMA = 12.978, df = 2, p-value = 0.00152

# Analitics
# LMerr = 1.3438, df = 1, p-value = 0.2464 => insignificant
# LMlag = 10.807, df = 1, p-value = 0.001011 => significant
# conclusion => SAR

# #SEM - spatial error model
# y = X beta + rho W1 y + u
# u = lambda W2 u + e

# SAR - spatial autoregressive model - we need it in our case
# y = X beta + rho W1 y + u

SAR, Margins, robust

#building an actual model
SAR_Margins_robust_2018 <- lagsarlm(Margins_2018, listw = Russia.weights_robust, zero.policy=TRUE)
sum_Margins_robust_2018 <- summary(SAR_Margins_robust_2018)

# sum_Margins_robust_2018$Coef[, c(1,4)] %>% xtable() # for getting a latex table
# print("NOX")
# print(sum_b$Coef[c(11),])

sum_Margins_robust_2018$Coef[, c(1,4)]

##                    Estimate     Pr(>|z|)
## (Intercept)    7.0693883156 2.545338e-01
## EM_2018        0.1963046425 4.260337e-06
## IM_2018        0.0164624395 8.667365e-06
## Edu_2018       0.0005853796 2.326976e-01
## Pop_2018      -6.5002580292 2.556823e-01
## Inv_2018      -0.2245906605 5.949596e-01
## FDI_2018       0.5043277554 4.411717e-01
## log(Imp_2018)  0.2536240230 9.598595e-08

#comparing our spatial model with the original one

AIC(SAR_Margins_robust_2018)

## [1] 50.71042

AIC(reg_Margins_robust_2018)

## [1] 57.68435

logLik(SAR_Margins_robust_2018)

## 'log Lik.' -15.35521 (df=10)

logLik(reg_Margins_robust_2018)

## 'log Lik.' -19.84217 (df=9)

#AIC - the lower, the better the model
#log-likelihood  - the higher, the better the model

#SAR is better than OLS in this case

United table, robust

stargazer(SEM_Theil_robust_2018, SAR_Margins_robust_2018, type = "text", omit = c('Constant'), keep.stat = c("n"), title = 'Cross-sectional Spatial models',
          
add.lines = list(c('model', 'SEM', 'SAR')), header=FALSE, dep.var.labels = NULL, 
model.names = FALSE) #ci = T, font.size = "tiny",

## 
## Cross-sectional Spatial models
## ==========================================
##                   Dependent variable:     
##               ----------------------------
##                      log(GRP_2018)        
##                    (1)            (2)     
## ------------------------------------------
## Div_2018          -0.053                  
##                  (0.035)                  
##                                           
## EM_2018                        0.196***   
##                                 (0.043)   
##                                           
## IM_2018                        0.016***   
##                                 (0.004)   
##                                           
## Edu_2018         -0.0001         0.001    
##                  (0.001)       (0.0005)   
##                                           
## Pop_2018          1.650         -6.500    
##                  (7.789)        (5.719)   
##                                           
## Inv_2018          -0.021        -0.225    
##                  (0.541)        (0.422)   
##                                           
## FDI_2018          1.099          0.504    
##                  (0.784)        (0.655)   
##                                           
## log(Imp_2018)    0.218***      0.254***   
##                  (0.052)        (0.048)   
##                                           
## ------------------------------------------
## model              SEM            SAR     
## Observations        76            76      
## ==========================================
## Note:          *p<0.1; **p<0.05; ***p<0.01

Final table

stargazer(SEM_Theil_2018, SEM_Theil_robust_2018,
          SAR_Margins_2018, SAR_Margins_robust_2018,
          type = "text", omit = c('Constant'), keep.stat = c("n"), title = 'Cross-sectional Spatial models',
          
add.lines = list(c('model', 'SEM', 'SEM', 'SAR', 'SAR')), header=FALSE, dep.var.labels = NULL, 
model.names = FALSE, digits = 4) #ci = T, font.size = "tiny",

## 
## Cross-sectional Spatial models
## =====================================================
##                         Dependent variable:          
##               ---------------------------------------
##                            log(GRP_2018)             
##                  (1)       (2)       (3)       (4)   
## -----------------------------------------------------
## Div_2018       -0.0102   -0.0532                     
##               (0.0261)  (0.0354)                     
##                                                      
## EM_2018                           0.1999*** 0.1963***
##                                   (0.0413)  (0.0427) 
##                                                      
## IM_2018                           0.0172*** 0.0165***
##                                   (0.0036)  (0.0037) 
##                                                      
## Edu_2018       0.0004    -0.0001   0.0008*   0.0006  
##               (0.0006)  (0.0006)  (0.0004)  (0.0005) 
##                                                      
## Pop_2018       1.8851    1.6496    -6.0190   -6.5003 
##               (7.8156)  (7.7886)  (5.6082)  (5.7188) 
##                                                      
## Inv_2018       0.0203    -0.0211   -0.2513   -0.2246 
##               (0.5455)  (0.5411)  (0.4165)  (0.4224) 
##                                                      
## FDI_2018       1.4515*   1.0990    0.5045    0.5043  
##               (0.7604)  (0.7836)  (0.6010)  (0.6548) 
##                                                      
## log(Imp_2018) 0.1763*** 0.2181*** 0.2510*** 0.2536***
##               (0.0464)  (0.0520)  (0.0458)  (0.0475) 
##                                                      
## -----------------------------------------------------
## model            SEM       SEM       SAR       SAR   
## Observations     78        76        78        76    
## =====================================================
## Note:                     *p<0.1; **p<0.05; ***p<0.01

Pretty robust results (betas of independent variables).

Spatial cross-sectional models

Libraries

Cross-sectional specifications

Spatial Analysis

Spatial data

Data Transformation

Projections and neighbors

Spatial correlation: Moran tests

Moran plots

Naive spatial models, Theil specification

Global Moran test for residuals, Theil

Lagrange multiplier test, Theil

SEM, Theil

Naive spatial models, Margins specification

Global Moran test for residuals, Margins

Lagrange multiplier test, Margins

SAR, Margins

United table

SAME CODE, BUT FOR ROBUST DATASET (NO MSK, SPB)

Spatial Analysis, robust

Spatial data

Data Transformation, robust

Projections and neighbors, robust

Spatial correlation: Moran tests, robust

Moran plots, robust

Naive spatial models, Theil specification, robust

Global Moran test for residuals, Theil, robust

Lagrange multiplier test, Theil, robust

SEM, Theil, robust

Naive spatial models, Margins specification, robust

Global Moran test for residuals, Margins, robust

Lagrange multiplier test, Margins, robust

SAR, Margins, robust

United table, robust

Final table