The following research is conducted by Maria R. Koldasheva and Nikolai A. Popov.

Libraries

library(tidyverse)
library(dplyr)
# library(stargazer)
library(writexl)

# geo
library(sp)
library(spdep)
library(rgdal)
library(maptools)
library(rgeos)
library(sf)
library(spatialreg)
library(modelsummary)
# mass retype()
library(hablar)
library(readxl)

# multicollinearity test omcdiag imcdiag
library(mctest)

library(GGally)

#tables
library(xtable)
# library(texreg)
library(remotes)
library(modelsummary)

#spatial panels
library(splm)

library(plm)

Spatial data, pleliminary cleaning

Spatial data

Panel_Ru_shape = readOGR(
  'C:/Users/Kolya/Documents/New_studies/Diploma/Geo/Data_geo/Rus_regions_panel.shp')
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\Kolya\Documents\New_studies\Diploma\Geo\Data_geo\Rus_regions_panel.shp", layer: "Rus_regions_panel"
## with 78 features
## It has 106 fields
## Integer64 fields read as strings:  Edu_2014 Edu_2015 Edu_2016 Edu_2017 Edu_2018 Edu_2019 Edu_2020
Panel_Ru <- Panel_Ru_shape@data
Panel_Ru

Data Transformation

# to numeric
Panel_Ru_shape@data[, c(-1)] <-  Panel_Ru_shape@data[, c(-1)] %>% mutate_if(is.character, as.numeric)
# Panel_Ru_shape@data %>% glimpse() #everything is character


### LONG vs.WIDE format
long_spatial_data <- Panel_Ru_shape@data %>% pivot_longer(cols = !region,
                                            names_to = c(".value", "year"),
                                          names_pattern = "([A-Za-z]+)_(\\d+)")


pdim(long_spatial_data) # we have balanced panel
## Balanced Panel: n = 78, T = 7, N = 546

Weight matrix

#projection
proj4string(Panel_Ru_shape) <- CRS("+init=epsg:4326")

#listw
map_crd <- coordinates(Panel_Ru_shape)
      Points <- SpatialPoints(map_crd)
      Panel_Ru_shape.knn_8 <- knearneigh(Points, k=8)
      K8_nb <- knn2nb(Panel_Ru_shape.knn_8)
      Ru_weight_sq <- nb2listw(K8_nb, style="W")

Panel specifications

# Theil index specification
Theil <- log(GRP) ~ Div + Edu + Pop + Inv + FDI + log(Imp)

# EM and IM specification
Margins <- log(GRP) ~ EM + IM + Edu + Pop + Inv + FDI + log(Imp)

Models

Estimation: two-step Maximum Likelihood

Theil, two-ways spatial FE

Theil_fe_ind_t_lag <- spml(formula = Theil, data = long_spatial_data, index = NULL, listw = Ru_weight_sq,
lag = TRUE, spatial.error = "kkp", model = "within", effect = "twoways", method = "eigen", na.action = na.fail )
sum_Theil_fe_ind_t_lag <- summary(Theil_fe_ind_t_lag)
sum_Theil_fe_ind_t_lag$CoefTable[c(3:nrow(sum_Theil_fe_ind_t_lag$CoefTable)), c(1, 2, 4)]
##               Estimate   Std. Error     Pr(>|t|)
## Div       0.0111134719 0.0056478321 4.909782e-02
## Edu      -0.0003088894 0.0001595472 5.286307e-02
## Pop       2.9719363944 1.2479728300 1.724645e-02
## Inv      -0.2091815186 0.0502235733 3.113434e-05
## FDI      -0.1421869709 0.0456157207 1.826639e-03
## log(Imp)  0.0092693677 0.0068686312 1.771694e-01

Margins, two-ways spatial FE

Margins_fe_ind_t_lag <- spml(formula = Margins, data = long_spatial_data, index = NULL, listw = Ru_weight_sq,
lag = TRUE, spatial.error= "kkp", model = "within", effect = "twoways", method = "eigen", na.action = na.fail )
sum_Margins_fe_ind_t_lag <- summary(Margins_fe_ind_t_lag)
sum_Margins_fe_ind_t_lag$CoefTable[c(3:nrow(sum_Margins_fe_ind_t_lag$CoefTable)), c(1, 2, 4)]
##               Estimate   Std. Error     Pr(>|t|)
## EM        0.0489012184 0.0103661811 2.389009e-06
## IM        0.0019238919 0.0004722533 4.624105e-05
## Edu      -0.0002995858 0.0001603018 6.163858e-02
## Pop       2.2736845112 1.2498112828 6.887753e-02
## Inv      -0.1835564190 0.0508622422 3.075057e-04
## FDI      -0.1315011909 0.0462148352 4.435117e-03
## log(Imp)  0.0091467015 0.0068733384 1.832710e-01

United table creating

# Table for Theil
united_table_spatial_panels_Theil <- cbind(sum_Theil_fe_ind_t_lag$CoefTable[c(3:nrow(sum_Theil_fe_ind_t_lag$CoefTable)), c(1, 2, 4)])

# Table for Margins
united_table_spatial_panels_Margins <- 
cbind(sum_Margins_fe_ind_t_lag$CoefTable[c(3:nrow(sum_Margins_fe_ind_t_lag$CoefTable)), c(1, 2, 4)])

# matrixes into dataframe
united_table_spatial_panels_Theil <- as.data.frame(united_table_spatial_panels_Theil)

united_table_spatial_panels_Margins <- as.data.frame(united_table_spatial_panels_Margins)

# renaming
names(united_table_spatial_panels_Theil) <- c('Est_Theil', 'SE_Theil', 'Pvalue_Theil')

names(united_table_spatial_panels_Margins) <- c('Est_Margins', 'SE_Margins', 'Pvalue_Margins')

united_table_spatial_panels_Theil
united_table_spatial_panels_Margins

Table

ti_Theil <- data.frame(
  term = c('Div', 'Edu', 'Pop', 'Inv', 'FDI', 'log(Imp)'),
  estimate = united_table_spatial_panels_Theil$Est_Theil,
  std.error = united_table_spatial_panels_Theil$SE_Theil,
  p.value = united_table_spatial_panels_Theil$Pvalue_Theil)

ti_Margins <- data.frame(
  term = c('EM', 'IM', 'Edu', 'Pop', 'Inv', 'FDI', 'log(Imp)'),
  estimate = united_table_spatial_panels_Margins$Est_Margins,
  std.error = united_table_spatial_panels_Margins$SE_Margins,
  p.value = united_table_spatial_panels_Margins$Pvalue_Margins)

gl <- data.frame(
  Observations = "546",
  Model = "SARAR")

mod_Theil <- list(tidy = ti_Theil, glance = gl)
mod_Margins <- list(tidy = ti_Margins, glance = gl)

# Model class
class(mod_Theil) <- "modelsummary_list"
class(mod_Margins) <- "modelsummary_list"


# created named list
models <- list()
models[['log(GRP) (1)']] <- mod_Theil
models[['log(GRP) (2)']] <- mod_Margins

# Model
modelsummary(models, stars = T, title = 'Model with fixed and time effects, considering spatial weights', fmt = 3, coef_map = c('Div', 'EM', 'IM', 'Edu', 'Pop', 'Inv', 'FDI', 'log(Imp)'))
Model with fixed and time effects, considering spatial weights
log(GRP) (1)  log(GRP) (2)
Div 0.011*
(0.006)
EM 0.049***
(0.010)
IM 0.002***
(0.000)
Edu 0.000+ 0.000+
(0.000) (0.000)
Pop 2.972* 2.274+
(1.248) (1.250)
Inv -0.209*** -0.184***
(0.050) (0.051)
FDI -0.142** -0.132**
(0.046) (0.046)
log(Imp) 0.009 0.009
(0.007) (0.007)
Observations 546 546
Model SARAR SARAR
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
#output = 'latex'
# modelsummary(mod)

SAME CODE, BUT FOR ROBUST DATASET (NO MSK, SPB)

Spatial data, pleliminary cleaning, robust

Spatial data, robust

Panel_Ru_robust_shape = readOGR(
  'C:/Users/Kolya/Documents/New_studies/Diploma/Geo/Data_geo/Rus_regions_panel_robust.shp')
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\Kolya\Documents\New_studies\Diploma\Geo\Data_geo\Rus_regions_panel_robust.shp", layer: "Rus_regions_panel_robust"
## with 76 features
## It has 106 fields
## Integer64 fields read as strings:  Edu_2014 Edu_2015 Edu_2016 Edu_2017 Edu_2018 Edu_2019 Edu_2020
Panel_Ru_robust <- Panel_Ru_robust_shape@data
Panel_Ru_robust

Data Transformation, robust

# to numeric
Panel_Ru_robust_shape@data[, c(-1)] <-  Panel_Ru_robust_shape@data[, c(-1)] %>% mutate_if(is.character, as.numeric)
# Panel_Ru_robust_shape@data %>% glimpse() #everything is character


### LONG vs.WIDE format
long_spatial_data_robust <- Panel_Ru_robust_shape@data %>% pivot_longer(cols = !region,
                                            names_to = c(".value", "year"),
                                          names_pattern = "([A-Za-z]+)_(\\d+)")

pdim(long_spatial_data_robust) # we have balanced panel
## Balanced Panel: n = 76, T = 7, N = 532

Weight matrix, robust

#projection
proj4string(Panel_Ru_robust_shape) <- CRS("+init=epsg:4326")

#listw
map_crd_robust <- coordinates(Panel_Ru_robust_shape)
      Points_robust <- SpatialPoints(map_crd_robust)
      Panel_Ru_robust_shape.knn_8 <- knearneigh(Points_robust, k=8)
      K8_nb_robust <- knn2nb(Panel_Ru_robust_shape.knn_8)
      Ru_weight_sq_robust <- nb2listw(K8_nb_robust, style="W")

Models, robust

Theil, two-ways spatial FE, robust

Theil_fe_robust_ind_t_lag <- spml(formula = Theil, data = long_spatial_data_robust, index = NULL, listw = Ru_weight_sq_robust,
lag = TRUE, spatial.error = "kkp", model = "within", effect = "twoways", method = "eigen", na.action = na.fail )
sum_Theil_robust_fe_robust_ind_t_lag <- summary(Theil_fe_robust_ind_t_lag)
sum_Theil_robust_fe_robust_ind_t_lag$CoefTable[c(3:nrow(sum_Theil_robust_fe_robust_ind_t_lag$CoefTable)), c(1, 2, 4)]
##               Estimate   Std. Error     Pr(>|t|)
## Div       0.0082331920 0.0061684476 1.819658e-01
## Edu      -0.0003378878 0.0001704089 4.738905e-02
## Pop       2.9960752591 1.3304736398 2.432946e-02
## Inv      -0.2283204402 0.0520548782 1.153752e-05
## FDI      -0.1522890505 0.0475395152 1.358056e-03
## log(Imp)  0.0108270320 0.0069758888 1.206467e-01

Margins, two-ways spatial FE, robust

Margins_fe_robust_ind_t_lag <- spml(formula = Margins, data = long_spatial_data_robust, index = NULL, listw = Ru_weight_sq_robust,
lag = TRUE, spatial.error= "kkp", model = "within", effect = "twoways", method = "eigen", na.action = na.fail )
sum_Margins_robust_fe_robust_ind_t_lag <- summary(Margins_fe_robust_ind_t_lag)
sum_Margins_robust_fe_robust_ind_t_lag$CoefTable[c(3:nrow(sum_Margins_robust_fe_robust_ind_t_lag$CoefTable)), c(1, 2, 4)]
##               Estimate   Std. Error     Pr(>|t|)
## EM        0.0531103680 0.0103212028 2.664487e-07
## IM        0.0021014189 0.0004745664 9.507469e-06
## Edu      -0.0004778364 0.0001566676 2.288410e-03
## Pop       2.9955924379 1.3304845300 2.435358e-02
## Inv      -0.1795212371 0.0507094739 3.998371e-04
## FDI      -0.1186037127 0.0461372319 1.015013e-02
## log(Imp)  0.0065456276 0.0068836824 3.416600e-01

United table creating, robust

# Table for Theil
united_table_spatial_panels_Theil_robust <- cbind(sum_Theil_robust_fe_robust_ind_t_lag$CoefTable[c(3:nrow(sum_Theil_robust_fe_robust_ind_t_lag$CoefTable)), c(1, 2, 4)])

# Table for Margins
united_table_spatial_panels_Margins_robust <- 
cbind(sum_Margins_robust_fe_robust_ind_t_lag$CoefTable[c(3:nrow(sum_Margins_robust_fe_robust_ind_t_lag$CoefTable)), c(1, 2, 4)])

# matrixes into dataframe
united_table_spatial_panels_Theil_robust <- as.data.frame(united_table_spatial_panels_Theil_robust)

united_table_spatial_panels_Margins_robust <- as.data.frame(united_table_spatial_panels_Margins_robust)

# renaming
names(united_table_spatial_panels_Theil_robust) <- c('Est_Theil_robust', 'SE_Theil_robust', 'Pvalue_Theil_robust')

names(united_table_spatial_panels_Margins_robust) <- c('Est_Margins_robust', 'SE_Margins_robust', 'Pvalue_Margins_robust')

united_table_spatial_panels_Theil_robust
united_table_spatial_panels_Margins_robust

Table, robust

ti_Theil_robust <- data.frame(
  term = c('Div', 'Edu', 'Pop', 'Inv', 'FDI', 'log(Imp)'),
  estimate = united_table_spatial_panels_Theil_robust$Est_Theil_robust,
  std.error = united_table_spatial_panels_Theil_robust$SE_Theil_robust,
  p.value = united_table_spatial_panels_Theil_robust$Pvalue_Theil_robust)

ti_Margins_robust <- data.frame(
  term = c('EM', 'IM', 'Edu', 'Pop', 'Inv', 'FDI', 'log(Imp)'),
  estimate = united_table_spatial_panels_Margins_robust$Est_Margins_robust,
  std.error = united_table_spatial_panels_Margins_robust$SE_Margins_robust,
  p.value = united_table_spatial_panels_Margins_robust$Pvalue_Margins_robust)

gl <- data.frame(
  Observations = "532",
  Model = "SARAR")

mod_Theil_robust <- list(tidy = ti_Theil_robust, glance = gl)
mod_Margins_robust <- list(tidy = ti_Margins_robust, glance = gl)

# Model class
class(mod_Theil_robust) <- "modelsummary_list"
class(mod_Margins_robust) <- "modelsummary_list"


# created named list
models <- list()
models[['log(GRP) (1)']] <- mod_Theil_robust
models[['log(GRP) (2)']] <- mod_Margins_robust

# Model
modelsummary(models, stars = T, title = 'Model with fixed and time effects, considering spatial weights, robust', fmt = 3, coef_map = c('Div', 'EM', 'IM', 'Edu', 'Pop', 'Inv', 'FDI', 'log(Imp)'))
Model with fixed and time effects, considering spatial weights, robust
log(GRP) (1)  log(GRP) (2)
Div 0.008
(0.006)
EM 0.053***
(0.010)
IM 0.002***
(0.000)
Edu 0.000* 0.000**
(0.000) (0.000)
Pop 2.996* 2.996*
(1.330) (1.330)
Inv -0.228*** -0.180***
(0.052) (0.051)
FDI -0.152** -0.119*
(0.048) (0.046)
log(Imp) 0.011 0.007
(0.007) (0.007)
Observations 532 532
Model SARAR SARAR
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
#output = 'latex'
# modelsummary(mod)

Final Table

# created named list
models <- list()
models[['log(GRP) (1)']] <- mod_Theil
models[['log(GRP) (2)']] <- mod_Theil_robust
models[['log(GRP) (3)']] <- mod_Margins
models[['log(GRP) (4)']] <- mod_Margins_robust

# Model
modelsummary(models, stars = T, title = 'Model with fixed and time effects, considering spatial weights, robust', fmt = 4, coef_map = c('Div', 'EM', 'IM', 'Edu', 'Pop', 'Inv', 'FDI', 'log(Imp)'))
Model with fixed and time effects, considering spatial weights, robust
log(GRP) (1)  log(GRP) (2)  log(GRP) (3)  log(GRP) (4)
Div 0.0111* 0.0082
(0.0056) (0.0062)
EM 0.0489*** 0.0531***
(0.0104) (0.0103)
IM 0.0019*** 0.0021***
(0.0005) (0.0005)
Edu -0.0003+ -0.0003* -0.0003+ -0.0005**
(0.0002) (0.0002) (0.0002) (0.0002)
Pop 2.9719* 2.9961* 2.2737+ 2.9956*
(1.2480) (1.3305) (1.2498) (1.3305)
Inv -0.2092*** -0.2283*** -0.1836*** -0.1795***
(0.0502) (0.0521) (0.0509) (0.0507)
FDI -0.1422** -0.1523** -0.1315** -0.1186*
(0.0456) (0.0475) (0.0462) (0.0461)
log(Imp) 0.0093 0.0108 0.0091 0.0065
(0.0069) (0.0070) (0.0069) (0.0069)
Observations 546 532 546 532
Model SARAR SARAR SARAR SARAR
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
#output = 'latex'
# modelsummary(mod)

Direct/indirect effects

The total impact takes into account the direct and indirect effects of a variable, while the coefficient estimate only measures the direct effect. Thus, direct effects should coincide with estimated coefficients, which they do (to some extent due to the differences in methods of calculations) in the following analysis.

Direct/indirect effects calculation

# Calculate direct and indirect effects
Theil_fe_ind_t_lag_effects <- summary(impacts(Theil_fe_ind_t_lag, listw=Ru_weight_sq,
                                      time = 7 ), short = TRUE)

Margins_fe_ind_t_lag_effects <- summary(impacts(Margins_fe_ind_t_lag, listw=Ru_weight_sq,
                                        time = 7), short = TRUE)

Theil_fe_robust_ind_t_lag_effects <- summary(impacts(Theil_fe_robust_ind_t_lag,
                                                     listw=Ru_weight_sq_robust,
                                                     time = 7), short = TRUE)

Margins_fe_robust_ind_t_lag_effects <- summary(impacts(Margins_fe_robust_ind_t_lag,  
                                                       listw=Ru_weight_sq_robust,
                                                       time = 7), short = TRUE)

summary(Margins_fe_robust_ind_t_lag)
## Spatial panel fixed effects sarar model
##  
## 
## Call:
## spml(formula = Margins, data = long_spatial_data_robust, index = NULL, 
##     listw = Ru_weight_sq_robust, na.action = na.fail, model = "within", 
##     effect = "twoways", lag = TRUE, spatial.error = "kkp", method = "eigen")
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -0.1734163 -0.0316115  0.0012378  0.0295382  0.3097877 
## 
## Spatial error parameter:
##     Estimate Std. Error t-value Pr(>|t|)  
## rho -0.47418    0.26591 -1.7832  0.07455 .
## 
## Spatial autoregressive coefficient:
##        Estimate Std. Error t-value Pr(>|t|)   
## lambda  0.45077    0.14037  3.2113 0.001322 **
## 
## Coefficients:
##             Estimate  Std. Error t-value  Pr(>|t|)    
## EM        0.05311037  0.01032120  5.1458 2.664e-07 ***
## IM        0.00210142  0.00047457  4.4281 9.507e-06 ***
## Edu      -0.00047784  0.00015667 -3.0500 0.0022884 ** 
## Pop       2.99559244  1.33048453  2.2515 0.0243536 *  
## Inv      -0.17952124  0.05070947 -3.5402 0.0003998 ***
## FDI      -0.11860371  0.04613723 -2.5707 0.0101501 *  
## log(Imp)  0.00654563  0.00688368  0.9509 0.3416600    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
print('now Direct/indirect effects: =================================')
## [1] "now Direct/indirect effects: ================================="
Margins_fe_robust_ind_t_lag_effects
## Impact measures (lag, trace):
##                 Direct      Indirect         Total
## EM        0.0546921995  0.0420081828  0.0967003824
## IM        0.0021640072  0.0016621385  0.0038261457
## Edu      -0.0004920682 -0.0003779495 -0.0008700177
## Pop       3.0848127339  2.3693941448  5.4542068787
## Inv      -0.1848680719 -0.1419941387 -0.3268622106
## FDI      -0.1221361887 -0.0938108065 -0.2159469951
## log(Imp)  0.0067405817  0.0051773304  0.0119179121
## ========================================================
## Simulation results ( variance matrix):

Direct/indirect effects, Theil table

Theil_table <- data.frame('Direct' = Theil_fe_ind_t_lag_effects$res$direct,
                          'Indirect' = Theil_fe_ind_t_lag_effects$res$indirect,
                          'Total' = Theil_fe_ind_t_lag_effects$res$total,
                          'Direct_r' = Theil_fe_robust_ind_t_lag_effects$res$direct,
                          'Indirect_r' = Theil_fe_robust_ind_t_lag_effects$res$indirect,
                          'Total_r' = Theil_fe_robust_ind_t_lag_effects$res$total,
                          row.names = c('Div', 'Edu', 'Pop', 'Inv', 'FDI', 'log(Imp)'))

xtable(Theil_table, include.rownames = FALSE,
       digits = 4, caption = 'Direct and indirect effects for the Theil specification',
       type = "latex")

Direct/indirect effects, Margins table

Margins_table <- data.frame('Direct' = Margins_fe_ind_t_lag_effects$res$direct,
                            'Indirect' = Margins_fe_ind_t_lag_effects$res$indirect,
                            'Total' = Margins_fe_ind_t_lag_effects$res$total,
                            'Direct_r' = Margins_fe_robust_ind_t_lag_effects$res$direct,
                            'Indirect_r' = Margins_fe_robust_ind_t_lag_effects$res$indirect,
                            'Total_r' = Margins_fe_robust_ind_t_lag_effects$res$total,
                            row.names =   c('EM', 'IM', 'Edu', 'Pop', 'Inv', 'FDI', 'log(Imp)'))
xtable(Margins_table, digits = 4, caption = 'Direct and indirect effects for the Margins specification',
       type = "latex")