Links to sources:

https://cran.r-project.org/web/packages/SDPDmod/vignettes/spatial_model.html

https://rdrr.io/github/RozetaSimonovska/SDPDmod/man/SDPDm.html

The following research is conducted by Maria R. Koldasheva and Nikolai A. Popov.

Libraries

library(dplyr) 
library(psych)
library(readxl)
library(writexl)
library(kableExtra)

# Econometrics
library(tidyverse)
library(plm) # panel models
library(sandwich) #covariance matrixes
library(lmtest) # tests
library(xtable)# latex tables
library(stargazer) # latex regression tables
library(ggpubr) # correlation test

# geo
library(sp)
library(spdep)
library(rgdal)
library(maptools)
library(rgeos)
library(sf)
library(spatialreg)
library(modelsummary)
# mass retype()
library(hablar)


# Spartial dynamic models

# install.packages("SDPDmod") # better to do it manually through the R interface

library(SDPDmod)

Downloading the data

# Downloading
long_data <- read_csv('C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Long_united_data.csv',
                      show_col_types = FALSE)

long_data

Spatial data, pleliminary cleaning

Spatial data

Panel_Ru_shape = readOGR(
  'C:/Users/Kolya/Documents/New_studies/Diploma/Geo/Data_geo/Rus_regions_panel.shp')
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\Kolya\Documents\New_studies\Diploma\Geo\Data_geo\Rus_regions_panel.shp", layer: "Rus_regions_panel"
## with 78 features
## It has 106 fields
## Integer64 fields read as strings:  Edu_2014 Edu_2015 Edu_2016 Edu_2017 Edu_2018 Edu_2019 Edu_2020
Panel_Ru <- Panel_Ru_shape@data

# obtain list of regions (in an appropriate order)
region_order = Panel_Ru$region

# # sorting by 'region'
# Panel_Ru <- Panel_Ru[
#   order(Panel_Ru[,1] ),
# ]
# 
# # reseting index
# rownames(Panel_Ru) <- 1:nrow(Panel_Ru)

Data Transformation

# to numeric
Panel_Ru_shape@data[, c(-1)] <-  Panel_Ru_shape@data[, c(-1)] %>% mutate_if(is.character, as.numeric)
# Panel_Ru_shape@data %>% glimpse() #everything is character


### LONG vs.WIDE format
long_spatial_data <- Panel_Ru_shape@data %>% pivot_longer(cols = !region,
                                            names_to = c(".value", "year"),
                                          names_pattern = "([A-Za-z]+)_(\\d+)")

pdim(long_spatial_data) # we have balanced panel
## Balanced Panel: n = 78, T = 7, N = 546

Weight matrix

# projection
proj4string(Panel_Ru_shape) <- CRS("+init=epsg:4326")

# listw
map_crd <- coordinates(Panel_Ru_shape)
      Points <- SpatialPoints(map_crd)
      Panel_Ru_shape.knn_8 <- knearneigh(Points, k=8)
      K8_nb <- knn2nb(Panel_Ru_shape.knn_8)
      # Ru_weight_sq <- nb2listw(K8_nb, style="W")
      
# creating binary contiguity matrix

## creating a neighbor matrix
near_neighbor <- apply(matrix(K8_nb),1,unlist) %>% t()

## binary contiguity matrix 
nr <- nrow(near_neighbor)
ru78 <- matrix(0, nr, nr)
for(i in 1:nr) ru78[i, unlist(near_neighbor[i, ])] <- 1

# rename by regions' names
rownames(ru78) = region_order
colnames(ru78) = region_order

# sort by regions' names
ru78 <- ru78[order(rownames(ru78)), order(colnames(ru78))]


# row normalized matrix
W <- rownor(ru78)
isrownor(W)
## [1] TRUE

Model construction

Specifications, Panel models

Theil index specification

Theil <- log(GRP) ~ lag(log(GRP)) + lag(Div) + Edu + Pop + lag(Inv) + lag(FDI) + log(Imp)

EM and IM specification

Margins <- log(GRP) ~ lag(log(GRP)) + lag(EM) + lag(IM) + Edu + Pop + lag(Inv) +lag(FDI)+ log(Imp)

# Theil index specification
Theil_d <- GRP ~ DivL + Edu + Pop + InvL + FDIL + Imp

# EM and IM specification
Margins_d <- GRP ~ EML + IML + Edu + Pop + InvL + FDIL + Imp

Manually making logarythms for “SDPDm”

# new dataset
long_data_log = data.frame(long_data)

# Check if memory addresses are the same
tracemem(long_data_log) == tracemem(long_data)
# FALSE expected

# logarythm of aprropriate columns
long_data_log$GRP=log(long_data_log$GRP)
long_data_log$Imp=log(long_data_log$Imp)
long_data_log$GRPL = log(long_data_log$GRPL)

Model setting

Chosing the correct model specification, Theil

Uniform prior check, Theil
Uniform_Theil<-blmpSDPD(formula = Theil_d, W = W,
               data = long_data_log,
               index = c("region","year"),
               model = list("sar","sdm","sem","sdem","slx", "ols"), 
               effect = "twoways",
               ldet = "mc",
               dynamic = TRUE,
               prior = "uniform")
Uniform_Theil$probs

We have two available models in SDPDm package: SAR and SDM. As could be observed, SAR model is preferable (has much higher posterior probability). We also don’t consider OLS - already built those models.

Beta prior check, Theil
d_Theil <- plm::pdata.frame(long_data_log, index=c('region', 'year'))
data_Theil<-d_Theil[which(!is.na(d_Theil$GRPL)),]
rownames(data_Theil)<-1:nrow(data_Theil)
kk_Theil<-which(colnames(data_Theil)=="GRPL")

Beta_Theil<- blmpSDPD(formula = Theil_d,
                   W = W,
                   data = data_Theil,
                   index = c("region","year"),
                   model = list("sar","sdm","sem","sdem", "slx", "ols"),
                   effect = "twoways",
                   ldet = "mc",
                   dynamic = TRUE,
                   tlaginfo = list(ind = kk_Theil),
                   prior = "beta")
Beta_Theil$probs

SAR is still better than sdm (but now is also is preferred to OLS).

Estimation: Maximum Likelihood

Theil

# Theil
Theil_sp_d <- SDPDm(formula = Theil_d,
            data = long_data_log,
            W = W,
            index = c("region","year"),
            model = "sar", 
            effect = "twoways",
            LYtrans = T,
            dynamic = T,
            tlaginfo = list(ind = NULL, tl = T, stl = T))
            # ind - time lag
            # tl -  lag of the dependent variable in time
            # stl - lag of the dependent variable in space and time
# Theil_sp_d_sum <- summary(Theil_sp_d)
summary(Theil_sp_d)
## sar dynamic panel model with twoways fixed effects
## 
## Call:
## SDPDm(formula = Theil_d, data = long_data_log, W = W, index = c("region", 
##     "year"), model = "sar", effect = "twoways", dynamic = T, 
##     tlaginfo = list(ind = NULL, tl = T, stl = T), LYtrans = T)
## 
## Spatial autoregressive coefficient:
##     Estimate Std. Error t-value Pr(>|t|)
## rho  1.59927    0.24767  6.4574   0.6703
## 
## Coefficients:
##              Estimate Std. Error t-value  Pr(>|t|)    
## GRP(t-1)    0.9408778  0.4929482  1.9087 1.026e-09 ***
## W*GRP(t-1) -5.5132907  3.5022173 -1.5742  0.949493    
## DivL       -0.0398740  0.0821683 -0.4853  0.230421    
## Edu        -0.0027873  0.0028698 -0.9713  0.278472    
## Pop        -1.5993951  3.2540153 -0.4915  0.432883    
## InvL        0.5646791  0.1184326  4.7679  0.008096 ** 
## FDIL        0.0245537  0.2111381  0.1163  0.133299    
## Imp        -0.0238194  0.0176943 -1.3462  0.080043 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Theil_sp_d$pval
##     GRP(t-1)   W*GRP(t-1)         DivL          Edu          Pop         InvL 
## 1.026423e-09 9.494929e-01 2.304209e-01 2.784716e-01 4.328831e-01 8.095622e-03 
##         FDIL          Imp 
## 1.332988e-01 8.004273e-02
Theil_imp  <- impactsSDPDm(Theil_sp_d)
Theil_imp
## $DIRECTst.tab
##          Estimate  Std. Error     t-value  Pr(>|t|)
## DivL -0.005864201 0.344446586 -0.01702499 0.9864167
## Edu  -0.002799822 0.009558269 -0.29292140 0.7695822
## Pop  -2.046827715 9.780409564 -0.20927832 0.8342310
## InvL  0.794644622 4.555137560  0.17445019 0.8615117
## FDIL  0.124385387 1.226286189  0.10143259 0.9192071
## Imp  -0.041346576 0.308954061 -0.13382759 0.8935389
## 
## $INDIRECTst.tab
##          Estimate   Std. Error     t-value  Pr(>|t|)
## DivL  0.061867749   1.88096037  0.03289157 0.9737611
## Edu   0.007309787   0.04918542  0.14861694 0.8818559
## Pop   2.788092569 108.72780618  0.02564287 0.9795422
## InvL -1.913159477   9.19504030 -0.20806428 0.8351788
## FDIL -0.153810983   6.50407069 -0.02364842 0.9811331
## Imp   0.075119595   0.82231727  0.09135111 0.9272136
## 
## $TOTALst.tab
##          Estimate   Std. Error      t-value  Pr(>|t|)
## DivL  0.056003548   1.87478014  0.029872061 0.9761691
## Edu   0.004509965   0.04897069  0.092095183 0.9266224
## Pop   0.741264854 110.25688540  0.006723071 0.9946358
## InvL -1.118514854   8.06521200 -0.138683875 0.8897000
## FDIL -0.029425597   6.48757221 -0.004535687 0.9963811
## Imp   0.033773018   0.77331454  0.043673068 0.9651650
## 
## $DIRECTlt.tab
##          Estimate  Std. Error     t-value  Pr(>|t|)
## DivL  0.014314824 0.172632883  0.08292061 0.9339147
## Edu   0.001914659 0.008907293  0.21495414 0.8298031
## Pop   2.160957944 8.337112330  0.25919741 0.7954829
## InvL -0.529024303 1.902531229 -0.27806340 0.7809637
## FDIL -0.068962820 0.394950286 -0.17461139 0.8613850
## Imp   0.025142851 0.095451678  0.26340921 0.7922352
## 
## $INDIRECTlt.tab
##           Estimate   Std. Error     t-value  Pr(>|t|)
## DivL   0.045464849 1.073903e+00  0.04233608 0.9662308
## Edu   -0.002678341 8.929376e-03 -0.29994720 0.7642174
## Pop  -12.324345246 1.445155e+02 -0.08528044 0.9320384
## InvL   1.874791414 1.803604e+01  0.10394696 0.9172114
## FDIL   0.425476932 5.614631e+00  0.07578003 0.9395941
## Imp   -0.104374464 1.124872e+00 -0.09278783 0.9260721
## 
## $TOTALlt.tab
##           Estimate   Std. Error     t-value  Pr(>|t|)
## DivL  5.977967e-02 1.063367e+00  0.05621735 0.9551687
## Edu  -7.636817e-04 7.178920e-04 -1.06378363 0.2874267
## Pop  -1.016339e+01 1.449591e+02 -0.07011211 0.9441044
## InvL  1.345767e+00 1.803824e+01  0.07460634 0.9405279
## FDIL  3.565141e-01 5.631810e+00  0.06330364 0.9495247
## Imp  -7.923161e-02 1.127027e+00 -0.07030142 0.9439538
## 
## attr(,"class")
## [1] "impactsSDPDm"

Note:

  1. LYtrans = T

The Lee-Yu (LY) transformation is a commonly used technique in spatial econometrics to reduce spatial correlation in panel data models. It involves taking the first-differences of each variable in the model and dividing by the square root of the spatial weight matrix. This transformation converts the original spatial panel data model into a non-spatial one, which allows the use of standard panel data estimation techniques. After estimation, the transformed coefficients can be converted back to the original scale by multiplying by the square root of the weight matrix. The LY transformation was introduced by Lee and Yu (2010) as an improvement over earlier transformations that were found to have certain drawbacks.

Note, that p-value in summary is calculated wrongly (for coefficients. It’s correct for direct/indirect effects.) ## How to calculate P-value

# from INDIRECTlt.tab, EML
# Create two vectors of data

R = -0.03427117
SE = 0.23572770
# calculate t-value

t_value <- R / SE

# calculate p-value
p_value <- 2 * pt(abs(t_value), df = Inf, lower.tail = FALSE)
p_value
## [1] 0.8844072

Chosing the correct model specification, Margins

Uniform prior check, Margins
Uniform_Margins<-blmpSDPD(formula = Margins_d, W = W,
               data = long_data_log,
               index = c("region","year"),
               model = list("sar","sdm","sem","sdem", "slx", "ols"), 
               effect = "twoways",
               ldet = "mc",
               dynamic = TRUE,
               prior = "uniform")
Uniform_Margins$probs

We have two available models in SDPDm package: SAR and SDM. As could be observed, SAR model is preferable (has much higher posterior probability). We also don’t consider OLS - already built those models.

Beta prior check, Margins
d_Margins <- plm::pdata.frame(long_data_log, index=c('region', 'year'))
data_Margins<-d_Margins[which(!is.na(d_Margins$GRPL)),]
rownames(data_Margins)<-1:nrow(data_Margins)
Margins_kk<-which(colnames(data_Margins)=="GRPL")

Beta_Margins <- blmpSDPD(formula = Margins_d,
                   W = W,
                   data = data_Margins,
                   index = c("region","year"),
                   model = list("sar","sdm","sem","sdem", "slx", "ols"),
                   effect = "twoways",
                   ldet = "mc",
                   dynamic = TRUE,
                   tlaginfo = list(ind = Margins_kk),
                   prior = "beta")
Beta_Margins$probs

SAR is still better than sdm (but now is also is preferred to OLS).

Margins

# Margins
Margins_sp_d <- SDPDm(formula = Margins_d,
            data = long_data_log,
            W = W,
            index = c("region","year"),
            model = "sar", 
            effect = "twoways",
            LYtrans = T,
            dynamic = T,
            tlaginfo = list(ind = NULL, tl = T, stl = T))

# Margins_sp_d_sum <- summary(Margins_sp_d) # R cannot create an object of summary in memory - possible bug
summary(Margins_sp_d)
## sar dynamic panel model with twoways fixed effects
## 
## Call:
## SDPDm(formula = Margins_d, data = long_data_log, W = W, index = c("region", 
##     "year"), model = "sar", effect = "twoways", dynamic = T, 
##     tlaginfo = list(ind = NULL, tl = T, stl = T), LYtrans = T)
## 
## Spatial autoregressive coefficient:
##     Estimate Std. Error t-value Pr(>|t|)
## rho  1.40442    0.27742  5.0624   0.6464
## 
## Coefficients:
##               Estimate  Std. Error t-value  Pr(>|t|)    
## GRP(t-1)    0.90231030  0.08496440 10.6199 2.106e-09 ***
## W*GRP(t-1) -4.58803187  1.99865954 -2.2956  0.951840    
## EML        -0.02371364  0.04208516 -0.5635  0.875046    
## IML        -0.00016973  0.01315530 -0.0129  0.294919    
## Edu        -0.00251783  0.00443859 -0.5673  0.278256    
## Pop        -0.85613353 17.21270080 -0.0497  0.736005    
## InvL        0.51618026  0.37568230  1.3740  0.005603 ** 
## FDIL        0.02544955  0.96360420  0.0264  0.182600    
## Imp        -0.01880556  0.05915189 -0.3179  0.159860    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Margins_imp  <- impactsSDPDm(Margins_sp_d)
Margins_imp
## $DIRECTst.tab
##           Estimate  Std. Error      t-value  Pr(>|t|)
## EML  -0.0168084387  0.11436540 -0.146971366 0.8831546
## IML  -0.0007936049  0.03463639 -0.022912460 0.9817201
## Edu  -0.0013647468  0.01220589 -0.111810516 0.9109736
## Pop   0.4378514924 44.72664408  0.009789500 0.9921892
## InvL  0.1918081687  2.34594912  0.081761436 0.9348364
## FDIL -0.0146835194  2.55263209 -0.005752305 0.9954103
## Imp  -0.0101149896  0.13783584 -0.073384320 0.9415003
## 
## $INDIRECTst.tab
##          Estimate   Std. Error     t-value  Pr(>|t|)
## EML   -0.23214445    3.8781591 -0.05985944 0.9522676
## IML   -0.10851840    1.3569592 -0.07997175 0.9362597
## Edu   -0.01541001    0.2822159 -0.05460363 0.9564542
## Pop  141.47543768 1716.4416174  0.08242368 0.9343098
## InvL  -7.38662708   73.8950503 -0.09996105 0.9203752
## FDIL  -8.37244319  102.7367850 -0.08149411 0.9350490
## Imp   -0.30846687    4.4264850 -0.06968664 0.9444431
## 
## $TOTALst.tab
##          Estimate   Std. Error     t-value  Pr(>|t|)
## EML   -0.24895288    3.9191448 -0.06352225 0.9493506
## IML   -0.10931200    1.3719456 -0.07967663 0.9364944
## Edu   -0.01677476    0.2850474 -0.05884902 0.9530724
## Pop  141.91328918 1735.5094149  0.08177039 0.9348293
## InvL  -7.19481892   74.7205853 -0.09628965 0.9232905
## FDIL  -8.38712671  103.8768982 -0.08074102 0.9356479
## Imp   -0.31858186    4.4736495 -0.07121297 0.9432283
## 
## $DIRECTlt.tab
##          Estimate   Std. Error     t-value  Pr(>|t|)
## EML   0.072566682   0.71878926  0.10095683 0.9195847
## IML   0.006140787   0.17406070  0.03527957 0.9718568
## Edu   0.006256252   0.08068115  0.07754292 0.9381916
## Pop  -5.796595035 206.98585238 -0.02800479 0.9776583
## InvL -0.742204238   3.38160074 -0.21948311 0.8262737
## FDIL  0.271585908  12.55255797  0.02163590 0.9827384
## Imp   0.057856390   0.90170866  0.06416306 0.9488404
## 
## $INDIRECTlt.tab
##          Estimate   Std. Error     t-value  Pr(>|t|)
## EML  -0.091362614   0.68459991 -0.13345403 0.8938343
## IML  -0.009858123   0.16809856 -0.05864490 0.9532349
## Edu  -0.008627980   0.07854308 -0.10985030 0.9125281
## Pop   9.248606714 202.16191033  0.04574851 0.9635107
## InvL  0.913566698   3.34207700  0.27335298 0.7845819
## FDIL -0.522877640  12.06036820 -0.04335503 0.9654185
## Imp  -0.081123722   0.87344171 -0.09287823 0.9260003
## 
## $TOTALlt.tab
##          Estimate  Std. Error     t-value  Pr(>|t|)
## EML  -0.018795932  0.19440787 -0.09668298 0.9229782
## IML  -0.003717336  0.05550461 -0.06697346 0.9466028
## Edu  -0.002371728  0.02593049 -0.09146485 0.9271232
## Pop   3.452011680 63.64533851  0.05423825 0.9567453
## InvL  0.171362460  0.23920645  0.71637894 0.4737574
## FDIL -0.251291733  4.01075353 -0.06265449 0.9500416
## Imp  -0.023267332  0.28654919 -0.08119839 0.9352842
## 
## attr(,"class")
## [1] "impactsSDPDm"

United table creating

# Table for Theil
united_table_spatial_dynamic_panels_Theil <-
cbind(Theil_sp_d$coefficients1, Theil_sp_d$std1, 2 * pt(abs(Theil_sp_d$coefficients1 / Theil_sp_d$std1), df = Inf, lower.tail = FALSE))

# Table for Margins
united_table_spatial_dynamic_panels_Margins <- 
cbind(Margins_sp_d$coefficients1, Margins_sp_d$std1, 2 * pt(abs(Margins_sp_d$coefficients1 / Margins_sp_d$std1), df = Inf, lower.tail = FALSE))

# matrixes into dataframe
united_table_spatial_dynamic_panels_Theil <- as.data.frame(united_table_spatial_dynamic_panels_Theil)
united_table_spatial_dynamic_panels_Margins <- as.data.frame(united_table_spatial_dynamic_panels_Margins)

# renaming
names(united_table_spatial_dynamic_panels_Theil) <- c('Est_Theil', 'SE_Theil', 'Pvalue_Theil')

names(united_table_spatial_dynamic_panels_Margins) <- c('Est_Margins', 'SE_Margins', 'Pvalue_Margins')

united_table_spatial_dynamic_panels_Theil
united_table_spatial_dynamic_panels_Margins

Table

library(modelsummary)
ti_Theil <- data.frame(
  term = c('L1.GRP', 'L1.Div', 'L0.Edu', 'L0.Pop', 'L1.Inv', 'L1.FDI', 'L0.Imp'),
  estimate = united_table_spatial_dynamic_panels_Theil$Est_Theil[c(1, 3:8)],
  std.error = united_table_spatial_dynamic_panels_Theil$SE_Theil[c(1, 3:8)],
  p.value = united_table_spatial_dynamic_panels_Theil$Pvalue_Theil[c(1, 3:8)])

ti_Margins <- data.frame(
  term = c('L1.GRP', 'L1.EM', 'L1.IM', 'L0.Edu', 'L0.Pop', 'L1.Inv', 'L1.FDI', 'L0.Imp'),
  estimate = united_table_spatial_dynamic_panels_Margins$Est_Margins[c(1, 3:9)],
  std.error = united_table_spatial_dynamic_panels_Margins$SE_Margins[c(1, 3:9)],
  p.value = united_table_spatial_dynamic_panels_Margins$Pvalue_Margins[c(1, 3:9)])

gl <- data.frame(
  Observations = "546",
  Model = "dynamic SAR")

mod_Theil <- list(tidy = ti_Theil, glance = gl)
mod_Margins <- list(tidy = ti_Margins, glance = gl)

# Model class
class(mod_Theil) <- "modelsummary_list"
class(mod_Margins) <- "modelsummary_list"


# created named list
models <- list()
models[['log(GRP) (1)']] <- mod_Theil
models[['log(GRP) (2)']] <- mod_Margins

# Model
modelsummary(models, stars = T, title = 'Dynamic model', fmt = 4,  coef_map = c('L1.GRP', 'L1.Div',
                                                                               'L1.EM', 'L1.IM',
                                                                               'L0.Edu', 'L0.Pop',
                                                                               'L1.Inv','L1.FDI',
                                                                               'L0.Imp'))
Dynamic model
log(GRP) (1)  log(GRP) (2)
L1.GRP 0.9409+ 0.9023***
(0.4929) (0.0850)
L1.Div -0.0399
(0.0822)
L1.EM -0.0237
(0.0421)
L1.IM -0.0002
(0.0132)
L0.Edu -0.0028 -0.0025
(0.0029) (0.0044)
L0.Pop -1.5994 -0.8561
(3.2540) (17.2127)
L1.Inv 0.5647*** 0.5162
(0.1184) (0.3757)
L1.FDI 0.0246 0.0254
(0.2111) (0.9636)
L0.Imp -0.0238 -0.0188
(0.0177) (0.0592)
Observations 546 546
Model dynamic SAR dynamic SAR
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
#output = 'latex'
# modelsummary(mod)

SAME CODE, BUT FOR ROBUST DATASET (NO MSK, SPB)

Downloading the data, robust

# Downloading
long_data_robust <- read_csv('C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Long_united_data_robust.csv',
                      show_col_types = FALSE)

long_data_robust

Spatial data, pleliminary cleaning, robust

Spatial data, robust

Panel_Ru_shape_robust= readOGR(
  'C:/Users/Kolya/Documents/New_studies/Diploma/Geo/Data_geo/Rus_regions_panel_robust.shp')
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\Kolya\Documents\New_studies\Diploma\Geo\Data_geo\Rus_regions_panel_robust.shp", layer: "Rus_regions_panel_robust"
## with 76 features
## It has 106 fields
## Integer64 fields read as strings:  Edu_2014 Edu_2015 Edu_2016 Edu_2017 Edu_2018 Edu_2019 Edu_2020
Panel_Ru_robust <- Panel_Ru_shape_robust@data

# obtain list of regions (in an appropriate order)
region_order_robust = Panel_Ru_robust$region

# # sorting by 'region'
# Panel_Ru_robust <- Panel_Ru_robust[
#   order(Panel_Ru_robust[,1] ),
# ]
# 
# # reseting index
# rownames(Panel_Ru_robust) <- 1:nrow(Panel_Ru_robust)

Data Transformation, robust

# to numeric
Panel_Ru_shape_robust@data[, c(-1)] <-  Panel_Ru_shape_robust@data[, c(-1)] %>% mutate_if(is.character, as.numeric)
# Panel_Ru_shape_robust@data %>% glimpse() #everything is character


### LONG vs.WIDE format
long_spatial_data <- Panel_Ru_shape_robust@data %>% pivot_longer(cols = !region,
                                            names_to = c(".value", "year"),
                                          names_pattern = "([A-Za-z]+)_(\\d+)")

pdim(long_spatial_data) # we have balanced panel
## Balanced Panel: n = 76, T = 7, N = 532

Weight matrix, robust

# projection
proj4string(Panel_Ru_shape_robust) <- CRS("+init=epsg:4326")

# listw
map_crd_robust <- coordinates(Panel_Ru_shape_robust)
      Points_robust <- SpatialPoints(map_crd_robust)
      Panel_Ru_shape_robust.knn_8 <- knearneigh(Points_robust, k=8)
      K8_nb_robust <- knn2nb(Panel_Ru_shape_robust.knn_8)
      # Ru_weight_sq <- nb2listw(K8_nb_robust, style="W")
      
# creating binary contiguity matrix

## creating a neighbor matrix
near_neighbor_robust <- apply(matrix(K8_nb_robust),1,unlist) %>% t()

## binary contiguity matrix 
nr_robust <- nrow(near_neighbor_robust)
ru76 <- matrix(0, nr_robust, nr_robust)
for(i in 1:nr_robust) ru76[i, unlist(near_neighbor_robust[i, ])] <- 1

# rename by regions' names
rownames(ru76) = region_order_robust
colnames(ru76) = region_order_robust

# sort by regions' names
ru76 <- ru76[order(rownames(ru76)), order(colnames(ru76))]


# row normalized matrix
W_robust <- rownor(ru76)
isrownor(W_robust)
## [1] TRUE

Model construction, robust

Specifications, Panel models

Theil_robust index specification

Theil_robust <- log(GRP) ~ lag(log(GRP)) + lag(Div) + Edu + Pop + lag(Inv) + lag(FDI) + log(Imp)

EM and IM specification

Margins_robust <- log(GRP) ~ lag(log(GRP)) + lag(EM) + lag(IM) + Edu + Pop + lag(Inv) + lag(FDI) + log(Imp)

# Theil_robust index specification
Theil_robust_d <- GRP ~ DivL + Edu + Pop + InvL + FDIL + Imp

# EM and IM specification
Margins_robust_d <- GRP ~ EML + IML + Edu + Pop + InvL + FDIL + Imp

Manually making logarythms for “SDPDm”, robust

# new dataset
long_data_robust_log = data.frame(long_data_robust)

# Check if memory addresses are the same
tracemem(long_data_robust_log) == tracemem(long_data_robust)
# FALSE expected

# logarythm of aprropriate columns
long_data_robust_log$GRP=log(long_data_robust_log$GRP)
long_data_robust_log$Imp=log(long_data_robust_log$Imp)
long_data_robust_log$GRPL = log(long_data_robust_log$GRPL)

Model setting, robust

Chosing the correct model specification, Theil_robust

Uniform prior check, Theil_robust
Uniform_Theil_robust<-blmpSDPD(formula = Theil_robust_d, W = W_robust,
               data = long_data_robust_log,
               index = c("region","year"),
               model = list("sar","sdm","sem","sdem","slx", "ols"), 
               effect = "twoways",
               ldet = "mc",
               dynamic = TRUE,
               prior = "uniform")
Uniform_Theil_robust$probs

We have two available models in SDPDm package: SAR and SDM. As could be observed, SAR model is preferable (has much higher posterior probability). We also don’t consider OLS - already built those models.

Beta prior check, Theil_robust
d_Theil_robust <- plm::pdata.frame(long_data_robust_log, index=c('region', 'year'))
data_Theil_robust<-d_Theil_robust[which(!is.na(d_Theil_robust$GRPL)),]
rownames(data_Theil_robust)<-1:nrow(data_Theil_robust)
kk_Theil_robust<-which(colnames(data_Theil_robust)=="GRPL")

Beta_Theil_robust<- blmpSDPD(formula = Theil_robust_d,
                   W = W_robust,
                   data = data_Theil_robust,
                   index = c("region","year"),
                   model = list("sar","sdm","sem","sdem", "slx", "ols"),
                   effect = "twoways",
                   ldet = "mc",
                   dynamic = TRUE,
                   tlaginfo = list(ind = kk_Theil_robust),
                   prior = "beta")
Beta_Theil_robust$probs

SAR is still better than sdm (but now is also is preferred to OLS).

Estimation: Maximum Likelihood

Theil_robust, robust

# Theil_robust
Theil_robust_sp_d <- SDPDm(formula = Theil_robust_d,
            data = long_data_robust_log,
            W = W_robust,
            index = c("region","year"),
            model = "sar", 
            effect = "twoways",
            LYtrans = T,
            dynamic = T,
            tlaginfo = list(ind = NULL, tl = T, stl = T))
            # ind - time lag
            # tl -  lag of the dependent variable in time
            # stl - lag of the dependent variable in space and time
# Theil_robust_sp_d_sum <- summary(Theil_robust_sp_d)
summary(Theil_robust_sp_d)
## sar dynamic panel model with twoways fixed effects
## 
## Call:
## SDPDm(formula = Theil_robust_d, data = long_data_robust_log, 
##     W = W_robust, index = c("region", "year"), model = "sar", 
##     effect = "twoways", dynamic = T, tlaginfo = list(ind = NULL, 
##         tl = T, stl = T), LYtrans = T)
## 
## Spatial autoregressive coefficient:
##     Estimate Std. Error t-value Pr(>|t|)
## rho  1.77313    0.25881  6.8509    0.666
## 
## Coefficients:
##              Estimate Std. Error t-value  Pr(>|t|)    
## GRP(t-1)    0.9761236  0.3837269  2.5438 4.158e-09 ***
## W*GRP(t-1) -6.2366666  2.6812469 -2.3260   0.94763    
## DivL       -0.0577035  0.0343543 -1.6797   0.36425    
## Edu        -0.0034835  0.0013625 -2.5567   0.34064    
## Pop        -1.4083464  2.3655443 -0.5954   0.49481    
## InvL        0.5984444  0.0674463  8.8729   0.00848 ** 
## FDIL        0.0518528  0.1816712  0.2854   0.14368    
## Imp        -0.0246365  0.0229654 -1.0728   0.08277 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Theil_robust_imp  <- impactsSDPDm(Theil_robust_sp_d)
Theil_robust_imp
## $DIRECTst.tab
##         Estimate  Std. Error     t-value  Pr(>|t|)
## DivL -0.03336917 0.100349365 -0.33253000 0.7394891
## Edu  -0.00227069 0.005339849 -0.42523491 0.6706654
## Pop  -0.41336378 5.647176206 -0.07319831 0.9416483
## InvL  0.38489620 1.000298687  0.38478127 0.7003995
## FDIL  0.04118726 0.349579694  0.11781936 0.9062108
## Imp  -0.01685995 0.043476549 -0.38779410 0.6981684
## 
## $INDIRECTst.tab
##         Estimate  Std. Error    t-value  Pr(>|t|)
## DivL  0.09571550 0.112767222  0.8487883 0.3959991
## Edu   0.00666858 0.005466862  1.2198186 0.2225336
## Pop   3.53177256 8.247459290  0.4282255 0.6684869
## InvL -1.31002171 1.090516991 -1.2012850 0.2296407
## FDIL -0.26442675 0.625811066 -0.4225345 0.6726349
## Imp   0.03826349 0.063872777  0.5990579 0.5491342
## 
## $TOTALst.tab
##          Estimate   Std. Error    t-value     Pr(>|t|)
## DivL  0.062346327 0.0428944745  1.4534815 1.460901e-01
## Edu   0.004397889 0.0007203303  6.1053790 1.025570e-09
## Pop   3.118408773 5.8107038589  0.5366663 5.914982e-01
## InvL -0.925125517 0.4966191797 -1.8628469 6.248380e-02
## FDIL -0.223239493 0.4958002138 -0.4502610 6.525223e-01
## Imp   0.021403545 0.0379516296  0.5639691 5.727752e-01
## 
## $DIRECTlt.tab
##          Estimate  Std. Error     t-value  Pr(>|t|)
## DivL  0.039981556 0.092491995  0.43227045 0.6655449
## Edu   0.002431381 0.005998077  0.40536017 0.6852128
## Pop   1.376389082 4.832013854  0.28484792 0.7757607
## InvL -0.415892815 1.588428405 -0.26182660 0.7934551
## FDIL -0.040799015 0.579120924 -0.07044991 0.9438356
## Imp   0.019060697 0.024780497  0.76918137 0.4417856
## 
## $INDIRECTlt.tab
##          Estimate  Std. Error    t-value  Pr(>|t|)
## DivL -0.055374434 0.097658160 -0.5670231 0.5706985
## Edu  -0.003435197 0.006459117 -0.5318369 0.5948389
## Pop  -1.954972456 9.858534433 -0.1983025 0.8428084
## InvL  0.584546261 2.022773188  0.2889826 0.7725947
## FDIL  0.073434732 0.816314272  0.0899589 0.9283199
## Imp  -0.022010872 0.034836320 -0.6318369 0.5274935
## 
## $TOTALlt.tab
##          Estimate  Std. Error     t-value  Pr(>|t|)
## DivL -0.015392877 0.033867128 -0.45450791 0.6494633
## Edu  -0.001003815 0.002570401 -0.39052873 0.6961456
## Pop  -0.578583374 8.625794985 -0.06707595 0.9465212
## InvL  0.168653446 1.317705833  0.12799021 0.8981567
## FDIL  0.032635718 0.605345461  0.05391255 0.9570048
## Imp  -0.002950175 0.025052713 -0.11775870 0.9062589
## 
## attr(,"class")
## [1] "impactsSDPDm"

Note:

  1. LYtrans = T

The Lee-Yu (LY) transformation is a commonly used technique in spatial econometrics to reduce spatial correlation in panel data models. It involves taking the first-differences of each variable in the model and dividing by the square root of the spatial weight matrix. This transformation converts the original spatial panel data model into a non-spatial one, which allows the use of standard panel data estimation techniques. After estimation, the transformed coefficients can be converted back to the original scale by multiplying by the square root of the weight matrix. The LY transformation was introduced by Lee and Yu (2010) as an improvement over earlier transformations that were found to have certain drawbacks.

Chosing the correct model specification, Margins_robust

Uniform prior check, Margins_robust
Uniform_Margins_robust<-blmpSDPD(formula = Margins_robust_d, W = W_robust,
               data = long_data_robust_log,
               index = c("region","year"),
               model = list("sar","sdm","sem","sdem", "slx", "ols"), 
               effect = "twoways",
               ldet = "mc",
               dynamic = TRUE,
               prior = "uniform")
Uniform_Margins_robust$probs

We have two available models in SDPDm package: SAR and SDM. As could be observed, SAR model is preferable (has much higher posterior probability). We also don’t consider OLS - already built those models.

Beta prior check, Margins_robust
d_Margins_robust <- plm::pdata.frame(long_data_robust_log, index=c('region', 'year'))
data_Margins_robust<-d_Margins_robust[which(!is.na(d_Margins_robust$GRPL)),]
rownames(data_Margins_robust)<-1:nrow(data_Margins_robust)
Margins_robust_kk<-which(colnames(data_Margins_robust)=="GRPL")

Beta_Margins_robust <- blmpSDPD(formula = Margins_robust_d,
                   W = W_robust,
                   data = data_Margins_robust,
                   index = c("region","year"),
                   model = list("sar","sdm","sem","sdem", "slx", "ols"),
                   effect = "twoways",
                   ldet = "mc",
                   dynamic = TRUE,
                   tlaginfo = list(ind = Margins_robust_kk),
                   prior = "beta")
Beta_Margins_robust$probs

SAR is still better than sdm.

Margins_robust

# Margins_robust
Margins_robust_sp_d <- SDPDm(formula = Margins_robust_d,
            data = long_data_robust_log,
            W = W_robust,
            index = c("region","year"),
            model = "sar", 
            effect = "twoways",
            LYtrans = T,
            dynamic = T,
            tlaginfo = list(ind = NULL, tl = T, stl = T))

# Margins_robust_sp_d_sum <- summary(Margins_robust_sp_d) # R cannot create an object of summary in memory - possible bug
summary(Margins_robust_sp_d)
## sar dynamic panel model with twoways fixed effects
## 
## Call:
## SDPDm(formula = Margins_robust_d, data = long_data_robust_log, 
##     W = W_robust, index = c("region", "year"), model = "sar", 
##     effect = "twoways", dynamic = T, tlaginfo = list(ind = NULL, 
##         tl = T, stl = T), LYtrans = T)
## 
## Spatial autoregressive coefficient:
##     Estimate Std. Error t-value Pr(>|t|)
## rho   1.4104     1.0728  1.3146   0.6454
## 
## Coefficients:
##               Estimate  Std. Error t-value  Pr(>|t|)    
## GRP(t-1)    9.1304e-01  2.5865e+00  0.3530 5.672e-09 ***
## W*GRP(t-1) -4.5947e+00  3.9250e+01 -0.1171  0.956940    
## EML        -2.5488e-02  7.7602e-01 -0.0328  0.875278    
## IML        -2.3608e-04  1.8579e-01 -0.0013  0.297684    
## Edu        -2.7386e-03  7.3256e-02 -0.0374  0.355136    
## Pop        -1.0832e+00  2.8272e+02 -0.0038  0.622939    
## InvL        5.2931e-01  5.8678e+00  0.0902  0.006753 ** 
## FDIL        3.7936e-02  1.0394e+01  0.0036  0.187965    
## Imp        -1.9070e-02  5.9128e-01 -0.0323  0.164158    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Margins_robust_imp  <- impactsSDPDm(Margins_robust_sp_d)
Margins_robust_imp
## $DIRECTst.tab
##          Estimate  Std. Error      t-value  Pr(>|t|)
## EML  -0.014774300   1.2483461 -0.011835099 0.9905572
## IML   0.001078605   0.2953480  0.003651979 0.9970861
## Edu  -0.001710791   0.1185327 -0.014433071 0.9884845
## Pop  -2.768438745 448.6778573 -0.006170215 0.9950769
## InvL  0.474626754   8.9696844  0.052914544 0.9578000
## FDIL  0.113437521  16.4745205  0.006885634 0.9945061
## Imp  -0.010248137   0.9543980 -0.010737801 0.9914326
## 
## $INDIRECTst.tab
##           Estimate   Std. Error    t-value  Pr(>|t|)
## EML     0.85006430    3.4712344  0.2448882 0.8065430
## IML     0.19918820    0.8816461  0.2259276 0.8212577
## Edu     0.07918362    0.3159955  0.2505847 0.8021352
## Pop  -301.24123201 1352.0828077 -0.2227979 0.8236928
## InvL    6.02757703   33.9300552  0.1776471 0.8590001
## FDIL   11.16765728   50.7067082  0.2202402 0.8256841
## Imp     0.63977665    2.5860924  0.2473913 0.8046054
## 
## $TOTALst.tab
##           Estimate   Std. Error    t-value  Pr(>|t|)
## EML     0.83529000    3.3624624  0.2484162 0.8038124
## IML     0.20026680    0.8613905  0.2324925 0.8161555
## Edu     0.07747283    0.3043199  0.2545770 0.7990499
## Pop  -304.00967075 1322.3766356 -0.2298964 0.8181722
## InvL    6.50220379   33.8127163  0.1923005 0.8475068
## FDIL   11.28109480   49.6811641  0.2270699 0.8203694
## Imp     0.62952851    2.4982695  0.2519858 0.8010520
## 
## $DIRECTlt.tab
##          Estimate  Std. Error    t-value    Pr(>|t|)
## EML  -0.092087334  0.12916252 -0.7129571 0.475872294
## IML  -0.022856294  0.02545296 -0.8979817 0.369195280
## Edu  -0.008620655  0.01265590 -0.6811570 0.495772138
## Pop  35.054717027 37.82688159  0.9267144 0.354074823
## InvL -0.810053050  0.27301701 -2.9670424 0.003006795
## FDIL -1.287450984  1.38007345 -0.9328858 0.350878914
## Imp  -0.070136729  0.09896999 -0.7086666 0.478531380
## 
## $INDIRECTlt.tab
##           Estimate  Std. Error    t-value     Pr(>|t|)
## EML    0.103342110  0.17110689  0.6039623 0.5458687119
## IML    0.025890580  0.03345251  0.7739503 0.4389601813
## Edu    0.009612677  0.01729316  0.5558657 0.5783026345
## Pop  -39.873240821 48.31947504 -0.8252002 0.4092579357
## InvL   0.951324435  0.28023332  3.3947585 0.0006868917
## FDIL   1.466597191  1.74294080  0.8414498 0.4000959955
## Imp    0.078751443  0.13068300  0.6026143 0.5467653013
## 
## $TOTALlt.tab
##           Estimate   Std. Error    t-value  Pr(>|t|)
## EML   0.0112547761  0.066936315  0.1681416 0.8664719
## IML   0.0030342864  0.013065619  0.2322344 0.8163559
## Edu   0.0009920218  0.007168145  0.1383931 0.8899297
## Pop  -4.8185237943 17.681089914 -0.2725241 0.7852190
## InvL  0.1412713851  0.060050685  2.3525358 0.0186459
## FDIL  0.1791462066  0.633984439  0.2825719 0.7775050
## Imp   0.0086147141  0.051879027  0.1660539 0.8681145
## 
## attr(,"class")
## [1] "impactsSDPDm"

United table creating, robust

# Table for Theil_robust
united_table_spatial_dynamic_panels_Theil_robust <-
cbind(Theil_robust_sp_d$coefficients1, Theil_robust_sp_d$std1, 2 * pt(abs(Theil_robust_sp_d$coefficients1 / Theil_robust_sp_d$std1), df = Inf, lower.tail = FALSE))

# Table for Margins_robust
united_table_spatial_dynamic_panels_Margins_robust <- 
cbind(Margins_robust_sp_d$coefficients1, Margins_robust_sp_d$std1, 2 * pt(abs(Margins_robust_sp_d$coefficients1 / Margins_robust_sp_d$std1), df = Inf, lower.tail = FALSE))

# matrixes into dataframe
united_table_spatial_dynamic_panels_Theil_robust <- as.data.frame(united_table_spatial_dynamic_panels_Theil_robust)
united_table_spatial_dynamic_panels_Margins_robust <- as.data.frame(united_table_spatial_dynamic_panels_Margins_robust)

# renaming
names(united_table_spatial_dynamic_panels_Theil_robust) <- c('Est_Theil_robust', 'SE_Theil_robust', 'Pvalue_Theil_robust')

names(united_table_spatial_dynamic_panels_Margins_robust) <- c('Est_Margins_robust', 'SE_Margins_robust', 'Pvalue_Margins_robust')

united_table_spatial_dynamic_panels_Theil_robust
united_table_spatial_dynamic_panels_Margins_robust

Table, robust

library(modelsummary)
ti_Theil_robust <- data.frame(
  term = c('L1.GRP', 'L1.Div', 'L0.Edu', 'L0.Pop', 'L1.Inv', 'L1.FDI', 'L0.Imp'),
  estimate = united_table_spatial_dynamic_panels_Theil_robust$Est_Theil_robust[c(1, 3:8)],
  std.error = united_table_spatial_dynamic_panels_Theil_robust$SE_Theil_robust[c(1, 3:8)],
  p.value = united_table_spatial_dynamic_panels_Theil_robust$Pvalue_Theil_robust[c(1, 3:8)])

ti_Margins_robust <- data.frame(
  term = c('L1.GRP', 'L1.EM', 'L1.IM', 'L0.Edu', 'L0.Pop', 'L1.Inv', 'L1.FDI', 'L0.Imp'),
  estimate = united_table_spatial_dynamic_panels_Margins_robust$Est_Margins_robust[c(1, 3:9)],
  std.error = united_table_spatial_dynamic_panels_Margins_robust$SE_Margins_robust[c(1, 3:9)],
  p.value = united_table_spatial_dynamic_panels_Margins_robust$Pvalue_Margins_robust[c(1, 3:9)])

gl <- data.frame(
  Observations = "532",
  Model = "dynamic SAR")

mod_Theil_robust <- list(tidy = ti_Theil_robust, glance = gl)
mod_Margins_robust <- list(tidy = ti_Margins_robust, glance = gl)

# Model class
class(mod_Theil_robust) <- "modelsummary_list"
class(mod_Margins_robust) <- "modelsummary_list"


# created named list
models <- list()
models[['log(GRP) (1)']] <- mod_Theil_robust
models[['log(GRP) (2)']] <- mod_Margins_robust

# Model
modelsummary(models, stars = T, title = 'Dynamic model', fmt = 4,  coef_map = c('L1.GRP', 'L1.Div',
                                                                               'L1.EM', 'L1.IM',
                                                                               'L0.Edu', 'L0.Pop',
                                                                               'L1.Inv','L1.FDI',
                                                                               'L0.Imp'))
Dynamic model
log(GRP) (1)  log(GRP) (2)
L1.GRP 0.9761* 0.9130
(0.3837) (2.5865)
L1.Div -0.0577+
(0.0344)
L1.EM -0.0255
(0.7760)
L1.IM -0.0002
(0.1858)
L0.Edu -0.0035* -0.0027
(0.0014) (0.0733)
L0.Pop -1.4083 -1.0832
(2.3655) (282.7196)
L1.Inv 0.5984*** 0.5293
(0.0674) (5.8678)
L1.FDI 0.0519 0.0379
(0.1817) (10.3941)
L0.Imp -0.0246 -0.0191
(0.0230) (0.5913)
Observations 532 532
Model dynamic SAR dynamic SAR
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
#output = 'latex'
# modelsummary(mod)

Final table

# created named list
models <- list()
models[['log(GRP) (1)']] <- mod_Theil
models[['log(GRP) (2)']] <- mod_Theil_robust
models[['log(GRP) (3)']] <- mod_Margins
models[['log(GRP) (4)']] <- mod_Margins_robust
# Model
modelsummary(models, stars = T, title = 'Spatial dynamic models', fmt = 4, coef_map = c('L1.GRP', 'L1.Div',
                                                                               'L1.EM', 'L1.IM',
                                                                               'L0.Edu', 'L0.Pop',
                                                                               'L1.Inv','L1.FDI',
                                                                               'L0.Imp'))
Spatial dynamic models
log(GRP) (1)  log(GRP) (2)  log(GRP) (3)  log(GRP) (4)
L1.GRP 0.9409+ 0.9761* 0.9023*** 0.9130
(0.4929) (0.3837) (0.0850) (2.5865)
L1.Div -0.0399 -0.0577+
(0.0822) (0.0344)
L1.EM -0.0237 -0.0255
(0.0421) (0.7760)
L1.IM -0.0002 -0.0002
(0.0132) (0.1858)
L0.Edu -0.0028 -0.0035* -0.0025 -0.0027
(0.0029) (0.0014) (0.0044) (0.0733)
L0.Pop -1.5994 -1.4083 -0.8561 -1.0832
(3.2540) (2.3655) (17.2127) (282.7196)
L1.Inv 0.5647*** 0.5984*** 0.5162 0.5293
(0.1184) (0.0674) (0.3757) (5.8678)
L1.FDI 0.0246 0.0519 0.0254 0.0379
(0.2111) (0.1817) (0.9636) (10.3941)
L0.Imp -0.0238 -0.0246 -0.0188 -0.0191
(0.0177) (0.0230) (0.0592) (0.5913)
Observations 546 532 546 532
Model dynamic SAR dynamic SAR dynamic SAR dynamic SAR
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001