The following research is conducted by Maria R. Koldasheva and Nikolai A. Popov.

Libraries

library(dplyr) 
library(psych)
library(readxl)
library(writexl)
library(kableExtra)

# Econometrics
library(tidyverse)
library(plm) # panel models
library(sandwich) #covariance matrixes
library(lmtest) # tests
library(xtable)# latex tables
library(stargazer) # latex regression tables
library(ggpubr) # correlation test

Downloading the data

# Downloading
wide_data <- read_csv('C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Wide_united_data.csv',
                      show_col_types = FALSE)

long_data <- read_csv('C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Long_united_data.csv',
                      show_col_types = FALSE)

map_data <- read_csv(
  'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Wide_united_data_map.csv',
  show_col_types = FALSE)

# Exporting them back to fix QGIS coding issues
write_xlsx(wide_data,
           'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Wide_united_data_geo.xlsx')

# write_xlsx(long_data,
#            'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Long_united_data_geo.xlsx')

# Observing the tables
wide_data
long_data

Descriptive statistics

# For all the variables
Desc_stat <- long_data %>% psych::describe()

# For all variables
Index_stat <- Desc_stat[c(3:11), c(2, 3:4, (8:9))]

# row.names(Index_stat) <- c('Theil index', 'Extensive Margin', 'Intensive Margin')
xtable(Index_stat, caption = 'Descriptive statistics', digits = 3, type = "latex")
# For dependent/independent variables
Index_stat_important <- Desc_stat[c(3:6), c(2, 3:4, (8:9))]

# row.names(Index_stat) <- c('Theil index', 'Extensive Margin', 'Intensive Margin')
xtable(Index_stat_important, caption = 'Descriptive statistics', digits = 3, type = "latex")

Correlation matrix

res1 <- cor(long_data[, c(3:(length(long_data)-6))])
res1
##             GRP        Div           EM          IM        Imp         Edu
## GRP  1.00000000  0.3146169 -0.038722996  0.64378658  0.3743528  0.02979357
## Div  0.31461686  1.0000000 -0.628686905  0.15837051  0.8189165  0.41657305
## EM  -0.03872300 -0.6286869  1.000000000 -0.05498696 -0.3377338 -0.30802795
## IM   0.64378658  0.1583705 -0.054986955  1.00000000  0.1138910 -0.12929391
## Imp  0.37435280  0.8189165 -0.337733790  0.11389101  1.0000000  0.39000887
## Edu  0.02979357  0.4165731 -0.308027954 -0.12929391  0.3900089  1.00000000
## Pop  0.14156322  0.2564669  0.009496447  0.06912775  0.2728343  0.05786492
## Inv  0.04349540 -0.2353995  0.321429672  0.05782095 -0.1519467 -0.14601628
## FDI  0.44006908  0.2610529 -0.093738498  0.28381835  0.3739655 -0.12666111
##             Pop         Inv         FDI
## GRP 0.141563225  0.04349540  0.44006908
## Div 0.256466894 -0.23539946  0.26105288
## EM  0.009496447  0.32142967 -0.09373850
## IM  0.069127748  0.05782095  0.28381835
## Imp 0.272834331 -0.15194671  0.37396553
## Edu 0.057864921 -0.14601628 -0.12666111
## Pop 1.000000000  0.15650166  0.15337992
## Inv 0.156501660  1.00000000  0.06486492
## FDI 0.153379922  0.06486492  1.00000000
library(ggcorrplot)

#visualize correlation matrix
ggcorrplot(cor(long_data[, c(3:(length(long_data)-6))]))

Create a dataset for 2018

2018 is taken as the last economically stable year available in our data.

get_year <- function(df, year){
  df %>%
    select(contains(c('region', year)))
}


Data_2018 <- get_year(wide_data, '2018')
Map_2018 <- get_year(map_data, '2018')

# Exporting this file
write_xlsx(Data_2018,
           'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Data_2018_geo.xlsx')

write_xlsx(Map_2018,
           'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Map_2018_geo.xlsx')

Specifications

For non-dynamic models we use the simplified specification without lags.

Cross-sectional specifications

# Theil index specification
Theil_2018 <- log(GRP_2018) ~ Div_2018 +  Edu_2018 + Pop_2018 + Inv_2018 + FDI_2018 + log(Imp_2018)

# WM and IM specification
Margins_2018 <- log(GRP_2018) ~ EM_2018 + IM_2018 + Edu_2018 + Pop_2018 + Inv_2018 + FDI_2018 + log(Imp_2018)

Panel specifications

# Theil index specification
Theil <- log(GRP) ~ Div + Edu + Pop + Inv + FDI + log(Imp)

# EM and IM specification
Margins <- log(GRP) ~ EM + IM + Edu + Pop + Inv + FDI + log(Imp)

Cross-sectional models

# Theil index specification
reg_Theil_2018 <- lm(Theil_2018, data = Data_2018)

vcov2_Theil_2018 <- vcovHC(reg_Theil_2018, type = "HC0")
se2imp_Theil_2018 <- sqrt(diag(vcov2_Theil_2018))

# WM and IM specification
reg_Margins_2018 <- lm(Margins_2018, data = Data_2018)

vcov2_Margins_2018 <- vcovHC(reg_Margins_2018, type = "HC0")
se2imp_Margins_2018 <- sqrt(diag(vcov2_Margins_2018))

# Table
stargazer(reg_Theil_2018, reg_Margins_2018, se = list(se2imp_Theil_2018, se2imp_Margins_2018), type = 'text', omit = c('Constant'), keep.stat = c("n")) # font.size = "tiny"
## 
## ==========================================
##                   Dependent variable:     
##               ----------------------------
##                      log(GRP_2018)        
##                    (1)            (2)     
## ------------------------------------------
## Div_2018          -0.015                  
##                  (0.026)                  
##                                           
## EM_2018                        0.219***   
##                                 (0.060)   
##                                           
## IM_2018                        0.018***   
##                                 (0.006)   
##                                           
## Edu_2018          0.0003        0.001*    
##                  (0.001)       (0.0004)   
##                                           
## Pop_2018          -6.854       -12.886*   
##                  (9.529)        (7.409)   
##                                           
## Inv_2018          0.513         -0.123    
##                  (0.596)        (0.387)   
##                                           
## FDI_2018         2.006**         0.753    
##                  (0.890)        (0.593)   
##                                           
## log(Imp_2018)    0.186***      0.279***   
##                  (0.046)        (0.059)   
##                                           
## ------------------------------------------
## Observations        78            78      
## ==========================================
## Note:          *p<0.1; **p<0.05; ***p<0.01

Panel models

Dataset for 2015-2019

We exclude 2014 and 2020 as crisis years, to check whether our results are robust.

long_data <- subset(long_data, year %in% c(2015:2019))

Theil specification

Pooled OLS Theil

ols_plm_Theil <- plm(Theil, 
               data = long_data, 
               index = c("region", "year"),
               model = "pooling")
cov_ols_plm_Theil <- vcovHC(ols_plm_Theil, type = "HC3")
se_ols_plm_Theil <- sqrt(diag(cov_ols_plm_Theil))
coeftest(ols_plm_Theil, df = Inf, vcov = cov_ols_plm_Theil)
## 
## z test of coefficients:
## 
##                Estimate  Std. Error z value Pr(>|z|)   
## (Intercept) 17.98283073  9.00275087  1.9975  0.04577 * 
## Div          0.01197230  0.02185960  0.5477  0.58390   
## Edu          0.00012523  0.00040249  0.3111  0.75570   
## Pop         -8.76753346  8.69566924 -1.0083  0.31333   
## Inv          0.59227939  0.47134286  1.2566  0.20891   
## FDI          1.41623541  0.70332251  2.0136  0.04405 * 
## log(Imp)     0.14018974  0.04861357  2.8838  0.00393 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Fixed (individual) effects Theil

fe_ind_Theil <- plm(Theil, 
              data = long_data, 
              index = c("region", "year"),
              effect = "individual",
              model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data)
cov_fe_ind_Theil <- vcovHC(fe_ind_Theil, type = "HC3")
se_fe_ind_Theil <- sqrt(diag(cov_fe_ind_Theil))
# coeftest(fe_ind_Theil, df = Inf, vcov = cov_fe_ind_Theil)

Random Effects Theil

re_Theil <- plm(Theil, 
              data = long_data, 
              index = c("region", "year"),
              model = "random")
cov_re_Theil <- vcovHC(re_Theil, type = "HC3")
se_re_Theil <- sqrt(diag(cov_re_Theil))
#coeftest(re_Theil, df = Inf, vcov = cov_re_Theil)

Panel regressions table, Theil

stargazer(ols_plm_Theil, re_Theil, fe_ind_Theil, se = list(se_ols_plm_Theil, se_re_Theil, se_fe_ind_Theil), type = "text", omit = c('Constant'), keep.stat = c("n")) # font.size = "tiny"
## 
## ==========================================
##                   Dependent variable:     
##              -----------------------------
##                        log(GRP)           
##                 (1)       (2)       (3)   
## ------------------------------------------
## Div            0.012   0.055***  0.039*** 
##               (0.022)   (0.012)   (0.013) 
##                                           
## Edu           0.0001   -0.002*** -0.002***
##              (0.0004)  (0.0002)  (0.0003) 
##                                           
## Pop           -8.768   -6.121**   -5.530  
##               (8.696)   (3.058)   (3.438) 
##                                           
## Inv            0.592    -0.294    -0.286  
##               (0.471)   (0.195)   (0.186) 
##                                           
## FDI           1.416**   -0.181    -0.208  
##               (0.703)   (0.119)   (0.129) 
##                                           
## log(Imp)     0.140***   0.040*     0.027  
##               (0.049)   (0.021)   (0.018) 
##                                           
## ------------------------------------------
## Observations    390       390       390   
## ==========================================
## Note:          *p<0.1; **p<0.05; ***p<0.01
#order: Pooled OLS, RE, FE

Tests for best panel model for the Theil specification

#Ftest - between fixed effects and pooled OLS
#H0 - pooled OLS are better than fixed effects (because all individual effects are the same)
#H1 - fixed effects are better than pooled OLS ones
pFtest(fe_ind_Theil, ols_plm_Theil)
## 
##  F test for individual effects
## 
## data:  Theil
## F = 210.65, df1 = 77, df2 = 306, p-value < 2.2e-16
## alternative hypothesis: significant effects
#fixed effects are better, as expected
#plmtest - between random effects and pooled OLS
#H0 - pooled OLS are better than random effects (because all random effects are the same - on panel effect)
#H1 - random effects are better than pooled OLS ones
plmtest(ols_plm_Theil, type = c('bp'))
## 
##  Lagrange Multiplier Test - (Breusch-Pagan)
## 
## data:  Theil
## chisq = 594.75, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
#random effects are better
#phtest - between fixed effects and random effects
#H0 - random effects are better than fixed effects (random coef. are more efficient)
#H1 - fixed effects are better than random effects (fixed coef. are more efficient)
phtest(fe_ind_Theil, re_Theil)
## 
##  Hausman Test
## 
## data:  Theil
## chisq = 16.599, df = 6, p-value = 0.01087
## alternative hypothesis: one model is inconsistent
#random effects are better than fixed effects

Still, by all economic means, RE cannot be applied to our model. There should be a problem in our data (possible with the indexes), which causes biases in the Hausmann test. Thus, we will use FE.

Best panel model for the Theil specification

fe_twoways_Theil <- plm(Theil, 
              data = long_data, 
              index = c("region", "year"),
              effect = "twoways",
              model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data)
cov_fe_twoways_Theil <- vcovHC(fe_twoways_Theil, type = "HC3")
se_fe_twoways_Theil <- sqrt(diag(cov_fe_twoways_Theil))
# coeftest(fe_twoways_Theil, df = Inf, vcov = cov_fe_twoways_Theil)

Margins specification

Pooled OLS Margins

ols_plm_Margins <- plm(Margins, 
               data = long_data, 
               index = c("region", "year"),
               model = "pooling")
cov_ols_plm_Margins <- vcovHC(ols_plm_Margins, type = "HC3")
se_ols_plm_Margins <- sqrt(diag(cov_ols_plm_Margins))
coeftest(ols_plm_Margins, df = Inf, vcov = cov_ols_plm_Margins)
## 
## z test of coefficients:
## 
##                Estimate  Std. Error z value  Pr(>|z|)    
## (Intercept)  1.7508e+01  5.7752e+00  3.0317  0.002432 ** 
## EM           1.5188e-01  5.2397e-02  2.8987  0.003747 ** 
## IM           2.1465e-02  3.9328e-03  5.4580 4.816e-08 ***
## Edu          8.9491e-04  3.0359e-04  2.9478  0.003201 ** 
## Pop         -1.0360e+01  5.9765e+00 -1.7335  0.083016 .  
## Inv         -7.6401e-02  2.6359e-01 -0.2898  0.771935    
## FDI          8.5943e-01  4.8642e-01  1.7668  0.077254 .  
## log(Imp)     1.8852e-01  4.5125e-02  4.1777 2.945e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Fixed (individual) effects Margins

fe_ind_Margins <- plm(Margins, 
              data = long_data, 
              index = c("region", "year"),
              effect = "individual",
              model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data)
cov_fe_ind_Margins <- vcovHC(fe_ind_Margins, type = "HC3")
se_fe_ind_Margins <- sqrt(diag(cov_fe_ind_Margins))
# coeftest(fe_ind_Margins, df = Inf, vcov = cov_fe_ind_Margins)

Random Effects Margins

re_Margins <- plm(Margins, 
              data = long_data, 
              index = c("region", "year"),
              model = "random")
cov_re_Margins <- vcovHC(re_Margins, type = "HC3")
se_re_Margins <- sqrt(diag(cov_re_Margins))
#coeftest(re_Margins, df = Inf, vcov = cov_re_Margins)

Panel regressions table, Margins

stargazer(ols_plm_Margins,  re_Margins, fe_ind_Margins, se = list(se_ols_plm_Margins,se_re_Margins, se_fe_ind_Margins), type = "text", omit = c('Constant'), keep.stat = c("n")) # font.size = "tiny"
## 
## ==========================================
##                   Dependent variable:     
##              -----------------------------
##                        log(GRP)           
##                (1)       (2)        (3)   
## ------------------------------------------
## EM           0.152***   -0.014    -0.014  
##              (0.052)   (0.034)    (0.053) 
##                                           
## IM           0.021***   0.003*     0.001  
##              (0.004)   (0.002)    (0.002) 
##                                           
## Edu          0.001*** -0.002***  -0.002***
##              (0.0003)  (0.0003)  (0.0003) 
##                                           
## Pop          -10.360* -10.194*** -7.649** 
##              (5.976)   (3.382)    (3.378) 
##                                           
## Inv           -0.076    -0.288    -0.269  
##              (0.264)   (0.204)    (0.191) 
##                                           
## FDI           0.859*    -0.124    -0.207  
##              (0.486)   (0.112)    (0.134) 
##                                           
## log(Imp)     0.189***  0.072***    0.032  
##              (0.045)   (0.025)    (0.021) 
##                                           
## ------------------------------------------
## Observations   390       390        390   
## ==========================================
## Note:          *p<0.1; **p<0.05; ***p<0.01
#order: Pooled OLS, RE, FE

Tests for best panel model for the Margins specification

#Ftest - between fixed effects and pooled OLS
#H0 - pooled OLS are better than fixed effects (because all individual effects are the same)
#H1 - fixed effects are better than pooled OLS ones
pFtest(fe_ind_Margins, ols_plm_Margins)
## 
##  F test for individual effects
## 
## data:  Margins
## F = 98.197, df1 = 77, df2 = 305, p-value < 2.2e-16
## alternative hypothesis: significant effects
#fixed effects are better, as expected
#plmtest - between random effects and pooled OLS
#H0 - pooled OLS are better than random effects (because all random effects are the same - on panel effect)
#H1 - random effects are better than pooled OLS ones
plmtest(ols_plm_Margins, type = c('bp'))
## 
##  Lagrange Multiplier Test - (Breusch-Pagan)
## 
## data:  Margins
## chisq = 369.16, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
#random effects are better
#phtest - between fixed effects and random effects
#H0 - random effects are better than fixed effects (random coef. are more efficient)
#H1 - fixed effects are better than random effects (fixed coef. are more efficient)
phtest(fe_ind_Margins, re_Margins)
## 
##  Hausman Test
## 
## data:  Margins
## chisq = 49.943, df = 7, p-value = 1.482e-08
## alternative hypothesis: one model is inconsistent
#random effects are better than fixed effects

Still, by all economic means, RE cannot be applied to our model. There should be a problem in our data (possible with the indexes), which causes biases in the Hausmann test. Thus, we will use FE.

Best panel model for the Margins specification

fe_twoways_Margins <- plm(Margins, 
              data = long_data, 
              index = c("region", "year"),
              effect = "twoways",
              model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data)
cov_fe_twoways_Margins <- vcovHC(fe_twoways_Margins, type = "HC3")
se_fe_twoways_Margins <- sqrt(diag(cov_fe_twoways_Margins))
# coeftest(fe_twoways_Margins, df = Inf, vcov = cov_fe_twoways_Margins)

Panel models, united tables

Fixing Stargazer

# #echo = FALSE, warning = FALSE
# ## Quick fix for stargazer <= 5.2.3 is.na() issue with long model names in R >= 4.2
# # Unload stargazer if loaded
# detach("package:stargazer",unload=T)
# # Delete it
# remove.packages("stargazer")
# # Download the source
# download.file("https://cran.r-project.org/src/contrib/stargazer_5.2.3.tar.gz", destfile = "stargazer_5.2.3.tar.gz")
# # Unpack
# untar("stargazer_5.2.3.tar.gz")
# # Read the sourcefile with .inside.bracket fun
# stargazer_src <- readLines("stargazer/R/stargazer-internal.R")
# # Move the length check 5 lines up so it precedes is.na(.)
# stargazer_src[1990] <- stargazer_src[1995]
# stargazer_src[1995] <- ""
# # Save back
# writeLines(stargazer_src, con="stargazer/R/stargazer-internal.R")
# # Compile and install the patched package
# install.packages("stargazer", repos = NULL, type="source")

All models

stargazer(ols_plm_Theil, re_Theil, fe_ind_Theil, # Theil
          ols_plm_Margins, re_Margins, fe_ind_Margins, # Margins
          se = list(
            se_ols_plm_Theil, se_re_Theil, se_fe_ind_Theil, # Theil
            se_ols_plm_Margins, se_re_Margins, se_fe_ind_Margins), # Margins
          type = "text", omit = c('Constant'), keep.stat = c("n"), font.size = "tiny",
          add.lines = list(c('model', 'OLS', 'RE', 'FE', 'OLS', 'RE', 'FE')),
          title = "All panel models", digits = 4)
## 
## All panel models
## =============================================================================
##                                    Dependent variable:                       
##              ----------------------------------------------------------------
##                                          log(GRP)                            
##                 (1)       (2)        (3)        (4)        (5)        (6)    
## -----------------------------------------------------------------------------
## Div           0.0120   0.0547***  0.0387***                                  
##              (0.0219)   (0.0120)   (0.0131)                                  
##                                                                              
## EM                                           0.1519***   -0.0137    -0.0139  
##                                              (0.0524)   (0.0342)    (0.0527) 
##                                                                              
## IM                                           0.0215***   0.0026*     0.0007  
##                                              (0.0039)   (0.0016)    (0.0016) 
##                                                                              
## Edu           0.0001   -0.0019*** -0.0023*** 0.0009*** -0.0016***  -0.0023***
##              (0.0004)   (0.0002)   (0.0003)  (0.0003)   (0.0003)    (0.0003) 
##                                                                              
## Pop           -8.7675  -6.1207**   -5.5304   -10.3599* -10.1937*** -7.6495** 
##              (8.6957)   (3.0576)   (3.4379)  (5.9765)   (3.3825)    (3.3784) 
##                                                                              
## Inv           0.5923    -0.2938    -0.2857    -0.0764    -0.2882    -0.2688  
##              (0.4713)   (0.1947)   (0.1865)  (0.2636)   (0.2040)    (0.1914) 
##                                                                              
## FDI          1.4162**   -0.1807    -0.2080    0.8594*    -0.1239    -0.2070  
##              (0.7033)   (0.1193)   (0.1289)  (0.4864)   (0.1122)    (0.1344) 
##                                                                              
## log(Imp)     0.1402***  0.0399*     0.0271   0.1885***  0.0718***    0.0324  
##              (0.0486)   (0.0207)   (0.0183)  (0.0451)   (0.0247)    (0.0209) 
##                                                                              
## -----------------------------------------------------------------------------
## model           OLS        RE         FE        OLS        RE          FE    
## Observations    390       390        390        390        390        390    
## =============================================================================
## Note:                                             *p<0.1; **p<0.05; ***p<0.01
# Order: Pooled OLS, RE, FE

The best one, two-ways modifications

stargazer(fe_twoways_Theil, # Theil
          fe_twoways_Margins,# Margins
          se = list(
            se_fe_twoways_Theil,# Theil
            se_fe_twoways_Margins),# Margins
          type = "text", omit = c('Constant'), keep.stat = c("n")) #, font.size = "tiny"
## 
## =========================================
##                  Dependent variable:     
##              ----------------------------
##                        log(GRP)          
##                   (1)            (2)     
## -----------------------------------------
## Div              0.005                   
##                 (0.010)                  
##                                          
## EM                              0.028    
##                                (0.030)   
##                                          
## IM                              0.001    
##                                (0.001)   
##                                          
## Edu             -0.00004       -0.0001   
##                 (0.0003)      (0.0003)   
##                                          
## Pop              -0.680        -0.558    
##                 (2.062)        (2.266)   
##                                          
## Inv             -0.252*       -0.247**   
##                 (0.131)        (0.123)   
##                                          
## FDI              -0.184        -0.174*   
##                 (0.112)        (0.105)   
##                                          
## log(Imp)         0.006          0.004    
##                 (0.012)        (0.012)   
##                                          
## -----------------------------------------
## Observations      390            390     
## =========================================
## Note:         *p<0.1; **p<0.05; ***p<0.01

SAME CODE, BUT FOR ROBUST DATASET (NO MSK, SPB)

Downloading the data, robust

# Downloading
wide_data_robust <- read_csv(
  'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Wide_united_data_robust.csv',
                      show_col_types = FALSE)

long_data_robust  <- read_csv(
  'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Long_united_data_robust.csv',
                      show_col_types = FALSE)


# Exporting them back to fix QGIS coding issues
write_xlsx(wide_data_robust,
           'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Wide_united_data_robust_geo.xlsx')
# write_xlsx(long_data_robust,
#            'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Long_united_data_robust_geo.xlsx')

# Observing the tables
wide_data_robust
long_data_robust

Descriptive statistics, robust

# For all the variables
Desc_stat_robust <- long_data_robust %>% psych::describe()

# For diversification indexes
Index_stat_robust <- Desc_stat_robust[c(4:6), c(2, 3:4, (8:9))]


# row.names(Index_stat) <- c('Theil_robust index', 'Extensive Margin', 'Intensive Margin')
xtable(Index_stat_robust, caption = 'Descriptive statistics', digits = 3, type = "latex")

No extreme values of IM now.

Correlation matrix, robust

res1_robust <- cor(long_data_robust[, c(3:(length(long_data_robust)-6))])
res1_robust
##             GRP         Div          EM          IM         Imp         Edu
## GRP  1.00000000  0.12046454  0.03859512  0.65649208  0.25091214 -0.14958925
## Div  0.12046454  1.00000000 -0.70020999  0.15537158  0.63382255  0.09129487
## EM   0.03859512 -0.70020999  1.00000000 -0.04011758 -0.43768312 -0.21937034
## IM   0.65649208  0.15537158 -0.04011758  1.00000000  0.14558743 -0.19019470
## Imp  0.25091214  0.63382255 -0.43768312  0.14558743  1.00000000 -0.13265746
## Edu -0.14958925  0.09129487 -0.21937034 -0.19019470 -0.13265746  1.00000000
## Pop  0.08861862  0.16799415  0.06597524  0.05827624  0.36067681 -0.06556583
## Inv  0.09861951 -0.18139214  0.29565431  0.07015477 -0.05516198 -0.07647036
## FDI  0.38155793  0.05952865 -0.02877383  0.27741739  0.24955672 -0.32491996
##             Pop         Inv         FDI
## GRP  0.08861862  0.09861951  0.38155793
## Div  0.16799415 -0.18139214  0.05952865
## EM   0.06597524  0.29565431 -0.02877383
## IM   0.05827624  0.07015477  0.27741739
## Imp  0.36067681 -0.05516198  0.24955672
## Edu -0.06556583 -0.07647036 -0.32491996
## Pop  1.00000000  0.20059835  0.10996522
## Inv  0.20059835  1.00000000  0.11527150
## FDI  0.10996522  0.11527150  1.00000000
library(ggcorrplot)

#visualize correlation matrix
ggcorrplot(cor(long_data_robust[, c(3:(length(long_data_robust)-6))]))

Create a dataset for 2018, robust

2018 is taken as the last economically stable year available in our data.

Data_2018_robust <- get_year(wide_data_robust, '2018')

# Exporting this file
write_xlsx(Data_2018_robust,
           'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Data_2018_robust_geo.xlsx')

Cross-sectional models, robust

# Theil_robust index specification
reg_Theil_robust_2018 <- lm(Theil_2018, data = Data_2018_robust)

vcov2_Theil_robust_2018 <- vcovHC(reg_Theil_robust_2018, type = "HC0")
se2imp_Theil_robust_2018 <- sqrt(diag(vcov2_Theil_robust_2018))

# WM and IM specification
reg_Margins_robust_2018 <- lm(Margins_2018, data = Data_2018_robust)

vcov2_Margins_robust_2018 <- vcovHC(reg_Margins_robust_2018, type = "HC0")
se2imp_Margins_robust_2018 <- sqrt(diag(vcov2_Margins_robust_2018))

# Table
stargazer(reg_Theil_robust_2018, reg_Margins_robust_2018, se = list(se2imp_Theil_robust_2018, se2imp_Margins_robust_2018), type = 'text', omit = c('Constant'), keep.stat = c("n")) # font.size = "tiny"
## 
## ==========================================
##                   Dependent variable:     
##               ----------------------------
##                      log(GRP_2018)        
##                    (1)            (2)     
## ------------------------------------------
## Div_2018          -0.045                  
##                  (0.028)                  
##                                           
## EM_2018                        0.215***   
##                                 (0.059)   
##                                           
## IM_2018                        0.018***   
##                                 (0.006)   
##                                           
## Edu_2018         -0.0001         0.001    
##                  (0.001)        (0.001)   
##                                           
## Pop_2018          -6.586       -13.241*   
##                  (9.586)        (7.464)   
##                                           
## Inv_2018          0.433         -0.114    
##                  (0.582)        (0.392)   
##                                           
## FDI_2018          1.621*         0.663    
##                  (0.936)        (0.673)   
##                                           
## log(Imp_2018)    0.212***      0.274***   
##                  (0.048)        (0.059)   
##                                           
## ------------------------------------------
## Observations        76            76      
## ==========================================
## Note:          *p<0.1; **p<0.05; ***p<0.01

Panel models, robust

Dataset for 2015-2019, robust

We exclude 2014 and 2020 as crisis years, to check whether our results are robust.

long_data_robust <- subset(long_data_robust, year %in% c(2015:2019))

Theil_robust specification

Pooled OLS Theil_robust

ols_plm_Theil_robust <- plm(Theil, 
               data = long_data_robust, 
               index = c("region", "year"),
               model = "pooling")
cov_ols_plm_Theil_robust <- vcovHC(ols_plm_Theil_robust, type = "HC3")
se_ols_plm_Theil_robust <- sqrt(diag(cov_ols_plm_Theil_robust))
coeftest(ols_plm_Theil_robust, df = Inf, vcov = cov_ols_plm_Theil_robust)
## 
## z test of coefficients:
## 
##                Estimate  Std. Error z value Pr(>|z|)   
## (Intercept)  1.7957e+01  9.3885e+00  1.9127  0.05579 . 
## Div         -8.3417e-03  3.0080e-02 -0.2773  0.78154   
## Edu         -7.6926e-05  4.9116e-04 -0.1566  0.87554   
## Pop         -8.9012e+00  9.0349e+00 -0.9852  0.32453   
## Inv          5.7384e-01  4.7525e-01  1.2075  0.22726   
## FDI          1.2809e+00  7.5464e-01  1.6974  0.08963 . 
## log(Imp)     1.5118e-01  5.5489e-02  2.7245  0.00644 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Fixed (individual) effects Theil_robust

fe_ind_Theil_robust <- plm(Theil, 
              data = long_data_robust, 
              index = c("region", "year"),
              effect = "individual",
              model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data_robust)
cov_fe_ind_Theil_robust <- vcovHC(fe_ind_Theil_robust, type = "HC3")
se_fe_ind_Theil_robust <- sqrt(diag(cov_fe_ind_Theil_robust))
# coeftest(fe_ind_Theil_robust, df = Inf, vcov = cov_fe_ind_Theil_robust)

Random Effects Theil_robust

re_Theil_robust <- plm(Theil, 
              data = long_data_robust, 
              index = c("region", "year"),
              model = "random")
cov_re_Theil_robust <- vcovHC(re_Theil_robust, type = "HC3")
se_re_Theil_robust <- sqrt(diag(cov_re_Theil_robust))
#coeftest(re_Theil_robust, df = Inf, vcov = cov_re_Theil_robust)

Panel regressions table, Theil_robust

stargazer(ols_plm_Theil_robust, re_Theil_robust, fe_ind_Theil_robust, se = list(se_ols_plm_Theil_robust, se_re_Theil_robust, se_fe_ind_Theil_robust), type = "text", omit = c('Constant'), keep.stat = c("n")) # font.size = "tiny"
## 
## ==========================================
##                   Dependent variable:     
##              -----------------------------
##                        log(GRP)           
##                 (1)       (2)       (3)   
## ------------------------------------------
## Div           -0.008   0.047***  0.043*** 
##               (0.030)   (0.015)   (0.016) 
##                                           
## Edu           -0.0001  -0.002*** -0.002***
##              (0.0005)  (0.0003)  (0.0003) 
##                                           
## Pop           -8.901   -6.796**   -5.737  
##               (9.035)   (3.397)   (3.786) 
##                                           
## Inv            0.574    -0.271    -0.278  
##               (0.475)   (0.187)   (0.185) 
##                                           
## FDI           1.281*    -0.189    -0.209  
##               (0.755)   (0.120)   (0.129) 
##                                           
## log(Imp)     0.151***   0.037*     0.027  
##               (0.055)   (0.020)   (0.018) 
##                                           
## ------------------------------------------
## Observations    380       380       380   
## ==========================================
## Note:          *p<0.1; **p<0.05; ***p<0.01
#order: Pooled OLS, RE, FE

Tests for best panel model for the Theil_robust specification

#Ftest - between fixed effects and pooled OLS
#H0 - pooled OLS are better than fixed effects (because all individual effects are the same)
#H1 - fixed effects are better than pooled OLS ones
pFtest(fe_ind_Theil_robust, ols_plm_Theil_robust)
## 
##  F test for individual effects
## 
## data:  Theil
## F = 209.68, df1 = 75, df2 = 298, p-value < 2.2e-16
## alternative hypothesis: significant effects
#fixed effects are better, as expected
#plmtest - between random effects and pooled OLS
#H0 - pooled OLS are better than random effects (because all random effects are the same - on panel effect)
#H1 - random effects are better than pooled OLS ones
plmtest(ols_plm_Theil_robust, type = c('bp'))
## 
##  Lagrange Multiplier Test - (Breusch-Pagan)
## 
## data:  Theil
## chisq = 589.81, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
#random effects are better
#phtest - between fixed effects and random effects
#H0 - random effects are better than fixed effects (random coef. are more efficient)
#H1 - fixed effects are better than random effects (fixed coef. are more efficient)
phtest(fe_ind_Theil_robust, re_Theil_robust)
## 
##  Hausman Test
## 
## data:  Theil
## chisq = 9.8938, df = 6, p-value = 0.1292
## alternative hypothesis: one model is inconsistent
#random effects are better than fixed effects

Still, by all economic means, RE cannot be applied to our model. There should be a problem in our data (possible with the indexes), which causes biases in the Hausmann test. Thus, we will use FE.

Best panel model for the Theil_robust specification

fe_twoways_Theil_robust <- plm(Theil, 
              data = long_data_robust, 
              index = c("region", "year"),
              effect = "twoways",
              model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data_robust)
cov_fe_twoways_Theil_robust <- vcovHC(fe_twoways_Theil_robust, type = "HC3")
se_fe_twoways_Theil_robust <- sqrt(diag(cov_fe_twoways_Theil_robust))
# coeftest(fe_twoways_Theil_robust, df = Inf, vcov = cov_fe_twoways_Theil_robust)

Margins_robust specification

Pooled OLS Margins_robust

ols_plm_Margins_robust <- plm(Margins, 
               data = long_data_robust, 
               index = c("region", "year"),
               model = "pooling")
cov_ols_plm_Margins_robust <- vcovHC(ols_plm_Margins_robust, type = "HC3")
se_ols_plm_Margins_robust <- sqrt(diag(cov_ols_plm_Margins_robust))
coeftest(ols_plm_Margins_robust, df = Inf, vcov = cov_ols_plm_Margins_robust)
## 
## z test of coefficients:
## 
##                Estimate  Std. Error z value  Pr(>|z|)    
## (Intercept)  1.8555e+01  5.9706e+00  3.1078  0.001885 ** 
## EM           1.4556e-01  5.2625e-02  2.7659  0.005677 ** 
## IM           2.1621e-02  3.9949e-03  5.4120 6.231e-08 ***
## Edu          7.2378e-04  3.5462e-04  2.0410  0.041250 *  
## Pop         -1.1194e+01  6.1271e+00 -1.8270  0.067705 .  
## Inv         -2.2800e-02  2.7050e-01 -0.0843  0.932826    
## FDI          7.9119e-01  5.1032e-01  1.5504  0.121051    
## log(Imp)     1.8099e-01  4.5233e-02  4.0012 6.302e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Fixed (individual) effects Margins_robust

fe_ind_Margins_robust <- plm(Margins, 
              data = long_data_robust, 
              index = c("region", "year"),
              effect = "individual",
              model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data_robust)
cov_fe_ind_Margins_robust <- vcovHC(fe_ind_Margins_robust, type = "HC3")
se_fe_ind_Margins_robust <- sqrt(diag(cov_fe_ind_Margins_robust))
# coeftest(fe_ind_Margins_robust, df = Inf, vcov = cov_fe_ind_Margins_robust)

Random Effects Margins_robust

re_Margins_robust <- plm(Margins, 
              data = long_data_robust, 
              index = c("region", "year"),
              model = "random")
cov_re_Margins_robust <- vcovHC(re_Margins_robust, type = "HC3")
se_re_Margins_robust <- sqrt(diag(cov_re_Margins_robust))
#coeftest(re_Margins_robust, df = Inf, vcov = cov_re_Margins_robust)

Panel regressions table, Margins_robust

stargazer(ols_plm_Margins_robust,  re_Margins_robust, fe_ind_Margins_robust, se = list(se_ols_plm_Margins_robust,se_re_Margins_robust, se_fe_ind_Margins_robust), type = "text", omit = c('Constant'), keep.stat = c("n")) # font.size = "tiny"
## 
## ==========================================
##                   Dependent variable:     
##              -----------------------------
##                        log(GRP)           
##                 (1)       (2)       (3)   
## ------------------------------------------
## EM           0.146***   0.0004    -0.014  
##               (0.053)   (0.033)   (0.053) 
##                                           
## IM           0.022***   0.003*     0.001  
##               (0.004)   (0.001)   (0.002) 
##                                           
## Edu           0.001**  -0.002*** -0.002***
##              (0.0004)  (0.0003)  (0.0003) 
##                                           
## Pop          -11.194*  -9.781***  -7.406* 
##               (6.127)   (3.760)   (3.861) 
##                                           
## Inv           -0.023    -0.254    -0.273  
##               (0.271)   (0.187)   (0.194) 
##                                           
## FDI            0.791    -0.136    -0.205  
##               (0.510)   (0.108)   (0.135) 
##                                           
## log(Imp)     0.181***  0.059***    0.032  
##               (0.045)   (0.021)   (0.021) 
##                                           
## ------------------------------------------
## Observations    380       380       380   
## ==========================================
## Note:          *p<0.1; **p<0.05; ***p<0.01
#order: Pooled OLS, RE, FE

Tests for best panel model for the Margins_robust specification

#Ftest - between fixed effects and pooled OLS
#H0 - pooled OLS are better than fixed effects (because all individual effects are the same)
#H1 - fixed effects are better than pooled OLS ones
pFtest(fe_ind_Margins_robust, ols_plm_Margins_robust)
## 
##  F test for individual effects
## 
## data:  Margins
## F = 97.955, df1 = 75, df2 = 297, p-value < 2.2e-16
## alternative hypothesis: significant effects
#fixed effects are better, as expected
#plmtest - between random effects and pooled OLS
#H0 - pooled OLS are better than random effects (because all random effects are the same - on panel effect)
#H1 - random effects are better than pooled OLS ones
plmtest(ols_plm_Margins_robust, type = c('bp'))
## 
##  Lagrange Multiplier Test - (Breusch-Pagan)
## 
## data:  Margins
## chisq = 373.78, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
#random effects are better
#phtest - between fixed effects and random effects
#H0 - random effects are better than fixed effects (random coef. are more efficient)
#H1 - fixed effects are better than random effects (fixed coef. are more efficient)
hausman_test <- phtest(fe_ind_Margins_robust, re_Margins_robust)
hausman_test
## 
##  Hausman Test
## 
## data:  Margins
## chisq = 63.161, df = 7, p-value = 3.519e-11
## alternative hypothesis: one model is inconsistent
#fixed effects are better then random ones

Let’s build two-ways variant of FE.

Best panel model for the Margins_robust specification

fe_twoways_Margins_robust <- plm(Margins, 
              data = long_data_robust, 
              index = c("region", "year"),
              effect = "twoways",
              model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data_robust)
cov_fe_twoways_Margins_robust <- vcovHC(fe_twoways_Margins_robust, type = "HC3")
se_fe_twoways_Margins_robust <- sqrt(diag(cov_fe_twoways_Margins_robust))
# coeftest(fe_twoways_Margins_robust, df = Inf, vcov = cov_fe_twoways_Margins_robust)

Panel models, united tables, robust

All models, robust

stargazer(ols_plm_Theil_robust, re_Theil_robust, fe_ind_Theil_robust, # Theil_robust
          ols_plm_Margins_robust, re_Margins_robust, fe_ind_Margins_robust, # Margins_robust
          se = list(
            se_ols_plm_Theil_robust, se_re_Theil_robust, se_fe_ind_Theil_robust, # Theil_robust
            se_ols_plm_Margins_robust, se_re_Margins_robust, se_fe_ind_Margins_robust), # Margins_robust
          type = "text", omit = c('Constant'), keep.stat = c("n"), font.size = "tiny",
          add.lines = list(c('model', 'OLS', 'RE', 'FE', 'OLS', 'RE', 'FE')),
          title = "All panel models (robust)", digits = 4)
## 
## All panel models (robust)
## ============================================================================
##                                    Dependent variable:                      
##              ---------------------------------------------------------------
##                                         log(GRP)                            
##                 (1)       (2)        (3)        (4)       (5)        (6)    
## ----------------------------------------------------------------------------
## Div           -0.0083  0.0466***  0.0429***                                 
##              (0.0301)   (0.0149)   (0.0156)                                 
##                                                                             
## EM                                           0.1456***   0.0004    -0.0140  
##                                              (0.0526)   (0.0325)   (0.0527) 
##                                                                             
## IM                                           0.0216***  0.0027*     0.0007  
##                                              (0.0040)   (0.0015)   (0.0016) 
##                                                                             
## Edu           -0.0001  -0.0020*** -0.0022*** 0.0007**  -0.0019*** -0.0023***
##              (0.0005)   (0.0003)   (0.0003)  (0.0004)   (0.0003)   (0.0003) 
##                                                                             
## Pop           -8.9012  -6.7965**   -5.7374   -11.1939* -9.7807***  -7.4057* 
##              (9.0349)   (3.3967)   (3.7861)  (6.1271)   (3.7598)   (3.8608) 
##                                                                             
## Inv           0.5738    -0.2715    -0.2784    -0.0228   -0.2542    -0.2730  
##              (0.4752)   (0.1874)   (0.1851)  (0.2705)   (0.1871)   (0.1938) 
##                                                                             
## FDI           1.2809*   -0.1890    -0.2094    0.7912    -0.1360    -0.2050  
##              (0.7546)   (0.1200)   (0.1292)  (0.5103)   (0.1078)   (0.1351) 
##                                                                             
## log(Imp)     0.1512***  0.0374*     0.0266   0.1810*** 0.0593***    0.0323  
##              (0.0555)   (0.0203)   (0.0183)  (0.0452)   (0.0210)   (0.0209) 
##                                                                             
## ----------------------------------------------------------------------------
## model           OLS        RE         FE        OLS        RE         FE    
## Observations    380       380        380        380       380        380    
## ============================================================================
## Note:                                            *p<0.1; **p<0.05; ***p<0.01
# Order: Pooled OLS, RE, FE

The best one, two-ways modifications, robust

stargazer(fe_twoways_Theil_robust, # Theil_robust
          fe_twoways_Margins_robust,# Margins_robust
          se = list(
            se_fe_twoways_Theil_robust,# Theil_robust
            se_fe_twoways_Margins_robust),# Margins_robust
          type = "text", omit = c('Constant'), keep.stat = c("n")) #, font.size = "tiny"
## 
## =========================================
##                  Dependent variable:     
##              ----------------------------
##                        log(GRP)          
##                   (1)            (2)     
## -----------------------------------------
## Div              0.005                   
##                 (0.013)                  
##                                          
## EM                              0.027    
##                                (0.030)   
##                                          
## IM                              0.001    
##                                (0.001)   
##                                          
## Edu             -0.00003      -0.00004   
##                 (0.0003)      (0.0003)   
##                                          
## Pop              -0.912        -0.758    
##                 (2.352)        (2.550)   
##                                          
## Inv             -0.251*       -0.247**   
##                 (0.130)        (0.124)   
##                                          
## FDI              -0.184        -0.174    
##                 (0.113)        (0.106)   
##                                          
## log(Imp)         0.006          0.004    
##                 (0.012)        (0.012)   
##                                          
## -----------------------------------------
## Observations      380            380     
## =========================================
## Note:         *p<0.1; **p<0.05; ***p<0.01

Final table of OLS

stargazer(reg_Theil_2018,# Theil
          reg_Theil_robust_2018, # Theil_robust
          reg_Margins_2018, # Margins
          reg_Margins_robust_2018,# Margins_robust
          
          se = list(se2imp_Theil_2018, # Theil
                    se2imp_Theil_robust_2018,# Theil_robust
                    se2imp_Margins_2018, # Margins
                    se2imp_Margins_robust_2018),# Margins_robust
          type = "text", omit = c('Constant'), keep.stat = c("n"),
          add.lines = list(c('model', 'OLS', 'OLS', 'OLS', 'OLS')),
          title = "OLS", digits = 4) #, font.size = "tiny"
## 
## OLS
## =====================================================
##                         Dependent variable:          
##               ---------------------------------------
##                            log(GRP_2018)             
##                  (1)       (2)       (3)       (4)   
## -----------------------------------------------------
## Div_2018       -0.0146   -0.0450                     
##               (0.0256)  (0.0283)                     
##                                                      
## EM_2018                           0.2188*** 0.2155***
##                                   (0.0598)  (0.0593) 
##                                                      
## IM_2018                           0.0180*** 0.0182***
##                                   (0.0062)  (0.0062) 
##                                                      
## Edu_2018       0.0003    -0.0001   0.0007*   0.0005  
##               (0.0005)  (0.0006)  (0.0004)  (0.0005) 
##                                                      
## Pop_2018       -6.8542   -6.5859  -12.8861* -13.2407*
##               (9.5294)  (9.5860)  (7.4090)  (7.4636) 
##                                                      
## Inv_2018       0.5134    0.4327    -0.1231   -0.1140 
##               (0.5958)  (0.5823)  (0.3866)  (0.3915) 
##                                                      
## FDI_2018      2.0064**   1.6214*   0.7532    0.6634  
##               (0.8901)  (0.9355)  (0.5929)  (0.6735) 
##                                                      
## log(Imp_2018) 0.1857*** 0.2121*** 0.2787*** 0.2745***
##               (0.0455)  (0.0481)  (0.0593)  (0.0592) 
##                                                      
## -----------------------------------------------------
## model            OLS       OLS       OLS       OLS   
## Observations     78        76        78        76    
## =====================================================
## Note:                     *p<0.1; **p<0.05; ***p<0.01

Final table of FE two-ways specifications

stargazer(fe_twoways_Theil,# Theil
          fe_twoways_Theil_robust, # Theil_robust
          fe_twoways_Margins, # Margins
          fe_twoways_Margins_robust,# Margins_robust
          
          se = list(se_fe_twoways_Theil, # Theil
                    se_fe_twoways_Theil_robust,# Theil_robust
                    se_fe_twoways_Margins, # Margins
                    se_fe_twoways_Margins_robust),# Margins_robust
          type = "text", omit = c('Constant'), keep.stat = c("n"),
          add.lines = list(c('model', 'FE', 'FE', 'FE', 'FE')),
          title = "Two-ways FE", digits = 4) #, font.size = "tiny"
## 
## Two-ways FE
## ==================================================
##                       Dependent variable:         
##              -------------------------------------
##                            log(GRP)               
##                (1)      (2)       (3)       (4)   
## --------------------------------------------------
## Div           0.0046   0.0054                     
##              (0.0098) (0.0126)                    
##                                                   
## EM                              0.0277    0.0274  
##                                (0.0304)  (0.0305) 
##                                                   
## IM                              0.0011    0.0011  
##                                (0.0009)  (0.0009) 
##                                                   
## Edu          -0.00004 -0.00003  -0.0001  -0.00004 
##              (0.0003) (0.0003) (0.0003)  (0.0003) 
##                                                   
## Pop          -0.6805  -0.9115   -0.5585   -0.7576 
##              (2.0616) (2.3521) (2.2658)  (2.5497) 
##                                                   
## Inv          -0.2519* -0.2511* -0.2471** -0.2474**
##              (0.1305) (0.1304) (0.1230)  (0.1240) 
##                                                   
## FDI          -0.1837  -0.1837  -0.1739*   -0.1736 
##              (0.1123) (0.1127) (0.1051)  (0.1056) 
##                                                   
## log(Imp)      0.0064   0.0062   0.0039    0.0038  
##              (0.0116) (0.0117) (0.0121)  (0.0120) 
##                                                   
## --------------------------------------------------
## model           FE       FE       FE        FE    
## Observations   390      380       390       380   
## ==================================================
## Note:                  *p<0.1; **p<0.05; ***p<0.01

Not robust results (betas of independent variables).