The following research is conducted by Maria R. Koldasheva and Nikolai A. Popov.

Libraries

library(dplyr) 
library(psych)
library(readxl)
library(writexl)
library(kableExtra)

# Econometrics
library(tidyverse)
library(plm) # panel models
library(sandwich) #covariance matrixes
library(lmtest) # tests
library(xtable)# latex tables
library(stargazer) # latex regression tables
library(ggpubr) # correlation test

Downloading the data

# Downloading
wide_data <- read_csv('C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Wide_united_data.csv',
                      show_col_types = FALSE)

long_data <- read_csv('C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Long_united_data.csv',
                      show_col_types = FALSE)

map_data <- read_csv(
  'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Wide_united_data_map.csv',
  show_col_types = FALSE)

# Exporting them back to fix QGIS coding issues
write_xlsx(wide_data,
           'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Wide_united_data_geo.xlsx')

# write_xlsx(long_data,
#            'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Long_united_data_geo.xlsx')

# Observing the tables
wide_data
long_data

Descriptive statistics

# For all the variables
Desc_stat <- long_data %>% psych::describe()

# For all variables
Index_stat <- Desc_stat[c(3:11), c(2, 3:4, (8:9))]

# row.names(Index_stat) <- c('Theil index', 'Extensive Margin', 'Intensive Margin')
xtable(Index_stat, caption = 'Descriptive statistics', digits = 3, type = "latex")
# For dependent/independent variables
Index_stat_important <- Desc_stat[c(3:6), c(2, 3:4, (8:9))]

# row.names(Index_stat) <- c('Theil index', 'Extensive Margin', 'Intensive Margin')
xtable(Index_stat_important, caption = 'Descriptive statistics', digits = 3, type = "latex")

Correlation matrix

res1 <- cor(long_data[, c(3:(length(long_data)-6))])
res1
##             GRP        Div           EM          IM        Imp         Edu
## GRP  1.00000000  0.3146169 -0.038722996  0.64378658  0.3743528  0.02979357
## Div  0.31461686  1.0000000 -0.628686905  0.15837051  0.8189165  0.41657305
## EM  -0.03872300 -0.6286869  1.000000000 -0.05498696 -0.3377338 -0.30802795
## IM   0.64378658  0.1583705 -0.054986955  1.00000000  0.1138910 -0.12929391
## Imp  0.37435280  0.8189165 -0.337733790  0.11389101  1.0000000  0.39000887
## Edu  0.02979357  0.4165731 -0.308027954 -0.12929391  0.3900089  1.00000000
## Pop  0.14156322  0.2564669  0.009496447  0.06912775  0.2728343  0.05786492
## Inv  0.04349540 -0.2353995  0.321429672  0.05782095 -0.1519467 -0.14601628
## FDI  0.44006908  0.2610529 -0.093738498  0.28381835  0.3739655 -0.12666111
##             Pop         Inv         FDI
## GRP 0.141563225  0.04349540  0.44006908
## Div 0.256466894 -0.23539946  0.26105288
## EM  0.009496447  0.32142967 -0.09373850
## IM  0.069127748  0.05782095  0.28381835
## Imp 0.272834331 -0.15194671  0.37396553
## Edu 0.057864921 -0.14601628 -0.12666111
## Pop 1.000000000  0.15650166  0.15337992
## Inv 0.156501660  1.00000000  0.06486492
## FDI 0.153379922  0.06486492  1.00000000
library(ggcorrplot)

#visualize correlation matrix
ggcorrplot(cor(long_data[, c(3:(length(long_data)-6))]))

Create a dataset for 2018

2018 is taken as the last economically stable year available in our data.

get_year <- function(df, year){
  df %>%
    select(contains(c('region', year)))
}


Data_2018 <- get_year(wide_data, '2018')
Map_2018 <- get_year(map_data, '2018')

# Exporting this file
write_xlsx(Data_2018,
           'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Data_2018_geo.xlsx')

write_xlsx(Map_2018,
           'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Map_2018_geo.xlsx')

Specifications

For non-dynamic models we use the simplified specification without lags.

Cross-sectional specifications

# Theil index specification
Theil_2018 <- log(GRP_2018) ~ Div_2018 +  Edu_2018 + Pop_2018 + Inv_2018 + FDI_2018 + log(Imp_2018)

# WM and IM specification
Margins_2018 <- log(GRP_2018) ~ EM_2018 + IM_2018 + Edu_2018 + Pop_2018 + Inv_2018 + FDI_2018 + log(Imp_2018)

Panel specifications

# Theil index specification
Theil <- log(GRP) ~ Div + Edu + Pop + Inv + FDI + log(Imp)

# EM and IM specification
Margins <- log(GRP) ~ EM + IM + Edu + Pop + Inv + FDI + log(Imp)

Cross-sectional models

# Theil index specification
reg_Theil_2018 <- lm(Theil_2018, data = Data_2018)

vcov2_Theil_2018 <- vcovHC(reg_Theil_2018, type = "HC0")
se2imp_Theil_2018 <- sqrt(diag(vcov2_Theil_2018))

# WM and IM specification
reg_Margins_2018 <- lm(Margins_2018, data = Data_2018)

vcov2_Margins_2018 <- vcovHC(reg_Margins_2018, type = "HC0")
se2imp_Margins_2018 <- sqrt(diag(vcov2_Margins_2018))

# Table
stargazer(reg_Theil_2018, reg_Margins_2018, se = list(se2imp_Theil_2018, se2imp_Margins_2018), type = 'text', omit = c('Constant'), keep.stat = c("n")) # font.size = "tiny"
## 
## ==========================================
##                   Dependent variable:     
##               ----------------------------
##                      log(GRP_2018)        
##                    (1)            (2)     
## ------------------------------------------
## Div_2018          -0.015                  
##                  (0.026)                  
##                                           
## EM_2018                        0.219***   
##                                 (0.060)   
##                                           
## IM_2018                        0.018***   
##                                 (0.006)   
##                                           
## Edu_2018          0.0003        0.001*    
##                  (0.001)       (0.0004)   
##                                           
## Pop_2018          -6.854       -12.886*   
##                  (9.529)        (7.409)   
##                                           
## Inv_2018          0.513         -0.123    
##                  (0.596)        (0.387)   
##                                           
## FDI_2018         2.006**         0.753    
##                  (0.890)        (0.593)   
##                                           
## log(Imp_2018)    0.186***      0.279***   
##                  (0.046)        (0.059)   
##                                           
## ------------------------------------------
## Observations        78            78      
## ==========================================
## Note:          *p<0.1; **p<0.05; ***p<0.01

Panel models

Theil specification

Pooled OLS Theil

ols_plm_Theil <- plm(Theil, 
               data = long_data, 
               index = c("region", "year"),
               model = "pooling")
cov_ols_plm_Theil <- vcovHC(ols_plm_Theil, type = "HC3")
se_ols_plm_Theil <- sqrt(diag(cov_ols_plm_Theil))
coeftest(ols_plm_Theil, df = Inf, vcov = cov_ols_plm_Theil)
## 
## z test of coefficients:
## 
##                Estimate  Std. Error z value Pr(>|z|)   
## (Intercept)  1.7007e+01  8.2036e+00  2.0731 0.038163 * 
## Div          8.5641e-03  1.9625e-02  0.4364 0.662557   
## Edu          5.9504e-05  3.5335e-04  0.1684 0.866271   
## Pop         -7.7624e+00  7.9256e+00 -0.9794 0.327377   
## Inv          5.1728e-01  3.7254e-01  1.3885 0.164982   
## FDI          1.5422e+00  6.6096e-01  2.3333 0.019630 * 
## log(Imp)     1.4015e-01  4.2796e-02  3.2747 0.001058 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Fixed (individual) effects Theil

fe_ind_Theil <- plm(Theil, 
              data = long_data, 
              index = c("region", "year"),
              effect = "individual",
              model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data)
cov_fe_ind_Theil <- vcovHC(fe_ind_Theil, type = "HC3")
se_fe_ind_Theil <- sqrt(diag(cov_fe_ind_Theil))
# coeftest(fe_ind_Theil, df = Inf, vcov = cov_fe_ind_Theil)

Random Effects Theil

re_Theil <- plm(Theil, 
              data = long_data, 
              index = c("region", "year"),
              model = "random")
cov_re_Theil <- vcovHC(re_Theil, type = "HC3")
se_re_Theil <- sqrt(diag(cov_re_Theil))
#coeftest(re_Theil, df = Inf, vcov = cov_re_Theil)

Panel regressions table, Theil

stargazer(ols_plm_Theil, re_Theil, fe_ind_Theil, se = list(se_ols_plm_Theil, se_re_Theil, se_fe_ind_Theil), type = "text", omit = c('Constant'), keep.stat = c("n")) # font.size = "tiny"
## 
## ==========================================
##                   Dependent variable:     
##              -----------------------------
##                        log(GRP)           
##                 (1)       (2)       (3)   
## ------------------------------------------
## Div            0.009   0.037***  0.027*** 
##               (0.020)   (0.012)   (0.010) 
##                                           
## Edu           0.0001   -0.001*** -0.002***
##              (0.0004)  (0.0002)  (0.0002) 
##                                           
## Pop           -7.762    -3.569    -3.665  
##               (7.926)   (2.593)   (2.856) 
##                                           
## Inv            0.517    -0.249    -0.227  
##               (0.373)   (0.169)   (0.161) 
##                                           
## FDI           1.542**   -0.202*  -0.231** 
##               (0.661)   (0.103)   (0.110) 
##                                           
## log(Imp)     0.140***    0.034     0.020  
##               (0.043)   (0.023)   (0.021) 
##                                           
## ------------------------------------------
## Observations    546       546       546   
## ==========================================
## Note:          *p<0.1; **p<0.05; ***p<0.01
#order: Pooled OLS, RE, FE

Tests for best panel model for the Theil specification

#Ftest - between fixed effects and pooled OLS
#H0 - pooled OLS are better than fixed effects (because all individual effects are the same)
#H1 - fixed effects are better than pooled OLS ones
pFtest(fe_ind_Theil, ols_plm_Theil)
## 
##  F test for individual effects
## 
## data:  Theil
## F = 206.21, df1 = 77, df2 = 462, p-value < 2.2e-16
## alternative hypothesis: significant effects
#fixed effects are better, as expected
#plmtest - between random effects and pooled OLS
#H0 - pooled OLS are better than random effects (because all random effects are the same - on panel effect)
#H1 - random effects are better than pooled OLS ones
plmtest(ols_plm_Theil, type = c('bp'))
## 
##  Lagrange Multiplier Test - (Breusch-Pagan)
## 
## data:  Theil
## chisq = 1215.3, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
#random effects are better
#phtest - between fixed effects and random effects
#H0 - random effects are better than fixed effects (random coef. are more efficient)
#H1 - fixed effects are better than random effects (fixed coef. are more efficient)
phtest(fe_ind_Theil, re_Theil)
## 
##  Hausman Test
## 
## data:  Theil
## chisq = 9.9076, df = 6, p-value = 0.1286
## alternative hypothesis: one model is inconsistent
#random effects are better than fixed effects

Still, by all economic means, RE cannot be applied to our model. There should be a problem in our data (possible with the indexes), which causes biases in the Hausmann test. Thus, we will use FE.

Best panel model for the Theil specification

fe_twoways_Theil <- plm(Theil, 
              data = long_data, 
              index = c("region", "year"),
              effect = "twoways",
              model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data)
cov_fe_twoways_Theil <- vcovHC(fe_twoways_Theil, type = "HC3")
se_fe_twoways_Theil <- sqrt(diag(cov_fe_twoways_Theil))
# coeftest(fe_twoways_Theil, df = Inf, vcov = cov_fe_twoways_Theil)

Margins specification

Pooled OLS Margins

ols_plm_Margins <- plm(Margins, 
               data = long_data, 
               index = c("region", "year"),
               model = "pooling")
cov_ols_plm_Margins <- vcovHC(ols_plm_Margins, type = "HC3")
se_ols_plm_Margins <- sqrt(diag(cov_ols_plm_Margins))
coeftest(ols_plm_Margins, df = Inf, vcov = cov_ols_plm_Margins)
## 
## z test of coefficients:
## 
##                Estimate  Std. Error z value  Pr(>|z|)    
## (Intercept) 16.84836620  5.23392083  3.2191  0.001286 ** 
## EM           0.14718778  0.05211557  2.8243  0.004739 ** 
## IM           0.02117837  0.00343260  6.1698 6.838e-10 ***
## Edu          0.00076023  0.00026021  2.9215  0.003483 ** 
## Pop         -9.58929309  5.48289191 -1.7489  0.080300 .  
## Inv         -0.18333138  0.21765178 -0.8423  0.399612    
## FDI          0.92502198  0.45160245  2.0483  0.040530 *  
## log(Imp)     0.18633702  0.04406512  4.2287 2.351e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Fixed (individual) effects Margins

fe_ind_Margins <- plm(Margins, 
              data = long_data, 
              index = c("region", "year"),
              effect = "individual",
              model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data)
cov_fe_ind_Margins <- vcovHC(fe_ind_Margins, type = "HC3")
se_fe_ind_Margins <- sqrt(diag(cov_fe_ind_Margins))
# coeftest(fe_ind_Margins, df = Inf, vcov = cov_fe_ind_Margins)

Random Effects Margins

re_Margins <- plm(Margins, 
              data = long_data, 
              index = c("region", "year"),
              model = "random")
cov_re_Margins <- vcovHC(re_Margins, type = "HC3")
se_re_Margins <- sqrt(diag(cov_re_Margins))
#coeftest(re_Margins, df = Inf, vcov = cov_re_Margins)

Panel regressions table, Margins

stargazer(ols_plm_Margins,  re_Margins, fe_ind_Margins, se = list(se_ols_plm_Margins,se_re_Margins, se_fe_ind_Margins), type = "text", omit = c('Constant'), keep.stat = c("n")) # font.size = "tiny"
## 
## ==========================================
##                   Dependent variable:     
##              -----------------------------
##                        log(GRP)           
##                 (1)       (2)       (3)   
## ------------------------------------------
## EM           0.147***    0.020     0.036  
##               (0.052)   (0.024)   (0.026) 
##                                           
## IM           0.021***  0.004***   0.002** 
##               (0.003)   (0.001)   (0.001) 
##                                           
## Edu          0.001***  -0.001*** -0.002***
##              (0.0003)  (0.0003)  (0.0002) 
##                                           
## Pop           -9.589*  -5.453**   -4.886* 
##               (5.483)   (2.491)   (2.814) 
##                                           
## Inv           -0.183    -0.316*   -0.227  
##               (0.218)   (0.167)   (0.143) 
##                                           
## FDI           0.925**   -0.136*  -0.213** 
##               (0.452)   (0.082)   (0.101) 
##                                           
## log(Imp)     0.186***   0.065**    0.023  
##               (0.044)   (0.027)   (0.021) 
##                                           
## ------------------------------------------
## Observations    546       546       546   
## ==========================================
## Note:          *p<0.1; **p<0.05; ***p<0.01
#order: Pooled OLS, RE, FE

Tests for best panel model for the Margins specification

#Ftest - between fixed effects and pooled OLS
#H0 - pooled OLS are better than fixed effects (because all individual effects are the same)
#H1 - fixed effects are better than pooled OLS ones
pFtest(fe_ind_Margins, ols_plm_Margins)
## 
##  F test for individual effects
## 
## data:  Margins
## F = 101.45, df1 = 77, df2 = 461, p-value < 2.2e-16
## alternative hypothesis: significant effects
#fixed effects are better, as expected
#plmtest - between random effects and pooled OLS
#H0 - pooled OLS are better than random effects (because all random effects are the same - on panel effect)
#H1 - random effects are better than pooled OLS ones
plmtest(ols_plm_Margins, type = c('bp'))
## 
##  Lagrange Multiplier Test - (Breusch-Pagan)
## 
## data:  Margins
## chisq = 744.36, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
#random effects are better
#phtest - between fixed effects and random effects
#H0 - random effects are better than fixed effects (random coef. are more efficient)
#H1 - fixed effects are better than random effects (fixed coef. are more efficient)
phtest(fe_ind_Margins, re_Margins)
## 
##  Hausman Test
## 
## data:  Margins
## chisq = 0.75482, df = 7, p-value = 0.9979
## alternative hypothesis: one model is inconsistent
#random effects are better than fixed effects

Still, by all economic means, RE cannot be applied to our model. There should be a problem in our data (possible with the indexes), which causes biases in the Hausmann test. Thus, we will use FE.

Best panel model for the Margins specification

fe_twoways_Margins <- plm(Margins, 
              data = long_data, 
              index = c("region", "year"),
              effect = "twoways",
              model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data)
cov_fe_twoways_Margins <- vcovHC(fe_twoways_Margins, type = "HC3")
se_fe_twoways_Margins <- sqrt(diag(cov_fe_twoways_Margins))
# coeftest(fe_twoways_Margins, df = Inf, vcov = cov_fe_twoways_Margins)

Panel models, united tables

Fixing Stargazer

# #echo = FALSE, warning = FALSE
# ## Quick fix for stargazer <= 5.2.3 is.na() issue with long model names in R >= 4.2
# # Unload stargazer if loaded
# detach("package:stargazer",unload=T)
# # Delete it
# remove.packages("stargazer")
# # Download the source
# download.file("https://cran.r-project.org/src/contrib/stargazer_5.2.3.tar.gz", destfile = "stargazer_5.2.3.tar.gz")
# # Unpack
# untar("stargazer_5.2.3.tar.gz")
# # Read the sourcefile with .inside.bracket fun
# stargazer_src <- readLines("stargazer/R/stargazer-internal.R")
# # Move the length check 5 lines up so it precedes is.na(.)
# stargazer_src[1990] <- stargazer_src[1995]
# stargazer_src[1995] <- ""
# # Save back
# writeLines(stargazer_src, con="stargazer/R/stargazer-internal.R")
# # Compile and install the patched package
# install.packages("stargazer", repos = NULL, type="source")

All models

stargazer(ols_plm_Theil, re_Theil, fe_ind_Theil, # Theil
          ols_plm_Margins, re_Margins, fe_ind_Margins, # Margins
          se = list(
            se_ols_plm_Theil, se_re_Theil, se_fe_ind_Theil, # Theil
            se_ols_plm_Margins, se_re_Margins, se_fe_ind_Margins), # Margins
          type = "text", omit = c('Constant'), keep.stat = c("n"), font.size = "tiny",
          add.lines = list(c('model', 'OLS', 'RE', 'FE', 'OLS', 'RE', 'FE')),
          title = "All panel models", digits = 4)
## 
## All panel models
## ============================================================================
##                                    Dependent variable:                      
##              ---------------------------------------------------------------
##                                         log(GRP)                            
##                 (1)       (2)        (3)        (4)       (5)        (6)    
## ----------------------------------------------------------------------------
## Div           0.0086   0.0375***  0.0265***                                 
##              (0.0196)   (0.0122)   (0.0101)                                 
##                                                                             
## EM                                           0.1472***   0.0201     0.0358  
##                                              (0.0521)   (0.0238)   (0.0265) 
##                                                                             
## IM                                           0.0212*** 0.0037***   0.0024** 
##                                              (0.0034)   (0.0012)   (0.0011) 
##                                                                             
## Edu           0.0001   -0.0014*** -0.0016*** 0.0008*** -0.0013*** -0.0018***
##              (0.0004)   (0.0002)   (0.0002)  (0.0003)   (0.0003)   (0.0002) 
##                                                                             
## Pop           -7.7624   -3.5686    -3.6647   -9.5893*  -5.4534**   -4.8862* 
##              (7.9256)   (2.5932)   (2.8556)  (5.4829)   (2.4914)   (2.8144) 
##                                                                             
## Inv           0.5173    -0.2493    -0.2266    -0.1833   -0.3161*   -0.2268  
##              (0.3725)   (0.1695)   (0.1607)  (0.2177)   (0.1674)   (0.1429) 
##                                                                             
## FDI          1.5422**   -0.2015*  -0.2315**  0.9250**   -0.1362*  -0.2131** 
##              (0.6610)   (0.1030)   (0.1100)  (0.4516)   (0.0822)   (0.1009) 
##                                                                             
## log(Imp)     0.1401***   0.0339     0.0201   0.1863***  0.0652**    0.0226  
##              (0.0428)   (0.0229)   (0.0211)  (0.0441)   (0.0274)   (0.0212) 
##                                                                             
## ----------------------------------------------------------------------------
## model           OLS        RE         FE        OLS        RE         FE    
## Observations    546       546        546        546       546        546    
## ============================================================================
## Note:                                            *p<0.1; **p<0.05; ***p<0.01
# Order: Pooled OLS, RE, FE

The best one, two-ways modifications

stargazer(fe_twoways_Theil, # Theil
          fe_twoways_Margins,# Margins
          se = list(
            se_fe_twoways_Theil,# Theil
            se_fe_twoways_Margins),# Margins
          type = "text", omit = c('Constant'), keep.stat = c("n")) #, font.size = "tiny"
## 
## =========================================
##                  Dependent variable:     
##              ----------------------------
##                        log(GRP)          
##                   (1)            (2)     
## -----------------------------------------
## Div              0.008                   
##                 (0.008)                  
##                                          
## EM                            0.052***   
##                                (0.018)   
##                                          
## IM                             0.002**   
##                                (0.001)   
##                                          
## Edu             -0.0004        -0.0003   
##                 (0.0005)      (0.0004)   
##                                          
## Pop              3.173          2.466    
##                 (3.019)        (2.975)   
##                                          
## Inv              -0.211        -0.187    
##                 (0.148)        (0.135)   
##                                          
## FDI              -0.148        -0.136    
##                 (0.092)        (0.084)   
##                                          
## log(Imp)         0.010          0.009    
##                 (0.017)        (0.017)   
##                                          
## -----------------------------------------
## Observations      546            546     
## =========================================
## Note:         *p<0.1; **p<0.05; ***p<0.01

SAME CODE, BUT FOR ROBUST DATASET (NO MSK, SPB)

Downloading the data, robust

# Downloading
wide_data_robust <- read_csv(
  'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Wide_united_data_robust.csv',
                      show_col_types = FALSE)

long_data_robust  <- read_csv(
  'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Long_united_data_robust.csv',
                      show_col_types = FALSE)


# Exporting them back to fix QGIS coding issues
write_xlsx(wide_data_robust,
           'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Wide_united_data_robust_geo.xlsx')
# write_xlsx(long_data_robust,
#            'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Long_united_data_robust_geo.xlsx')

# Observing the tables
wide_data_robust
long_data_robust

Descriptive statistics, robust

# For all the variables
Desc_stat_robust <- long_data_robust %>% psych::describe()

# For diversification indexes
Index_stat_robust <- Desc_stat_robust[c(4:6), c(2, 3:4, (8:9))]


# row.names(Index_stat) <- c('Theil_robust index', 'Extensive Margin', 'Intensive Margin')
xtable(Index_stat_robust, caption = 'Descriptive statistics', digits = 3, type = "latex")

No extreme values of IM now.

Correlation matrix, robust

res1_robust <- cor(long_data_robust[, c(3:(length(long_data_robust)-6))])
res1_robust
##             GRP         Div          EM          IM         Imp         Edu
## GRP  1.00000000  0.12046454  0.03859512  0.65649208  0.25091214 -0.14958925
## Div  0.12046454  1.00000000 -0.70020999  0.15537158  0.63382255  0.09129487
## EM   0.03859512 -0.70020999  1.00000000 -0.04011758 -0.43768312 -0.21937034
## IM   0.65649208  0.15537158 -0.04011758  1.00000000  0.14558743 -0.19019470
## Imp  0.25091214  0.63382255 -0.43768312  0.14558743  1.00000000 -0.13265746
## Edu -0.14958925  0.09129487 -0.21937034 -0.19019470 -0.13265746  1.00000000
## Pop  0.08861862  0.16799415  0.06597524  0.05827624  0.36067681 -0.06556583
## Inv  0.09861951 -0.18139214  0.29565431  0.07015477 -0.05516198 -0.07647036
## FDI  0.38155793  0.05952865 -0.02877383  0.27741739  0.24955672 -0.32491996
##             Pop         Inv         FDI
## GRP  0.08861862  0.09861951  0.38155793
## Div  0.16799415 -0.18139214  0.05952865
## EM   0.06597524  0.29565431 -0.02877383
## IM   0.05827624  0.07015477  0.27741739
## Imp  0.36067681 -0.05516198  0.24955672
## Edu -0.06556583 -0.07647036 -0.32491996
## Pop  1.00000000  0.20059835  0.10996522
## Inv  0.20059835  1.00000000  0.11527150
## FDI  0.10996522  0.11527150  1.00000000
library(ggcorrplot)

#visualize correlation matrix
ggcorrplot(cor(long_data_robust[, c(3:(length(long_data_robust)-6))]))

Create a dataset for 2018, robust

2018 is taken as the last economically stable year available in our data.

Data_2018_robust <- get_year(wide_data_robust, '2018')

# Exporting this file
write_xlsx(Data_2018_robust,
           'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Data_2018_robust_geo.xlsx')

Cross-sectional models, robust

# Theil_robust index specification
reg_Theil_robust_2018 <- lm(Theil_2018, data = Data_2018_robust)

vcov2_Theil_robust_2018 <- vcovHC(reg_Theil_robust_2018, type = "HC0")
se2imp_Theil_robust_2018 <- sqrt(diag(vcov2_Theil_robust_2018))

# WM and IM specification
reg_Margins_robust_2018 <- lm(Margins_2018, data = Data_2018_robust)

vcov2_Margins_robust_2018 <- vcovHC(reg_Margins_robust_2018, type = "HC0")
se2imp_Margins_robust_2018 <- sqrt(diag(vcov2_Margins_robust_2018))

# Table
stargazer(reg_Theil_robust_2018, reg_Margins_robust_2018, se = list(se2imp_Theil_robust_2018, se2imp_Margins_robust_2018), type = 'text', omit = c('Constant'), keep.stat = c("n")) # font.size = "tiny"
## 
## ==========================================
##                   Dependent variable:     
##               ----------------------------
##                      log(GRP_2018)        
##                    (1)            (2)     
## ------------------------------------------
## Div_2018          -0.045                  
##                  (0.028)                  
##                                           
## EM_2018                        0.215***   
##                                 (0.059)   
##                                           
## IM_2018                        0.018***   
##                                 (0.006)   
##                                           
## Edu_2018         -0.0001         0.001    
##                  (0.001)        (0.001)   
##                                           
## Pop_2018          -6.586       -13.241*   
##                  (9.586)        (7.464)   
##                                           
## Inv_2018          0.433         -0.114    
##                  (0.582)        (0.392)   
##                                           
## FDI_2018          1.621*         0.663    
##                  (0.936)        (0.673)   
##                                           
## log(Imp_2018)    0.212***      0.274***   
##                  (0.048)        (0.059)   
##                                           
## ------------------------------------------
## Observations        76            76      
## ==========================================
## Note:          *p<0.1; **p<0.05; ***p<0.01

Panel models, robust

Theil_robust specification

Pooled OLS Theil_robust

ols_plm_Theil_robust <- plm(Theil, 
               data = long_data_robust, 
               index = c("region", "year"),
               model = "pooling")
cov_ols_plm_Theil_robust <- vcovHC(ols_plm_Theil_robust, type = "HC3")
se_ols_plm_Theil_robust <- sqrt(diag(cov_ols_plm_Theil_robust))
coeftest(ols_plm_Theil_robust, df = Inf, vcov = cov_ols_plm_Theil_robust)
## 
## z test of coefficients:
## 
##                Estimate  Std. Error z value Pr(>|z|)   
## (Intercept) 17.06183552  8.53429728  1.9992 0.045586 * 
## Div         -0.01247898  0.02646898 -0.4715 0.637315   
## Edu         -0.00014571  0.00044147 -0.3300 0.741363   
## Pop         -7.99324498  8.21824558 -0.9726 0.330741   
## Inv          0.52125953  0.37829033  1.3779 0.168223   
## FDI          1.39439102  0.73046089  1.9089 0.056272 . 
## log(Imp)     0.15175351  0.04845296  3.1320 0.001736 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Fixed (individual) effects Theil_robust

fe_ind_Theil_robust <- plm(Theil, 
              data = long_data_robust, 
              index = c("region", "year"),
              effect = "individual",
              model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data_robust)
cov_fe_ind_Theil_robust <- vcovHC(fe_ind_Theil_robust, type = "HC3")
se_fe_ind_Theil_robust <- sqrt(diag(cov_fe_ind_Theil_robust))
# coeftest(fe_ind_Theil_robust, df = Inf, vcov = cov_fe_ind_Theil_robust)

Random Effects Theil_robust

re_Theil_robust <- plm(Theil, 
              data = long_data_robust, 
              index = c("region", "year"),
              model = "random")
cov_re_Theil_robust <- vcovHC(re_Theil_robust, type = "HC3")
se_re_Theil_robust <- sqrt(diag(cov_re_Theil_robust))
#coeftest(re_Theil_robust, df = Inf, vcov = cov_re_Theil_robust)

Panel regressions table, Theil_robust

stargazer(ols_plm_Theil_robust, re_Theil_robust, fe_ind_Theil_robust, se = list(se_ols_plm_Theil_robust, se_re_Theil_robust, se_fe_ind_Theil_robust), type = "text", omit = c('Constant'), keep.stat = c("n")) # font.size = "tiny"
## 
## ==========================================
##                   Dependent variable:     
##              -----------------------------
##                        log(GRP)           
##                 (1)       (2)       (3)   
## ------------------------------------------
## Div           -0.012    0.026**   0.023** 
##               (0.026)   (0.011)   (0.011) 
##                                           
## Edu           -0.0001  -0.002*** -0.002***
##              (0.0004)  (0.0002)  (0.0002) 
##                                           
## Pop           -7.993    -3.543    -3.314  
##               (8.218)   (2.856)   (3.145) 
##                                           
## Inv            0.521    -0.238    -0.228  
##               (0.378)   (0.166)   (0.162) 
##                                           
## FDI           1.394*   -0.212**  -0.234** 
##               (0.730)   (0.107)   (0.114) 
##                                           
## log(Imp)     0.152***    0.032     0.021  
##               (0.048)   (0.023)   (0.021) 
##                                           
## ------------------------------------------
## Observations    532       532       532   
## ==========================================
## Note:          *p<0.1; **p<0.05; ***p<0.01
#order: Pooled OLS, RE, FE

Tests for best panel model for the Theil_robust specification

#Ftest - between fixed effects and pooled OLS
#H0 - pooled OLS are better than fixed effects (because all individual effects are the same)
#H1 - fixed effects are better than pooled OLS ones
pFtest(fe_ind_Theil_robust, ols_plm_Theil_robust)
## 
##  F test for individual effects
## 
## data:  Theil
## F = 205.49, df1 = 75, df2 = 450, p-value < 2.2e-16
## alternative hypothesis: significant effects
#fixed effects are better, as expected
#plmtest - between random effects and pooled OLS
#H0 - pooled OLS are better than random effects (because all random effects are the same - on panel effect)
#H1 - random effects are better than pooled OLS ones
plmtest(ols_plm_Theil_robust, type = c('bp'))
## 
##  Lagrange Multiplier Test - (Breusch-Pagan)
## 
## data:  Theil
## chisq = 1208.8, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
#random effects are better
#phtest - between fixed effects and random effects
#H0 - random effects are better than fixed effects (random coef. are more efficient)
#H1 - fixed effects are better than random effects (fixed coef. are more efficient)
phtest(fe_ind_Theil_robust, re_Theil_robust)
## 
##  Hausman Test
## 
## data:  Theil
## chisq = 1.8048, df = 6, p-value = 0.9367
## alternative hypothesis: one model is inconsistent
#random effects are better than fixed effects

Still, by all economic means, RE cannot be applied to our model. There should be a problem in our data (possible with the indexes), which causes biases in the Hausmann test. Thus, we will use FE.

Best panel model for the Theil_robust specification

fe_twoways_Theil_robust <- plm(Theil, 
              data = long_data_robust, 
              index = c("region", "year"),
              effect = "twoways",
              model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data_robust)
cov_fe_twoways_Theil_robust <- vcovHC(fe_twoways_Theil_robust, type = "HC3")
se_fe_twoways_Theil_robust <- sqrt(diag(cov_fe_twoways_Theil_robust))
# coeftest(fe_twoways_Theil_robust, df = Inf, vcov = cov_fe_twoways_Theil_robust)

Margins_robust specification

Pooled OLS Margins_robust

ols_plm_Margins_robust <- plm(Margins, 
               data = long_data_robust, 
               index = c("region", "year"),
               model = "pooling")
cov_ols_plm_Margins_robust <- vcovHC(ols_plm_Margins_robust, type = "HC3")
se_ols_plm_Margins_robust <- sqrt(diag(cov_ols_plm_Margins_robust))
coeftest(ols_plm_Margins_robust, df = Inf, vcov = cov_ols_plm_Margins_robust)
## 
## z test of coefficients:
## 
##                Estimate  Std. Error z value  Pr(>|z|)    
## (Intercept)  1.7743e+01  5.4428e+00  3.2600  0.001114 ** 
## EM           1.4120e-01  5.2062e-02  2.7122  0.006683 ** 
## IM           2.1300e-02  3.4803e-03  6.1202 9.348e-10 ***
## Edu          6.0001e-04  3.0958e-04  1.9382  0.052603 .  
## Pop         -1.0275e+01  5.6518e+00 -1.8180  0.069065 .  
## Inv         -1.2200e-01  2.1384e-01 -0.5705  0.568329    
## FDI          8.5039e-01  4.8774e-01  1.7435  0.081243 .  
## log(Imp)     1.7882e-01  4.4108e-02  4.0541 5.034e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Fixed (individual) effects Margins_robust

fe_ind_Margins_robust <- plm(Margins, 
              data = long_data_robust, 
              index = c("region", "year"),
              effect = "individual",
              model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data_robust)
cov_fe_ind_Margins_robust <- vcovHC(fe_ind_Margins_robust, type = "HC3")
se_fe_ind_Margins_robust <- sqrt(diag(cov_fe_ind_Margins_robust))
# coeftest(fe_ind_Margins_robust, df = Inf, vcov = cov_fe_ind_Margins_robust)

Random Effects Margins_robust

re_Margins_robust <- plm(Margins, 
              data = long_data_robust, 
              index = c("region", "year"),
              model = "random")
cov_re_Margins_robust <- vcovHC(re_Margins_robust, type = "HC3")
se_re_Margins_robust <- sqrt(diag(cov_re_Margins_robust))
#coeftest(re_Margins_robust, df = Inf, vcov = cov_re_Margins_robust)

Panel regressions table, Margins_robust

stargazer(ols_plm_Margins_robust,  re_Margins_robust, fe_ind_Margins_robust, se = list(se_ols_plm_Margins_robust,se_re_Margins_robust, se_fe_ind_Margins_robust), type = "text", omit = c('Constant'), keep.stat = c("n")) # font.size = "tiny"
## 
## ==========================================
##                   Dependent variable:     
##              -----------------------------
##                        log(GRP)           
##                 (1)       (2)       (3)   
## ------------------------------------------
## EM           0.141***    0.031     0.035  
##               (0.052)   (0.022)   (0.026) 
##                                           
## IM           0.021***  0.004***   0.002** 
##               (0.003)   (0.001)   (0.001) 
##                                           
## Edu           0.001*   -0.002*** -0.002***
##              (0.0003)  (0.0002)  (0.0002) 
##                                           
## Pop          -10.275*   -4.992*   -4.286  
##               (5.652)   (2.697)   (3.077) 
##                                           
## Inv           -0.122    -0.267*   -0.226  
##               (0.214)   (0.152)   (0.145) 
##                                           
## FDI           0.850*    -0.152*  -0.210** 
##               (0.488)   (0.083)   (0.104) 
##                                           
## log(Imp)     0.179***   0.053**    0.022  
##               (0.044)   (0.024)   (0.021) 
##                                           
## ------------------------------------------
## Observations    532       532       532   
## ==========================================
## Note:          *p<0.1; **p<0.05; ***p<0.01
#order: Pooled OLS, RE, FE

Tests for best panel model for the Margins_robust specification

#Ftest - between fixed effects and pooled OLS
#H0 - pooled OLS are better than fixed effects (because all individual effects are the same)
#H1 - fixed effects are better than pooled OLS ones
pFtest(fe_ind_Margins_robust, ols_plm_Margins_robust)
## 
##  F test for individual effects
## 
## data:  Margins
## F = 102.4, df1 = 75, df2 = 449, p-value < 2.2e-16
## alternative hypothesis: significant effects
#fixed effects are better, as expected
#plmtest - between random effects and pooled OLS
#H0 - pooled OLS are better than random effects (because all random effects are the same - on panel effect)
#H1 - random effects are better than pooled OLS ones
plmtest(ols_plm_Margins_robust, type = c('bp'))
## 
##  Lagrange Multiplier Test - (Breusch-Pagan)
## 
## data:  Margins
## chisq = 757.14, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
#random effects are better
#phtest - between fixed effects and random effects
#H0 - random effects are better than fixed effects (random coef. are more efficient)
#H1 - fixed effects are better than random effects (fixed coef. are more efficient)
hausman_test <- phtest(fe_ind_Margins_robust, re_Margins_robust)
hausman_test
## 
##  Hausman Test
## 
## data:  Margins
## chisq = 50.834, df = 7, p-value = 9.903e-09
## alternative hypothesis: one model is inconsistent
#fixed effects are better then random ones

Let’s build two-ways variant of FE.

Best panel model for the Margins_robust specification

fe_twoways_Margins_robust <- plm(Margins, 
              data = long_data_robust, 
              index = c("region", "year"),
              effect = "twoways",
              model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data_robust)
cov_fe_twoways_Margins_robust <- vcovHC(fe_twoways_Margins_robust, type = "HC3")
se_fe_twoways_Margins_robust <- sqrt(diag(cov_fe_twoways_Margins_robust))
# coeftest(fe_twoways_Margins_robust, df = Inf, vcov = cov_fe_twoways_Margins_robust)

Panel models, united tables, robust

All models, robust

stargazer(ols_plm_Theil_robust, re_Theil_robust, fe_ind_Theil_robust, # Theil_robust
          ols_plm_Margins_robust, re_Margins_robust, fe_ind_Margins_robust, # Margins_robust
          se = list(
            se_ols_plm_Theil_robust, se_re_Theil_robust, se_fe_ind_Theil_robust, # Theil_robust
            se_ols_plm_Margins_robust, se_re_Margins_robust, se_fe_ind_Margins_robust), # Margins_robust
          type = "text", omit = c('Constant'), keep.stat = c("n"), font.size = "tiny",
          add.lines = list(c('model', 'OLS', 'RE', 'FE', 'OLS', 'RE', 'FE')),
          title = "All panel models (robust)", digits = 4)
## 
## All panel models (robust)
## ============================================================================
##                                    Dependent variable:                      
##              ---------------------------------------------------------------
##                                         log(GRP)                            
##                 (1)       (2)        (3)        (4)       (5)        (6)    
## ----------------------------------------------------------------------------
## Div           -0.0125   0.0255**   0.0226**                                 
##              (0.0265)   (0.0107)   (0.0105)                                 
##                                                                             
## EM                                           0.1412***   0.0307     0.0353  
##                                              (0.0521)   (0.0218)   (0.0265) 
##                                                                             
## IM                                           0.0213*** 0.0037***   0.0023** 
##                                              (0.0035)   (0.0011)   (0.0011) 
##                                                                             
## Edu           -0.0001  -0.0015*** -0.0016***  0.0006*  -0.0015*** -0.0018***
##              (0.0004)   (0.0002)   (0.0002)  (0.0003)   (0.0002)   (0.0002) 
##                                                                             
## Pop           -7.9932   -3.5425    -3.3143   -10.2750*  -4.9925*   -4.2857  
##              (8.2182)   (2.8558)   (3.1446)  (5.6518)   (2.6971)   (3.0768) 
##                                                                             
## Inv           0.5213    -0.2379    -0.2278    -0.1220   -0.2674*   -0.2263  
##              (0.3783)   (0.1658)   (0.1620)  (0.2138)   (0.1522)   (0.1450) 
##                                                                             
## FDI           1.3944*  -0.2123**  -0.2340**   0.8504*   -0.1524*  -0.2097** 
##              (0.7305)   (0.1067)   (0.1142)  (0.4877)   (0.0829)   (0.1038) 
##                                                                             
## log(Imp)     0.1518***   0.0317     0.0205   0.1788***  0.0526**    0.0223  
##              (0.0485)   (0.0226)   (0.0214)  (0.0441)   (0.0241)   (0.0213) 
##                                                                             
## ----------------------------------------------------------------------------
## model           OLS        RE         FE        OLS        RE         FE    
## Observations    532       532        532        532       532        532    
## ============================================================================
## Note:                                            *p<0.1; **p<0.05; ***p<0.01
# Order: Pooled OLS, RE, FE

The best one, two-ways modifications, robust

stargazer(fe_twoways_Theil_robust, # Theil_robust
          fe_twoways_Margins_robust,# Margins_robust
          se = list(
            se_fe_twoways_Theil_robust,# Theil_robust
            se_fe_twoways_Margins_robust),# Margins_robust
          type = "text", omit = c('Constant'), keep.stat = c("n")) #, font.size = "tiny"
## 
## =========================================
##                  Dependent variable:     
##              ----------------------------
##                        log(GRP)          
##                   (1)            (2)     
## -----------------------------------------
## Div              0.005                   
##                 (0.008)                  
##                                          
## EM                            0.051***   
##                                (0.018)   
##                                          
## IM                             0.002**   
##                                (0.001)   
##                                          
## Edu             -0.0004        -0.0004   
##                 (0.0005)      (0.0004)   
##                                          
## Pop              3.476          2.813    
##                 (3.310)        (3.250)   
##                                          
## Inv              -0.212        -0.187    
##                 (0.149)        (0.137)   
##                                          
## FDI              -0.148        -0.135    
##                 (0.095)        (0.087)   
##                                          
## log(Imp)         0.010          0.008    
##                 (0.017)        (0.017)   
##                                          
## -----------------------------------------
## Observations      532            532     
## =========================================
## Note:         *p<0.1; **p<0.05; ***p<0.01

Final table of OLS

stargazer(reg_Theil_2018,# Theil
          reg_Theil_robust_2018, # Theil_robust
          reg_Margins_2018, # Margins
          reg_Margins_robust_2018,# Margins_robust
          
          se = list(se2imp_Theil_2018, # Theil
                    se2imp_Theil_robust_2018,# Theil_robust
                    se2imp_Margins_2018, # Margins
                    se2imp_Margins_robust_2018),# Margins_robust
          type = "text", omit = c('Constant'), keep.stat = c("n"),
          add.lines = list(c('model', 'OLS', 'OLS', 'OLS', 'OLS')),
          title = "OLS", digits = 4) #, font.size = "tiny"
## 
## OLS
## =====================================================
##                         Dependent variable:          
##               ---------------------------------------
##                            log(GRP_2018)             
##                  (1)       (2)       (3)       (4)   
## -----------------------------------------------------
## Div_2018       -0.0146   -0.0450                     
##               (0.0256)  (0.0283)                     
##                                                      
## EM_2018                           0.2188*** 0.2155***
##                                   (0.0598)  (0.0593) 
##                                                      
## IM_2018                           0.0180*** 0.0182***
##                                   (0.0062)  (0.0062) 
##                                                      
## Edu_2018       0.0003    -0.0001   0.0007*   0.0005  
##               (0.0005)  (0.0006)  (0.0004)  (0.0005) 
##                                                      
## Pop_2018       -6.8542   -6.5859  -12.8861* -13.2407*
##               (9.5294)  (9.5860)  (7.4090)  (7.4636) 
##                                                      
## Inv_2018       0.5134    0.4327    -0.1231   -0.1140 
##               (0.5958)  (0.5823)  (0.3866)  (0.3915) 
##                                                      
## FDI_2018      2.0064**   1.6214*   0.7532    0.6634  
##               (0.8901)  (0.9355)  (0.5929)  (0.6735) 
##                                                      
## log(Imp_2018) 0.1857*** 0.2121*** 0.2787*** 0.2745***
##               (0.0455)  (0.0481)  (0.0593)  (0.0592) 
##                                                      
## -----------------------------------------------------
## model            OLS       OLS       OLS       OLS   
## Observations     78        76        78        76    
## =====================================================
## Note:                     *p<0.1; **p<0.05; ***p<0.01

Final table of FE two-ways specifications

stargazer(fe_twoways_Theil,# Theil
          fe_twoways_Theil_robust, # Theil_robust
          fe_twoways_Margins, # Margins
          fe_twoways_Margins_robust,# Margins_robust
          
          se = list(se_fe_twoways_Theil, # Theil
                    se_fe_twoways_Theil_robust,# Theil_robust
                    se_fe_twoways_Margins, # Margins
                    se_fe_twoways_Margins_robust),# Margins_robust
          type = "text", omit = c('Constant'), keep.stat = c("n"),
          add.lines = list(c('model', 'FE', 'FE', 'FE', 'FE')),
          title = "Two-ways FE", digits = 4) #, font.size = "tiny"
## 
## Two-ways FE
## ==================================================
##                       Dependent variable:         
##              -------------------------------------
##                            log(GRP)               
##                (1)      (2)       (3)       (4)   
## --------------------------------------------------
## Div           0.0083   0.0049                     
##              (0.0075) (0.0082)                    
##                                                   
## EM                             0.0518*** 0.0509***
##                                (0.0180)  (0.0180) 
##                                                   
## IM                             0.0020**  0.0020** 
##                                (0.0009)  (0.0009) 
##                                                   
## Edu          -0.0004  -0.0004   -0.0003   -0.0004 
##              (0.0005) (0.0005) (0.0004)  (0.0004) 
##                                                   
## Pop           3.1728   3.4758   2.4659    2.8125  
##              (3.0186) (3.3102) (2.9752)  (3.2495) 
##                                                   
## Inv          -0.2106  -0.2118   -0.1867   -0.1871 
##              (0.1479) (0.1493) (0.1352)  (0.1374) 
##                                                   
## FDI          -0.1479  -0.1485   -0.1358   -0.1346 
##              (0.0925) (0.0954) (0.0843)  (0.0869) 
##                                                   
## log(Imp)      0.0095   0.0097   0.0087    0.0085  
##              (0.0167) (0.0169) (0.0167)  (0.0169) 
##                                                   
## --------------------------------------------------
## model           FE       FE       FE        FE    
## Observations   546      532       546       532   
## ==================================================
## Note:                  *p<0.1; **p<0.05; ***p<0.01

Pretty robust results (betas of independent variables).