The following research is conducted by Maria R. Koldasheva and Nikolai A. Popov.
library(dplyr)
library(psych)
library(readxl)
library(writexl)
library(kableExtra)
# Econometrics
library(tidyverse)
library(plm) # panel models
library(sandwich) #covariance matrixes
library(lmtest) # tests
library(xtable)# latex tables
library(stargazer) # latex regression tables
library(ggpubr) # correlation test
# Downloading
wide_data <- read_csv('C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Wide_united_data.csv',
show_col_types = FALSE)
long_data <- read_csv('C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Long_united_data.csv',
show_col_types = FALSE)
map_data <- read_csv(
'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Wide_united_data_map.csv',
show_col_types = FALSE)
# Exporting them back to fix QGIS coding issues
write_xlsx(wide_data,
'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Wide_united_data_geo.xlsx')
# write_xlsx(long_data,
# 'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Long_united_data_geo.xlsx')
# Observing the tables
wide_data
long_data
# For all the variables
Desc_stat <- long_data %>% psych::describe()
# For all variables
Index_stat <- Desc_stat[c(3:11), c(2, 3:4, (8:9))]
# row.names(Index_stat) <- c('Theil index', 'Extensive Margin', 'Intensive Margin')
xtable(Index_stat, caption = 'Descriptive statistics', digits = 3, type = "latex")
# For dependent/independent variables
Index_stat_important <- Desc_stat[c(3:6), c(2, 3:4, (8:9))]
# row.names(Index_stat) <- c('Theil index', 'Extensive Margin', 'Intensive Margin')
xtable(Index_stat_important, caption = 'Descriptive statistics', digits = 3, type = "latex")
res1 <- cor(long_data[, c(3:(length(long_data)-6))])
res1
## GRP Div EM IM Imp Edu
## GRP 1.00000000 0.3146169 -0.038722996 0.64378658 0.3743528 0.02979357
## Div 0.31461686 1.0000000 -0.628686905 0.15837051 0.8189165 0.41657305
## EM -0.03872300 -0.6286869 1.000000000 -0.05498696 -0.3377338 -0.30802795
## IM 0.64378658 0.1583705 -0.054986955 1.00000000 0.1138910 -0.12929391
## Imp 0.37435280 0.8189165 -0.337733790 0.11389101 1.0000000 0.39000887
## Edu 0.02979357 0.4165731 -0.308027954 -0.12929391 0.3900089 1.00000000
## Pop 0.14156322 0.2564669 0.009496447 0.06912775 0.2728343 0.05786492
## Inv 0.04349540 -0.2353995 0.321429672 0.05782095 -0.1519467 -0.14601628
## FDI 0.44006908 0.2610529 -0.093738498 0.28381835 0.3739655 -0.12666111
## Pop Inv FDI
## GRP 0.141563225 0.04349540 0.44006908
## Div 0.256466894 -0.23539946 0.26105288
## EM 0.009496447 0.32142967 -0.09373850
## IM 0.069127748 0.05782095 0.28381835
## Imp 0.272834331 -0.15194671 0.37396553
## Edu 0.057864921 -0.14601628 -0.12666111
## Pop 1.000000000 0.15650166 0.15337992
## Inv 0.156501660 1.00000000 0.06486492
## FDI 0.153379922 0.06486492 1.00000000
library(ggcorrplot)
#visualize correlation matrix
ggcorrplot(cor(long_data[, c(3:(length(long_data)-6))]))
2018 is taken as the last economically stable year available in our data.
get_year <- function(df, year){
df %>%
select(contains(c('region', year)))
}
Data_2018 <- get_year(wide_data, '2018')
Map_2018 <- get_year(map_data, '2018')
# Exporting this file
write_xlsx(Data_2018,
'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Data_2018_geo.xlsx')
write_xlsx(Map_2018,
'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Map_2018_geo.xlsx')
For non-dynamic models we use the simplified specification without lags.
# Theil index specification
Theil_2018 <- log(GRP_2018) ~ Div_2018 + Edu_2018 + Pop_2018 + Inv_2018 + FDI_2018 + log(Imp_2018)
# WM and IM specification
Margins_2018 <- log(GRP_2018) ~ EM_2018 + IM_2018 + Edu_2018 + Pop_2018 + Inv_2018 + FDI_2018 + log(Imp_2018)
# Theil index specification
Theil <- log(GRP) ~ Div + Edu + Pop + Inv + FDI + log(Imp)
# EM and IM specification
Margins <- log(GRP) ~ EM + IM + Edu + Pop + Inv + FDI + log(Imp)
# Theil index specification
reg_Theil_2018 <- lm(Theil_2018, data = Data_2018)
vcov2_Theil_2018 <- vcovHC(reg_Theil_2018, type = "HC0")
se2imp_Theil_2018 <- sqrt(diag(vcov2_Theil_2018))
# WM and IM specification
reg_Margins_2018 <- lm(Margins_2018, data = Data_2018)
vcov2_Margins_2018 <- vcovHC(reg_Margins_2018, type = "HC0")
se2imp_Margins_2018 <- sqrt(diag(vcov2_Margins_2018))
# Table
stargazer(reg_Theil_2018, reg_Margins_2018, se = list(se2imp_Theil_2018, se2imp_Margins_2018), type = 'text', omit = c('Constant'), keep.stat = c("n")) # font.size = "tiny"
##
## ==========================================
## Dependent variable:
## ----------------------------
## log(GRP_2018)
## (1) (2)
## ------------------------------------------
## Div_2018 -0.015
## (0.026)
##
## EM_2018 0.219***
## (0.060)
##
## IM_2018 0.018***
## (0.006)
##
## Edu_2018 0.0003 0.001*
## (0.001) (0.0004)
##
## Pop_2018 -6.854 -12.886*
## (9.529) (7.409)
##
## Inv_2018 0.513 -0.123
## (0.596) (0.387)
##
## FDI_2018 2.006** 0.753
## (0.890) (0.593)
##
## log(Imp_2018) 0.186*** 0.279***
## (0.046) (0.059)
##
## ------------------------------------------
## Observations 78 78
## ==========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
Pooled OLS Theil
ols_plm_Theil <- plm(Theil,
data = long_data,
index = c("region", "year"),
model = "pooling")
cov_ols_plm_Theil <- vcovHC(ols_plm_Theil, type = "HC3")
se_ols_plm_Theil <- sqrt(diag(cov_ols_plm_Theil))
coeftest(ols_plm_Theil, df = Inf, vcov = cov_ols_plm_Theil)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.7007e+01 8.2036e+00 2.0731 0.038163 *
## Div 8.5641e-03 1.9625e-02 0.4364 0.662557
## Edu 5.9504e-05 3.5335e-04 0.1684 0.866271
## Pop -7.7624e+00 7.9256e+00 -0.9794 0.327377
## Inv 5.1728e-01 3.7254e-01 1.3885 0.164982
## FDI 1.5422e+00 6.6096e-01 2.3333 0.019630 *
## log(Imp) 1.4015e-01 4.2796e-02 3.2747 0.001058 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Fixed (individual) effects Theil
fe_ind_Theil <- plm(Theil,
data = long_data,
index = c("region", "year"),
effect = "individual",
model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data)
cov_fe_ind_Theil <- vcovHC(fe_ind_Theil, type = "HC3")
se_fe_ind_Theil <- sqrt(diag(cov_fe_ind_Theil))
# coeftest(fe_ind_Theil, df = Inf, vcov = cov_fe_ind_Theil)
Random Effects Theil
re_Theil <- plm(Theil,
data = long_data,
index = c("region", "year"),
model = "random")
cov_re_Theil <- vcovHC(re_Theil, type = "HC3")
se_re_Theil <- sqrt(diag(cov_re_Theil))
#coeftest(re_Theil, df = Inf, vcov = cov_re_Theil)
Panel regressions table, Theil
stargazer(ols_plm_Theil, re_Theil, fe_ind_Theil, se = list(se_ols_plm_Theil, se_re_Theil, se_fe_ind_Theil), type = "text", omit = c('Constant'), keep.stat = c("n")) # font.size = "tiny"
##
## ==========================================
## Dependent variable:
## -----------------------------
## log(GRP)
## (1) (2) (3)
## ------------------------------------------
## Div 0.009 0.037*** 0.027***
## (0.020) (0.012) (0.010)
##
## Edu 0.0001 -0.001*** -0.002***
## (0.0004) (0.0002) (0.0002)
##
## Pop -7.762 -3.569 -3.665
## (7.926) (2.593) (2.856)
##
## Inv 0.517 -0.249 -0.227
## (0.373) (0.169) (0.161)
##
## FDI 1.542** -0.202* -0.231**
## (0.661) (0.103) (0.110)
##
## log(Imp) 0.140*** 0.034 0.020
## (0.043) (0.023) (0.021)
##
## ------------------------------------------
## Observations 546 546 546
## ==========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
#order: Pooled OLS, RE, FE
#Ftest - between fixed effects and pooled OLS
#H0 - pooled OLS are better than fixed effects (because all individual effects are the same)
#H1 - fixed effects are better than pooled OLS ones
pFtest(fe_ind_Theil, ols_plm_Theil)
##
## F test for individual effects
##
## data: Theil
## F = 206.21, df1 = 77, df2 = 462, p-value < 2.2e-16
## alternative hypothesis: significant effects
#fixed effects are better, as expected
#plmtest - between random effects and pooled OLS
#H0 - pooled OLS are better than random effects (because all random effects are the same - on panel effect)
#H1 - random effects are better than pooled OLS ones
plmtest(ols_plm_Theil, type = c('bp'))
##
## Lagrange Multiplier Test - (Breusch-Pagan)
##
## data: Theil
## chisq = 1215.3, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
#random effects are better
#phtest - between fixed effects and random effects
#H0 - random effects are better than fixed effects (random coef. are more efficient)
#H1 - fixed effects are better than random effects (fixed coef. are more efficient)
phtest(fe_ind_Theil, re_Theil)
##
## Hausman Test
##
## data: Theil
## chisq = 9.9076, df = 6, p-value = 0.1286
## alternative hypothesis: one model is inconsistent
#random effects are better than fixed effects
Still, by all economic means, RE cannot be applied to our model. There should be a problem in our data (possible with the indexes), which causes biases in the Hausmann test. Thus, we will use FE.
fe_twoways_Theil <- plm(Theil,
data = long_data,
index = c("region", "year"),
effect = "twoways",
model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data)
cov_fe_twoways_Theil <- vcovHC(fe_twoways_Theil, type = "HC3")
se_fe_twoways_Theil <- sqrt(diag(cov_fe_twoways_Theil))
# coeftest(fe_twoways_Theil, df = Inf, vcov = cov_fe_twoways_Theil)
Pooled OLS Margins
ols_plm_Margins <- plm(Margins,
data = long_data,
index = c("region", "year"),
model = "pooling")
cov_ols_plm_Margins <- vcovHC(ols_plm_Margins, type = "HC3")
se_ols_plm_Margins <- sqrt(diag(cov_ols_plm_Margins))
coeftest(ols_plm_Margins, df = Inf, vcov = cov_ols_plm_Margins)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 16.84836620 5.23392083 3.2191 0.001286 **
## EM 0.14718778 0.05211557 2.8243 0.004739 **
## IM 0.02117837 0.00343260 6.1698 6.838e-10 ***
## Edu 0.00076023 0.00026021 2.9215 0.003483 **
## Pop -9.58929309 5.48289191 -1.7489 0.080300 .
## Inv -0.18333138 0.21765178 -0.8423 0.399612
## FDI 0.92502198 0.45160245 2.0483 0.040530 *
## log(Imp) 0.18633702 0.04406512 4.2287 2.351e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Fixed (individual) effects Margins
fe_ind_Margins <- plm(Margins,
data = long_data,
index = c("region", "year"),
effect = "individual",
model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data)
cov_fe_ind_Margins <- vcovHC(fe_ind_Margins, type = "HC3")
se_fe_ind_Margins <- sqrt(diag(cov_fe_ind_Margins))
# coeftest(fe_ind_Margins, df = Inf, vcov = cov_fe_ind_Margins)
Random Effects Margins
re_Margins <- plm(Margins,
data = long_data,
index = c("region", "year"),
model = "random")
cov_re_Margins <- vcovHC(re_Margins, type = "HC3")
se_re_Margins <- sqrt(diag(cov_re_Margins))
#coeftest(re_Margins, df = Inf, vcov = cov_re_Margins)
Panel regressions table, Margins
stargazer(ols_plm_Margins, re_Margins, fe_ind_Margins, se = list(se_ols_plm_Margins,se_re_Margins, se_fe_ind_Margins), type = "text", omit = c('Constant'), keep.stat = c("n")) # font.size = "tiny"
##
## ==========================================
## Dependent variable:
## -----------------------------
## log(GRP)
## (1) (2) (3)
## ------------------------------------------
## EM 0.147*** 0.020 0.036
## (0.052) (0.024) (0.026)
##
## IM 0.021*** 0.004*** 0.002**
## (0.003) (0.001) (0.001)
##
## Edu 0.001*** -0.001*** -0.002***
## (0.0003) (0.0003) (0.0002)
##
## Pop -9.589* -5.453** -4.886*
## (5.483) (2.491) (2.814)
##
## Inv -0.183 -0.316* -0.227
## (0.218) (0.167) (0.143)
##
## FDI 0.925** -0.136* -0.213**
## (0.452) (0.082) (0.101)
##
## log(Imp) 0.186*** 0.065** 0.023
## (0.044) (0.027) (0.021)
##
## ------------------------------------------
## Observations 546 546 546
## ==========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
#order: Pooled OLS, RE, FE
#Ftest - between fixed effects and pooled OLS
#H0 - pooled OLS are better than fixed effects (because all individual effects are the same)
#H1 - fixed effects are better than pooled OLS ones
pFtest(fe_ind_Margins, ols_plm_Margins)
##
## F test for individual effects
##
## data: Margins
## F = 101.45, df1 = 77, df2 = 461, p-value < 2.2e-16
## alternative hypothesis: significant effects
#fixed effects are better, as expected
#plmtest - between random effects and pooled OLS
#H0 - pooled OLS are better than random effects (because all random effects are the same - on panel effect)
#H1 - random effects are better than pooled OLS ones
plmtest(ols_plm_Margins, type = c('bp'))
##
## Lagrange Multiplier Test - (Breusch-Pagan)
##
## data: Margins
## chisq = 744.36, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
#random effects are better
#phtest - between fixed effects and random effects
#H0 - random effects are better than fixed effects (random coef. are more efficient)
#H1 - fixed effects are better than random effects (fixed coef. are more efficient)
phtest(fe_ind_Margins, re_Margins)
##
## Hausman Test
##
## data: Margins
## chisq = 0.75482, df = 7, p-value = 0.9979
## alternative hypothesis: one model is inconsistent
#random effects are better than fixed effects
Still, by all economic means, RE cannot be applied to our model. There should be a problem in our data (possible with the indexes), which causes biases in the Hausmann test. Thus, we will use FE.
fe_twoways_Margins <- plm(Margins,
data = long_data,
index = c("region", "year"),
effect = "twoways",
model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data)
cov_fe_twoways_Margins <- vcovHC(fe_twoways_Margins, type = "HC3")
se_fe_twoways_Margins <- sqrt(diag(cov_fe_twoways_Margins))
# coeftest(fe_twoways_Margins, df = Inf, vcov = cov_fe_twoways_Margins)
Fixing Stargazer
# #echo = FALSE, warning = FALSE
# ## Quick fix for stargazer <= 5.2.3 is.na() issue with long model names in R >= 4.2
# # Unload stargazer if loaded
# detach("package:stargazer",unload=T)
# # Delete it
# remove.packages("stargazer")
# # Download the source
# download.file("https://cran.r-project.org/src/contrib/stargazer_5.2.3.tar.gz", destfile = "stargazer_5.2.3.tar.gz")
# # Unpack
# untar("stargazer_5.2.3.tar.gz")
# # Read the sourcefile with .inside.bracket fun
# stargazer_src <- readLines("stargazer/R/stargazer-internal.R")
# # Move the length check 5 lines up so it precedes is.na(.)
# stargazer_src[1990] <- stargazer_src[1995]
# stargazer_src[1995] <- ""
# # Save back
# writeLines(stargazer_src, con="stargazer/R/stargazer-internal.R")
# # Compile and install the patched package
# install.packages("stargazer", repos = NULL, type="source")
stargazer(ols_plm_Theil, re_Theil, fe_ind_Theil, # Theil
ols_plm_Margins, re_Margins, fe_ind_Margins, # Margins
se = list(
se_ols_plm_Theil, se_re_Theil, se_fe_ind_Theil, # Theil
se_ols_plm_Margins, se_re_Margins, se_fe_ind_Margins), # Margins
type = "text", omit = c('Constant'), keep.stat = c("n"), font.size = "tiny",
add.lines = list(c('model', 'OLS', 'RE', 'FE', 'OLS', 'RE', 'FE')),
title = "All panel models", digits = 4)
##
## All panel models
## ============================================================================
## Dependent variable:
## ---------------------------------------------------------------
## log(GRP)
## (1) (2) (3) (4) (5) (6)
## ----------------------------------------------------------------------------
## Div 0.0086 0.0375*** 0.0265***
## (0.0196) (0.0122) (0.0101)
##
## EM 0.1472*** 0.0201 0.0358
## (0.0521) (0.0238) (0.0265)
##
## IM 0.0212*** 0.0037*** 0.0024**
## (0.0034) (0.0012) (0.0011)
##
## Edu 0.0001 -0.0014*** -0.0016*** 0.0008*** -0.0013*** -0.0018***
## (0.0004) (0.0002) (0.0002) (0.0003) (0.0003) (0.0002)
##
## Pop -7.7624 -3.5686 -3.6647 -9.5893* -5.4534** -4.8862*
## (7.9256) (2.5932) (2.8556) (5.4829) (2.4914) (2.8144)
##
## Inv 0.5173 -0.2493 -0.2266 -0.1833 -0.3161* -0.2268
## (0.3725) (0.1695) (0.1607) (0.2177) (0.1674) (0.1429)
##
## FDI 1.5422** -0.2015* -0.2315** 0.9250** -0.1362* -0.2131**
## (0.6610) (0.1030) (0.1100) (0.4516) (0.0822) (0.1009)
##
## log(Imp) 0.1401*** 0.0339 0.0201 0.1863*** 0.0652** 0.0226
## (0.0428) (0.0229) (0.0211) (0.0441) (0.0274) (0.0212)
##
## ----------------------------------------------------------------------------
## model OLS RE FE OLS RE FE
## Observations 546 546 546 546 546 546
## ============================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Order: Pooled OLS, RE, FE
stargazer(fe_twoways_Theil, # Theil
fe_twoways_Margins,# Margins
se = list(
se_fe_twoways_Theil,# Theil
se_fe_twoways_Margins),# Margins
type = "text", omit = c('Constant'), keep.stat = c("n")) #, font.size = "tiny"
##
## =========================================
## Dependent variable:
## ----------------------------
## log(GRP)
## (1) (2)
## -----------------------------------------
## Div 0.008
## (0.008)
##
## EM 0.052***
## (0.018)
##
## IM 0.002**
## (0.001)
##
## Edu -0.0004 -0.0003
## (0.0005) (0.0004)
##
## Pop 3.173 2.466
## (3.019) (2.975)
##
## Inv -0.211 -0.187
## (0.148) (0.135)
##
## FDI -0.148 -0.136
## (0.092) (0.084)
##
## log(Imp) 0.010 0.009
## (0.017) (0.017)
##
## -----------------------------------------
## Observations 546 546
## =========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Downloading
wide_data_robust <- read_csv(
'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Wide_united_data_robust.csv',
show_col_types = FALSE)
long_data_robust <- read_csv(
'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Long_united_data_robust.csv',
show_col_types = FALSE)
# Exporting them back to fix QGIS coding issues
write_xlsx(wide_data_robust,
'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Wide_united_data_robust_geo.xlsx')
# write_xlsx(long_data_robust,
# 'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Long_united_data_robust_geo.xlsx')
# Observing the tables
wide_data_robust
long_data_robust
# For all the variables
Desc_stat_robust <- long_data_robust %>% psych::describe()
# For diversification indexes
Index_stat_robust <- Desc_stat_robust[c(4:6), c(2, 3:4, (8:9))]
# row.names(Index_stat) <- c('Theil_robust index', 'Extensive Margin', 'Intensive Margin')
xtable(Index_stat_robust, caption = 'Descriptive statistics', digits = 3, type = "latex")
No extreme values of IM now.
res1_robust <- cor(long_data_robust[, c(3:(length(long_data_robust)-6))])
res1_robust
## GRP Div EM IM Imp Edu
## GRP 1.00000000 0.12046454 0.03859512 0.65649208 0.25091214 -0.14958925
## Div 0.12046454 1.00000000 -0.70020999 0.15537158 0.63382255 0.09129487
## EM 0.03859512 -0.70020999 1.00000000 -0.04011758 -0.43768312 -0.21937034
## IM 0.65649208 0.15537158 -0.04011758 1.00000000 0.14558743 -0.19019470
## Imp 0.25091214 0.63382255 -0.43768312 0.14558743 1.00000000 -0.13265746
## Edu -0.14958925 0.09129487 -0.21937034 -0.19019470 -0.13265746 1.00000000
## Pop 0.08861862 0.16799415 0.06597524 0.05827624 0.36067681 -0.06556583
## Inv 0.09861951 -0.18139214 0.29565431 0.07015477 -0.05516198 -0.07647036
## FDI 0.38155793 0.05952865 -0.02877383 0.27741739 0.24955672 -0.32491996
## Pop Inv FDI
## GRP 0.08861862 0.09861951 0.38155793
## Div 0.16799415 -0.18139214 0.05952865
## EM 0.06597524 0.29565431 -0.02877383
## IM 0.05827624 0.07015477 0.27741739
## Imp 0.36067681 -0.05516198 0.24955672
## Edu -0.06556583 -0.07647036 -0.32491996
## Pop 1.00000000 0.20059835 0.10996522
## Inv 0.20059835 1.00000000 0.11527150
## FDI 0.10996522 0.11527150 1.00000000
library(ggcorrplot)
#visualize correlation matrix
ggcorrplot(cor(long_data_robust[, c(3:(length(long_data_robust)-6))]))
2018 is taken as the last economically stable year available in our data.
Data_2018_robust <- get_year(wide_data_robust, '2018')
# Exporting this file
write_xlsx(Data_2018_robust,
'C:/Users/Kolya/Documents/New_studies/Diploma/Data/Data_use/Data_2018_robust_geo.xlsx')
# Theil_robust index specification
reg_Theil_robust_2018 <- lm(Theil_2018, data = Data_2018_robust)
vcov2_Theil_robust_2018 <- vcovHC(reg_Theil_robust_2018, type = "HC0")
se2imp_Theil_robust_2018 <- sqrt(diag(vcov2_Theil_robust_2018))
# WM and IM specification
reg_Margins_robust_2018 <- lm(Margins_2018, data = Data_2018_robust)
vcov2_Margins_robust_2018 <- vcovHC(reg_Margins_robust_2018, type = "HC0")
se2imp_Margins_robust_2018 <- sqrt(diag(vcov2_Margins_robust_2018))
# Table
stargazer(reg_Theil_robust_2018, reg_Margins_robust_2018, se = list(se2imp_Theil_robust_2018, se2imp_Margins_robust_2018), type = 'text', omit = c('Constant'), keep.stat = c("n")) # font.size = "tiny"
##
## ==========================================
## Dependent variable:
## ----------------------------
## log(GRP_2018)
## (1) (2)
## ------------------------------------------
## Div_2018 -0.045
## (0.028)
##
## EM_2018 0.215***
## (0.059)
##
## IM_2018 0.018***
## (0.006)
##
## Edu_2018 -0.0001 0.001
## (0.001) (0.001)
##
## Pop_2018 -6.586 -13.241*
## (9.586) (7.464)
##
## Inv_2018 0.433 -0.114
## (0.582) (0.392)
##
## FDI_2018 1.621* 0.663
## (0.936) (0.673)
##
## log(Imp_2018) 0.212*** 0.274***
## (0.048) (0.059)
##
## ------------------------------------------
## Observations 76 76
## ==========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
Pooled OLS Theil_robust
ols_plm_Theil_robust <- plm(Theil,
data = long_data_robust,
index = c("region", "year"),
model = "pooling")
cov_ols_plm_Theil_robust <- vcovHC(ols_plm_Theil_robust, type = "HC3")
se_ols_plm_Theil_robust <- sqrt(diag(cov_ols_plm_Theil_robust))
coeftest(ols_plm_Theil_robust, df = Inf, vcov = cov_ols_plm_Theil_robust)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 17.06183552 8.53429728 1.9992 0.045586 *
## Div -0.01247898 0.02646898 -0.4715 0.637315
## Edu -0.00014571 0.00044147 -0.3300 0.741363
## Pop -7.99324498 8.21824558 -0.9726 0.330741
## Inv 0.52125953 0.37829033 1.3779 0.168223
## FDI 1.39439102 0.73046089 1.9089 0.056272 .
## log(Imp) 0.15175351 0.04845296 3.1320 0.001736 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Fixed (individual) effects Theil_robust
fe_ind_Theil_robust <- plm(Theil,
data = long_data_robust,
index = c("region", "year"),
effect = "individual",
model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data_robust)
cov_fe_ind_Theil_robust <- vcovHC(fe_ind_Theil_robust, type = "HC3")
se_fe_ind_Theil_robust <- sqrt(diag(cov_fe_ind_Theil_robust))
# coeftest(fe_ind_Theil_robust, df = Inf, vcov = cov_fe_ind_Theil_robust)
Random Effects Theil_robust
re_Theil_robust <- plm(Theil,
data = long_data_robust,
index = c("region", "year"),
model = "random")
cov_re_Theil_robust <- vcovHC(re_Theil_robust, type = "HC3")
se_re_Theil_robust <- sqrt(diag(cov_re_Theil_robust))
#coeftest(re_Theil_robust, df = Inf, vcov = cov_re_Theil_robust)
Panel regressions table, Theil_robust
stargazer(ols_plm_Theil_robust, re_Theil_robust, fe_ind_Theil_robust, se = list(se_ols_plm_Theil_robust, se_re_Theil_robust, se_fe_ind_Theil_robust), type = "text", omit = c('Constant'), keep.stat = c("n")) # font.size = "tiny"
##
## ==========================================
## Dependent variable:
## -----------------------------
## log(GRP)
## (1) (2) (3)
## ------------------------------------------
## Div -0.012 0.026** 0.023**
## (0.026) (0.011) (0.011)
##
## Edu -0.0001 -0.002*** -0.002***
## (0.0004) (0.0002) (0.0002)
##
## Pop -7.993 -3.543 -3.314
## (8.218) (2.856) (3.145)
##
## Inv 0.521 -0.238 -0.228
## (0.378) (0.166) (0.162)
##
## FDI 1.394* -0.212** -0.234**
## (0.730) (0.107) (0.114)
##
## log(Imp) 0.152*** 0.032 0.021
## (0.048) (0.023) (0.021)
##
## ------------------------------------------
## Observations 532 532 532
## ==========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
#order: Pooled OLS, RE, FE
#Ftest - between fixed effects and pooled OLS
#H0 - pooled OLS are better than fixed effects (because all individual effects are the same)
#H1 - fixed effects are better than pooled OLS ones
pFtest(fe_ind_Theil_robust, ols_plm_Theil_robust)
##
## F test for individual effects
##
## data: Theil
## F = 205.49, df1 = 75, df2 = 450, p-value < 2.2e-16
## alternative hypothesis: significant effects
#fixed effects are better, as expected
#plmtest - between random effects and pooled OLS
#H0 - pooled OLS are better than random effects (because all random effects are the same - on panel effect)
#H1 - random effects are better than pooled OLS ones
plmtest(ols_plm_Theil_robust, type = c('bp'))
##
## Lagrange Multiplier Test - (Breusch-Pagan)
##
## data: Theil
## chisq = 1208.8, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
#random effects are better
#phtest - between fixed effects and random effects
#H0 - random effects are better than fixed effects (random coef. are more efficient)
#H1 - fixed effects are better than random effects (fixed coef. are more efficient)
phtest(fe_ind_Theil_robust, re_Theil_robust)
##
## Hausman Test
##
## data: Theil
## chisq = 1.8048, df = 6, p-value = 0.9367
## alternative hypothesis: one model is inconsistent
#random effects are better than fixed effects
Still, by all economic means, RE cannot be applied to our model. There should be a problem in our data (possible with the indexes), which causes biases in the Hausmann test. Thus, we will use FE.
fe_twoways_Theil_robust <- plm(Theil,
data = long_data_robust,
index = c("region", "year"),
effect = "twoways",
model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data_robust)
cov_fe_twoways_Theil_robust <- vcovHC(fe_twoways_Theil_robust, type = "HC3")
se_fe_twoways_Theil_robust <- sqrt(diag(cov_fe_twoways_Theil_robust))
# coeftest(fe_twoways_Theil_robust, df = Inf, vcov = cov_fe_twoways_Theil_robust)
Pooled OLS Margins_robust
ols_plm_Margins_robust <- plm(Margins,
data = long_data_robust,
index = c("region", "year"),
model = "pooling")
cov_ols_plm_Margins_robust <- vcovHC(ols_plm_Margins_robust, type = "HC3")
se_ols_plm_Margins_robust <- sqrt(diag(cov_ols_plm_Margins_robust))
coeftest(ols_plm_Margins_robust, df = Inf, vcov = cov_ols_plm_Margins_robust)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.7743e+01 5.4428e+00 3.2600 0.001114 **
## EM 1.4120e-01 5.2062e-02 2.7122 0.006683 **
## IM 2.1300e-02 3.4803e-03 6.1202 9.348e-10 ***
## Edu 6.0001e-04 3.0958e-04 1.9382 0.052603 .
## Pop -1.0275e+01 5.6518e+00 -1.8180 0.069065 .
## Inv -1.2200e-01 2.1384e-01 -0.5705 0.568329
## FDI 8.5039e-01 4.8774e-01 1.7435 0.081243 .
## log(Imp) 1.7882e-01 4.4108e-02 4.0541 5.034e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Fixed (individual) effects Margins_robust
fe_ind_Margins_robust <- plm(Margins,
data = long_data_robust,
index = c("region", "year"),
effect = "individual",
model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data_robust)
cov_fe_ind_Margins_robust <- vcovHC(fe_ind_Margins_robust, type = "HC3")
se_fe_ind_Margins_robust <- sqrt(diag(cov_fe_ind_Margins_robust))
# coeftest(fe_ind_Margins_robust, df = Inf, vcov = cov_fe_ind_Margins_robust)
Random Effects Margins_robust
re_Margins_robust <- plm(Margins,
data = long_data_robust,
index = c("region", "year"),
model = "random")
cov_re_Margins_robust <- vcovHC(re_Margins_robust, type = "HC3")
se_re_Margins_robust <- sqrt(diag(cov_re_Margins_robust))
#coeftest(re_Margins_robust, df = Inf, vcov = cov_re_Margins_robust)
Panel regressions table, Margins_robust
stargazer(ols_plm_Margins_robust, re_Margins_robust, fe_ind_Margins_robust, se = list(se_ols_plm_Margins_robust,se_re_Margins_robust, se_fe_ind_Margins_robust), type = "text", omit = c('Constant'), keep.stat = c("n")) # font.size = "tiny"
##
## ==========================================
## Dependent variable:
## -----------------------------
## log(GRP)
## (1) (2) (3)
## ------------------------------------------
## EM 0.141*** 0.031 0.035
## (0.052) (0.022) (0.026)
##
## IM 0.021*** 0.004*** 0.002**
## (0.003) (0.001) (0.001)
##
## Edu 0.001* -0.002*** -0.002***
## (0.0003) (0.0002) (0.0002)
##
## Pop -10.275* -4.992* -4.286
## (5.652) (2.697) (3.077)
##
## Inv -0.122 -0.267* -0.226
## (0.214) (0.152) (0.145)
##
## FDI 0.850* -0.152* -0.210**
## (0.488) (0.083) (0.104)
##
## log(Imp) 0.179*** 0.053** 0.022
## (0.044) (0.024) (0.021)
##
## ------------------------------------------
## Observations 532 532 532
## ==========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
#order: Pooled OLS, RE, FE
#Ftest - between fixed effects and pooled OLS
#H0 - pooled OLS are better than fixed effects (because all individual effects are the same)
#H1 - fixed effects are better than pooled OLS ones
pFtest(fe_ind_Margins_robust, ols_plm_Margins_robust)
##
## F test for individual effects
##
## data: Margins
## F = 102.4, df1 = 75, df2 = 449, p-value < 2.2e-16
## alternative hypothesis: significant effects
#fixed effects are better, as expected
#plmtest - between random effects and pooled OLS
#H0 - pooled OLS are better than random effects (because all random effects are the same - on panel effect)
#H1 - random effects are better than pooled OLS ones
plmtest(ols_plm_Margins_robust, type = c('bp'))
##
## Lagrange Multiplier Test - (Breusch-Pagan)
##
## data: Margins
## chisq = 757.14, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
#random effects are better
#phtest - between fixed effects and random effects
#H0 - random effects are better than fixed effects (random coef. are more efficient)
#H1 - fixed effects are better than random effects (fixed coef. are more efficient)
hausman_test <- phtest(fe_ind_Margins_robust, re_Margins_robust)
hausman_test
##
## Hausman Test
##
## data: Margins
## chisq = 50.834, df = 7, p-value = 9.903e-09
## alternative hypothesis: one model is inconsistent
#fixed effects are better then random ones
Let’s build two-ways variant of FE.
fe_twoways_Margins_robust <- plm(Margins,
data = long_data_robust,
index = c("region", "year"),
effect = "twoways",
model = "within")
# model.matrix(log(GRP) ~ Imp, data = long_data_robust)
cov_fe_twoways_Margins_robust <- vcovHC(fe_twoways_Margins_robust, type = "HC3")
se_fe_twoways_Margins_robust <- sqrt(diag(cov_fe_twoways_Margins_robust))
# coeftest(fe_twoways_Margins_robust, df = Inf, vcov = cov_fe_twoways_Margins_robust)
stargazer(ols_plm_Theil_robust, re_Theil_robust, fe_ind_Theil_robust, # Theil_robust
ols_plm_Margins_robust, re_Margins_robust, fe_ind_Margins_robust, # Margins_robust
se = list(
se_ols_plm_Theil_robust, se_re_Theil_robust, se_fe_ind_Theil_robust, # Theil_robust
se_ols_plm_Margins_robust, se_re_Margins_robust, se_fe_ind_Margins_robust), # Margins_robust
type = "text", omit = c('Constant'), keep.stat = c("n"), font.size = "tiny",
add.lines = list(c('model', 'OLS', 'RE', 'FE', 'OLS', 'RE', 'FE')),
title = "All panel models (robust)", digits = 4)
##
## All panel models (robust)
## ============================================================================
## Dependent variable:
## ---------------------------------------------------------------
## log(GRP)
## (1) (2) (3) (4) (5) (6)
## ----------------------------------------------------------------------------
## Div -0.0125 0.0255** 0.0226**
## (0.0265) (0.0107) (0.0105)
##
## EM 0.1412*** 0.0307 0.0353
## (0.0521) (0.0218) (0.0265)
##
## IM 0.0213*** 0.0037*** 0.0023**
## (0.0035) (0.0011) (0.0011)
##
## Edu -0.0001 -0.0015*** -0.0016*** 0.0006* -0.0015*** -0.0018***
## (0.0004) (0.0002) (0.0002) (0.0003) (0.0002) (0.0002)
##
## Pop -7.9932 -3.5425 -3.3143 -10.2750* -4.9925* -4.2857
## (8.2182) (2.8558) (3.1446) (5.6518) (2.6971) (3.0768)
##
## Inv 0.5213 -0.2379 -0.2278 -0.1220 -0.2674* -0.2263
## (0.3783) (0.1658) (0.1620) (0.2138) (0.1522) (0.1450)
##
## FDI 1.3944* -0.2123** -0.2340** 0.8504* -0.1524* -0.2097**
## (0.7305) (0.1067) (0.1142) (0.4877) (0.0829) (0.1038)
##
## log(Imp) 0.1518*** 0.0317 0.0205 0.1788*** 0.0526** 0.0223
## (0.0485) (0.0226) (0.0214) (0.0441) (0.0241) (0.0213)
##
## ----------------------------------------------------------------------------
## model OLS RE FE OLS RE FE
## Observations 532 532 532 532 532 532
## ============================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Order: Pooled OLS, RE, FE
stargazer(fe_twoways_Theil_robust, # Theil_robust
fe_twoways_Margins_robust,# Margins_robust
se = list(
se_fe_twoways_Theil_robust,# Theil_robust
se_fe_twoways_Margins_robust),# Margins_robust
type = "text", omit = c('Constant'), keep.stat = c("n")) #, font.size = "tiny"
##
## =========================================
## Dependent variable:
## ----------------------------
## log(GRP)
## (1) (2)
## -----------------------------------------
## Div 0.005
## (0.008)
##
## EM 0.051***
## (0.018)
##
## IM 0.002**
## (0.001)
##
## Edu -0.0004 -0.0004
## (0.0005) (0.0004)
##
## Pop 3.476 2.813
## (3.310) (3.250)
##
## Inv -0.212 -0.187
## (0.149) (0.137)
##
## FDI -0.148 -0.135
## (0.095) (0.087)
##
## log(Imp) 0.010 0.008
## (0.017) (0.017)
##
## -----------------------------------------
## Observations 532 532
## =========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
stargazer(reg_Theil_2018,# Theil
reg_Theil_robust_2018, # Theil_robust
reg_Margins_2018, # Margins
reg_Margins_robust_2018,# Margins_robust
se = list(se2imp_Theil_2018, # Theil
se2imp_Theil_robust_2018,# Theil_robust
se2imp_Margins_2018, # Margins
se2imp_Margins_robust_2018),# Margins_robust
type = "text", omit = c('Constant'), keep.stat = c("n"),
add.lines = list(c('model', 'OLS', 'OLS', 'OLS', 'OLS')),
title = "OLS", digits = 4) #, font.size = "tiny"
##
## OLS
## =====================================================
## Dependent variable:
## ---------------------------------------
## log(GRP_2018)
## (1) (2) (3) (4)
## -----------------------------------------------------
## Div_2018 -0.0146 -0.0450
## (0.0256) (0.0283)
##
## EM_2018 0.2188*** 0.2155***
## (0.0598) (0.0593)
##
## IM_2018 0.0180*** 0.0182***
## (0.0062) (0.0062)
##
## Edu_2018 0.0003 -0.0001 0.0007* 0.0005
## (0.0005) (0.0006) (0.0004) (0.0005)
##
## Pop_2018 -6.8542 -6.5859 -12.8861* -13.2407*
## (9.5294) (9.5860) (7.4090) (7.4636)
##
## Inv_2018 0.5134 0.4327 -0.1231 -0.1140
## (0.5958) (0.5823) (0.3866) (0.3915)
##
## FDI_2018 2.0064** 1.6214* 0.7532 0.6634
## (0.8901) (0.9355) (0.5929) (0.6735)
##
## log(Imp_2018) 0.1857*** 0.2121*** 0.2787*** 0.2745***
## (0.0455) (0.0481) (0.0593) (0.0592)
##
## -----------------------------------------------------
## model OLS OLS OLS OLS
## Observations 78 76 78 76
## =====================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
stargazer(fe_twoways_Theil,# Theil
fe_twoways_Theil_robust, # Theil_robust
fe_twoways_Margins, # Margins
fe_twoways_Margins_robust,# Margins_robust
se = list(se_fe_twoways_Theil, # Theil
se_fe_twoways_Theil_robust,# Theil_robust
se_fe_twoways_Margins, # Margins
se_fe_twoways_Margins_robust),# Margins_robust
type = "text", omit = c('Constant'), keep.stat = c("n"),
add.lines = list(c('model', 'FE', 'FE', 'FE', 'FE')),
title = "Two-ways FE", digits = 4) #, font.size = "tiny"
##
## Two-ways FE
## ==================================================
## Dependent variable:
## -------------------------------------
## log(GRP)
## (1) (2) (3) (4)
## --------------------------------------------------
## Div 0.0083 0.0049
## (0.0075) (0.0082)
##
## EM 0.0518*** 0.0509***
## (0.0180) (0.0180)
##
## IM 0.0020** 0.0020**
## (0.0009) (0.0009)
##
## Edu -0.0004 -0.0004 -0.0003 -0.0004
## (0.0005) (0.0005) (0.0004) (0.0004)
##
## Pop 3.1728 3.4758 2.4659 2.8125
## (3.0186) (3.3102) (2.9752) (3.2495)
##
## Inv -0.2106 -0.2118 -0.1867 -0.1871
## (0.1479) (0.1493) (0.1352) (0.1374)
##
## FDI -0.1479 -0.1485 -0.1358 -0.1346
## (0.0925) (0.0954) (0.0843) (0.0869)
##
## log(Imp) 0.0095 0.0097 0.0087 0.0085
## (0.0167) (0.0169) (0.0167) (0.0169)
##
## --------------------------------------------------
## model FE FE FE FE
## Observations 546 532 546 532
## ==================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
Pretty robust results (betas of independent variables).