\(~\)
\(~\)
\(~\)
library(haven)
library(tidyverse)
library(reshape2)
library(data.table)
library(janitor)
library(xtable)
library(kableExtra)
library(stargazer)
library(ivreg)
library(RColorBrewer)
library(plotly)
# Load the data_AJR_2001.dta Stata file.
# The location can be overridden through the AJR_DATA_PATH environment
# variable; when it is not set, the original absolute path is used, so the
# script keeps working unchanged on the machine it was written on.
ajr_data_path <- Sys.getenv(
  "AJR_DATA_PATH",
  unset = "/Users/bastienpatras/Desktop/ENS - Master of Economics/Econometrics/Tutorial 5-20210322/data_AJR_2001.dta"
)
data_AJR_2001 <- read_dta(ajr_data_path)
\(~\)
\(~\)
# Computation of the first column
# Mean and standard deviation of each selected variable over the whole sample.
data_AJR_2001_sum <- data_AJR_2001 %>%
  select(logpgp95, loghjypl, avexpr, cons00a, cons1, democ00a, euro1900, logem4) %>%
  # euro1900 is divided by 100 before summarising
  mutate(euro1900 = euro1900 / 100) %>%
  summarize(across(
    .cols = everything(),
    .fns = list(
      Mean = ~ mean(.x, na.rm = TRUE),
      SD = ~ sd(.x, na.rm = TRUE)
    ),
    .names = "{.col}_{.fn}"
  ))
# Formatting: reshape the one-row summary into one row per
# "<variable>_<statistic>" pair.
# NOTE: the column name `Decriptive` (sic) is kept as-is because the step that
# merges the first two columns selects it by that exact name.
column1 <- reshape2::melt(data_AJR_2001_sum) %>%
  rename(Decriptive = variable, `Whole sample` = value)
# Output
column1 %>%
  kbl(booktabs = TRUE) %>%
  kable_styling(latex_options = "striped", full_width = TRUE)
Decriptive | Whole sample |
---|---|
logpgp95_Mean | 8.3025091 |
logpgp95_SD | 1.1053422 |
loghjypl_Mean | -1.7311057 |
loghjypl_SD | 1.0837256 |
avexpr_Mean | 7.0664914 |
avexpr_SD | 1.8042868 |
cons00a_Mean | 1.8571429 |
cons00a_SD | 1.8231318 |
cons1_Mean | 3.5909091 |
cons1_SD | 2.4146886 |
democ00a_Mean | 1.1494253 |
democ00a_SD | 2.5768592 |
euro1900_Mean | 0.3046623 |
euro1900_SD | 0.4238986 |
logem4_Mean | 4.5959838 |
logem4_SD | 1.3033335 |
\(~\)
\(~\)
# Computation of the second column
# Mean and standard deviation of each selected variable over the rows with
# baseco == 1.
data_AJR_2001_sum_2 <- data_AJR_2001 %>%
  filter(baseco == 1) %>%
  # baseco is only needed for the filter above; leaving it out here means no
  # baseco_Mean / baseco_SD columns are produced, so nothing has to be
  # removed after summarising.
  select(logpgp95, loghjypl, avexpr, cons00a, cons1, democ00a, euro1900, logem4) %>%
  # euro1900 is divided by 100 before summarising
  mutate(euro1900 = euro1900 / 100) %>%
  summarize(across(
    .cols = everything(),
    .fns = list(
      Mean = ~ mean(.x, na.rm = TRUE),
      SD = ~ sd(.x, na.rm = TRUE)
    ),
    .names = "{.col}_{.fn}"
  ))
# Formatting: one row per "<variable>_<statistic>" pair; the label column
# keeps melt()'s default name `variable`.
column2 <- reshape2::melt(data_AJR_2001_sum_2) %>%
  rename(`Base sample` = value)
# Output
column2 %>%
  kbl(booktabs = TRUE) %>%
  kable_styling(latex_options = "striped", full_width = TRUE)
variable | Base sample |
---|---|
logpgp95_Mean | 8.0622369 |
logpgp95_SD | 1.0433593 |
loghjypl_Mean | -1.9340524 |
loghjypl_SD | 0.9807444 |
avexpr_Mean | 6.5156250 |
avexpr_SD | 1.4686472 |
cons00a_Mean | 2.2500000 |
cons00a_SD | 2.1123126 |
cons1_Mean | 3.4000000 |
cons1_SD | 2.3949099 |
democ00a_Mean | 1.6440678 |
democ00a_SD | 3.0043802 |
euro1900_Mean | 0.1618095 |
euro1900_SD | 0.2553334 |
logem4_Mean | 4.6570311 |
logem4_SD | 1.2579836 |
\(~\)
\(~\)
# Merging the first two columns
# cbind() pairs rows by position, so column1 and column2 must list the
# statistics in the same order. Only the label column of column1 is kept.
Table_1 <- cbind(column1, column2)[
  ,
  c("Decriptive", "Whole sample", "Base sample")
]
# Output
kable_styling(
  kbl(Table_1, booktabs = TRUE),
  latex_options = "striped",
  full_width = TRUE
)
Decriptive | Whole sample | Base sample |
---|---|---|
logpgp95_Mean | 8.3025091 | 8.0622369 |
logpgp95_SD | 1.1053422 | 1.0433593 |
loghjypl_Mean | -1.7311057 | -1.9340524 |
loghjypl_SD | 1.0837256 | 0.9807444 |
avexpr_Mean | 7.0664914 | 6.5156250 |
avexpr_SD | 1.8042868 | 1.4686472 |
cons00a_Mean | 1.8571429 | 2.2500000 |
cons00a_SD | 1.8231318 | 2.1123126 |
cons1_Mean | 3.5909091 | 3.4000000 |
cons1_SD | 2.4146886 | 2.3949099 |
democ00a_Mean | 1.1494253 | 1.6440678 |
democ00a_SD | 2.5768592 | 3.0043802 |
euro1900_Mean | 0.3046623 | 0.1618095 |
euro1900_SD | 0.4238986 | 0.2553334 |
logem4_Mean | 4.5959838 | 4.6570311 |
logem4_SD | 1.3033335 | 1.2579836 |
\(~\)
\(~\)
# Computation of the second table
# Mean and standard deviation of each variable within quartiles of logem4,
# computed on the rows with baseco == 1.
data_AJR_2001_sum_3 <- data_AJR_2001 %>%
  filter(baseco == 1) %>%
  select(logpgp95, loghjypl, avexpr, cons00a, cons1, democ00a, euro1900, logem4) %>%
  # euro1900 is divided by 100 before summarising
  mutate(euro1900 = euro1900 / 100) %>%
  # Quartile bins are half-open, [lower cut-off, upper cut-off), and q4 also
  # contains the maximum. case_when() takes the first matching branch, so each
  # branch only needs its upper bound.
  # The previous version used strict inequalities on both sides of every bin,
  # so observations exactly equal to a cut-off (and the maximum) matched no
  # branch and were summarised as a separate NA group.
  # Rows with a missing logem4 stay unclassified (NA).
  mutate(quantile = case_when(
    logem4 < quantile(logem4, probs = 0.25, na.rm = TRUE) ~ "q1",
    logem4 < quantile(logem4, probs = 0.50, na.rm = TRUE) ~ "q2",
    logem4 < quantile(logem4, probs = 0.75, na.rm = TRUE) ~ "q3",
    !is.na(logem4) ~ "q4"
  )) %>%
  group_by(quantile) %>%
  summarize(across(
    .cols = everything(),
    .fns = list(
      Mean = ~ mean(.x, na.rm = TRUE),
      SD = ~ sd(.x, na.rm = TRUE)
    ),
    .names = "{.col}_{.fn}"
  ))
# Formatting second table: transpose so that statistics are rows and quartiles
# are columns. as.matrix() on a table that contains the character column
# `quantile` yields a character matrix, so every cell of Table_2 is a string.
# NOTE: the object name `c` is kept for compatibility; it does not break calls
# to c(), because R skips non-function objects when looking up a function.
c <- as.data.frame(t(as.matrix(data_AJR_2001_sum_3)))
# The first row holds the quartile labels; promote it to column names.
Table_2 <- janitor::row_to_names(c, row_number = 1)
# Output
Table_2 %>%
  kbl(booktabs = TRUE) %>%
  kable_styling(latex_options = "striped", full_width = TRUE)
q1 | q2 | q3 | q4 | NA | |
---|---|---|---|---|---|
logpgp95_Mean | 8.825758 | 8.327826 | 7.873064 | 7.178371 | 8.344939 |
logpgp95_SD | 1.2659394 | 0.6124916 | 0.7218007 | 0.6532207 | 1.5445691 |
loghjypl_Mean | -1.071928 | -1.496654 | -2.106015 | -3.031726 | -1.853427 |
loghjypl_SD | 0.8450932 | 0.4305819 | 0.7506511 | 0.4815506 | 1.3186635 |
avexpr_Mean | 7.828788 | 6.363636 | 5.931818 | 6.066667 | 6.068182 |
avexpr_SD | 1.5197167 | 0.9777634 | 1.3518684 | 1.1959527 | 1.9288748 |
cons00a_Mean | 3.923077 | 2.800000 | 1.133333 | 1.000000 | 3.666667 |
cons00a_SD | 3.0127932 | 1.7808505 | 0.5163978 | 0.0000000 | 3.0550505 |
cons1_Mean | 4.846154 | 2.733333 | 3.133333 | 3.428571 | 1.666667 |
cons1_SD | 2.882307 | 1.980861 | 2.356349 | 2.138090 | 1.154701 |
democ00a_Mean | 4.153846 | 2.285714 | 0.200000 | 0.000000 | 2.666667 |
democ00a_SD | 4.4692683 | 2.8937219 | 0.4140393 | 0.0000000 | 2.5166115 |
euro1900_Mean | 0.294642857 | 0.233333340 | 0.086812500 | 0.005333333 | 0.366666679 |
euro1900_SD | 0.43043120 | 0.14597783 | 0.11991064 | 0.02065591 | 0.32145504 |
logem4_Mean | 3.089550 | 4.293698 | 4.874785 | 6.190215 | 5.483826 |
logem4_SD | 0.65109792 | 0.04729285 | 0.36773350 | 0.62147923 | 2.16708946 |
\(~\)
\(~\)
# Merging the two tables
# Rows are paired by position: Table_1 and Table_2 must list the statistics
# in the same order.
Table <- cbind(Table_1, Table_2)
# Output
kable_styling(
  kbl(Table, booktabs = TRUE),
  latex_options = "striped",
  full_width = TRUE
)
Decriptive | Whole sample | Base sample | q1 | q2 | q3 | q4 | NA | |
---|---|---|---|---|---|---|---|---|
logpgp95_Mean | logpgp95_Mean | 8.3025091 | 8.0622369 | 8.825758 | 8.327826 | 7.873064 | 7.178371 | 8.344939 |
logpgp95_SD | logpgp95_SD | 1.1053422 | 1.0433593 | 1.2659394 | 0.6124916 | 0.7218007 | 0.6532207 | 1.5445691 |
loghjypl_Mean | loghjypl_Mean | -1.7311057 | -1.9340524 | -1.071928 | -1.496654 | -2.106015 | -3.031726 | -1.853427 |
loghjypl_SD | loghjypl_SD | 1.0837256 | 0.9807444 | 0.8450932 | 0.4305819 | 0.7506511 | 0.4815506 | 1.3186635 |
avexpr_Mean | avexpr_Mean | 7.0664914 | 6.5156250 | 7.828788 | 6.363636 | 5.931818 | 6.066667 | 6.068182 |
avexpr_SD | avexpr_SD | 1.8042868 | 1.4686472 | 1.5197167 | 0.9777634 | 1.3518684 | 1.1959527 | 1.9288748 |
cons00a_Mean | cons00a_Mean | 1.8571429 | 2.2500000 | 3.923077 | 2.800000 | 1.133333 | 1.000000 | 3.666667 |
cons00a_SD | cons00a_SD | 1.8231318 | 2.1123126 | 3.0127932 | 1.7808505 | 0.5163978 | 0.0000000 | 3.0550505 |
cons1_Mean | cons1_Mean | 3.5909091 | 3.4000000 | 4.846154 | 2.733333 | 3.133333 | 3.428571 | 1.666667 |
cons1_SD | cons1_SD | 2.4146886 | 2.3949099 | 2.882307 | 1.980861 | 2.356349 | 2.138090 | 1.154701 |
democ00a_Mean | democ00a_Mean | 1.1494253 | 1.6440678 | 4.153846 | 2.285714 | 0.200000 | 0.000000 | 2.666667 |
democ00a_SD | democ00a_SD | 2.5768592 | 3.0043802 | 4.4692683 | 2.8937219 | 0.4140393 | 0.0000000 | 2.5166115 |
euro1900_Mean | euro1900_Mean | 0.3046623 | 0.1618095 | 0.294642857 | 0.233333340 | 0.086812500 | 0.005333333 | 0.366666679 |
euro1900_SD | euro1900_SD | 0.4238986 | 0.2553334 | 0.43043120 | 0.14597783 | 0.11991064 | 0.02065591 | 0.32145504 |
logem4_Mean | logem4_Mean | 4.5959838 | 4.6570311 | 3.089550 | 4.293698 | 4.874785 | 6.190215 | 5.483826 |
logem4_SD | logem4_SD | 1.3033335 | 1.2579836 | 0.65109792 | 0.04729285 | 0.36773350 | 0.62147923 | 2.16708946 |
# Latex output : xtable(Table)
\(~\)
\(~\)
# Slicing data: keep the two plotted columns plus shortnam
# (shortnam is not used by the plot below)
data_AJR_20011 <- select(data_AJR_2001, logpgp95, logem4, shortnam)
# Plotting data: scatter of logpgp95 against logem4 with a fitted linear
# trend and its confidence band
plot_ACEM_2001 <- ggplot(data_AJR_20011, aes(x = logem4, y = logpgp95)) +
  geom_point(color = "#C38D94") +
  geom_smooth(method = lm, color = "#D81E5B", fill = "#FFE3DC", se = TRUE) +
  labs(x = "Log settler mortality", y = "Log GDP (1995)") +
  theme(
    panel.background = element_rect(fill = "white"),
    axis.line = element_line(size = 0.2, colour = "#2F2D2E", linetype = 1)
  ) +
  # NOTE(review): no colour aesthetic is mapped above, so this scale appears
  # to have no visible effect - confirm before removing.
  scale_color_brewer()
# Output
print(plot_ACEM_2001)
\(~\)
\(~\)
\(~\)
# OLS regressions of logpgp95 on avexpr.
# Columns 1, 3 and 4 use every row of data_AJR_2001; columns 2, 5 and 6 use
# only the rows with baseco == 1.
data_AJR_2001_baseco1 <- filter(data_AJR_2001, baseco == 1)

# Column 1: no controls, all rows
c1 <- lm(logpgp95 ~ avexpr, data = data_AJR_2001)
# Column 2: no controls, baseco == 1
c2 <- lm(logpgp95 ~ avexpr, data = data_AJR_2001_baseco1)
# Column 3: adds lat_abst, all rows
c3 <- lm(logpgp95 ~ avexpr + lat_abst, data = data_AJR_2001)
# Column 4: adds lat_abst and the africa / asia / other factors, all rows
c4 <- lm(
  logpgp95 ~ avexpr + lat_abst + as.factor(africa) + as.factor(asia) + as.factor(other),
  data = data_AJR_2001
)
# Column 5: adds lat_abst, baseco == 1
c5 <- lm(logpgp95 ~ avexpr + lat_abst, data = data_AJR_2001_baseco1)
# Column 6: adds lat_abst and the africa / asia / other factors, baseco == 1
c6 <- lm(
  logpgp95 ~ avexpr + lat_abst + as.factor(africa) + as.factor(asia) + as.factor(other),
  data = data_AJR_2001_baseco1
)
# LaTeX output: stargazer(c1, c2, c3, c4, c5, c6)
\(~\)
\(~\)
\(~\)
# Conditioning the data set: rows with baseco == 1 and a non-zero extmort4
data_AJR_2001_baseco1 <- filter(data_AJR_2001, baseco == 1, extmort4 != 0)
# Model in Equation (4): avexpr on lat_abst and cons00a
m1 <- lm(avexpr ~ lat_abst + cons00a, data = data_AJR_2001_baseco1)
# Model in Equation (5): cons00a on euro1900 and lat_abst
m2 <- lm(cons00a ~ euro1900 + lat_abst, data = data_AJR_2001_baseco1)
# Model in Equation (6): euro1900 on lat_abst and logem4
m3 <- lm(euro1900 ~ lat_abst + logem4, data = data_AJR_2001_baseco1)
# LaTeX output: stargazer(m1, m2, m3)
\(~\)
\(~\)
# Conditioning the data set: rows with baseco == 1 and a non-zero extmort4
data_AJR_2001_baseco1 <- filter(data_AJR_2001, baseco == 1, extmort4 != 0)
# First stage: regress avexpr on lat_abst and logem4
First_stage <- lm(avexpr ~ lat_abst + logem4, data = data_AJR_2001_baseco1)
# Storing the predicted values of avexpr for use in the second stage
First_stage_fitted <- fitted(First_stage)
# Output: the first six fitted values
kable_styling(
  kbl(head(First_stage_fitted), booktabs = TRUE),
  latex_options = "striped",
  full_width = TRUE
)
x |
---|
5.927755 |
7.125869 |
8.034964 |
5.943324 |
6.885191 |
6.799632 |
# Output: coefficient table and fit statistics of the first-stage regression
summary(First_stage)
##
## Call:
## lm(formula = avexpr ~ lat_abst + logem4, data = data_AJR_2001_baseco1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7410 -0.9299 0.0393 0.8553 3.1693
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.5294 0.8123 10.500 2.67e-15 ***
## lat_abst 2.0018 1.3372 1.497 0.139546
## logem4 -0.5103 0.1410 -3.618 0.000603 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.252 on 61 degrees of freedom
## Multiple R-squared: 0.296, Adjusted R-squared: 0.2729
## F-statistic: 12.82 on 2 and 61 DF, p-value: 2.244e-05
# Latex output : stargazer(First_stage)
\(~\)
\(~\)
# Merging: prepend the first-stage fitted values as a column of the data set.
# This is a positional bind, so it relies on fitted() returning exactly one
# value per row of data_AJR_2001_baseco1.
data_AJR_2001_baseco1 <- cbind(
  First_stage_fitted = First_stage_fitted,
  data_AJR_2001_baseco1
)
# Second stage: regress logpgp95 on the fitted avexpr and lat_abst
# NOTE(review): standard errors from a manually run second stage are generally
# not valid 2SLS standard errors; prefer the ivreg() fit for inference.
Second_stage <- lm(
  logpgp95 ~ First_stage_fitted + lat_abst,
  data = data_AJR_2001_baseco1
)
# Output
summary(Second_stage)
##
## Call:
## lm(formula = logpgp95 ~ First_stage_fitted + lat_abst, data = data_AJR_2001_baseco1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5396 -0.4654 0.1097 0.4632 1.5712
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.6918 0.9647 1.754 0.0845 .
## First_stage_fitted 0.9957 0.1654 6.020 1.08e-07 ***
## lat_abst -0.6472 0.9961 -0.650 0.5183
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7495 on 61 degrees of freedom
## Multiple R-squared: 0.5004, Adjusted R-squared: 0.484
## F-statistic: 30.55 on 2 and 61 DF, p-value: 6.42e-10
# Latex output : stargazer(First_stage, Second_stage)
\(~\)
\(~\)
# Conditioning the data set: rows with baseco == 1
data_AJR_2001_baseco1 <- filter(data_AJR_2001, baseco == 1)
# 2SLS estimation using ivreg(): regressors are listed left of `|`,
# instruments right of it. lat_abst appears on both sides; avexpr is
# instrumented by logem4.
m_iv <- ivreg(
  logpgp95 ~ lat_abst + avexpr | lat_abst + logem4,
  data = data_AJR_2001_baseco1
)
# Output
summary(m_iv)
##
## Call:
## ivreg(formula = logpgp95 ~ lat_abst + avexpr | lat_abst + logem4,
## data = data_AJR_2001_baseco1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5611 -0.6557 0.0732 0.7572 1.8803
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.6918 1.2930 1.308 0.196
## lat_abst -0.6472 1.3351 -0.485 0.630
## avexpr 0.9957 0.2217 4.492 3.21e-05 ***
##
## Diagnostic tests:
## df1 df2 statistic p-value
## Weak instruments 1 61 13.09 0.000603 ***
## Wu-Hausman 1 60 18.75 5.75e-05 ***
## Sargan 0 NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.005 on 61 degrees of freedom
## Multiple R-Squared: 0.1025, Adjusted R-squared: 0.07305
## Wald test: 17.01 on 2 and 61 DF, p-value: 1.351e-06
# Latex output : stargazer(m_iv)
\(~\)
\(~\)
# Conditioning the data set
# Rows with baseco == 1, and the subset of those that also have f_brit == 1
data_AJR_2001_baseco1 <- filter(data_AJR_2001, baseco == 1)
data_AJR_2001_baseco12 <- filter(data_AJR_2001, baseco == 1, f_brit == 1)
# 2SLS estimation using ivreg()
# In every model avexpr is instrumented by logem4; all other regressors are
# repeated on the instrument side of `|`.
### Table 1 : controlling for disease, latitude, life expectancy
# NOTE(review): the models below add f_brit / f_french, sjlofr and
# catho80 / muslim80 / no_cpm80 as controls, which does not match the header
# above - confirm the intended title.
# Column 1: f_brit and f_french factors as controls
m_iv_1 <- ivreg(
  logpgp95 ~ avexpr + lat_abst + as.factor(f_brit) + as.factor(f_french) |
    lat_abst + logem4 + as.factor(f_french) + as.factor(f_brit),
  data = data_AJR_2001_baseco1
)
# Column 2: baseline specification on the f_brit == 1 subset
m_iv_2 <- ivreg(
  logpgp95 ~ avexpr + lat_abst | lat_abst + logem4,
  data = data_AJR_2001_baseco12
)
# Column 3: sjlofr factor as control
m_iv_3 <- ivreg(
  logpgp95 ~ avexpr + lat_abst + as.factor(sjlofr) |
    lat_abst + logem4 + as.factor(sjlofr),
  data = data_AJR_2001_baseco1
)
# Column 4: catho80, muslim80 and no_cpm80 as controls
m_iv_4 <- ivreg(
  logpgp95 ~ avexpr + lat_abst + catho80 + muslim80 + no_cpm80 |
    lat_abst + logem4 + catho80 + muslim80 + no_cpm80,
  data = data_AJR_2001_baseco1
)
# Column 5
# NOTE(review): this specification is identical to Column 4 (same formula,
# same data). Presumably a different set of controls was intended - confirm.
m_iv_5 <- ivreg(
  logpgp95 ~ avexpr + lat_abst + catho80 + muslim80 + no_cpm80 |
    lat_abst + logem4 + catho80 + muslim80 + no_cpm80,
  data = data_AJR_2001_baseco1
)
# LaTeX output: stargazer(m_iv_1, m_iv_2, m_iv_3, m_iv_4, m_iv_5)
### Table 2
# Same 2SLS specification (avexpr instrumented by logem4, lat_abst as
# control), adding one extra control per column. Each extra control is also
# listed on the instrument side of `|`.
# Column 11: malfal94 as control
m_iv_11 <- ivreg(
  logpgp95 ~ avexpr + lat_abst + malfal94 | lat_abst + logem4 + malfal94,
  data = data_AJR_2001_baseco1
)
# Column 12: yellow as control
m_iv_12 <- ivreg(
  logpgp95 ~ avexpr + lat_abst + yellow | lat_abst + logem4 + yellow,
  data = data_AJR_2001_baseco1
)
# Column 13: leb95 as control
m_iv_13 <- ivreg(
  logpgp95 ~ avexpr + lat_abst + leb95 | lat_abst + logem4 + leb95,
  data = data_AJR_2001_baseco1
)
# LaTeX output: stargazer(m_iv_11, m_iv_12, m_iv_13)
\(~\)
\(~\)
# Conditioning the data set: rows with baseco == 1 and rich4 == 0
data_AJR_2001_baseco13 <- filter(data_AJR_2001, baseco == 1, rich4 == 0)
# 2SLS estimation using ivreg(): same specification as m_iv (avexpr
# instrumented by logem4, lat_abst as control) on the restricted sample
m_iv_poor <- ivreg(
  logpgp95 ~ lat_abst + avexpr | lat_abst + logem4,
  data = data_AJR_2001_baseco13
)
# Output
summary(m_iv_poor)
##
## Call:
## ivreg(formula = logpgp95 ~ lat_abst + avexpr | lat_abst + logem4,
## data = data_AJR_2001_baseco13)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.05145 -0.70560 -0.07305 0.90445 2.48150
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1442 2.1835 0.066 0.94757
## lat_abst 0.9385 1.4631 0.641 0.52378
## avexpr 1.2118 0.3543 3.420 0.00116 **
##
## Diagnostic tests:
## df1 df2 statistic p-value
## Weak instruments 1 57 7.826 0.00701 **
## Wu-Hausman 1 56 17.633 9.69e-05 ***
## Sargan 0 NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.178 on 57 degrees of freedom
## Multiple R-Squared: -0.4918, Adjusted R-squared: -0.5442
## Wald test: 7.252 on 2 and 57 DF, p-value: 0.001564
# Latex output : stargazer(m_iv, m_iv_poor)