seed(1)

fit1<-lm(acad ~ pedu2,dtf)
stargazer(fit1, type="html")
Dependent variable:
acad
pedu2 0.338***
(0.006)
Constant 0.479***
(0.004)
Observations 25,776
R2 0.127
Adjusted R2 0.127
Residual Std. Error 0.440 (df = 25774)
F Statistic 3,754.158*** (df = 1; 25774)
Note: p<0.1; p<0.05; p<0.01
fit2<-lm(acad ~ gpa9 , dtf)
stargazer(fit2, type="html")
Dependent variable:
acad
gpa9 0.087***
(0.001)
Constant -0.313***
(0.008)
Observations 25,776
R2 0.382
Adjusted R2 0.382
Residual Std. Error 0.370 (df = 25774)
F Statistic 15,909.180*** (df = 1; 25774)
Note: p<0.1; p<0.05; p<0.01
fit3<-lm(acad~ pedu2 + gpa9, dtf)
stargazer(fit3, type="html")
Dependent variable:
acad
pedu2 0.150***
(0.005)
gpa9 0.080***
(0.001)
Constant -0.308***
(0.008)
Observations 25,776
R2 0.404
Adjusted R2 0.403
Residual Std. Error 0.364 (df = 25773)
F Statistic 8,717.271*** (df = 2; 25773)
Note: p<0.1; p<0.05; p<0.01

Using Mediaton Analysis

First step: The effect of the indep onto the mediator

beta1 <- lm(acad ~ pedu2 , dtf)
summary(beta1)
## 
## Call:
## lm(formula = acad ~ pedu2, data = dtf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8170 -0.4788  0.1830  0.1830  0.5212 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.478748   0.004131  115.89   <2e-16 ***
## pedu2       0.338238   0.005520   61.27   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4399 on 25774 degrees of freedom
## Multiple R-squared:  0.1271, Adjusted R-squared:  0.1271 
## F-statistic:  3754 on 1 and 25774 DF,  p-value: < 2.2e-16

Second step: The effect of the mediator on the the dependent variable

First we are going to calculate using simple OLS model and then adding a interaction term:

gamma <- lm(acad ~ pedu2 + gpa9, dtf) #simple OLS

Third step: Pass model objects through mediate function

Mediaton analysis OLS:

med.out <- mediate(beta1, gamma, treat="pedu2", mediator="gpa9", sims=1000)
summary(med.out)
## 
## Causal Mediation Analysis 
## 
## Quasi-Bayesian Confidence Intervals
## 
##                Estimate 95% CI Lower 95% CI Upper p-value    
## ACME             0.0269       0.0259         0.03  <2e-16 ***
## ADE              0.1498       0.1400         0.16  <2e-16 ***
## Total Effect     0.1767       0.1673         0.19  <2e-16 ***
## Prop. Mediated   0.1521       0.1431         0.16  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 25776 
## 
## 
## Simulations: 1000

Mediation analysis using probit model:

## First step: The effect of the indep onto the mediator
beta.probit<- glm(acad~pedu2, dtf, family= binomial(link="probit"))
gamma.probit <- glm(acad ~ pedu2 + gpa9 , dtf, family = binomial(link="probit"))


med.out.prob<- mediate(beta.probit, gamma.probit, treat="pedu2", mediator="gpa9", sims=50)
summary(med.out.prob)
## 
## Causal Mediation Analysis 
## 
## Quasi-Bayesian Confidence Intervals
## 
##                          Estimate 95% CI Lower 95% CI Upper p-value    
## ACME (control)           0.000199     0.000156         0.00  <2e-16 ***
## ACME (treated)           0.001169     0.000920         0.00  <2e-16 ***
## ADE (control)            0.003019     0.002296         0.00  <2e-16 ***
## ADE (treated)            0.003989     0.003073         0.01  <2e-16 ***
## Total Effect             0.004188     0.003217         0.01  <2e-16 ***
## Prop. Mediated (control) 0.047221     0.041336         0.06  <2e-16 ***
## Prop. Mediated (treated) 0.279470     0.263179         0.29  <2e-16 ***
## ACME (average)           0.000684     0.000536         0.00  <2e-16 ***
## ADE (average)            0.003504     0.002685         0.00  <2e-16 ***
## Prop. Mediated (average) 0.163346     0.152877         0.17  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 25776 
## 
## 
## Simulations: 50

Sensitivity Analysis

First we need to pass mediate output through medsens function, we will do only for the OLS models, the probit ones take too long.

Sensitivity Analysis for OLS model

sens.cont <- medsens(med.out,rho.by=.1, eps=.01, effect.type="both")
summary(sens.cont)
## 
## Mediation Sensitivity Analysis for Average Causal Mediation Effect
## 
## Rho at which ACME = 0: 0.9
## R^2_M*R^2_Y* at which ACME = 0: 0.81
## R^2_M~R^2_Y~ at which ACME = 0: 0.4217 
## 
## 
## Mediation Sensitivity Analysis for Average Direct Effect
## 
## Sensitivity Region
## 
##       Rho     ADE 95% CI Lower 95% CI Upper R^2_M*R^2_Y* R^2_M~R^2_Y~
## [1,] -0.9 -0.0073      -0.0174       0.0029         0.81       0.4217
## 
## Rho at which ADE = 0: -0.9
## R^2_M*R^2_Y* at which ADE = 0: 0.81
## R^2_M~R^2_Y~ at which ADE = 0: 0.4217

Plot true ACMEs and ADEs as functions of rho and Plot true ACMEs and ADEs as functions of “R square tildes”

par.orig <- par(mfrow = c(2,2))
plot1<-plot(sens.cont)
# Plot true ACMEs and ADEs as functions of "R square tildes"
plot1<- plot1 + plot(sens.cont, sens.par="R2", r.type="total", sign.prod="positive")

par(par.orig)

Sensitivity Analysis for Probit model

#sens.cont.probit <- medsens(med.out.prob,rho.by=.1, eps=.01, effect.type="both")
#summary(sens.cont.probit)

Plot true ACMEs and ADEs as functions of rho and Plot true ACMEs and ADEs as functions of “R square tildes”

#par.orig <- par(mfrow = c(2,2))
#plot3<-plot(sens.cont.probit)
# Plot true ACMEs and ADEs as functions of "R square tildes"
#plot4<- plot3 + plot(sens.cont.probit, sens.par="R2", r.type="total", sign.prod="positive")
#par(par.orig)

ACHARYA FRAMEWORK

library(foreign)
library(lmtest)
library(sandwich)


seq.g.var <- function(mod.first, mod.direct, med.vars) {
  require(sandwich)
  mat.x <- model.matrix(mod.direct)
  mat.first <- model.matrix(mod.first)
  n <- nrow(mat.x)
  Fhat <- crossprod(mat.x, mat.first)/n
  Fhat[, !(colnames(mat.first) %in% med.vars)] <- 0
  Mhat.inv <- solve(crossprod(mat.first)/n)
  ghat <- t(estfun(mod.direct)) + Fhat %*% Mhat.inv %*% t(estfun(mod.first))
  meat <- crossprod(t(ghat))/n
  bread <- (t(mat.x)%*%mat.x)/n
  vv <- (n/(n-ncol(mat.x)))*(solve(bread) %*% meat %*% solve(bread))/n
  return(vv)
}

baseline <- lm(acad ~  pedu2, dtf)
summary(baseline)
## 
## Call:
## lm(formula = acad ~ pedu2, data = dtf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8170 -0.4788  0.1830  0.1830  0.5212 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.478748   0.004131  115.89   <2e-16 ***
## pedu2       0.338238   0.005520   61.27   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4399 on 25774 degrees of freedom
## Multiple R-squared:  0.1271, Adjusted R-squared:  0.1271 
## F-statistic:  3754 on 1 and 25774 DF,  p-value: < 2.2e-16
first <- lm(acad ~ pedu2 +  gpa9, data = dtf)
summary(first)
## 
## Call:
## lm(formula = acad ~ pedu2 + gpa9, data = dtf)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.33276 -0.26824  0.04504  0.28365  1.22885 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.3083902  0.0079718  -38.69   <2e-16 ***
## pedu2        0.1498497  0.0048783   30.72   <2e-16 ***
## gpa9         0.0795358  0.0007278  109.28   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3637 on 25773 degrees of freedom
## Multiple R-squared:  0.4035, Adjusted R-squared:  0.4035 
## F-statistic:  8717 on 2 and 25773 DF,  p-value: < 2.2e-16
direct <- lm(I(acad - coef(first)["gpa9"]*gpa9) ~ pedu2, data = dtf, subset = rownames(dtf) %in% rownames(model.matrix(first)))
summary(direct)
## 
## Call:
## lm(formula = I(acad - coef(first)["gpa9"] * gpa9) ~ pedu2, data = dtf, 
##     subset = rownames(dtf) %in% rownames(model.matrix(first)))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.33276 -0.26824  0.04504  0.28365  1.22885 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.308390   0.003415  -90.30   <2e-16 ***
## pedu2        0.149850   0.004563   32.84   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3637 on 25774 degrees of freedom
## Multiple R-squared:  0.04016,    Adjusted R-squared:  0.04012 
## F-statistic:  1078 on 1 and 25774 DF,  p-value: < 2.2e-16
direct.vcov <- seq.g.var(first, direct, "gpa9")
coeftest(direct, vcov = direct.vcov)
## 
## t test of coefficients:
## 
##               Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) -0.3083902  0.0076967 -40.068 < 2.2e-16 ***
## pedu2        0.1498497  0.0044350  33.788 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
acad.se <- sqrt(diag(direct.vcov))
haus <- (coef(direct)["pedu2"] - coef(first)["pedu2"])/(acad.se["pedu2"]^2 - (summary(first)$coef["pedu2", 2])^2)
haus
##        pedu2 
## 6.923532e-10
pchisq(q = haus, df = 1, lower.tail = FALSE)
##    pedu2 
## 0.999979

sensitivity analysis

rho <- seq(-0.9,0.9, by = 0.01)
acde.sens <- rep(NA, times = length(rho))
acde.sens.se <- rep(NA, times = length(rho))
res.y <- residuals(lm(acad ~ pedu2, data = dtf, subset = !is.na(gpa9)))
res.m <- residuals(lm(gpa9 ~ pedu2, data = dtf))
rho.tilde <- cor(res.y, res.m)
m.fixed <- coef(first)["gpa9"] - rho*sd(res.y)*sqrt((1-rho.tilde^2)/(1-rho^2))/sd(res.m)
n <- nrow(model.matrix(direct))
for(i in 1:length(rho)) {
  sens.direct  <- lm(I(acad - m.fixed[i]*gpa9) ~ pedu2, data = dtf, subset = rownames(dtf) %in% rownames(model.matrix(first)))
  acde.sens[i] <- coef(sens.direct)["pedu2"]
  sens.vcov <- seq.g.var(first, sens.direct, "gpa9")
  acde.sens.se[i] <- sqrt(sens.vcov["pedu2", "pedu2"])
}

ci.hi <- acde.sens + 1.96 * acde.sens.se
ci.lo <- acde.sens - 1.96 * acde.sens.se

PLOTING FIGURE 6 in Acharya et al

plot(rho, acde.sens, type = 'n', ylim = range(c(ci.hi,ci.lo)), xlab = bquote("Correlation between mediator and outcome errors" ~~ (rho)),
ylab = "Estimated ACDE", bty = "n", las = 1)
polygon(x = c(rho, rev(rho)), y = c(ci.lo, rev(ci.hi)), col = "grey70", border = NA)
lines(rho, acde.sens, lwd = 2)
abline(v=0, lty = 2)
abline(h=0, lty = 2)