seed(1)
fit1<-lm(acad ~ pedu2,dtf)
stargazer(fit1, type="html")
| Dependent variable: | |
| acad | |
| pedu2 | 0.338*** |
| (0.006) | |
| Constant | 0.479*** |
| (0.004) | |
| Observations | 25,776 |
| R2 | 0.127 |
| Adjusted R2 | 0.127 |
| Residual Std. Error | 0.440 (df = 25774) |
| F Statistic | 3,754.158*** (df = 1; 25774) |
| Note: | p<0.1; p<0.05; p<0.01 |
fit2<-lm(acad ~ gpa9 , dtf)
stargazer(fit2, type="html")
| Dependent variable: | |
| acad | |
| gpa9 | 0.087*** |
| (0.001) | |
| Constant | -0.313*** |
| (0.008) | |
| Observations | 25,776 |
| R2 | 0.382 |
| Adjusted R2 | 0.382 |
| Residual Std. Error | 0.370 (df = 25774) |
| F Statistic | 15,909.180*** (df = 1; 25774) |
| Note: | p<0.1; p<0.05; p<0.01 |
fit3<-lm(acad~ pedu2 + gpa9, dtf)
stargazer(fit3, type="html")
| Dependent variable: | |
| acad | |
| pedu2 | 0.150*** |
| (0.005) | |
| gpa9 | 0.080*** |
| (0.001) | |
| Constant | -0.308*** |
| (0.008) | |
| Observations | 25,776 |
| R2 | 0.404 |
| Adjusted R2 | 0.403 |
| Residual Std. Error | 0.364 (df = 25773) |
| F Statistic | 8,717.271*** (df = 2; 25773) |
| Note: | p<0.1; p<0.05; p<0.01 |
beta1 <- lm(acad ~ pedu2 , dtf)
summary(beta1)
##
## Call:
## lm(formula = acad ~ pedu2, data = dtf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8170 -0.4788 0.1830 0.1830 0.5212
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.478748 0.004131 115.89 <2e-16 ***
## pedu2 0.338238 0.005520 61.27 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4399 on 25774 degrees of freedom
## Multiple R-squared: 0.1271, Adjusted R-squared: 0.1271
## F-statistic: 3754 on 1 and 25774 DF, p-value: < 2.2e-16
First we are going to calculate using simple OLS model and then adding a interaction term:
gamma <- lm(acad ~ pedu2 + gpa9, dtf) #simple OLS
med.out <- mediate(beta1, gamma, treat="pedu2", mediator="gpa9", sims=1000)
summary(med.out)
##
## Causal Mediation Analysis
##
## Quasi-Bayesian Confidence Intervals
##
## Estimate 95% CI Lower 95% CI Upper p-value
## ACME 0.0269 0.0259 0.03 <2e-16 ***
## ADE 0.1498 0.1400 0.16 <2e-16 ***
## Total Effect 0.1767 0.1673 0.19 <2e-16 ***
## Prop. Mediated 0.1521 0.1431 0.16 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Sample Size Used: 25776
##
##
## Simulations: 1000
## First step: The effect of the indep onto the mediator
beta.probit<- glm(acad~pedu2, dtf, family= binomial(link="probit"))
gamma.probit <- glm(acad ~ pedu2 + gpa9 , dtf, family = binomial(link="probit"))
med.out.prob<- mediate(beta.probit, gamma.probit, treat="pedu2", mediator="gpa9", sims=50)
summary(med.out.prob)
##
## Causal Mediation Analysis
##
## Quasi-Bayesian Confidence Intervals
##
## Estimate 95% CI Lower 95% CI Upper p-value
## ACME (control) 0.000199 0.000156 0.00 <2e-16 ***
## ACME (treated) 0.001169 0.000920 0.00 <2e-16 ***
## ADE (control) 0.003019 0.002296 0.00 <2e-16 ***
## ADE (treated) 0.003989 0.003073 0.01 <2e-16 ***
## Total Effect 0.004188 0.003217 0.01 <2e-16 ***
## Prop. Mediated (control) 0.047221 0.041336 0.06 <2e-16 ***
## Prop. Mediated (treated) 0.279470 0.263179 0.29 <2e-16 ***
## ACME (average) 0.000684 0.000536 0.00 <2e-16 ***
## ADE (average) 0.003504 0.002685 0.00 <2e-16 ***
## Prop. Mediated (average) 0.163346 0.152877 0.17 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Sample Size Used: 25776
##
##
## Simulations: 50
First we need to pass mediate output through medsens function, we will do only for the OLS models, the probit ones take too long.
sens.cont <- medsens(med.out,rho.by=.1, eps=.01, effect.type="both")
summary(sens.cont)
##
## Mediation Sensitivity Analysis for Average Causal Mediation Effect
##
## Rho at which ACME = 0: 0.9
## R^2_M*R^2_Y* at which ACME = 0: 0.81
## R^2_M~R^2_Y~ at which ACME = 0: 0.4217
##
##
## Mediation Sensitivity Analysis for Average Direct Effect
##
## Sensitivity Region
##
## Rho ADE 95% CI Lower 95% CI Upper R^2_M*R^2_Y* R^2_M~R^2_Y~
## [1,] -0.9 -0.0073 -0.0174 0.0029 0.81 0.4217
##
## Rho at which ADE = 0: -0.9
## R^2_M*R^2_Y* at which ADE = 0: 0.81
## R^2_M~R^2_Y~ at which ADE = 0: 0.4217
Plot true ACMEs and ADEs as functions of rho and Plot true ACMEs and ADEs as functions of “R square tildes”
par.orig <- par(mfrow = c(2,2))
plot1<-plot(sens.cont)
# Plot true ACMEs and ADEs as functions of "R square tildes"
plot1<- plot1 + plot(sens.cont, sens.par="R2", r.type="total", sign.prod="positive")
par(par.orig)
#sens.cont.probit <- medsens(med.out.prob,rho.by=.1, eps=.01, effect.type="both")
#summary(sens.cont.probit)
Plot true ACMEs and ADEs as functions of rho and Plot true ACMEs and ADEs as functions of “R square tildes”
#par.orig <- par(mfrow = c(2,2))
#plot3<-plot(sens.cont.probit)
# Plot true ACMEs and ADEs as functions of "R square tildes"
#plot4<- plot3 + plot(sens.cont.probit, sens.par="R2", r.type="total", sign.prod="positive")
#par(par.orig)
library(foreign)
library(lmtest)
library(sandwich)
seq.g.var <- function(mod.first, mod.direct, med.vars) {
require(sandwich)
mat.x <- model.matrix(mod.direct)
mat.first <- model.matrix(mod.first)
n <- nrow(mat.x)
Fhat <- crossprod(mat.x, mat.first)/n
Fhat[, !(colnames(mat.first) %in% med.vars)] <- 0
Mhat.inv <- solve(crossprod(mat.first)/n)
ghat <- t(estfun(mod.direct)) + Fhat %*% Mhat.inv %*% t(estfun(mod.first))
meat <- crossprod(t(ghat))/n
bread <- (t(mat.x)%*%mat.x)/n
vv <- (n/(n-ncol(mat.x)))*(solve(bread) %*% meat %*% solve(bread))/n
return(vv)
}
baseline <- lm(acad ~ pedu2, dtf)
summary(baseline)
##
## Call:
## lm(formula = acad ~ pedu2, data = dtf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8170 -0.4788 0.1830 0.1830 0.5212
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.478748 0.004131 115.89 <2e-16 ***
## pedu2 0.338238 0.005520 61.27 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4399 on 25774 degrees of freedom
## Multiple R-squared: 0.1271, Adjusted R-squared: 0.1271
## F-statistic: 3754 on 1 and 25774 DF, p-value: < 2.2e-16
first <- lm(acad ~ pedu2 + gpa9, data = dtf)
summary(first)
##
## Call:
## lm(formula = acad ~ pedu2 + gpa9, data = dtf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.33276 -0.26824 0.04504 0.28365 1.22885
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.3083902 0.0079718 -38.69 <2e-16 ***
## pedu2 0.1498497 0.0048783 30.72 <2e-16 ***
## gpa9 0.0795358 0.0007278 109.28 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3637 on 25773 degrees of freedom
## Multiple R-squared: 0.4035, Adjusted R-squared: 0.4035
## F-statistic: 8717 on 2 and 25773 DF, p-value: < 2.2e-16
direct <- lm(I(acad - coef(first)["gpa9"]*gpa9) ~ pedu2, data = dtf, subset = rownames(dtf) %in% rownames(model.matrix(first)))
summary(direct)
##
## Call:
## lm(formula = I(acad - coef(first)["gpa9"] * gpa9) ~ pedu2, data = dtf,
## subset = rownames(dtf) %in% rownames(model.matrix(first)))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.33276 -0.26824 0.04504 0.28365 1.22885
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.308390 0.003415 -90.30 <2e-16 ***
## pedu2 0.149850 0.004563 32.84 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3637 on 25774 degrees of freedom
## Multiple R-squared: 0.04016, Adjusted R-squared: 0.04012
## F-statistic: 1078 on 1 and 25774 DF, p-value: < 2.2e-16
direct.vcov <- seq.g.var(first, direct, "gpa9")
coeftest(direct, vcov = direct.vcov)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.3083902 0.0076967 -40.068 < 2.2e-16 ***
## pedu2 0.1498497 0.0044350 33.788 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
acad.se <- sqrt(diag(direct.vcov))
haus <- (coef(direct)["pedu2"] - coef(first)["pedu2"])/(acad.se["pedu2"]^2 - (summary(first)$coef["pedu2", 2])^2)
haus
## pedu2
## 6.923532e-10
pchisq(q = haus, df = 1, lower.tail = FALSE)
## pedu2
## 0.999979
rho <- seq(-0.9,0.9, by = 0.01)
acde.sens <- rep(NA, times = length(rho))
acde.sens.se <- rep(NA, times = length(rho))
res.y <- residuals(lm(acad ~ pedu2, data = dtf, subset = !is.na(gpa9)))
res.m <- residuals(lm(gpa9 ~ pedu2, data = dtf))
rho.tilde <- cor(res.y, res.m)
m.fixed <- coef(first)["gpa9"] - rho*sd(res.y)*sqrt((1-rho.tilde^2)/(1-rho^2))/sd(res.m)
n <- nrow(model.matrix(direct))
for(i in 1:length(rho)) {
sens.direct <- lm(I(acad - m.fixed[i]*gpa9) ~ pedu2, data = dtf, subset = rownames(dtf) %in% rownames(model.matrix(first)))
acde.sens[i] <- coef(sens.direct)["pedu2"]
sens.vcov <- seq.g.var(first, sens.direct, "gpa9")
acde.sens.se[i] <- sqrt(sens.vcov["pedu2", "pedu2"])
}
ci.hi <- acde.sens + 1.96 * acde.sens.se
ci.lo <- acde.sens - 1.96 * acde.sens.se
plot(rho, acde.sens, type = 'n', ylim = range(c(ci.hi,ci.lo)), xlab = bquote("Correlation between mediator and outcome errors" ~~ (rho)),
ylab = "Estimated ACDE", bty = "n", las = 1)
polygon(x = c(rho, rev(rho)), y = c(ci.lo, rev(ci.hi)), col = "grey70", border = NA)
lines(rho, acde.sens, lwd = 2)
abline(v=0, lty = 2)
abline(h=0, lty = 2)