1.中介变量模型 \(E[M|a,c]=\beta_o+\beta_1 a +\beta_{2}' c\)
2.结局变量模型 \(E[Y|a,m,c]=\theta_0+\theta_1 a+\theta_2 m +\theta_3 am + \theta_4' c\)
3.利用模型参数实现因果效应的估计
模型设定
中介模型: \[E[M|a, c] = \beta_0 + \beta_1 a + \beta_2' c \]
结局模型: \[E[Y|a, m, c] = \theta_0 + \theta_1 a + \theta_2 m + \theta_3 am + \theta_4' c \]
自然直接效应 (NDE) 的推导
定义:
\[NDE = E[Y(a, M(a^*)) | c] - E[Y(a^*, M(a^*)) | c]\]
推导: \[\begin{aligned} NDE &= \{ \theta_0 +\theta_1 a + (\theta_2 + \theta_3 a) E[M | a^*, c] \} - \{\theta_0 + \theta_1 a^* + (\theta_2 + \theta_3 a^*) E[M | a^*, c] \} \\ &= \theta_1 (a - a^*) + \theta_3 E[M | a^*, c] (a - a^*) \\ &= \{ \theta_1 + \theta_3 (\beta_0 + \beta_1 a^* + \beta_2' c) \} (a - a^*) \end{aligned}\]
自然间接效应 (NIE) 的推导
定义:
\[NIE = E[Y(a, M(a)) | c] - E[Y(a, M(a^*)) | c]\] 推导: \[\begin{aligned} NIE &= \{ \theta_0 + \theta_1 a + (\theta_2 + \theta_3 a) E[M | a, c] + \theta_4' c \} - \{ \theta_0 + \theta_1 a + (\theta_2 + \theta_3 a) E[M | a^*, c] + \theta_4' c \} \\ &= (\theta_2 + \theta_3 a) \{ E[M | a, c] - E[M | a^*, c] \} \\ &= (\theta_2 + \theta_3 a) \{ (\beta_0 + \beta_1 a + \beta_2' c) - (\beta_0 + \beta_1 a^* + \beta_2' c) \} \\ &= (\theta_2 + \theta_3 a) \beta_1 (a - a^*) \\ &= (\theta_2 \beta_1 + \theta_3 \beta_1 a)(a - a^*) \end{aligned}\]
代码实现
set.seed(2026)
# 1. 定义参数
n <- 10000
a <- 1; a_star <- 0 # 暴露水平对比
# 中介模型参数
beta_0 <- 0.5; beta_1 <- 0.8; beta_2 <- 0.3
# 结局模型参数
theta_0 <- 1.0; theta_1 <- 0.5; theta_2 <- 0.7; theta_3 <- 0.4; theta_4 <- -0.2
# 2. 生成模拟数据函数
generate_data <- function(n_obs) {
C <- rnorm(n_obs, 0, 1) # 混杂因素
A <- rbinom(n_obs, 1, 0.5) # 二分类暴露
M <- beta_0 + beta_1 * A + beta_2 * C + rnorm(n_obs, 0, 0.5) # 中介模型
Y <- theta_0 + theta_1 * A + theta_2 * M + theta_3 * A * M +
theta_4 * C + rnorm(n_obs, 0, 0.5) # 结局模型
return(data.frame(A, M, Y, C))
}
sim_data <- generate_data(n)
# 3. 蒙特卡洛模拟:获取“真实值”
# 思路:在已知真实生成机制下,模拟无限大群体中的反事实结局
get_true_values <- function(n_huge = 100000) {
C_huge <- rnorm(n_huge, 0, 1)
# 计算 M(a) 和 M(a*)
M_a <- beta_0 + beta_1 * a + beta_2 * C_huge + rnorm(n_huge, 0, 0.5)
M_a_star <- beta_0 + beta_1 * a_star + beta_2 * C_huge + rnorm(n_huge, 0, 0.5)
# 计算各种反事实结局 Y(a, M), Y(a*, M)
Y_a_Ma <- theta_0 + theta_1 * a + theta_2 * M_a + theta_3 * a * M_a + theta_4 * C_huge
Y_a_Ma_star <- theta_0 + theta_1 * a + theta_2 * M_a_star + theta_3 * a * M_a_star + theta_4 * C_huge
Y_a_star_Ma_star <- theta_0 + theta_1 * a_star + theta_2 * M_a_star + theta_3 * a_star * M_a_star + theta_4 * C_huge
true_nde <- mean(Y_a_Ma_star - Y_a_star_Ma_star)
true_nie <- mean(Y_a_Ma - Y_a_Ma_star)
return(c(True_NDE = true_nde, True_NIE = true_nie))
}
true_vals <- get_true_values()
# 4. 参数化回归估计函数
calc_mediation <- function(data, indices) {
d <- data[indices, ] # 供 Bootstrap 抽样使用
# 拟合模型
m_mod <- lm(M ~ A + C, data = d)
y_mod <- lm(Y ~ A * M + C, data = d) # A*M 包含了交互项
# 提取系数
b <- coef(m_mod)
t <- coef(y_mod)
c_mean <- mean(d$C) # 在协变量均值水平下估计
# 应用公式 (a - a_star)
diff_a <- a - a_star
est_nde <- (t["A"] + t["A:M"] * (b["(Intercept)"] + b["A"] * a_star + b["C"] * c_mean)) * diff_a
est_nie <- (t["M"] * b["A"] + t["A:M"] * b["A"] * a) * diff_a
return(c(NDE = unname(est_nde), NIE = unname(est_nie)))
}
# 5. Bootstrap 计算置信区间
library(boot)
boot_results <- boot(data = sim_data, statistic = calc_mediation, R = 50)
# 6. 结果汇总与对比
print(true_vals) # 真实值## True_NDE True_NIE
## 0.7004585 0.8803234
## NDE NIE
## 0.7146041 0.8600962
nde_ci <- boot.ci(boot_results, type = "perc", index = 1)
nie_ci <- boot.ci(boot_results, type = "perc", index = 2)
print(nde_ci) ## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 50 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_results, type = "perc", index = 1)
##
## Intervals :
## Level Percentile
## 95% ( 0.6838, 0.7424 )
## Calculations and Intervals on Original Scale
## Some percentile intervals may be unstable
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 50 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_results, type = "perc", index = 2)
##
## Intervals :
## Level Percentile
## 95% ( 0.8200, 0.8953 )
## Calculations and Intervals on Original Scale
## Some percentile intervals may be unstable
模型设定
中介模型: \[logit\{P(M=1 | a, c)\} = \beta_0 + \beta_1 a + \beta_2' c\] 由此可得,\(M=1\) 的条件概率为 \(Q(a, c) = \frac{\exp(\beta_0 + \beta_1 a + \beta_2' c)}{1 + \exp(\beta_0 + \beta_1 a + \beta_2' c)}\)。
结局模型(线性回归): \[E[Y | a, m, c] = \theta_0 + \theta_1 a + \theta_2 m + \theta_3 am + \theta_4' c\]
自然直接效应 (NDE) 的推导
在 \(M\) 是二分类变量时,嵌套反事实期望值 \(E[Y(a, M(a^*))|c]\) 需要对 \(M\) 的两种可能(0 和 1)进行加权求和: \[E[Y(a, M(a^*))|c] = E[Y|a, M=0, c] \cdot P(M=0|a^*, c) + E[Y|a, M=1, c] \cdot P(M=1|a^*, c)\]
代入模型公式: \[\begin{aligned} E[Y(a, M(a^*))|c] &= (\theta_0 + \theta_1 a + \theta_4' c) \cdot [1 - Q(a^*, c)] + (\theta_0 + \theta_1 a + \theta_2 + \theta_3 a + \theta_4' c) \cdot Q(a^*, c) \\ &= (\theta_0 + \theta_1 a + \theta_4' c) + (\theta_2 + \theta_3 a) Q(a^*, c) \end{aligned}\]
根据 \(NDE = E[Y(a, M(a^*))|c] - E[Y(a^*, M(a^*))|c]\): \[\begin{aligned} NDE &= [(\theta_0 + \theta_1 a + \theta_4' c) + (\theta_2 + \theta_3 a) Q(a^*, c)] - [(\theta_0 + \theta_1 a^* + \theta_4' c) + (\theta_2 + \theta_3 a^*) Q(a^*, c)] \\ &= \theta_1(a - a^*) + \theta_3 Q(a^*, c)(a - a^*) \\ &= \{ \theta_1 + \theta_3 \frac{\exp(\beta_0 + \beta_1 a^* + \beta_2' c)}{1 + \exp(\beta_0 + \beta_1 a^* + \beta_2' c)} \} (a - a^*) \end{aligned}\]
自然间接效应 (NIE) 的推导
根据 \(NIE = E[Y(a, M(a))|c] - E[Y(a, M(a^*))|c]\): \[\begin{aligned} NIE &= [(\theta_0 + \theta_1 a + \theta_4' c) + (\theta_2 + \theta_3 a) Q(a, c)] - [(\theta_0 + \theta_1 a + \theta_4' c) + (\theta_2 + \theta_3 a) Q(a^*, c)] \\ &= (\theta_2 + \theta_3 a) [Q(a, c) - Q(a^*, c)] \\ &= (\theta_2 + \theta_3 a) \{ \frac{\exp(\beta_0 + \beta_1 a + \beta_2' c)}{1 + \exp(\beta_0 + \beta_1 a + \beta_2' c)} - \frac{\exp(\beta_0 + \beta_1 a^* + \beta_2' c)}{1 + \exp(\beta_0 + \beta_1 a^* + \beta_2' c)} \} \end{aligned}\]
代码实现
set.seed(2026)
library(boot)
# 1. 定义参数
n <- 10000
a <- 1; a_star <- 0
# 中介模型参数 (Logistic 系数)
beta_0 <- -1.5; beta_1 <- 1.2; beta_2 <- 0.5
# 结局模型参数 (线性系数)
theta_0 <- 1.0; theta_1 <- 0.5; theta_2 <- 0.8; theta_3 <- 0.4; theta_4 <- -0.2
# 2. 生成模拟数据函数 (M 为二分类)
generate_data <- function(n_obs) {
C <- rnorm(n_obs, 0, 1)
A <- rbinom(n_obs, 1, 0.5)
# Logistic 概率转换
prob_m <- plogis(beta_0 + beta_1 * A + beta_2 * C)
M <- rbinom(n_obs, 1, prob_m)
Y <- theta_0 + theta_1 * A + theta_2 * M + theta_3 * A * M +
theta_4 * C + rnorm(n_obs, 0, 0.5)
return(data.frame(A, M, Y, C))
}
sim_data <- generate_data(n)
# 3. 蒙特卡洛模拟获取真实值
get_true_values <- function(n_huge = 100000) {
C_h <- rnorm(n_huge, 0, 1)
# M 在 a 和 a* 下的概率
Q_a <- plogis(beta_0 + beta_1 * a + beta_2 * C_h)
Q_a_star <- plogis(beta_0 + beta_1 * a_star + beta_2 * C_h)
# Y(a, M(a*)) 的期望: 分别考虑 M=0 和 M=1 的加权
Y_a_Mastar <- (theta_0 + theta_1 * a + theta_4 * C_h) * (1 - Q_a_star) +
(theta_0 + theta_1 * a + theta_2 + theta_3 * a + theta_4 * C_h) * Q_a_star
Y_astar_Mastar <- (theta_0 + theta_1 * a_star + theta_4 * C_h) * (1 - Q_a_star) +
(theta_0 + theta_1 * a_star + theta_2 + theta_3 * a_star + theta_4 * C_h) * Q_a_star
Y_a_Ma <- (theta_0 + theta_1 * a + theta_4 * C_h) * (1 - Q_a) +
(theta_0 + theta_1 * a + theta_2 + theta_3 * a + theta_4 * C_h) * Q_a
return(c(True_NDE = mean(Y_a_Mastar - Y_astar_Mastar),
True_NIE = mean(Y_a_Ma - Y_a_Mastar)))
}
true_vals <- get_true_values()
# 4. 估计函数:
calc_mediation_formula <- function(data, indices) {
d <- data[indices, ]
# 1. 拟合模型
m_mod <- glm(M ~ A + C, family = binomial, data = d)
y_mod <- lm(Y ~ A * M + C, data = d)
# 2. 提取系数 (注意系数对应的名称)
b <- coef(m_mod) # b[1]:intercept, b[2]:A, b[3]:C
t <- coef(y_mod) # t[1]:intercept, t[2]:A, t[3]:M, t[4]:C, t[5]:A:M
# 3. 确定协变量水平 (通常取均值)
c_val <- mean(d$C)
# 4. 定义辅助函数 Q(a, c) —— 即文献中的 exp(...)/(1+exp(...))
# 对应公式里的概率项
get_Q <- function(a_val, c_val) {
linear_pred <- b["(Intercept)"] + b["A"] * a_val + b["C"] * c_val
return(exp(linear_pred) / (1 + exp(linear_pred)))
}
# 5. 直接代入图片中的公式
diff_a <- a - a_star # (a - a*)
# NDE 公式: theta_1*(a-a*) + theta_3*(a-a*)*Q(a_star, c)
# 注意:y_mod 中 A:M 的系数是 theta_3
est_nde <- (t["A"] * diff_a) + (t["A:M"] * diff_a * get_Q(a_star, c_val))
# NIE 公式: (theta_2 + theta_3*a) * [Q(a, c) - Q(a_star, c)]
est_nie <- (t["M"] + t["A:M"] * a) * (get_Q(a, c_val) - get_Q(a_star, c_val))
return(c(NDE = unname(est_nde), NIE = unname(est_nie)))
}
# 5. Bootstrap 计算
boot_results <- boot(data = sim_data, statistic = calc_mediation_formula , R = 50)
# 6. 结果汇总
print(true_vals)## True_NDE True_NIE
## 0.5774930 0.2830858
## NDE NIE
## 0.5672269 0.3057082
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 50 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_results, type = "perc", index = 1)
##
## Intervals :
## Level Percentile
## 95% ( 0.5429, 0.5955 )
## Calculations and Intervals on Original Scale
## Some percentile intervals may be unstable
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 50 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_results, type = "perc", index = 2)
##
## Intervals :
## Level Percentile
## 95% ( 0.2753, 0.3268 )
## Calculations and Intervals on Original Scale
## Some percentile intervals may be unstable
模型设定
中介模型(线性回归): \[E[M | a, c] = \beta_0 + \beta_1 a + \beta_2' c\] 假设残差服从正态分布,方差为 \(\sigma^2\)。
结局模型(Logistic 回归): \[logit\{P(Y=1 | a, m, c)\} = \theta_0 + \theta_1 a + \theta_2 m + \theta_3 am + \theta_4' c\]
因果效应估计 (基于罕见病假设下的 OR 尺度)
当结局发生率较低时,可以直接利用模型参数通过以下闭式公式估计 \(\log(OR)\):
自然直接效应 (NDE): \[\log(OR^{NDE}) \cong \{ \theta_1 + \theta_3(\beta_0 + \beta_1 a^* + \beta_2' c + \theta_2 \sigma^2) \}(a - a^*) + 0.5 \theta_3^2 \sigma^2 (a^2 - a^{*2})\]
自然间接效应 (NIE): \[\log(OR^{NIE}) \cong (\theta_2 \beta_1 + \theta_3 \beta_1 a)(a - a^*)\]
set.seed(2026)
library(boot)
# 1. 定义参数
n <- 20000 # 增加样本量以应对二分类结局的稀疏性
a <- 1; a_star <- 0
# 中介模型参数
beta_0 <- 0.5; beta_1 <- 0.6; beta_2 <- 0.3; sigma_m <- 0.5
# 结局模型参数 (theta_0 设为较小负数以满足罕见病假设)
theta_0 <- -4.0; theta_1 <- 0.4; theta_2 <- 0.6; theta_3 <- 0.3; theta_4 <- -0.2
# 2. 生成模拟数据函数
generate_data <- function(n_obs) {
C <- rnorm(n_obs, 0, 1)
A <- rbinom(n_obs, 1, 0.5)
M <- beta_0 + beta_1 * A + beta_2 * C + rnorm(n_obs, 0, sigma_m)
# Logistic 链接
prob_y <- plogis(theta_0 + theta_1 * A + theta_2 * M + theta_3 * A * M + theta_4 * C)
Y <- rbinom(n_obs, 1, prob_y)
return(data.frame(A, M, Y, C))
}
sim_data <- generate_data(n)
cat("结局发生率:", mean(sim_data$Y), "\n") # 验证是否为罕见结局## 结局发生率: 0.04695
# 3. 蒙特卡洛模拟获取真实值 (OR 尺度取 Log)
get_true_values_or <- function(n_huge = 100000) {
C_h <- rnorm(n_huge, 0, 1)
M_a <- beta_0 + beta_1 * a + beta_2 * C_h + rnorm(n_huge, 0, sigma_m)
M_a_star <- beta_0 + beta_1 * a_star + beta_2 * C_h + rnorm(n_huge, 0, sigma_m)
# 计算反事实概率 P(Y(a, M(a*))=1)
p_a_Mastar <- mean(plogis(theta_0 + theta_1 * a + theta_2 * M_a_star + theta_3 * a * M_a_star + theta_4 * C_h))
p_astar_Mastar <- mean(plogis(theta_0 + theta_1 * a_star + theta_2 * M_a_star + theta_3 * a_star * M_a_star + theta_4 * C_h))
p_a_Ma <- mean(plogis(theta_0 + theta_1 * a + theta_2 * M_a + theta_3 * a * M_a + theta_4 * C_h))
# 转换为 Log Odds Ratio
true_log_nde <- log((p_a_Mastar/(1-p_a_Mastar)) / (p_astar_Mastar/(1-p_astar_Mastar)))
true_log_nie <- log((p_a_Ma/(1-p_a_Ma)) / (p_a_Mastar/(1-p_a_Mastar)))
return(c(True_Log_NDE = true_log_nde, True_Log_NIE = true_log_nie))
}
true_vals <- get_true_values_or()
# 4. 估计函数:基于图片中的闭式公式
calc_mediation_binary_y <- function(data, indices) {
d <- data[indices, ]
m_mod <- lm(M ~ A + C, data = d)
y_mod <- glm(Y ~ A * M + C, family = binomial, data = d)
b <- coef(m_mod)
t <- coef(y_mod)
sig2 <- summary(m_mod)$sigma^2 # 提取中介模型剩余方差 sigma^2
c_val <- mean(d$C)
# 代入公式计算 log(OR)
# NDE 公式较长,包含 0.5 * theta3^2 * sigma^2 项
log_nde <- (t["A"] + t["A:M"] * (b["(Intercept)"] + b["A"] * a_star + b["C"] * c_val + t["M"] * sig2)) * (a - a_star) +
0.5 * (t["A:M"]^2) * sig2 * (a^2 - a_star^2)
# NIE 公式
log_nie <- (t["M"] * b["A"] + t["A:M"] * b["A"] * a) * (a - a_star)
return(c(log_NDE = unname(log_nde), log_NIE = unname(log_nie)))
}
# 5. Bootstrap
boot_results <- boot(data = sim_data, statistic = calc_mediation_binary_y, R = 100)
# 6. 汇总 (统一转换到 OR 尺度进行对比)
cat("\n--- 真实值 vs 估计值 (OR Scale) ---\n")##
## --- 真实值 vs 估计值 (OR Scale) ---
## True_NDE_OR True_NIE_OR
## 1.823798 1.707206
# 转换估计值 (t0)
est_or <- exp(boot_results$t0)
names(est_or) <- c("Est_NDE_OR", "Est_NIE_OR")
print(est_or)## Est_NDE_OR Est_NIE_OR
## 1.83625 1.77085
# 7. 置信区间 (同样转换回 OR)
# NDE
nde_ci_log <- boot.ci(boot_results, type = "perc", index = 1)$percent[4:5]
cat("\nNDE OR 95% CI:", exp(nde_ci_log[1]), "-", exp(nde_ci_log[2]))##
## NDE OR 95% CI: 1.543513 - 2.251946
# NIE
nie_ci_log <- boot.ci(boot_results, type = "perc", index = 2)$percent[4:5]
cat("\nNIE OR 95% CI:", exp(nie_ci_log[1]), "-", exp(nie_ci_log[2]), "\n")##
## NIE OR 95% CI: 1.596881 - 1.953045
模型设定
在这种场景下,我们需要拟合两个 Logistic 回归模型:
中介模型(Logistic 回归): \[logit\{P(M=1 | a, c)\} = \beta_0 + \beta_1 a + \beta_2' c\]
结局模型(Logistic 回归): \[logit\{P(Y=1 | a, m, c)\} = \theta_0 + \theta_1 a + \theta_2 m + \theta_3 am + \theta_4' c\]
因果效应估计 (基于罕见病假设下的 OR 尺度)
在结局发生率较低时,自然效应的几率比(OR)计算如下:
set.seed(2026)
library(boot)
# 1. 定义参数
n <- 50000 # 二分类/二分类场景需要极大的样本量来保证数值稳定
a <- 1; a_star <- 0
# 中介模型参数 (Logistic)
beta_0 <- -1.5; beta_1 <- 0.8; beta_2 <- 0.3
# 结局模型参数 (theta_0 设极小以满足罕见病假设)
theta_0 <- -5.0; theta_1 <- 0.5; theta_2 <- 0.7; theta_3 <- 0.4; theta_4 <- -0.2
# 2. 生成模拟数据函数 (M, Y 均为二分类)
generate_data <- function(n_obs) {
C <- rnorm(n_obs, 0, 1)
A <- rbinom(n_obs, 1, 0.5)
M <- rbinom(n_obs, 1, plogis(beta_0 + beta_1 * A + beta_2 * C))
Y <- rbinom(n_obs, 1, plogis(theta_0 + theta_1 * A + theta_2 * M + theta_3 * A * M + theta_4 * C))
return(data.frame(A, M, Y, C))
}
sim_data <- generate_data(n)
cat("结局发生率:", mean(sim_data$Y), " | 中介变量发生率:", mean(sim_data$M), "\n")## 结局发生率: 0.01336 | 中介变量发生率: 0.26376
# 3. 蒙特卡洛模拟获取真实值 (OR 尺度)
get_true_values_or <- function(n_huge = 200000) {
C_h <- rnorm(n_huge, 0, 1)
# 计算 M 在 a 和 a* 下的概率
Q_a <- plogis(beta_0 + beta_1 * a + beta_2 * C_h)
Q_a_star <- plogis(beta_0 + beta_1 * a_star + beta_2 * C_h)
# 计算各种嵌套反事实概率 P(Y(a, M(a*))=1)
# 注意:这里需要对 M 的两种状态 (0, 1) 进行加权
calc_p <- function(a_val, q_val) {
p_m0 <- plogis(theta_0 + theta_1 * a_val + theta_2 * 0 + theta_3 * a_val * 0 + theta_4 * C_h)
p_m1 <- plogis(theta_0 + theta_1 * a_val + theta_2 * 1 + theta_3 * a_val * 1 + theta_4 * C_h)
return(mean(p_m0 * (1 - q_val) + p_m1 * q_val))
}
p_a_Ma <- calc_p(a, Q_a)
p_a_Mastar <- calc_p(a, Q_a_star)
p_astar_Mastar <- calc_p(a_star, Q_a_star)
# 转换为 Odds Ratio (OR)
true_nde_or <- (p_a_Mastar/(1-p_a_Mastar)) / (p_astar_Mastar/(1-p_astar_Mastar))
true_nie_or <- (p_a_Ma/(1-p_a_Ma)) / (p_a_Mastar/(1-p_a_Mastar))
return(c(True_NDE_OR = true_nde_or, True_NIE_OR = true_nie_or))
}
true_vals_or <- get_true_values_or()
# 4. 估计函数:
calc_mediation_all_binary <- function(data, indices) {
d <- data[indices, ]
m_mod <- glm(M ~ A + C, family = binomial, data = d)
y_mod <- glm(Y ~ A * M + C, family = binomial, data = d)
b <- coef(m_mod)
t <- coef(y_mod)
c_val <- mean(d$C)
# 为了公式整洁,先定义一些中间项
# beta_0 + beta_1*a* + beta_2*c
lin_m_astar <- b[1] + b[2]*a_star + b[3]*c_val
# beta_0 + beta_1*a + beta_2*c
lin_m_a <- b[1] + b[2]*a + b[3]*c_val
# NDE 公式 (直接计算 OR)
num_nde <- exp(t[2]*a) * (1 + exp(t[3] + t[5]*a + lin_m_astar))
den_nde <- exp(t[2]*a_star) * (1 + exp(t[3] + t[5]*a_star + lin_m_astar))
est_nde_or <- num_nde / den_nde
# NIE 公式 (直接计算 OR)
num_nie <- (1 + exp(lin_m_astar)) * (1 + exp(t[3] + t[5]*a + lin_m_a))
den_nie <- (1 + exp(lin_m_a)) * (1 + exp(t[3] + t[5]*a + lin_m_astar))
est_nie_or <- num_nie / den_nie
# 由于 boot 最好在正态尺度下工作,我们返回 log(OR)
return(c(log(est_nde_or), log(est_nie_or)))
}
# 5. Bootstrap
boot_results <- boot(data = sim_data, statistic = calc_mediation_all_binary, R = 100)
# 6. 结果汇总 (汇报 OR 尺度)
cat("\n--- 二分类结局 & 二分类中介分析结果 (OR 尺度) ---\n")##
## --- 二分类结局 & 二分类中介分析结果 (OR 尺度) ---
## True_NDE_OR True_NIE_OR
## 1.887441 1.211849
## Est_NDE_OR Est_NIE_OR
## 1.921301 1.230226
# 7. 置信区间 (转换为 OR)
nde_ci_or <- exp(boot.ci(boot_results, type = "perc", index = 1)$percent[4:5])
nie_ci_or <- exp(boot.ci(boot_results, type = "perc", index = 2)$percent[4:5])
cat(sprintf("\nNDE OR: %.3f (95%% CI: %.3f - %.3f)", est_or[1], nde_ci_or[1], nde_ci_or[2]))##
## NDE OR: 1.921 (95% CI: 1.582 - 2.231)
##
## NIE OR: 1.230 (95% CI: 1.184 - 1.299)
Mediation analysis allowing for exposure-mediator interactions and causal interpretation: theoretical assumptions and implementation with SAS and SPSS macros