# 设置随机数种子以保证结果的可复现性
set.seed(123)
# 生成模拟数据
n <- 100
# 参与者总数
times <- c('baseline', 'month1', 'month3', 'month6')
groups <- c('control', 'treatment')
num_times <- length(times)
num_groups <- length(groups)
# 创建参与者ID、分组和时间的数据框
id <- rep(1:n, each = num_times)
group <- rep(rep(groups, each = n / num_groups), each = num_times)
time <- rep(times, times = n)
# 模拟血压测量值# 基线血压
bp_baseline <- rnorm(n, mean = 120, sd = 10)
# 血压随时间的变化(假设对所有人相同,仅为简化示例)
time_effect <- c(0, -1, -3, -5)
# 治疗效应(对于试验组和对照组不同)
group_effect <- ifelse(group == 'treatment', -2, 0)
# 模拟血压测量值
bp <- numeric(length(id))
for (i in 1:length(id)) {
time_idx <- match(time[i], times)
bp[i] <- bp_baseline[id[i]] + time_effect[time_idx] + group_effect[i] + rnorm(1, mean = 0, sd = 5)
}
# 创建数据框
bp_data <- data.frame(id, group, time, BP = bp)
# 添加性别列,随机分配"male"或"female"
bp_data$gender <- sample(c("male", "female"), size = n * num_times, replace = TRUE)
# 添加年龄列,假设平均年龄为40岁,标准差为10岁
bp_data$age <- rnorm(n * num_times, mean = 40, sd = 10)
# 检查是否有NA值
anyNA(bp_data)
## [1] FALSE
# 查看前几行数据
head(bp_data)
## id group time BP gender age
## 1 1 control baseline 110.8432 male 32.71781
## 2 1 control month1 114.6797 female 24.59558
## 3 1 control month3 110.1618 female 33.06905
## 4 1 control month6 107.6575 male 41.18849
## 5 2 control baseline 112.9401 female 26.35291
## 6 2 control month1 116.4731 male 45.89983
bp_data$time <- factor(bp_data$time)
箱线图展示趋势
#绘制箱状图
library(ggplot2)
fig1 <- ggplot(bp_data,aes(x=factor(time),y=BP,fill=group))+
geom_boxplot(position = position_dodge(width=0.8))+
labs(x="时间",y="血压",title="不同组的血压指标箱状图")+
theme_minimal()
fig1
展示baseline
library(gtsummary)
tb1 <- bp_data %>%
select(age,gender,group) %>%
tbl_summary(by= group,missing = "no") %>%
add_n() %>%
add_overall() %>%
add_p() %>%
modify_header(label = "**Variable**") %>%
modify_caption("Table 1. Baseline characteristics")%>%#添加表头
bold_labels() #标签加粗
tb1
| Variable | N | Overall N = 4001 |
control N = 2001 |
treatment N = 2001 |
p-value2 |
|---|---|---|---|---|---|
| age | 400 | 41 (34, 47) | 42 (34, 48) | 41 (34, 46) | 0.4 |
| gender | 400 | 0.5 | |||
| female | 192 (48%) | 99 (50%) | 93 (47%) | ||
| male | 208 (52%) | 101 (51%) | 107 (54%) | ||
| 1 Median (Q1, Q3); n (%) | |||||
| 2 Wilcoxon rank sum test; Pearson’s Chi-squared test | |||||
library(lme4)
#线性混合效应模型1
model <- lmer(BP ~ group * time +(1 |id),data = bp_data)#随机效应是每个对象的随机截距id。
#线性混合效应模型2
#model2 <- lmer(BP ~ group * time + (1 + time | id), data = bp_data)#(1 + time | id) 表示每个参与者(id)的随机截距和时间的随机斜率
anova(model)#计算效应
## Analysis of Variance Table
## npar Sum Sq Mean Sq F value
## group 1 4.18 4.18 0.1611
## time 3 1991.86 663.95 25.5716
## group:time 3 22.91 7.64 0.2941
summary(model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: BP ~ group * time + (1 | id)
## Data: bp_data
##
## REML criterion at convergence: 2672
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.8802 -0.5326 -0.0345 0.5882 3.0865
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 78.08 8.836
## Residual 25.96 5.096
## Number of obs: 400, groups: id, 100
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 120.96637 1.44252 83.857
## grouptreatment -1.20277 2.04004 -0.590
## timemonth1 -1.96970 1.01911 -1.933
## timemonth3 -3.33077 1.01911 -3.268
## timemonth6 -6.05966 1.01911 -5.946
## grouptreatment:timemonth1 0.82904 1.44124 0.575
## grouptreatment:timemonth3 1.04509 1.44124 0.725
## grouptreatment:timemonth6 -0.01576 1.44124 -0.011
##
## Correlation of Fixed Effects:
## (Intr) grptrt tmmnt1 tmmnt3 tmmnt6 grpt:1 grpt:3
## grouptrtmnt -0.707
## timemonth1 -0.353 0.250
## timemonth3 -0.353 0.250 0.500
## timemonth6 -0.353 0.250 0.500 0.500
## grptrtmnt:1 0.250 -0.353 -0.707 -0.354 -0.354
## grptrtmnt:3 0.250 -0.353 -0.354 -0.707 -0.354 0.500
## grptrtmnt:6 0.250 -0.353 -0.354 -0.354 -0.707 0.500 0.500
library(emmeans)
# 计算边际均值
emm <- emmeans(model, ~ group * time)
emm_table <- summary(emm)
#emm_table现在包含每个组在每个时间点的估计均值、标准误差、置信区间等。
# 计算组间的对比
emm_pairs <- pairs(emm, comparisons = contrasts, adjust = "none")
emm_pairs_table <- summary(emm_pairs)
#emm_pairs_table应该包含组间差异的估计值、标准误差、置信区间和p值。
#从模型中提取每个治疗组在不同时间点的预测值和标准误差
bp_data2 <- bp_data
bp_data2$predicted_bp <- predict(model)
bp_data2$se<-sqrt(diag(vcov(model)))
#计算95%置信区间
bp_data2$ci_low <- bp_data2$predicted_bp -1.96 * bp_data2$se
bp_data2$ci_high <- bp_data2$predicted_bp + 1.96 * bp_data2$se
#计算每个时间点的平均预测值和95%置信区间
mean_predicted_bp <- aggregate(predicted_bp ~ group + time, data = bp_data2, mean)
mean_predicted_bp$ci_low <- aggregate(ci_low ~ group + time, data = bp_data2, mean)$ci_low
mean_predicted_bp$ci_high <- aggregate(ci_high ~ group + time, data = bp_data2, mean)$ci_high
#绘制折线图
fig3 <- ggplot(mean_predicted_bp, aes(x= time,y= predicted_bp, group= group, color = group))+
geom_line()+
geom_point()+
geom_errorbar(aes(ymin= ci_low, ymax=ci_high), width= 1)+
labs(x="时间",y="血压",title="血压随时间的变化")+
theme_minimal()
fig3
可视化emm_table结果
fig2 <- ggplot(emm_table, aes(x = time, y = emmean, group = group, color = group)) +
geom_line(position = position_dodge(0.25)) +
geom_point(aes(shape = group),position = position_dodge(0.25),size = 4) +
geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2,position = position_dodge(0.25)) + # 添加误差条(置信区间)
theme_minimal() +
labs(x = "Months", y = "BP", title = "BP over Time") +
theme(legend.position = "bottom")+
annotate("text", x = 0, y = 113, label = "p < 0.001", size = 5, hjust = 0, vjust = 1) # 在左下角添加 p 值
fig2
library(tidyverse)
#baseline_bp作为固定协变量
#准备数据
#创建一个新的数据框,包含 id、group、BP、baseline_BP
bp_data_long <- data.frame(
id = bp_data$id,
group = bp_data$group,
BP = bp_data$BP,
time = bp_data$time,
baseline_BP = ifelse(bp_data$time == "baseline", bp_data$BP, NA)
)
# 首先,我们按 id 和 group 分组,并找到每个组中第一个 baseline 时间点的 baseline_BP 值
first_baseline <- bp_data_long %>%
filter(time == "baseline") %>%
group_by(id, group) %>%
slice(1) %>%
select(id, group, baseline_BP)
# 然后,我们将这个值合并回原始数据框,并用它来填充缺失的 baseline_BP 值
bp_data_long <- bp_data_long %>%
left_join(first_baseline, by = c("id", "group")) %>%
select(-baseline_BP.x) %>% # 删除辅助列
rename(baseline_BP = baseline_BP.y) %>%
filter(time != "baseline")
#计算血压差值
bp_data_long$diff_bp <- bp_data_long$BP - bp_data_long$baseline_BP
将baseline_BP作为协变量拟合模型。模型公式将包括group、time及其交互项,以及baseline_BP作为协变量。
# 拟合包含协变量的混合线性模型
model_covariate <- lmer(BP ~ group * time + baseline_BP + (1 | id), data = bp_data_long)
# 拟合以差值为结局,并包含协变量的混合线性模型
model_covdiff <- lmer(diff_bp ~ group * time + baseline_BP + (1 | id), data = bp_data_long)