lmm

# 设置随机数种子以保证结果的可复现性
set.seed(123)
# 生成模拟数据
n <- 100 
# 参与者总数
times <- c('baseline', 'month1', 'month3', 'month6')
groups <- c('control', 'treatment')
num_times <- length(times)
num_groups <- length(groups)
# 创建参与者ID、分组和时间的数据框
id <- rep(1:n, each = num_times)
group <- rep(rep(groups, each = n / num_groups), each = num_times)
time <- rep(times, times = n)
# 模拟血压测量值# 基线血压
bp_baseline <- rnorm(n, mean = 120, sd = 10)
# 血压随时间的变化（假设对所有人相同，仅为简化示例）
time_effect <- c(0, -1, -3, -5)
# 治疗效应（对于试验组和对照组不同）
group_effect <- ifelse(group == 'treatment', -2, 0)
# 模拟血压测量值
bp <- numeric(length(id))
  for (i in 1:length(id)) { 
    time_idx <- match(time[i], times) 
    bp[i] <- bp_baseline[id[i]] + time_effect[time_idx] + group_effect[i] + rnorm(1, mean = 0, sd = 5)
    }
# 创建数据框
bp_data <- data.frame(id, group, time, BP = bp)

# 添加性别列，随机分配"male"或"female"
bp_data$gender <- sample(c("male", "female"), size = n * num_times, replace = TRUE)

# 添加年龄列，假设平均年龄为40岁，标准差为10岁
bp_data$age <- rnorm(n * num_times, mean = 40, sd = 10)

# 检查是否有NA值
anyNA(bp_data)

## [1] FALSE

# 查看前几行数据
head(bp_data)

##   id   group     time       BP gender      age
## 1  1 control baseline 110.8432   male 32.71781
## 2  1 control   month1 114.6797 female 24.59558
## 3  1 control   month3 110.1618 female 33.06905
## 4  1 control   month6 107.6575   male 41.18849
## 5  2 control baseline 112.9401 female 26.35291
## 6  2 control   month1 116.4731   male 45.89983

bp_data$time <- factor(bp_data$time)

箱线图展示趋势

#绘制箱状图
library(ggplot2)
fig1 <- ggplot(bp_data,aes(x=factor(time),y=BP,fill=group))+
  geom_boxplot(position = position_dodge(width=0.8))+
  labs(x="时间",y="血压",title="不同组的血压指标箱状图")+
  theme_minimal()

fig1

展示baseline

library(gtsummary)
tb1 <- bp_data %>% 
  select(age,gender,group) %>%
  tbl_summary(by= group,missing = "no") %>%
  add_n() %>%
  add_overall() %>% 
  add_p() %>%
  modify_header(label = "**Variable**") %>%
  modify_caption("Table 1. Baseline characteristics")%>%#添加表头
  bold_labels() #标签加粗

tb1

Table 1. Baseline characteristics
Variable	N	Overall N = 400¹	control N = 200¹	treatment N = 200¹	p-value²
age	400	41 (34, 47)	42 (34, 48)	41 (34, 46)	0.4
gender	400				0.5
female		192 (48%)	99 (50%)	93 (47%)
male		208 (52%)	101 (51%)	107 (54%)
¹ Median (Q1, Q3); n (%)
² Wilcoxon rank sum test; Pearson’s Chi-squared test

建模 研究的是血压（BP）如何随时间在不同组别中变化，同时考虑了个体间的随机效应

library(lme4)
#线性混合效应模型1
model <- lmer(BP ~ group * time +(1 |id),data = bp_data)#随机效应是每个对象的随机截距id。
#线性混合效应模型2
#model2 <- lmer(BP ~ group * time + (1 + time | id), data = bp_data)#(1 + time | id) 表示每个参与者（id）的随机截距和时间的随机斜率
anova(model)#计算效应

## Analysis of Variance Table
##            npar  Sum Sq Mean Sq F value
## group         1    4.18    4.18  0.1611
## time          3 1991.86  663.95 25.5716
## group:time    3   22.91    7.64  0.2941

summary(model)

## Linear mixed model fit by REML ['lmerMod']
## Formula: BP ~ group * time + (1 | id)
##    Data: bp_data
## 
## REML criterion at convergence: 2672
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8802 -0.5326 -0.0345  0.5882  3.0865 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  id       (Intercept) 78.08    8.836   
##  Residual             25.96    5.096   
## Number of obs: 400, groups:  id, 100
## 
## Fixed effects:
##                            Estimate Std. Error t value
## (Intercept)               120.96637    1.44252  83.857
## grouptreatment             -1.20277    2.04004  -0.590
## timemonth1                 -1.96970    1.01911  -1.933
## timemonth3                 -3.33077    1.01911  -3.268
## timemonth6                 -6.05966    1.01911  -5.946
## grouptreatment:timemonth1   0.82904    1.44124   0.575
## grouptreatment:timemonth3   1.04509    1.44124   0.725
## grouptreatment:timemonth6  -0.01576    1.44124  -0.011
## 
## Correlation of Fixed Effects:
##             (Intr) grptrt tmmnt1 tmmnt3 tmmnt6 grpt:1 grpt:3
## grouptrtmnt -0.707                                          
## timemonth1  -0.353  0.250                                   
## timemonth3  -0.353  0.250  0.500                            
## timemonth6  -0.353  0.250  0.500  0.500                     
## grptrtmnt:1  0.250 -0.353 -0.707 -0.354 -0.354              
## grptrtmnt:3  0.250 -0.353 -0.354 -0.707 -0.354  0.500       
## grptrtmnt:6  0.250 -0.353 -0.354 -0.354 -0.707  0.500  0.500

组间的差异，需要计算这些边际均值的对比

使用emmeans包来计算每个组在每个时间点的边际均值及其置信区间

library(emmeans)
# 计算边际均值
emm <- emmeans(model, ~ group * time)
emm_table <- summary(emm)
#emm_table现在包含每个组在每个时间点的估计均值、标准误差、置信区间等。

# 计算组间的对比
emm_pairs <- pairs(emm, comparisons = contrasts, adjust = "none")
emm_pairs_table <- summary(emm_pairs)
#emm_pairs_table应该包含组间差异的估计值、标准误差、置信区间和p值。

直接利用模型预测

#从模型中提取每个治疗组在不同时间点的预测值和标准误差
bp_data2 <- bp_data
bp_data2$predicted_bp <- predict(model)
bp_data2$se<-sqrt(diag(vcov(model)))

#计算95%置信区间
bp_data2$ci_low <- bp_data2$predicted_bp -1.96 * bp_data2$se
bp_data2$ci_high <- bp_data2$predicted_bp + 1.96 * bp_data2$se

#计算每个时间点的平均预测值和95%置信区间
mean_predicted_bp <- aggregate(predicted_bp ~ group + time, data = bp_data2, mean)
mean_predicted_bp$ci_low <- aggregate(ci_low ~ group + time, data = bp_data2, mean)$ci_low
mean_predicted_bp$ci_high <- aggregate(ci_high ~ group + time, data = bp_data2, mean)$ci_high


#绘制折线图
fig3 <- ggplot(mean_predicted_bp, aes(x= time,y= predicted_bp, group= group, color = group))+
  geom_line()+
  geom_point()+
  geom_errorbar(aes(ymin= ci_low, ymax=ci_high), width= 1)+
  labs(x="时间",y="血压",title="血压随时间的变化")+
  theme_minimal()
fig3

可视化emm_table结果

fig2 <- ggplot(emm_table, aes(x = time, y = emmean, group = group, color = group)) +
    geom_line(position = position_dodge(0.25)) + 
    geom_point(aes(shape = group),position = position_dodge(0.25),size = 4) +
     geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2,position = position_dodge(0.25)) +  # 添加误差条（置信区间）
  theme_minimal() +
  labs(x = "Months", y = "BP", title = "BP over Time") +
  theme(legend.position = "bottom")+
  annotate("text", x = 0, y = 113, label = "p < 0.001", size = 5, hjust = 0, vjust = 1)  # 在左下角添加 p 值
fig2

library(tidyverse)
#baseline_bp作为固定协变量
#准备数据
#创建一个新的数据框，包含 id、group、BP、baseline_BP
bp_data_long <- data.frame(
  id = bp_data$id,
  group = bp_data$group,
  BP = bp_data$BP,
  time = bp_data$time,
  baseline_BP = ifelse(bp_data$time == "baseline", bp_data$BP, NA)
) 

# 首先，我们按 id 和 group 分组，并找到每个组中第一个 baseline 时间点的 baseline_BP 值
first_baseline <- bp_data_long %>%
  filter(time == "baseline") %>%
  group_by(id, group) %>%
  slice(1) %>%
  select(id, group, baseline_BP)

# 然后，我们将这个值合并回原始数据框，并用它来填充缺失的 baseline_BP 值
bp_data_long <- bp_data_long %>%
  left_join(first_baseline, by = c("id", "group")) %>%
  select(-baseline_BP.x) %>% # 删除辅助列
  rename(baseline_BP = baseline_BP.y) %>%
  filter(time != "baseline")
#计算血压差值  
bp_data_long$diff_bp <- bp_data_long$BP - bp_data_long$baseline_BP

将baseline_BP作为协变量拟合模型。模型公式将包括group、time及其交互项，以及baseline_BP作为协变量。

# 拟合包含协变量的混合线性模型
model_covariate <- lmer(BP ~ group * time + baseline_BP + (1 | id), data = bp_data_long)
# 拟合以差值为结局，并包含协变量的混合线性模型
model_covdiff <- lmer(diff_bp ~ group * time + baseline_BP + (1 | id), data = bp_data_long)

lmm

shengwei_lin

2025-01-08