options(repos = c(CRAN = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))
install.packages("dplyr")
## 将程序包安装入'C:/Users/86183/AppData/Local/R/win-library/4.5'
## (因为'lib'没有被指定)
## 还安装依赖关系'lifecycle', 'rlang', 'vctrs'
## 程序包'lifecycle'打开成功,MD5和检查也通过
## 程序包'rlang'打开成功,MD5和检查也通过
## Warning: 无法删除软件包 'rlang' 的先前安装
## Warning in file.copy(savedcopy, lib, recursive = TRUE):
## 拷贝C:\Users\86183\AppData\Local\R\win-library\4.5\00LOCK\rlang\libs\x64\rlang.dll到C:\Users\86183\AppData\Local\R\win-library\4.5\rlang\libs\x64\rlang.dll时出了问题:Permission
## denied
## Warning: 回复了'rlang'
## 程序包'vctrs'打开成功,MD5和检查也通过
## Warning: 无法删除软件包 'vctrs' 的先前安装
## Warning in file.copy(savedcopy, lib, recursive = TRUE):
## 拷贝C:\Users\86183\AppData\Local\R\win-library\4.5\00LOCK\vctrs\libs\x64\vctrs.dll到C:\Users\86183\AppData\Local\R\win-library\4.5\vctrs\libs\x64\vctrs.dll时出了问题:Permission
## denied
## Warning: 回复了'vctrs'
## 程序包'dplyr'打开成功,MD5和检查也通过
## Warning: 无法删除软件包 'dplyr' 的先前安装
## Warning in file.copy(savedcopy, lib, recursive = TRUE):
## 拷贝C:\Users\86183\AppData\Local\R\win-library\4.5\00LOCK\dplyr\libs\x64\dplyr.dll到C:\Users\86183\AppData\Local\R\win-library\4.5\dplyr\libs\x64\dplyr.dll时出了问题:Permission
## denied
## Warning: 回复了'dplyr'
##
## 下载的二进制程序包在
## C:\Users\86183\AppData\Local\Temp\RtmpmU81ko\downloaded_packages里
数据加载与预处理
# 首先确保加载必要的包
library(tidyverse) # 包含read_csv和管道操作符%>%
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tableone)
library(kableExtra)
##
## 载入程序包:'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(survival)
library(survminer)
## 载入需要的程序包:ggpubr
##
## 载入程序包:'survminer'
##
## The following object is masked from 'package:survival':
##
## myeloma
library(timeROC)
# 读取并转换数据
surv_data <- read_csv("第6章习题2(1).csv") %>%
mutate(
death_status = factor(death_status,
levels = c(0, 1),
labels = c("Censored", "Dead")),
gender = factor(gender,
levels = c(1, 2),
labels = c("Male", "Female")),
cancer_type = factor(cancer_type,
levels = 1:3,
labels = c("Type1", "Type2", "Type3"))
)
## Rows: 500 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (6): survival_time, death_status, age, gender, cancer_type, weight
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# 查看数据结构
glimpse(surv_data)
## Rows: 500
## Columns: 6
## $ survival_time <dbl> 364, 524, 60, 185, 821, 11, 524, 173, 306, 61, 60, 180, …
## $ death_status <fct> Dead, Dead, Dead, Censored, Censored, Dead, Dead, Censor…
## $ age <dbl> 56, 54, 64, 69, 64, 81, 68, 59, 74, 56, 65, 56, 57, 68, …
## $ gender <fct> Male, Female, Male, Male, Female, Male, Male, Female, Ma…
## $ cancer_type <fct> Type2, Type2, Type2, Type2, Type1, Type1, Type3, Type2, …
## $ weight <dbl> 53.75, 47.11, 51.92, 43.27, 47.41, 42.98, 53.55, 46.10, …
描述性统计
# 创建TableOne表格
table_vars <- c("age", "gender", "cancer_type", "weight", "survival_time")
cat_vars <- c("gender", "cancer_type")
table1 <- CreateTableOne(
vars = table_vars,
strata = "death_status",
data = surv_data,
factorVars = cat_vars
)
# 打印美化后的表格
print(table1, showAllLevels = TRUE) %>%
kable("html") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
## Stratified by death_status
## level Censored Dead p test
## n 135 365
## age (mean (SD)) 60.38 (10.16) 62.49 (9.64) 0.033
## gender (%) Male 49 (36.3) 225 (61.6) <0.001
## Female 86 (63.7) 140 (38.4)
## cancer_type (%) Type1 60 (44.4) 87 (23.8) <0.001
## Type2 67 (49.6) 176 (48.2)
## Type3 8 ( 5.9) 102 (27.9)
## weight (mean (SD)) 49.11 (4.90) 49.95 (4.95) 0.093
## survival_time (mean (SD)) 358.76 (218.44) 288.69 (202.20) 0.001
|
|
level
|
Censored
|
Dead
|
p
|
test
|
|
n
|
|
135
|
365
|
|
|
|
age (mean (SD))
|
|
60.38 (10.16)
|
62.49 (9.64)
|
0.033
|
|
|
gender (%)
|
Male
|
49 (36.3)
|
225 (61.6)
|
<0.001
|
|
|
|
Female
|
86 (63.7)
|
140 (38.4)
|
|
|
|
cancer_type (%)
|
Type1
|
60 (44.4)
|
87 (23.8)
|
<0.001
|
|
|
|
Type2
|
67 (49.6)
|
176 (48.2)
|
|
|
|
|
Type3
|
8 ( 5.9)
|
102 (27.9)
|
|
|
|
weight (mean (SD))
|
|
49.11 (4.90)
|
49.95 (4.95)
|
0.093
|
|
|
survival_time (mean (SD))
|
|
358.76 (218.44)
|
288.69 (202.20)
|
0.001
|
|
生存分析
# 创建生存对象
surv_obj <- Surv(time = surv_data$survival_time,
event = surv_data$death_status == "Dead")
# 总体生存曲线
fit_overall <- survfit(surv_obj ~ 1)
ggsurvplot(
fit_overall,
data = surv_data,
risk.table = TRUE,
surv.median.line = "hv",
palette = "jco",
title = "Overall Survival Curve",
xlab = "Time (days)",
legend = "none"
)

# 按癌症类型分组的生存曲线
fit_type <- survfit(surv_obj ~ cancer_type, data = surv_data)
ggsurvplot(
fit_type,
data = surv_data,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
surv.median.line = "hv",
palette = "jco",
title = "Survival by Cancer Type",
xlab = "Time (days)"
)
## Warning in geom_segment(aes(x = 0, y = max(y2), xend = max(x1), yend = max(y2)), : All aesthetics have length 1, but the data has 3 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## All aesthetics have length 1, but the data has 3 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## All aesthetics have length 1, but the data has 3 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## All aesthetics have length 1, but the data has 3 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

Cox比例风险模型
单因素
# 单因素Cox回归
covariates <- c("age", "gender", "cancer_type", "weight")
univ_models <- lapply(covariates, function(x) {
coxph(as.formula(paste("surv_obj ~", x)), data = surv_data)
})
# 提取结果并美化
univ_results <- map_dfr(univ_models, ~{
res <- summary(.)
tibble(
variable = rownames(res$coefficients),
hr = res$coefficients[, 2],
ci_lower = res$conf.int[, 3],
ci_upper = res$conf.int[, 4],
p_value = res$coefficients[, 5]
)
})
# 显示结果表格
univ_results %>%
mutate(across(where(is.numeric), ~round(., 3))) %>%
kable("html") %>%
kable_styling()
|
variable
|
hr
|
ci_lower
|
ci_upper
|
p_value
|
|
age
|
1.015
|
1.003
|
1.026
|
0.011
|
|
genderFemale
|
0.641
|
0.519
|
0.792
|
0.000
|
|
cancer_typeType2
|
1.327
|
1.026
|
1.717
|
0.031
|
|
cancer_typeType3
|
2.239
|
1.678
|
2.988
|
0.000
|
|
weight
|
1.008
|
0.987
|
1.029
|
0.474
|
多因素分析
# 多因素Cox模型
full_model <- coxph(
surv_obj ~ age + gender + cancer_type + weight,
data = surv_data
)
# 模型摘要
summary(full_model)
## Call:
## coxph(formula = surv_obj ~ age + gender + cancer_type + weight,
## data = surv_data)
##
## n= 500, number of events= 365
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.005351 1.005365 0.005886 0.909 0.363290
## genderFemale -0.380251 0.683690 0.108893 -3.492 0.000479 ***
## cancer_typeType2 0.264264 1.302472 0.131851 2.004 0.045042 *
## cancer_typeType3 0.711271 2.036578 0.152028 4.679 2.89e-06 ***
## weight 0.008711 1.008749 0.010412 0.837 0.402825
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.0054 0.9947 0.9938 1.0170
## genderFemale 0.6837 1.4627 0.5523 0.8463
## cancer_typeType2 1.3025 0.7678 1.0059 1.6866
## cancer_typeType3 2.0366 0.4910 1.5118 2.7435
## weight 1.0087 0.9913 0.9884 1.0295
##
## Concordance= 0.61 (se = 0.017 )
## Likelihood ratio test= 44.51 on 5 df, p=2e-08
## Wald test = 45.21 on 5 df, p=1e-08
## Score (logrank) test = 46.75 on 5 df, p=6e-09
# 模型假设检验
cox.zph(full_model)
## chisq df p
## age 2.205 1 0.138
## gender 3.135 1 0.077
## cancer_type 1.169 2 0.557
## weight 0.777 1 0.378
## GLOBAL 6.678 5 0.246
ggcoxzph(cox.zph(full_model))

时间依赖性ROC分析
# 1年、3年、5年生存预测ROC
if(!require(timeROC)) install.packages("timeROC")
library(timeROC)
# 预测时间点
times <- c(365, 365*3, 365*5)
# 计算ROC
roc <- timeROC(
T = surv_data$survival_time,
delta = as.numeric(surv_data$death_status == "Dead"),
marker = predict(full_model),
cause = 1,
times = times,
ROC = TRUE
)
# 绘制ROC曲线
plot(roc, time = 365, title = FALSE)
plot(roc, time = 365*3, add = TRUE, col = "blue")
plot(roc, time = 365*5, add = TRUE, col = "red")
legend("bottomright",
c(paste("1-year AUC =", round(roc$AUC[1], 2)),
paste("3-year AUC =", round(roc$AUC[2], 2)),
paste("5-year AUC =", round(roc$AUC[3], 2))),
col = c("black", "blue", "red"),
lty = 1)
