options(repos = c(CRAN = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))
install.packages("dplyr")
## 将程序包安装入'C:/Users/86183/AppData/Local/R/win-library/4.5'
## (因为'lib'没有被指定)
## 还安装依赖关系'lifecycle', 'rlang', 'vctrs'
## 程序包'lifecycle'打开成功,MD5和检查也通过
## 程序包'rlang'打开成功,MD5和检查也通过
## Warning: 无法删除软件包 'rlang' 的先前安装
## Warning in file.copy(savedcopy, lib, recursive = TRUE):
## 拷贝C:\Users\86183\AppData\Local\R\win-library\4.5\00LOCK\rlang\libs\x64\rlang.dll到C:\Users\86183\AppData\Local\R\win-library\4.5\rlang\libs\x64\rlang.dll时出了问题:Permission
## denied
## Warning: 回复了'rlang'
## 程序包'vctrs'打开成功,MD5和检查也通过
## Warning: 无法删除软件包 'vctrs' 的先前安装
## Warning in file.copy(savedcopy, lib, recursive = TRUE):
## 拷贝C:\Users\86183\AppData\Local\R\win-library\4.5\00LOCK\vctrs\libs\x64\vctrs.dll到C:\Users\86183\AppData\Local\R\win-library\4.5\vctrs\libs\x64\vctrs.dll时出了问题:Permission
## denied
## Warning: 回复了'vctrs'
## 程序包'dplyr'打开成功,MD5和检查也通过
## Warning: 无法删除软件包 'dplyr' 的先前安装
## Warning in file.copy(savedcopy, lib, recursive = TRUE):
## 拷贝C:\Users\86183\AppData\Local\R\win-library\4.5\00LOCK\dplyr\libs\x64\dplyr.dll到C:\Users\86183\AppData\Local\R\win-library\4.5\dplyr\libs\x64\dplyr.dll时出了问题:Permission
## denied
## Warning: 回复了'dplyr'
## 
## 下载的二进制程序包在
##  C:\Users\86183\AppData\Local\Temp\RtmpmU81ko\downloaded_packages里

数据加载与预处理

# 首先确保加载必要的包
library(tidyverse) # 包含read_csv和管道操作符%>%
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tableone)
library(kableExtra)
## 
## 载入程序包:'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(survival)
library(survminer)
## 载入需要的程序包:ggpubr
## 
## 载入程序包:'survminer'
## 
## The following object is masked from 'package:survival':
## 
##     myeloma
library(timeROC)
# 读取并转换数据
surv_data <- read_csv("第6章习题2(1).csv") %>% 
  mutate(
    death_status = factor(death_status, 
                         levels = c(0, 1), 
                         labels = c("Censored", "Dead")),
    gender = factor(gender, 
                   levels = c(1, 2), 
                   labels = c("Male", "Female")),
    cancer_type = factor(cancer_type,
                        levels = 1:3,
                        labels = c("Type1", "Type2", "Type3"))
  )
## Rows: 500 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (6): survival_time, death_status, age, gender, cancer_type, weight
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# 查看数据结构
glimpse(surv_data)
## Rows: 500
## Columns: 6
## $ survival_time <dbl> 364, 524, 60, 185, 821, 11, 524, 173, 306, 61, 60, 180, …
## $ death_status  <fct> Dead, Dead, Dead, Censored, Censored, Dead, Dead, Censor…
## $ age           <dbl> 56, 54, 64, 69, 64, 81, 68, 59, 74, 56, 65, 56, 57, 68, …
## $ gender        <fct> Male, Female, Male, Male, Female, Male, Male, Female, Ma…
## $ cancer_type   <fct> Type2, Type2, Type2, Type2, Type1, Type1, Type3, Type2, …
## $ weight        <dbl> 53.75, 47.11, 51.92, 43.27, 47.41, 42.98, 53.55, 46.10, …

描述性统计

# 创建TableOne表格
table_vars <- c("age", "gender", "cancer_type", "weight", "survival_time")
cat_vars <- c("gender", "cancer_type")

table1 <- CreateTableOne(
  vars = table_vars,
  strata = "death_status",
  data = surv_data,
  factorVars = cat_vars
)

# 打印美化后的表格
print(table1, showAllLevels = TRUE) %>%
  kable("html") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
##                            Stratified by death_status
##                             level  Censored        Dead            p      test
##   n                                   135             365                     
##   age (mean (SD))                   60.38 (10.16)   62.49 (9.64)    0.033     
##   gender (%)                Male       49 (36.3)      225 (61.6)   <0.001     
##                             Female     86 (63.7)      140 (38.4)              
##   cancer_type (%)           Type1      60 (44.4)       87 (23.8)   <0.001     
##                             Type2      67 (49.6)      176 (48.2)              
##                             Type3       8 ( 5.9)      102 (27.9)              
##   weight (mean (SD))                49.11 (4.90)    49.95 (4.95)    0.093     
##   survival_time (mean (SD))        358.76 (218.44) 288.69 (202.20)  0.001
level Censored Dead p test
n 135 365
age (mean (SD)) 60.38 (10.16) 62.49 (9.64) 0.033
gender (%) Male 49 (36.3) 225 (61.6) <0.001
Female 86 (63.7) 140 (38.4)
cancer_type (%) Type1 60 (44.4) 87 (23.8) <0.001
Type2 67 (49.6) 176 (48.2)
Type3 8 ( 5.9) 102 (27.9)
weight (mean (SD)) 49.11 (4.90) 49.95 (4.95) 0.093
survival_time (mean (SD)) 358.76 (218.44) 288.69 (202.20) 0.001

生存分析

# 创建生存对象
surv_obj <- Surv(time = surv_data$survival_time, 
                event = surv_data$death_status == "Dead")

# 总体生存曲线
fit_overall <- survfit(surv_obj ~ 1)
ggsurvplot(
  fit_overall,
  data = surv_data,
  risk.table = TRUE,
  surv.median.line = "hv",
  palette = "jco",
  title = "Overall Survival Curve",
  xlab = "Time (days)",
  legend = "none"
)

# 按癌症类型分组的生存曲线
fit_type <- survfit(surv_obj ~ cancer_type, data = surv_data)
ggsurvplot(
  fit_type,
  data = surv_data,
  risk.table = TRUE,
  pval = TRUE,
  conf.int = TRUE,
  surv.median.line = "hv",
  palette = "jco",
  title = "Survival by Cancer Type",
  xlab = "Time (days)"
)
## Warning in geom_segment(aes(x = 0, y = max(y2), xend = max(x1), yend = max(y2)), : All aesthetics have length 1, but the data has 3 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## All aesthetics have length 1, but the data has 3 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## All aesthetics have length 1, but the data has 3 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## All aesthetics have length 1, but the data has 3 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.

Cox比例风险模型

单因素

# 单因素Cox回归
covariates <- c("age", "gender", "cancer_type", "weight")
univ_models <- lapply(covariates, function(x) {
  coxph(as.formula(paste("surv_obj ~", x)), data = surv_data)
})

# 提取结果并美化
univ_results <- map_dfr(univ_models, ~{
  res <- summary(.)
  tibble(
    variable = rownames(res$coefficients),
    hr = res$coefficients[, 2],
    ci_lower = res$conf.int[, 3],
    ci_upper = res$conf.int[, 4],
    p_value = res$coefficients[, 5]
  )
})

# 显示结果表格
univ_results %>%
  mutate(across(where(is.numeric), ~round(., 3))) %>%
  kable("html") %>%
  kable_styling()
variable hr ci_lower ci_upper p_value
age 1.015 1.003 1.026 0.011
genderFemale 0.641 0.519 0.792 0.000
cancer_typeType2 1.327 1.026 1.717 0.031
cancer_typeType3 2.239 1.678 2.988 0.000
weight 1.008 0.987 1.029 0.474

多因素分析

# 多因素Cox模型
full_model <- coxph(
  surv_obj ~ age + gender + cancer_type + weight,
  data = surv_data
)

# 模型摘要
summary(full_model)
## Call:
## coxph(formula = surv_obj ~ age + gender + cancer_type + weight, 
##     data = surv_data)
## 
##   n= 500, number of events= 365 
## 
##                       coef exp(coef)  se(coef)      z Pr(>|z|)    
## age               0.005351  1.005365  0.005886  0.909 0.363290    
## genderFemale     -0.380251  0.683690  0.108893 -3.492 0.000479 ***
## cancer_typeType2  0.264264  1.302472  0.131851  2.004 0.045042 *  
## cancer_typeType3  0.711271  2.036578  0.152028  4.679 2.89e-06 ***
## weight            0.008711  1.008749  0.010412  0.837 0.402825    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                  exp(coef) exp(-coef) lower .95 upper .95
## age                 1.0054     0.9947    0.9938    1.0170
## genderFemale        0.6837     1.4627    0.5523    0.8463
## cancer_typeType2    1.3025     0.7678    1.0059    1.6866
## cancer_typeType3    2.0366     0.4910    1.5118    2.7435
## weight              1.0087     0.9913    0.9884    1.0295
## 
## Concordance= 0.61  (se = 0.017 )
## Likelihood ratio test= 44.51  on 5 df,   p=2e-08
## Wald test            = 45.21  on 5 df,   p=1e-08
## Score (logrank) test = 46.75  on 5 df,   p=6e-09
# 模型假设检验
cox.zph(full_model)
##             chisq df     p
## age         2.205  1 0.138
## gender      3.135  1 0.077
## cancer_type 1.169  2 0.557
## weight      0.777  1 0.378
## GLOBAL      6.678  5 0.246
ggcoxzph(cox.zph(full_model))

时间依赖性ROC分析

# 1年、3年、5年生存预测ROC
if(!require(timeROC)) install.packages("timeROC")
library(timeROC)

# 预测时间点
times <- c(365, 365*3, 365*5)

# 计算ROC
roc <- timeROC(
  T = surv_data$survival_time,
  delta = as.numeric(surv_data$death_status == "Dead"),
  marker = predict(full_model),
  cause = 1,
  times = times,
  ROC = TRUE
)

# 绘制ROC曲线
plot(roc, time = 365, title = FALSE)
plot(roc, time = 365*3, add = TRUE, col = "blue")
plot(roc, time = 365*5, add = TRUE, col = "red")
legend("bottomright",
       c(paste("1-year AUC =", round(roc$AUC[1], 2)),
         paste("3-year AUC =", round(roc$AUC[2], 2)),
         paste("5-year AUC =", round(roc$AUC[3], 2))),
       col = c("black", "blue", "red"),
       lty = 1)