Simulating Data
set.seed(123)
n <- 500
data <- data.frame(
time = rexp(n, rate = 0.1),
status = sample(0:1, n, replace = TRUE),
arm = sample(c("A", "B"), n, replace = TRUE),
ctDNA_9p24.1 = sample(c(0, 1), n, replace = TRUE),
MTB_baseline = runif(n, 0, 100),
MTB_C3D1 = rnorm(n, mean = 0, sd = 1),
MTB_EOT = sample(c(0, 1), n, replace = TRUE),
EBV_ctDNA = sample(c(0, 1), n, replace = TRUE),
EBER_ISH = sample(c(0, 1), n, replace = TRUE)
)
Survival Analysis Function
do_survival_analysis <- function(data, variable, threshold, cohort_label) {
data <- data %>%
mutate(Group = factor(ifelse(get(variable) > threshold, "High", "Low")))
fit <- survfit(Surv(time, status) ~ Group, data = data)
ggsurvplot(fit, data = data, pval = TRUE)
cox_fit <- tryCatch({
coxph(Surv(time, status) ~ Group, data = data)
}, error = function(e) return(NULL))
if (!is.null(cox_fit)) {
cox_results <- summary(cox_fit)$coefficients %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "Variable") %>%
mutate(across(where(is.numeric), ~ signif(.x, 3))) # Round numbers to 3 significant figures
print(knitr::kable(cox_results))
} else {
message("Survival analysis for ", variable, " in ", cohort_label, " skipped due to insufficient data.")
}
}
Perform Survival Analyses
do_survival_analysis(data, "ctDNA_9p24.1", 0, "Entire Cohort")
##
##
## |Variable | coef| exp(coef)| se(coef)| z| Pr(>|z|)|
## |:--------|------:|---------:|--------:|------:|------------------:|
## |GroupLow | 0.0103| 1.01| 0.128| 0.0806| 0.936|
for (arm in unique(data$arm)) {
do_survival_analysis(filter(data, arm == arm), "ctDNA_9p24.1", 0, paste("Arm", arm))
}
##
##
## |Variable | coef| exp(coef)| se(coef)| z| Pr(>|z|)|
## |:--------|------:|---------:|--------:|------:|------------------:|
## |GroupLow | 0.0103| 1.01| 0.128| 0.0806| 0.936|
##
##
## |Variable | coef| exp(coef)| se(coef)| z| Pr(>|z|)|
## |:--------|------:|---------:|--------:|------:|------------------:|
## |GroupLow | 0.0103| 1.01| 0.128| 0.0806| 0.936|
do_survival_analysis(data, "MTB_baseline", median(data$MTB_baseline, na.rm = TRUE), "Entire Cohort")
##
##
## |Variable | coef| exp(coef)| se(coef)| z| Pr(>|z|)|
## |:--------|------:|---------:|--------:|------:|------------------:|
## |GroupLow | -0.031| 0.969| 0.127| -0.245| 0.807|
for (arm in unique(data$arm)) {
do_survival_analysis(filter(data, arm == arm), "MTB_baseline", median(data$MTB_baseline, na.rm = TRUE), paste("Arm", arm))
}
##
##
## |Variable | coef| exp(coef)| se(coef)| z| Pr(>|z|)|
## |:--------|------:|---------:|--------:|------:|------------------:|
## |GroupLow | -0.031| 0.969| 0.127| -0.245| 0.807|
##
##
## |Variable | coef| exp(coef)| se(coef)| z| Pr(>|z|)|
## |:--------|------:|---------:|--------:|------:|------------------:|
## |GroupLow | -0.031| 0.969| 0.127| -0.245| 0.807|
do_survival_analysis(data, "EBV_ctDNA", 0, "Entire Cohort")
##
##
## |Variable | coef| exp(coef)| se(coef)| z| Pr(>|z|)|
## |:--------|-------:|---------:|--------:|------:|------------------:|
## |GroupLow | -0.0716| 0.931| 0.127| -0.564| 0.573|
for (arm in unique(data$arm)) {
do_survival_analysis(filter(data, arm == arm), "EBV_ctDNA", 0, paste("Arm", arm))
}
##
##
## |Variable | coef| exp(coef)| se(coef)| z| Pr(>|z|)|
## |:--------|-------:|---------:|--------:|------:|------------------:|
## |GroupLow | -0.0716| 0.931| 0.127| -0.564| 0.573|
##
##
## |Variable | coef| exp(coef)| se(coef)| z| Pr(>|z|)|
## |:--------|-------:|---------:|--------:|------:|------------------:|
## |GroupLow | -0.0716| 0.931| 0.127| -0.564| 0.573|
do_survival_analysis(data, "EBER_ISH", 0, "Validation Cohort")
##
##
## |Variable | coef| exp(coef)| se(coef)| z| Pr(>|z|)|
## |:--------|-------:|---------:|--------:|------:|------------------:|
## |GroupLow | -0.0645| 0.938| 0.128| -0.502| 0.615|