#load library
library(survival)
library(survminer)
## Loading required package: ggplot2
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(summarytools)
##
## Attaching package: 'summarytools'
##
## The following object is masked from 'package:tibble':
##
## view
library(kableExtra)
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(patchwork)
library(gtsummary)
library(broom)
#load data
data(veteran)
## Warning in data(veteran): data set 'veteran' not found
df <- veteran %>%
select(time, status, trt, age, celltype, karno, diagtime, prior) %>%
as.data.frame()
head(df)
#data description
str(veteran)
## 'data.frame': 137 obs. of 8 variables:
## $ trt : num 1 1 1 1 1 1 1 1 1 1 ...
## $ celltype: Factor w/ 4 levels "squamous","smallcell",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ time : num 72 411 228 126 118 10 82 110 314 100 ...
## $ status : num 1 1 1 1 1 1 1 1 1 0 ...
## $ karno : num 60 70 60 60 70 20 40 80 50 70 ...
## $ diagtime: num 7 5 3 9 11 5 10 29 18 6 ...
## $ age : num 69 64 38 63 65 49 69 68 43 70 ...
## $ prior : num 0 10 0 10 10 0 10 0 0 0 ...
summary(veteran)
## trt celltype time status
## Min. :1.000 squamous :35 Min. : 1.0 Min. :0.0000
## 1st Qu.:1.000 smallcell:48 1st Qu.: 25.0 1st Qu.:1.0000
## Median :1.000 adeno :27 Median : 80.0 Median :1.0000
## Mean :1.496 large :27 Mean :121.6 Mean :0.9343
## 3rd Qu.:2.000 3rd Qu.:144.0 3rd Qu.:1.0000
## Max. :2.000 Max. :999.0 Max. :1.0000
## karno diagtime age prior
## Min. :10.00 Min. : 1.000 Min. :34.00 Min. : 0.00
## 1st Qu.:40.00 1st Qu.: 3.000 1st Qu.:51.00 1st Qu.: 0.00
## Median :60.00 Median : 5.000 Median :62.00 Median : 0.00
## Mean :58.57 Mean : 8.774 Mean :58.31 Mean : 2.92
## 3rd Qu.:75.00 3rd Qu.:11.000 3rd Qu.:66.00 3rd Qu.:10.00
## Max. :99.00 Max. :87.000 Max. :81.00 Max. :10.00
#Kaplan by treatment group
km_trt <- survfit(Surv(time, status) ~ trt, data = df)
ggsurvplot(km_trt, data = df, pval = TRUE, conf.int = TRUE, risk.table = TRUE,
ggtheme = theme_minimal(), legend.labs = c("Standard", "Test"),
title = "Kaplan-Meier Survival Curves by Treatment Group",
xlab = "Time (days)", ylab = "Survival Probability")
#Kaplan by cell type
km_cell <- survfit(Surv(time, status) ~ celltype, data = df)
ggsurvplot(km_cell, data = df, pval = TRUE, conf.int = TRUE, risk.table = TRUE,
ggtheme = theme_minimal(), legend.title = "Cancer Type",
title = "Kaplan-Meier Survival Curves by Cancer Cell Type",
xlab = "Time (days)", ylab = "Survival Probability")
#Nelson Aalen by treatment group
na_trt <- survfit(Surv(time, status) ~ trt, data = df, type = "fleming-harrington")
ggsurvplot(na_trt, data = df, fun = "cumhaz", conf.int = TRUE, risk.table = TRUE,
ggtheme = theme_minimal(), legend.labs = c("Standard", "Test"),
title = "Nelson-Aalen Cumulative Hazard by Treatment Group",
xlab = "Time (days)", ylab = "Cumulative Hazard")
#Nelson Aalen by celltype
na_cell <- survfit(Surv(time, status) ~ celltype, data = df, type = "fleming-harrington")
ggsurvplot(na_cell, data = df, fun = "cumhaz", conf.int = TRUE, risk.table = TRUE,
ggtheme = theme_minimal(), legend.title = "Cancer Type",
title = "Nelson-Aalen Cumulative Hazard by Cancer Cell Type",
xlab = "Time (days)", ylab = "Cumulative Hazard")
#Log rank test
logrank_test <- survdiff(Surv(time, status) ~ trt, data = df)
print(logrank_test)
## Call:
## survdiff(formula = Surv(time, status) ~ trt, data = df)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## trt=1 69 64 64.5 0.00388 0.00823
## trt=2 68 64 63.5 0.00394 0.00823
##
## Chisq= 0 on 1 degrees of freedom, p= 0.9
A p value of 0.9 shows no statistically significant difference in survival between the groups. #Cox proportional hazard model
cox_model <- coxph(Surv(time, status) ~ age + trt + celltype + karno, data = df)
summary(cox_model)
## Call:
## coxph(formula = Surv(time, status) ~ age + trt + celltype + karno,
## data = df)
##
## n= 137, number of events= 128
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age -0.008903 0.991136 0.009224 -0.965 0.3345
## trt 0.303048 1.353980 0.205656 1.474 0.1406
## celltypesmallcell 0.856340 2.354528 0.271322 3.156 0.0016 **
## celltypeadeno 1.178807 3.250494 0.296440 3.977 6.99e-05 ***
## celltypelarge 0.402332 1.495308 0.282544 1.424 0.1545
## karno -0.032685 0.967843 0.005409 -6.043 1.51e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 0.9911 1.0089 0.9734 1.0092
## trt 1.3540 0.7386 0.9048 2.0261
## celltypesmallcell 2.3545 0.4247 1.3834 4.0073
## celltypeadeno 3.2505 0.3076 1.8181 5.8114
## celltypelarge 1.4953 0.6688 0.8595 2.6016
## karno 0.9678 1.0332 0.9576 0.9782
##
## Concordance= 0.738 (se = 0.021 )
## Likelihood ratio test= 61.98 on 6 df, p=2e-11
## Wald test = 62.35 on 6 df, p=1e-11
## Score (logrank) test = 66.62 on 6 df, p=2e-12
From the results only small cell type adeno celltype and large cell type statistically affect survival since they have a p-value less than 0.05.