Survival Analysis in R
library(tidyverse)
library(survival)
library(ggfortify)
library(survminer)
theme_set(theme_bw())
var_names <- c("patient", "treatment", "time", "status", "age", "psa", "size", "gleason")
prostate <- read_delim ("prostate_cancer.txt", delim = " ", col_names = var_names)
glimpse(prostate)
Rows: 63
Columns: 8
$ patient <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1~
$ treatment <dbl> 1, 2, 2, 1, 2, 1, 1, 1, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, ~
$ time <dbl> 65, 61, 60, 58, 51, 51, 14, 43, 16, 52, 59, 55, 68, 51, 2, 6~
$ status <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, ~
$ age <dbl> 67, 60, 77, 64, 65, 61, 73, 60, 73, 73, 77, 74, 71, 65, 76, ~
$ psa <dbl> 13.4, 14.6, 15.6, 16.2, 14.1, 13.5, 12.4, 13.6, 13.8, 11.7, ~
$ size <dbl> 34, 4, 3, 6, 21, 8, 18, 7, 8, 5, 7, 7, 19, 10, 8, 7, 8, 15, ~
$ gleason <dbl> 8, 10, 8, 9, 9, 8, 11, 9, 9, 9, 10, 10, 9, 11, 9, 9, 9, 11, ~
KM sin grupos
Surv(prostate$time, prostate$status)
[1] 65+ 61+ 60+ 58+ 51+ 51+ 14 43+ 16+ 52+ 59 55+ 68+ 51 2+ 67+ 66+ 66 28+
[20] 50 69 67 65+ 24+ 45+ 64+ 61+ 26 42 57+ 70+ 5+ 54+ 36 70 67+ 23+ 52
[39] 65+ 24 45+ 64+ 61 21 22 37+ 51 12+ 67+ 61+ 66 65+ 50 35 67 65+ 33+
[58] 45+ 62+ 61+ 36 42 57+
km1 <- survfit(Surv(prostate$time, prostate$status) ~ 1) # el 1 es xq es sin grupos
plot(km1, mark.time = T)

KM con 2 grupos
km2 <- survfit(Surv(prostate$time, prostate$status) ~ prostate$treatment) # el 1 es xq es sin grupos
plot(km2, mark.time = T)

km2 %>% autoplot() #from ggfortify

survdiff(Surv(prostate$time, prostate$status) ~ prostate$treatment)
Call:
survdiff(formula = Surv(prostate$time, prostate$status) ~ prostate$treatment)
N Observed Expected (O-E)^2/E (O-E)^2/V
prostate$treatment=1 23 7 5.13 0.686 0.958
prostate$treatment=2 40 16 17.87 0.197 0.958
Chisq= 1 on 1 degrees of freedom, p= 0.3
data(motorette, package = "SMPracticals")
moto <- motorette
head(moto)
x cens y
1 150 0 8064
2 150 0 8064
3 150 0 8064
4 150 0 8064
5 150 0 8064
6 150 0 8064
# x= temp in F
# cens= censoring indicator
# y= failure time in hours
plot(moto$x, moto$y)

Surv(moto$y, moto$cens)
[1] 8064+ 8064+ 8064+ 8064+ 8064+ 8064+ 8064+ 8064+ 8064+ 8064+ 1764 2772
[13] 3444 3542 3780 4860 5196 5448+ 5448+ 5448+ 408 408 1344 1344
[25] 1440 1680+ 1680+ 1680+ 1680+ 1680+ 408 408 504 504 504 528+
[37] 528+ 528+ 528+ 528+
survfit(Surv(moto$y, moto$cens) ~ 1)
Call: survfit(formula = Surv(moto$y, moto$cens) ~ 1)
n events median 0.95LCL 0.95UCL
40 17 5196 3444 NA
moto_km1 <- survfit(Surv(moto$y, moto$cens) ~ 1)
moto_km1 %>% plot(mark.time = T)

moto <- moto %>%
mutate(temp_level = if_else(x < 180, "Low", "High"))
head(moto)
x cens y temp_level
1 150 0 8064 Low
2 150 0 8064 Low
3 150 0 8064 Low
4 150 0 8064 Low
5 150 0 8064 Low
6 150 0 8064 Low
(moto_km2 <- survfit(Surv(moto$y, moto$cens) ~ moto$temp_level))
Call: survfit(formula = Surv(moto$y, moto$cens) ~ moto$temp_level)
n events median 0.95LCL 0.95UCL
moto$temp_level=High 20 10 1344 504 NA
moto$temp_level=Low 20 7 NA 5196 NA
plot(moto_km2, mark.time = T)

survdiff(Surv(moto$y, moto$cens) ~ moto$temp_level)
Call:
survdiff(formula = Surv(moto$y, moto$cens) ~ moto$temp_level)
N Observed Expected (O-E)^2/E (O-E)^2/V
moto$temp_level=High 20 10 4.14 8.32 15.7
moto$temp_level=Low 20 7 12.86 2.67 15.7
Chisq= 15.7 on 1 degrees of freedom, p= 7e-05
df <- tibble(udca1)
dim(df)
[1] 170 7
head(df)
# A tibble: 6 x 7
id trt stage bili riskscore futime status
<int> <int> <int> <dbl> <dbl> <dbl> <dbl>
1 1 1 1 1 5.1 1896 0
2 2 0 1 1.7 4.2 1456 1
3 3 1 0 0.5 3.4 1892 0
4 4 1 1 1.4 5 343 0
5 5 0 1 1.1 4.3 1875 0
6 6 1 1 1.4 5.9 768 1
Surv(df$futime, df$status)
[1] 1896+ 1456 1892+ 343+ 1875+ 768 1869+ 768 1552+ 735 1846+ 1099
[13] 1034 362 1834+ 1834+ 750 391 1826+ 992 826 1804+ 1487+ 734
[25] 1511 370 731 633 685 711 1756+ 1755+ 1750+ 1741+ 1446 1742+
[37] 1741+ 1735+ 1731+ 1416 733 1508 559+ 383 1707+ 726 1706+ 1692+
[49] 1688+ 686 1086 351 876 1092 1680+ 189 530 1674+ 1355+ 1033
[61] 1134+ 727 47 1453 340 499 361 1681+ 874+ 307+ 1630+ 252+
[73] 183+ 734+ 1336 1554+ 737 1526+ 1526+ 1519+ 833 1518+ 1241+ 272
[85] 1108+ 1088+ 1486+ 1483+ 1471+ 1470+ 1064+ 1087 1468+ 1468+ 1463+ 755
[97] 1098+ 1449+ 783 1443+ 1478+ 395 742 462 1414+ 973 377 536
[109] 1372+ 714 1358+ 761 1153+ 275+ 1367+ 709 739+ 1323+ 559 1310+
[121] 1293+ 35+ 1288+ 1282+ 810 513+ 1258+ 1127+ 1246+ 1233+ 1217+ 573+
[133] 1205+ 1199+ 717 727 800 1176+ 816 1148+ 1127+ 1107+ 763 364
[145] 1084+ 834 1078+ 1058+ 1056+ 705 0+ 144 233+ 604 971+ 946+
[157] 671 946+ 375 683 937+ 905+ 812 888+ 776 868+ 810 825+
[169] 803+ 791+
km_model <- survfit(Surv(futime, status) ~ trt, data = df)
autoplot(km_model)

cox_model <- coxph(Surv(futime, status) ~ trt + stage + bili + riskscore, data = df)
summary(cox_model)
Call:
coxph(formula = Surv(futime, status) ~ trt + stage + bili + riskscore,
data = df)
n= 169, number of events= 72
(1 observation deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
trt -1.01636 0.36191 0.25628 -3.966 7.31e-05 ***
stage 0.03453 1.03514 0.28768 0.120 0.9045
bili 0.08971 1.09385 0.07827 1.146 0.2518
riskscore 0.30950 1.36275 0.16142 1.917 0.0552 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
trt 0.3619 2.7631 0.2190 0.5981
stage 1.0351 0.9661 0.5890 1.8192
bili 1.0939 0.9142 0.9383 1.2752
riskscore 1.3628 0.7338 0.9932 1.8699
Concordance= 0.684 (se = 0.031 )
Likelihood ratio test= 30.32 on 4 df, p=4e-06
Wald test = 29.86 on 4 df, p=5e-06
Score (logrank) test = 31.47 on 4 df, p=2e-06
cox_model2 <- coxph(Surv(futime, status) ~ trt + riskscore, data = df)
summary(cox_model2)
Call:
coxph(formula = Surv(futime, status) ~ trt + riskscore, data = df)
n= 169, number of events= 72
(1 observation deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
trt -0.9656 0.3808 0.2473 -3.904 9.47e-05 ***
riskscore 0.4421 1.5560 0.1069 4.136 3.53e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
trt 0.3808 2.6263 0.2345 0.6183
riskscore 1.5560 0.6427 1.2619 1.9186
Concordance= 0.681 (se = 0.032 )
Likelihood ratio test= 29.11 on 2 df, p=5e-07
Wald test = 28.46 on 2 df, p=7e-07
Score (logrank) test = 29.46 on 2 df, p=4e-07
survfit(cox_model2) %>% autoplot()

lung %>% slice(3:5)
inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss
3 3 1010 1 56 1 0 90 90 NA 15
4 5 210 2 57 1 1 90 60 1150 11
5 1 883 2 60 1 0 100 90 NA 0
surv_obj <- Surv(lung$time, lung$status)
survfit(surv_obj ~ 1)
Call: survfit(formula = surv_obj ~ 1)
n events median 0.95LCL 0.95UCL
228 165 310 285 363
(surv_fit <- survfit(surv_obj ~ sex, data = lung))
Call: survfit(formula = surv_obj ~ sex, data = lung)
n events median 0.95LCL 0.95UCL
sex=1 138 112 270 212 310
sex=2 90 53 426 348 550
survdiff(surv_obj ~ lung$sex)
Call:
survdiff(formula = surv_obj ~ lung$sex)
N Observed Expected (O-E)^2/E (O-E)^2/V
lung$sex=1 138 112 91.6 4.55 10.3
lung$sex=2 90 53 73.4 5.68 10.3
Chisq= 10.3 on 1 degrees of freedom, p= 0.001
survfit(surv_obj ~ lung$sex) %>% autoplot()

# survminer
ggsurvplot(surv_fit,
pval = T,
conf.int = T,
risk.table = T,
surv.median.line = "hv",
risk.table.col = "strata",
ncensor.plot = T)

ggsurvplot(surv_fit,
pval = T,
conf.int = T,
risk.table = T,
surv.median.line = "hv",
risk.table.col = "strata",
fun = "cumhaz")

ggsurvplot(surv_fit,
pval = T,
conf.int = T,
risk.table = T,
surv.median.line = "hv",
risk.table.col = "strata",
fun = "event")
