Survival Analysis in R

library(tidyverse)
library(survival)
library(ggfortify)
library(survminer)

theme_set(theme_bw())
var_names <- c("patient", "treatment", "time", "status", "age", "psa", "size", "gleason")
prostate <- read_delim ("prostate_cancer.txt", delim = " ", col_names = var_names)

glimpse(prostate)
Rows: 63
Columns: 8
$ patient   <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1~
$ treatment <dbl> 1, 2, 2, 1, 2, 1, 1, 1, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, ~
$ time      <dbl> 65, 61, 60, 58, 51, 51, 14, 43, 16, 52, 59, 55, 68, 51, 2, 6~
$ status    <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, ~
$ age       <dbl> 67, 60, 77, 64, 65, 61, 73, 60, 73, 73, 77, 74, 71, 65, 76, ~
$ psa       <dbl> 13.4, 14.6, 15.6, 16.2, 14.1, 13.5, 12.4, 13.6, 13.8, 11.7, ~
$ size      <dbl> 34, 4, 3, 6, 21, 8, 18, 7, 8, 5, 7, 7, 19, 10, 8, 7, 8, 15, ~
$ gleason   <dbl> 8, 10, 8, 9, 9, 8, 11, 9, 9, 9, 10, 10, 9, 11, 9, 9, 9, 11, ~

KM sin grupos

Surv(prostate$time, prostate$status)
 [1] 65+ 61+ 60+ 58+ 51+ 51+ 14  43+ 16+ 52+ 59  55+ 68+ 51   2+ 67+ 66+ 66  28+
[20] 50  69  67  65+ 24+ 45+ 64+ 61+ 26  42  57+ 70+  5+ 54+ 36  70  67+ 23+ 52 
[39] 65+ 24  45+ 64+ 61  21  22  37+ 51  12+ 67+ 61+ 66  65+ 50  35  67  65+ 33+
[58] 45+ 62+ 61+ 36  42  57+
km1 <- survfit(Surv(prostate$time, prostate$status) ~ 1) # el 1 es xq es sin grupos

plot(km1, mark.time = T)

KM con 2 grupos

km2 <- survfit(Surv(prostate$time, prostate$status) ~ prostate$treatment) # el 1 es xq es sin grupos

plot(km2, mark.time = T)

km2 %>% autoplot() #from ggfortify

survdiff(Surv(prostate$time, prostate$status) ~ prostate$treatment)
Call:
survdiff(formula = Surv(prostate$time, prostate$status) ~ prostate$treatment)

                      N Observed Expected (O-E)^2/E (O-E)^2/V
prostate$treatment=1 23        7     5.13     0.686     0.958
prostate$treatment=2 40       16    17.87     0.197     0.958

 Chisq= 1  on 1 degrees of freedom, p= 0.3 
data(motorette, package = "SMPracticals")

moto <- motorette
head(moto)
    x cens    y
1 150    0 8064
2 150    0 8064
3 150    0 8064
4 150    0 8064
5 150    0 8064
6 150    0 8064
# x= temp in F
# cens= censoring indicator
# y= failure time in hours

plot(moto$x, moto$y)

Surv(moto$y, moto$cens)
 [1] 8064+ 8064+ 8064+ 8064+ 8064+ 8064+ 8064+ 8064+ 8064+ 8064+ 1764  2772 
[13] 3444  3542  3780  4860  5196  5448+ 5448+ 5448+  408   408  1344  1344 
[25] 1440  1680+ 1680+ 1680+ 1680+ 1680+  408   408   504   504   504   528+
[37]  528+  528+  528+  528+
survfit(Surv(moto$y, moto$cens) ~ 1)
Call: survfit(formula = Surv(moto$y, moto$cens) ~ 1)

      n  events  median 0.95LCL 0.95UCL 
     40      17    5196    3444      NA 
moto_km1 <- survfit(Surv(moto$y, moto$cens) ~ 1)

moto_km1 %>% plot(mark.time = T)

moto <- moto %>% 
  mutate(temp_level = if_else(x < 180, "Low", "High"))

head(moto)
    x cens    y temp_level
1 150    0 8064        Low
2 150    0 8064        Low
3 150    0 8064        Low
4 150    0 8064        Low
5 150    0 8064        Low
6 150    0 8064        Low
(moto_km2 <- survfit(Surv(moto$y, moto$cens) ~ moto$temp_level))
Call: survfit(formula = Surv(moto$y, moto$cens) ~ moto$temp_level)

                      n events median 0.95LCL 0.95UCL
moto$temp_level=High 20     10   1344     504      NA
moto$temp_level=Low  20      7     NA    5196      NA
plot(moto_km2, mark.time = T)

survdiff(Surv(moto$y, moto$cens) ~ moto$temp_level)
Call:
survdiff(formula = Surv(moto$y, moto$cens) ~ moto$temp_level)

                      N Observed Expected (O-E)^2/E (O-E)^2/V
moto$temp_level=High 20       10     4.14      8.32      15.7
moto$temp_level=Low  20        7    12.86      2.67      15.7

 Chisq= 15.7  on 1 degrees of freedom, p= 7e-05 
df <- tibble(udca1)
dim(df)
[1] 170   7
head(df)
# A tibble: 6 x 7
     id   trt stage  bili riskscore futime status
  <int> <int> <int> <dbl>     <dbl>  <dbl>  <dbl>
1     1     1     1   1         5.1   1896      0
2     2     0     1   1.7       4.2   1456      1
3     3     1     0   0.5       3.4   1892      0
4     4     1     1   1.4       5      343      0
5     5     0     1   1.1       4.3   1875      0
6     6     1     1   1.4       5.9    768      1
Surv(df$futime, df$status)
  [1] 1896+ 1456  1892+  343+ 1875+  768  1869+  768  1552+  735  1846+ 1099 
 [13] 1034   362  1834+ 1834+  750   391  1826+  992   826  1804+ 1487+  734 
 [25] 1511   370   731   633   685   711  1756+ 1755+ 1750+ 1741+ 1446  1742+
 [37] 1741+ 1735+ 1731+ 1416   733  1508   559+  383  1707+  726  1706+ 1692+
 [49] 1688+  686  1086   351   876  1092  1680+  189   530  1674+ 1355+ 1033 
 [61] 1134+  727    47  1453   340   499   361  1681+  874+  307+ 1630+  252+
 [73]  183+  734+ 1336  1554+  737  1526+ 1526+ 1519+  833  1518+ 1241+  272 
 [85] 1108+ 1088+ 1486+ 1483+ 1471+ 1470+ 1064+ 1087  1468+ 1468+ 1463+  755 
 [97] 1098+ 1449+  783  1443+ 1478+  395   742   462  1414+  973   377   536 
[109] 1372+  714  1358+  761  1153+  275+ 1367+  709   739+ 1323+  559  1310+
[121] 1293+   35+ 1288+ 1282+  810   513+ 1258+ 1127+ 1246+ 1233+ 1217+  573+
[133] 1205+ 1199+  717   727   800  1176+  816  1148+ 1127+ 1107+  763   364 
[145] 1084+  834  1078+ 1058+ 1056+  705     0+  144   233+  604   971+  946+
[157]  671   946+  375   683   937+  905+  812   888+  776   868+  810   825+
[169]  803+  791+
km_model <- survfit(Surv(futime, status) ~ trt, data = df)
autoplot(km_model)

cox_model <- coxph(Surv(futime, status) ~ trt + stage + bili + riskscore, data = df)
summary(cox_model)
Call:
coxph(formula = Surv(futime, status) ~ trt + stage + bili + riskscore, 
    data = df)

  n= 169, number of events= 72 
   (1 observation deleted due to missingness)

              coef exp(coef) se(coef)      z Pr(>|z|)    
trt       -1.01636   0.36191  0.25628 -3.966 7.31e-05 ***
stage      0.03453   1.03514  0.28768  0.120   0.9045    
bili       0.08971   1.09385  0.07827  1.146   0.2518    
riskscore  0.30950   1.36275  0.16142  1.917   0.0552 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

          exp(coef) exp(-coef) lower .95 upper .95
trt          0.3619     2.7631    0.2190    0.5981
stage        1.0351     0.9661    0.5890    1.8192
bili         1.0939     0.9142    0.9383    1.2752
riskscore    1.3628     0.7338    0.9932    1.8699

Concordance= 0.684  (se = 0.031 )
Likelihood ratio test= 30.32  on 4 df,   p=4e-06
Wald test            = 29.86  on 4 df,   p=5e-06
Score (logrank) test = 31.47  on 4 df,   p=2e-06
cox_model2 <- coxph(Surv(futime, status) ~ trt + riskscore, data = df)
summary(cox_model2)
Call:
coxph(formula = Surv(futime, status) ~ trt + riskscore, data = df)

  n= 169, number of events= 72 
   (1 observation deleted due to missingness)

             coef exp(coef) se(coef)      z Pr(>|z|)    
trt       -0.9656    0.3808   0.2473 -3.904 9.47e-05 ***
riskscore  0.4421    1.5560   0.1069  4.136 3.53e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

          exp(coef) exp(-coef) lower .95 upper .95
trt          0.3808     2.6263    0.2345    0.6183
riskscore    1.5560     0.6427    1.2619    1.9186

Concordance= 0.681  (se = 0.032 )
Likelihood ratio test= 29.11  on 2 df,   p=5e-07
Wald test            = 28.46  on 2 df,   p=7e-07
Score (logrank) test = 29.46  on 2 df,   p=4e-07
survfit(cox_model2) %>% autoplot()

lung %>%  slice(3:5)
  inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss
3    3 1010      1  56   1       0       90        90       NA      15
4    5  210      2  57   1       1       90        60     1150      11
5    1  883      2  60   1       0      100        90       NA       0
surv_obj <- Surv(lung$time, lung$status)

survfit(surv_obj ~ 1)
Call: survfit(formula = surv_obj ~ 1)

      n  events  median 0.95LCL 0.95UCL 
    228     165     310     285     363 
(surv_fit <- survfit(surv_obj ~ sex, data = lung))
Call: survfit(formula = surv_obj ~ sex, data = lung)

        n events median 0.95LCL 0.95UCL
sex=1 138    112    270     212     310
sex=2  90     53    426     348     550
survdiff(surv_obj ~ lung$sex)
Call:
survdiff(formula = surv_obj ~ lung$sex)

             N Observed Expected (O-E)^2/E (O-E)^2/V
lung$sex=1 138      112     91.6      4.55      10.3
lung$sex=2  90       53     73.4      5.68      10.3

 Chisq= 10.3  on 1 degrees of freedom, p= 0.001 
survfit(surv_obj ~ lung$sex) %>% autoplot()

# survminer

ggsurvplot(surv_fit, 
           pval = T,
           conf.int = T,
           risk.table = T,
           surv.median.line = "hv",
           risk.table.col = "strata",
           ncensor.plot = T)

ggsurvplot(surv_fit, 
           pval = T,
           conf.int = T,
           risk.table = T,
           surv.median.line = "hv",
           risk.table.col = "strata",
           fun = "cumhaz")

ggsurvplot(surv_fit, 
           pval = T,
           conf.int = T,
           risk.table = T,
           surv.median.line = "hv",
           risk.table.col = "strata",
           fun = "event")