Drawing survival curves in R

Load data

## Load survival package
library(survival)
## List datasets in survival package
data(package = "survival")

## Load lung data
data(lung)

## Show first 6 rows
head(lung)

  inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss
1    3  306      2  74   1       1       90       100     1175      NA
2    3  455      2  68   1       0       90        90     1225      15
3    3 1010      1  56   1       0       90        90       NA      15
4    5  210      2  57   1       1       90        60     1150      11
5    1  883      2  60   1       0      100        90       NA       0
6   12 1022      1  74   1       1       50        80      513       0

Coding (from help file)

NCCTG Lung Cancer Data

Description:
     Survival in patients with advanced lung cancer from the North
     Central Cancer Treatment Group.  Performance scores rate how well
     the patient can perform usual daily activities.

inst:       Institution code
time:       Survival time in days
status:     censoring status 1=censored, 2=dead
age:        Age in years
sex:        Male=1 Female=2
ph.ecog:    ECOG performance score (0=good 5=dead)
ph.karno:   Karnofsky performance score (bad=0-good=100) rated by physician
pat.karno:  Karnofsky performance score as rated by patient
meal.cal:   Calories consumed at meals
wt.loss:    Weight loss in last six months

Create a survival object

## Add survival object
lung$SurvObj <- with(lung, Surv(time, status == 2))

## Check data
head(lung)

  inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss SurvObj
1    3  306      2  74   1       1       90       100     1175      NA    306 
2    3  455      2  68   1       0       90        90     1225      15    455 
3    3 1010      1  56   1       0       90        90       NA      15   1010+
4    5  210      2  57   1       1       90        60     1150      11    210 
5    1  883      2  60   1       0      100        90       NA       0    883 
6   12 1022      1  74   1       1       50        80      513       0   1022+

Kaplan-Meier estimator without grouping

## Kaplan-Meier estimator. The "log-log" confidence interval is preferred.
km.as.one <- survfit(SurvObj ~ 1, data = lung, conf.type = "log-log")
km.by.sex <- survfit(SurvObj ~ sex, data = lung, conf.type = "log-log")

## Show object
km.as.one

Call: survfit(formula = SurvObj ~ 1, data = lung, conf.type = "log-log")

records   n.max n.start  events  median 0.95LCL 0.95UCL 
    228     228     228     165     310     284     361

km.by.sex

Call: survfit(formula = SurvObj ~ sex, data = lung, conf.type = "log-log")

      records n.max n.start events median 0.95LCL 0.95UCL
sex=1     138   138     138    112    270     210     306
sex=2      90    90      90     53    426     345     524


## See survival estimates at given time (lots of outputs)
## summary(km.as.one)
## summary(km.by.sex)

## Plotting without any specification
plot(km.as.one)

plot of chunk unnamed-chunk-4

plot(km.by.sex)

plot of chunk unnamed-chunk-4


## Without
plot(km.as.one, conf = F, mark.time = F)

plot of chunk unnamed-chunk-4

plot(km.by.sex, conf = F, mark.time = F)

plot of chunk unnamed-chunk-4

rms::survplot() function

## Load rms package
library(rms)

## without specification
survplot(km.by.sex)

plot of chunk unnamed-chunk-5


## Minimum decent
survplot(km.by.sex,
         conf = "none")

plot of chunk unnamed-chunk-5


## Full-fledged grammer
survplot(fit  = km.by.sex,
         conf = c("none","bands","bars")[1],
         xlab = "", ylab = "Survival",
         ## xlim(0,100),
         label.curves = TRUE,                     # label curves directly
         ## label.curves = list(keys = "lines"),  # legend instead of direct label
         levels.only  = FALSE,                    # show only levels, no label
         abbrev.label = FALSE,                    # if label used, abbreviate
         ## fun = function(x) {1 - x},            # Cumulative probability plot         
         loglog   = FALSE,                        # log(-log Survival) plot
         logt     = FALSE,                        # log time
         time.inc = 100,                          # time increment
         dots     = TRUE,                         # dot grid
         n.risk   = TRUE,                         # show number at risk
         ## srt.n.risk = 0, sep.n.risk = 0.056, adj.n.risk = 1,
         ## y.n.risk = 0, cex.n.risk = 0.6
         )

plot of chunk unnamed-chunk-5


## Without dots
survplot(fit  = km.by.sex,
         conf = c("none","bands","bars")[1],
         xlab = "", ylab = "Survival",
         ## xlim(0,100),
         label.curves = TRUE,                     # label curves directly
         ## label.curves = list(keys = "lines"),  # legend instead of direct label
         levels.only  = FALSE,                    # show only levels, no label
         abbrev.label = FALSE,                    # if label used, abbreviate
         ## fun = function(x) {1 - x},            # Cumulative probability plot         
         loglog   = FALSE,                        # log(-log Survival) plot
         logt     = FALSE,                        # log time
         time.inc = 100,                          # time increment
         dots     = FALSE,                         # dot grid
         n.risk   = TRUE,                         # show number at risk
         ## srt.n.risk = 0, sep.n.risk = 0.056, adj.n.risk = 1,
         ## y.n.risk = 0, cex.n.risk = 0.6
         )

plot of chunk unnamed-chunk-5


## Different time incremente
survplot(fit  = km.by.sex,
         conf = c("none","bands","bars")[1],
         xlab = "", ylab = "Survival",
         ## xlim(0,100),
         label.curves = TRUE,                     # label curves directly
         ## label.curves = list(keys = "lines"),  # legend instead of direct label
         levels.only  = FALSE,                    # show only levels, no label
         abbrev.label = FALSE,                    # if label used, abbreviate
         ## fun = function(x) {1 - x},            # Cumulative probability plot         
         loglog   = FALSE,                        # log(-log Survival) plot
         logt     = FALSE,                        # log time
         time.inc = 300,                           # time increment
         dots     = FALSE,                        # dot grid
         n.risk   = TRUE,                         # show number at risk
         ## srt.n.risk = 0, sep.n.risk = 0.056, adj.n.risk = 1,
         ## y.n.risk = 0, cex.n.risk = 0.6
         )

plot of chunk unnamed-chunk-5



## Plot cumulative probability F(t) = 1 - S(t)
survplot(fit  = km.by.sex,
         conf = c("none","bands","bars")[1],
         xlab = "", ylab = "Cumulative Incidence",
         ## xlim(0,100),
         label.curves = TRUE,                     # label curves directly
         ## label.curves = list(keys = "lines"),  # legend instead of direct label
         levels.only  = FALSE,                    # show only levels, no label
         abbrev.label = FALSE,                    # if label used, abbreviate
         fun = function(x) {1 - x},             # Cumulative probability plot         
         loglog   = FALSE,                        # log(-log Survival) plot
         logt     = FALSE,                        # log time
         time.inc = 100,                          # time increment
         dots     = FALSE,                        # dot grid
         n.risk   = TRUE,                         # show number at risk
         ## srt.n.risk = 0, sep.n.risk = 0.056, adj.n.risk = 1,
         ## y.n.risk = 0, cex.n.risk = 0.6
         )

plot of chunk unnamed-chunk-5


## Change position of number at risk
survplot(fit  = km.by.sex,
         conf = c("none","bands","bars")[1],
         xlab = "", ylab = "Survival",
         ## xlim(0,100),
         label.curves = TRUE,                     # label curves directly
         ## label.curves = list(keys = "lines"),  # legend instead of direct label
         levels.only  = FALSE,                    # show only levels, no label
         abbrev.label = FALSE,                    # if label used, abbreviate
         ## fun = function(x) {1 - x},            # Cumulative probability plot         
         loglog   = FALSE,                        # log(-log Survival) plot
         logt     = FALSE,                        # log time
         time.inc = 100,                          # time increment
         dots     = FALSE,                        # dot grid
         n.risk   = TRUE,                         # show number at risk
         ## srt.n.risk = 0, sep.n.risk = 0.056, adj.n.risk = 1,
         y.n.risk = -0.2, cex.n.risk = 0.6
         )

plot of chunk unnamed-chunk-5