## Load survival package
library(survival)
## List datasets in survival package
data(package = "survival")
## Load lung data
data(lung)
## Show first 6 rows
head(lung)
inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss
1 3 306 2 74 1 1 90 100 1175 NA
2 3 455 2 68 1 0 90 90 1225 15
3 3 1010 1 56 1 0 90 90 NA 15
4 5 210 2 57 1 1 90 60 1150 11
5 1 883 2 60 1 0 100 90 NA 0
6 12 1022 1 74 1 1 50 80 513 0
Coding (from help file)
NCCTG Lung Cancer Data
Description:
Survival in patients with advanced lung cancer from the North
Central Cancer Treatment Group. Performance scores rate how well
the patient can perform usual daily activities.
inst: Institution code
time: Survival time in days
status: censoring status 1=censored, 2=dead
age: Age in years
sex: Male=1 Female=2
ph.ecog: ECOG performance score (0=good 5=dead)
ph.karno: Karnofsky performance score (bad=0-good=100) rated by physician
pat.karno: Karnofsky performance score as rated by patient
meal.cal: Calories consumed at meals
wt.loss: Weight loss in last six months
## Add survival object
lung$SurvObj <- with(lung, Surv(time, status == 2))
## Check data
head(lung)
inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss SurvObj
1 3 306 2 74 1 1 90 100 1175 NA 306
2 3 455 2 68 1 0 90 90 1225 15 455
3 3 1010 1 56 1 0 90 90 NA 15 1010+
4 5 210 2 57 1 1 90 60 1150 11 210
5 1 883 2 60 1 0 100 90 NA 0 883
6 12 1022 1 74 1 1 50 80 513 0 1022+
## Kaplan-Meier estimator. The "log-log" confidence interval is preferred.
km.as.one <- survfit(SurvObj ~ 1, data = lung, conf.type = "log-log")
km.by.sex <- survfit(SurvObj ~ sex, data = lung, conf.type = "log-log")
## Show object
km.as.one
Call: survfit(formula = SurvObj ~ 1, data = lung, conf.type = "log-log")
records n.max n.start events median 0.95LCL 0.95UCL
228 228 228 165 310 284 361
km.by.sex
Call: survfit(formula = SurvObj ~ sex, data = lung, conf.type = "log-log")
records n.max n.start events median 0.95LCL 0.95UCL
sex=1 138 138 138 112 270 210 306
sex=2 90 90 90 53 426 345 524
## See survival estimates at given time (lots of outputs)
## summary(km.as.one)
## summary(km.by.sex)
## Plotting without any specification
plot(km.as.one)
plot(km.by.sex)
## Without
plot(km.as.one, conf = F, mark.time = F)
plot(km.by.sex, conf = F, mark.time = F)
## Load rms package
library(rms)
## without specification
survplot(km.by.sex)
## Minimum decent
survplot(km.by.sex,
conf = "none")
## Full-fledged grammer
survplot(fit = km.by.sex,
conf = c("none","bands","bars")[1],
xlab = "", ylab = "Survival",
## xlim(0,100),
label.curves = TRUE, # label curves directly
## label.curves = list(keys = "lines"), # legend instead of direct label
levels.only = FALSE, # show only levels, no label
abbrev.label = FALSE, # if label used, abbreviate
## fun = function(x) {1 - x}, # Cumulative probability plot
loglog = FALSE, # log(-log Survival) plot
logt = FALSE, # log time
time.inc = 100, # time increment
dots = TRUE, # dot grid
n.risk = TRUE, # show number at risk
## srt.n.risk = 0, sep.n.risk = 0.056, adj.n.risk = 1,
## y.n.risk = 0, cex.n.risk = 0.6
)
## Without dots
survplot(fit = km.by.sex,
conf = c("none","bands","bars")[1],
xlab = "", ylab = "Survival",
## xlim(0,100),
label.curves = TRUE, # label curves directly
## label.curves = list(keys = "lines"), # legend instead of direct label
levels.only = FALSE, # show only levels, no label
abbrev.label = FALSE, # if label used, abbreviate
## fun = function(x) {1 - x}, # Cumulative probability plot
loglog = FALSE, # log(-log Survival) plot
logt = FALSE, # log time
time.inc = 100, # time increment
dots = FALSE, # dot grid
n.risk = TRUE, # show number at risk
## srt.n.risk = 0, sep.n.risk = 0.056, adj.n.risk = 1,
## y.n.risk = 0, cex.n.risk = 0.6
)
## Different time incremente
survplot(fit = km.by.sex,
conf = c("none","bands","bars")[1],
xlab = "", ylab = "Survival",
## xlim(0,100),
label.curves = TRUE, # label curves directly
## label.curves = list(keys = "lines"), # legend instead of direct label
levels.only = FALSE, # show only levels, no label
abbrev.label = FALSE, # if label used, abbreviate
## fun = function(x) {1 - x}, # Cumulative probability plot
loglog = FALSE, # log(-log Survival) plot
logt = FALSE, # log time
time.inc = 300, # time increment
dots = FALSE, # dot grid
n.risk = TRUE, # show number at risk
## srt.n.risk = 0, sep.n.risk = 0.056, adj.n.risk = 1,
## y.n.risk = 0, cex.n.risk = 0.6
)
## Plot cumulative probability F(t) = 1 - S(t)
survplot(fit = km.by.sex,
conf = c("none","bands","bars")[1],
xlab = "", ylab = "Cumulative Incidence",
## xlim(0,100),
label.curves = TRUE, # label curves directly
## label.curves = list(keys = "lines"), # legend instead of direct label
levels.only = FALSE, # show only levels, no label
abbrev.label = FALSE, # if label used, abbreviate
fun = function(x) {1 - x}, # Cumulative probability plot
loglog = FALSE, # log(-log Survival) plot
logt = FALSE, # log time
time.inc = 100, # time increment
dots = FALSE, # dot grid
n.risk = TRUE, # show number at risk
## srt.n.risk = 0, sep.n.risk = 0.056, adj.n.risk = 1,
## y.n.risk = 0, cex.n.risk = 0.6
)
## Change position of number at risk
survplot(fit = km.by.sex,
conf = c("none","bands","bars")[1],
xlab = "", ylab = "Survival",
## xlim(0,100),
label.curves = TRUE, # label curves directly
## label.curves = list(keys = "lines"), # legend instead of direct label
levels.only = FALSE, # show only levels, no label
abbrev.label = FALSE, # if label used, abbreviate
## fun = function(x) {1 - x}, # Cumulative probability plot
loglog = FALSE, # log(-log Survival) plot
logt = FALSE, # log time
time.inc = 100, # time increment
dots = FALSE, # dot grid
n.risk = TRUE, # show number at risk
## srt.n.risk = 0, sep.n.risk = 0.056, adj.n.risk = 1,
y.n.risk = -0.2, cex.n.risk = 0.6
)