library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
# library(OIsurv)
library(KMsurv)
library(survival)
library(IRdisplay)
data(tongue)
summary(tongue)
## type time delta
## Min. :1.00 Min. : 1.00 Min. :0.0000
## 1st Qu.:1.00 1st Qu.: 23.75 1st Qu.:0.0000
## Median :1.00 Median : 69.50 Median :1.0000
## Mean :1.35 Mean : 73.83 Mean :0.6625
## 3rd Qu.:2.00 3rd Qu.:101.75 3rd Qu.:1.0000
## Max. :2.00 Max. :400.00 Max. :1.0000
attach(tongue)
tongue.surv <- Surv(time[type==1], delta[type==1])
tongue.surv
## [1] 1 3 3 4 10 13 13 16 16 24 26 27 28 30 30
## [16] 32 41 51 65 67 70 72 73 77 91 93 96 100 104 157
## [31] 167 61+ 74+ 79+ 80+ 81+ 87+ 87+ 88+ 89+ 93+ 97+ 101+ 104+ 108+
## [46] 109+ 120+ 131+ 150+ 231+ 240+ 400+
surv.fit <- survfit(tongue.surv~1)
surv.fit
## Call: survfit(formula = tongue.surv ~ 1)
##
## n events median 0.95LCL 0.95UCL
## [1,] 52 31 93 67 NA
summary(surv.fit)
## Call: survfit(formula = tongue.surv ~ 1)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 52 1 0.981 0.0190 0.944 1.000
## 3 51 2 0.942 0.0323 0.881 1.000
## 4 49 1 0.923 0.0370 0.853 0.998
## 10 48 1 0.904 0.0409 0.827 0.988
## 13 47 2 0.865 0.0473 0.777 0.963
## 16 45 2 0.827 0.0525 0.730 0.936
## 24 43 1 0.808 0.0547 0.707 0.922
## 26 42 1 0.788 0.0566 0.685 0.908
## 27 41 1 0.769 0.0584 0.663 0.893
## 28 40 1 0.750 0.0600 0.641 0.877
## 30 39 2 0.712 0.0628 0.598 0.846
## 32 37 1 0.692 0.0640 0.578 0.830
## 41 36 1 0.673 0.0651 0.557 0.813
## 51 35 1 0.654 0.0660 0.537 0.797
## 65 33 1 0.634 0.0669 0.516 0.780
## 67 32 1 0.614 0.0677 0.495 0.762
## 70 31 1 0.594 0.0683 0.475 0.745
## 72 30 1 0.575 0.0689 0.454 0.727
## 73 29 1 0.555 0.0693 0.434 0.709
## 77 27 1 0.534 0.0697 0.414 0.690
## 91 19 1 0.506 0.0715 0.384 0.667
## 93 18 1 0.478 0.0728 0.355 0.644
## 96 16 1 0.448 0.0741 0.324 0.620
## 100 14 1 0.416 0.0754 0.292 0.594
## 104 12 1 0.381 0.0767 0.257 0.566
## 157 5 1 0.305 0.0918 0.169 0.550
## 167 4 1 0.229 0.0954 0.101 0.518
plot(surv.fit, main='Kaplan-Meier estimate with 95% confidence bounds',
xlab='time', ylab='survival function')
ggsurv <- function(s, CI = 'def', plot.cens = T, surv.col = 'gg.def',
cens.col = 'red', lty.est = 1, lty.ci = 2,
cens.shape = 3, back.white = F, xlab = 'Time',
ylab = 'Survival', main = ''){
library(ggplot2)
strata <- ifelse(is.null(s$strata) ==T, 1, length(s$strata))
stopifnot(length(surv.col) == 1 | length(surv.col) == strata)
stopifnot(length(lty.est) == 1 | length(lty.est) == strata)
ggsurv.s <- function(s, CI = 'def', plot.cens = T, surv.col = 'gg.def',
cens.col = 'red', lty.est = 1, lty.ci = 2,
cens.shape = 3, back.white = F, xlab = 'Time',
ylab = 'Survival', main = ''){
dat <- data.frame(time = c(0, s$time),
surv = c(1, s$surv),
up = c(1, s$upper),
low = c(1, s$lower),
cens = c(0, s$n.censor))
dat.cens <- subset(dat, cens != 0)
col <- ifelse(surv.col == 'gg.def', 'black', surv.col)
pl <- ggplot(dat, aes(x = time, y = surv)) +
xlab(xlab) + ylab(ylab) + ggtitle(main) +
geom_step(col = col, lty = lty.est)
pl <- if(CI == T | CI == 'def') {
pl + geom_step(aes(y = up), color = col, lty = lty.ci) +
geom_step(aes(y = low), color = col, lty = lty.ci)
} else (pl)
pl <- if(plot.cens == T & length(dat.cens) > 0){
pl + geom_point(data = dat.cens, aes(y = surv), shape = cens.shape,
col = cens.col)
} else if (plot.cens == T & length(dat.cens) == 0){
stop ('There are no censored observations')
} else(pl)
pl <- if(back.white == T) {pl + theme_bw()
} else (pl)
pl
}
ggsurv.m <- function(s, CI = 'def', plot.cens = T, surv.col = 'gg.def',
cens.col = 'red', lty.est = 1, lty.ci = 2,
cens.shape = 3, back.white = F, xlab = 'Time',
ylab = 'Survival', main = '') {
n <- s$strata
groups <- factor(unlist(strsplit(names
(s$strata), '='))[seq(2, 2*strata, by = 2)])
gr.name <- unlist(strsplit(names(s$strata), '='))[1]
gr.df <- vector('list', strata)
ind <- vector('list', strata)
n.ind <- c(0,n); n.ind <- cumsum(n.ind)
for(i in 1:strata) ind[[i]] <- (n.ind[i]+1):n.ind[i+1]
for(i in 1:strata){
gr.df[[i]] <- data.frame(
time = c(0, s$time[ ind[[i]] ]),
surv = c(1, s$surv[ ind[[i]] ]),
up = c(1, s$upper[ ind[[i]] ]),
low = c(1, s$lower[ ind[[i]] ]),
cens = c(0, s$n.censor[ ind[[i]] ]),
group = rep(groups[i], n[i] + 1))
}
dat <- do.call(rbind, gr.df)
dat.cens <- subset(dat, cens != 0)
pl <- ggplot(dat, aes(x = time, y = surv, group = group)) +
xlab(xlab) + ylab(ylab) + ggtitle(main) +
geom_step(aes(col = group, lty = group))
col <- if(length(surv.col == 1)){
scale_colour_manual(name = gr.name, values = rep(surv.col, strata))
} else{
scale_colour_manual(name = gr.name, values = surv.col)
}
pl <- if(surv.col[1] != 'gg.def'){
pl + col
} else {pl + scale_colour_discrete(name = gr.name)}
line <- if(length(lty.est) == 1){
scale_linetype_manual(name = gr.name, values = rep(lty.est, strata))
} else {scale_linetype_manual(name = gr.name, values = lty.est)}
pl <- pl + line
pl <- if(CI == T) {
if(length(surv.col) > 1 && length(lty.est) > 1){
stop('Either surv.col or lty.est should be of length 1 in order
to plot 95% CI with multiple strata')
}else if((length(surv.col) > 1 | surv.col == 'gg.def')[1]){
pl + geom_step(aes(y = up, color = group), lty = lty.ci) +
geom_step(aes(y = low, color = group), lty = lty.ci)
} else{pl + geom_step(aes(y = up, lty = group), col = surv.col) +
geom_step(aes(y = low,lty = group), col = surv.col)}
} else {pl}
pl <- if(plot.cens == T & length(dat.cens) > 0){
pl + geom_point(data = dat.cens, aes(y = surv), shape = cens.shape,
col = cens.col)
} else if (plot.cens == T & length(dat.cens) == 0){
stop ('There are no censored observations')
} else(pl)
pl <- if(back.white == T) {pl + theme_bw()
} else (pl)
pl
}
pl <- if(strata == 1) {ggsurv.s(s, CI , plot.cens, surv.col ,
cens.col, lty.est, lty.ci,
cens.shape, back.white, xlab,
ylab, main)
} else {ggsurv.m(s, CI, plot.cens, surv.col ,
cens.col, lty.est, lty.ci,
cens.shape, back.white, xlab,
ylab, main)}
pl
}
p <- ggsurv(surv.fit) + theme_bw()
p
ggplotly(p)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
##
## Attaching package: 'GGally'
## The following object is masked _by_ '.GlobalEnv':
##
## ggsurv
data(lung, package = "survival")
## Warning in data(lung, package = "survival"): data set 'lung' not found
sf_lung <- survival::survfit(survival::Surv(time, status) ~ 1, data = lung)
p1 <- GGally::ggsurv(sf_lung, main = "Kaplan-Meier Curve for the NCCTG Lung Cancer Data")
plotly::ggplotly(p1)
lung <- transform(lung, sex = factor(sex, levels = 1:2, labels = c("Male","Female")))
sf_sex <- survival::survfit(Surv(time, status) ~ sex, data = lung)
pl_sex <- GGally::ggsurv(sf_sex, main = "Kaplan-Meier Curve for the NCCTG Lung Cancer Data Stratified by Sex")
log_rank_sex <- survival::survdiff(Surv(time, status) ~ sex, data = lung)
pl_sex_annotated <- pl_sex + ggplot2::geom_text(aes(label = sprintf("log-rank test p-value: %0.2g",
pchisq(log_rank_sex$chisq, df = 1, lower.tail = F)),
x = 750, y = 0.9))
plotly::ggplotly(pl_sex_annotated)
reference: https://plotly.com/python/v3/ipython-notebooks/survival-analysis-r-vs-python/#using-r https://www.r-bloggers.com/2017/01/7-interactive-plots-from-the-pharmaceutical-industry/