R Notebook

library(plotly)

## Loading required package: ggplot2

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(ggplot2)
# library(OIsurv)
library(KMsurv)
library(survival)
library(IRdisplay)

data(tongue)
summary(tongue)

##       type           time            delta       
##  Min.   :1.00   Min.   :  1.00   Min.   :0.0000  
##  1st Qu.:1.00   1st Qu.: 23.75   1st Qu.:0.0000  
##  Median :1.00   Median : 69.50   Median :1.0000  
##  Mean   :1.35   Mean   : 73.83   Mean   :0.6625  
##  3rd Qu.:2.00   3rd Qu.:101.75   3rd Qu.:1.0000  
##  Max.   :2.00   Max.   :400.00   Max.   :1.0000

attach(tongue)
tongue.surv <- Surv(time[type==1], delta[type==1])
tongue.surv

##  [1]   1    3    3    4   10   13   13   16   16   24   26   27   28   30   30 
## [16]  32   41   51   65   67   70   72   73   77   91   93   96  100  104  157 
## [31] 167   61+  74+  79+  80+  81+  87+  87+  88+  89+  93+  97+ 101+ 104+ 108+
## [46] 109+ 120+ 131+ 150+ 231+ 240+ 400+

surv.fit <- survfit(tongue.surv~1)
surv.fit

## Call: survfit(formula = tongue.surv ~ 1)
## 
##       n events median 0.95LCL 0.95UCL
## [1,] 52     31     93      67      NA

summary(surv.fit)

## Call: survfit(formula = tongue.surv ~ 1)
## 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     1     52       1    0.981  0.0190        0.944        1.000
##     3     51       2    0.942  0.0323        0.881        1.000
##     4     49       1    0.923  0.0370        0.853        0.998
##    10     48       1    0.904  0.0409        0.827        0.988
##    13     47       2    0.865  0.0473        0.777        0.963
##    16     45       2    0.827  0.0525        0.730        0.936
##    24     43       1    0.808  0.0547        0.707        0.922
##    26     42       1    0.788  0.0566        0.685        0.908
##    27     41       1    0.769  0.0584        0.663        0.893
##    28     40       1    0.750  0.0600        0.641        0.877
##    30     39       2    0.712  0.0628        0.598        0.846
##    32     37       1    0.692  0.0640        0.578        0.830
##    41     36       1    0.673  0.0651        0.557        0.813
##    51     35       1    0.654  0.0660        0.537        0.797
##    65     33       1    0.634  0.0669        0.516        0.780
##    67     32       1    0.614  0.0677        0.495        0.762
##    70     31       1    0.594  0.0683        0.475        0.745
##    72     30       1    0.575  0.0689        0.454        0.727
##    73     29       1    0.555  0.0693        0.434        0.709
##    77     27       1    0.534  0.0697        0.414        0.690
##    91     19       1    0.506  0.0715        0.384        0.667
##    93     18       1    0.478  0.0728        0.355        0.644
##    96     16       1    0.448  0.0741        0.324        0.620
##   100     14       1    0.416  0.0754        0.292        0.594
##   104     12       1    0.381  0.0767        0.257        0.566
##   157      5       1    0.305  0.0918        0.169        0.550
##   167      4       1    0.229  0.0954        0.101        0.518

plot(surv.fit, main='Kaplan-Meier estimate with 95% confidence bounds',
     xlab='time', ylab='survival function')

ggsurv <- function(s, CI = 'def', plot.cens = T, surv.col = 'gg.def',
                   cens.col = 'red', lty.est = 1, lty.ci = 2,
                   cens.shape = 3, back.white = F, xlab = 'Time',
                   ylab = 'Survival', main = ''){
 
  library(ggplot2)
  strata <- ifelse(is.null(s$strata) ==T, 1, length(s$strata))
  stopifnot(length(surv.col) == 1 | length(surv.col) == strata)
  stopifnot(length(lty.est) == 1 | length(lty.est) == strata)
 
  ggsurv.s <- function(s, CI = 'def', plot.cens = T, surv.col = 'gg.def',
                       cens.col = 'red', lty.est = 1, lty.ci = 2,
                       cens.shape = 3, back.white = F, xlab = 'Time',
                       ylab = 'Survival', main = ''){
 
    dat <- data.frame(time = c(0, s$time),
                      surv = c(1, s$surv),
                      up = c(1, s$upper),
                      low = c(1, s$lower),
                      cens = c(0, s$n.censor))
    dat.cens <- subset(dat, cens != 0)
 
    col <- ifelse(surv.col == 'gg.def', 'black', surv.col)
 
    pl <- ggplot(dat, aes(x = time, y = surv)) +
      xlab(xlab) + ylab(ylab) + ggtitle(main) +
      geom_step(col = col, lty = lty.est)
 
    pl <- if(CI == T | CI == 'def') {
      pl + geom_step(aes(y = up), color = col, lty = lty.ci) +
        geom_step(aes(y = low), color = col, lty = lty.ci)
    } else (pl)
 
    pl <- if(plot.cens == T & length(dat.cens) > 0){
      pl + geom_point(data = dat.cens, aes(y = surv), shape = cens.shape,
                       col = cens.col)
    } else if (plot.cens == T & length(dat.cens) == 0){
      stop ('There are no censored observations')
    } else(pl)
 
    pl <- if(back.white == T) {pl + theme_bw()
    } else (pl)
    pl
  }
 
  ggsurv.m <- function(s, CI = 'def', plot.cens = T, surv.col = 'gg.def',
                       cens.col = 'red', lty.est = 1, lty.ci = 2,
                       cens.shape = 3, back.white = F, xlab = 'Time',
                       ylab = 'Survival', main = '') {
    n <- s$strata
 
    groups <- factor(unlist(strsplit(names
                                     (s$strata), '='))[seq(2, 2*strata, by = 2)])
    gr.name <-  unlist(strsplit(names(s$strata), '='))[1]
    gr.df <- vector('list', strata)
    ind <- vector('list', strata)
    n.ind <- c(0,n); n.ind <- cumsum(n.ind)
    for(i in 1:strata) ind[[i]] <- (n.ind[i]+1):n.ind[i+1]
 
    for(i in 1:strata){
      gr.df[[i]] <- data.frame(
        time = c(0, s$time[ ind[[i]] ]),
        surv = c(1, s$surv[ ind[[i]] ]),
        up = c(1, s$upper[ ind[[i]] ]),
        low = c(1, s$lower[ ind[[i]] ]),
        cens = c(0, s$n.censor[ ind[[i]] ]),
        group = rep(groups[i], n[i] + 1))
    }
 
    dat <- do.call(rbind, gr.df)
    dat.cens <- subset(dat, cens != 0)
 
    pl <- ggplot(dat, aes(x = time, y = surv, group = group)) +
      xlab(xlab) + ylab(ylab) + ggtitle(main) +
      geom_step(aes(col = group, lty = group))
 
    col <- if(length(surv.col == 1)){
      scale_colour_manual(name = gr.name, values = rep(surv.col, strata))
    } else{
      scale_colour_manual(name = gr.name, values = surv.col)
    }
 
    pl <- if(surv.col[1] != 'gg.def'){
      pl + col
    } else {pl + scale_colour_discrete(name = gr.name)}
 
    line <- if(length(lty.est) == 1){
      scale_linetype_manual(name = gr.name, values = rep(lty.est, strata))
    } else {scale_linetype_manual(name = gr.name, values = lty.est)}
 
    pl <- pl + line
 
    pl <- if(CI == T) {
      if(length(surv.col) > 1 && length(lty.est) > 1){
        stop('Either surv.col or lty.est should be of length 1 in order
             to plot 95% CI with multiple strata')
      }else if((length(surv.col) > 1 | surv.col == 'gg.def')[1]){
        pl + geom_step(aes(y = up, color = group), lty = lty.ci) +
          geom_step(aes(y = low, color = group), lty = lty.ci)
      } else{pl +  geom_step(aes(y = up, lty = group), col = surv.col) +
               geom_step(aes(y = low,lty = group), col = surv.col)}
    } else {pl}
 
 
    pl <- if(plot.cens == T & length(dat.cens) > 0){
      pl + geom_point(data = dat.cens, aes(y = surv), shape = cens.shape,
                      col = cens.col)
    } else if (plot.cens == T & length(dat.cens) == 0){
      stop ('There are no censored observations')
    } else(pl)
 
    pl <- if(back.white == T) {pl + theme_bw()
    } else (pl)
    pl
  }
  pl <- if(strata == 1) {ggsurv.s(s, CI , plot.cens, surv.col ,
                                  cens.col, lty.est, lty.ci,
                                  cens.shape, back.white, xlab,
                                  ylab, main)
  } else {ggsurv.m(s, CI, plot.cens, surv.col ,
                   cens.col, lty.est, lty.ci,
                   cens.shape, back.white, xlab,
                   ylab, main)}
  pl
}

p <- ggsurv(surv.fit) + theme_bw()
p

ggplotly(p)

library(GGally)

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

## 
## Attaching package: 'GGally'

## The following object is masked _by_ '.GlobalEnv':
## 
##     ggsurv

data(lung, package = "survival")

## Warning in data(lung, package = "survival"): data set 'lung' not found

sf_lung <- survival::survfit(survival::Surv(time, status) ~ 1, data = lung)
p1 <- GGally::ggsurv(sf_lung, main = "Kaplan-Meier Curve for the NCCTG Lung Cancer Data")
plotly::ggplotly(p1)

lung <- transform(lung, sex = factor(sex, levels = 1:2, labels = c("Male","Female")))
sf_sex <- survival::survfit(Surv(time, status) ~ sex, data = lung)
pl_sex <- GGally::ggsurv(sf_sex, main = "Kaplan-Meier Curve for the NCCTG Lung Cancer Data Stratified by Sex")
log_rank_sex <- survival::survdiff(Surv(time, status) ~ sex, data = lung)


pl_sex_annotated <- pl_sex + ggplot2::geom_text(aes(label = sprintf("log-rank test p-value: %0.2g", 
                                                pchisq(log_rank_sex$chisq, df = 1, lower.tail = F)),
                            x = 750, y = 0.9))

plotly::ggplotly(pl_sex_annotated)

reference: https://plotly.com/python/v3/ipython-notebooks/survival-analysis-r-vs-python/#using-r https://www.r-bloggers.com/2017/01/7-interactive-plots-from-the-pharmaceutical-industry/