# ~~~~~~~~~~~~~~~~~~~~~~~~~~
# ~ CRP 245 Module 1 Day 4 ~
# ~~~~~~~~~~~~~~~~~~~~~~~~~~
# Ovarian Cancer Survival Data Example:
# A randomized clinical trial was performed to compare two treatments of patients with
# advanced ovarian carcinoma (stages IIIB and IV) using either cyclophosphamide alone
# (1 g/m2) or cyclophosphamide (500 mg/m2) plus adriamycin (40 mg/m2) by iv injection
# every 3 weeks.
# Data Set: ovarian
# Data Dictionary:
# (1) futime: time to death or censoring post-randomization (days)
# (2) fustat: censoring status (1 = died; 0 = censored)
# (3) age: in years
# (4) resid.ds: residual disease present (1=no; 2=yes)
# (5) rx: treatment group (1=cyclophosphamide alone; 2=cyclophosphamide+adriamycin)
# (6) ecog.ps: ECOG performance status (1 is better;
# see reference: http://ecog.org/general/perf_stat.html)
## Download and load the data file = ovarian
download.file("http://www.duke.edu/~sgrambow/crp241data/ovarian2.RData",
destfile = "ovarian2.RData",quiet=TRUE,mode="wb",cacheOK=FALSE)
load("ovarian2.RData")
# Install and load package for Survival Analysis
# - Recall: You only need to install the package ONCE, but you must load the
# package EVERY time you want to use it!
# install.packages('survival')
# we need the survival package for performing
# survival analyses
library(survival)
# install.packages('surminer')
# we need this package for the ggsurvplot function
# which creates KM plots.
library(survminer)
## Loading required package: ggplot2
## Loading required package: ggpubr
## Loading required package: magrittr
# Example -- Ovarian Cancer Data
# Question 1
# Obtain the table of Kaplan Meier survival estimates for the observed data set.
# - Obtain survival estimates using survfit() function
fit <- survfit(Surv(futime, fustat)~1, data=ovarian2)
# - Print table of survival estimates
# -- Default ouput only shows UNIQUE EVENT times
summary(fit)
## Call: survfit(formula = Surv(futime, fustat) ~ 1, data = ovarian2)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 59 26 1 0.962 0.0377 0.890 1.000
## 115 25 1 0.923 0.0523 0.826 1.000
## 156 24 1 0.885 0.0627 0.770 1.000
## 268 23 1 0.846 0.0708 0.718 0.997
## 329 22 1 0.808 0.0773 0.670 0.974
## 353 21 1 0.769 0.0826 0.623 0.949
## 365 20 1 0.731 0.0870 0.579 0.923
## 431 17 1 0.688 0.0919 0.529 0.894
## 464 15 1 0.642 0.0965 0.478 0.862
## 475 14 1 0.596 0.0999 0.429 0.828
## 563 12 1 0.546 0.1032 0.377 0.791
## 638 11 1 0.497 0.1051 0.328 0.752
# -- Have to ask R to show censored event times
summary(fit,censor=TRUE)
## Call: survfit(formula = Surv(futime, fustat) ~ 1, data = ovarian2)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 59 26 1 0.962 0.0377 0.890 1.000
## 115 25 1 0.923 0.0523 0.826 1.000
## 156 24 1 0.885 0.0627 0.770 1.000
## 268 23 1 0.846 0.0708 0.718 0.997
## 329 22 1 0.808 0.0773 0.670 0.974
## 353 21 1 0.769 0.0826 0.623 0.949
## 365 20 1 0.731 0.0870 0.579 0.923
## 377 19 0 0.731 0.0870 0.579 0.923
## 421 18 0 0.731 0.0870 0.579 0.923
## 431 17 1 0.688 0.0919 0.529 0.894
## 448 16 0 0.688 0.0919 0.529 0.894
## 464 15 1 0.642 0.0965 0.478 0.862
## 475 14 1 0.596 0.0999 0.429 0.828
## 477 13 0 0.596 0.0999 0.429 0.828
## 563 12 1 0.546 0.1032 0.377 0.791
## 638 11 1 0.497 0.1051 0.328 0.752
## 744 10 0 0.497 0.1051 0.328 0.752
## 769 9 0 0.497 0.1051 0.328 0.752
## 770 8 0 0.497 0.1051 0.328 0.752
## 803 7 0 0.497 0.1051 0.328 0.752
## 855 6 0 0.497 0.1051 0.328 0.752
## 1040 5 0 0.497 0.1051 0.328 0.752
## 1106 4 0 0.497 0.1051 0.328 0.752
## 1129 3 0 0.497 0.1051 0.328 0.752
## 1206 2 0 0.497 0.1051 0.328 0.752
## 1227 1 0 0.497 0.1051 0.328 0.752
# generates median and 95% CI for median
summary(fit)$table
## records n.max n.start events *rmean *se(rmean) median
## 26.00000 26.00000 26.00000 12.00000 793.99930 91.51788 638.00000
## 0.95LCL 0.95UCL
## 464.00000 NA
# Question 2
# Create the Kaplan Meier plot that estimates the true survival curve.
# see this link for info on plot options:
# https://cran.r-project.org/web/packages/survminer/vignettes/Informative_Survival_Plots.html
# https://www.r-bloggers.com/survival-analysis-basics/
ggsurvplot(fit, data = ovarian2, risk.table = TRUE, surv.median.line = "hv")

# Question 3
# Using the output from Question 1 and 2, what is the estimated
# probability that a patient with advanced ovarian carcinoma will
# survive at least 2 years?
# Question 4
# Using the output from Question 1 and 2, what is the
# estimated probability that a patient with advanced
# ovarian carcinoma will die before 6 months?
# End of Program