# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ~ CRP 245 | Final Exam Spring 2019 ~
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Prostate Data
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Description:
# These data were from a randomized trial
# comparing four treatments for stage 3 and 4
# prostate cancer, with almost equal numbers of
# patients on placebo and each of three doses
# of estrogen.
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Data Set: prostate
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Data Dictionary:
# Name Labels
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# patno Patient Number
# stage Stage
# rx Trt Grp (Placebo,0.2mg estrogen, 1mg estrogen, 5mg estrogen)
# dtime Months of Follow-up
# status alive vs (death + cause)
# age Age in Years
# wt Weight Index = wt(kg)-ht(cm)+200
# pf performance status
# hx History of Cardiovascular Disease
# sbp Systolic Blood Pressure/10
# dbp Diastolic Blood Pressure/10
# ekg ekg status
# hg Serum Hemoglobin (g/100ml)
# sz Size of Primary Tumor (cm^2)
# sg Combined Index of Stage and Hist. Grade
# ap Serum Prostatic Acid Phosphatase
# bm Bone Metastases
# sdate Date on study
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Variable Levels
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# rx
# placebo
# 0.2 mg estrogen
# 1.0 mg estrogen
# 5.0 mg estrogen
# ~~~~~~~~~~~~~~~
# status
# alive
# dead - prostatic ca
# dead - heart or vascular
# dead - cerebrovascular
# dead - pulmonary embolus
# dead - other ca
# dead - respiratory disease
# dead - other specific non-ca
# dead - unspecified non-ca
# dead - unknown cause
# ~~~~~~~~~~~~~~~
# pf
# normal activity
# in bed < 50% daytime
# in bed > 50% daytime
# confined to bed
# ~~~~~~~~~~~~~~~
# ekg
# normal
# benign
# rhythmic disturb & electrolyte ch
# heart block or conduction def
# heart strain
# old MI
# recent MI
# ~~~~~~~~~~~~~~~
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Download and load the Prostate Data dataset:
download.file("http://www.duke.edu/~sgrambow/crp241data/prostate.RData",
destfile = "prostate.RData",quiet=TRUE,mode="wb",cacheOK=FALSE)
load("prostate.RData")
# Load Libraries
library(survival) # for KM and cox analyses
library(survminer) # for Plotting nice KM plots
## Warning: package 'survminer' was built under R version 3.5.2
## Loading required package: ggplot2
## Loading required package: ggpubr
## Loading required package: magrittr
# Basic Analysis
# Create survival object for survial analysis
# dead is alive vs dead
# dtime is Months of Follow-up
# creating t_death survival object
prostate <- within(prostate, {
dead <- NA
dead[status != 'alive'] <- 1
dead[status == 'alive'] <- 0})
# check descriptive statistics
summary(prostate$dead)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 1.0000 0.7052 1.0000 1.0000
summary(prostate$dtime)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 14.25 34.00 36.13 57.75 76.00
table(prostate$dead)
##
## 0 1
## 148 354
summary(prostate$rx)
## placebo 0.2 mg estrogen 1.0 mg estrogen 5.0 mg estrogen
## 127 124 126 125
# create survival object
prostate$t_death <- Surv(prostate$dtime,prostate$dead)
# Standard Kaplan-Meier analysis
fit <- survfit(t_death ~ rx, data=prostate)
fit # generates median and 95% CI for median
## Call: survfit(formula = t_death ~ rx, data = prostate)
##
## n events median 0.95LCL 0.95UCL
## rx=placebo 127 95 33.0 27 40
## rx=0.2 mg estrogen 124 95 30.0 26 41
## rx=1.0 mg estrogen 126 71 41.5 31 NA
## rx=5.0 mg estrogen 125 93 35.0 28 46
# Compare survival curves by treatment arm using log-rank test
survdiff(prostate$t_death ~ rx, data=prostate)
## Call:
## survdiff(formula = prostate$t_death ~ rx, data = prostate)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## rx=placebo 127 95 87.6 0.619 0.839
## rx=0.2 mg estrogen 124 95 84.9 1.212 1.626
## rx=1.0 mg estrogen 126 71 95.9 6.479 9.072
## rx=5.0 mg estrogen 125 93 85.6 0.644 0.867
##
## Chisq= 9.1 on 3 degrees of freedom, p= 0.03
# Create Kaplan Meier plot of survival curve
# KM by trt group
ggsurvplot(fit,
pval = TRUE, conf.int = FALSE, # add log-rank p-value & CIs
risk.table = TRUE, # Add risk table
risk.table.col = "strata", # Change risk table color by groups
risk.table.height = 0.35,
linetype = c("solid"), # Change line type by groups
surv.median.line = "hv", # Specify median survival
ggtheme = theme_bw(base_size = 10), # Change ggplot2 theme
palette = c('red','blue','black','green') # colors used in plot
)

# Compare survival curves by treatment arm using Cox PH regression
fit_trt <- coxph(t_death ~ rx,data=prostate)
summary(fit_trt)
## Call:
## coxph(formula = t_death ~ rx, data = prostate)
##
## n= 502, number of events= 354
##
## coef exp(coef) se(coef) z Pr(>|z|)
## rx0.2 mg estrogen 0.032124 1.032645 0.145119 0.221 0.825
## rx1.0 mg estrogen -0.385517 0.680099 0.156960 -2.456 0.014 *
## rx5.0 mg estrogen 0.002912 1.002916 0.145937 0.020 0.984
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## rx0.2 mg estrogen 1.0326 0.9684 0.7770 1.3724
## rx1.0 mg estrogen 0.6801 1.4704 0.5000 0.9251
## rx5.0 mg estrogen 1.0029 0.9971 0.7534 1.3350
##
## Concordance= 0.531 (se = 0.016 )
## Rsquare= 0.019 (max possible= 1 )
## Likelihood ratio test= 9.71 on 3 df, p=0.02
## Wald test = 9.01 on 3 df, p=0.03
## Score (logrank) test = 9.13 on 3 df, p=0.03
# End of Program