# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ~ CRP 245 | Final Exam Spring 2019 ~
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# Prostate Data
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Description: 
# These data were from a randomized trial 
# comparing four treatments for stage 3 and 4 
# prostate cancer, with almost equal numbers of 
# patients on placebo and each of three doses 
# of estrogen.
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Data Set: prostate 
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Data Dictionary: 
# Name  Labels
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# patno     Patient Number
# stage     Stage
# rx        Trt Grp (Placebo,0.2mg estrogen, 1mg estrogen, 5mg estrogen)
# dtime     Months of Follow-up
# status    alive vs (death + cause)    
# age         Age in Years
# wt          Weight Index = wt(kg)-ht(cm)+200
# pf          performance status
# hx          History of Cardiovascular Disease
# sbp         Systolic Blood Pressure/10
# dbp         Diastolic Blood Pressure/10
# ekg         ekg status
# hg          Serum Hemoglobin (g/100ml)
# sz          Size of Primary Tumor (cm^2)
# sg          Combined Index of Stage and Hist. Grade
# ap          Serum Prostatic Acid Phosphatase
# bm          Bone Metastases
# sdate     Date on study
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Variable Levels
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# rx    
# placebo
# 0.2 mg estrogen
# 1.0 mg estrogen
# 5.0 mg estrogen
# ~~~~~~~~~~~~~~~
# status    
# alive
# dead - prostatic ca
# dead - heart or vascular
# dead - cerebrovascular
# dead - pulmonary embolus
# dead - other ca
# dead - respiratory disease
# dead - other specific non-ca
# dead - unspecified non-ca
# dead - unknown cause
# ~~~~~~~~~~~~~~~
# pf    
# normal activity
# in bed < 50% daytime
# in bed > 50% daytime
# confined to bed
# ~~~~~~~~~~~~~~~
# ekg   
# normal
# benign
# rhythmic disturb & electrolyte ch
# heart block or conduction def
# heart strain
# old MI
# recent MI
# ~~~~~~~~~~~~~~~

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Download and load the Prostate Data dataset: 
download.file("http://www.duke.edu/~sgrambow/crp241data/prostate.RData",
              destfile = "prostate.RData",quiet=TRUE,mode="wb",cacheOK=FALSE)
load("prostate.RData")
# Load Libraries
library(survival) # for KM and cox analyses
library(survminer) # for Plotting nice KM plots
## Warning: package 'survminer' was built under R version 3.5.2
## Loading required package: ggplot2
## Loading required package: ggpubr
## Loading required package: magrittr
# Basic Analysis

# Create survival object for survial analysis
# dead is alive vs dead
# dtime is Months of Follow-up
# creating t_death survival object
prostate <- within(prostate, {
  dead <- NA
  dead[status != 'alive'] <- 1 
  dead[status == 'alive'] <- 0})

# check descriptive statistics
summary(prostate$dead)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  1.0000  0.7052  1.0000  1.0000
summary(prostate$dtime)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   14.25   34.00   36.13   57.75   76.00
table(prostate$dead)
## 
##   0   1 
## 148 354
summary(prostate$rx)
##         placebo 0.2 mg estrogen 1.0 mg estrogen 5.0 mg estrogen 
##             127             124             126             125
# create survival object
prostate$t_death <- Surv(prostate$dtime,prostate$dead)

# Standard Kaplan-Meier analysis
fit <- survfit(t_death ~ rx, data=prostate)
fit # generates median and 95% CI for median
## Call: survfit(formula = t_death ~ rx, data = prostate)
## 
##                      n events median 0.95LCL 0.95UCL
## rx=placebo         127     95   33.0      27      40
## rx=0.2 mg estrogen 124     95   30.0      26      41
## rx=1.0 mg estrogen 126     71   41.5      31      NA
## rx=5.0 mg estrogen 125     93   35.0      28      46
# Compare survival curves by treatment arm using log-rank test
survdiff(prostate$t_death ~ rx, data=prostate)
## Call:
## survdiff(formula = prostate$t_death ~ rx, data = prostate)
## 
##                      N Observed Expected (O-E)^2/E (O-E)^2/V
## rx=placebo         127       95     87.6     0.619     0.839
## rx=0.2 mg estrogen 124       95     84.9     1.212     1.626
## rx=1.0 mg estrogen 126       71     95.9     6.479     9.072
## rx=5.0 mg estrogen 125       93     85.6     0.644     0.867
## 
##  Chisq= 9.1  on 3 degrees of freedom, p= 0.03
# Create Kaplan Meier plot of survival curve 
# KM by trt group
ggsurvplot(fit,
           pval = TRUE, conf.int = FALSE, # add log-rank p-value & CIs
           risk.table = TRUE, # Add risk table
           risk.table.col = "strata", # Change risk table color by groups
           risk.table.height = 0.35,
           linetype = c("solid"), # Change line type by groups
           surv.median.line = "hv", # Specify median survival
           ggtheme = theme_bw(base_size = 10), # Change ggplot2 theme
           palette = c('red','blue','black','green') # colors used in plot
           ) 

# Compare survival curves by treatment arm using Cox PH regression 
fit_trt <- coxph(t_death ~ rx,data=prostate)
summary(fit_trt)
## Call:
## coxph(formula = t_death ~ rx, data = prostate)
## 
##   n= 502, number of events= 354 
## 
##                        coef exp(coef)  se(coef)      z Pr(>|z|)  
## rx0.2 mg estrogen  0.032124  1.032645  0.145119  0.221    0.825  
## rx1.0 mg estrogen -0.385517  0.680099  0.156960 -2.456    0.014 *
## rx5.0 mg estrogen  0.002912  1.002916  0.145937  0.020    0.984  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## rx0.2 mg estrogen    1.0326     0.9684    0.7770    1.3724
## rx1.0 mg estrogen    0.6801     1.4704    0.5000    0.9251
## rx5.0 mg estrogen    1.0029     0.9971    0.7534    1.3350
## 
## Concordance= 0.531  (se = 0.016 )
## Rsquare= 0.019   (max possible= 1 )
## Likelihood ratio test= 9.71  on 3 df,   p=0.02
## Wald test            = 9.01  on 3 df,   p=0.03
## Score (logrank) test = 9.13  on 3 df,   p=0.03
# End of Program