Clinical Trial Data Analysis

dat <- read.csv("dat.csv", header = TRUE)
# load the R library
library(survival)
# fit Kaplan--Meier
fit.km = survfit(Surv(time,status==0)~trt, type=c("kaplan-meier"),dat)
# print the model fitting
fit.km

## Call: survfit(formula = Surv(time, status == 0) ~ trt, data = dat, 
##     type = c("kaplan-meier"))
## 
##            n events median 0.95LCL 0.95UCL
## trt=SCT   11      6    144     102      NA
## trt=SCTIT 10      3     NA     158      NA
## trt=SIT   10      5    192     144      NA

plot(fit.km, lty=c(1,4,8),lwd=2,xlab="Time in Weeks",ylab="S(t)")
legend("bottomleft",title="Line Types",
c("S+CT","S+CT+IT","S+IT"),lty=c(1,4,8), lwd=2)

# call "survfit" to fit the model
fit.fleming =survfit(Surv(time,status==0)~trt,dat,type='fleming')
# plot the estimated cumulative hazard function
plot(fit.fleming,lty=c(1,4,8),lwd=2,fun="cumhaz", xlab="Time in Weeks", ylab="Cumulative Hazard")
legend("topleft",title="Line Types",
c("S+CT","S+CT+IT","S+IT"),lty=c(1,4,8),lwd=2)

# use "survdiff" to test difference
fit.diff = survdiff(Surv(time, status==0)~trt,data=dat)
fit.diff

## Call:
## survdiff(formula = Surv(time, status == 0) ~ trt, data = dat)
## 
##            N Observed Expected (O-E)^2/E (O-E)^2/V
## trt=SCT   11        6     3.64   1.52842   2.12654
## trt=SCTIT 10        3     5.19   0.92444   1.51837
## trt=SIT   10        5     5.17   0.00549   0.00887
## 
##  Chisq= 2.5  on 2 degrees of freedom, p= 0.28

# fit exponential model
fit.exp =survreg(Surv(time, status==0)~trt,dat, dist="exponential")
summary(fit.exp)

## 
## Call:
## survreg(formula = Surv(time, status == 0) ~ trt, data = dat, 
##     dist = "exponential")
##             Value Std. Error      z        p
## (Intercept) 5.449      0.408 13.347 1.23e-40
## trtSCTIT    0.919      0.707  1.300 1.94e-01
## trtSIT      0.401      0.606  0.662 5.08e-01
## 
## Scale fixed at 1 
## 
## Exponential distribution
## Loglik(model)= -95   Loglik(intercept only)= -96
##  Chisq= 1.81 on 2 degrees of freedom, p= 0.4 
## Number of Newton-Raphson Iterations: 4 
## n= 31

# fit Weibull model
fit.Weibull =survreg(Surv(time, status==0)~trt,dat)
summary(fit.Weibull)

## 
## Call:
## survreg(formula = Surv(time, status == 0) ~ trt, data = dat)
##              Value Std. Error      z         p
## (Intercept)  5.263      0.221 23.842 1.23e-125
## trtSCTIT     0.611      0.388  1.574  1.16e-01
## trtSIT       0.293      0.326  0.901  3.68e-01
## Log(scale)  -0.627      0.234 -2.679  7.39e-03
## 
## Scale= 0.534 
## 
## Weibull distribution
## Loglik(model)= -92.2   Loglik(intercept only)= -93.6
##  Chisq= 2.77 on 2 degrees of freedom, p= 0.25 
## Number of Newton-Raphson Iterations: 5 
## n= 31

# fit exponential model +Age
fit.exp.age = survreg(Surv(time, status==0)~trt+age, dat,dist="exponential")
summary(fit.exp.age)

## 
## Call:
## survreg(formula = Surv(time, status == 0) ~ trt + age, data = dat, 
##     dist = "exponential")
##               Value Std. Error      z        p
## (Intercept) 11.0560     2.2253  4.968 6.76e-07
## trtSCTIT     0.7334     0.7072  1.037 3.00e-01
## trtSIT       0.2329     0.6068  0.384 7.01e-01
## age         -0.0966     0.0366 -2.636 8.38e-03
## 
## Scale fixed at 1 
## 
## Exponential distribution
## Loglik(model)= -91   Loglik(intercept only)= -96
##  Chisq= 9.91 on 3 degrees of freedom, p= 0.019 
## Number of Newton-Raphson Iterations: 5 
## n= 31

# fit Weibull model+Age
fit.Weibull.age = survreg(Surv(time,status==0)~trt+age,dat)
summary(fit.Weibull.age)

## 
## Call:
## survreg(formula = Surv(time, status == 0) ~ trt + age, data = dat)
##               Value Std. Error      z        p
## (Intercept)  8.5885     1.3214  6.500 8.04e-11
## trtSCTIT     0.3899     0.3505  1.112 2.66e-01
## trtSIT       0.1038     0.2947  0.352 7.25e-01
## age         -0.0569     0.0217 -2.620 8.78e-03
## Log(scale)  -0.7294     0.2291 -3.184 1.45e-03
## 
## Scale= 0.482 
## 
## Weibull distribution
## Loglik(model)= -87.2   Loglik(intercept only)= -93.6
##  Chisq= 12.79 on 3 degrees of freedom, p= 0.0051 
## Number of Newton-Raphson Iterations: 7 
## n= 31

# fit Cox
fit.Cox = coxph(Surv(time, status==0)~trt,dat)
summary(fit.Cox)

## Call:
## coxph(formula = Surv(time, status == 0) ~ trt, data = dat)
## 
##   n= 31, number of events= 14 
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)
## trtSCTIT -1.0855    0.3377   0.7156 -1.517    0.129
## trtSIT   -0.5482    0.5780   0.6082 -0.901    0.367
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## trtSCTIT    0.3377      2.961   0.08308     1.373
## trtSIT      0.5780      1.730   0.17547     1.904
## 
## Concordance= 0.599  (se = 0.08 )
## Rsquare= 0.077   (max possible= 0.933 )
## Likelihood ratio test= 2.49  on 2 df,   p=0.2886
## Wald test            = 2.41  on 2 df,   p=0.2997
## Score (logrank) test = 2.57  on 2 df,   p=0.2764

# fit Cox +Age
fit.Cox.age = coxph(Surv(time, status==0)~trt+age,dat)
summary(fit.Cox.age)

## Call:
## coxph(formula = Surv(time, status == 0) ~ trt + age, data = dat)
## 
##   n= 31, number of events= 14 
## 
##              coef exp(coef) se(coef)      z Pr(>|z|)   
## trtSCTIT -0.86214   0.42226  0.71845 -1.200  0.23014   
## trtSIT   -0.29422   0.74511  0.61201 -0.481  0.63070   
## age       0.11251   1.11908  0.04125  2.728  0.00638 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## trtSCTIT    0.4223     2.3682    0.1033     1.726
## trtSIT      0.7451     1.3421    0.2245     2.473
## age         1.1191     0.8936    1.0322     1.213
## 
## Concordance= 0.769  (se = 0.086 )
## Rsquare= 0.314   (max possible= 0.933 )
## Likelihood ratio test= 11.68  on 3 df,   p=0.008546
## Wald test            = 9.13  on 3 df,   p=0.02755
## Score (logrank) test = 9.72  on 3 df,   p=0.0211

datcan <- read.csv("datcan.csv", header = TRUE)
head(datcan)

##     trt tl tu status
## 1 radio  0  7      1
## 2 radio  0  8      1
## 3 radio  0  5      1
## 4 radio  4 11      1
## 5 radio  5 12      1
## 6 radio  5 11      1

# create a new dataset "breast" and keep "dat" for future use
breast = datcan
str(breast)

## 'data.frame':    94 obs. of  4 variables:
##  $ trt   : Factor w/ 2 levels "radchemo","radio": 2 2 2 2 2 2 2 2 2 2 ...
##  $ tl    : int  0 0 0 4 5 5 6 7 7 11 ...
##  $ tu    : int  7 8 5 11 12 11 10 16 14 15 ...
##  $ status: int  1 1 1 1 1 1 1 1 1 1 ...

# replace 0's with NA as left-censored
breast$tl[breast$tl== 0]= NA
# print the first 4 observations
head(breast, n=4)

##     trt tl tu status
## 1 radio NA  7      1
## 2 radio NA  8      1
## 3 radio NA  5      1
## 4 radio  4 11      1

# fit exponential
fit.exp=survreg(Surv(tl,tu,type="interval2")~trt, breast, dist="exponential")
summary(fit.exp)

## 
## Call:
## survreg(formula = Surv(tl, tu, type = "interval2") ~ trt, data = breast, 
##     dist = "exponential")
##             Value Std. Error     z        p
## (Intercept) 3.460      0.178 19.45 3.15e-84
## trtradio    0.659      0.282  2.34 1.94e-02
## 
## Scale fixed at 1 
## 
## Exponential distribution
## Loglik(model)= -143.9   Loglik(intercept only)= -146.7
##  Chisq= 5.62 on 1 degrees of freedom, p= 0.018 
## Number of Newton-Raphson Iterations: 4 
## n= 94

# fit Weibull
fit.Weibull=survreg(Surv(tl,tu,type="interval2")~trt, breast)
summary(fit.Weibull)

## 
## Call:
## survreg(formula = Surv(tl, tu, type = "interval2") ~ trt, data = breast)
##              Value Std. Error     z         p
## (Intercept)  3.384      0.119 28.54 3.58e-179
## trtradio     0.535      0.188  2.84  4.54e-03
## Log(scale)  -0.419      0.125 -3.35  8.21e-04
## 
## Scale= 0.658 
## 
## Weibull distribution
## Loglik(model)= -139.2   Loglik(intercept only)= -143.3
##  Chisq= 8.22 on 1 degrees of freedom, p= 0.0042 
## Number of Newton-Raphson Iterations: 5 
## n= 94

Clinical Trial Data Analysis

Kushan De Silva

January 2, 2018