dat <- read.csv("dat.csv", header = TRUE)
# load the R library
library(survival)
# fit Kaplan--Meier
fit.km = survfit(Surv(time,status==0)~trt, type=c("kaplan-meier"),dat)
# print the model fitting
fit.km
## Call: survfit(formula = Surv(time, status == 0) ~ trt, data = dat,
## type = c("kaplan-meier"))
##
## n events median 0.95LCL 0.95UCL
## trt=SCT 11 6 144 102 NA
## trt=SCTIT 10 3 NA 158 NA
## trt=SIT 10 5 192 144 NA
plot(fit.km, lty=c(1,4,8),lwd=2,xlab="Time in Weeks",ylab="S(t)")
legend("bottomleft",title="Line Types",
c("S+CT","S+CT+IT","S+IT"),lty=c(1,4,8), lwd=2)

# call "survfit" to fit the model
fit.fleming =survfit(Surv(time,status==0)~trt,dat,type='fleming')
# plot the estimated cumulative hazard function
plot(fit.fleming,lty=c(1,4,8),lwd=2,fun="cumhaz", xlab="Time in Weeks", ylab="Cumulative Hazard")
legend("topleft",title="Line Types",
c("S+CT","S+CT+IT","S+IT"),lty=c(1,4,8),lwd=2)

# use "survdiff" to test difference
fit.diff = survdiff(Surv(time, status==0)~trt,data=dat)
fit.diff
## Call:
## survdiff(formula = Surv(time, status == 0) ~ trt, data = dat)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## trt=SCT 11 6 3.64 1.52842 2.12654
## trt=SCTIT 10 3 5.19 0.92444 1.51837
## trt=SIT 10 5 5.17 0.00549 0.00887
##
## Chisq= 2.5 on 2 degrees of freedom, p= 0.28
# fit exponential model
fit.exp =survreg(Surv(time, status==0)~trt,dat, dist="exponential")
summary(fit.exp)
##
## Call:
## survreg(formula = Surv(time, status == 0) ~ trt, data = dat,
## dist = "exponential")
## Value Std. Error z p
## (Intercept) 5.449 0.408 13.347 1.23e-40
## trtSCTIT 0.919 0.707 1.300 1.94e-01
## trtSIT 0.401 0.606 0.662 5.08e-01
##
## Scale fixed at 1
##
## Exponential distribution
## Loglik(model)= -95 Loglik(intercept only)= -96
## Chisq= 1.81 on 2 degrees of freedom, p= 0.4
## Number of Newton-Raphson Iterations: 4
## n= 31
# fit Weibull model
fit.Weibull =survreg(Surv(time, status==0)~trt,dat)
summary(fit.Weibull)
##
## Call:
## survreg(formula = Surv(time, status == 0) ~ trt, data = dat)
## Value Std. Error z p
## (Intercept) 5.263 0.221 23.842 1.23e-125
## trtSCTIT 0.611 0.388 1.574 1.16e-01
## trtSIT 0.293 0.326 0.901 3.68e-01
## Log(scale) -0.627 0.234 -2.679 7.39e-03
##
## Scale= 0.534
##
## Weibull distribution
## Loglik(model)= -92.2 Loglik(intercept only)= -93.6
## Chisq= 2.77 on 2 degrees of freedom, p= 0.25
## Number of Newton-Raphson Iterations: 5
## n= 31
# fit exponential model +Age
fit.exp.age = survreg(Surv(time, status==0)~trt+age, dat,dist="exponential")
summary(fit.exp.age)
##
## Call:
## survreg(formula = Surv(time, status == 0) ~ trt + age, data = dat,
## dist = "exponential")
## Value Std. Error z p
## (Intercept) 11.0560 2.2253 4.968 6.76e-07
## trtSCTIT 0.7334 0.7072 1.037 3.00e-01
## trtSIT 0.2329 0.6068 0.384 7.01e-01
## age -0.0966 0.0366 -2.636 8.38e-03
##
## Scale fixed at 1
##
## Exponential distribution
## Loglik(model)= -91 Loglik(intercept only)= -96
## Chisq= 9.91 on 3 degrees of freedom, p= 0.019
## Number of Newton-Raphson Iterations: 5
## n= 31
# fit Weibull model+Age
fit.Weibull.age = survreg(Surv(time,status==0)~trt+age,dat)
summary(fit.Weibull.age)
##
## Call:
## survreg(formula = Surv(time, status == 0) ~ trt + age, data = dat)
## Value Std. Error z p
## (Intercept) 8.5885 1.3214 6.500 8.04e-11
## trtSCTIT 0.3899 0.3505 1.112 2.66e-01
## trtSIT 0.1038 0.2947 0.352 7.25e-01
## age -0.0569 0.0217 -2.620 8.78e-03
## Log(scale) -0.7294 0.2291 -3.184 1.45e-03
##
## Scale= 0.482
##
## Weibull distribution
## Loglik(model)= -87.2 Loglik(intercept only)= -93.6
## Chisq= 12.79 on 3 degrees of freedom, p= 0.0051
## Number of Newton-Raphson Iterations: 7
## n= 31
# fit Cox
fit.Cox = coxph(Surv(time, status==0)~trt,dat)
summary(fit.Cox)
## Call:
## coxph(formula = Surv(time, status == 0) ~ trt, data = dat)
##
## n= 31, number of events= 14
##
## coef exp(coef) se(coef) z Pr(>|z|)
## trtSCTIT -1.0855 0.3377 0.7156 -1.517 0.129
## trtSIT -0.5482 0.5780 0.6082 -0.901 0.367
##
## exp(coef) exp(-coef) lower .95 upper .95
## trtSCTIT 0.3377 2.961 0.08308 1.373
## trtSIT 0.5780 1.730 0.17547 1.904
##
## Concordance= 0.599 (se = 0.08 )
## Rsquare= 0.077 (max possible= 0.933 )
## Likelihood ratio test= 2.49 on 2 df, p=0.2886
## Wald test = 2.41 on 2 df, p=0.2997
## Score (logrank) test = 2.57 on 2 df, p=0.2764
# fit Cox +Age
fit.Cox.age = coxph(Surv(time, status==0)~trt+age,dat)
summary(fit.Cox.age)
## Call:
## coxph(formula = Surv(time, status == 0) ~ trt + age, data = dat)
##
## n= 31, number of events= 14
##
## coef exp(coef) se(coef) z Pr(>|z|)
## trtSCTIT -0.86214 0.42226 0.71845 -1.200 0.23014
## trtSIT -0.29422 0.74511 0.61201 -0.481 0.63070
## age 0.11251 1.11908 0.04125 2.728 0.00638 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## trtSCTIT 0.4223 2.3682 0.1033 1.726
## trtSIT 0.7451 1.3421 0.2245 2.473
## age 1.1191 0.8936 1.0322 1.213
##
## Concordance= 0.769 (se = 0.086 )
## Rsquare= 0.314 (max possible= 0.933 )
## Likelihood ratio test= 11.68 on 3 df, p=0.008546
## Wald test = 9.13 on 3 df, p=0.02755
## Score (logrank) test = 9.72 on 3 df, p=0.0211
datcan <- read.csv("datcan.csv", header = TRUE)
head(datcan)
## trt tl tu status
## 1 radio 0 7 1
## 2 radio 0 8 1
## 3 radio 0 5 1
## 4 radio 4 11 1
## 5 radio 5 12 1
## 6 radio 5 11 1
# create a new dataset "breast" and keep "dat" for future use
breast = datcan
str(breast)
## 'data.frame': 94 obs. of 4 variables:
## $ trt : Factor w/ 2 levels "radchemo","radio": 2 2 2 2 2 2 2 2 2 2 ...
## $ tl : int 0 0 0 4 5 5 6 7 7 11 ...
## $ tu : int 7 8 5 11 12 11 10 16 14 15 ...
## $ status: int 1 1 1 1 1 1 1 1 1 1 ...
# replace 0's with NA as left-censored
breast$tl[breast$tl== 0]= NA
# print the first 4 observations
head(breast, n=4)
## trt tl tu status
## 1 radio NA 7 1
## 2 radio NA 8 1
## 3 radio NA 5 1
## 4 radio 4 11 1
# fit exponential
fit.exp=survreg(Surv(tl,tu,type="interval2")~trt, breast, dist="exponential")
summary(fit.exp)
##
## Call:
## survreg(formula = Surv(tl, tu, type = "interval2") ~ trt, data = breast,
## dist = "exponential")
## Value Std. Error z p
## (Intercept) 3.460 0.178 19.45 3.15e-84
## trtradio 0.659 0.282 2.34 1.94e-02
##
## Scale fixed at 1
##
## Exponential distribution
## Loglik(model)= -143.9 Loglik(intercept only)= -146.7
## Chisq= 5.62 on 1 degrees of freedom, p= 0.018
## Number of Newton-Raphson Iterations: 4
## n= 94
# fit Weibull
fit.Weibull=survreg(Surv(tl,tu,type="interval2")~trt, breast)
summary(fit.Weibull)
##
## Call:
## survreg(formula = Surv(tl, tu, type = "interval2") ~ trt, data = breast)
## Value Std. Error z p
## (Intercept) 3.384 0.119 28.54 3.58e-179
## trtradio 0.535 0.188 2.84 4.54e-03
## Log(scale) -0.419 0.125 -3.35 8.21e-04
##
## Scale= 0.658
##
## Weibull distribution
## Loglik(model)= -139.2 Loglik(intercept only)= -143.3
## Chisq= 8.22 on 1 degrees of freedom, p= 0.0042
## Number of Newton-Raphson Iterations: 5
## n= 94