Install and load survival package.
Install.packages('survival')
library(survival)
Install package for fitting probability distributions. This will be used when doing parametric survival regression modelling.
install.packages('fitdistrplus')
library(fitdistrplus)
Surv() #Create survival object
survfit() #Create Kaplan Meier Survival prob and plot
survdiff() #Log-rank test
coxph() #Fit Cox Regression Model
survreg() #Fit Parametric Regression Model
motion data: Subjects were subjected to motion in a cubical cabin, time to Frank Emesis was the outcome of interest. Interest was in comparing subjects who were exposed to stronger motion to those who were not.
flies data: Time to death of the flies according to whether they received a treatment or not.
Load motion data
motiondata<-read.csv('motiondata.csv')
Create survival object with times as the continuous outcome and censored observations defined as censored. The term !motiondata$censored indicates whether the individual has had the event. ! negates a logical statement. If censored is 1 !censored will be 0.
motiondata$event<-!motiondata$censored
motiondata$survmotion<-Surv(motiondata$times, motiondata$event)
Kaplan Meier survival probabilities for intercept only (no groups)
KM1<-survfit(survmotion~1,data=motiondata)
summary(KM1)
## Call: survfit(formula = survmotion ~ 1, data = motiondata)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 5 49 1 0.980 0.0202 0.941 1.000
## 11 47 2 0.938 0.0347 0.872 1.000
## 13 45 1 0.917 0.0397 0.842 0.998
## 24 44 1 0.896 0.0439 0.814 0.987
## 30 43 1 0.875 0.0476 0.787 0.974
## 50 42 1 0.855 0.0508 0.760 0.960
## 51 40 1 0.833 0.0539 0.734 0.946
## 63 39 1 0.812 0.0566 0.708 0.931
## 65 38 1 0.790 0.0590 0.683 0.915
## 69 36 2 0.747 0.0633 0.632 0.882
## 79 34 1 0.725 0.0652 0.607 0.864
## 82 33 3 0.659 0.0695 0.536 0.810
## 92 30 1 0.637 0.0705 0.512 0.791
## 102 29 1 0.615 0.0714 0.490 0.772
## 115 28 1 0.593 0.0722 0.467 0.753
Kaplan Meier survival probabilities for different motion types
KM2<-survfit(survmotion~strongermotion,data=motiondata)
summary(KM2)
## Call: survfit(formula = survmotion ~ strongermotion, data = motiondata)
##
## strongermotion=0
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 30 21 1 0.952 0.0465 0.866 1.000
## 50 20 1 0.905 0.0641 0.788 1.000
## 51 18 1 0.854 0.0778 0.715 1.000
## 82 16 1 0.801 0.0894 0.644 0.997
## 92 15 1 0.748 0.0981 0.578 0.967
##
## strongermotion=1
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 5 28 1 0.964 0.0351 0.898 1.000
## 11 26 2 0.890 0.0599 0.780 1.000
## 13 24 1 0.853 0.0679 0.730 0.997
## 24 23 1 0.816 0.0744 0.682 0.976
## 63 22 1 0.779 0.0797 0.637 0.952
## 65 21 1 0.742 0.0841 0.594 0.926
## 69 20 2 0.668 0.0906 0.512 0.871
## 79 18 1 0.630 0.0928 0.472 0.841
## 82 17 2 0.556 0.0956 0.397 0.779
## 102 15 1 0.519 0.0961 0.361 0.746
## 115 14 1 0.482 0.0962 0.326 0.713
Plot the survival probabilities
plot(KM1,conf.int=F,lty=1)
Plot survival probabilities by type of motion
plot(KM2,conf.int=T,lty=2:3,yscale=100)
#Plot legend with position at x=60, y=0.4
legend(60, .4, c("Normal Motion", "Stronger Motion"), lty = 2:3)
Log rank test to compare the survival probabilities by type of motion.
survdiff(survmotion~strongermotion,data=motiondata)
## Call:
## survdiff(formula = survmotion ~ strongermotion, data = motiondata)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## strongermotion=0 21 5 8.86 1.68 3.21
## strongermotion=1 28 14 10.14 1.47 3.21
##
## Chisq= 3.2 on 1 degrees of freedom, p= 0.07
CR1<-coxph(survmotion~strongermotion,data=motiondata)
summary(CR1)
## Call:
## coxph(formula = survmotion ~ strongermotion, data = motiondata)
##
## n= 49, number of events= 19
##
## coef exp(coef) se(coef) z Pr(>|z|)
## strongermotion 0.9042 2.4699 0.5214 1.734 0.0829 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## strongermotion 2.47 0.4049 0.889 6.862
##
## Concordance= 0.607 (se = 0.054 )
## Likelihood ratio test= 3.38 on 1 df, p=0.07
## Wald test = 3.01 on 1 df, p=0.08
## Score (logrank) test = 3.22 on 1 df, p=0.07
BIC(CR1)
## [1] 137.1908
Exponential distribution
PR1<-survreg(survmotion~strongermotion,data=motiondata,dist = 'exponential')
summary(PR1)
##
## Call:
## survreg(formula = survmotion ~ strongermotion, data = motiondata,
## dist = "exponential")
## Value Std. Error z p
## (Intercept) 6.041 0.447 13.51 <2e-16
## strongermotion -0.915 0.521 -1.76 0.079
##
## Scale fixed at 1
##
## Exponential distribution
## Loglik(model)= -121 Loglik(intercept only)= -122.7
## Chisq= 3.47 on 1 degrees of freedom, p= 0.062
## Number of Newton-Raphson Iterations: 5
## n= 49
BIC(PR1)
## [1] 249.7095
Weibull distribution
PR2<-survreg(survmotion~strongermotion,data=motiondata,dist = 'weibull')
summary(PR2)
##
## Call:
## survreg(formula = survmotion ~ strongermotion, data = motiondata,
## dist = "weibull")
## Value Std. Error z p
## (Intercept) 5.861 0.464 12.64 <2e-16
## strongermotion -0.801 0.481 -1.67 0.095
## Log(scale) -0.140 0.213 -0.66 0.510
##
## Scale= 0.869
##
## Weibull distribution
## Loglik(model)= -120.8 Loglik(intercept only)= -122.5
## Chisq= 3.53 on 1 degrees of freedom, p= 0.06
## Number of Newton-Raphson Iterations: 5
## n= 49
BIC(PR2)
## [1] 253.1873
Lognormal distribution
PR3<-survreg(survmotion~strongermotion,data=motiondata,dist = 'lognormal')
summary(PR3)
##
## Call:
## survreg(formula = survmotion ~ strongermotion, data = motiondata,
## dist = "lognormal")
## Value Std. Error z p
## (Intercept) 5.763 0.467 12.33 <2e-16
## strongermotion -1.021 0.512 -1.99 0.046
## Log(scale) 0.332 0.180 1.85 0.065
##
## Scale= 1.39
##
## Log Normal distribution
## Loglik(model)= -120.6 Loglik(intercept only)= -122.7
## Chisq= 4.24 on 1 degrees of freedom, p= 0.039
## Number of Newton-Raphson Iterations: 3
## n= 49
BIC(PR3)
## [1] 252.822
Comparing the BICs
data.frame("Cox"=BIC(CR1), "Exponential"=BIC(PR1),"Weibull"=BIC(PR2), "Lognormal"=BIC(PR3))
Plot of the different Parametric distributions and how they fit the outcome data
plot.legend <- c("Weibull", "lognormal", "exponential")
fw<-fitdist(motiondata$times,"weibull")
fl<-fitdist(motiondata$times,"lnorm")
fe<-fitdist(motiondata$times,"exp")
denscomp(list(fw,fl,fe), legendtext = plot.legend)
Load flies data
fliesdata<-read.csv('fliesdata.csv')
Plot outcome
hist(fliesdata$daysfup)
Create survival object
#Create survival object
survflies<-Surv(fliesdata$daysfup, fliesdata$death)
Cox Regression
CR1flies<-coxph(survflies~Treated,data=fliesdata)
summary(CR1flies)
## Call:
## coxph(formula = survflies ~ Treated, data = fliesdata)
##
## n= 1078, number of events= 1050
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Treated -0.26023 0.77088 0.06209 -4.191 2.78e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Treated 0.7709 1.297 0.6825 0.8706
##
## Concordance= 0.538 (se = 0.009 )
## Likelihood ratio test= 17.55 on 1 df, p=3e-05
## Wald test = 17.56 on 1 df, p=3e-05
## Score (logrank) test = 17.66 on 1 df, p=3e-05
BIC(CR1flies)
## [1] 12528.35
Exponential distribution
PR1<-survreg(survflies~Treated,data=fliesdata,dist = 'exponential')
summary(PR1)
##
## Call:
## survreg(formula = survflies ~ Treated, data = fliesdata, dist = "exponential")
## Value Std. Error z p
## (Intercept) 4.1416 0.0433 95.71 <2e-16
## Treated 0.0643 0.0617 1.04 0.3
##
## Scale fixed at 1
##
## Exponential distribution
## Loglik(model)= -5431.9 Loglik(intercept only)= -5432.4
## Chisq= 1.09 on 1 degrees of freedom, p= 0.3
## Number of Newton-Raphson Iterations: 3
## n= 1078
BIC(PR1)
## [1] 10877.68
Weibull distribution
PR2<-survreg(survflies~Treated,data=fliesdata,dist = 'weibull')
summary(PR2)
##
## Call:
## survreg(formula = survflies ~ Treated, data = fliesdata, dist = "weibull")
## Value Std. Error z p
## (Intercept) 4.2219 0.0103 408.55 < 2e-16
## Treated 0.0566 0.0144 3.93 8.5e-05
## Log(scale) -1.4564 0.0248 -58.83 < 2e-16
##
## Scale= 0.233
##
## Weibull distribution
## Loglik(model)= -4490.8 Loglik(intercept only)= -4498.5
## Chisq= 15.34 on 1 degrees of freedom, p= 9e-05
## Number of Newton-Raphson Iterations: 7
## n= 1078
BIC(PR2)
## [1] 9002.637
Lognormal distribution
PR3<-survreg(survflies~Treated,data=fliesdata,dist = 'lognormal')
summary(PR3)
##
## Call:
## survreg(formula = survflies ~ Treated, data = fliesdata, dist = "lognormal")
## Value Std. Error z p
## (Intercept) 4.0777 0.0161 253.66 <2e-16
## Treated 0.0599 0.0229 2.61 0.009
## Log(scale) -0.9852 0.0218 -45.22 <2e-16
##
## Scale= 0.373
##
## Log Normal distribution
## Loglik(model)= -4774.3 Loglik(intercept only)= -4777.7
## Chisq= 6.81 on 1 degrees of freedom, p= 0.0091
## Number of Newton-Raphson Iterations: 3
## n= 1078
BIC(PR3)
## [1] 9569.579
Compare BICs
data.frame("Cox"=BIC(CR1flies), "Exponential"=BIC(PR1),"Weibull"=BIC(PR2), "Lognormal"=BIC(PR3))
Plot the outcome with the different distributions fitted
plot.legend <- c("Weibull", "lognormal", "exponential")
fw<-fitdist(fliesdata$daysfup,"weibull")
fl<-fitdist(fliesdata$daysfup,"lnorm")
fe<-fitdist(fliesdata$daysfup,"exp")
denscomp(list(fw,fl,fe), legendtext = plot.legend)