1 Install Packages

Install and load survival package.

Install.packages('survival')
library(survival)

Install package for fitting probability distributions. This will be used when doing parametric survival regression modelling.

install.packages('fitdistrplus')
library(fitdistrplus)

2 Some Important Functions that will be used

Surv()   #Create survival object

survfit()   #Create Kaplan Meier Survival prob and plot

survdiff() #Log-rank test

coxph()  #Fit Cox Regression Model

survreg() #Fit Parametric Regression Model

3 Datasets

motion data: Subjects were subjected to motion in a cubical cabin, time to Frank Emesis was the outcome of interest. Interest was in comparing subjects who were exposed to stronger motion to those who were not.

flies data: Time to death of the flies according to whether they received a treatment or not.

4 Explore Motion data

Load motion data

motiondata<-read.csv('motiondata.csv')

Create survival object with times as the continuous outcome and censored observations defined as censored. The term !motiondata$censored indicates whether the individual has had the event. ! negates a logical statement. If censored is 1 !censored will be 0.

motiondata$event<-!motiondata$censored
  
motiondata$survmotion<-Surv(motiondata$times, motiondata$event)

5 Kaplan Meier Survival probabilities and plot

Kaplan Meier survival probabilities for intercept only (no groups)

KM1<-survfit(survmotion~1,data=motiondata)
summary(KM1)
## Call: survfit(formula = survmotion ~ 1, data = motiondata)
## 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     5     49       1    0.980  0.0202        0.941        1.000
##    11     47       2    0.938  0.0347        0.872        1.000
##    13     45       1    0.917  0.0397        0.842        0.998
##    24     44       1    0.896  0.0439        0.814        0.987
##    30     43       1    0.875  0.0476        0.787        0.974
##    50     42       1    0.855  0.0508        0.760        0.960
##    51     40       1    0.833  0.0539        0.734        0.946
##    63     39       1    0.812  0.0566        0.708        0.931
##    65     38       1    0.790  0.0590        0.683        0.915
##    69     36       2    0.747  0.0633        0.632        0.882
##    79     34       1    0.725  0.0652        0.607        0.864
##    82     33       3    0.659  0.0695        0.536        0.810
##    92     30       1    0.637  0.0705        0.512        0.791
##   102     29       1    0.615  0.0714        0.490        0.772
##   115     28       1    0.593  0.0722        0.467        0.753

Kaplan Meier survival probabilities for different motion types

KM2<-survfit(survmotion~strongermotion,data=motiondata)
summary(KM2)
## Call: survfit(formula = survmotion ~ strongermotion, data = motiondata)
## 
##                 strongermotion=0 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    30     21       1    0.952  0.0465        0.866        1.000
##    50     20       1    0.905  0.0641        0.788        1.000
##    51     18       1    0.854  0.0778        0.715        1.000
##    82     16       1    0.801  0.0894        0.644        0.997
##    92     15       1    0.748  0.0981        0.578        0.967
## 
##                 strongermotion=1 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     5     28       1    0.964  0.0351        0.898        1.000
##    11     26       2    0.890  0.0599        0.780        1.000
##    13     24       1    0.853  0.0679        0.730        0.997
##    24     23       1    0.816  0.0744        0.682        0.976
##    63     22       1    0.779  0.0797        0.637        0.952
##    65     21       1    0.742  0.0841        0.594        0.926
##    69     20       2    0.668  0.0906        0.512        0.871
##    79     18       1    0.630  0.0928        0.472        0.841
##    82     17       2    0.556  0.0956        0.397        0.779
##   102     15       1    0.519  0.0961        0.361        0.746
##   115     14       1    0.482  0.0962        0.326        0.713

Plot the survival probabilities

plot(KM1,conf.int=F,lty=1)

Plot survival probabilities by type of motion

plot(KM2,conf.int=T,lty=2:3,yscale=100)
#Plot legend with position at x=60, y=0.4
legend(60, .4, c("Normal Motion", "Stronger Motion"), lty = 2:3) 

6 Log-Rank Test

Log rank test to compare the survival probabilities by type of motion.

survdiff(survmotion~strongermotion,data=motiondata)
## Call:
## survdiff(formula = survmotion ~ strongermotion, data = motiondata)
## 
##                   N Observed Expected (O-E)^2/E (O-E)^2/V
## strongermotion=0 21        5     8.86      1.68      3.21
## strongermotion=1 28       14    10.14      1.47      3.21
## 
##  Chisq= 3.2  on 1 degrees of freedom, p= 0.07

7 Cox Regression

CR1<-coxph(survmotion~strongermotion,data=motiondata)
summary(CR1)
## Call:
## coxph(formula = survmotion ~ strongermotion, data = motiondata)
## 
##   n= 49, number of events= 19 
## 
##                  coef exp(coef) se(coef)     z Pr(>|z|)  
## strongermotion 0.9042    2.4699   0.5214 1.734   0.0829 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                exp(coef) exp(-coef) lower .95 upper .95
## strongermotion      2.47     0.4049     0.889     6.862
## 
## Concordance= 0.607  (se = 0.054 )
## Likelihood ratio test= 3.38  on 1 df,   p=0.07
## Wald test            = 3.01  on 1 df,   p=0.08
## Score (logrank) test = 3.22  on 1 df,   p=0.07
BIC(CR1)
## [1] 137.1908

8 Parametric Regresion Models

Exponential distribution

PR1<-survreg(survmotion~strongermotion,data=motiondata,dist = 'exponential')

summary(PR1)
## 
## Call:
## survreg(formula = survmotion ~ strongermotion, data = motiondata, 
##     dist = "exponential")
##                 Value Std. Error     z      p
## (Intercept)     6.041      0.447 13.51 <2e-16
## strongermotion -0.915      0.521 -1.76  0.079
## 
## Scale fixed at 1 
## 
## Exponential distribution
## Loglik(model)= -121   Loglik(intercept only)= -122.7
##  Chisq= 3.47 on 1 degrees of freedom, p= 0.062 
## Number of Newton-Raphson Iterations: 5 
## n= 49
BIC(PR1)
## [1] 249.7095

Weibull distribution

PR2<-survreg(survmotion~strongermotion,data=motiondata,dist = 'weibull')

summary(PR2)
## 
## Call:
## survreg(formula = survmotion ~ strongermotion, data = motiondata, 
##     dist = "weibull")
##                 Value Std. Error     z      p
## (Intercept)     5.861      0.464 12.64 <2e-16
## strongermotion -0.801      0.481 -1.67  0.095
## Log(scale)     -0.140      0.213 -0.66  0.510
## 
## Scale= 0.869 
## 
## Weibull distribution
## Loglik(model)= -120.8   Loglik(intercept only)= -122.5
##  Chisq= 3.53 on 1 degrees of freedom, p= 0.06 
## Number of Newton-Raphson Iterations: 5 
## n= 49
BIC(PR2)
## [1] 253.1873

Lognormal distribution

PR3<-survreg(survmotion~strongermotion,data=motiondata,dist = 'lognormal')

summary(PR3)
## 
## Call:
## survreg(formula = survmotion ~ strongermotion, data = motiondata, 
##     dist = "lognormal")
##                 Value Std. Error     z      p
## (Intercept)     5.763      0.467 12.33 <2e-16
## strongermotion -1.021      0.512 -1.99  0.046
## Log(scale)      0.332      0.180  1.85  0.065
## 
## Scale= 1.39 
## 
## Log Normal distribution
## Loglik(model)= -120.6   Loglik(intercept only)= -122.7
##  Chisq= 4.24 on 1 degrees of freedom, p= 0.039 
## Number of Newton-Raphson Iterations: 3 
## n= 49
BIC(PR3)
## [1] 252.822

Comparing the BICs

data.frame("Cox"=BIC(CR1), "Exponential"=BIC(PR1),"Weibull"=BIC(PR2), "Lognormal"=BIC(PR3))

Plot of the different Parametric distributions and how they fit the outcome data

plot.legend <- c("Weibull", "lognormal", "exponential")

fw<-fitdist(motiondata$times,"weibull")
fl<-fitdist(motiondata$times,"lnorm")
fe<-fitdist(motiondata$times,"exp")

denscomp(list(fw,fl,fe), legendtext = plot.legend)

9 Parametric Regression with Flies data

Load flies data

fliesdata<-read.csv('fliesdata.csv')

Plot outcome

hist(fliesdata$daysfup)

Create survival object

#Create survival object
survflies<-Surv(fliesdata$daysfup, fliesdata$death)

Cox Regression

CR1flies<-coxph(survflies~Treated,data=fliesdata)

summary(CR1flies)
## Call:
## coxph(formula = survflies ~ Treated, data = fliesdata)
## 
##   n= 1078, number of events= 1050 
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)    
## Treated -0.26023   0.77088  0.06209 -4.191 2.78e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## Treated    0.7709      1.297    0.6825    0.8706
## 
## Concordance= 0.538  (se = 0.009 )
## Likelihood ratio test= 17.55  on 1 df,   p=3e-05
## Wald test            = 17.56  on 1 df,   p=3e-05
## Score (logrank) test = 17.66  on 1 df,   p=3e-05
BIC(CR1flies)
## [1] 12528.35

Exponential distribution

PR1<-survreg(survflies~Treated,data=fliesdata,dist = 'exponential')

summary(PR1)
## 
## Call:
## survreg(formula = survflies ~ Treated, data = fliesdata, dist = "exponential")
##              Value Std. Error     z      p
## (Intercept) 4.1416     0.0433 95.71 <2e-16
## Treated     0.0643     0.0617  1.04    0.3
## 
## Scale fixed at 1 
## 
## Exponential distribution
## Loglik(model)= -5431.9   Loglik(intercept only)= -5432.4
##  Chisq= 1.09 on 1 degrees of freedom, p= 0.3 
## Number of Newton-Raphson Iterations: 3 
## n= 1078
BIC(PR1)
## [1] 10877.68

Weibull distribution

PR2<-survreg(survflies~Treated,data=fliesdata,dist = 'weibull')

summary(PR2)
## 
## Call:
## survreg(formula = survflies ~ Treated, data = fliesdata, dist = "weibull")
##               Value Std. Error      z       p
## (Intercept)  4.2219     0.0103 408.55 < 2e-16
## Treated      0.0566     0.0144   3.93 8.5e-05
## Log(scale)  -1.4564     0.0248 -58.83 < 2e-16
## 
## Scale= 0.233 
## 
## Weibull distribution
## Loglik(model)= -4490.8   Loglik(intercept only)= -4498.5
##  Chisq= 15.34 on 1 degrees of freedom, p= 9e-05 
## Number of Newton-Raphson Iterations: 7 
## n= 1078
BIC(PR2)
## [1] 9002.637

Lognormal distribution

PR3<-survreg(survflies~Treated,data=fliesdata,dist = 'lognormal')

summary(PR3)
## 
## Call:
## survreg(formula = survflies ~ Treated, data = fliesdata, dist = "lognormal")
##               Value Std. Error      z      p
## (Intercept)  4.0777     0.0161 253.66 <2e-16
## Treated      0.0599     0.0229   2.61  0.009
## Log(scale)  -0.9852     0.0218 -45.22 <2e-16
## 
## Scale= 0.373 
## 
## Log Normal distribution
## Loglik(model)= -4774.3   Loglik(intercept only)= -4777.7
##  Chisq= 6.81 on 1 degrees of freedom, p= 0.0091 
## Number of Newton-Raphson Iterations: 3 
## n= 1078
BIC(PR3)
## [1] 9569.579

Compare BICs

data.frame("Cox"=BIC(CR1flies), "Exponential"=BIC(PR1),"Weibull"=BIC(PR2), "Lognormal"=BIC(PR3))

Plot the outcome with the different distributions fitted

plot.legend <- c("Weibull", "lognormal", "exponential")

fw<-fitdist(fliesdata$daysfup,"weibull")
fl<-fitdist(fliesdata$daysfup,"lnorm")
fe<-fitdist(fliesdata$daysfup,"exp")

denscomp(list(fw,fl,fe), legendtext = plot.legend)