Survival Analysis Assignment

Reading Data into R and importing packages:

library(survival)
library(flexsurv)

## Warning: package 'flexsurv' was built under R version 4.0.3

data1=read.csv("C:/Users/Prokarso/Documents/Book1.csv")
data1

Creating Indicator Functions for the two groups:

pstatus=1:21
pdata=1:21
tstatus=1:21
tdata=1:21
for(i in 1:21){
  
  if(substr(data1$Placebo[i],(nchar(data1$Placebo[i])),nchar(data1$Placebo[i]))=="+"){
    pstatus[i]=0
    pdata[i]=as.numeric(substr(data1$Placebo[i],0,nchar(data1$Placebo[i])-1))
    
  }
  else{
    pstatus[i]=1
    pdata[i]=as.numeric(data1$Placebo[i])
  }
  
  if(substr(data1$Treatment[i],(nchar(data1$Treatment[i])),nchar(data1$Treatment[i]))=="+"){
    tstatus[i]=0
    tdata[i]=as.numeric(substr(data1$Treatment[i],0,nchar(data1$Treatment[i])-1))
    
    
  }
  else{
    tstatus[i]=1
    tdata[i]=as.numeric(data1$Treatment[i])
  }
}

Inserting new data columns in our data:

data1["PlaceboDataValues"]=pdata
data1["PlaceboStatus"]=pstatus
data1["TreatmentDataValues"]=tdata
data1["TreatmentStatus"]=tstatus
data1

Creating survival objects:

attach(data1)
surv_dat1=Surv(time=PlaceboDataValues,event=PlaceboStatus,type="right")
surv_dat2=Surv(time=TreatmentDataValues,event=TreatmentStatus,type="right")

Fitting Survival functions for Weibull distribution:

weibullfit1=flexsurvreg(surv_dat1~1,dist = "Weibull")
weibullfit1

## Call:
## flexsurvreg(formula = surv_dat1 ~ 1, dist = "Weibull")
## 
## Estimates: 
##        est     L95%    U95%    se    
## shape   1.509   0.909   2.506   0.391
## scale  30.192  19.551  46.622   6.693
## 
## N = 21,  Events: 10,  Censored: 11
## Total time at risk: 359
## Log-likelihood = -44.75689, df = 2
## AIC = 93.51378

plot(weibullfit1,col="green")

weibullfit2=flexsurvreg(surv_dat2~1,dist = "Weibull")
weibullfit2

## Call:
## flexsurvreg(formula = surv_dat2 ~ 1, dist = "Weibull")
## 
## Estimates: 
##        est     L95%    U95%    se    
## shape   1.298   0.886   1.902   0.253
## scale  10.977   7.584  15.889   2.071
## 
## N = 21,  Events: 17,  Censored: 4
## Total time at risk: 182
## Log-likelihood = -56.50655, df = 2
## AIC = 117.0131

plot(weibullfit2,add=TRUE,col="blue")

Fitting Survival functions for Gamma distribution:

gammafit1=flexsurvreg(surv_dat1~1,dist = "gamma")
gammafit1

## Call:
## flexsurvreg(formula = surv_dat1 ~ 1, dist = "gamma")
## 
## Estimates: 
##        est     L95%    U95%    se    
## shape  1.9263  0.9060  4.0954  0.7413
## rate   0.0692  0.0240  0.1996  0.0374
## 
## N = 21,  Events: 10,  Censored: 11
## Total time at risk: 359
## Log-likelihood = -44.55196, df = 2
## AIC = 93.10392

plot(gammafit1,col="green")

gammafit2=flexsurvreg(surv_dat2~1,dist = "gamma")
gammafit2

## Call:
## flexsurvreg(formula = surv_dat2 ~ 1, dist = "gamma")
## 
## Estimates: 
##        est     L95%    U95%    se    
## shape  1.4584  0.8095  2.6274  0.4380
## rate   0.1421  0.0669  0.3019  0.0546
## 
## N = 21,  Events: 17,  Censored: 4
## Total time at risk: 182
## Log-likelihood = -56.58677, df = 2
## AIC = 117.1735

plot(gammafit2,add=TRUE,col="blue")

Fitting Survival functions for Log Logistic distribution:

logisfit1=flexsurvreg(surv_dat1~1,dist = "llogis")
logisfit1

## Call:
## flexsurvreg(formula = surv_dat1 ~ 1, dist = "llogis")
## 
## Estimates: 
##        est     L95%    U95%    se    
## shape   1.860   1.127   3.068   0.475
## scale  22.560  13.983  36.399   5.506
## 
## N = 21,  Events: 10,  Censored: 11
## Total time at risk: 359
## Log-likelihood = -44.48992, df = 2
## AIC = 92.97983

plot(logisfit1,col="green")

logisfit2=flexsurvreg(surv_dat2~1,dist = "llogis")
logisfit2

## Call:
## flexsurvreg(formula = surv_dat2 ~ 1, dist = "llogis")
## 
## Estimates: 
##        est     L95%    U95%    se    
## shape   1.674   1.133   2.475   0.334
## scale   7.714   4.875  12.205   1.806
## 
## N = 21,  Events: 17,  Censored: 4
## Total time at risk: 182
## Log-likelihood = -57.67306, df = 2
## AIC = 119.3461

plot(logisfit2,add=TRUE,col="blue")

Conclusion: The placebo group has a smaller AIC for each distribution, than the treatment group, and therefore, has a better fit. For the placebo group, the log logistic distribution has the smallest AIC among the 3 distributions, and therefore, the best fit. For the treatment group, the weibull distribution has the smallest AIC among the 3 distributions, and therefore, the best fit.

Fitting survival function using product limit estimator:

s1=survfit(surv_dat1~1,type="kaplan-meier")
summary(s1)

## Call: survfit(formula = surv_dat1 ~ 1, type = "kaplan-meier")
## 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     6     21       3    0.857  0.0764        0.720        1.000
##     7     17       1    0.807  0.0869        0.653        0.996
##    10     15       1    0.753  0.0963        0.586        0.968
##    13     12       1    0.690  0.1068        0.510        0.935
##    16     11       1    0.627  0.1141        0.439        0.896
##    22      7       1    0.538  0.1282        0.337        0.858
##    23      6       1    0.448  0.1346        0.249        0.807
##    32      4       1    0.336  0.1400        0.149        0.760

plot(s1,col = "red")


s2=survfit(surv_dat2~1,type="kaplan-meier")
summary(s2)

## Call: survfit(formula = surv_dat2 ~ 1, type = "kaplan-meier")
## 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     1     21       2   0.9048  0.0641       0.7875        1.000
##     2     19       2   0.8095  0.0857       0.6579        0.996
##     3     17       1   0.7619  0.0929       0.5999        0.968
##     4     16       1   0.7143  0.0986       0.5450        0.936
##     5     14       2   0.6122  0.1077       0.4337        0.864
##     8     12       2   0.5102  0.1113       0.3327        0.783
##    11      8       2   0.3827  0.1143       0.2130        0.687
##    12      6       2   0.2551  0.1060       0.1130        0.576
##    15      4       1   0.1913  0.0968       0.0710        0.516
##    22      2       1   0.0957  0.0832       0.0174        0.526
##    23      1       1   0.0000     NaN           NA           NA

lines(s2,col="blue")

Checking whether two survival functions are same:

data1

data2=data.frame(cbind(c(PlaceboDataValues,TreatmentDataValues),c(PlaceboStatus,TreatmentStatus)))
group=rep(c("placebo","treatment"),each=21)
data2["group"]=group
data2

s3=Surv(time=data2$X1,event=data2$X2,type="right")
survdiff(s3~group,data=data2)

## Call:
## survdiff(formula = s3 ~ group, data = data2)
## 
##                  N Observed Expected (O-E)^2/E (O-E)^2/V
## group=placebo   21       10    17.77      3.40      11.1
## group=treatment 21       17     9.23      6.54      11.1
## 
##  Chisq= 11.1  on 1 degrees of freedom, p= 9e-04

Survival Analysis Assignment

Prokarso Chattopadhyay