Reading Data into R and importing packages:
library(survival)
library(flexsurv)
## Warning: package 'flexsurv' was built under R version 4.0.3
data1=read.csv("C:/Users/Prokarso/Documents/Book1.csv")
data1
Creating Indicator Functions for the two groups:
pstatus=1:21
pdata=1:21
tstatus=1:21
tdata=1:21
for(i in 1:21){
if(substr(data1$Placebo[i],(nchar(data1$Placebo[i])),nchar(data1$Placebo[i]))=="+"){
pstatus[i]=0
pdata[i]=as.numeric(substr(data1$Placebo[i],0,nchar(data1$Placebo[i])-1))
}
else{
pstatus[i]=1
pdata[i]=as.numeric(data1$Placebo[i])
}
if(substr(data1$Treatment[i],(nchar(data1$Treatment[i])),nchar(data1$Treatment[i]))=="+"){
tstatus[i]=0
tdata[i]=as.numeric(substr(data1$Treatment[i],0,nchar(data1$Treatment[i])-1))
}
else{
tstatus[i]=1
tdata[i]=as.numeric(data1$Treatment[i])
}
}
Inserting new data columns in our data:
data1["PlaceboDataValues"]=pdata
data1["PlaceboStatus"]=pstatus
data1["TreatmentDataValues"]=tdata
data1["TreatmentStatus"]=tstatus
data1
Creating survival objects:
attach(data1)
surv_dat1=Surv(time=PlaceboDataValues,event=PlaceboStatus,type="right")
surv_dat2=Surv(time=TreatmentDataValues,event=TreatmentStatus,type="right")
Fitting Survival functions for Weibull distribution:
weibullfit1=flexsurvreg(surv_dat1~1,dist = "Weibull")
weibullfit1
## Call:
## flexsurvreg(formula = surv_dat1 ~ 1, dist = "Weibull")
##
## Estimates:
## est L95% U95% se
## shape 1.509 0.909 2.506 0.391
## scale 30.192 19.551 46.622 6.693
##
## N = 21, Events: 10, Censored: 11
## Total time at risk: 359
## Log-likelihood = -44.75689, df = 2
## AIC = 93.51378
plot(weibullfit1,col="green")
weibullfit2=flexsurvreg(surv_dat2~1,dist = "Weibull")
weibullfit2
## Call:
## flexsurvreg(formula = surv_dat2 ~ 1, dist = "Weibull")
##
## Estimates:
## est L95% U95% se
## shape 1.298 0.886 1.902 0.253
## scale 10.977 7.584 15.889 2.071
##
## N = 21, Events: 17, Censored: 4
## Total time at risk: 182
## Log-likelihood = -56.50655, df = 2
## AIC = 117.0131
plot(weibullfit2,add=TRUE,col="blue")
Fitting Survival functions for Gamma distribution:
gammafit1=flexsurvreg(surv_dat1~1,dist = "gamma")
gammafit1
## Call:
## flexsurvreg(formula = surv_dat1 ~ 1, dist = "gamma")
##
## Estimates:
## est L95% U95% se
## shape 1.9263 0.9060 4.0954 0.7413
## rate 0.0692 0.0240 0.1996 0.0374
##
## N = 21, Events: 10, Censored: 11
## Total time at risk: 359
## Log-likelihood = -44.55196, df = 2
## AIC = 93.10392
plot(gammafit1,col="green")
gammafit2=flexsurvreg(surv_dat2~1,dist = "gamma")
gammafit2
## Call:
## flexsurvreg(formula = surv_dat2 ~ 1, dist = "gamma")
##
## Estimates:
## est L95% U95% se
## shape 1.4584 0.8095 2.6274 0.4380
## rate 0.1421 0.0669 0.3019 0.0546
##
## N = 21, Events: 17, Censored: 4
## Total time at risk: 182
## Log-likelihood = -56.58677, df = 2
## AIC = 117.1735
plot(gammafit2,add=TRUE,col="blue")
Fitting Survival functions for Log Logistic distribution:
logisfit1=flexsurvreg(surv_dat1~1,dist = "llogis")
logisfit1
## Call:
## flexsurvreg(formula = surv_dat1 ~ 1, dist = "llogis")
##
## Estimates:
## est L95% U95% se
## shape 1.860 1.127 3.068 0.475
## scale 22.560 13.983 36.399 5.506
##
## N = 21, Events: 10, Censored: 11
## Total time at risk: 359
## Log-likelihood = -44.48992, df = 2
## AIC = 92.97983
plot(logisfit1,col="green")
logisfit2=flexsurvreg(surv_dat2~1,dist = "llogis")
logisfit2
## Call:
## flexsurvreg(formula = surv_dat2 ~ 1, dist = "llogis")
##
## Estimates:
## est L95% U95% se
## shape 1.674 1.133 2.475 0.334
## scale 7.714 4.875 12.205 1.806
##
## N = 21, Events: 17, Censored: 4
## Total time at risk: 182
## Log-likelihood = -57.67306, df = 2
## AIC = 119.3461
plot(logisfit2,add=TRUE,col="blue")
Conclusion: The placebo group has a smaller AIC for each distribution, than the treatment group, and therefore, has a better fit. For the placebo group, the log logistic distribution has the smallest AIC among the 3 distributions, and therefore, the best fit. For the treatment group, the weibull distribution has the smallest AIC among the 3 distributions, and therefore, the best fit.
Fitting survival function using product limit estimator:
s1=survfit(surv_dat1~1,type="kaplan-meier")
summary(s1)
## Call: survfit(formula = surv_dat1 ~ 1, type = "kaplan-meier")
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 6 21 3 0.857 0.0764 0.720 1.000
## 7 17 1 0.807 0.0869 0.653 0.996
## 10 15 1 0.753 0.0963 0.586 0.968
## 13 12 1 0.690 0.1068 0.510 0.935
## 16 11 1 0.627 0.1141 0.439 0.896
## 22 7 1 0.538 0.1282 0.337 0.858
## 23 6 1 0.448 0.1346 0.249 0.807
## 32 4 1 0.336 0.1400 0.149 0.760
plot(s1,col = "red")
s2=survfit(surv_dat2~1,type="kaplan-meier")
summary(s2)
## Call: survfit(formula = surv_dat2 ~ 1, type = "kaplan-meier")
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 21 2 0.9048 0.0641 0.7875 1.000
## 2 19 2 0.8095 0.0857 0.6579 0.996
## 3 17 1 0.7619 0.0929 0.5999 0.968
## 4 16 1 0.7143 0.0986 0.5450 0.936
## 5 14 2 0.6122 0.1077 0.4337 0.864
## 8 12 2 0.5102 0.1113 0.3327 0.783
## 11 8 2 0.3827 0.1143 0.2130 0.687
## 12 6 2 0.2551 0.1060 0.1130 0.576
## 15 4 1 0.1913 0.0968 0.0710 0.516
## 22 2 1 0.0957 0.0832 0.0174 0.526
## 23 1 1 0.0000 NaN NA NA
lines(s2,col="blue")
Checking whether two survival functions are same:
data1
data2=data.frame(cbind(c(PlaceboDataValues,TreatmentDataValues),c(PlaceboStatus,TreatmentStatus)))
group=rep(c("placebo","treatment"),each=21)
data2["group"]=group
data2
s3=Surv(time=data2$X1,event=data2$X2,type="right")
survdiff(s3~group,data=data2)
## Call:
## survdiff(formula = s3 ~ group, data = data2)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## group=placebo 21 10 17.77 3.40 11.1
## group=treatment 21 17 9.23 6.54 11.1
##
## Chisq= 11.1 on 1 degrees of freedom, p= 9e-04