Data Description

Diabetic Data is a dataset with 394 observations with the following 2 variables.

The 197 patients in this dataset were a 50% random sample of the patients with “high-risk” diabetic retinopathy as defined by the Diabetic Retinopathy Study (DRS). Each patient had one eye randomized to laser treatment and the other eye received no treatment. For each eye, the event of interest was the time from initiation of treatment to the time when visual acuity dropped below 5/200 two visits in a row. Thus there is a built-in lag time of approximately 6 months (visits were every 3 months). Survival times in this dataset are therefore the actual time to blindness in months, minus the minimum possible time to event (6.5 months). Censoring was caused by death, dropout, or end.

Load the required libraries

library(survival)
library(flexsurv)
library(ggplot2)
library(EnvStats)

Load the diabetic dataset and create the survival object

diabetic = diabetic[c("time", "status")]
head(diabetic)
##    time status
## 1 46.23      0
## 2 46.23      0
## 3 42.50      0
## 4 31.30      1
## 5 42.27      0
## 6 42.27      0
s <- with(diabetic,Surv(time,status))

Histogram of time variable

diabetic["status"] = as.factor(diabetic$status)
g_hist <- ggplot(diabetic, aes(x = time, color = status, fill = status))
g_hist + geom_histogram(position = "identity")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(diabetic, aes(x= status, fill = status))+
  geom_bar( width=0.3)+
  geom_text(stat = "count", aes(label=..count..), vjust=-1)+
  theme_minimal()

Fitting Weibull, Exponential, LogNormal and Gamma distributions

sWei  <- flexsurvreg(s ~ 1,dist='weibull',data= diabetic)
sLno  <- flexsurvreg(s ~ 1,dist='lnorm',data= diabetic)   
sExp <- flexsurvreg(s ~ 1, dist = "exponential", data = diabetic) 
sGam <- flexsurvreg(s ~ 1, dist = "gamma", data = diabetic)
sGenGam <- flexsurvreg(s ~ 1, dist = "gengamma", data = diabetic)

S(t) for the fitted distributions

par(mfrow = c(2, 2))
plot(sWei, col.obs = "white", ci = FALSE, type = "hazard", col = "brown3", xlab = "time", main = "Weibull")
plot(sLno, col.obs = "white", ci = FALSE, col="brown3", type = "hazard", xlab = "time", main = "Lognormal")
plot(sExp, col.obs = "white", ci = FALSE, col = "brown3", type = "hazard", xlab = "time", main = "Exponential")
plot(sGam, col.obs = "white", ci = FALSE, col = "brown3", type = "hazard", xlab = "time", main = "Gamma")

H(t) for the fitted distributions

par(mfrow = c(2, 2))
plot(sWei, col.obs = "white", ci = FALSE, type = "survival", col = "steelblue", xlab = "time", ylab = "Surv Prob", main = "Weibull")
plot(sLno, col.obs = "white", ci = FALSE, col="steelblue", type = "survival", xlab = "time", ylab = "Surv Prob", main = "Lognormal")
plot(sExp, col.obs = "white", ci = FALSE, col = "steelblue", type = "survival", xlab = "time", ylab = "Surv Prob", main = "Exponential")
plot(sGam, col.obs = "white", ci = FALSE, col = "steelblue", type = "survival", xlab = "time", ylab = "Surv Prob", main = "Generalized Gamma")

Plotting S(t) for fitted distributions

Kaplan Meier curve

km = survfit(s ~ 1, data = diabetic)


kmi <- rep("KM",length(km$time))
km_df <- data.frame(km$time,km$surv,kmi)
names(km_df) <- c("Time","Surv","Model")


Weibull <- rep("Weibull",length(km$time))
Weibull_df <- data.frame(as.data.frame(summary(sWei))$time,as.data.frame(summary(sWei))$est, Weibull)
names(Weibull_df) <- c("Time","Surv","Model")

Exp <- rep("Exponential",length(km$time))
Exp_df <- data.frame(as.data.frame(summary(sExp))$time,as.data.frame(summary(sExp))$est,Exp)
names(Exp_df) <- c("Time","Surv","Model")

LNo <- rep("LogNormal",length(km$time))
LNo_df <- data.frame(as.data.frame(summary(sLno))$time,as.data.frame(summary(sLno))$est, LNo)
names(LNo_df) <- c("Time","Surv","Model")

Gamma <- rep("Gamma",length(km$time))
Gamma_df <- data.frame(as.data.frame(summary(sGam))$time,as.data.frame(summary(sGam))$est, Gamma)
names(Gamma_df) <- c("Time","Surv","Model")

plot_df <- rbind(km_df, Weibull_df, Exp_df, LNo_df, Gamma_df)

library(ggplot2)
p <- ggplot(plot_df, aes(x = Time, y = Surv, color = Model))
p + geom_line(size = 1)

QQPLOT for the fitted distributions

censored <- diabetic$status == "0"

par(mfrow = c(2, 2))

with(diabetic, qqPlotCensored(time, censored, prob.method = "kaplan-meier",
 points.col = "blue", add.line = TRUE,
 dist = "weibull", param.list = list(shape = 0.7974, scale = 109.2914), main="Q-Q Plot for\nCensored Data Set"))

with(diabetic, qqPlotCensored(time, censored, prob.method = "kaplan-meier",
 points.col = "blue", add.line = TRUE,
 dist = "lnorm", param.list = list(meanlog = 4.334, sdlog = 1.948), main="Q-Q Plot for\nCensored Data Set"))

with(diabetic, qqPlotCensored(time, censored, prob.method = "kaplan-meier",
 points.col = "blue", add.line = TRUE,
 distribution = "exp", param.list = list(rate = 0.011057), main="Q-Q Plot for\nCensored Data Set"))

with(diabetic, qqPlotCensored(time, censored, prob.method = "kaplan-meier",
 points.col = "blue", add.line = TRUE,
 distribution = "gamma", param.list = list(shape = 0.77490, scale = 0.00703), main="Q-Q Plot for\nCensored Data Set"))

Log Likelihood Test

exp_ll = sExp$loglik
wei_ll = sWei$loglik
lno_ll = sLno$loglik
gam_ll = sGam$loglik
ggam_ll = sGenGam$loglik

GOF = 2 * (ggam_ll - lno_ll)
GOF
## [1] 0.7436528
GOF1 = 2 * (ggam_ll - wei_ll)
GOF1
## [1] 10.93731
GOF2 = 2 * (wei_ll - exp_ll)
GOF2
## [1] 10.51515
pchisq(GOF, 1, lower.tail = FALSE)
## [1] 0.3884933
pchisq(GOF1, 1, lower.tail = FALSE)
## [1] 0.0009424668
pchisq(GOF2, 1, lower.tail = FALSE)
## [1] 0.001184001