Diabetic Data is a dataset with 394 observations with the following 2 variables.
The 197 patients in this dataset were a 50% random sample of the patients with “high-risk” diabetic retinopathy as defined by the Diabetic Retinopathy Study (DRS). Each patient had one eye randomized to laser treatment and the other eye received no treatment. For each eye, the event of interest was the time from initiation of treatment to the time when visual acuity dropped below 5/200 two visits in a row. Thus there is a built-in lag time of approximately 6 months (visits were every 3 months). Survival times in this dataset are therefore the actual time to blindness in months, minus the minimum possible time to event (6.5 months). Censoring was caused by death, dropout, or end.
library(survival)
library(flexsurv)
library(ggplot2)
library(EnvStats)
diabetic = diabetic[c("time", "status")]
head(diabetic)
## time status
## 1 46.23 0
## 2 46.23 0
## 3 42.50 0
## 4 31.30 1
## 5 42.27 0
## 6 42.27 0
s <- with(diabetic,Surv(time,status))
diabetic["status"] = as.factor(diabetic$status)
g_hist <- ggplot(diabetic, aes(x = time, color = status, fill = status))
g_hist + geom_histogram(position = "identity")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(diabetic, aes(x= status, fill = status))+
geom_bar( width=0.3)+
geom_text(stat = "count", aes(label=..count..), vjust=-1)+
theme_minimal()
sWei <- flexsurvreg(s ~ 1,dist='weibull',data= diabetic)
sLno <- flexsurvreg(s ~ 1,dist='lnorm',data= diabetic)
sExp <- flexsurvreg(s ~ 1, dist = "exponential", data = diabetic)
sGam <- flexsurvreg(s ~ 1, dist = "gamma", data = diabetic)
sGenGam <- flexsurvreg(s ~ 1, dist = "gengamma", data = diabetic)
par(mfrow = c(2, 2))
plot(sWei, col.obs = "white", ci = FALSE, type = "hazard", col = "brown3", xlab = "time", main = "Weibull")
plot(sLno, col.obs = "white", ci = FALSE, col="brown3", type = "hazard", xlab = "time", main = "Lognormal")
plot(sExp, col.obs = "white", ci = FALSE, col = "brown3", type = "hazard", xlab = "time", main = "Exponential")
plot(sGam, col.obs = "white", ci = FALSE, col = "brown3", type = "hazard", xlab = "time", main = "Gamma")
par(mfrow = c(2, 2))
plot(sWei, col.obs = "white", ci = FALSE, type = "survival", col = "steelblue", xlab = "time", ylab = "Surv Prob", main = "Weibull")
plot(sLno, col.obs = "white", ci = FALSE, col="steelblue", type = "survival", xlab = "time", ylab = "Surv Prob", main = "Lognormal")
plot(sExp, col.obs = "white", ci = FALSE, col = "steelblue", type = "survival", xlab = "time", ylab = "Surv Prob", main = "Exponential")
plot(sGam, col.obs = "white", ci = FALSE, col = "steelblue", type = "survival", xlab = "time", ylab = "Surv Prob", main = "Generalized Gamma")
km = survfit(s ~ 1, data = diabetic)
kmi <- rep("KM",length(km$time))
km_df <- data.frame(km$time,km$surv,kmi)
names(km_df) <- c("Time","Surv","Model")
Weibull <- rep("Weibull",length(km$time))
Weibull_df <- data.frame(as.data.frame(summary(sWei))$time,as.data.frame(summary(sWei))$est, Weibull)
names(Weibull_df) <- c("Time","Surv","Model")
Exp <- rep("Exponential",length(km$time))
Exp_df <- data.frame(as.data.frame(summary(sExp))$time,as.data.frame(summary(sExp))$est,Exp)
names(Exp_df) <- c("Time","Surv","Model")
LNo <- rep("LogNormal",length(km$time))
LNo_df <- data.frame(as.data.frame(summary(sLno))$time,as.data.frame(summary(sLno))$est, LNo)
names(LNo_df) <- c("Time","Surv","Model")
Gamma <- rep("Gamma",length(km$time))
Gamma_df <- data.frame(as.data.frame(summary(sGam))$time,as.data.frame(summary(sGam))$est, Gamma)
names(Gamma_df) <- c("Time","Surv","Model")
plot_df <- rbind(km_df, Weibull_df, Exp_df, LNo_df, Gamma_df)
library(ggplot2)
p <- ggplot(plot_df, aes(x = Time, y = Surv, color = Model))
p + geom_line(size = 1)
censored <- diabetic$status == "0"
par(mfrow = c(2, 2))
with(diabetic, qqPlotCensored(time, censored, prob.method = "kaplan-meier",
points.col = "blue", add.line = TRUE,
dist = "weibull", param.list = list(shape = 0.7974, scale = 109.2914), main="Q-Q Plot for\nCensored Data Set"))
with(diabetic, qqPlotCensored(time, censored, prob.method = "kaplan-meier",
points.col = "blue", add.line = TRUE,
dist = "lnorm", param.list = list(meanlog = 4.334, sdlog = 1.948), main="Q-Q Plot for\nCensored Data Set"))
with(diabetic, qqPlotCensored(time, censored, prob.method = "kaplan-meier",
points.col = "blue", add.line = TRUE,
distribution = "exp", param.list = list(rate = 0.011057), main="Q-Q Plot for\nCensored Data Set"))
with(diabetic, qqPlotCensored(time, censored, prob.method = "kaplan-meier",
points.col = "blue", add.line = TRUE,
distribution = "gamma", param.list = list(shape = 0.77490, scale = 0.00703), main="Q-Q Plot for\nCensored Data Set"))
exp_ll = sExp$loglik
wei_ll = sWei$loglik
lno_ll = sLno$loglik
gam_ll = sGam$loglik
ggam_ll = sGenGam$loglik
GOF = 2 * (ggam_ll - lno_ll)
GOF
## [1] 0.7436528
GOF1 = 2 * (ggam_ll - wei_ll)
GOF1
## [1] 10.93731
GOF2 = 2 * (wei_ll - exp_ll)
GOF2
## [1] 10.51515
pchisq(GOF, 1, lower.tail = FALSE)
## [1] 0.3884933
pchisq(GOF1, 1, lower.tail = FALSE)
## [1] 0.0009424668
pchisq(GOF2, 1, lower.tail = FALSE)
## [1] 0.001184001