rm(list=ls())
setwd("~/OneDrive - Danmarks Tekniske Universitet/Courses, Edward, DTU OD/DTU02418, Statistical Modelling = Theory and Practice/L6, DTU02418")
## Example 11.5:
xdat<- scan('rat.dat',skip = 1,
what = list(group = 0, surv = 0, status = 0))
Read 40 records
## Rat example, group 1
timeSurv_1 <- xdat$surv[xdat$group == 1]
status_1 <- xdat$status[xdat$group == 1]
index_timeSurv_1 <- sort(timeSurv_1, index.return = TRUE)$ix
cbind(timeSurv_1, status_1)[index_timeSurv_1, ]
timeSurv_1 status_1
[1,] 143 1
[2,] 164 1
[3,] 188 1
[4,] 188 1
[5,] 190 1
[6,] 192 1
[7,] 206 1
[8,] 209 1
[9,] 213 1
[10,] 216 1
[11,] 216 0
[12,] 220 1
[13,] 227 1
[14,] 230 1
[15,] 234 1
[16,] 244 0
[17,] 246 1
[18,] 265 1
[19,] 304 1
number_1 <- length(timeSurv_1) ## no of rats
status.sort_1 <- status_1[sort(timeSurv_1, index.return = TRUE)$ix]
timeSurv.sort_1 <- sort(timeSurv_1) ## time of events
## Setting up the table
timeSurv.sort.new_1 <- timeSurv.sort_1[c(TRUE, diff(timeSurv.sort_1) != 0)]
number.new_1 <- length(timeSurv.sort.new_1)
seq_number.new_1 <- 1 : number.new_1
death_timeSurv.sort.new_1 <- numeric(number.new_1) # To define [number of ratdeath at time t(seq_number.new_1)]
censor_timeSurv.sort.new_1 <- numeric(number.new_1) # To define [number of ratcensor at time t(seq_number.new_1)]
for(i in 1:number.new_1){
death_timeSurv.sort.new_1[i] <- sum(status_1[timeSurv.sort_1 == timeSurv.sort.new_1[i]])
censor_timeSurv.sort.new_1[i] <- sum(1 - status_1[timeSurv.sort_1 == timeSurv.sort.new_1[i]])
}
risk_timeSurv.sort.new_1 <- number_1 - c(0, cumsum((death_timeSurv.sort.new_1 + censor_timeSurv.sort.new_1)[- number.new_1])) # [number of rat at risk at time t(seq_number.new_1)]
funcSurv_1 <- cumprod(1 - death_timeSurv.sort.new_1 / risk_timeSurv.sort.new_1) # [survival function]
round(cbind(seq_number.new_1, timeSurv.sort.new_1, risk_timeSurv.sort.new_1, d = death_timeSurv.sort.new_1, c = censor_timeSurv.sort.new_1, CProb = 1 - death_timeSurv.sort.new_1 / risk_timeSurv.sort.new_1, funcSurv_1 = funcSurv_1), digits = 2)
seq_number.new_1 timeSurv.sort.new_1 risk_timeSurv.sort.new_1 d c
[1,] 1 143 19 1 0
[2,] 2 164 18 1 0
[3,] 3 188 17 2 0
[4,] 4 190 15 1 0
[5,] 5 192 14 1 0
[6,] 6 206 13 1 0
[7,] 7 209 12 1 0
[8,] 8 213 11 11
[9,] 9 216 10 2 0
[10,] 10 220 8 1 0
[11,] 11 227 7 1 0
[12,] 12 230 6 1 0
[13,] 13 234 5 1 0
[14,] 14 244 4 1 0
[15,] 15 246 3 1 0
[16,] 16 265 2 0 1
[17,] 17 304 1 0 1
CProb funcSurv_1
[1,] 0.95 0.95
[2,] 0.94 0.89
[3,] 0.88 0.79
[4,] 0.93 0.74
[5,] 0.93 0.68
[6,] 0.92 0.63
[7,] 0.92 0.58
[8,] 0.91 0.53
[9,] 0.80 0.42
[10,] 0.88 0.37
[11,] 0.86 0.32
[12,] 0.83 0.26
[13,] 0.80 0.21
[14,] 0.75 0.16
[15,] 0.67 0.11
[16,] 1.00 0.11
[17,] 1.00 0.11
plot(timeSurv.sort.new_1, funcSurv_1, type = "s", ylim = c(-0.05,1.025), xlim = c(100,350), xlab = "Survival Time (Days)", ylab = "Kaplan-Meier Estimated Survival Probability") # [Kaplan-Meier estimate of survival function]
title("Kaplan-Meier Estimate of Survival Function for Group 1 in Rats Test")
## Variance of log-S (and naive CI)
variance_survFunc.log_1 <- cumsum(death_timeSurv.sort.new_1 / (risk_timeSurv.sort.new_1 * (risk_timeSurv.sort.new_1 - death_timeSurv.sort.new_1)))
variance_survFunc_1 <- variance_survFunc.log_1 * funcSurv_1^2
lines(timeSurv.sort.new_1, funcSurv_1 + qnorm(0.975) * sqrt(variance_survFunc_1), lty = 2, type = "s")
lines(timeSurv.sort.new_1, funcSurv_1 - qnorm(0.975) * sqrt(variance_survFunc_1), lty = 2, type = "s") #! Note how lines cross 1 and 0
## Unceartainty using log-log
variance_survFunc.loglog_1 <- 1 / (log(funcSurv_1))^2 * cumsum(death_timeSurv.sort.new_1 / (risk_timeSurv.sort.new_1 * (risk_timeSurv.sort.new_1 - death_timeSurv.sort.new_1))) # [\hat{Var}[log(-log(\hat{S}(t)))]]
confidenceU_funcSurv_1 <- log(- log(funcSurv_1)) + qnorm(0.975) * sqrt(variance_survFunc.loglog_1)
confidenceL_funcSurv_1 <- log(- log(funcSurv_1)) - qnorm(0.975) * sqrt(variance_survFunc.loglog_1)
lines(timeSurv.sort.new_1, exp(- exp(confidenceU_funcSurv_1)), lty = 2, col = 2, type = "s")
lines(timeSurv.sort.new_1, exp(- exp(confidenceL_funcSurv_1)), lty = 2, col = 2, type = "s") #! intervals in (0,1)
lines(c(100,320), 0.5 * c(1,1), col = 3, lty = 3)
lines(c(100,320), 0.25 * c(1,1), col = 4, lty = 3)
lines(c(100,320), 0.75 * c(1,1), col = 4, lty = 3)
There is a R package for survival analysis.
## Directly in R
library(survival)
dat_Surv <- data.frame(time = xdat$surv, dead = xdat$status, group = xdat$group)
Surv_1 <- survfit(Surv(time, dead == 1) ~ 1, conf.type = "plain",
data = dat_Surv[dat_Surv$group == 1, ])
summary(Surv_1)
Call: survfit(formula = Surv(time, dead == 1) ~ 1, data = dat_Surv[dat_Surv$group ==
1, ], conf.type = "plain")
time n.risk n.event survival std.err lower 95% CI upper 95% CI
143 19 1 0.9474 0.0512 0.8470 1.000
164 18 1 0.8947 0.0704 0.7567 1.000
188 17 2 0.7895 0.0935 0.6062 0.973
190 15 1 0.7368 0.1010 0.5388 0.935
192 14 1 0.6842 0.1066 0.4752 0.893
206 13 1 0.6316 0.1107 0.4147 0.848
209 12 1 0.5789 0.1133 0.3569 0.801
213 11 1 0.5263 0.1145 0.3018 0.751
216 10 1 0.4737 0.1145 0.2492 0.698
220 8 1 0.4145 0.1145 0.1900 0.639
227 7 1 0.3553 0.1124 0.1349 0.576
230 6 1 0.2961 0.1082 0.0841 0.508
234 5 1 0.2368 0.1015 0.0380 0.436
246 3 1 0.1579 0.0934 0.0000 0.341
265 2 1 0.0789 0.0728 0.0000 0.222
304 1 1 0.0000 NaN NaN NaN
quantile(Surv_1,c(0.25,0.5,0.75))
$quantile
25 50 75
190 216 234
$lower
25 50 75
188 192 216
$upper
25 50 75
216 234 265
## Kaplan-Meier plot
plot(Surv_1, xlim = c(100,320))
plot(Surv_1, fun = function(x){1-x}) #? [death function] = 1 - survival function
## Using R (log-log)
Surv.loglog_1 <- survfit(Surv(time, dead == 1) ~ 1, conf.type = "log-log",
data = dat_Surv[dat_Surv$group == 1, ])
summary(Surv.loglog_1)
Call: survfit(formula = Surv(time, dead == 1) ~ 1, data = dat_Surv[dat_Surv$group ==
1, ], conf.type = "log-log")
time n.risk n.event survival std.err lower 95% CI upper 95% CI
143 19 1 0.9474 0.0512 0.68119 0.992
164 18 1 0.8947 0.0704 0.64079 0.973
188 17 2 0.7895 0.0935 0.53191 0.915
190 15 1 0.7368 0.1010 0.47893 0.881
192 14 1 0.6842 0.1066 0.42794 0.844
206 13 1 0.6316 0.1107 0.37899 0.804
209 12 1 0.5789 0.1133 0.33208 0.763
213 11 1 0.5263 0.1145 0.28720 0.719
216 10 1 0.4737 0.1145 0.24438 0.673
220 8 1 0.4145 0.1145 0.19616 0.621
227 7 1 0.3553 0.1124 0.15191 0.566
230 6 1 0.2961 0.1082 0.11168 0.509
234 5 1 0.2368 0.1015 0.07578 0.447
246 3 1 0.1579 0.0934 0.03143 0.374
265 2 1 0.0789 0.0728 0.00567 0.288
304 1 1 0.0000 NaN NA NA
quantile(Surv.loglog_1, c(0.25,0.5,0.75))
$quantile
25 50 75
190 216 234
$lower
25 50 75
143 190 216
$upper
25 50 75
213 234 NA
plot(Surv.loglog_1, xlim = c(100,320))