t4 <- read.table("Exercise10.1-data.txt", header = TRUE)
#attach(t4)
AG <- c(rep(1,17), rep(0,16))
t4 <- cbind(t4[,c(1,2)], AG)
t4
## survival.time WBC AG
## 1 65 2.30 1
## 2 156 0.75 1
## 3 100 4.30 1
## 4 134 2.60 1
## 5 16 6.00 1
## 6 108 10.50 1
## 7 121 10.00 1
## 8 4 17.00 1
## 9 39 5.40 1
## 10 143 7.00 1
## 11 56 9.40 1
## 12 26 32.00 1
## 13 22 35.00 1
## 14 1 100.00 1
## 15 1 100.00 1
## 16 5 52.00 1
## 17 65 100.00 1
## 18 56 4.40 0
## 19 65 3.00 0
## 20 17 4.00 0
## 21 7 1.50 0
## 22 16 9.00 0
## 23 22 5.30 0
## 24 3 10.00 0
## 25 4 19.00 0
## 26 2 27.00 0
## 27 3 28.00 0
## 28 8 31.00 0
## 29 4 26.00 0
## 30 3 21.00 0
## 31 30 79.00 0
## 32 4 100.00 0
## 33 43 100.00 0
t4.p <- t4[AG == 1,][,c(1,2)]
t4.n <- t4[AG == 0,][,c(1,2)]
t4.p # AG positive
## survival.time WBC
## 1 65 2.30
## 2 156 0.75
## 3 100 4.30
## 4 134 2.60
## 5 16 6.00
## 6 108 10.50
## 7 121 10.00
## 8 4 17.00
## 9 39 5.40
## 10 143 7.00
## 11 56 9.40
## 12 26 32.00
## 13 22 35.00
## 14 1 100.00
## 15 1 100.00
## 16 5 52.00
## 17 65 100.00
t4.n # AG negative
## survival.time WBC
## 18 56 4.4
## 19 65 3.0
## 20 17 4.0
## 21 7 1.5
## 22 16 9.0
## 23 22 5.3
## 24 3 10.0
## 25 4 19.0
## 26 2 27.0
## 27 3 28.0
## 28 8 31.0
## 29 4 26.0
## 30 3 21.0
## 31 30 79.0
## 32 4 100.0
## 33 43 100.0
The empirical survival functions for AG positive:
t4.p.st <- sort(t4.p[,1])
t4.p.st
## [1] 1 1 4 5 16 22 26 39 56 65 65 100 108 121 134 143 156
uni <- c(0, unique(t4.p.st))
uni
## [1] 0 1 4 5 16 22 26 39 56 65 100 108 121 134 143 156
km.p <- matrix(0,length(uni),3)
rownames(km.p) <- c("0-<1", "1-<4", "4-<5", "5-<16", "16-<22", "22-<26",
"26-<39", "39-<56", "56-<65", "65-<100", "100-<108",
"108-<121", "121-<134", "134-<143", "143-<156", " >=156")
colnames(km.p) <- c("nj", "dj","shat.y")
km.p
## nj dj shat.y
## 0-<1 0 0 0
## 1-<4 0 0 0
## 4-<5 0 0 0
## 5-<16 0 0 0
## 16-<22 0 0 0
## 22-<26 0 0 0
## 26-<39 0 0 0
## 39-<56 0 0 0
## 56-<65 0 0 0
## 65-<100 0 0 0
## 100-<108 0 0 0
## 108-<121 0 0 0
## 121-<134 0 0 0
## 134-<143 0 0 0
## 143-<156 0 0 0
## >=156 0 0 0
n <- length(t4.p.st)
uni <- c(0, unique(t4.p.st))
km.p[1,1] <- n
km.p[1,2] <- 0
km.p[1,3] <- 1
for (i in 2:length(uni)){
km.p[i,2] <- sum(uni[i] == t4.p.st)
km.p[i,1] <- km.p[i-1,1] - km.p[i-1,2]
km.p[i,3] <- km.p[i-1,3] * (km.p[i,1] - km.p[i,2]) / km.p[i,1]
}
round(km.p, 2)
## nj dj shat.y
## 0-<1 17 0 1.00
## 1-<4 17 2 0.88
## 4-<5 15 1 0.82
## 5-<16 14 1 0.76
## 16-<22 13 1 0.71
## 22-<26 12 1 0.65
## 26-<39 11 1 0.59
## 39-<56 10 1 0.53
## 56-<65 9 1 0.47
## 65-<100 8 2 0.35
## 100-<108 6 1 0.29
## 108-<121 5 1 0.24
## 121-<134 4 1 0.18
## 134-<143 3 1 0.12
## 143-<156 2 1 0.06
## >=156 1 1 0.00
t4.p.st
## [1] 1 1 4 5 16 22 26 39 56 65 65 100 108 121 134 143 156
the empirical survival functions for AG negative
t4.n.st <- sort(t4.n[,1])
t4.n.st
## [1] 2 3 3 3 4 4 4 7 8 16 17 22 30 43 56 65
uni <- c(0, unique(t4.n.st))
km.n <- matrix(0, length(uni), 3)
rownames(km.n) <- c("0-<2", "2-<3", "3-<4", "4-<7", "7-<8", "8-<16",
"16-<17", "17-<22", "22-<30", "30-<43", "43-<56",
"56-<65", " >=65")
colnames(km.n) <- c("nj", "dj","shat.y")
km.n
## nj dj shat.y
## 0-<2 0 0 0
## 2-<3 0 0 0
## 3-<4 0 0 0
## 4-<7 0 0 0
## 7-<8 0 0 0
## 8-<16 0 0 0
## 16-<17 0 0 0
## 17-<22 0 0 0
## 22-<30 0 0 0
## 30-<43 0 0 0
## 43-<56 0 0 0
## 56-<65 0 0 0
## >=65 0 0 0
n <- length(t4.n.st)
uni <- c(0, unique(t4.n.st))
km.n[1,1] <- n
km.n[1,2] <- 0
km.n[1,3] <- 1
for (i in 2:length(uni)){
km.n[i,2] <- sum(uni[i] == t4.n.st)
km.n[i,1] <- km.n[i-1,1] - km.n[i-1,2]
km.n[i,3] <- km.n[i-1,3] * (km.n[i,1] - km.n[i,2]) / km.n[i,1]
}
round(km.n, 2)
## nj dj shat.y
## 0-<2 16 0 1.00
## 2-<3 16 1 0.94
## 3-<4 15 3 0.75
## 4-<7 12 3 0.56
## 7-<8 9 1 0.50
## 8-<16 8 1 0.44
## 16-<17 7 1 0.38
## 17-<22 6 1 0.31
## 22-<30 5 1 0.25
## 30-<43 4 1 0.19
## 43-<56 3 1 0.12
## 56-<65 2 1 0.06
## >=65 1 1 0.00
t4.n.st
## [1] 2 3 3 3 4 4 4 7 8 16 17 22 30 43 56 65
Using the function “survfit” KM survival function for both groups
library(survival)
censor <- c(rep(1, nrow(t4)))
data <- cbind(t4, censor)
km.sur = survfit(Surv(survival.time, censor) ~ 1, data=data, type="kaplan-meier")
summary(km.sur)
## Call: survfit(formula = Surv(survival.time, censor) ~ 1, data = data,
## type = "kaplan-meier")
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 33 2 0.9394 0.0415 0.8614 1.000
## 2 31 1 0.9091 0.0500 0.8161 1.000
## 3 30 3 0.8182 0.0671 0.6966 0.961
## 4 27 4 0.6970 0.0800 0.5566 0.873
## 5 23 1 0.6667 0.0821 0.5238 0.849
## 7 22 1 0.6364 0.0837 0.4917 0.824
## 8 21 1 0.6061 0.0851 0.4603 0.798
## 16 20 2 0.5455 0.0867 0.3995 0.745
## 17 18 1 0.5152 0.0870 0.3700 0.717
## 22 17 2 0.4545 0.0867 0.3128 0.661
## 26 15 1 0.4242 0.0860 0.2851 0.631
## 30 14 1 0.3939 0.0851 0.2580 0.601
## 39 13 1 0.3636 0.0837 0.2316 0.571
## 43 12 1 0.3333 0.0821 0.2057 0.540
## 56 11 2 0.2727 0.0775 0.1562 0.476
## 65 9 3 0.1818 0.0671 0.0882 0.375
## 100 6 1 0.1515 0.0624 0.0676 0.340
## 108 5 1 0.1212 0.0568 0.0484 0.304
## 121 4 1 0.0909 0.0500 0.0309 0.267
## 134 3 1 0.0606 0.0415 0.0158 0.232
## 143 2 1 0.0303 0.0298 0.0044 0.209
## 156 1 1 0.0000 NaN NA NA
plot(km.sur)
##AG positive
data1=data[AG==1,][,c(1,4)]
km.sur1 = survfit(Surv(survival.time, censor) ~ 1, data=data1, type="kaplan-meier")
summary(km.sur1)
## Call: survfit(formula = Surv(survival.time, censor) ~ 1, data = data1,
## type = "kaplan-meier")
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 17 2 0.8824 0.0781 0.74175 1.000
## 4 15 1 0.8235 0.0925 0.66087 1.000
## 5 14 1 0.7647 0.1029 0.58746 0.995
## 16 13 1 0.7059 0.1105 0.51936 0.959
## 22 12 1 0.6471 0.1159 0.45548 0.919
## 26 11 1 0.5882 0.1194 0.39521 0.876
## 39 10 1 0.5294 0.1211 0.33818 0.829
## 56 9 1 0.4706 0.1211 0.28423 0.779
## 65 8 2 0.3529 0.1159 0.18543 0.672
## 100 6 1 0.2941 0.1105 0.14083 0.614
## 108 5 1 0.2353 0.1029 0.09987 0.554
## 121 4 1 0.1765 0.0925 0.06320 0.493
## 134 3 1 0.1176 0.0781 0.03200 0.432
## 143 2 1 0.0588 0.0571 0.00879 0.394
## 156 1 1 0.0000 NaN NA NA
plot(km.sur1)
##
data2=data[AG==0,][,c(1,4)]
km.sur2 = survfit(Surv(survival.time, censor) ~ 1, data=data2, type="kaplan-meier")
summary(km.sur2)
## Call: survfit(formula = Surv(survival.time, censor) ~ 1, data = data2,
## type = "kaplan-meier")
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 2 16 1 0.9375 0.0605 0.82609 1.000
## 3 15 3 0.7500 0.1083 0.56520 0.995
## 4 12 3 0.5625 0.1240 0.36513 0.867
## 7 9 1 0.5000 0.1250 0.30632 0.816
## 8 8 1 0.4375 0.1240 0.25101 0.763
## 16 7 1 0.3750 0.1210 0.19921 0.706
## 17 6 1 0.3125 0.1159 0.15108 0.646
## 22 5 1 0.2500 0.1083 0.10699 0.584
## 30 4 1 0.1875 0.0976 0.06761 0.520
## 43 3 1 0.1250 0.0827 0.03419 0.457
## 56 2 1 0.0625 0.0605 0.00937 0.417
## 65 1 1 0.0000 NaN NA NA
plot(km.sur2)
op <- par(mfrow = c(1, 2), pty = “s”) # 2 x 3 pictures on one plot plot the empirical survival functions for AG positive
uni.p <- c(0, unique(t4.p.st))
plot(uni.p, c(km.p[,3]), type = 's', main = "plot S.hat for AG positive")
the empirical survival functions for AG negative
uni.n <- c(0, unique(t4.n.st))
plot(uni.n, c(km.n[,3]), type = 's', main = "plot S.hat for AG negative")
At end of plotting, reset to previous settings:
op <- par(mfrow = c(1, 2), pty = "s") # 2 x 3 pictures on one plot
#par(op)
plot the cumulative hazard function for AG positive
H.p <- - log(c(km.p[,3]))
H.p
## 0-<1 1-<4 4-<5 5-<16 16-<22 22-<26 26-<39
## 0.0000000 0.1251631 0.1941560 0.2682640 0.3483067 0.4353181 0.5306283
## 39-<56 56-<65 65-<100 100-<108 108-<121 121-<134 134-<143
## 0.6359888 0.7537718 1.0414539 1.2237754 1.4469190 1.7346011 2.1400662
## 143-<156 >=156
## 2.8332133 Inf
plot(c(unique(t4.p.st), 157), H.p, main = "cumulative hazard function for AG positive")
plot the cumulative hazard function for AG negative
H.n <- - log(c(km.n[,3]))
H.n
## 0-<2 2-<3 3-<4 4-<7 7-<8 8-<16
## 0.00000000 0.06453852 0.28768207 0.57536414 0.69314718 0.82667857
## 16-<17 17-<22 22-<30 30-<43 43-<56 56-<65
## 0.98082925 1.16315081 1.38629436 1.67397643 2.07944154 2.77258872
## >=65
## Inf
plot(c(unique(t4.n.st), 66), H.n, main = "cumulative hazard function for AG negative")
#par(op)
plots suggest that both the Weibull and exponential distributions are appropriate.
censor <- c(rep(1, nrow(t4)))
data <- cbind(t4, censor)
km.sur = survfit(Surv(survival.time, censor) ~ 1, data=data, type="kaplan-meier")
summary(km.sur)
## Call: survfit(formula = Surv(survival.time, censor) ~ 1, data = data,
## type = "kaplan-meier")
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 33 2 0.9394 0.0415 0.8614 1.000
## 2 31 1 0.9091 0.0500 0.8161 1.000
## 3 30 3 0.8182 0.0671 0.6966 0.961
## 4 27 4 0.6970 0.0800 0.5566 0.873
## 5 23 1 0.6667 0.0821 0.5238 0.849
## 7 22 1 0.6364 0.0837 0.4917 0.824
## 8 21 1 0.6061 0.0851 0.4603 0.798
## 16 20 2 0.5455 0.0867 0.3995 0.745
## 17 18 1 0.5152 0.0870 0.3700 0.717
## 22 17 2 0.4545 0.0867 0.3128 0.661
## 26 15 1 0.4242 0.0860 0.2851 0.631
## 30 14 1 0.3939 0.0851 0.2580 0.601
## 39 13 1 0.3636 0.0837 0.2316 0.571
## 43 12 1 0.3333 0.0821 0.2057 0.540
## 56 11 2 0.2727 0.0775 0.1562 0.476
## 65 9 3 0.1818 0.0671 0.0882 0.375
## 100 6 1 0.1515 0.0624 0.0676 0.340
## 108 5 1 0.1212 0.0568 0.0484 0.304
## 121 4 1 0.0909 0.0500 0.0309 0.267
## 134 3 1 0.0606 0.0415 0.0158 0.232
## 143 2 1 0.0303 0.0298 0.0044 0.209
## 156 1 1 0.0000 NaN NA NA
plot(km.sur)
res.t4.w <- survreg(Surv(survival.time, censor) ~ AG + log(WBC), dist = "weibull", data = data)
res.t4.w
## Call:
## survreg(formula = Surv(survival.time, censor) ~ AG + log(WBC),
## data = data, dist = "weibull")
##
## Coefficients:
## (Intercept) AG log(WBC)
## 3.7087230 1.0206092 -0.3103361
##
## Scale= 1.040689
##
## Loglik(model)= -146.5 Loglik(intercept only)= -153.6
## Chisq= 14.18 on 2 degrees of freedom, p= 0.00084
## n= 33
summary(res.t4.w)
##
## Call:
## survreg(formula = Surv(survival.time, censor) ~ AG + log(WBC),
## data = data, dist = "weibull")
## Value Std. Error z p
## (Intercept) 3.7087 0.473 7.846 4.29e-15
## AG 1.0206 0.378 2.699 6.95e-03
## log(WBC) -0.3103 0.131 -2.363 1.81e-02
## Log(scale) 0.0399 0.139 0.287 7.74e-01
##
## Scale= 1.04
##
## Weibull distribution
## Loglik(model)= -146.5 Loglik(intercept only)= -153.6
## Chisq= 14.18 on 2 degrees of freedom, p= 0.00084
## Number of Newton-Raphson Iterations: 6
## n= 33
?survreg
lambda <- 1 / res.t4.w$scale
lambda
## [1] 0.9609015
res.t4.e <- survreg(Surv(survival.time, censor) ~ AG + log(WBC), dist = "exponential", data = data)
res.t4.e
## Call:
## survreg(formula = Surv(survival.time, censor) ~ AG + log(WBC),
## data = data, dist = "exponential")
##
## Coefficients:
## (Intercept) AG log(WBC)
## 3.7127119 1.0176268 -0.3044061
##
## Scale fixed at 1
##
## Loglik(model)= -146.5 Loglik(intercept only)= -155.5
## Chisq= 17.82 on 2 degrees of freedom, p= 0.00014
## n= 33
summary(res.t4.e)
##
## Call:
## survreg(formula = Surv(survival.time, censor) ~ AG + log(WBC),
## data = data, dist = "exponential")
## Value Std. Error z p
## (Intercept) 3.713 0.454 8.17 2.96e-16
## AG 1.018 0.364 2.80 5.14e-03
## log(WBC) -0.304 0.124 -2.45 1.44e-02
##
## Scale fixed at 1
##
## Exponential distribution
## Loglik(model)= -146.5 Loglik(intercept only)= -155.5
## Chisq= 17.82 on 2 degrees of freedom, p= 0.00014
## Number of Newton-Raphson Iterations: 5
## n= 33
res.t4.w
## Call:
## survreg(formula = Surv(survival.time, censor) ~ AG + log(WBC),
## data = data, dist = "weibull")
##
## Coefficients:
## (Intercept) AG log(WBC)
## 3.7087230 1.0206092 -0.3103361
##
## Scale= 1.040689
##
## Loglik(model)= -146.5 Loglik(intercept only)= -153.6
## Chisq= 14.18 on 2 degrees of freedom, p= 0.00084
## n= 33
rr <- residuals(res.t4.w)
rr
## 1 2 3 4 5 6
## -22.4310433 32.2070689 27.9996267 49.8330367 -48.9283640 53.4228328
## 7 8 9 10 11 12
## 65.5901731 -42.9970051 -28.0864265 81.1045882 -0.4840969 -12.6207355
## 13 14 15 16 17 18
## -15.5614943 -26.1175679 -26.1175679 -28.2188917 37.8824321 30.2373017
## 19 20 21 22 23 24
## 35.9858411 -9.5360936 -28.9774557 -4.6320345 -2.3169441 -16.9683351
## 25 26 27 28 29 30
## -12.3619312 -12.6714868 -11.5068325 -6.0557688 -10.8443322 -12.8615488
## 31 32 33
## 19.4858104 -5.7725027 33.2274973
boxplot(rr, names = c("residuls"))
hist(rr)
t4.p.st
## [1] 1 1 4 5 16 22 26 39 56 65 65 100 108 121 134 143 156
t4.n.st
## [1] 2 3 3 3 4 4 4 7 8 16 17 22 30 43 56 65
boxplot(t4.p.st, t4.n.st, names = c("AG positive", "AG negative"))
Survival.time <- t4[,1]
White.blood.cell.count <- t4[,2]
plot(Survival.time, White.blood.cell.count)
h = function(y, theta, lambda){
(exp(theta) * lambda * y ^ (lambda - 1)) / (1 + exp(theta) * y ^ (lambda))
}
op <- par(mfrow = c(2, 3), pty = "s") # 2 x 3 pictures on one plot
# lambda = 1 and theta = -5
y <- seq(0, 1000, length = 10000)
plot(y, h(y, lambda = 1, theta = - 5), type = "l", main = "lambda=1 & theta=-5")
# lambda = 1 and theta = -2
y <- seq(0, 100, length = 10000)
plot(y, h(y, lambda = 1, theta = - 2), type = "l", main = "lambda=1 & theta=-2")
# lambda = 1 and theta = 1/2
y <- seq(0, 10, length = 10000)
plot(y, h(y, lambda = 1, theta = 1 / 2), type = "l", main = "lambda=1 & theta=1/2")
# lambda = 5 and theta = -5
y <- seq(0, 50, length = 10000)
plot(y, h(y, lambda = 5, theta = - 5), type = "l", main = "lambda=5 & theta=-5")
# lambda = 5 and theta = -2
y <- seq(0, 50, length = 10000)
plot(y, h(y, lambda = 5, theta = - 2), type = "l", main = "lambda=5 & theta=-2")
# lambda = 5 and theta = 1/2
y <- seq(0, 50, length = 10000)
plot(y, h(y, lambda = 5, theta = - 2), type = "l", main = "lambda=5 & theta=1/2")
## At end of plotting, reset to previous settings:
par(op)