Chapter 10

10.1

t4 <- read.table("Exercise10.1-data.txt", header = TRUE)
#attach(t4)
AG <- c(rep(1,17), rep(0,16))
t4 <- cbind(t4[,c(1,2)], AG)
t4
##    survival.time    WBC AG
## 1             65   2.30  1
## 2            156   0.75  1
## 3            100   4.30  1
## 4            134   2.60  1
## 5             16   6.00  1
## 6            108  10.50  1
## 7            121  10.00  1
## 8              4  17.00  1
## 9             39   5.40  1
## 10           143   7.00  1
## 11            56   9.40  1
## 12            26  32.00  1
## 13            22  35.00  1
## 14             1 100.00  1
## 15             1 100.00  1
## 16             5  52.00  1
## 17            65 100.00  1
## 18            56   4.40  0
## 19            65   3.00  0
## 20            17   4.00  0
## 21             7   1.50  0
## 22            16   9.00  0
## 23            22   5.30  0
## 24             3  10.00  0
## 25             4  19.00  0
## 26             2  27.00  0
## 27             3  28.00  0
## 28             8  31.00  0
## 29             4  26.00  0
## 30             3  21.00  0
## 31            30  79.00  0
## 32             4 100.00  0
## 33            43 100.00  0
t4.p <- t4[AG == 1,][,c(1,2)]
t4.n <- t4[AG == 0,][,c(1,2)]
t4.p # AG positive
##    survival.time    WBC
## 1             65   2.30
## 2            156   0.75
## 3            100   4.30
## 4            134   2.60
## 5             16   6.00
## 6            108  10.50
## 7            121  10.00
## 8              4  17.00
## 9             39   5.40
## 10           143   7.00
## 11            56   9.40
## 12            26  32.00
## 13            22  35.00
## 14             1 100.00
## 15             1 100.00
## 16             5  52.00
## 17            65 100.00
t4.n # AG negative
##    survival.time   WBC
## 18            56   4.4
## 19            65   3.0
## 20            17   4.0
## 21             7   1.5
## 22            16   9.0
## 23            22   5.3
## 24             3  10.0
## 25             4  19.0
## 26             2  27.0
## 27             3  28.0
## 28             8  31.0
## 29             4  26.0
## 30             3  21.0
## 31            30  79.0
## 32             4 100.0
## 33            43 100.0

a.

The empirical survival functions for AG positive:

t4.p.st <- sort(t4.p[,1])
t4.p.st
##  [1]   1   1   4   5  16  22  26  39  56  65  65 100 108 121 134 143 156
uni <- c(0, unique(t4.p.st))
uni
##  [1]   0   1   4   5  16  22  26  39  56  65 100 108 121 134 143 156
km.p <- matrix(0,length(uni),3)
rownames(km.p) <- c("0-<1", "1-<4", "4-<5", "5-<16", "16-<22", "22-<26", 
                  "26-<39", "39-<56", "56-<65", "65-<100", "100-<108", 
                  "108-<121", "121-<134", "134-<143", "143-<156", "   >=156")
colnames(km.p) <- c("nj", "dj","shat.y")
km.p
##          nj dj shat.y
## 0-<1      0  0      0
## 1-<4      0  0      0
## 4-<5      0  0      0
## 5-<16     0  0      0
## 16-<22    0  0      0
## 22-<26    0  0      0
## 26-<39    0  0      0
## 39-<56    0  0      0
## 56-<65    0  0      0
## 65-<100   0  0      0
## 100-<108  0  0      0
## 108-<121  0  0      0
## 121-<134  0  0      0
## 134-<143  0  0      0
## 143-<156  0  0      0
##    >=156  0  0      0
n <- length(t4.p.st)
uni <- c(0, unique(t4.p.st))

km.p[1,1] <- n
km.p[1,2] <- 0
km.p[1,3] <- 1
for (i in 2:length(uni)){
  km.p[i,2] <- sum(uni[i] == t4.p.st)
  km.p[i,1] <- km.p[i-1,1] - km.p[i-1,2]
  km.p[i,3] <- km.p[i-1,3] * (km.p[i,1] - km.p[i,2]) / km.p[i,1]
}
round(km.p, 2)
##          nj dj shat.y
## 0-<1     17  0   1.00
## 1-<4     17  2   0.88
## 4-<5     15  1   0.82
## 5-<16    14  1   0.76
## 16-<22   13  1   0.71
## 22-<26   12  1   0.65
## 26-<39   11  1   0.59
## 39-<56   10  1   0.53
## 56-<65    9  1   0.47
## 65-<100   8  2   0.35
## 100-<108  6  1   0.29
## 108-<121  5  1   0.24
## 121-<134  4  1   0.18
## 134-<143  3  1   0.12
## 143-<156  2  1   0.06
##    >=156  1  1   0.00
t4.p.st
##  [1]   1   1   4   5  16  22  26  39  56  65  65 100 108 121 134 143 156

the empirical survival functions for AG negative

t4.n.st <- sort(t4.n[,1])
t4.n.st
##  [1]  2  3  3  3  4  4  4  7  8 16 17 22 30 43 56 65
uni <- c(0, unique(t4.n.st))

km.n <- matrix(0, length(uni), 3)
rownames(km.n) <- c("0-<2", "2-<3", "3-<4", "4-<7", "7-<8", "8-<16", 
                  "16-<17", "17-<22", "22-<30", "30-<43", "43-<56", 
                  "56-<65", " >=65")
colnames(km.n) <- c("nj", "dj","shat.y")
km.n
##        nj dj shat.y
## 0-<2    0  0      0
## 2-<3    0  0      0
## 3-<4    0  0      0
## 4-<7    0  0      0
## 7-<8    0  0      0
## 8-<16   0  0      0
## 16-<17  0  0      0
## 17-<22  0  0      0
## 22-<30  0  0      0
## 30-<43  0  0      0
## 43-<56  0  0      0
## 56-<65  0  0      0
##  >=65   0  0      0
n <- length(t4.n.st)
uni <- c(0, unique(t4.n.st))

km.n[1,1] <- n
km.n[1,2] <- 0
km.n[1,3] <- 1
for (i in 2:length(uni)){
  km.n[i,2] <- sum(uni[i] == t4.n.st)
  km.n[i,1] <- km.n[i-1,1] - km.n[i-1,2]
  km.n[i,3] <- km.n[i-1,3] * (km.n[i,1] - km.n[i,2]) / km.n[i,1]
}
round(km.n, 2)
##        nj dj shat.y
## 0-<2   16  0   1.00
## 2-<3   16  1   0.94
## 3-<4   15  3   0.75
## 4-<7   12  3   0.56
## 7-<8    9  1   0.50
## 8-<16   8  1   0.44
## 16-<17  7  1   0.38
## 17-<22  6  1   0.31
## 22-<30  5  1   0.25
## 30-<43  4  1   0.19
## 43-<56  3  1   0.12
## 56-<65  2  1   0.06
##  >=65   1  1   0.00
t4.n.st
##  [1]  2  3  3  3  4  4  4  7  8 16 17 22 30 43 56 65

Using the function “survfit” KM survival function for both groups

library(survival)
censor <- c(rep(1, nrow(t4)))
data <- cbind(t4, censor)

km.sur = survfit(Surv(survival.time, censor) ~ 1, data=data, type="kaplan-meier")
summary(km.sur)
## Call: survfit(formula = Surv(survival.time, censor) ~ 1, data = data, 
##     type = "kaplan-meier")
## 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     1     33       2   0.9394  0.0415       0.8614        1.000
##     2     31       1   0.9091  0.0500       0.8161        1.000
##     3     30       3   0.8182  0.0671       0.6966        0.961
##     4     27       4   0.6970  0.0800       0.5566        0.873
##     5     23       1   0.6667  0.0821       0.5238        0.849
##     7     22       1   0.6364  0.0837       0.4917        0.824
##     8     21       1   0.6061  0.0851       0.4603        0.798
##    16     20       2   0.5455  0.0867       0.3995        0.745
##    17     18       1   0.5152  0.0870       0.3700        0.717
##    22     17       2   0.4545  0.0867       0.3128        0.661
##    26     15       1   0.4242  0.0860       0.2851        0.631
##    30     14       1   0.3939  0.0851       0.2580        0.601
##    39     13       1   0.3636  0.0837       0.2316        0.571
##    43     12       1   0.3333  0.0821       0.2057        0.540
##    56     11       2   0.2727  0.0775       0.1562        0.476
##    65      9       3   0.1818  0.0671       0.0882        0.375
##   100      6       1   0.1515  0.0624       0.0676        0.340
##   108      5       1   0.1212  0.0568       0.0484        0.304
##   121      4       1   0.0909  0.0500       0.0309        0.267
##   134      3       1   0.0606  0.0415       0.0158        0.232
##   143      2       1   0.0303  0.0298       0.0044        0.209
##   156      1       1   0.0000     NaN           NA           NA
plot(km.sur)       

 ##AG   positive
data1=data[AG==1,][,c(1,4)]
km.sur1 = survfit(Surv(survival.time, censor) ~ 1, data=data1, type="kaplan-meier")
summary(km.sur1)
## Call: survfit(formula = Surv(survival.time, censor) ~ 1, data = data1, 
##     type = "kaplan-meier")
## 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     1     17       2   0.8824  0.0781      0.74175        1.000
##     4     15       1   0.8235  0.0925      0.66087        1.000
##     5     14       1   0.7647  0.1029      0.58746        0.995
##    16     13       1   0.7059  0.1105      0.51936        0.959
##    22     12       1   0.6471  0.1159      0.45548        0.919
##    26     11       1   0.5882  0.1194      0.39521        0.876
##    39     10       1   0.5294  0.1211      0.33818        0.829
##    56      9       1   0.4706  0.1211      0.28423        0.779
##    65      8       2   0.3529  0.1159      0.18543        0.672
##   100      6       1   0.2941  0.1105      0.14083        0.614
##   108      5       1   0.2353  0.1029      0.09987        0.554
##   121      4       1   0.1765  0.0925      0.06320        0.493
##   134      3       1   0.1176  0.0781      0.03200        0.432
##   143      2       1   0.0588  0.0571      0.00879        0.394
##   156      1       1   0.0000     NaN           NA           NA
plot(km.sur1)

   ##
   data2=data[AG==0,][,c(1,4)]
km.sur2 = survfit(Surv(survival.time, censor) ~ 1, data=data2, type="kaplan-meier")
summary(km.sur2)
## Call: survfit(formula = Surv(survival.time, censor) ~ 1, data = data2, 
##     type = "kaplan-meier")
## 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     2     16       1   0.9375  0.0605      0.82609        1.000
##     3     15       3   0.7500  0.1083      0.56520        0.995
##     4     12       3   0.5625  0.1240      0.36513        0.867
##     7      9       1   0.5000  0.1250      0.30632        0.816
##     8      8       1   0.4375  0.1240      0.25101        0.763
##    16      7       1   0.3750  0.1210      0.19921        0.706
##    17      6       1   0.3125  0.1159      0.15108        0.646
##    22      5       1   0.2500  0.1083      0.10699        0.584
##    30      4       1   0.1875  0.0976      0.06761        0.520
##    43      3       1   0.1250  0.0827      0.03419        0.457
##    56      2       1   0.0625  0.0605      0.00937        0.417
##    65      1       1   0.0000     NaN           NA           NA
plot(km.sur2)

b.

op <- par(mfrow = c(1, 2), pty = “s”) # 2 x 3 pictures on one plot plot the empirical survival functions for AG positive

uni.p <- c(0, unique(t4.p.st))
plot(uni.p, c(km.p[,3]), type = 's', main = "plot S.hat for AG positive")

the empirical survival functions for AG negative

uni.n <- c(0, unique(t4.n.st))
plot(uni.n, c(km.n[,3]), type = 's', main = "plot S.hat for AG negative")

At end of plotting, reset to previous settings:

op <- par(mfrow = c(1, 2), pty = "s") # 2 x 3 pictures on one plot
#par(op)

plot the cumulative hazard function for AG positive

H.p <- - log(c(km.p[,3])) 
H.p
##      0-<1      1-<4      4-<5     5-<16    16-<22    22-<26    26-<39 
## 0.0000000 0.1251631 0.1941560 0.2682640 0.3483067 0.4353181 0.5306283 
##    39-<56    56-<65   65-<100  100-<108  108-<121  121-<134  134-<143 
## 0.6359888 0.7537718 1.0414539 1.2237754 1.4469190 1.7346011 2.1400662 
##  143-<156     >=156 
## 2.8332133       Inf
plot(c(unique(t4.p.st), 157), H.p, main = "cumulative hazard function for AG positive")

plot the cumulative hazard function for AG negative

H.n <- - log(c(km.n[,3])) 
H.n
##       0-<2       2-<3       3-<4       4-<7       7-<8      8-<16 
## 0.00000000 0.06453852 0.28768207 0.57536414 0.69314718 0.82667857 
##     16-<17     17-<22     22-<30     30-<43     43-<56     56-<65 
## 0.98082925 1.16315081 1.38629436 1.67397643 2.07944154 2.77258872 
##       >=65 
##        Inf
plot(c(unique(t4.n.st), 66), H.n, main = "cumulative hazard function for AG negative")

#par(op) 

plots suggest that both the Weibull and exponential distributions are appropriate.

c.

censor <- c(rep(1, nrow(t4)))
data <- cbind(t4, censor)

km.sur = survfit(Surv(survival.time, censor) ~ 1, data=data, type="kaplan-meier")
summary(km.sur)
## Call: survfit(formula = Surv(survival.time, censor) ~ 1, data = data, 
##     type = "kaplan-meier")
## 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     1     33       2   0.9394  0.0415       0.8614        1.000
##     2     31       1   0.9091  0.0500       0.8161        1.000
##     3     30       3   0.8182  0.0671       0.6966        0.961
##     4     27       4   0.6970  0.0800       0.5566        0.873
##     5     23       1   0.6667  0.0821       0.5238        0.849
##     7     22       1   0.6364  0.0837       0.4917        0.824
##     8     21       1   0.6061  0.0851       0.4603        0.798
##    16     20       2   0.5455  0.0867       0.3995        0.745
##    17     18       1   0.5152  0.0870       0.3700        0.717
##    22     17       2   0.4545  0.0867       0.3128        0.661
##    26     15       1   0.4242  0.0860       0.2851        0.631
##    30     14       1   0.3939  0.0851       0.2580        0.601
##    39     13       1   0.3636  0.0837       0.2316        0.571
##    43     12       1   0.3333  0.0821       0.2057        0.540
##    56     11       2   0.2727  0.0775       0.1562        0.476
##    65      9       3   0.1818  0.0671       0.0882        0.375
##   100      6       1   0.1515  0.0624       0.0676        0.340
##   108      5       1   0.1212  0.0568       0.0484        0.304
##   121      4       1   0.0909  0.0500       0.0309        0.267
##   134      3       1   0.0606  0.0415       0.0158        0.232
##   143      2       1   0.0303  0.0298       0.0044        0.209
##   156      1       1   0.0000     NaN           NA           NA
plot(km.sur)

res.t4.w <- survreg(Surv(survival.time, censor) ~ AG + log(WBC), dist = "weibull", data = data)
res.t4.w
## Call:
## survreg(formula = Surv(survival.time, censor) ~ AG + log(WBC), 
##     data = data, dist = "weibull")
## 
## Coefficients:
## (Intercept)          AG    log(WBC) 
##   3.7087230   1.0206092  -0.3103361 
## 
## Scale= 1.040689 
## 
## Loglik(model)= -146.5   Loglik(intercept only)= -153.6
##  Chisq= 14.18 on 2 degrees of freedom, p= 0.00084 
## n= 33
summary(res.t4.w)
## 
## Call:
## survreg(formula = Surv(survival.time, censor) ~ AG + log(WBC), 
##     data = data, dist = "weibull")
##               Value Std. Error      z        p
## (Intercept)  3.7087      0.473  7.846 4.29e-15
## AG           1.0206      0.378  2.699 6.95e-03
## log(WBC)    -0.3103      0.131 -2.363 1.81e-02
## Log(scale)   0.0399      0.139  0.287 7.74e-01
## 
## Scale= 1.04 
## 
## Weibull distribution
## Loglik(model)= -146.5   Loglik(intercept only)= -153.6
##  Chisq= 14.18 on 2 degrees of freedom, p= 0.00084 
## Number of Newton-Raphson Iterations: 6 
## n= 33
?survreg

lambda <- 1 / res.t4.w$scale
lambda
## [1] 0.9609015
res.t4.e <- survreg(Surv(survival.time, censor) ~ AG + log(WBC), dist = "exponential", data = data)
res.t4.e
## Call:
## survreg(formula = Surv(survival.time, censor) ~ AG + log(WBC), 
##     data = data, dist = "exponential")
## 
## Coefficients:
## (Intercept)          AG    log(WBC) 
##   3.7127119   1.0176268  -0.3044061 
## 
## Scale fixed at 1 
## 
## Loglik(model)= -146.5   Loglik(intercept only)= -155.5
##  Chisq= 17.82 on 2 degrees of freedom, p= 0.00014 
## n= 33
summary(res.t4.e)
## 
## Call:
## survreg(formula = Surv(survival.time, censor) ~ AG + log(WBC), 
##     data = data, dist = "exponential")
##              Value Std. Error     z        p
## (Intercept)  3.713      0.454  8.17 2.96e-16
## AG           1.018      0.364  2.80 5.14e-03
## log(WBC)    -0.304      0.124 -2.45 1.44e-02
## 
## Scale fixed at 1 
## 
## Exponential distribution
## Loglik(model)= -146.5   Loglik(intercept only)= -155.5
##  Chisq= 17.82 on 2 degrees of freedom, p= 0.00014 
## Number of Newton-Raphson Iterations: 5 
## n= 33
res.t4.w
## Call:
## survreg(formula = Surv(survival.time, censor) ~ AG + log(WBC), 
##     data = data, dist = "weibull")
## 
## Coefficients:
## (Intercept)          AG    log(WBC) 
##   3.7087230   1.0206092  -0.3103361 
## 
## Scale= 1.040689 
## 
## Loglik(model)= -146.5   Loglik(intercept only)= -153.6
##  Chisq= 14.18 on 2 degrees of freedom, p= 0.00084 
## n= 33

d.

rr <- residuals(res.t4.w)
rr
##           1           2           3           4           5           6 
## -22.4310433  32.2070689  27.9996267  49.8330367 -48.9283640  53.4228328 
##           7           8           9          10          11          12 
##  65.5901731 -42.9970051 -28.0864265  81.1045882  -0.4840969 -12.6207355 
##          13          14          15          16          17          18 
## -15.5614943 -26.1175679 -26.1175679 -28.2188917  37.8824321  30.2373017 
##          19          20          21          22          23          24 
##  35.9858411  -9.5360936 -28.9774557  -4.6320345  -2.3169441 -16.9683351 
##          25          26          27          28          29          30 
## -12.3619312 -12.6714868 -11.5068325  -6.0557688 -10.8443322 -12.8615488 
##          31          32          33 
##  19.4858104  -5.7725027  33.2274973
boxplot(rr, names = c("residuls"))

hist(rr)

e.

t4.p.st 
##  [1]   1   1   4   5  16  22  26  39  56  65  65 100 108 121 134 143 156
t4.n.st
##  [1]  2  3  3  3  4  4  4  7  8 16 17 22 30 43 56 65
boxplot(t4.p.st, t4.n.st, names = c("AG positive", "AG negative"))

Survival.time <- t4[,1]
White.blood.cell.count <- t4[,2]

plot(Survival.time, White.blood.cell.count)

10.2

h = function(y, theta, lambda){
   (exp(theta) * lambda * y ^ (lambda - 1)) / (1 + exp(theta) * y ^ (lambda))
   }

op <- par(mfrow = c(2, 3), pty = "s") # 2 x 3 pictures on one plot

# lambda = 1 and theta = -5
y <- seq(0, 1000, length = 10000)
plot(y, h(y, lambda = 1, theta = - 5), type = "l", main = "lambda=1 & theta=-5")

# lambda = 1 and theta = -2
y <- seq(0, 100, length = 10000)
plot(y, h(y, lambda = 1, theta = - 2), type = "l", main = "lambda=1 & theta=-2")

# lambda = 1 and theta = 1/2
y <- seq(0, 10, length = 10000)
plot(y, h(y, lambda = 1, theta = 1 / 2), type = "l", main = "lambda=1 & theta=1/2")

# lambda = 5 and theta = -5
y <- seq(0, 50, length = 10000)
plot(y, h(y, lambda = 5, theta = - 5), type = "l", main = "lambda=5 & theta=-5")

# lambda = 5 and theta = -2
y <- seq(0, 50, length = 10000)
plot(y, h(y, lambda = 5, theta = - 2), type = "l", main = "lambda=5 & theta=-2")

# lambda = 5 and theta = 1/2
y <- seq(0, 50, length = 10000)
plot(y, h(y, lambda = 5, theta = - 2), type = "l", main = "lambda=5 & theta=1/2")

## At end of plotting, reset to previous settings:
par(op)