Survival analysis

Miao Yu

2015/1/6

Concepts

Notation

Cox proportional-hazards regression model

Case: Recidivism

# perform survival analysis
Rossi <- read.table('http://cran.r-project.org/doc/contrib/Fox-Companion/Rossi.txt', header=T)
Rossi[1:5, 1:10]
##   week arrest fin age race wexp mar paro prio educ
## 1   20      1   0  27    1    0   0    1    3    3
## 2   17      1   0  18    1    0   0    1    8    4
## 3   25      1   0  19    0    1   0    1   13    3
## 4   52      0   1  23    1    1   1    1    1    5
## 5   52      0   0  19    0    1   0    1    3    3
mod.allison <- coxph(Surv(week, arrest) ~ fin + age + race + wexp + mar + paro + prio, data=Rossi)
summary(mod.allison)
## Call:
## coxph(formula = Surv(week, arrest) ~ fin + age + race + wexp + 
##     mar + paro + prio, data = Rossi)
## 
##   n= 432, number of events= 114 
## 
##          coef exp(coef) se(coef)      z Pr(>|z|)   
## fin  -0.37942   0.68426  0.19138 -1.983  0.04742 * 
## age  -0.05744   0.94418  0.02200 -2.611  0.00903 **
## race  0.31390   1.36875  0.30799  1.019  0.30812   
## wexp -0.14980   0.86088  0.21222 -0.706  0.48029   
## mar  -0.43370   0.64810  0.38187 -1.136  0.25606   
## paro -0.08487   0.91863  0.19576 -0.434  0.66461   
## prio  0.09150   1.09581  0.02865  3.194  0.00140 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      exp(coef) exp(-coef) lower .95 upper .95
## fin     0.6843     1.4614    0.4702    0.9957
## age     0.9442     1.0591    0.9043    0.9858
## race    1.3688     0.7306    0.7484    2.5032
## wexp    0.8609     1.1616    0.5679    1.3049
## mar     0.6481     1.5430    0.3066    1.3699
## paro    0.9186     1.0886    0.6259    1.3482
## prio    1.0958     0.9126    1.0360    1.1591
## 
## Concordance= 0.64  (se = 0.027 )
## Rsquare= 0.074   (max possible= 0.956 )
## Likelihood ratio test= 33.27  on 7 df,   p=2.362e-05
## Wald test            = 32.11  on 7 df,   p=3.871e-05
## Score (logrank) test = 33.53  on 7 df,   p=2.11e-05
# plot time vs survival prob
plot(survfit(mod.allison), ylim=c(.7, 1), xlab='Weeks', ylab='Proportion Not Rearrested')

result

further

attach(Rossi)
Rossi.fin <- data.frame(fin=c(0,1), age=rep(mean(age),2), race=rep(mean(race),2), wexp=rep(mean(wexp),2), mar=rep(mean(mar),2), paro=rep(mean(paro),2), prio=rep(mean(prio),2))
detach()
plot(survfit(mod.allison, newdata=Rossi.fin), conf.int=T, lty=c(1,2), ylim=c(.6, 1))
legend("bottomleft", legend=c('fin = 0', 'fin = 1'), lty=c(1,2))

Time-Dependent Covariates

sum(!is.na(Rossi[,11:62])) # record count
## [1] 19809
Rossi2 <- matrix(0, 19809, 14) # to hold new data set
colnames(Rossi2) <- c('start', 'stop', 'arresttime', names(Rossi)[1:10], 'employed')

row<-0
for (i in 1:nrow(Rossi)) { 
    for (j in 11:62) { 
        if (is.na(Rossi[i, j])) next
        else {
            row <- row + 1 # increment row counter
            start <- j - 11 # start time (previous week)
            stop <- start + 1 # stop time (current week)
            arresttime <- if (stop == Rossi[i, 1] && Rossi[i, 2] ==1) 1 else 0
            Rossi2[row,] <- c(start, stop, arresttime, unlist(Rossi[i, c(1:10, j)]))
            }
        }
}
Rossi2 <- as.data.frame(Rossi2)
remove(i, j, row, start, stop, arresttime)
modallison2 <- coxph(Surv(start, stop, arresttime) ~ fin + age + race + wexp + mar + paro + prio + employed, data=Rossi2)
summary(modallison2)
## Call:
## coxph(formula = Surv(start, stop, arresttime) ~ fin + age + race + 
##     wexp + mar + paro + prio + employed, data = Rossi2)
## 
##   n= 19809, number of events= 114 
## 
##              coef exp(coef) se(coef)      z Pr(>|z|)    
## fin      -0.35672   0.69997  0.19113 -1.866  0.06198 .  
## age      -0.04634   0.95472  0.02174 -2.132  0.03301 *  
## race      0.33866   1.40306  0.30960  1.094  0.27402    
## wexp     -0.02555   0.97477  0.21142 -0.121  0.90380    
## mar      -0.29375   0.74546  0.38303 -0.767  0.44314    
## paro     -0.06421   0.93781  0.19468 -0.330  0.74156    
## prio      0.08514   1.08887  0.02896  2.940  0.00328 ** 
## employed -1.32832   0.26492  0.25072 -5.298 1.17e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## fin         0.7000     1.4286    0.4813    1.0180
## age         0.9547     1.0474    0.9149    0.9963
## race        1.4031     0.7127    0.7648    2.5740
## wexp        0.9748     1.0259    0.6441    1.4753
## mar         0.7455     1.3414    0.3519    1.5793
## paro        0.9378     1.0663    0.6403    1.3735
## prio        1.0889     0.9184    1.0288    1.1525
## employed    0.2649     3.7747    0.1621    0.4330
## 
## Concordance= 0.708  (se = 0.027 )
## Rsquare= 0.003   (max possible= 0.066 )
## Likelihood ratio test= 68.65  on 8 df,   p=9.114e-12
## Wald test            = 56.15  on 8 df,   p=2.632e-09
## Score (logrank) test = 64.48  on 8 df,   p=6.102e-11

Model Diagnostics

modallison3 <- coxph(Surv(week, arrest) ~ fin + age + prio, data=Rossi)
modallison3
## Call:
## coxph(formula = Surv(week, arrest) ~ fin + age + prio, data = Rossi)
## 
## 
##         coef exp(coef) se(coef)     z       p
## fin  -0.3470     0.707   0.1902 -1.82 0.06800
## age  -0.0671     0.935   0.0209 -3.22 0.00130
## prio  0.0969     1.102   0.0273  3.56 0.00038
## 
## Likelihood ratio test=29.1  on 3 df, p=2.19e-06  n= 432, number of events= 114
cox.zph(modallison3)
##             rho   chisq      p
## fin    -0.00657 0.00507 0.9433
## age    -0.20976 6.54147 0.0105
## prio   -0.08004 0.77288 0.3793
## GLOBAL       NA 7.13046 0.0679
par(mfrow=c(2,2))
plot(cox.zph(modallison3))

modallison4 <- coxph(Surv(start,stop,arresttime)~fin+age+age:stop:stop+prio, data = Rossi2)
modallison4
## Call:
## coxph(formula = Surv(start, stop, arresttime) ~ fin + age + age:stop:stop + 
##     prio, data = Rossi2)
## 
## 
##              coef exp(coef) se(coef)      z       p
## fin      -0.34856     0.706  0.19023 -1.832 0.06700
## age       0.03228     1.033  0.03943  0.819 0.41000
## prio      0.09818     1.103  0.02726  3.602 0.00032
## age:stop -0.00383     0.996  0.00147 -2.612 0.00900
## 
## Likelihood ratio test=36  on 4 df, p=2.85e-07  n= 19809, number of events= 114
Rossi$agecat <- recode(Rossi$age, " lo:19=1; 20:25=2; 26:30=3; 31:hi=4 ")
table(Rossi$agecat)
## 
##   1   2   3   4 
##  66 236  66  64
modallison5 <- coxph(Surv(week,arrest)~fin+prio+strata(agecat),data = Rossi)
modallison5
## Call:
## coxph(formula = Surv(week, arrest) ~ fin + prio + strata(agecat), 
##     data = Rossi)
## 
## 
##         coef exp(coef) se(coef)     z      p
## fin  -0.3408     0.711    0.190 -1.79 0.0730
## prio  0.0941     1.099    0.027  3.48 0.0005
## 
## Likelihood ratio test=13.4  on 2 df, p=0.00122  n= 432, number of events= 114
cox.zph(modallison5)
##            rho  chisq     p
## fin    -0.0183 0.0392 0.843
## prio   -0.0771 0.6859 0.408
## GLOBAL      NA 0.7299 0.694

Reference