Miao Yu
2015/1/6
# perform survival analysis
Rossi <- read.table('http://cran.r-project.org/doc/contrib/Fox-Companion/Rossi.txt', header=T)
Rossi[1:5, 1:10]
## week arrest fin age race wexp mar paro prio educ
## 1 20 1 0 27 1 0 0 1 3 3
## 2 17 1 0 18 1 0 0 1 8 4
## 3 25 1 0 19 0 1 0 1 13 3
## 4 52 0 1 23 1 1 1 1 1 5
## 5 52 0 0 19 0 1 0 1 3 3
mod.allison <- coxph(Surv(week, arrest) ~ fin + age + race + wexp + mar + paro + prio, data=Rossi)
summary(mod.allison)
## Call:
## coxph(formula = Surv(week, arrest) ~ fin + age + race + wexp +
## mar + paro + prio, data = Rossi)
##
## n= 432, number of events= 114
##
## coef exp(coef) se(coef) z Pr(>|z|)
## fin -0.37942 0.68426 0.19138 -1.983 0.04742 *
## age -0.05744 0.94418 0.02200 -2.611 0.00903 **
## race 0.31390 1.36875 0.30799 1.019 0.30812
## wexp -0.14980 0.86088 0.21222 -0.706 0.48029
## mar -0.43370 0.64810 0.38187 -1.136 0.25606
## paro -0.08487 0.91863 0.19576 -0.434 0.66461
## prio 0.09150 1.09581 0.02865 3.194 0.00140 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## fin 0.6843 1.4614 0.4702 0.9957
## age 0.9442 1.0591 0.9043 0.9858
## race 1.3688 0.7306 0.7484 2.5032
## wexp 0.8609 1.1616 0.5679 1.3049
## mar 0.6481 1.5430 0.3066 1.3699
## paro 0.9186 1.0886 0.6259 1.3482
## prio 1.0958 0.9126 1.0360 1.1591
##
## Concordance= 0.64 (se = 0.027 )
## Rsquare= 0.074 (max possible= 0.956 )
## Likelihood ratio test= 33.27 on 7 df, p=2.362e-05
## Wald test = 32.11 on 7 df, p=3.871e-05
## Score (logrank) test = 33.53 on 7 df, p=2.11e-05
# plot time vs survival prob
plot(survfit(mod.allison), ylim=c(.7, 1), xlab='Weeks', ylab='Proportion Not Rearrested')
attach(Rossi)
Rossi.fin <- data.frame(fin=c(0,1), age=rep(mean(age),2), race=rep(mean(race),2), wexp=rep(mean(wexp),2), mar=rep(mean(mar),2), paro=rep(mean(paro),2), prio=rep(mean(prio),2))
detach()
plot(survfit(mod.allison, newdata=Rossi.fin), conf.int=T, lty=c(1,2), ylim=c(.6, 1))
legend("bottomleft", legend=c('fin = 0', 'fin = 1'), lty=c(1,2))
sum(!is.na(Rossi[,11:62])) # record count
## [1] 19809
Rossi2 <- matrix(0, 19809, 14) # to hold new data set
colnames(Rossi2) <- c('start', 'stop', 'arresttime', names(Rossi)[1:10], 'employed')
row<-0
for (i in 1:nrow(Rossi)) {
for (j in 11:62) {
if (is.na(Rossi[i, j])) next
else {
row <- row + 1 # increment row counter
start <- j - 11 # start time (previous week)
stop <- start + 1 # stop time (current week)
arresttime <- if (stop == Rossi[i, 1] && Rossi[i, 2] ==1) 1 else 0
Rossi2[row,] <- c(start, stop, arresttime, unlist(Rossi[i, c(1:10, j)]))
}
}
}
Rossi2 <- as.data.frame(Rossi2)
remove(i, j, row, start, stop, arresttime)
modallison2 <- coxph(Surv(start, stop, arresttime) ~ fin + age + race + wexp + mar + paro + prio + employed, data=Rossi2)
summary(modallison2)
## Call:
## coxph(formula = Surv(start, stop, arresttime) ~ fin + age + race +
## wexp + mar + paro + prio + employed, data = Rossi2)
##
## n= 19809, number of events= 114
##
## coef exp(coef) se(coef) z Pr(>|z|)
## fin -0.35672 0.69997 0.19113 -1.866 0.06198 .
## age -0.04634 0.95472 0.02174 -2.132 0.03301 *
## race 0.33866 1.40306 0.30960 1.094 0.27402
## wexp -0.02555 0.97477 0.21142 -0.121 0.90380
## mar -0.29375 0.74546 0.38303 -0.767 0.44314
## paro -0.06421 0.93781 0.19468 -0.330 0.74156
## prio 0.08514 1.08887 0.02896 2.940 0.00328 **
## employed -1.32832 0.26492 0.25072 -5.298 1.17e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## fin 0.7000 1.4286 0.4813 1.0180
## age 0.9547 1.0474 0.9149 0.9963
## race 1.4031 0.7127 0.7648 2.5740
## wexp 0.9748 1.0259 0.6441 1.4753
## mar 0.7455 1.3414 0.3519 1.5793
## paro 0.9378 1.0663 0.6403 1.3735
## prio 1.0889 0.9184 1.0288 1.1525
## employed 0.2649 3.7747 0.1621 0.4330
##
## Concordance= 0.708 (se = 0.027 )
## Rsquare= 0.003 (max possible= 0.066 )
## Likelihood ratio test= 68.65 on 8 df, p=9.114e-12
## Wald test = 56.15 on 8 df, p=2.632e-09
## Score (logrank) test = 64.48 on 8 df, p=6.102e-11
modallison3 <- coxph(Surv(week, arrest) ~ fin + age + prio, data=Rossi)
modallison3
## Call:
## coxph(formula = Surv(week, arrest) ~ fin + age + prio, data = Rossi)
##
##
## coef exp(coef) se(coef) z p
## fin -0.3470 0.707 0.1902 -1.82 0.06800
## age -0.0671 0.935 0.0209 -3.22 0.00130
## prio 0.0969 1.102 0.0273 3.56 0.00038
##
## Likelihood ratio test=29.1 on 3 df, p=2.19e-06 n= 432, number of events= 114
cox.zph(modallison3)
## rho chisq p
## fin -0.00657 0.00507 0.9433
## age -0.20976 6.54147 0.0105
## prio -0.08004 0.77288 0.3793
## GLOBAL NA 7.13046 0.0679
par(mfrow=c(2,2))
plot(cox.zph(modallison3))
modallison4 <- coxph(Surv(start,stop,arresttime)~fin+age+age:stop:stop+prio, data = Rossi2)
modallison4
## Call:
## coxph(formula = Surv(start, stop, arresttime) ~ fin + age + age:stop:stop +
## prio, data = Rossi2)
##
##
## coef exp(coef) se(coef) z p
## fin -0.34856 0.706 0.19023 -1.832 0.06700
## age 0.03228 1.033 0.03943 0.819 0.41000
## prio 0.09818 1.103 0.02726 3.602 0.00032
## age:stop -0.00383 0.996 0.00147 -2.612 0.00900
##
## Likelihood ratio test=36 on 4 df, p=2.85e-07 n= 19809, number of events= 114
Rossi$agecat <- recode(Rossi$age, " lo:19=1; 20:25=2; 26:30=3; 31:hi=4 ")
table(Rossi$agecat)
##
## 1 2 3 4
## 66 236 66 64
modallison5 <- coxph(Surv(week,arrest)~fin+prio+strata(agecat),data = Rossi)
modallison5
## Call:
## coxph(formula = Surv(week, arrest) ~ fin + prio + strata(agecat),
## data = Rossi)
##
##
## coef exp(coef) se(coef) z p
## fin -0.3408 0.711 0.190 -1.79 0.0730
## prio 0.0941 1.099 0.027 3.48 0.0005
##
## Likelihood ratio test=13.4 on 2 df, p=0.00122 n= 432, number of events= 114
cox.zph(modallison5)
## rho chisq p
## fin -0.0183 0.0392 0.843
## prio -0.0771 0.6859 0.408
## GLOBAL NA 0.7299 0.694