R codes for Cox’s proportional hazards model

# Get access to the data 'Rossi' 
library(carData)
data(Rossi)
Rossi = Rossi[, c('week', 'arrest', 'fin', 'age', 'race', 'wexp', 'mar', 'paro', 'prio', 'educ')]
head(Rossi)

##   week arrest fin age  race wexp         mar paro prio educ
## 1   20      1  no  27 black   no not married  yes    3    3
## 2   17      1  no  18 black   no not married  yes    8    4
## 3   25      1  no  19 other  yes not married  yes   13    3
## 4   52      0 yes  23 black  yes     married  yes    1    5
## 5   52      0  no  19 other  yes not married  yes    3    3
## 6   52      0  no  24 black  yes not married   no    2    4

# Get descriptive data by arrest 
library(table1)

## 
## Attaching package: 'table1'

## The following objects are masked from 'package:base':
## 
##     units, units<-

table1(~age + fin + race + wexp + mar + paro + prio | arrest, data=Rossi)

## Warning in table1.formula(~age + fin + race + wexp + mar + paro + prio | : Terms
## to the right of '|' in formula 'x' define table columns and are expected to be
## factors with meaningful labels.

	0 (N=318)	1 (N=114)	Overall (N=432)
age
Mean (SD)	25.3 (6.31)	22.8 (5.12)	24.6 (6.11)
Median [Min, Max]	23.0 [17.0, 44.0]	21.0 [17.0, 44.0]	23.0 [17.0, 44.0]
fin
no	150 (47.2%)	66 (57.9%)	216 (50.0%)
yes	168 (52.8%)	48 (42.1%)	216 (50.0%)
race
black	277 (87.1%)	102 (89.5%)	379 (87.7%)
other	41 (12.9%)	12 (10.5%)	53 (12.3%)
wexp
no	123 (38.7%)	62 (54.4%)	185 (42.8%)
yes	195 (61.3%)	52 (45.6%)	247 (57.2%)
mar
married	45 (14.2%)	8 (7.0%)	53 (12.3%)
not married	273 (85.8%)	106 (93.0%)	379 (87.7%)
paro
no	119 (37.4%)	46 (40.4%)	165 (38.2%)
yes	199 (62.6%)	68 (59.6%)	267 (61.8%)
prio
Mean (SD)	2.70 (2.55)	3.77 (3.59)	2.98 (2.90)
Median [Min, Max]	2.00 [0, 15.0]	3.00 [0, 18.0]	2.00 [0, 18.0]

# Overall KP curve 
library(survival); library(survminer)

## Loading required package: ggplot2

## Loading required package: ggpubr

km = survfit(Surv(week, arrest) ~ 1, data=Rossi)
ggsurvplot(km, fun="event", risk.table=T, xlab="Tuần", censor=T)

# KP curve for two groups (fin)
km = survfit(Surv(week, arrest) ~ fin, data=Rossi)
ggsurvplot(km, fun="event", risk.table=T, xlab="Tuần", censor=T)

## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.

# Cox's model 1 
m1 = coxph(Surv(week, arrest) ~ fin, data=Rossi)
summary(m1)

## Call:
## coxph(formula = Surv(week, arrest) ~ fin, data = Rossi)
## 
##   n= 432, number of events= 114 
## 
##           coef exp(coef) se(coef)      z Pr(>|z|)  
## finyes -0.3691    0.6914   0.1897 -1.945   0.0517 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##        exp(coef) exp(-coef) lower .95 upper .95
## finyes    0.6914      1.446    0.4767     1.003
## 
## Concordance= 0.546  (se = 0.023 )
## Likelihood ratio test= 3.84  on 1 df,   p=0.05
## Wald test            = 3.78  on 1 df,   p=0.05
## Score (logrank) test = 3.83  on 1 df,   p=0.05

# Baseline hazard 
basehaz(m1)

##         hazard time
## 1  0.002275953    1
## 2  0.004558153    2
## 3  0.006846634    3
## 4  0.009141432    4
## 5  0.011442579    5
## 6  0.013750113    6
## 7  0.016064069    7
## 8  0.027709186    8
## 9  0.032403879    9
## 10 0.034758116   10
## 11 0.039485641   11
## 12 0.044234958   12
## 13 0.046616668   13
## 14 0.053800871   14
## 15 0.058620023   15
## 16 0.063459653   16
## 17 0.070767374   17
## 18 0.078137640   18
## 19 0.083083535   19
## 20 0.095551266   20
## 21 0.100579930   21
## 22 0.103105713   22
## 23 0.105636811   23
## 24 0.115831691   24
## 25 0.123551527   25
## 26 0.131343709   26
## 27 0.136579364   27
## 28 0.141848194   28
## 29 0.147149316   30
## 30 0.149810634   31
## 31 0.155157630   32
## 32 0.160535209   33
## 33 0.165945069   34
## 34 0.176842493   35
## 35 0.185086092   36
## 36 0.196174787   37
## 37 0.198966611   38
## 38 0.204577075   39
## 39 0.215899453   40
## 40 0.221612169   42
## 41 0.233138211   43
## 42 0.238961705   44
## 43 0.244824673   45
## 44 0.256651654   46
## 45 0.259628711   47
## 46 0.265613327   48
## 47 0.280732786   49
## 48 0.289912474   50
## 49 0.302292912   52

# Cox's model 2 
m2 = coxph(Surv(week, arrest) ~ fin + age, data=Rossi)
summary(m2)

## Call:
## coxph(formula = Surv(week, arrest) ~ fin + age, data = Rossi)
## 
##   n= 432, number of events= 114 
## 
##            coef exp(coef) se(coef)      z Pr(>|z|)    
## finyes -0.32790   0.72043  0.18985 -1.727 0.084145 .  
## age    -0.07148   0.93101  0.02087 -3.425 0.000614 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##        exp(coef) exp(-coef) lower .95 upper .95
## finyes    0.7204      1.388    0.4966    1.0452
## age       0.9310      1.074    0.8937    0.9699
## 
## Concordance= 0.612  (se = 0.027 )
## Likelihood ratio test= 18.29  on 2 df,   p=1e-04
## Wald test            = 15.18  on 2 df,   p=5e-04
## Score (logrank) test = 15.58  on 2 df,   p=4e-04

# Visualization of predicted probability 
# age = 30, group = yes 
t1 = survfit(m2, newdata = data.frame(age=30, fin="yes"))

# age = 30, group = no 
t2 = survfit(m2, newdata = data.frame(age=30, fin="no"))

# Plot results 

library(ggfortify); library(gridExtra)

p1 = autoplot(t1, fun="event", xlab="Tuần", ylab="Xác suất tái phạm", ylim=c(0, 0.50), main="30 tuổi, nhóm can thiệp")

## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

p2 = autoplot(t2, fun="event", xlab="Tuần", ylab="Xác suất tái phạm", ylim=c(0, 0.50), main="30 tuổi, nhóm chứng")

## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

grid.arrange(p1, p2,ncol=2)

# Putting together
t1 = survfit(m2, newdata = data.frame(age=30, fin="yes"))
t2 = survfit(m2, newdata = data.frame(age=30, fin="no"))

time = c(t1$time, t2$time)
surv = c(t1$surv, t2$surv)
group = c(rep("Intervention", 49), rep("Control", 49))
dat = data.frame(time, surv, group)

# Plot cumulative survival probability 
ggplot(data=dat, aes(x=time, y=surv, group=group, col=group)) + geom_step()

# Plot cumulative hazard 
ggplot(data=dat, aes(x=time, y=1-surv, group=group, col=group)) + geom_step()

Cox’s proportional hazards model

T Nguyen

14/12/2020

R codes for Cox’s proportional hazards model