R codes for Cox’s proportional hazards model

# Get access to the data 'Rossi' 
library(carData)
data(Rossi)
Rossi = Rossi[, c('week', 'arrest', 'fin', 'age', 'race', 'wexp', 'mar', 'paro', 'prio', 'educ')]
head(Rossi)
##   week arrest fin age  race wexp         mar paro prio educ
## 1   20      1  no  27 black   no not married  yes    3    3
## 2   17      1  no  18 black   no not married  yes    8    4
## 3   25      1  no  19 other  yes not married  yes   13    3
## 4   52      0 yes  23 black  yes     married  yes    1    5
## 5   52      0  no  19 other  yes not married  yes    3    3
## 6   52      0  no  24 black  yes not married   no    2    4
# Get descriptive data by arrest 
library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
table1(~age + fin + race + wexp + mar + paro + prio | arrest, data=Rossi)
## Warning in table1.formula(~age + fin + race + wexp + mar + paro + prio | : Terms
## to the right of '|' in formula 'x' define table columns and are expected to be
## factors with meaningful labels.
0
(N=318)
1
(N=114)
Overall
(N=432)
age
Mean (SD) 25.3 (6.31) 22.8 (5.12) 24.6 (6.11)
Median [Min, Max] 23.0 [17.0, 44.0] 21.0 [17.0, 44.0] 23.0 [17.0, 44.0]
fin
no 150 (47.2%) 66 (57.9%) 216 (50.0%)
yes 168 (52.8%) 48 (42.1%) 216 (50.0%)
race
black 277 (87.1%) 102 (89.5%) 379 (87.7%)
other 41 (12.9%) 12 (10.5%) 53 (12.3%)
wexp
no 123 (38.7%) 62 (54.4%) 185 (42.8%)
yes 195 (61.3%) 52 (45.6%) 247 (57.2%)
mar
married 45 (14.2%) 8 (7.0%) 53 (12.3%)
not married 273 (85.8%) 106 (93.0%) 379 (87.7%)
paro
no 119 (37.4%) 46 (40.4%) 165 (38.2%)
yes 199 (62.6%) 68 (59.6%) 267 (61.8%)
prio
Mean (SD) 2.70 (2.55) 3.77 (3.59) 2.98 (2.90)
Median [Min, Max] 2.00 [0, 15.0] 3.00 [0, 18.0] 2.00 [0, 18.0]
# Overall KP curve 
library(survival); library(survminer)
## Loading required package: ggplot2
## Loading required package: ggpubr
km = survfit(Surv(week, arrest) ~ 1, data=Rossi)
ggsurvplot(km, fun="event", risk.table=T, xlab="Tuần", censor=T)

# KP curve for two groups (fin)
km = survfit(Surv(week, arrest) ~ fin, data=Rossi)
ggsurvplot(km, fun="event", risk.table=T, xlab="Tuần", censor=T)
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.

# Cox's model 1 
m1 = coxph(Surv(week, arrest) ~ fin, data=Rossi)
summary(m1)
## Call:
## coxph(formula = Surv(week, arrest) ~ fin, data = Rossi)
## 
##   n= 432, number of events= 114 
## 
##           coef exp(coef) se(coef)      z Pr(>|z|)  
## finyes -0.3691    0.6914   0.1897 -1.945   0.0517 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##        exp(coef) exp(-coef) lower .95 upper .95
## finyes    0.6914      1.446    0.4767     1.003
## 
## Concordance= 0.546  (se = 0.023 )
## Likelihood ratio test= 3.84  on 1 df,   p=0.05
## Wald test            = 3.78  on 1 df,   p=0.05
## Score (logrank) test = 3.83  on 1 df,   p=0.05
# Baseline hazard 
basehaz(m1)
##         hazard time
## 1  0.002275953    1
## 2  0.004558153    2
## 3  0.006846634    3
## 4  0.009141432    4
## 5  0.011442579    5
## 6  0.013750113    6
## 7  0.016064069    7
## 8  0.027709186    8
## 9  0.032403879    9
## 10 0.034758116   10
## 11 0.039485641   11
## 12 0.044234958   12
## 13 0.046616668   13
## 14 0.053800871   14
## 15 0.058620023   15
## 16 0.063459653   16
## 17 0.070767374   17
## 18 0.078137640   18
## 19 0.083083535   19
## 20 0.095551266   20
## 21 0.100579930   21
## 22 0.103105713   22
## 23 0.105636811   23
## 24 0.115831691   24
## 25 0.123551527   25
## 26 0.131343709   26
## 27 0.136579364   27
## 28 0.141848194   28
## 29 0.147149316   30
## 30 0.149810634   31
## 31 0.155157630   32
## 32 0.160535209   33
## 33 0.165945069   34
## 34 0.176842493   35
## 35 0.185086092   36
## 36 0.196174787   37
## 37 0.198966611   38
## 38 0.204577075   39
## 39 0.215899453   40
## 40 0.221612169   42
## 41 0.233138211   43
## 42 0.238961705   44
## 43 0.244824673   45
## 44 0.256651654   46
## 45 0.259628711   47
## 46 0.265613327   48
## 47 0.280732786   49
## 48 0.289912474   50
## 49 0.302292912   52
# Cox's model 2 
m2 = coxph(Surv(week, arrest) ~ fin + age, data=Rossi)
summary(m2)
## Call:
## coxph(formula = Surv(week, arrest) ~ fin + age, data = Rossi)
## 
##   n= 432, number of events= 114 
## 
##            coef exp(coef) se(coef)      z Pr(>|z|)    
## finyes -0.32790   0.72043  0.18985 -1.727 0.084145 .  
## age    -0.07148   0.93101  0.02087 -3.425 0.000614 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##        exp(coef) exp(-coef) lower .95 upper .95
## finyes    0.7204      1.388    0.4966    1.0452
## age       0.9310      1.074    0.8937    0.9699
## 
## Concordance= 0.612  (se = 0.027 )
## Likelihood ratio test= 18.29  on 2 df,   p=1e-04
## Wald test            = 15.18  on 2 df,   p=5e-04
## Score (logrank) test = 15.58  on 2 df,   p=4e-04
# Visualization of predicted probability 
# age = 30, group = yes 
t1 = survfit(m2, newdata = data.frame(age=30, fin="yes"))

# age = 30, group = no 
t2 = survfit(m2, newdata = data.frame(age=30, fin="no"))

# Plot results 

library(ggfortify); library(gridExtra)

p1 = autoplot(t1, fun="event", xlab="Tuần", ylab="Xác suất tái phạm", ylim=c(0, 0.50), main="30 tuổi, nhóm can thiệp")
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
p2 = autoplot(t2, fun="event", xlab="Tuần", ylab="Xác suất tái phạm", ylim=c(0, 0.50), main="30 tuổi, nhóm chứng")
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
grid.arrange(p1, p2,ncol=2)

# Putting together
t1 = survfit(m2, newdata = data.frame(age=30, fin="yes"))
t2 = survfit(m2, newdata = data.frame(age=30, fin="no"))

time = c(t1$time, t2$time)
surv = c(t1$surv, t2$surv)
group = c(rep("Intervention", 49), rep("Control", 49))
dat = data.frame(time, surv, group)

# Plot cumulative survival probability 
ggplot(data=dat, aes(x=time, y=surv, group=group, col=group)) + geom_step()

# Plot cumulative hazard 
ggplot(data=dat, aes(x=time, y=1-surv, group=group, col=group)) + geom_step()