# Get access to the data 'Rossi'
library(carData)
data(Rossi)
Rossi = Rossi[, c('week', 'arrest', 'fin', 'age', 'race', 'wexp', 'mar', 'paro', 'prio', 'educ')]
head(Rossi)
## week arrest fin age race wexp mar paro prio educ
## 1 20 1 no 27 black no not married yes 3 3
## 2 17 1 no 18 black no not married yes 8 4
## 3 25 1 no 19 other yes not married yes 13 3
## 4 52 0 yes 23 black yes married yes 1 5
## 5 52 0 no 19 other yes not married yes 3 3
## 6 52 0 no 24 black yes not married no 2 4
# Get descriptive data by arrest
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
table1(~age + fin + race + wexp + mar + paro + prio | arrest, data=Rossi)
## Warning in table1.formula(~age + fin + race + wexp + mar + paro + prio | : Terms
## to the right of '|' in formula 'x' define table columns and are expected to be
## factors with meaningful labels.
| 0 (N=318) |
1 (N=114) |
Overall (N=432) |
|
|---|---|---|---|
| age | |||
| Mean (SD) | 25.3 (6.31) | 22.8 (5.12) | 24.6 (6.11) |
| Median [Min, Max] | 23.0 [17.0, 44.0] | 21.0 [17.0, 44.0] | 23.0 [17.0, 44.0] |
| fin | |||
| no | 150 (47.2%) | 66 (57.9%) | 216 (50.0%) |
| yes | 168 (52.8%) | 48 (42.1%) | 216 (50.0%) |
| race | |||
| black | 277 (87.1%) | 102 (89.5%) | 379 (87.7%) |
| other | 41 (12.9%) | 12 (10.5%) | 53 (12.3%) |
| wexp | |||
| no | 123 (38.7%) | 62 (54.4%) | 185 (42.8%) |
| yes | 195 (61.3%) | 52 (45.6%) | 247 (57.2%) |
| mar | |||
| married | 45 (14.2%) | 8 (7.0%) | 53 (12.3%) |
| not married | 273 (85.8%) | 106 (93.0%) | 379 (87.7%) |
| paro | |||
| no | 119 (37.4%) | 46 (40.4%) | 165 (38.2%) |
| yes | 199 (62.6%) | 68 (59.6%) | 267 (61.8%) |
| prio | |||
| Mean (SD) | 2.70 (2.55) | 3.77 (3.59) | 2.98 (2.90) |
| Median [Min, Max] | 2.00 [0, 15.0] | 3.00 [0, 18.0] | 2.00 [0, 18.0] |
# Overall KP curve
library(survival); library(survminer)
## Loading required package: ggplot2
## Loading required package: ggpubr
km = survfit(Surv(week, arrest) ~ 1, data=Rossi)
ggsurvplot(km, fun="event", risk.table=T, xlab="Tuần", censor=T)
# KP curve for two groups (fin)
km = survfit(Surv(week, arrest) ~ fin, data=Rossi)
ggsurvplot(km, fun="event", risk.table=T, xlab="Tuần", censor=T)
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
# Cox's model 1
m1 = coxph(Surv(week, arrest) ~ fin, data=Rossi)
summary(m1)
## Call:
## coxph(formula = Surv(week, arrest) ~ fin, data = Rossi)
##
## n= 432, number of events= 114
##
## coef exp(coef) se(coef) z Pr(>|z|)
## finyes -0.3691 0.6914 0.1897 -1.945 0.0517 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## finyes 0.6914 1.446 0.4767 1.003
##
## Concordance= 0.546 (se = 0.023 )
## Likelihood ratio test= 3.84 on 1 df, p=0.05
## Wald test = 3.78 on 1 df, p=0.05
## Score (logrank) test = 3.83 on 1 df, p=0.05
# Baseline hazard
basehaz(m1)
## hazard time
## 1 0.002275953 1
## 2 0.004558153 2
## 3 0.006846634 3
## 4 0.009141432 4
## 5 0.011442579 5
## 6 0.013750113 6
## 7 0.016064069 7
## 8 0.027709186 8
## 9 0.032403879 9
## 10 0.034758116 10
## 11 0.039485641 11
## 12 0.044234958 12
## 13 0.046616668 13
## 14 0.053800871 14
## 15 0.058620023 15
## 16 0.063459653 16
## 17 0.070767374 17
## 18 0.078137640 18
## 19 0.083083535 19
## 20 0.095551266 20
## 21 0.100579930 21
## 22 0.103105713 22
## 23 0.105636811 23
## 24 0.115831691 24
## 25 0.123551527 25
## 26 0.131343709 26
## 27 0.136579364 27
## 28 0.141848194 28
## 29 0.147149316 30
## 30 0.149810634 31
## 31 0.155157630 32
## 32 0.160535209 33
## 33 0.165945069 34
## 34 0.176842493 35
## 35 0.185086092 36
## 36 0.196174787 37
## 37 0.198966611 38
## 38 0.204577075 39
## 39 0.215899453 40
## 40 0.221612169 42
## 41 0.233138211 43
## 42 0.238961705 44
## 43 0.244824673 45
## 44 0.256651654 46
## 45 0.259628711 47
## 46 0.265613327 48
## 47 0.280732786 49
## 48 0.289912474 50
## 49 0.302292912 52
# Cox's model 2
m2 = coxph(Surv(week, arrest) ~ fin + age, data=Rossi)
summary(m2)
## Call:
## coxph(formula = Surv(week, arrest) ~ fin + age, data = Rossi)
##
## n= 432, number of events= 114
##
## coef exp(coef) se(coef) z Pr(>|z|)
## finyes -0.32790 0.72043 0.18985 -1.727 0.084145 .
## age -0.07148 0.93101 0.02087 -3.425 0.000614 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## finyes 0.7204 1.388 0.4966 1.0452
## age 0.9310 1.074 0.8937 0.9699
##
## Concordance= 0.612 (se = 0.027 )
## Likelihood ratio test= 18.29 on 2 df, p=1e-04
## Wald test = 15.18 on 2 df, p=5e-04
## Score (logrank) test = 15.58 on 2 df, p=4e-04
# Visualization of predicted probability
# age = 30, group = yes
t1 = survfit(m2, newdata = data.frame(age=30, fin="yes"))
# age = 30, group = no
t2 = survfit(m2, newdata = data.frame(age=30, fin="no"))
# Plot results
library(ggfortify); library(gridExtra)
p1 = autoplot(t1, fun="event", xlab="Tuần", ylab="Xác suất tái phạm", ylim=c(0, 0.50), main="30 tuổi, nhóm can thiệp")
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
p2 = autoplot(t2, fun="event", xlab="Tuần", ylab="Xác suất tái phạm", ylim=c(0, 0.50), main="30 tuổi, nhóm chứng")
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
grid.arrange(p1, p2,ncol=2)
# Putting together
t1 = survfit(m2, newdata = data.frame(age=30, fin="yes"))
t2 = survfit(m2, newdata = data.frame(age=30, fin="no"))
time = c(t1$time, t2$time)
surv = c(t1$surv, t2$surv)
group = c(rep("Intervention", 49), rep("Control", 49))
dat = data.frame(time, surv, group)
# Plot cumulative survival probability
ggplot(data=dat, aes(x=time, y=surv, group=group, col=group)) + geom_step()
# Plot cumulative hazard
ggplot(data=dat, aes(x=time, y=1-surv, group=group, col=group)) + geom_step()