library(tidyverse)
library(survival)
data(heart)
head(heart)
## start stop event age year surgery transplant id
## 1 0 50 1 -17.155373 0.1232033 0 0 1
## 2 0 6 1 3.835729 0.2546201 0 0 2
## 3 0 1 0 6.297057 0.2655715 0 0 3
## 4 1 16 1 6.297057 0.2655715 0 1 3
## 5 0 36 0 -7.737166 0.4900753 0 0 4
## 6 36 39 1 -7.737166 0.4900753 0 1 4
jasa1, heart: processed data
start, stop, event: Entry and exit time and status for this interval of time
age: age-48 years
year: year of acceptance (in years after 1 Nov 1967)
surgery: prior bypass surgery 1=yes
transplant: received transplant 1=yes
id: patient id
fit0 <- coxph(formula = Surv(start, stop, event) ~ 1,
data = heart,
ties = c("efron","breslow","exact")[1])
fit1 <- coxph(formula = Surv(start, stop, event) ~ (age + surgery) * transplant,
data = heart,
ties = c("efron","breslow","exact")[1])
summary(fit1)
## Call:
## coxph(formula = Surv(start, stop, event) ~ (age + surgery) *
## transplant, data = heart, ties = c("efron", "breslow", "exact")[1])
##
## n= 172, number of events= 75
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.01391 1.01401 0.01812 0.768 0.443
## surgery -0.54654 0.57895 0.61091 -0.895 0.371
## transplant1 0.11950 1.12694 0.32774 0.365 0.715
## age:transplant1 0.03461 1.03522 0.02725 1.270 0.204
## surgery:transplant1 -0.29295 0.74606 0.75817 -0.386 0.699
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.0140 0.9862 0.9786 1.051
## surgery 0.5789 1.7273 0.1748 1.917
## transplant1 1.1269 0.8874 0.5928 2.142
## age:transplant1 1.0352 0.9660 0.9814 1.092
## surgery:transplant1 0.7461 1.3404 0.1688 3.297
##
## Concordance= 0.594 (se = 0.037 )
## Rsquare= 0.07 (max possible= 0.969 )
## Likelihood ratio test= 12.48 on 5 df, p=0.02882
## Wald test = 11.66 on 5 df, p=0.03981
## Score (logrank) test = 12.05 on 5 df, p=0.03408
The martingale residual is the estimated martingale process. In a single-event model, the maximum is 1, but can be arbitrarily negative.
heart$resid_mart <- residuals(fit1, type = "martingale")
ggplot(data = heart, mapping = aes(x = age, y = resid_mart)) +
geom_point() +
geom_smooth() +
labs(title = "age") +
theme_bw() + theme(legend.key = element_blank())
ggplot(data = heart, mapping = aes(x = factor(surgery), y = resid_mart)) +
geom_violin() +
geom_point() +
labs(title = "surgery") +
theme_bw() + theme(legend.key = element_blank())
This can be calculated from the martingale residual, which is the event status minus Cox-Snell residual. Then the Cox-Snell residual can be used as pseudo observation times to fit a null Cox model to obtain the Nelson-Aalen cumulative hazard estimator (basehaz function) at unique values of Cox-Snell residuals. The slope should be approximately 45 degrees.
## Cox-Snell residuals
heart$resid_coxsnell <- -(heart$resid_mart - heart$event)
## Fit model on Cox-Snell residuals (Approximately Expo(1) distributed under correct model)
fit_coxsnell <- coxph(formula = Surv(resid_coxsnell, event) ~ 1,
data = heart,
ties = c("efron","breslow","exact")[1])
## Nelson-Aalen estimator for baseline hazard (all covariates zero)
df_base_haz <- basehaz(fit_coxsnell, centered = FALSE)
## Warning in is.na(coef(fit)): is.na() applied to non-(list or vector) of type 'NULL'
head(df_base_haz)
## hazard time
## 1 0.000000000 0.003677932
## 2 0.005847953 0.006724770
## 3 0.005847953 0.009089276
## 4 0.005847953 0.011693582
## 5 0.011800334 0.014092916
## 6 0.017788358 0.017577733
## Plot
ggplot(data = df_base_haz, mapping = aes(x = time, y = hazard)) +
geom_point() +
scale_x_continuous(limit = c(0,3.5)) +
scale_y_continuous(limit = c(0,3.5)) +
labs(x = "Cox-Snell residuals as pseudo observed times",
y = "Estimated cumulative hazard at pseudo observed times") +
theme_bw() + theme(legend.key = element_blank())