These examples are reproduction of Chapter 5 of the book. To get identical results, use ties == “breslow”.
## Load foreign package
library(foreign)
## Load leukemia dataset
leuk <- read.dta("http://www.sph.emory.edu/~dkleinb/surv2datasets/anderson.dta")
## Load survival package
library(survival)
## Create survival vector for fish dataset
leuk$SurvObj <- with(leuk, Surv(survt, status))
The coefficient for the rx variable is very different between females and males, but it is assumed to be the same in the stratified
## Stratified by sex
res.strata.sex <- coxph(formula = SurvObj ~ logwbc + rx + strata(sex),
data = leuk,
ties = c("efron","breslow","exact")[1])
res.strata.sex
Call:
coxph(formula = SurvObj ~ logwbc + rx + strata(sex), data = leuk,
ties = c("efron", "breslow", "exact")[1])
coef exp(coef) se(coef) z p
logwbc 1.454 4.28 0.344 4.22 0.000024
rx 0.998 2.71 0.474 2.11 0.035000
Likelihood ratio test=32.1 on 2 df, p=0.000000109 n= 42, number of events= 30
## Separate models for females and males
res.separate <- lapply(split(leuk, leuk$sex),
FUN = function(DF) {
coxph(SurvObj ~ logwbc + rx, DF)
})
res.separate
$`0`
Call:
coxph(formula = SurvObj ~ logwbc + rx, data = DF)
coef exp(coef) se(coef) z p
logwbc 1.206 3.34 0.503 2.396 0.017
rx 0.311 1.37 0.564 0.552 0.580
Likelihood ratio test=6.65 on 2 df, p=0.0361 n= 22, number of events= 16
$`1`
Call:
coxph(formula = SurvObj ~ logwbc + rx, data = DF)
coef exp(coef) se(coef) z p
logwbc 1.74 5.71 0.536 3.25 0.0011
rx 1.98 7.23 0.739 2.68 0.0075
Likelihood ratio test=29.2 on 2 df, p=0.000000461 n= 20, number of events= 14
An interaction model allows for different coefficients for different strata.
## Model including interaction between sex and logwbc and rx
## Do not include the main effect of sex
res.interaction.sex <- coxph(formula = SurvObj ~ (logwbc + rx)*sex - sex + strata(sex),
data = leuk,
ties = c("efron","breslow","exact")[1])
res.interaction.sex
Call:
coxph(formula = SurvObj ~ (logwbc + rx) * sex - sex + strata(sex),
data = leuk, ties = c("efron", "breslow", "exact")[1])
coef exp(coef) se(coef) z p
logwbc 1.206 3.34 0.503 2.396 0.017
rx 0.311 1.37 0.564 0.552 0.580
logwbc:sex 0.537 1.71 0.735 0.730 0.470
rx:sex 1.667 5.29 0.930 1.793 0.073
Likelihood ratio test=35.8 on 4 df, p=0.000000314 n= 42, number of events= 30
## Comparing stratified model and stratified with interaction model
anova(res.interaction.sex, res.strata.sex)
Analysis of Deviance Table
Cox model: response is SurvObj
Model 1: ~ (logwbc + rx) * sex - sex + strata(sex)
Model 2: ~ logwbc + rx + strata(sex)
loglik Chisq Df P(>|Chi|)
1 -53.9
2 -55.7 3.77 2 0.15
This model is not statistically significantly different from the no interaction model at the 0.05 level, thus, we conclude that the model without interaction is adequate.
## Load vets.dta dataset
vets <- read.dta("http://www.sph.emory.edu/~dkleinb/surv2datasets/vets.dta")
head(vets)
tx ct1 ct2 ct3 ct4 survt perf dd age priortx status
1 1 0 0 0 1 72 60 7 69 0 1
2 1 0 0 0 1 411 70 5 64 10 1
3 1 0 0 0 1 228 60 3 38 0 1
4 1 0 0 0 1 126 60 9 63 10 1
5 1 0 0 0 1 118 70 11 65 10 1
6 1 0 0 0 1 10 20 5 49 0 1
vets <- within(vets, {
tx <- factor(tx, levels = 1:2, labels = c("standard","test"))
priortx <- factor(priortx, levels = c(0,10), labels = c("none","some"))
require(survival)
SurvObj <- Surv(survt, status)
})
## Multiple dummy variables to one variable
## http://stackoverflow.com/questions/5450538/going-from-multiple-dummy-variables-to-a-single-variable
dummies <- vets[, paste0("ct", 1:4)]
vets$CellType <- names(dummies)[apply(dummies == 1, MARGIN = 1, FUN = which)]
vets$CellType <- factor(vets$CellType)
head(vets)
tx ct1 ct2 ct3 ct4 survt perf dd age priortx status SurvObj CellType
1 standard 0 0 0 1 72 60 7 69 none 1 72 ct4
2 standard 0 0 0 1 411 70 5 64 some 1 411 ct4
3 standard 0 0 0 1 228 60 3 38 none 1 228 ct4
4 standard 0 0 0 1 126 60 9 63 some 1 126 ct4
5 standard 0 0 0 1 118 70 11 65 some 1 118 ct4
6 standard 0 0 0 1 10 20 5 49 none 1 10 ct4
The cell type variable and performance status variable do not satisfy
## Fit an additive contribution model without interaction
all.additive.model <- coxph(SurvObj ~ tx + ct1 + ct2 + ct3 + perf + dd + age + priortx, data = vets)
## Check for PH
res.zph <- cox.zph(all.additive.model)
res.zph
rho chisq p
txtest -0.0273 0.1227 0.726104
ct1 0.1712 4.1093 0.042649
ct2 0.1424 2.9794 0.084329
ct3 0.0128 0.0261 0.871621
perf 0.3073 13.0449 0.000304
dd 0.1491 2.9436 0.086217
age 0.1890 5.3476 0.020750
priortxsome -0.1767 4.4714 0.034467
GLOBAL NA 27.9972 0.000475
## Plot
plot(res.zph)
## Create dichotomous PSbin variable
vets$PSbin <- as.numeric(with(vets, perf >= 60))
## Create a single stratification variable using interaction()
vets$Z <- with(vets, interaction(CellType, PSbin))
## Change reference in accordance with Kelinbaum pages 218-219
vets$Z <- relevel(vets$Z, ref = "ct4.0")
## Stratified by cell type and PSbin
stratified1 <- coxph(SurvObj ~ tx + age + strata(Z), data = vets)
stratified1
Call:
coxph(formula = SurvObj ~ tx + age + strata(Z), data = vets)
coef exp(coef) se(coef) z p
txtest 0.12778 1.136 0.2085 0.613 0.54
age -0.00127 0.999 0.0101 -0.126 0.90
Likelihood ratio test=0.38 on 2 df, p=0.829 n= 137, number of events= 128
## Create 0,1 tx.num variable to avoid overparametrization
vets$tx.num <- as.numeric(vets$tx == "test")
## Interaction model allowing for different coefficients in each stratum
strat.int1 <- coxph(SurvObj ~ (tx.num + age)*Z - Z + strata(Z), data = vets)
strat.int1
Call:
coxph(formula = SurvObj ~ (tx.num + age) * Z - Z + strata(Z),
data = vets)
coef exp(coef) se(coef) z p
tx.num 0.30440 1.356 0.6647 0.4580 0.65
age -0.00158 0.998 0.0301 -0.0524 0.96
tx.num:Zct1.0 2.33240 10.303 1.7724 1.3159 0.19
tx.num:Zct2.0 -1.19200 0.304 0.9566 -1.2460 0.21
tx.num:Zct3.0 0.56805 1.765 0.8558 0.6638 0.51
tx.num:Zct1.1 0.50474 1.657 0.9541 0.5290 0.60
tx.num:Zct2.1 0.57517 1.777 0.9726 0.5914 0.55
tx.num:Zct3.1 0.01072 1.011 0.8226 0.0130 0.99
tx.num:Zct4.1 -1.05184 0.349 0.8683 -1.2114 0.23
age:Zct1.0 0.07865 1.082 0.0640 1.2289 0.22
age:Zct2.0 -0.04623 0.955 0.0452 -1.0233 0.31
age:Zct3.0 -0.05897 0.943 0.0423 -1.3953 0.16
age:Zct1.1 -0.03653 0.964 0.0459 -0.7951 0.43
age:Zct2.1 -0.03962 0.961 0.0450 -0.8801 0.38
age:Zct3.1 0.03573 1.036 0.0383 0.9331 0.35
age:Zct4.1 0.05215 1.054 0.0483 1.0787 0.28
Likelihood ratio test=24.8 on 16 df, p=0.074 n= 137, number of events= 128
## Model comparison between no interaction vs interaction
anova(stratified1, strat.int1)
Analysis of Deviance Table
Cox model: response is SurvObj
Model 1: ~ tx + age + strata(Z)
Model 2: ~ (tx.num + age) * Z - Z + strata(Z)
loglik Chisq Df P(>|Chi|)
1 -261
2 -249 24.4 14 0.041 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Interaction for both tx and age vs age only
strat.int2 <- coxph(SurvObj ~ tx.num + age*Z - Z + strata(Z), data = vets)
strat.int2
Call:
coxph(formula = SurvObj ~ tx.num + age * Z - Z + strata(Z), data = vets)
coef exp(coef) se(coef) z p
tx.num 0.32073 1.378 0.2284 1.4040 0.160
age -0.00173 0.998 0.0295 -0.0586 0.950
age:Zct1.0 0.02345 1.024 0.0503 0.4661 0.640
age:Zct2.0 -0.04309 0.958 0.0420 -1.0251 0.310
age:Zct3.0 -0.04505 0.956 0.0393 -1.1478 0.250
age:Zct1.1 -0.01875 0.981 0.0384 -0.4878 0.630
age:Zct2.1 -0.02610 0.974 0.0406 -0.6432 0.520
age:Zct3.1 0.03584 1.036 0.0376 0.9539 0.340
age:Zct4.1 0.08305 1.087 0.0501 1.6575 0.097
Likelihood ratio test=13.8 on 9 df, p=0.13 n= 137, number of events= 128
anova(strat.int1, strat.int2)
Analysis of Deviance Table
Cox model: response is SurvObj
Model 1: ~ (tx.num + age) * Z - Z + strata(Z)
Model 2: ~ tx.num + age * Z - Z + strata(Z)
loglik Chisq Df P(>|Chi|)
1 -249
2 -255 11 7 0.14
## Interaction for both tx and age vs tx only
strat.int3 <- coxph(SurvObj ~ age + tx.num*Z - Z + strata(Z), data = vets)
strat.int3
Call:
coxph(formula = SurvObj ~ age + tx.num * Z - Z + strata(Z), data = vets)
coef exp(coef) se(coef) z p
age -0.00343 0.997 0.0106 -0.3231 0.75
tx.num 0.31293 1.367 0.6522 0.4798 0.63
tx.num:Zct1.0 0.83227 2.299 1.3626 0.6108 0.54
tx.num:Zct2.0 -1.23243 0.292 0.9412 -1.3094 0.19
tx.num:Zct3.0 0.12572 1.134 0.8015 0.1569 0.88
tx.num:Zct1.1 0.03161 1.032 0.8037 0.0393 0.97
tx.num:Zct2.1 0.19607 1.217 0.8780 0.2233 0.82
tx.num:Zct3.1 0.09769 1.103 0.8073 0.1210 0.90
tx.num:Zct4.1 -1.29379 0.274 0.8336 -1.5521 0.12
Likelihood ratio test=9.35 on 9 df, p=0.406 n= 137, number of events= 128
anova(strat.int1, strat.int3)
Analysis of Deviance Table
Cox model: response is SurvObj
Model 1: ~ (tx.num + age) * Z - Z + strata(Z)
Model 2: ~ age + tx.num * Z - Z + strata(Z)
loglik Chisq Df P(>|Chi|)
1 -249
2 -257 15.4 7 0.031 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
These models are significantly different, indicating the no-interaction model is not adequate. The full interaction model is not statistically different from the model with interaction for age only but statistically different from the model with interaction for tx only. Therefore, the interaction model for age only is adequate.
## Create dataset to predict survival for
newdat <- expand.grid(rx = 0:1, sex = 0:1)
## Define log(-log(S)) vs log(time) plot function
plot.fun <- function(MODEL, newdata = newdat) {
## Plot
plot(survfit(MODEL, newdata = newdata),
log = "x", fun = function(S) log(-log(S)),
lty = rep(1:2, 2), col = rep(c("red","blue"), each = 2)
)
## Add legend
legend("topleft", legend = strata(newdat, sep = ":"),
lty = rep(1:2, 2), col = rep(c("red","blue"), each = 2))
}
## PH for both rx and sex without interaction between rx and sex
model1 <- coxph(formula = SurvObj ~ rx + sex,
data = leuk)
model1
Call:
coxph(formula = SurvObj ~ rx + sex, data = leuk)
coef exp(coef) se(coef) z p
rx 1.674 5.33 0.437 3.825 0.00013
sex 0.295 1.34 0.424 0.697 0.49000
Likelihood ratio test=16.8 on 2 df, p=0.000222 n= 42, number of events= 30
plot.fun(model1)
title("Same baselines, same effects
all parallel with same distance")
This model assumes all four curves are parallel (proportional hazards assumption), and the effect of rx is the same across levels of sex.
## PH for both rx and sex with interaction between rx and sex
model2 <- coxph(formula = SurvObj ~ rx * sex,
data = leuk)
model2
Call:
coxph(formula = SurvObj ~ rx * sex, data = leuk)
coef exp(coef) se(coef) z p
rx 0.624 1.87 0.541 1.15 0.250
sex -1.108 0.33 0.706 -1.57 0.120
rx:sex 1.957 7.08 0.815 2.40 0.016
Likelihood ratio test=22.5 on 3 df, p=0.000052 n= 42, number of events= 30
plot.fun(model2)
title("Same baselines, different effects
all parallel with different distances")
This model assumes all four curves are parallel (proportional hazards assumption), but the effect of rx can be different across levels of sex.
## PH for rx, stratified by sex
model3 <- coxph(formula = SurvObj ~ rx + strata(sex),
data = leuk)
model3
Call:
coxph(formula = SurvObj ~ rx + strata(sex), data = leuk)
coef exp(coef) se(coef) z p
rx 1.32 3.76 0.438 3.02 0.0025
Likelihood ratio test=10.1 on 1 df, p=0.00148 n= 42, number of events= 30
plot.fun(model3, newdata = data.frame(rx = 0:1))
title("Different baselines, same effects
parallel within each stratum with equal distance")
This model assumes curves are parallel within each sex stratum, but the effect of rx is the same across sex strata.
## PH for rx, stratified by sex, interaction with sex
model4 <- coxph(formula = SurvObj ~ rx + rx:sex + strata(sex),
data = leuk)
model4
Call:
coxph(formula = SurvObj ~ rx + rx:sex + strata(sex), data = leuk)
coef exp(coef) se(coef) z p
rx 0.487 1.63 0.550 0.885 0.380
rx:sex 1.660 5.26 0.834 1.989 0.047
Likelihood ratio test=14 on 2 df, p=0.000898 n= 42, number of events= 30
## Model for female
model4.0 <- coxph(formula = SurvObj ~ rx,
data = leuk, subset = (sex == 0))
## Model for male
model4.1 <- coxph(formula = SurvObj ~ rx,
data = leuk, subset = (sex == 1))
plot.fun2 <- function(MODEL, ...){
plot(survfit(MODEL, newdata = data.frame(rx = 0:1)),
log = "x", fun = function(S) log(-log(S)),
lty = 1:2, xlim = c(1,35), ylim = c(-4,1.5), ...
)
}
## Plot female curves
plot.fun2(model4.0, col = "red")
## Add male curves
par(new = T)
plot.fun2(model4.1, col = "blue", bty = "n", xaxt = "n", yaxt = "n")
## Add legend
legend("topleft", legend = strata(newdat, sep = ":"),
lty = rep(1:2, 2), col = rep(c("blue","red"), each = 2))
title("Different baselines, Different effects
parallel within each stratum with different distances")
This model assumes curves are parallel within each sex stratum, but the effect of rx can be different across sex strata.