# Kleinbaum: Stratified Cox regression

## References

These examples are reproduction of Chapter 5 of the book. To get identical results, use ties == “breslow”.

``````## Load foreign package
library(foreign)

## Load leukemia dataset

## Load survival package
library(survival)

## Create survival vector for fish dataset
leuk\$SurvObj <- with(leuk, Surv(survt, status))
``````

## Stratified Cox model vs subset models

The coefficient for the rx variable is very different between females and males, but it is assumed to be the same in the stratified

``````## Stratified by sex
res.strata.sex <- coxph(formula = SurvObj ~ logwbc + rx + strata(sex),
data    = leuk,
ties    = c("efron","breslow","exact")[1])
res.strata.sex
``````
``````Call:
coxph(formula = SurvObj ~ logwbc + rx + strata(sex), data = leuk,
ties = c("efron", "breslow", "exact")[1])

coef exp(coef) se(coef)    z        p
logwbc 1.454      4.28    0.344 4.22 0.000024
rx     0.998      2.71    0.474 2.11 0.035000

Likelihood ratio test=32.1  on 2 df, p=0.000000109  n= 42, number of events= 30
``````
``````
## Separate models for females and males
res.separate <- lapply(split(leuk, leuk\$sex),
FUN = function(DF) {

coxph(SurvObj ~ logwbc + rx, DF)
})
res.separate
``````
``````\$`0`
Call:
coxph(formula = SurvObj ~ logwbc + rx, data = DF)

coef exp(coef) se(coef)     z     p
logwbc 1.206      3.34    0.503 2.396 0.017
rx     0.311      1.37    0.564 0.552 0.580

Likelihood ratio test=6.65  on 2 df, p=0.0361  n= 22, number of events= 16

\$`1`
Call:
coxph(formula = SurvObj ~ logwbc + rx, data = DF)

coef exp(coef) se(coef)    z      p
logwbc 1.74      5.71    0.536 3.25 0.0011
rx     1.98      7.23    0.739 2.68 0.0075

Likelihood ratio test=29.2  on 2 df, p=0.000000461  n= 20, number of events= 14
``````

## Interaction model

An interaction model allows for different coefficients for different strata.

``````## Model including interaction between sex and logwbc and rx
## Do not include the main effect of sex
res.interaction.sex <- coxph(formula = SurvObj ~ (logwbc + rx)*sex - sex + strata(sex),
data    = leuk,
ties    = c("efron","breslow","exact")[1])
res.interaction.sex
``````
``````Call:
coxph(formula = SurvObj ~ (logwbc + rx) * sex - sex + strata(sex),
data = leuk, ties = c("efron", "breslow", "exact")[1])

coef exp(coef) se(coef)     z     p
logwbc     1.206      3.34    0.503 2.396 0.017
rx         0.311      1.37    0.564 0.552 0.580
logwbc:sex 0.537      1.71    0.735 0.730 0.470
rx:sex     1.667      5.29    0.930 1.793 0.073

Likelihood ratio test=35.8  on 4 df, p=0.000000314  n= 42, number of events= 30
``````
``````
## Comparing stratified model and stratified with interaction model
anova(res.interaction.sex, res.strata.sex)
``````
``````Analysis of Deviance Table
Cox model: response is  SurvObj
Model 1: ~ (logwbc + rx) * sex - sex + strata(sex)
Model 2: ~ logwbc + rx + strata(sex)
loglik Chisq Df P(>|Chi|)
1  -53.9
2  -55.7  3.77  2      0.15
``````

This model is not statistically significantly different from the no interaction model at the 0.05 level, thus, we conclude that the model without interaction is adequate.

## A second example with multiple stratification variables

``````## Load vets.dta dataset
``````
``````  tx ct1 ct2 ct3 ct4 survt perf dd age priortx status
1  1   0   0   0   1    72   60  7  69       0      1
2  1   0   0   0   1   411   70  5  64      10      1
3  1   0   0   0   1   228   60  3  38       0      1
4  1   0   0   0   1   126   60  9  63      10      1
5  1   0   0   0   1   118   70 11  65      10      1
6  1   0   0   0   1    10   20  5  49       0      1
``````
``````
vets <- within(vets, {
tx <- factor(tx, levels = 1:2, labels = c("standard","test"))

priortx <- factor(priortx, levels = c(0,10), labels = c("none","some"))

require(survival)
SurvObj <- Surv(survt, status)
})

## Multiple dummy variables to one variable
## http://stackoverflow.com/questions/5450538/going-from-multiple-dummy-variables-to-a-single-variable
dummies <- vets[, paste0("ct", 1:4)]
vets\$CellType <- names(dummies)[apply(dummies == 1, MARGIN = 1, FUN = which)]
vets\$CellType <- factor(vets\$CellType)

``````
``````        tx ct1 ct2 ct3 ct4 survt perf dd age priortx status SurvObj CellType
1 standard   0   0   0   1    72   60  7  69    none      1     72       ct4
2 standard   0   0   0   1   411   70  5  64    some      1    411       ct4
3 standard   0   0   0   1   228   60  3  38    none      1    228       ct4
4 standard   0   0   0   1   126   60  9  63    some      1    126       ct4
5 standard   0   0   0   1   118   70 11  65    some      1    118       ct4
6 standard   0   0   0   1    10   20  5  49    none      1     10       ct4
``````

## Check proportional hazard assumption by testing

The cell type variable and performance status variable do not satisfy

``````## Fit an additive contribution model without interaction
all.additive.model <- coxph(SurvObj ~ tx + ct1 + ct2 + ct3 + perf + dd + age + priortx, data = vets)

## Check for PH
res.zph
``````
``````                rho   chisq        p
txtest      -0.0273  0.1227 0.726104
ct1          0.1712  4.1093 0.042649
ct2          0.1424  2.9794 0.084329
ct3          0.0128  0.0261 0.871621
perf         0.3073 13.0449 0.000304
dd           0.1491  2.9436 0.086217
age          0.1890  5.3476 0.020750
priortxsome -0.1767  4.4714 0.034467
GLOBAL           NA 27.9972 0.000475
``````
``````
## Plot
plot(res.zph)
``````

## Stratify by cell type and performance status

``````## Create dichotomous PSbin variable
vets\$PSbin <- as.numeric(with(vets, perf >= 60))

## Create a single stratification variable using interaction()
vets\$Z <- with(vets, interaction(CellType, PSbin))

## Change reference in accordance with Kelinbaum pages 218-219
vets\$Z <- relevel(vets\$Z, ref = "ct4.0")

## Stratified by cell type and PSbin
stratified1 <- coxph(SurvObj ~ tx + age + strata(Z), data = vets)
stratified1
``````
``````Call:
coxph(formula = SurvObj ~ tx + age + strata(Z), data = vets)

coef exp(coef) se(coef)      z    p
txtest  0.12778     1.136   0.2085  0.613 0.54
age    -0.00127     0.999   0.0101 -0.126 0.90

Likelihood ratio test=0.38  on 2 df, p=0.829  n= 137, number of events= 128
``````
``````

## Create 0,1 tx.num variable to avoid overparametrization
vets\$tx.num <- as.numeric(vets\$tx == "test")

## Interaction model allowing for different coefficients in each stratum
strat.int1 <- coxph(SurvObj ~ (tx.num + age)*Z - Z + strata(Z), data = vets)
strat.int1
``````
``````Call:
coxph(formula = SurvObj ~ (tx.num + age) * Z - Z + strata(Z),
data = vets)

coef exp(coef) se(coef)       z    p
tx.num         0.30440     1.356   0.6647  0.4580 0.65
age           -0.00158     0.998   0.0301 -0.0524 0.96
tx.num:Zct1.0  2.33240    10.303   1.7724  1.3159 0.19
tx.num:Zct2.0 -1.19200     0.304   0.9566 -1.2460 0.21
tx.num:Zct3.0  0.56805     1.765   0.8558  0.6638 0.51
tx.num:Zct1.1  0.50474     1.657   0.9541  0.5290 0.60
tx.num:Zct2.1  0.57517     1.777   0.9726  0.5914 0.55
tx.num:Zct3.1  0.01072     1.011   0.8226  0.0130 0.99
tx.num:Zct4.1 -1.05184     0.349   0.8683 -1.2114 0.23
age:Zct1.0     0.07865     1.082   0.0640  1.2289 0.22
age:Zct2.0    -0.04623     0.955   0.0452 -1.0233 0.31
age:Zct3.0    -0.05897     0.943   0.0423 -1.3953 0.16
age:Zct1.1    -0.03653     0.964   0.0459 -0.7951 0.43
age:Zct2.1    -0.03962     0.961   0.0450 -0.8801 0.38
age:Zct3.1     0.03573     1.036   0.0383  0.9331 0.35
age:Zct4.1     0.05215     1.054   0.0483  1.0787 0.28

Likelihood ratio test=24.8  on 16 df, p=0.074  n= 137, number of events= 128
``````
``````
## Model comparison between no interaction vs interaction
anova(stratified1, strat.int1)
``````
``````Analysis of Deviance Table
Cox model: response is  SurvObj
Model 1: ~ tx + age + strata(Z)
Model 2: ~ (tx.num + age) * Z - Z + strata(Z)
loglik Chisq Df P(>|Chi|)
1   -261
2   -249  24.4 14     0.041 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
``````
``````

## Interaction for both tx and age vs age only
strat.int2 <- coxph(SurvObj ~ tx.num + age*Z - Z + strata(Z), data = vets)
strat.int2
``````
``````Call:
coxph(formula = SurvObj ~ tx.num + age * Z - Z + strata(Z), data = vets)

coef exp(coef) se(coef)       z     p
tx.num      0.32073     1.378   0.2284  1.4040 0.160
age        -0.00173     0.998   0.0295 -0.0586 0.950
age:Zct1.0  0.02345     1.024   0.0503  0.4661 0.640
age:Zct2.0 -0.04309     0.958   0.0420 -1.0251 0.310
age:Zct3.0 -0.04505     0.956   0.0393 -1.1478 0.250
age:Zct1.1 -0.01875     0.981   0.0384 -0.4878 0.630
age:Zct2.1 -0.02610     0.974   0.0406 -0.6432 0.520
age:Zct3.1  0.03584     1.036   0.0376  0.9539 0.340
age:Zct4.1  0.08305     1.087   0.0501  1.6575 0.097

Likelihood ratio test=13.8  on 9 df, p=0.13  n= 137, number of events= 128
``````
``````anova(strat.int1, strat.int2)
``````
``````Analysis of Deviance Table
Cox model: response is  SurvObj
Model 1: ~ (tx.num + age) * Z - Z + strata(Z)
Model 2: ~ tx.num + age * Z - Z + strata(Z)
loglik Chisq Df P(>|Chi|)
1   -249
2   -255    11  7      0.14
``````
``````
## Interaction for both tx and age vs tx only
strat.int3 <- coxph(SurvObj ~ age + tx.num*Z - Z + strata(Z), data = vets)
strat.int3
``````
``````Call:
coxph(formula = SurvObj ~ age + tx.num * Z - Z + strata(Z), data = vets)

coef exp(coef) se(coef)       z    p
age           -0.00343     0.997   0.0106 -0.3231 0.75
tx.num         0.31293     1.367   0.6522  0.4798 0.63
tx.num:Zct1.0  0.83227     2.299   1.3626  0.6108 0.54
tx.num:Zct2.0 -1.23243     0.292   0.9412 -1.3094 0.19
tx.num:Zct3.0  0.12572     1.134   0.8015  0.1569 0.88
tx.num:Zct1.1  0.03161     1.032   0.8037  0.0393 0.97
tx.num:Zct2.1  0.19607     1.217   0.8780  0.2233 0.82
tx.num:Zct3.1  0.09769     1.103   0.8073  0.1210 0.90
tx.num:Zct4.1 -1.29379     0.274   0.8336 -1.5521 0.12

Likelihood ratio test=9.35  on 9 df, p=0.406  n= 137, number of events= 128
``````
``````anova(strat.int1, strat.int3)
``````
``````Analysis of Deviance Table
Cox model: response is  SurvObj
Model 1: ~ (tx.num + age) * Z - Z + strata(Z)
Model 2: ~ age + tx.num * Z - Z + strata(Z)
loglik Chisq Df P(>|Chi|)
1   -249
2   -257  15.4  7     0.031 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
``````

These models are significantly different, indicating the no-interaction model is not adequate. The full interaction model is not statistically different from the model with interaction for age only but statistically different from the model with interaction for tx only. Therefore, the interaction model for age only is adequate.

## A graphical view of the stratified Cox approach

``````## Create dataset to predict survival for
newdat <- expand.grid(rx = 0:1, sex = 0:1)

## Define log(-log(S)) vs log(time) plot function
plot.fun <- function(MODEL, newdata = newdat) {
## Plot
plot(survfit(MODEL, newdata = newdata),
log = "x", fun = function(S) log(-log(S)),
lty = rep(1:2, 2), col = rep(c("red","blue"), each = 2)
)

legend("topleft", legend = strata(newdat, sep = ":"),
lty = rep(1:2, 2), col = rep(c("red","blue"), each = 2))
}
``````

## a. PH for both rx and sex

``````## PH for both rx and sex without interaction between rx and sex
model1 <- coxph(formula = SurvObj ~ rx + sex,
data    = leuk)
model1
``````
``````Call:
coxph(formula = SurvObj ~ rx + sex, data = leuk)

coef exp(coef) se(coef)     z       p
rx  1.674      5.33    0.437 3.825 0.00013
sex 0.295      1.34    0.424 0.697 0.49000

Likelihood ratio test=16.8  on 2 df, p=0.000222  n= 42, number of events= 30
``````
``````plot.fun(model1)
title("Same baselines, same effects
all parallel with same distance")
``````

This model assumes all four curves are parallel (proportional hazards assumption), and the effect of rx is the same across levels of sex.

## b. PH for both rx and sex, but allows for different effects

``````## PH for both rx and sex with interaction between rx and sex
model2 <- coxph(formula = SurvObj ~ rx * sex,
data    = leuk)
model2
``````
``````Call:
coxph(formula = SurvObj ~ rx * sex, data = leuk)

coef exp(coef) se(coef)     z     p
rx      0.624      1.87    0.541  1.15 0.250
sex    -1.108      0.33    0.706 -1.57 0.120
rx:sex  1.957      7.08    0.815  2.40 0.016

Likelihood ratio test=22.5  on 3 df, p=0.000052  n= 42, number of events= 30
``````
``````plot.fun(model2)
title("Same baselines, different effects
all parallel with different distances")
``````

This model assumes all four curves are parallel (proportional hazards assumption), but the effect of rx can be different across levels of sex.

## c. Stratified by sex, PH for rx

``````## PH for rx, stratified by sex
model3 <- coxph(formula = SurvObj ~ rx + strata(sex),
data    = leuk)
model3
``````
``````Call:
coxph(formula = SurvObj ~ rx + strata(sex), data = leuk)

coef exp(coef) se(coef)    z      p
rx 1.32      3.76    0.438 3.02 0.0025

Likelihood ratio test=10.1  on 1 df, p=0.00148  n= 42, number of events= 30
``````
``````plot.fun(model3, newdata = data.frame(rx = 0:1))
title("Different baselines, same effects
parallel within each stratum with equal distance")
``````

This model assumes curves are parallel within each sex stratum, but the effect of rx is the same across sex strata.

## d. Stratified by sex, PH for rx, and includes interaction with sex

``````## PH for rx, stratified by sex, interaction with sex
model4 <- coxph(formula = SurvObj ~ rx + rx:sex + strata(sex),
data    = leuk)
model4
``````
``````Call:
coxph(formula = SurvObj ~ rx + rx:sex + strata(sex), data = leuk)

coef exp(coef) se(coef)     z     p
rx     0.487      1.63    0.550 0.885 0.380
rx:sex 1.660      5.26    0.834 1.989 0.047

Likelihood ratio test=14  on 2 df, p=0.000898  n= 42, number of events= 30
``````
``````
## Model for female
model4.0 <- coxph(formula = SurvObj ~ rx,
data    = leuk, subset = (sex == 0))
## Model for male
model4.1 <- coxph(formula = SurvObj ~ rx,
data    = leuk, subset = (sex == 1))

plot.fun2 <- function(MODEL, ...){
plot(survfit(MODEL, newdata = data.frame(rx = 0:1)),
log = "x", fun = function(S) log(-log(S)),
lty = 1:2, xlim = c(1,35), ylim = c(-4,1.5), ...
)
}

## Plot female curves
plot.fun2(model4.0, col = "red")
## Add male curves
par(new = T)
plot.fun2(model4.1, col = "blue", bty = "n", xaxt = "n", yaxt = "n")