Kleinbaum: Stratified Cox regression

References

These examples are reproduction of Chapter 5 of the book. To get identical results, use ties == “breslow”.

Load dataset

## Load foreign package
library(foreign)

## Load leukemia dataset
leuk <- read.dta("http://www.sph.emory.edu/~dkleinb/surv2datasets/anderson.dta")

## Load survival package
library(survival)

## Create survival vector for fish dataset
leuk$SurvObj <- with(leuk, Surv(survt, status))

Stratified Cox model vs subset models

The coefficient for the rx variable is very different between females and males, but it is assumed to be the same in the stratified

## Stratified by sex
res.strata.sex <- coxph(formula = SurvObj ~ logwbc + rx + strata(sex),
                        data    = leuk,
                        ties    = c("efron","breslow","exact")[1])
res.strata.sex
Call:
coxph(formula = SurvObj ~ logwbc + rx + strata(sex), data = leuk, 
    ties = c("efron", "breslow", "exact")[1])

        coef exp(coef) se(coef)    z        p
logwbc 1.454      4.28    0.344 4.22 0.000024
rx     0.998      2.71    0.474 2.11 0.035000

Likelihood ratio test=32.1  on 2 df, p=0.000000109  n= 42, number of events= 30 

## Separate models for females and males
res.separate <- lapply(split(leuk, leuk$sex),
                       FUN = function(DF) {

                           coxph(SurvObj ~ logwbc + rx, DF)
                       })
res.separate
$`0`
Call:
coxph(formula = SurvObj ~ logwbc + rx, data = DF)

        coef exp(coef) se(coef)     z     p
logwbc 1.206      3.34    0.503 2.396 0.017
rx     0.311      1.37    0.564 0.552 0.580

Likelihood ratio test=6.65  on 2 df, p=0.0361  n= 22, number of events= 16 

$`1`
Call:
coxph(formula = SurvObj ~ logwbc + rx, data = DF)

       coef exp(coef) se(coef)    z      p
logwbc 1.74      5.71    0.536 3.25 0.0011
rx     1.98      7.23    0.739 2.68 0.0075

Likelihood ratio test=29.2  on 2 df, p=0.000000461  n= 20, number of events= 14 

Interaction model

An interaction model allows for different coefficients for different strata.

## Model including interaction between sex and logwbc and rx
## Do not include the main effect of sex
res.interaction.sex <- coxph(formula = SurvObj ~ (logwbc + rx)*sex - sex + strata(sex),
                             data    = leuk,
                             ties    = c("efron","breslow","exact")[1])
res.interaction.sex
Call:
coxph(formula = SurvObj ~ (logwbc + rx) * sex - sex + strata(sex), 
    data = leuk, ties = c("efron", "breslow", "exact")[1])

            coef exp(coef) se(coef)     z     p
logwbc     1.206      3.34    0.503 2.396 0.017
rx         0.311      1.37    0.564 0.552 0.580
logwbc:sex 0.537      1.71    0.735 0.730 0.470
rx:sex     1.667      5.29    0.930 1.793 0.073

Likelihood ratio test=35.8  on 4 df, p=0.000000314  n= 42, number of events= 30 

## Comparing stratified model and stratified with interaction model
anova(res.interaction.sex, res.strata.sex)
Analysis of Deviance Table
 Cox model: response is  SurvObj
 Model 1: ~ (logwbc + rx) * sex - sex + strata(sex)
 Model 2: ~ logwbc + rx + strata(sex)
  loglik Chisq Df P(>|Chi|)
1  -53.9                   
2  -55.7  3.77  2      0.15

This model is not statistically significantly different from the no interaction model at the 0.05 level, thus, we conclude that the model without interaction is adequate.

A second example with multiple stratification variables

## Load vets.dta dataset
vets <- read.dta("http://www.sph.emory.edu/~dkleinb/surv2datasets/vets.dta")
head(vets)
  tx ct1 ct2 ct3 ct4 survt perf dd age priortx status
1  1   0   0   0   1    72   60  7  69       0      1
2  1   0   0   0   1   411   70  5  64      10      1
3  1   0   0   0   1   228   60  3  38       0      1
4  1   0   0   0   1   126   60  9  63      10      1
5  1   0   0   0   1   118   70 11  65      10      1
6  1   0   0   0   1    10   20  5  49       0      1

vets <- within(vets, {
     tx <- factor(tx, levels = 1:2, labels = c("standard","test"))

     priortx <- factor(priortx, levels = c(0,10), labels = c("none","some"))

     require(survival)
     SurvObj <- Surv(survt, status)
 })


## Multiple dummy variables to one variable
## http://stackoverflow.com/questions/5450538/going-from-multiple-dummy-variables-to-a-single-variable
dummies <- vets[, paste0("ct", 1:4)]
vets$CellType <- names(dummies)[apply(dummies == 1, MARGIN = 1, FUN = which)]
vets$CellType <- factor(vets$CellType)

head(vets)
        tx ct1 ct2 ct3 ct4 survt perf dd age priortx status SurvObj CellType
1 standard   0   0   0   1    72   60  7  69    none      1     72       ct4
2 standard   0   0   0   1   411   70  5  64    some      1    411       ct4
3 standard   0   0   0   1   228   60  3  38    none      1    228       ct4
4 standard   0   0   0   1   126   60  9  63    some      1    126       ct4
5 standard   0   0   0   1   118   70 11  65    some      1    118       ct4
6 standard   0   0   0   1    10   20  5  49    none      1     10       ct4

Check proportional hazard assumption by testing

The cell type variable and performance status variable do not satisfy

## Fit an additive contribution model without interaction
all.additive.model <- coxph(SurvObj ~ tx + ct1 + ct2 + ct3 + perf + dd + age + priortx, data = vets)

## Check for PH
res.zph <- cox.zph(all.additive.model)
res.zph
                rho   chisq        p
txtest      -0.0273  0.1227 0.726104
ct1          0.1712  4.1093 0.042649
ct2          0.1424  2.9794 0.084329
ct3          0.0128  0.0261 0.871621
perf         0.3073 13.0449 0.000304
dd           0.1491  2.9436 0.086217
age          0.1890  5.3476 0.020750
priortxsome -0.1767  4.4714 0.034467
GLOBAL           NA 27.9972 0.000475

## Plot
plot(res.zph)

plot of chunk unnamed-chunk-6 plot of chunk unnamed-chunk-6 plot of chunk unnamed-chunk-6 plot of chunk unnamed-chunk-6 plot of chunk unnamed-chunk-6 plot of chunk unnamed-chunk-6 plot of chunk unnamed-chunk-6 plot of chunk unnamed-chunk-6

Stratify by cell type and performance status

## Create dichotomous PSbin variable
vets$PSbin <- as.numeric(with(vets, perf >= 60))

## Create a single stratification variable using interaction()
vets$Z <- with(vets, interaction(CellType, PSbin))

## Change reference in accordance with Kelinbaum pages 218-219
vets$Z <- relevel(vets$Z, ref = "ct4.0")


## Stratified by cell type and PSbin
stratified1 <- coxph(SurvObj ~ tx + age + strata(Z), data = vets)
stratified1
Call:
coxph(formula = SurvObj ~ tx + age + strata(Z), data = vets)

           coef exp(coef) se(coef)      z    p
txtest  0.12778     1.136   0.2085  0.613 0.54
age    -0.00127     0.999   0.0101 -0.126 0.90

Likelihood ratio test=0.38  on 2 df, p=0.829  n= 137, number of events= 128 


## Create 0,1 tx.num variable to avoid overparametrization
vets$tx.num <- as.numeric(vets$tx == "test")

## Interaction model allowing for different coefficients in each stratum
strat.int1 <- coxph(SurvObj ~ (tx.num + age)*Z - Z + strata(Z), data = vets)
strat.int1
Call:
coxph(formula = SurvObj ~ (tx.num + age) * Z - Z + strata(Z), 
    data = vets)

                  coef exp(coef) se(coef)       z    p
tx.num         0.30440     1.356   0.6647  0.4580 0.65
age           -0.00158     0.998   0.0301 -0.0524 0.96
tx.num:Zct1.0  2.33240    10.303   1.7724  1.3159 0.19
tx.num:Zct2.0 -1.19200     0.304   0.9566 -1.2460 0.21
tx.num:Zct3.0  0.56805     1.765   0.8558  0.6638 0.51
tx.num:Zct1.1  0.50474     1.657   0.9541  0.5290 0.60
tx.num:Zct2.1  0.57517     1.777   0.9726  0.5914 0.55
tx.num:Zct3.1  0.01072     1.011   0.8226  0.0130 0.99
tx.num:Zct4.1 -1.05184     0.349   0.8683 -1.2114 0.23
age:Zct1.0     0.07865     1.082   0.0640  1.2289 0.22
age:Zct2.0    -0.04623     0.955   0.0452 -1.0233 0.31
age:Zct3.0    -0.05897     0.943   0.0423 -1.3953 0.16
age:Zct1.1    -0.03653     0.964   0.0459 -0.7951 0.43
age:Zct2.1    -0.03962     0.961   0.0450 -0.8801 0.38
age:Zct3.1     0.03573     1.036   0.0383  0.9331 0.35
age:Zct4.1     0.05215     1.054   0.0483  1.0787 0.28

Likelihood ratio test=24.8  on 16 df, p=0.074  n= 137, number of events= 128 

## Model comparison between no interaction vs interaction
anova(stratified1, strat.int1)
Analysis of Deviance Table
 Cox model: response is  SurvObj
 Model 1: ~ tx + age + strata(Z)
 Model 2: ~ (tx.num + age) * Z - Z + strata(Z)
  loglik Chisq Df P(>|Chi|)  
1   -261                     
2   -249  24.4 14     0.041 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 


## Interaction for both tx and age vs age only
strat.int2 <- coxph(SurvObj ~ tx.num + age*Z - Z + strata(Z), data = vets)
strat.int2
Call:
coxph(formula = SurvObj ~ tx.num + age * Z - Z + strata(Z), data = vets)

               coef exp(coef) se(coef)       z     p
tx.num      0.32073     1.378   0.2284  1.4040 0.160
age        -0.00173     0.998   0.0295 -0.0586 0.950
age:Zct1.0  0.02345     1.024   0.0503  0.4661 0.640
age:Zct2.0 -0.04309     0.958   0.0420 -1.0251 0.310
age:Zct3.0 -0.04505     0.956   0.0393 -1.1478 0.250
age:Zct1.1 -0.01875     0.981   0.0384 -0.4878 0.630
age:Zct2.1 -0.02610     0.974   0.0406 -0.6432 0.520
age:Zct3.1  0.03584     1.036   0.0376  0.9539 0.340
age:Zct4.1  0.08305     1.087   0.0501  1.6575 0.097

Likelihood ratio test=13.8  on 9 df, p=0.13  n= 137, number of events= 128 
anova(strat.int1, strat.int2)
Analysis of Deviance Table
 Cox model: response is  SurvObj
 Model 1: ~ (tx.num + age) * Z - Z + strata(Z)
 Model 2: ~ tx.num + age * Z - Z + strata(Z)
  loglik Chisq Df P(>|Chi|)
1   -249                   
2   -255    11  7      0.14

## Interaction for both tx and age vs tx only
strat.int3 <- coxph(SurvObj ~ age + tx.num*Z - Z + strata(Z), data = vets)
strat.int3
Call:
coxph(formula = SurvObj ~ age + tx.num * Z - Z + strata(Z), data = vets)

                  coef exp(coef) se(coef)       z    p
age           -0.00343     0.997   0.0106 -0.3231 0.75
tx.num         0.31293     1.367   0.6522  0.4798 0.63
tx.num:Zct1.0  0.83227     2.299   1.3626  0.6108 0.54
tx.num:Zct2.0 -1.23243     0.292   0.9412 -1.3094 0.19
tx.num:Zct3.0  0.12572     1.134   0.8015  0.1569 0.88
tx.num:Zct1.1  0.03161     1.032   0.8037  0.0393 0.97
tx.num:Zct2.1  0.19607     1.217   0.8780  0.2233 0.82
tx.num:Zct3.1  0.09769     1.103   0.8073  0.1210 0.90
tx.num:Zct4.1 -1.29379     0.274   0.8336 -1.5521 0.12

Likelihood ratio test=9.35  on 9 df, p=0.406  n= 137, number of events= 128 
anova(strat.int1, strat.int3)
Analysis of Deviance Table
 Cox model: response is  SurvObj
 Model 1: ~ (tx.num + age) * Z - Z + strata(Z)
 Model 2: ~ age + tx.num * Z - Z + strata(Z)
  loglik Chisq Df P(>|Chi|)  
1   -249                     
2   -257  15.4  7     0.031 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

These models are significantly different, indicating the no-interaction model is not adequate. The full interaction model is not statistically different from the model with interaction for age only but statistically different from the model with interaction for tx only. Therefore, the interaction model for age only is adequate.

A graphical view of the stratified Cox approach

## Create dataset to predict survival for
newdat <- expand.grid(rx = 0:1, sex = 0:1)

## Define log(-log(S)) vs log(time) plot function
plot.fun <- function(MODEL, newdata = newdat) {
    ## Plot
    plot(survfit(MODEL, newdata = newdata),
         log = "x", fun = function(S) log(-log(S)),
         lty = rep(1:2, 2), col = rep(c("red","blue"), each = 2)
         )

    ## Add legend
    legend("topleft", legend = strata(newdat, sep = ":"),
           lty = rep(1:2, 2), col = rep(c("red","blue"), each = 2))
}

a. PH for both rx and sex

## PH for both rx and sex without interaction between rx and sex
model1 <- coxph(formula = SurvObj ~ rx + sex,
                data    = leuk)
model1
Call:
coxph(formula = SurvObj ~ rx + sex, data = leuk)

     coef exp(coef) se(coef)     z       p
rx  1.674      5.33    0.437 3.825 0.00013
sex 0.295      1.34    0.424 0.697 0.49000

Likelihood ratio test=16.8  on 2 df, p=0.000222  n= 42, number of events= 30 
plot.fun(model1)
title("Same baselines, same effects
all parallel with same distance")

plot of chunk unnamed-chunk-9

This model assumes all four curves are parallel (proportional hazards assumption), and the effect of rx is the same across levels of sex.

b. PH for both rx and sex, but allows for different effects

## PH for both rx and sex with interaction between rx and sex
model2 <- coxph(formula = SurvObj ~ rx * sex,
                data    = leuk)
model2
Call:
coxph(formula = SurvObj ~ rx * sex, data = leuk)

         coef exp(coef) se(coef)     z     p
rx      0.624      1.87    0.541  1.15 0.250
sex    -1.108      0.33    0.706 -1.57 0.120
rx:sex  1.957      7.08    0.815  2.40 0.016

Likelihood ratio test=22.5  on 3 df, p=0.000052  n= 42, number of events= 30 
plot.fun(model2)
title("Same baselines, different effects
all parallel with different distances")

plot of chunk unnamed-chunk-10

This model assumes all four curves are parallel (proportional hazards assumption), but the effect of rx can be different across levels of sex.

c. Stratified by sex, PH for rx

## PH for rx, stratified by sex
model3 <- coxph(formula = SurvObj ~ rx + strata(sex),
                data    = leuk)
model3
Call:
coxph(formula = SurvObj ~ rx + strata(sex), data = leuk)


   coef exp(coef) se(coef)    z      p
rx 1.32      3.76    0.438 3.02 0.0025

Likelihood ratio test=10.1  on 1 df, p=0.00148  n= 42, number of events= 30 
plot.fun(model3, newdata = data.frame(rx = 0:1))
title("Different baselines, same effects
parallel within each stratum with equal distance")

plot of chunk unnamed-chunk-11

This model assumes curves are parallel within each sex stratum, but the effect of rx is the same across sex strata.

d. Stratified by sex, PH for rx, and includes interaction with sex

## PH for rx, stratified by sex, interaction with sex
model4 <- coxph(formula = SurvObj ~ rx + rx:sex + strata(sex),
                data    = leuk)
model4
Call:
coxph(formula = SurvObj ~ rx + rx:sex + strata(sex), data = leuk)

        coef exp(coef) se(coef)     z     p
rx     0.487      1.63    0.550 0.885 0.380
rx:sex 1.660      5.26    0.834 1.989 0.047

Likelihood ratio test=14  on 2 df, p=0.000898  n= 42, number of events= 30 

## Model for female
model4.0 <- coxph(formula = SurvObj ~ rx,
                data    = leuk, subset = (sex == 0))
## Model for male
model4.1 <- coxph(formula = SurvObj ~ rx,
                data    = leuk, subset = (sex == 1))

plot.fun2 <- function(MODEL, ...){
    plot(survfit(MODEL, newdata = data.frame(rx = 0:1)),
         log = "x", fun = function(S) log(-log(S)),
         lty = 1:2, xlim = c(1,35), ylim = c(-4,1.5), ...
         )
}

## Plot female curves
plot.fun2(model4.0, col = "red")
## Add male curves
par(new = T)
plot.fun2(model4.1, col = "blue", bty = "n", xaxt = "n", yaxt = "n")
## Add legend
legend("topleft", legend = strata(newdat, sep = ":"),
       lty = rep(1:2, 2), col = rep(c("blue","red"), each = 2))
title("Different baselines, Different effects
parallel within each stratum with different distances")

plot of chunk unnamed-chunk-12

This model assumes curves are parallel within each sex stratum, but the effect of rx can be different across sex strata.