This example uses data from the National Health Interview Survey (NHIS) linked mortality data obtained from the Minnesota Population Center’s IHIS program, which links the NHIS survey files from 1986 tp 2004 to mortality data from the National Death Index (NDI). The death follow up currently ends at 2006.

Below, I code a competing risk outcome, using four different causes of death as competing events, and age at death as the outcome variable.

The data are pretty big, so I take a subset of 20,000 people for the example presented below. Using the whole sample may make your computer explode. You have been warned

library(survey)
## Loading required package: grid
## 
## Attaching package: 'survey'
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(survival)
library(car)
library(cmprsk)
load("~/Google Drive/dem7223/ihis_mort.Rdata")
names(ihis_mort)
##  [1] "year"         "serial"       "strata"       "psu"         
##  [5] "hhweight"     "pernum"       "perweight"    "sampweight"  
##  [9] "SUPP2WT"      "telephone"    "astatflg"     "cstatflg"    
## [13] "intervwyr"    "age"          "sex"          "marstat"     
## [17] "marst"        "marstcohab"   "cohabmarst"   "cohabevmar"  
## [21] "birthyr"      "relate"       "famsize"      "momed"       
## [25] "racea"        "hispeth"      "yrsinus"      "hispyn"      
## [29] "usborn"       "citizen"      "racenew"      "EDUCREC2"    
## [33] "educ"         "empstat"      "pooryn"       "INCFAMR82ON" 
## [37] "gotchsup"     "gotdiv"       "gotnewelf"    "gotnonssdis" 
## [41] "gotssiwhy"    "gotstamp"     "poverty"      "POVERTY2"    
## [45] "ownership"    "lowrent"      "health"       "height"      
## [49] "weight"       "bmicalc"      "hstatyr"      "wldayr"      
## [53] "usualpl"      "typplsick"    "routcare"     "delaycost"   
## [57] "placecar"     "delayappt"    "delayhrs"     "delayphone"  
## [61] "delaytrans"   "delaywait"    "ybarcare"     "ybardental"  
## [65] "ybarglass"    "ybarmeds"     "ybarmental"   "ALC5UPYR"    
## [69] "smokev"       "smokagereg"   "mortelig"     "mortstat"    
## [73] "mortdody"     "mortucod"     "mortwt"       "mortcms"     
## [77] "mortndi"      "mortssa"      "mortwtsa"     "famdelaycono"
## [81] "famdelaycost" "famybarcar"   "famybarcarno"
sub<-subset(ihis_mort, ihis_mort$mortelig==1&is.na(ihis_mort$racenew)==F)
samps<-sample(1:length(sub$psu), size = 20000, replace = F)
sub<-sub[samps,]

rm(ihis_mort)
sub$d.age<-ifelse(sub$mortstat==1,sub$mortdody-(sub$year-sub$age) ,
                  ifelse(sub$mortstat==2,2006-(sub$year-sub$age), NA))
sub$d.event<-ifelse(sub$mortstat==1,1,0)
sub$married<-recode(sub$marstat, recodes="00=NA; 10:13='married'; 20:40='sep'; 50='nm'; 99=NA" ,as.factor.result=T )
sub$male<-ifelse(sub$sex==1,1,0)
sub$mwt<-sub$mortwt/mean(sub$mortwt, na.rm=T)
sub$race<-recode(sub$racenew, recodes ="10='wht'; 20 ='blk'; 30:61='other'; 97:99=NA", as.factor.result=T)
sub$college<-recode(sub$EDUCREC2, recodes="00=NA; 10:42='hs or less'; 50:53='some coll'; 54:60='coll'; else=NA", as.factor.result=T)
sub$black<-ifelse(sub$race=='blk',1,0)
sub$oth<-ifelse(sub$race=='other',1,0)
sub$hs<-ifelse(sub$college=='hs or less',1,0)
sub$col1<-ifelse(sub$college=='some coll',1,0)
sub$sep<-ifelse(sub$married=='sep',1,0)
sub$nm<-ifelse(sub$married=='nm',1,0)

Now we want to examine the competing risks of mortality from various causes, we use the mortucod variable create a variable indicating major causes of death lumping other causes together(1=cancers, 2=CVD, 3=infectious, 4=other causes, NA=alive)

sub$cod<-recode(sub$mortucod, recodes="20:44=1; 55:75=2;1:18=3;77:83=3;46:52=4;45=4;84:135=4; 999=NA")

#Here I generate censoring indicators, one for each type of failure
sub$fail1<-ifelse(sub$cod==1&sub$d.event==1, 1,0)
sub$fail2<-ifelse(sub$cod==2&sub$d.event==1, 1,0)
sub$fail3<-ifelse(sub$cod==3&sub$d.event==1, 1,0)
sub$fail4<-ifelse(sub$cod==4&sub$d.event==1, 1,0)
#sub$codcens=ifelse(is.na(sub$cod)==T,0,sub$cod)
sub$codcens[sub$fail1==0&sub$fail2==0&sub$fail3==0&sub$fail4==0]<-0
sub$codcens[sub$fail1==1|sub$fail2==1|sub$fail3==1|sub$fail4==1]<-1
table(sub$codcens, sub$d.event)
##    
##         0     1
##   0 19093     0
##   1     0   894
table(sub$cod, sub$d.event)
##    
##       0   1
##   1   0 237
##   2   0 325
##   3   0  49
##   4   0 283

Form a survey design object and examine some basic mortality curves by sex and failure type:

options(survey.lonely.psu="adjust")
des<-svydesign(ids=~psu, strata=~strata, weights = ~mortwt, data=sub[sub$mortwt>0,], nest=T)

fit.s<-svykm(Surv(d.age, d.event)~male, design=des, se=F)
fit.s
## Weighted survival curves:
## svykm(formula = Surv(d.age, d.event) ~ male, design = des, se = F)
## 0 : Q1 = 85  median = 89  Q3 = Inf 
## 1 : Q1 = 82  median = 87  Q3 = 91
plot(fit.s, pars=list(col=c(1,2)) )
title(main="Survival Function for Adult Mortality", sub="Male vs. Female")
legend("bottom", legend = c("Female","Male" ), col=c(1,2), lty=1)

#test statistic
svylogrank(Surv(d.age, d.event)~male, design=des)
## [[1]]
##       score                               
## [1,] 400062 54905.87 7.286325 3.185247e-13
## 
## [[2]]
##        chisq            p 
## 5.309053e+01 3.185247e-13 
## 
## attr(,"class")
## [1] "svylogrank"
fit.s2<-svykm(Surv(d.age, d.event)~strata(cod), design=des, se=F)
fit.s2
## Weighted survival curves:
## svykm(formula = Surv(d.age, d.event) ~ strata(cod), design = des, 
##     se = F)
## cod=1 : Q1 = 63  median = 74  Q3 = 82 
## cod=2 : Q1 = 70  median = 80  Q3 = 85 
## cod=3 : Q1 = 58  median = 74  Q3 = 85 
## cod=4 : Q1 = 57  median = 75  Q3 = 84
plot(fit.s2, pars=list(col=c(1,2,3,4)), ylab="Survival", xlab="Age", main="Survival functions for competing causes of death")
legend("bottom", legend=c("Cancers", "CVD","Infectious", "Other"), lty=1, col=1:4, cex=.8)

Here is the overall hazard model using the Cox PH model, this model is for all-cause mortality.

#all failures
fita<-svycoxph(Surv(d.age,d.event)~male+married+race+college, design=des)
summary(fita)
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (678) clusters.
## svydesign(ids = ~psu, strata = ~strata, weights = ~mortwt, data = sub[sub$mortwt > 
##     0, ], nest = T)
## Call:
## svycoxph(formula = Surv(d.age, d.event) ~ male + married + race + 
##     college, design = des)
## 
##   n= 19559, number of events= 890 
##    (368 observations deleted due to missingness)
## 
##                       coef exp(coef) se(coef)      z Pr(>|z|)    
## male               0.55941   1.74964  0.08133  6.878 6.06e-12 ***
## marriednm          0.76075   2.13988  0.15449  4.924 8.47e-07 ***
## marriedsep        -0.05825   0.94342  0.07954 -0.732 0.463979    
## raceother         -0.51098   0.59990  0.23742 -2.152 0.031375 *  
## racewht           -0.46334   0.62918  0.11865 -3.905 9.42e-05 ***
## collegehs or less  0.43100   1.53880  0.11476  3.756 0.000173 ***
## collegesome coll   0.47545   1.60874  0.13824  3.439 0.000583 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## male                 1.7496     0.5715    1.4918    2.0520
## marriednm            2.1399     0.4673    1.5808    2.8966
## marriedsep           0.9434     1.0600    0.8072    1.1026
## raceother            0.5999     1.6669    0.3767    0.9554
## racewht              0.6292     1.5894    0.4986    0.7939
## collegehs or less    1.5388     0.6499    1.2288    1.9269
## collegesome coll     1.6087     0.6216    1.2269    2.1094
## 
## Concordance= 0.664  (se = 0.015 )
## Rsquare= NA   (max possible= NA )
## Likelihood ratio test= NA  on 7 df,   p=NA
## Wald test            = 124.6  on 7 df,   p=0
## Score (logrank) test = NA  on 7 df,   p=NA
plot(survfit(fita))

Type-specific hazard models

These models take the approach suggested by Allison, where for a given cause of death, any other cause is assumed to be censored.

#Cancer
fit1<-svycoxph(Surv(d.age,fail1==1)~male+married+race+college,des)
summary(fit1)
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (678) clusters.
## svydesign(ids = ~psu, strata = ~strata, weights = ~mortwt, data = sub[sub$mortwt > 
##     0, ], nest = T)
## Call:
## svycoxph(formula = Surv(d.age, fail1 == 1) ~ male + married + 
##     race + college, design = des)
## 
##   n= 19547, number of events= 235 
##    (380 observations deleted due to missingness)
## 
##                      coef exp(coef) se(coef)      z Pr(>|z|)    
## male               0.5561    1.7439   0.1544  3.601 0.000317 ***
## marriednm          0.5545    1.7410   0.3057  1.814 0.069678 .  
## marriedsep        -0.1338    0.8748   0.1526 -0.877 0.380654    
## raceother         -0.4823    0.6173   0.4879 -0.989 0.322827    
## racewht           -0.5205    0.5942   0.2125 -2.449 0.014314 *  
## collegehs or less  0.4983    1.6458   0.2299  2.167 0.030223 *  
## collegesome coll   0.6964    2.0065   0.2474  2.814 0.004889 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## male                 1.7439     0.5734    1.2885    2.3604
## marriednm            1.7410     0.5744    0.9564    3.1695
## marriedsep           0.8748     1.1432    0.6486    1.1798
## raceother            0.6173     1.6199    0.2373    1.6062
## racewht              0.5942     1.6828    0.3918    0.9012
## collegehs or less    1.6458     0.6076    1.0488    2.5828
## collegesome coll     2.0065     0.4984    1.2354    3.2588
## 
## Concordance= 0.659  (se = 0.026 )
## Rsquare= NA   (max possible= NA )
## Likelihood ratio test= NA  on 7 df,   p=NA
## Wald test            = 37.13  on 7 df,   p=4.429e-06
## Score (logrank) test = NA  on 7 df,   p=NA
plot(survfit(fit1), main="Survival for Cancer Mortality")

#CVD
fit2<-svycoxph(Surv(d.age, fail2==1)~male+married+race+college, des)
summary(fit2)
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (678) clusters.
## svydesign(ids = ~psu, strata = ~strata, weights = ~mortwt, data = sub[sub$mortwt > 
##     0, ], nest = T)
## Call:
## svycoxph(formula = Surv(d.age, fail2 == 1) ~ male + married + 
##     race + college, design = des)
## 
##   n= 19547, number of events= 318 
##    (380 observations deleted due to missingness)
## 
##                       coef exp(coef) se(coef)      z Pr(>|z|)    
## male               0.53661   1.71021  0.13365  4.015 5.94e-05 ***
## marriednm          0.67010   1.95444  0.24851  2.696  0.00701 ** 
## marriedsep        -0.01708   0.98307  0.13378 -0.128  0.89844    
## raceother         -0.27100   0.76261  0.30939 -0.876  0.38106    
## racewht           -0.48644   0.61481  0.18018 -2.700  0.00694 ** 
## collegehs or less  0.31306   1.36760  0.18325  1.708  0.08757 .  
## collegesome coll   0.25782   1.29411  0.23914  1.078  0.28099    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## male                 1.7102     0.5847    1.3161    2.2223
## marriednm            1.9544     0.5117    1.2008    3.1810
## marriedsep           0.9831     1.0172    0.7563    1.2778
## raceother            0.7626     1.3113    0.4159    1.3985
## racewht              0.6148     1.6265    0.4319    0.8752
## collegehs or less    1.3676     0.7312    0.9549    1.9586
## collegesome coll     1.2941     0.7727    0.8099    2.0679
## 
## Concordance= 0.644  (se = 0.026 )
## Rsquare= NA   (max possible= NA )
## Likelihood ratio test= NA  on 7 df,   p=NA
## Wald test            = 47.77  on 7 df,   p=3.947e-08
## Score (logrank) test = NA  on 7 df,   p=NA
plot(survfit(fit2), main="Survival for CVD Mortality")

#Infectious
fit3<-svycoxph(Surv(d.age, fail3==1)~male+married+race+college, des)
summary(fit3)
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (678) clusters.
## svydesign(ids = ~psu, strata = ~strata, weights = ~mortwt, data = sub[sub$mortwt > 
##     0, ], nest = T)
## Call:
## svycoxph(formula = Surv(d.age, fail3 == 1) ~ male + married + 
##     race + college, design = des)
## 
##   n= 19547, number of events= 47 
##    (380 observations deleted due to missingness)
## 
##                       coef exp(coef) se(coef)      z Pr(>|z|)
## male               0.57087   1.76980  0.35238  1.620    0.105
## marriednm          0.08286   1.08639  0.61587  0.135    0.893
## marriedsep        -0.11027   0.89559  0.34225 -0.322    0.747
## raceother          0.34865   1.41715  0.81821  0.426    0.670
## racewht            0.31152   1.36550  0.54580  0.571    0.568
## collegehs or less -0.10717   0.89837  0.48660 -0.220    0.826
## collegesome coll  -0.25534   0.77465  0.64881 -0.394    0.694
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## male                 1.7698     0.5650    0.8871     3.531
## marriednm            1.0864     0.9205    0.3249     3.633
## marriedsep           0.8956     1.1166    0.4579     1.752
## raceother            1.4171     0.7056    0.2851     7.045
## racewht              1.3655     0.7323    0.4685     3.980
## collegehs or less    0.8984     1.1131    0.3461     2.332
## collegesome coll     0.7746     1.2909    0.2172     2.763
## 
## Concordance= 0.586  (se = 0.06 )
## Rsquare= NA   (max possible= NA )
## Likelihood ratio test= NA  on 7 df,   p=NA
## Wald test            = 7.52  on 7 df,   p=0.3771
## Score (logrank) test = NA  on 7 df,   p=NA
plot(survfit(fit3), main="Survival for Infectious Mortality")

#Other
fit4<-svycoxph(Surv(d.age, fail4==1)~male+married+race+college, des)
summary(fit4)
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (678) clusters.
## svydesign(ids = ~psu, strata = ~strata, weights = ~mortwt, data = sub[sub$mortwt > 
##     0, ], nest = T)
## Call:
## svycoxph(formula = Surv(d.age, fail4 == 1) ~ male + married + 
##     race + college, design = des)
## 
##   n= 19547, number of events= 278 
##    (380 observations deleted due to missingness)
## 
##                         coef  exp(coef)   se(coef)      z Pr(>|z|)    
## male               0.6260822  1.8702688  0.1332976  4.697 2.64e-06 ***
## marriednm          1.0432325  2.8383773  0.2003764  5.206 1.93e-07 ***
## marriedsep        -0.0005407  0.9994595  0.1477554 -0.004  0.99708    
## raceother         -1.1464775  0.3177541  0.4013217 -2.857  0.00428 ** 
## racewht           -0.4883579  0.6136332  0.1960580 -2.491  0.01274 *  
## collegehs or less  0.6478720  1.9114688  0.2159316  3.000  0.00270 ** 
## collegesome coll   0.6977048  2.0091360  0.2486251  2.806  0.00501 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## male                 1.8703     0.5347    1.4403    2.4287
## marriednm            2.8384     0.3523    1.9165    4.2037
## marriedsep           0.9995     1.0005    0.7482    1.3352
## raceother            0.3178     3.1471    0.1447    0.6977
## racewht              0.6136     1.6296    0.4179    0.9011
## collegehs or less    1.9115     0.5232    1.2519    2.9186
## collegesome coll     2.0091     0.4977    1.2342    3.2707
## 
## Concordance= 0.698  (se = 0.027 )
## Rsquare= NA   (max possible= NA )
## Likelihood ratio test= NA  on 7 df,   p=NA
## Wald test            = 83.21  on 7 df,   p=2.998e-15
## Score (logrank) test = NA  on 7 df,   p=NA
plot(survfit(fit4), main="Survival for Other Cause Mortality")

Construct a test of whether the betas are the same for each failure type using a Chow Test (See Allison p 217 for this). Basically we compare the deviance of the model with all causes of death to the sum of the deviances from each of the competing risk situations. If the test is significant, the it suggests that each cause of death has a different combination of the beta’s in the model. I.e. the regression effects are not the same across causes of death.

#deviance from total model
d1<--2*fita$ll[2]

#sum of deviances from cause-specific models
otherds<- (-2*fit1$ll[2]+ -2*fit2$ll[2]+ -2*fit3$ll[2]+ -2*fit4$ll[2])

#Chow test
test<- d1-otherds
df<-(length(coef(fit1))*3)-length(coef(fita))
#print the test results
print(list(test=test, df=df,pval= pchisq(test, df=df, lower=F)))
## $test
## [1] 150.4996
## 
## $df
## [1] 14
## 
## $pval
## [1] 5.710724e-25

Alternatively, we could simply stratifiy the baseline hazard by type of failure

fits<-svycoxph(Surv(d.age, d.event)~male+married+race+college+strata(cod), des)
summary(fits)
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (678) clusters.
## svydesign(ids = ~psu, strata = ~strata, weights = ~mortwt, data = sub[sub$mortwt > 
##     0, ], nest = T)
## Call:
## svycoxph(formula = Surv(d.age, d.event) ~ male + married + race + 
##     college + strata(cod), design = des)
## 
##   n= 878, number of events= 878 
##    (19049 observations deleted due to missingness)
## 
##                       coef exp(coef) se(coef)      z Pr(>|z|)    
## male               0.15841   1.17164  0.08633  1.835 0.066521 .  
## marriednm          0.68914   1.99199  0.17790  3.874 0.000107 ***
## marriedsep        -0.37762   0.68549  0.08795 -4.294 1.76e-05 ***
## raceother          0.30861   1.36153  0.25618  1.205 0.228344    
## racewht           -0.66813   0.51267  0.11795 -5.664 1.47e-08 ***
## collegehs or less  0.04346   1.04442  0.11450  0.380 0.704246    
## collegesome coll   0.32648   1.38608  0.12897  2.531 0.011362 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## male                 1.1716     0.8535    0.9893    1.3876
## marriednm            1.9920     0.5020    1.4056    2.8230
## marriedsep           0.6855     1.4588    0.5770    0.8144
## raceother            1.3615     0.7345    0.8241    2.2495
## racewht              0.5127     1.9506    0.4069    0.6460
## collegehs or less    1.0444     0.9575    0.8345    1.3072
## collegesome coll     1.3861     0.7215    1.0765    1.7847
## 
## Concordance= 0.629  (se = 0.021 )
## Rsquare= NA   (max possible= NA )
## Likelihood ratio test= NA  on 7 df,   p=NA
## Wald test            = 142.9  on 7 df,   p=0
## Score (logrank) test = NA  on 7 df,   p=NA
plot(survfit(fits), col=1:4)
legend("bottom", legend=c("Cancers", "CVD","Infectious", "Other"), lty=1, col=1:4)

Competing Risk Regression

The crr() function in the cmprsk library uses the methods discussed in Fine and Gray, 1999 for regression modeling for the subdistribution function for a competing risk. This is still a proportional hazards model for the key event of interest, but takes into account failures from other causes.

sub$cod2<-ifelse(is.na(sub$cod)==T,0,sub$cod)
#Make a matrix of predictors
covs<-data.frame(sub$male,sub$nm, sub$sep, sub$black, sub$oth, sub$hs, sub$col1)
names(covs)<-c("male", "neverm", "separated", "black", "other", "hsorless", "somecoll")
head(covs)
##   male neverm separated black other hsorless somecoll
## 1    1      1         0     0     0        1        0
## 2    1      0         0     0     0        0        0
## 3    1      1         0     0     1        0        1
## 4    0      1         0     0     0        1        0
## 5    0      0         1     0     0        1        0
## 6    1      1         0     0     0        1        0
#Fit the cumulative incidence model of Fine and Gray for cancer mortality
fit.crr<-crr(ftime=sub$d.age, fstatus=sub$cod2,cov1=covs, failcode=1,cencode=0 )
## 369 cases omitted due to missing values
summary(fit.crr)
## Competing Risks Regression
## 
## Call:
## crr(ftime = sub$d.age, fstatus = sub$cod2, cov1 = covs, failcode = 1, 
##     cencode = 0)
## 
##              coef exp(coef) se(coef)      z p-value
## male       0.4172     1.518    0.147  2.847  0.0044
## neverm     0.2688     1.308    0.284  0.946  0.3400
## separated -0.1171     0.889    0.153 -0.764  0.4500
## black      0.5172     1.677    0.185  2.801  0.0051
## other      0.0418     1.043    0.326  0.128  0.9000
## hsorless   0.3697     1.447    0.213  1.733  0.0830
## somecoll   0.5366     1.710    0.238  2.257  0.0240
## 
##           exp(coef) exp(-coef)  2.5% 97.5%
## male          1.518      0.659 1.139  2.02
## neverm        1.308      0.764 0.750  2.28
## separated     0.889      1.124 0.658  1.20
## black         1.677      0.596 1.168  2.41
## other         1.043      0.959 0.550  1.98
## hsorless      1.447      0.691 0.953  2.20
## somecoll      1.710      0.585 1.073  2.73
## 
## Num. cases = 19631 (369 cases omitted due to missing values)
## Pseudo Log-likelihood = -1811 
## Pseudo likelihood ratio test = 24.8  on 7 df,
#Plot some interesting cases
z.p<-predict(fit.crr, rbind(c(0,0,0,0,0,0,0),c(0,0,0,1,0,0,0),c(1,1,0,0,0,1,0),c(1,1,0,1,0,1,0)))
plot(z.p, col=1:4, lty=1, xlim=c(40,90), ylab="Cumulative Incidence", xlab="Age")
legend("topleft", legend=(c("Fem, Mar,Wh,Col","Fem, Mar,Bl,Col","Ma,NMar,Wh,HS","Ma,NMar,Bl,HS")), col=1:4, lty=1)
title(main="Cumulative Incidence of Cancer Mortalty")