Cox Model

Author

Jyoti Nepal

Published

September 29, 2022

#Load required libraries
library(foreign)
library(survival)
library(car)
library(survey)
library(eha)
library(tidyverse)
options(survey.lonely.psu = "adjust")

My data:

setwd("C:/Users/spara/Desktop/Cox Model HW")

In this study I have used ADD Health data to evaluate Cox Model. I have used sexual abuse, education, sex, and race/ethnicity as covariates to study their influence on being diagnosed with depression as adult.

#Read wave 3, wave 4, and weight

w3 <- as_tibble(read.delim("21600-0008-Data.tsv", sep = '\t', header = TRUE ))
w4 <- as_tibble(read.delim("21600-0022-Data.tsv", sep = '\t', header = TRUE ))
#w5 <- as_tibble(read.delim("21600-0032-Data.tsv", sep = '\t', header = TRUE ))
wt <- as_tibble(read.delim("21600-0004-Data.tsv", sep = '\t', header = TRUE ))

names(w3)<-tolower(names(w3))
names(w4)<-tolower(names(w4))
#names(w5)<-tolower(names(w5))
names(wt)<-tolower(names(wt))

#Data Clean

# Year of survey for wave 3
summary(w3$iyear3)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   2001    2001    2001    2001    2002    2002 
#year of birth
summary(w3$h3od1y)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1974    1978    1979    1979    1981    1983 
# Current age: year of survey minus age at birth
w3$age <- (w3$iyear3 -w3$h3od1y)
summary(w3$age)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   18.0    21.0    22.0    22.2    24.0    28.0 
# Year of survey for wave 4
summary(w4$iyear4)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   2007    2008    2008    2008    2008    2009 
#year of birth
summary(w4$h4od1y)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1974    1978    1979    1979    1980    1983 
# Current age is year of survey minus age at birth
w4$age <- (w4$iyear4 -w4$h4od1y)
summary(w4$age)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
     25      28      29      29      30      34 
#Current age: year of survey minus age at birth
#w5$age <- (w5$iyear5 -w5$h5od1y)
#summary(w5$age)
## Childhood maltreatments

#w3$neglect1<- car::Recode(w3$h3ma1,recodes="1:5='1'; 6='0'; else=NA")
#w3$neglect2<- car::Recode(w3$h3ma2,recodes="1:5='1'; 6='0'; else=NA")
w3$physical<- car::Recode(w3$h3ma3,recodes="1:5='Physcial abuse'; 6='No physical abuse'; else=NA")
w3$sexual<- car::Recode(w3$h3ma4,recodes="1:5='sexually abused'; 6='No sexual abuse'; else=NA")


#w3$neg<- paste(w3$neglect1, w3$neglect2, sep = "")
#table(w3$neg)

#w3$neglect<- car::Recode(w3$neg,recodes="11|12|13|21|31=1; 22|23|32=0; else=NA")
#table(w3$neglect)

#w4$neglect<- car::Recode(w4$h4ma1,recodes="1:5=1; 6=0; else=NA")
#w4$physical<- car::Recode(w4$h4ma3,recodes="1:5=1; 6=0; else=NA")
#w4$sexual<- car::Recode(w4$h4ma5,recodes="1:5=1; 6=0; else=NA")

#Employment status

#h3da28: Do you currently have a job?
w3$empstat<- car::Recode(w3$h3da28,recodes="1=1; 0=0; else=NA")

#H4LM11: Do you currently work 10 hours per week?
#w4$empstat<- car::Recode(w4$h4lm11,recodes="1=1; 0=0; else=NA")


#Race/Ethnicity
w3$hisp <- car::Recode(w3$h3od2, recodes = "1= 'hisp' ; 0= 'non_hisp'; else=NA")
w3$race <- car::Recode(w3$h3ir4, recodes = "1= 'wh'; 2= 'bl'; 3 = 'asian'; 4='other'; else=NA")
w3$race_eth<-ifelse(w3$hisp ==1, "hisp", w3$race)
w3$race_eth <- interaction(w3$hisp, w3$race)


  w3$race_ethr<- mutate(w3, ifelse(hisp==0 & race==1, 1,
                    ifelse(hisp==0 & race==2, 2,
                     ifelse(hisp==1, 3,
                    ifelse(hisp==0 & race==3, 4,
                     ifelse(hisp==0 & race==4, 5, "NA"))))))
w3$race_ethr<- ifelse(substr(w3$race_eth, 1, 4)=="hisp", "hisp", as.character(w3$race_eth))

#Education
#What is the highest grade or year of regular school you have completed?

w3$educ <- car::Recode(w3$h3ed1, recodes = "6:12= 'upto HS' ; 13:22= 'higher than HS'; else=NA")


#w4$educ <- car:: Recode(w4$h4ed2, recodes = "1:3 =  'upto to HS'; 4:13 = 'higher than HS'; else = NA")
# Depression

## h3id15: Have you ever been diagnosed with depression
##h4id5h: Has a doctor, nurse or other health care provider ever told you that you have or had: depression?

w3$dep <-  car::Recode(w3$h3id15,recodes="1=1; 0=0; else=NA")

w4$dep <-  car::Recode(w4$h4id5h,recodes="1=1; 0=0; else=NA")
  

#w3 <- w3%>%
#  filter(h3id15 == 1)

#w4<-w4%>%
 # filter(h4id5h==1)
# Combine all ways
allwaves <- left_join(w3, w4, by="aid")
#allwaves <- left_join(allwaves, w5, by="aid")
allwaves <- left_join(allwaves,wt, by = "aid")

#selecting variables

myvars<-c("aid","dep.x", "dep.y", "physical", "sexual", "race_ethr","educ","age.x", "age.y", "bio_sex3", "empstat","gswgt1", "cluster2")
allwaves2<-allwaves[,myvars]

allwaves2$sex<- car::Recode(allwaves2$bio_sex3, recodes="1='Male'; 2='Female'; else = NA")
# time varying variables- Health and age
allwaves2$age_1<- allwaves2$age.x
allwaves2$age_2<- allwaves2$age.y



allwaves2$dep_1<-allwaves2$dep.x
allwaves2$dep_2<-allwaves2$dep.y

#Filter data

allwaves2<-allwaves2 %>% filter(is.na(dep_1)==F &
                  is.na(dep_2)==F &
                  is.na(age_1)==F &
                  is.na(age_2)==F &
                  dep_1!=1) %>%
  mutate(deptrans_1 =ifelse(dep_1==0 & dep_2==0, 0,1))
elong <- allwaves2 %>%
  select(physical,sexual,race_ethr, sex, aid,gswgt1,cluster2,educ, empstat,    #time constant
        age_1, age_2,#t-varying variables
         dep_1, dep_2)%>%
   pivot_longer(cols = c(-physical,-sexual, -race_ethr, -sex, -aid, -empstat, -educ, -gswgt1, -cluster2), #time constant variables go here
               names_to  = c(".value", "wave"), #make wave variable and put t-v vars into columns
               names_sep = "_") %>% #all t-v variables have _ between name and time, like age_1, age_2
   group_by(aid)%>%
  mutate(age_enter = as.numeric(age), 
         age_exit = lead(age, 1, order_by= aid))%>%
  mutate(nexdep = dplyr::lead(dep,n=1, order_by = age))%>%
  mutate(deptran = ifelse(nexdep == 1, 1, 0))%>%
  filter(is.na(age_exit)==F)%>%
  ungroup()%>%
  filter(complete.cases(age_enter, age_exit, deptran,
              physical,sexual,race_ethr, sex, aid,educ, gswgt1,cluster2))
options(survey.lonely.psu = "adjust")


#des<-svydesign(ids=~psuscid, strata = ~region , weights=~gswgt4_2, data=dat3, nest = T)

# cluster
#des<-svydesign(ids=~psuscid, strata = ~1 , weights=~gswgt4_2, data=dat3, nest = T)



des2<-svydesign(ids = ~cluster2,
                #strata = 1,
                weights=~gswgt1, 
                data=elong,
                nest=T)

Depression among US adults who were victims of maltreatment (Sexual abuse) as a child

Predictor variables: sexual abuse, education, sex, and race and ethnicity

elong <- elong %>%
 filter(complete.cases(.))
fitl1<-svycoxph(Surv(time = age_enter, time2=age_exit, event = deptran)~sexual+educ+sex+race_ethr , design=des2)
summary(fitl1)
1 - level Cluster Sampling design (with replacement)
With (132) clusters.
svydesign(ids = ~cluster2, weights = ~gswgt1, data = elong, nest = T)
Call:
svycoxph(formula = Surv(time = age_enter, time2 = age_exit, event = deptran) ~ 
    sexual + educ + sex + race_ethr, design = des2)

  n= 2944, number of events= 300 

                           coef exp(coef) se(coef) robust se      z Pr(>|z|)
sexualsexually abused    0.2342    1.2639   0.2677    0.2953  0.793 0.427726
educupto HS              0.4693    1.5989   0.1150    0.1300  3.611 0.000305
sexMale                 -0.7924    0.4527   0.1209    0.1494 -5.306 1.12e-07
race_ethrnon_hisp.asian -1.1785    0.3077   1.7980    0.5828 -2.022 0.043160
race_ethrnon_hisp.bl    -0.5433    0.5808   0.2852    0.2863 -1.898 0.057699
race_ethrnon_hisp.other  0.0295    1.0299   0.4243    0.4994  0.059 0.952897
race_ethrnon_hisp.wh     0.5203    1.6826   0.2097    0.2321  2.241 0.025002
                           
sexualsexually abused      
educupto HS             ***
sexMale                 ***
race_ethrnon_hisp.asian *  
race_ethrnon_hisp.bl    .  
race_ethrnon_hisp.other    
race_ethrnon_hisp.wh    *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

                        exp(coef) exp(-coef) lower .95 upper .95
sexualsexually abused      1.2639     0.7912    0.7086    2.2544
educupto HS                1.5989     0.6254    1.2394    2.0628
sexMale                    0.4528     2.2087    0.3379    0.6067
race_ethrnon_hisp.asian    0.3077     3.2495    0.0982    0.9644
race_ethrnon_hisp.bl       0.5808     1.7217    0.3314    1.0179
race_ethrnon_hisp.other    1.0299     0.9709    0.3870    2.7407
race_ethrnon_hisp.wh       1.6826     0.5943    1.0675    2.6521

Concordance= 0.663  (se = 0.018 )
Likelihood ratio test= NA  on 7 df,   p=NA
Wald test            = 82.01  on 7 df,   p=5e-15
Score (logrank) test = NA  on 7 df,   p=NA

  (Note: the likelihood ratio and score tests assume independence of
     observations within a cluster, the Wald and robust score tests do not).

Residuals

schoenresid<-resid(fitl1, type="schoenfeld")

fit.sr<-lm(schoenresid~des2$variables$age_enter[des2$variables$deptran==1])

fit.sr%>%
  broom::tidy()%>%
  filter(term != "(Intercept)")%>%
  select(response, estimate, statistic, p.value)
# A tibble: 7 × 4
  response                estimate statistic p.value
  <chr>                      <dbl>     <dbl>   <dbl>
1 sexualsexually abused    0.0127      1.94   0.0529
2 educupto HS              0.00694     0.462  0.644 
3 sexMale                 -0.0135     -0.971  0.332 
4 race_ethrnon_hisp.asian -0.00284    -1.64   0.103 
5 race_ethrnon_hisp.bl     0.00677     0.649  0.517 
6 race_ethrnon_hisp.other -0.00795    -1.89   0.0602
7 race_ethrnon_hisp.wh     0.0215      1.67   0.0969

Weighted Residuals

fit.test<-cox.zph(fitl1)
fit.test
             chisq df    p
sexual    0.000317  1 0.99
educ      0.000486  1 0.98
sex       0.000109  1 0.99
race_ethr 0.002844  4 1.00
GLOBAL    0.003873  7 1.00
plot(fit.test, df=2)

Based on the result there appears to be no dependence between the residuals and the outcome, or are correlated with the timing of the transition and thus is most likely proportional.

#extract Martingale residuals
res.mar<-resid(fitl1, type="martingale")
#plot vs physical abuse
scatter.smooth(des2$variables$empstat, res.mar,degree = 2,
               span = 5, ylab="Martingale Residual",xlab = "Employment status", main = "Martingale Residual plotted against Employment status",
               col=1,  cex=.25, lpars=list(col = "red", lwd = 3))
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
pseudoinverse used at 1.005
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
neighborhood radius 2.2472
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
reciprocal condition number 7.7503e-15
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
pseudoinverse used at -0.005
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
neighborhood radius 2.2472
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
reciprocal condition number 5.7182e-15
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
There are other near singularities as well. 5.0501
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
pseudoinverse used at -0.005
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
neighborhood radius 2.2472
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
reciprocal condition number 1.0147e-15
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
There are other near singularities as well. 5.0501
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
pseudoinverse used at -0.005
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
neighborhood radius 2.2472
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
reciprocal condition number 2.7748e-15
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
There are other near singularities as well. 5.0501
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
pseudoinverse used at 1.005
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
neighborhood radius 2.2472
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
reciprocal condition number 7.7503e-15

Stratification

fitl2<-svycoxph(Surv(time = age_enter, time2=age_exit, event = deptran)~sexual+strata(educ)+sex+race_ethr , design=des2)
summary(fitl2)
1 - level Cluster Sampling design (with replacement)
With (132) clusters.
svydesign(ids = ~cluster2, weights = ~gswgt1, data = elong, nest = T)
Call:
svycoxph(formula = Surv(time = age_enter, time2 = age_exit, event = deptran) ~ 
    sexual + strata(educ) + sex + race_ethr, design = des2)

  n= 2944, number of events= 300 

                            coef exp(coef) se(coef) robust se      z Pr(>|z|)
sexualsexually abused    0.24656   1.27962  0.26780   0.29377  0.839   0.4013
sexMale                 -0.79531   0.45144  0.12097   0.14962 -5.315 1.06e-07
race_ethrnon_hisp.asian -1.19503   0.30269  1.79805   0.58921 -2.028   0.0425
race_ethrnon_hisp.bl    -0.54066   0.58236  0.28524   0.28937 -1.868   0.0617
race_ethrnon_hisp.other  0.03136   1.03186  0.42429   0.49907  0.063   0.9499
race_ethrnon_hisp.wh     0.51915   1.68059  0.20976   0.23438  2.215   0.0268
                           
sexualsexually abused      
sexMale                 ***
race_ethrnon_hisp.asian *  
race_ethrnon_hisp.bl    .  
race_ethrnon_hisp.other    
race_ethrnon_hisp.wh    *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

                        exp(coef) exp(-coef) lower .95 upper .95
sexualsexually abused      1.2796     0.7815   0.71948    2.2758
sexMale                    0.4514     2.2151   0.33670    0.6053
race_ethrnon_hisp.asian    0.3027     3.3037   0.09538    0.9606
race_ethrnon_hisp.bl       0.5824     1.7171   0.33028    1.0268
race_ethrnon_hisp.other    1.0319     0.9691   0.38798    2.7443
race_ethrnon_hisp.wh       1.6806     0.5950   1.06158    2.6606

Concordance= 0.65  (se = 0.017 )
Likelihood ratio test= NA  on 6 df,   p=NA
Wald test            = 77.97  on 6 df,   p=9e-15
Score (logrank) test = NA  on 6 df,   p=NA

  (Note: the likelihood ratio and score tests assume independence of
     observations within a cluster, the Wald and robust score tests do not).

ANNOVA like test for factors

fit3<-svycoxph(Surv(time = age_enter, time2=age_exit, event = deptran)~sexual+educ,
               design=des2)
summary(fit3)
1 - level Cluster Sampling design (with replacement)
With (132) clusters.
svydesign(ids = ~cluster2, weights = ~gswgt1, data = elong, nest = T)
Call:
svycoxph(formula = Surv(time = age_enter, time2 = age_exit, event = deptran) ~ 
    sexual + educ, design = des2)

  n= 2944, number of events= 300 

                        coef exp(coef) se(coef) robust se     z Pr(>|z|)   
sexualsexually abused 0.1193    1.1267   0.2671    0.2911 0.410  0.68206   
educupto HS           0.3285    1.3889   0.1139    0.1246 2.637  0.00835 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

                      exp(coef) exp(-coef) lower .95 upper .95
sexualsexually abused     1.127     0.8876    0.6368     1.993
educupto HS               1.389     0.7200    1.0880     1.773

Concordance= 0.534  (se = 0.02 )
Likelihood ratio test= NA  on 2 df,   p=NA
Wald test            = 6.96  on 2 df,   p=0.03
Score (logrank) test = NA  on 2 df,   p=NA

  (Note: the likelihood ratio and score tests assume independence of
     observations within a cluster, the Wald and robust score tests do not).
regTermTest(fit3, ~sexual, method="LRT")
Working (Rao-Scott+F) LRT for sexual
 in svycoxph(formula = Surv(time = age_enter, time2 = age_exit, event = deptran) ~ 
    sexual + educ, design = des2)
Working 2logLR =  0.2108057 p= 0.64186 
df=1;  denominator df= 130

Based on the evaluation of Cox model for sexual abuse and likelihood of developing depression, it appears that the time-constant covariate effect, i.e. the effect does not vary with time, the cox model seems to be a good model for this analysis.