#Load required libraries
library(foreign)
library(survival)
library(car)
library(survey)
library(eha)
library(tidyverse)
options(survey.lonely.psu = "adjust")Cox Model
My data:
setwd("C:/Users/spara/Desktop/Cox Model HW")In this study I have used ADD Health data to evaluate Cox Model. I have used sexual abuse, education, sex, and race/ethnicity as covariates to study their influence on being diagnosed with depression as adult.
#Read wave 3, wave 4, and weight
w3 <- as_tibble(read.delim("21600-0008-Data.tsv", sep = '\t', header = TRUE ))
w4 <- as_tibble(read.delim("21600-0022-Data.tsv", sep = '\t', header = TRUE ))
#w5 <- as_tibble(read.delim("21600-0032-Data.tsv", sep = '\t', header = TRUE ))
wt <- as_tibble(read.delim("21600-0004-Data.tsv", sep = '\t', header = TRUE ))
names(w3)<-tolower(names(w3))
names(w4)<-tolower(names(w4))
#names(w5)<-tolower(names(w5))
names(wt)<-tolower(names(wt))#Data Clean
# Year of survey for wave 3
summary(w3$iyear3) Min. 1st Qu. Median Mean 3rd Qu. Max.
2001 2001 2001 2001 2002 2002
#year of birth
summary(w3$h3od1y) Min. 1st Qu. Median Mean 3rd Qu. Max.
1974 1978 1979 1979 1981 1983
# Current age: year of survey minus age at birth
w3$age <- (w3$iyear3 -w3$h3od1y)
summary(w3$age) Min. 1st Qu. Median Mean 3rd Qu. Max.
18.0 21.0 22.0 22.2 24.0 28.0
# Year of survey for wave 4
summary(w4$iyear4) Min. 1st Qu. Median Mean 3rd Qu. Max.
2007 2008 2008 2008 2008 2009
#year of birth
summary(w4$h4od1y) Min. 1st Qu. Median Mean 3rd Qu. Max.
1974 1978 1979 1979 1980 1983
# Current age is year of survey minus age at birth
w4$age <- (w4$iyear4 -w4$h4od1y)
summary(w4$age) Min. 1st Qu. Median Mean 3rd Qu. Max.
25 28 29 29 30 34
#Current age: year of survey minus age at birth
#w5$age <- (w5$iyear5 -w5$h5od1y)
#summary(w5$age)## Childhood maltreatments
#w3$neglect1<- car::Recode(w3$h3ma1,recodes="1:5='1'; 6='0'; else=NA")
#w3$neglect2<- car::Recode(w3$h3ma2,recodes="1:5='1'; 6='0'; else=NA")
w3$physical<- car::Recode(w3$h3ma3,recodes="1:5='Physcial abuse'; 6='No physical abuse'; else=NA")
w3$sexual<- car::Recode(w3$h3ma4,recodes="1:5='sexually abused'; 6='No sexual abuse'; else=NA")
#w3$neg<- paste(w3$neglect1, w3$neglect2, sep = "")
#table(w3$neg)
#w3$neglect<- car::Recode(w3$neg,recodes="11|12|13|21|31=1; 22|23|32=0; else=NA")
#table(w3$neglect)
#w4$neglect<- car::Recode(w4$h4ma1,recodes="1:5=1; 6=0; else=NA")
#w4$physical<- car::Recode(w4$h4ma3,recodes="1:5=1; 6=0; else=NA")
#w4$sexual<- car::Recode(w4$h4ma5,recodes="1:5=1; 6=0; else=NA")
#Employment status
#h3da28: Do you currently have a job?
w3$empstat<- car::Recode(w3$h3da28,recodes="1=1; 0=0; else=NA")
#H4LM11: Do you currently work 10 hours per week?
#w4$empstat<- car::Recode(w4$h4lm11,recodes="1=1; 0=0; else=NA")
#Race/Ethnicity
w3$hisp <- car::Recode(w3$h3od2, recodes = "1= 'hisp' ; 0= 'non_hisp'; else=NA")
w3$race <- car::Recode(w3$h3ir4, recodes = "1= 'wh'; 2= 'bl'; 3 = 'asian'; 4='other'; else=NA")
w3$race_eth<-ifelse(w3$hisp ==1, "hisp", w3$race)
w3$race_eth <- interaction(w3$hisp, w3$race)
w3$race_ethr<- mutate(w3, ifelse(hisp==0 & race==1, 1,
ifelse(hisp==0 & race==2, 2,
ifelse(hisp==1, 3,
ifelse(hisp==0 & race==3, 4,
ifelse(hisp==0 & race==4, 5, "NA"))))))
w3$race_ethr<- ifelse(substr(w3$race_eth, 1, 4)=="hisp", "hisp", as.character(w3$race_eth))
#Education
#What is the highest grade or year of regular school you have completed?
w3$educ <- car::Recode(w3$h3ed1, recodes = "6:12= 'upto HS' ; 13:22= 'higher than HS'; else=NA")
#w4$educ <- car:: Recode(w4$h4ed2, recodes = "1:3 = 'upto to HS'; 4:13 = 'higher than HS'; else = NA")# Depression
## h3id15: Have you ever been diagnosed with depression
##h4id5h: Has a doctor, nurse or other health care provider ever told you that you have or had: depression?
w3$dep <- car::Recode(w3$h3id15,recodes="1=1; 0=0; else=NA")
w4$dep <- car::Recode(w4$h4id5h,recodes="1=1; 0=0; else=NA")
#w3 <- w3%>%
# filter(h3id15 == 1)
#w4<-w4%>%
# filter(h4id5h==1)# Combine all ways
allwaves <- left_join(w3, w4, by="aid")
#allwaves <- left_join(allwaves, w5, by="aid")
allwaves <- left_join(allwaves,wt, by = "aid")#selecting variables
myvars<-c("aid","dep.x", "dep.y", "physical", "sexual", "race_ethr","educ","age.x", "age.y", "bio_sex3", "empstat","gswgt1", "cluster2")
allwaves2<-allwaves[,myvars]
allwaves2$sex<- car::Recode(allwaves2$bio_sex3, recodes="1='Male'; 2='Female'; else = NA")# time varying variables- Health and age
allwaves2$age_1<- allwaves2$age.x
allwaves2$age_2<- allwaves2$age.y
allwaves2$dep_1<-allwaves2$dep.x
allwaves2$dep_2<-allwaves2$dep.y#Filter data
allwaves2<-allwaves2 %>% filter(is.na(dep_1)==F &
is.na(dep_2)==F &
is.na(age_1)==F &
is.na(age_2)==F &
dep_1!=1) %>%
mutate(deptrans_1 =ifelse(dep_1==0 & dep_2==0, 0,1))elong <- allwaves2 %>%
select(physical,sexual,race_ethr, sex, aid,gswgt1,cluster2,educ, empstat, #time constant
age_1, age_2,#t-varying variables
dep_1, dep_2)%>%
pivot_longer(cols = c(-physical,-sexual, -race_ethr, -sex, -aid, -empstat, -educ, -gswgt1, -cluster2), #time constant variables go here
names_to = c(".value", "wave"), #make wave variable and put t-v vars into columns
names_sep = "_") %>% #all t-v variables have _ between name and time, like age_1, age_2
group_by(aid)%>%
mutate(age_enter = as.numeric(age),
age_exit = lead(age, 1, order_by= aid))%>%
mutate(nexdep = dplyr::lead(dep,n=1, order_by = age))%>%
mutate(deptran = ifelse(nexdep == 1, 1, 0))%>%
filter(is.na(age_exit)==F)%>%
ungroup()%>%
filter(complete.cases(age_enter, age_exit, deptran,
physical,sexual,race_ethr, sex, aid,educ, gswgt1,cluster2))options(survey.lonely.psu = "adjust")
#des<-svydesign(ids=~psuscid, strata = ~region , weights=~gswgt4_2, data=dat3, nest = T)
# cluster
#des<-svydesign(ids=~psuscid, strata = ~1 , weights=~gswgt4_2, data=dat3, nest = T)
des2<-svydesign(ids = ~cluster2,
#strata = 1,
weights=~gswgt1,
data=elong,
nest=T)Depression among US adults who were victims of maltreatment (Sexual abuse) as a child
Predictor variables: sexual abuse, education, sex, and race and ethnicity
elong <- elong %>%
filter(complete.cases(.))fitl1<-svycoxph(Surv(time = age_enter, time2=age_exit, event = deptran)~sexual+educ+sex+race_ethr , design=des2)
summary(fitl1)1 - level Cluster Sampling design (with replacement)
With (132) clusters.
svydesign(ids = ~cluster2, weights = ~gswgt1, data = elong, nest = T)
Call:
svycoxph(formula = Surv(time = age_enter, time2 = age_exit, event = deptran) ~
sexual + educ + sex + race_ethr, design = des2)
n= 2944, number of events= 300
coef exp(coef) se(coef) robust se z Pr(>|z|)
sexualsexually abused 0.2342 1.2639 0.2677 0.2953 0.793 0.427726
educupto HS 0.4693 1.5989 0.1150 0.1300 3.611 0.000305
sexMale -0.7924 0.4527 0.1209 0.1494 -5.306 1.12e-07
race_ethrnon_hisp.asian -1.1785 0.3077 1.7980 0.5828 -2.022 0.043160
race_ethrnon_hisp.bl -0.5433 0.5808 0.2852 0.2863 -1.898 0.057699
race_ethrnon_hisp.other 0.0295 1.0299 0.4243 0.4994 0.059 0.952897
race_ethrnon_hisp.wh 0.5203 1.6826 0.2097 0.2321 2.241 0.025002
sexualsexually abused
educupto HS ***
sexMale ***
race_ethrnon_hisp.asian *
race_ethrnon_hisp.bl .
race_ethrnon_hisp.other
race_ethrnon_hisp.wh *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
sexualsexually abused 1.2639 0.7912 0.7086 2.2544
educupto HS 1.5989 0.6254 1.2394 2.0628
sexMale 0.4528 2.2087 0.3379 0.6067
race_ethrnon_hisp.asian 0.3077 3.2495 0.0982 0.9644
race_ethrnon_hisp.bl 0.5808 1.7217 0.3314 1.0179
race_ethrnon_hisp.other 1.0299 0.9709 0.3870 2.7407
race_ethrnon_hisp.wh 1.6826 0.5943 1.0675 2.6521
Concordance= 0.663 (se = 0.018 )
Likelihood ratio test= NA on 7 df, p=NA
Wald test = 82.01 on 7 df, p=5e-15
Score (logrank) test = NA on 7 df, p=NA
(Note: the likelihood ratio and score tests assume independence of
observations within a cluster, the Wald and robust score tests do not).
Residuals
schoenresid<-resid(fitl1, type="schoenfeld")
fit.sr<-lm(schoenresid~des2$variables$age_enter[des2$variables$deptran==1])
fit.sr%>%
broom::tidy()%>%
filter(term != "(Intercept)")%>%
select(response, estimate, statistic, p.value)# A tibble: 7 × 4
response estimate statistic p.value
<chr> <dbl> <dbl> <dbl>
1 sexualsexually abused 0.0127 1.94 0.0529
2 educupto HS 0.00694 0.462 0.644
3 sexMale -0.0135 -0.971 0.332
4 race_ethrnon_hisp.asian -0.00284 -1.64 0.103
5 race_ethrnon_hisp.bl 0.00677 0.649 0.517
6 race_ethrnon_hisp.other -0.00795 -1.89 0.0602
7 race_ethrnon_hisp.wh 0.0215 1.67 0.0969
Weighted Residuals
fit.test<-cox.zph(fitl1)
fit.test chisq df p
sexual 0.000317 1 0.99
educ 0.000486 1 0.98
sex 0.000109 1 0.99
race_ethr 0.002844 4 1.00
GLOBAL 0.003873 7 1.00
plot(fit.test, df=2)Based on the result there appears to be no dependence between the residuals and the outcome, or are correlated with the timing of the transition and thus is most likely proportional.
#extract Martingale residuals
res.mar<-resid(fitl1, type="martingale")
#plot vs physical abuse
scatter.smooth(des2$variables$empstat, res.mar,degree = 2,
span = 5, ylab="Martingale Residual",xlab = "Employment status", main = "Martingale Residual plotted against Employment status",
col=1, cex=.25, lpars=list(col = "red", lwd = 3))Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
pseudoinverse used at 1.005
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
neighborhood radius 2.2472
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
reciprocal condition number 7.7503e-15
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
pseudoinverse used at -0.005
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
neighborhood radius 2.2472
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
reciprocal condition number 5.7182e-15
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
There are other near singularities as well. 5.0501
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
pseudoinverse used at -0.005
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
neighborhood radius 2.2472
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
reciprocal condition number 1.0147e-15
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
There are other near singularities as well. 5.0501
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
pseudoinverse used at -0.005
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
neighborhood radius 2.2472
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
reciprocal condition number 2.7748e-15
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
There are other near singularities as well. 5.0501
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
pseudoinverse used at 1.005
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
neighborhood radius 2.2472
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
reciprocal condition number 7.7503e-15
Stratification
fitl2<-svycoxph(Surv(time = age_enter, time2=age_exit, event = deptran)~sexual+strata(educ)+sex+race_ethr , design=des2)
summary(fitl2)1 - level Cluster Sampling design (with replacement)
With (132) clusters.
svydesign(ids = ~cluster2, weights = ~gswgt1, data = elong, nest = T)
Call:
svycoxph(formula = Surv(time = age_enter, time2 = age_exit, event = deptran) ~
sexual + strata(educ) + sex + race_ethr, design = des2)
n= 2944, number of events= 300
coef exp(coef) se(coef) robust se z Pr(>|z|)
sexualsexually abused 0.24656 1.27962 0.26780 0.29377 0.839 0.4013
sexMale -0.79531 0.45144 0.12097 0.14962 -5.315 1.06e-07
race_ethrnon_hisp.asian -1.19503 0.30269 1.79805 0.58921 -2.028 0.0425
race_ethrnon_hisp.bl -0.54066 0.58236 0.28524 0.28937 -1.868 0.0617
race_ethrnon_hisp.other 0.03136 1.03186 0.42429 0.49907 0.063 0.9499
race_ethrnon_hisp.wh 0.51915 1.68059 0.20976 0.23438 2.215 0.0268
sexualsexually abused
sexMale ***
race_ethrnon_hisp.asian *
race_ethrnon_hisp.bl .
race_ethrnon_hisp.other
race_ethrnon_hisp.wh *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
sexualsexually abused 1.2796 0.7815 0.71948 2.2758
sexMale 0.4514 2.2151 0.33670 0.6053
race_ethrnon_hisp.asian 0.3027 3.3037 0.09538 0.9606
race_ethrnon_hisp.bl 0.5824 1.7171 0.33028 1.0268
race_ethrnon_hisp.other 1.0319 0.9691 0.38798 2.7443
race_ethrnon_hisp.wh 1.6806 0.5950 1.06158 2.6606
Concordance= 0.65 (se = 0.017 )
Likelihood ratio test= NA on 6 df, p=NA
Wald test = 77.97 on 6 df, p=9e-15
Score (logrank) test = NA on 6 df, p=NA
(Note: the likelihood ratio and score tests assume independence of
observations within a cluster, the Wald and robust score tests do not).
ANNOVA like test for factors
fit3<-svycoxph(Surv(time = age_enter, time2=age_exit, event = deptran)~sexual+educ,
design=des2)
summary(fit3)1 - level Cluster Sampling design (with replacement)
With (132) clusters.
svydesign(ids = ~cluster2, weights = ~gswgt1, data = elong, nest = T)
Call:
svycoxph(formula = Surv(time = age_enter, time2 = age_exit, event = deptran) ~
sexual + educ, design = des2)
n= 2944, number of events= 300
coef exp(coef) se(coef) robust se z Pr(>|z|)
sexualsexually abused 0.1193 1.1267 0.2671 0.2911 0.410 0.68206
educupto HS 0.3285 1.3889 0.1139 0.1246 2.637 0.00835 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
sexualsexually abused 1.127 0.8876 0.6368 1.993
educupto HS 1.389 0.7200 1.0880 1.773
Concordance= 0.534 (se = 0.02 )
Likelihood ratio test= NA on 2 df, p=NA
Wald test = 6.96 on 2 df, p=0.03
Score (logrank) test = NA on 2 df, p=NA
(Note: the likelihood ratio and score tests assume independence of
observations within a cluster, the Wald and robust score tests do not).
regTermTest(fit3, ~sexual, method="LRT")Working (Rao-Scott+F) LRT for sexual
in svycoxph(formula = Surv(time = age_enter, time2 = age_exit, event = deptran) ~
sexual + educ, design = des2)
Working 2logLR = 0.2108057 p= 0.64186
df=1; denominator df= 130
Based on the evaluation of Cox model for sexual abuse and likelihood of developing depression, it appears that the time-constant covariate effect, i.e. the effect does not vary with time, the cox model seems to be a good model for this analysis.