#Load required libraries
library(foreign)
library(survival)
library(car)
library(survey)
library(eha)
library(tidyverse)
options(survey.lonely.psu = "adjust")
##The current study analyzes child maltreatment (in the form of physical and sexual abuse) and depression.
#Data
setwd("C:/Users/spara/Desktop/Cox Model HW")
#Read Data
w3 <- as_tibble(read.delim("21600-0008-Data.tsv", sep = '\t', header = TRUE ))
w4 <- as_tibble(read.delim("21600-0022-Data.tsv", sep = '\t', header = TRUE ))
#w5 <- as_tibble(read.delim("21600-0032-Data.tsv", sep = '\t', header = TRUE ))
wt <- as_tibble(read.delim("21600-0004-Data.tsv", sep = '\t', header = TRUE ))
names(w3)<-tolower(names(w3))
names(w4)<-tolower(names(w4))
#names(w5)<-tolower(names(w5))
names(wt)<-tolower(names(wt))
#Data clean
# Year of survey for wave 3
summary(w3$iyear3)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2001 2001 2001 2001 2002 2002
#year of birth
summary(w3$h3od1y)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1974 1978 1979 1979 1981 1983
# Current age: year of survey minus age at birth
w3$age3 <- (w3$iyear3 -w3$h3od1y)
summary(w3$age3)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.0 21.0 22.0 22.2 24.0 28.0
# Year of survey for wave 4
summary(w4$iyear4)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2007 2008 2008 2008 2008 2009
#year of birth
summary(w4$h4od1y)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1974 1978 1979 1979 1980 1983
# Current age is year of survey minus age at birth
w4$age4 <- (w4$iyear4 -w4$h4od1y)
summary(w4$age4)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 25 28 29 29 30 34
## Childhood maltreatment
w3$physical<- car::Recode(w3$h3ma3,recodes="1:5='Physcial abuse'; 6='No physical abuse'; else=NA")
w3$sexual<- car::Recode(w3$h3ma4,recodes="1:5='sexually abused'; 6='No sexual abuse'; else=NA")
#Employment status
w3$empstat<- car::Recode(w3$h3da28,recodes="1=1; 0=0; else=NA")
#Race/Ethnicity
w3$hisp <- car::Recode(w3$h3od2, recodes = "1= 'hisp' ; 0= 'non_hisp'; else=NA")
w3$race <- car::Recode(w3$h3ir4, recodes = "1= 'wh'; 2= 'bl'; 3 = 'asian'; 4='other'; else=NA")
w3$race_eth<-ifelse(w3$hisp ==1, "hisp", w3$race)
w3$race_eth <- interaction(w3$hisp, w3$race)
w3$race_ethr<- mutate(w3, ifelse(hisp==0 & race==1, 1,
ifelse(hisp==0 & race==2, 2,
ifelse(hisp==1, 3,
ifelse(hisp==0 & race==3, 4,
ifelse(hisp==0 & race==4, 5, "NA"))))))
w3$race_ethr<- ifelse(substr(w3$race_eth, 1, 4)=="hisp", "hisp", as.character(w3$race_eth))
w3$sex<- car::Recode(w3$bio_sex3, recodes="1='Male'; 2='Female'; else = NA")
#Education
w3$educ <- car::Recode(w3$h3ed1, recodes = "6:12= 'upto HS' ; 13:22= 'higher than HS'; else=NA")
# Depression (Outcome Variable)
w3$dep3 <- car::Recode(w3$h3id15,recodes="1=1; 0=0; else=NA")
w4$dep4 <- car::Recode(w4$h4id5h,recodes="1=1; 0=0; else=NA")
# Combine all ways
allwaves <- left_join(w3, w4, by="aid")
#allwaves <- left_join(allwaves, w5, by="aid")
allwaves <- left_join(allwaves,wt, by = "aid")
#selecting variables
myvars<-c("aid","dep3", "dep4", "physical", "sexual", "race_ethr","educ","age3", "age4", "sex", "empstat","gswgt1", "cluster2")
allwaves2<-allwaves[,myvars]
allwaves2 <- allwaves2%>%
filter(complete.cases(.))
#Filter data
allwaves2<-allwaves2 %>% #filter(#is.na(dep3)==F &
#is.na(dep4)==F &
#is.na(age3)==F &
#is.na(age4)==F &
#dep3!=1) %>%
mutate(deptrans_1 =ifelse(dep3==0 & dep4==0,0,1))
##You must form a person-period data set
subpp<-survSplit(Surv(age3, dep3)~.,
data = allwaves2, cut=seq(18,28,1),
episode ="age")
## Survey design
options(survey.lonely.psu = "adjust")
des<-svydesign(ids =~cluster2, weights = ~gswgt1,
data = subpp, nest = T)
##Fit the discrete time hazard model to your outcome
##Consider both the general model and other time specifications
##Include all main effects in the model
##Test for an interaction between at least two of the predictors
## Basic discrete time model
m0<-svyglm(dep3~age3+sex+as.factor(educ)+physical+sexual+as.factor(race_ethr),
design=des, family=binomial (link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(m0)
##
## Call:
## svyglm(formula = dep3 ~ age3 + sex + as.factor(educ) + physical +
## sexual + as.factor(race_ethr), design = des, family = binomial(link = "cloglog"))
##
## Survey design:
## svydesign(ids = ~cluster2, weights = ~gswgt1, data = subpp, nest = T)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -12.71393 0.70786 -17.961 < 2e-16 ***
## age3 0.40254 0.02842 14.167 < 2e-16 ***
## sexMale -0.81372 0.12115 -6.717 6.27e-10 ***
## as.factor(educ)upto HS 0.34517 0.13757 2.509 0.01342 *
## physicalPhyscial abuse 0.27803 0.12619 2.203 0.02946 *
## sexualsexually abused 0.57291 0.20964 2.733 0.00721 **
## as.factor(race_ethr)non_hisp.asian -11.83353 0.62812 -18.840 < 2e-16 ***
## as.factor(race_ethr)non_hisp.bl -0.39636 0.31874 -1.244 0.21607
## as.factor(race_ethr)non_hisp.other -0.16944 0.50821 -0.333 0.73940
## as.factor(race_ethr)non_hisp.wh 0.74441 0.25211 2.953 0.00378 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.9016897)
##
## Number of Fisher Scoring iterations: 15
## With interaction terms
m1<-svyglm(deptrans_1~age3+sex+as.factor(educ)+sex*as.factor(educ)+sex*sexual+ sex*physical++as.factor(race_ethr),
design=des, family=binomial (link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(m1)
##
## Call:
## svyglm(formula = deptrans_1 ~ age3 + sex + as.factor(educ) +
## sex * as.factor(educ) + sex * sexual + sex * physical + +as.factor(race_ethr),
## design = des, family = binomial(link = "cloglog"))
##
## Survey design:
## svydesign(ids = ~cluster2, weights = ~gswgt1, data = subpp, nest = T)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.023562 0.260572 -7.766 3.15e-12 ***
## age3 0.004003 0.010385 0.385 0.700573
## sexMale -0.706721 0.155440 -4.547 1.32e-05 ***
## as.factor(educ)upto HS 0.425618 0.107095 3.974 0.000122 ***
## sexualsexually abused 0.431092 0.189501 2.275 0.024704 *
## physicalPhyscial abuse 0.337417 0.109934 3.069 0.002658 **
## as.factor(race_ethr)non_hisp.asian -1.934888 0.640140 -3.023 0.003069 **
## as.factor(race_ethr)non_hisp.bl -0.114966 0.201207 -0.571 0.568817
## as.factor(race_ethr)non_hisp.other -0.005953 0.386791 -0.015 0.987746
## as.factor(race_ethr)non_hisp.wh 0.694863 0.164274 4.230 4.62e-05 ***
## sexMale:as.factor(educ)upto HS 0.069918 0.190223 0.368 0.713857
## sexMale:sexualsexually abused -0.436161 0.371004 -1.176 0.242091
## sexMale:physicalPhyscial abuse -0.128877 0.219206 -0.588 0.557697
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.9956311)
##
## Number of Fisher Scoring iterations: 6
## linear model
m2<-svyglm(deptrans_1~age3+sex+as.factor(educ)+physical+sexual+as.factor(race_ethr),
design=des, family=binomial (link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(m2)
##
## Call:
## svyglm(formula = deptrans_1 ~ age3 + sex + as.factor(educ) +
## physical + sexual + as.factor(race_ethr), design = des, family = binomial(link = "cloglog"))
##
## Survey design:
## svydesign(ids = ~cluster2, weights = ~gswgt1, data = subpp, nest = T)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.009136 0.255401 -7.867 1.65e-12 ***
## age3 0.003875 0.010430 0.371 0.71093
## sexMale -0.736862 0.099726 -7.389 2.03e-11 ***
## as.factor(educ)upto HS 0.450675 0.096194 4.685 7.34e-06 ***
## physicalPhyscial abuse 0.287802 0.090643 3.175 0.00190 **
## sexualsexually abused 0.297433 0.169706 1.753 0.08218 .
## as.factor(race_ethr)non_hisp.asian -1.934124 0.629216 -3.074 0.00261 **
## as.factor(race_ethr)non_hisp.bl -0.117635 0.201068 -0.585 0.55960
## as.factor(race_ethr)non_hisp.other -0.003275 0.386556 -0.008 0.99325
## as.factor(race_ethr)non_hisp.wh 0.695203 0.164287 4.232 4.52e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.9980024)
##
## Number of Fisher Scoring iterations: 6
exp(coef(m0))
## (Intercept) age3
## 3.008916e-06 1.495623e+00
## sexMale as.factor(educ)upto HS
## 4.432041e-01 1.412233e+00
## physicalPhyscial abuse sexualsexually abused
## 1.320532e+00 1.773424e+00
## as.factor(race_ethr)non_hisp.asian as.factor(race_ethr)non_hisp.bl
## 7.257113e-06 6.727671e-01
## as.factor(race_ethr)non_hisp.other as.factor(race_ethr)non_hisp.wh
## 8.441368e-01 2.105193e+00
exp(coef(m1))
## (Intercept) age3
## 0.1321838 1.0040113
## sexMale as.factor(educ)upto HS
## 0.4932589 1.5305367
## sexualsexually abused physicalPhyscial abuse
## 1.5389367 1.4013239
## as.factor(race_ethr)non_hisp.asian as.factor(race_ethr)non_hisp.bl
## 0.1444404 0.8913962
## as.factor(race_ethr)non_hisp.other as.factor(race_ethr)non_hisp.wh
## 0.9940644 2.0034353
## sexMale:as.factor(educ)upto HS sexMale:sexualsexually abused
## 1.0724200 0.6465134
## sexMale:physicalPhyscial abuse
## 0.8790821
exp(coef(m2))
## (Intercept) age3
## 0.1341045 1.0038820
## sexMale as.factor(educ)upto HS
## 0.4786136 1.5693710
## physicalPhyscial abuse sexualsexually abused
## 1.3334928 1.3463985
## as.factor(race_ethr)non_hisp.asian as.factor(race_ethr)non_hisp.bl
## 0.1445508 0.8890206
## as.factor(race_ethr)non_hisp.other as.factor(race_ethr)non_hisp.wh
## 0.9967306 2.0041150
##A general discrete time model was fitted to the data. Since Add Health survey was used in the current study, it only allows for the linear specification. Moreover, each survey wave take place in at least 6 years time gap which might also be a reason to use only the linear model.
##Generate hazard plots for interesting cases highlighting the significant predictors in your analysis
## Model AIC
AIC0<-AIC(m0)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC1<-AIC(m1)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC2<-AIC(m2)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AICS<-data.frame(AIC = c(AIC0["AIC"],AIC1["AIC"],AIC2["AIC"]),
mod = factor(c("General", "Interaction", "Linear")) )
AICS$Model<-forcats::fct_relevel(AICS$mod,c("General", "Interaction", "Linear"))
AICS$Delta_AIC<-AICS$AIC - AICS$AIC[AICS$mod=="General"]
knitr::kable(AICS[, c("Model", "AIC", "Delta_AIC")],
caption = "Relative AIC for alternative time specifications")
| Model | AIC | Delta_AIC |
|---|---|---|
| General | 3624.221 | 0.00 |
| Interaction | 17254.969 | 13630.75 |
| Linear | 17228.896 | 13604.68 |
##The general model appears to be the best fit (AIC: 3624.221). It is also noticeable that the interaction and linear based models have relatievly closer AICs (17254.969 and 17228.896) respectively .