Untitled

#Load required libraries
library(foreign)
library(survival)
library(car)
library(survey)
library(eha)
library(tidyverse)
library(ggpubr)
library(svyVGAM)
Warning: package 'svyVGAM' was built under R version 4.2.2
Warning: package 'VGAM' was built under R version 4.2.2
library(table1)
Warning: package 'table1' was built under R version 4.2.2
library(kableExtra)
Warning: package 'kableExtra' was built under R version 4.2.2
library(gtsummary)
options(survey.lonely.psu = "adjust")
setwd("C:/Users/spara/Desktop/Cox Model HW")
w3 <- as_tibble(read.delim("21600-0008-Data.tsv", sep = '\t', header = TRUE ))
w4 <- as_tibble(read.delim("21600-0022-Data.tsv", sep = '\t', header = TRUE ))
#w5 <- as_tibble(read.delim("21600-0032-Data.tsv", sep = '\t', header = TRUE ))
wt <- as_tibble(read.delim("21600-0004-Data.tsv", sep = '\t', header = TRUE ))

names(w3)<-tolower(names(w3))
names(w4)<-tolower(names(w4))
#names(w5)<-tolower(names(w5))
names(wt)<-tolower(names(wt))

Data processing

# Year of survey for wave 3
summary(w3$iyear3)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   2001    2001    2001    2001    2002    2002 
#year of birth
summary(w3$h3od1y)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1974    1978    1979    1979    1981    1983 
# Current age: year of survey minus age at birth
w3$age <- (w3$iyear3 -w3$h3od1y)
summary(w3$age)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   18.0    21.0    22.0    22.2    24.0    28.0 
# Year of survey for wave 4
summary(w4$iyear4)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   2007    2008    2008    2008    2008    2009 
#year of birth
summary(w4$h4od1y)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1974    1978    1979    1979    1980    1983 
# Current age is year of survey minus age at birth
w4$age <- (w4$iyear4 -w4$h4od1y)
summary(w4$age)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
     25      28      29      29      30      34 
#Current age: year of survey minus age at birth
#w5$age <- (w5$iyear5 -w5$h5od1y)
#summary(w5$age)
## Childhood maltreatments

#w3$neglect1<- car::Recode(w3$h3ma1,recodes="1:5='1'; 6='0'; else=NA")
#w3$neglect2<- car::Recode(w3$h3ma2,recodes="1:5='1'; 6='0'; else=NA")
w3$physical<- car::Recode(w3$h3ma3,recodes="1:5='Physical abuse'; 6='No physical abuse'; else=NA", as.factor = T)
w3$sexual<- car::Recode(w3$h3ma4,recodes="1:5='sexually abused'; 6='No sexual abuse'; else=NA", as.factor = T)


#w3$neg<- paste(w3$neglect1, w3$neglect2, sep = "")
#table(w3$neg)

#w3$neglect<- car::Recode(w3$neg,recodes="11|12|13|21|31=1; 22|23|32=0; else=NA")
#table(w3$neglect)

#w4$neglect<- car::Recode(w4$h4ma1,recodes="1:5=1; 6=0; else=NA")
#w4$physical<- car::Recode(w4$h4ma3,recodes="1:5=1; 6=0; else=NA")
#w4$sexual<- car::Recode(w4$h4ma5,recodes="1:5=1; 6=0; else=NA")

#Employment status

#h3da28: Do you currently have a job?
w3$empstat<- car::Recode(w3$h3da28,recodes="1='Yes'; 0='No'; else=NA", as.factor = T)

#H4LM11: Do you currently work 10 hours per week?
#w4$empstat<- car::Recode(w4$h4lm11,recodes="1=1; 0=0; else=NA")


#Race/Ethnicity
w3$hisp <- car::Recode(w3$h3od2, recodes = "1= 'Hisp' ; 0= 'NH'; else=NA")
w3$race <- car::Recode(w3$h3ir4, recodes = "1= 'White'; 2= 'Black'; 3:4='Other'; else=NA")
w3$race_eth<-ifelse(w3$hisp ==1, "Hisp", w3$race)
w3$race_eth <- interaction(w3$hisp, w3$race)


  w3$race_ethr<- mutate(w3, ifelse(hisp==0 & race==1, 1,
                    ifelse(hisp==0 & race==2, 2,
                     ifelse(hisp==1, 3,
                    ifelse(hisp==0 & race==3, 4,
                     ifelse(hisp==0 & race==4, 5, "NA"))))))
w3$race_ethr<- ifelse(substr(w3$race_eth, 1, 4)=="Hisp", "Hisp", as.character(w3$race_eth))

#Education
#What is the highest grade or year of regular school you have completed?

w3$educ <- car::Recode(w3$h3ed1, recodes = "6:12= 'Upto HS' ; 13:22= 'Higher than HS'; else=NA", as.factor = T)


#w4$educ <- car:: Recode(w4$h4ed2, recodes = "1:3 =  'upto to HS'; 4:13 = 'higher than HS'; else = NA")
# Depression

## h3id15: Have you ever been diagnosed with depression
##h4id5h: Has a doctor, nurse or other health care provider ever told you that you have or had: depression?

w3$dep <-  car::Recode(w3$h3id15,recodes="1=1; 0=0; else=NA", as.factor= T)

w4$dep <-  car::Recode(w4$h4id5h,recodes="1=1; 0=0; else=NA", as.factor= T)
  

#w3 <- w3%>%
#  filter(h3id15 == 1)

#w4<-w4%>%
 # filter(h4id5h==1)
# Combine all waves
allwaves <- left_join(w3, w4, by="aid")
#allwaves <- left_join(allwaves, w5, by="aid")
allwaves <- left_join(allwaves,wt, by = "aid")
myvars<-c("aid","dep.x", "dep.y", "physical", "sexual", "race_ethr","educ","age.x", "age.y", "bio_sex3", "empstat","gswgt1", "cluster2")
allwaves2<-allwaves[,myvars]

allwaves2$sex<- car::Recode(allwaves2$bio_sex3, recodes="1='Male'; 2='Female'; else = NA", as.factor=T)
# time varying variables- Health and age
allwaves2$age_1<- allwaves2$age.x
allwaves2$age_2<- allwaves2$age.y



allwaves2$dep_1<-allwaves2$dep.x
allwaves2$dep_2<-allwaves2$dep.y
allwaves2<-allwaves2 %>% filter(is.na(dep_1)==F &
                  is.na(dep_2)==F &
                  is.na(age_1)==F &
                  is.na(age_2)==F &
                  dep_1!=1) %>%
  mutate(deptrans_1 =ifelse(dep_1==0 & dep_2==0, 0,1))
elong <- allwaves2 %>%
  select(physical,sexual,race_ethr, sex, aid,gswgt1,cluster2,educ, empstat,    #time constant
        age_1, age_2,#t-varying variables
         dep_1, dep_2)%>%
   pivot_longer(cols = c(-physical,-sexual, -race_ethr, -sex, -aid, -empstat, -educ, -gswgt1, -cluster2), #time constant variables go here
               names_to  = c(".value", "transition"), #make wave variable and put t-v vars into columns
               names_sep = "_") %>% #all t-v variables have _ between name and time, like age_1, age_2
   group_by(aid)%>%
  mutate(age_enter = as.numeric(age), 
         age_exit = lead(age, 1, order_by= aid))%>%
  mutate(nexdep = dplyr::lead(dep,n=1, order_by = age))%>%
  mutate(deptran = ifelse(nexdep == 1, 1, 0))%>%
  filter(is.na(age_exit)==F)%>%
  ungroup()%>%
  filter(complete.cases(age_enter, age_exit, deptran,
              physical,sexual,race_ethr, sex, aid,educ, empstat,gswgt1,cluster2))
elong$depevent <- 
  factor(elong$deptran, levels=c(1,0),
         labels=c("Yes", 
                  "No"))

label(elong$sex)       <- "Sex"
label(elong$physical)       <- "Physical Abuse"
label(elong$sexual)     <- "Sexual Abuse"
label(elong$educ) <- "Education attainment"
label(elong$empstat) <- "Employment status"
label(elong$race_ethr) <- "Race/Ethnicity"      



tab_1 <- table1(~ sex +  physical + sexual + educ + empstat + race_ethr | depevent, data=elong, overall="Total")

Descriptive analysis of risk depression

elong%>%
mutate(ager = round((age_enter+age_enter)/2) )%>%
group_by(ager)%>%
summarise(prop_event= mean(deptran, na.rm=T))%>%
ggplot()+
aes(x=ager, y= prop_event)+
   scale_x_continuous(breaks = seq(18, 30, by = 1))+
   scale_y_continuous(breaks = seq(0, 0.3, by = 0.05))+
 ggtitle("Figure 1: Hazard of getting diagnosed with depression by time")+
geom_line()

Figure 1 shows the risk of being diagnosed with depression among adults aged 19-26 who were exposed to childhood maltreatment as a child. The graph shows that between age 19 and 26 the risk of being diagnosed with depression is largely constant.

e.plot <- elong%>%
mutate(ager = round((age_enter+age_enter)/2) )%>%
group_by(ager, educ)%>%
summarise(prop_event= mean(deptran, na.rm=T))%>%
 ggplot()+
aes(x=ager, y= prop_event, color=factor(educ) )+
  scale_x_continuous(breaks = seq(18, 30, by = 1))+
   scale_y_continuous(breaks = seq(0, 0.3, by = 0.05))+
  #ggtitle(subtitle = "Figure 2: Hazard of depression diagnosis by age and demographic properties")+
geom_line()
`summarise()` has grouped output by 'ager'. You can override using the
`.groups` argument.
s.plot <- elong%>%
mutate(ager = round((age_enter+age_enter)/2) )%>%
group_by(ager, sex)%>%
summarise(prop_event= mean(deptran, na.rm=T))%>%
ggplot()+
aes(x=ager, y= prop_event, color=factor(sex) )+
  scale_x_continuous(breaks = seq(18, 30, by = 1))+
   scale_y_continuous(breaks = seq(0, 0.3, by = 0.05))+
  #ggtitle("Figure 3: Hazard of getting diagnosed with depression as function of age by sex")+
geom_line()
`summarise()` has grouped output by 'ager'. You can override using the
`.groups` argument.
r.plot <-elong%>%
mutate(ager = round((age_enter+age_enter)/2) )%>%
group_by(ager, race_ethr)%>%
summarise(prop_event= mean(deptran, na.rm=T))%>%
ggplot()+
aes(x=ager, y= prop_event, color=factor(race_ethr) )+
  scale_x_continuous(breaks = seq(18, 30, by = 1))+
   scale_y_continuous(breaks = seq(0, 0.3, by = 0.05))+
  #ggtitle("Figure 4: Hazard of getting diagnosed with depression as function of age by race/ethnicity")+
geom_line()
`summarise()` has grouped output by 'ager'. You can override using the
`.groups` argument.
emp.plot <-elong%>%
mutate(ager = round((age_enter+age_enter)/2) )%>%
group_by(ager, empstat)%>%
summarise(prop_event= mean(deptran, na.rm=T))%>%
ggplot()+
aes(x=ager, y= prop_event, color=factor(empstat) )+
  scale_x_continuous(breaks = seq(18, 30, by = 1))+
   scale_y_continuous(breaks = seq(0, 0.3, by = 0.05))+
  #ggtitle("Figure 5: Hazard of getting diagnosed with depression as function of age by employment status")+
geom_line()
`summarise()` has grouped output by 'ager'. You can override using the
`.groups` argument.
fig2 <- ggarrange(e.plot, s.plot,r.plot,emp.plot,
                  labels = c("A", "B", "C","D"), 
                  nrow = 2, ncol = 2)
  
  caption = "Data: ADD Health"
fig2

Figure 2 summarizes the probability of getting diagnosed with depression as function of age among adults who have experienced either physical or sexual maltreatment as a child. Figure 2A shows that adults across the age of 19-26 who have education level upto high school are more likely to be diagnosed with depression compared to people with higher degree. Female who experience childhood abuse are also more likely to develop depression compared male (Figure 2B). Similarly Figure 2C shows that NH-Whites are more likely to be diagnosed with depression across age of 18-26 if they experienced childhood maltreatment. Finally, Figure 2D shows that through age 18-24, age and depression among were independent but at and beyond age 25 unemployed people with childhood maltreatment are more likely to be diagnosed with depression.

Discrete Time Model

subpp<-survSplit(Surv(time = age_enter, time2=age_exit, event = deptran)~., data=elong,
   cut=seq(18, 28, 1),  episode="age")

  subpp$year <-   subpp$age-1
options(survey.lonely.psu = "adjust")
des<-survey::svydesign(ids=~cluster2,
               #strata=~strata,
               data=subpp,
               weight=~gswgt1 )
fitl1<-glm(deptran~as.factor(age_enter)-1+educ,
subpp, family=binomial)
summary(fitl1)

Call:
glm(formula = deptran ~ as.factor(age_enter) - 1 + educ, family = binomial, 
    data = subpp)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.4918  -0.2134  -0.1389  -0.0001   4.0134  

Coefficients:
                         Estimate Std. Error z value Pr(>|z|)    
as.factor(age_enter)18   -20.9698 10236.6338  -0.002  0.99837    
as.factor(age_enter)19   -20.7451  1120.9607  -0.019  0.98523    
as.factor(age_enter)20   -20.7569   695.3920  -0.030  0.97619    
as.factor(age_enter)21   -20.7571   523.1891  -0.040  0.96835    
as.factor(age_enter)22   -20.7612   436.3770  -0.048  0.96205    
as.factor(age_enter)23   -20.7575   383.8899  -0.054  0.95688    
as.factor(age_enter)24    -8.0532     1.0021  -8.036 9.28e-16 ***
as.factor(age_enter)25    -4.6360     0.1857 -24.959  < 2e-16 ***
as.factor(age_enter)26    -4.1747     0.1585 -26.333  < 2e-16 ***
as.factor(age_enter)27    -3.7396     0.1437 -26.022  < 2e-16 ***
as.factor(age_enter)28    -2.4552     0.1028 -23.873  < 2e-16 ***
educUpto HS                0.4038     0.1194   3.382  0.00072 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 24590.1  on 17738  degrees of freedom
Residual deviance:  2438.8  on 17726  degrees of freedom
AIC: 2462.8

Number of Fisher Scoring iterations: 19
#basic log models 
#Linear term for time
fit.0<-svyglm(deptran~as.factor(year)-1,
                 design=des,
                 
                 family=binomial(link="cloglog"))
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.0)

Call:
svyglm(formula = deptran ~ as.factor(year) - 1, design = des, 
    family = binomial(link = "cloglog"))

Survey design:
survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)

Coefficients:
                   Estimate Std. Error  t value Pr(>|t|)    
as.factor(year)1  -20.23205    0.70497  -28.699  < 2e-16 ***
as.factor(year)2  -20.55414    0.11447 -179.562  < 2e-16 ***
as.factor(year)3  -20.56886    0.10674 -192.701  < 2e-16 ***
as.factor(year)4  -20.55258    0.08309 -247.355  < 2e-16 ***
as.factor(year)5  -20.54163    0.05674 -362.027  < 2e-16 ***
as.factor(year)6  -20.52757    0.04679 -438.680  < 2e-16 ***
as.factor(year)7   -7.09267    0.99997   -7.093  9.6e-11 ***
as.factor(year)8   -4.28845    0.21039  -20.384  < 2e-16 ***
as.factor(year)9   -4.01219    0.20245  -19.818  < 2e-16 ***
as.factor(year)10  -3.40467    0.15493  -21.975  < 2e-16 ***
as.factor(year)11  -2.27089    0.10718  -21.187  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 0.6599063)

Number of Fisher Scoring iterations: 19
haz<-1/(1+exp(-coef(fit.0)))
time<-1:length(haz)
plot(haz~time, type="l", ylab="h(t)")
title(main="Fig 3: Discrete Time Hazard for Depression Diagnosis ")

matrix_coef <- as.data.frame(summary(fitl1)$coefficients)

matrix_coef %>% 
  knitr::kable(
    format = "latex",
    align = "l",
    booktabs = TRUE,
    longtable = TRUE,
    linesep = "",
    )%>%
   kableExtra::kable_styling(
      position = "left",
      latex_options = c("striped", "repeat_header"),
      stripe_color = "gray!15"
    )

General linear binomial modeling suggests adults with education level upto high school and who were maltreated as child are 40% ( 28.5% using log-log model) times more likely to be diagnosed with depression as adult compared to adults with HS degree or higher. Furthermore, Figure 3 shows the hazard of developing depression among adults who were maltreated as child. The result shows that there is a gap of atleast 6 years before adults are at risk of being diagnosed with depression and the risk of getting diagnosed with depression increases slightly between year 6 and 8 before it increases sharply around year 9 and year 10 (age24-26).

St<-NA
  time<-1:length(haz)
  St[1]<-1-haz[1]
  for(i in 2:length(haz)){
    St[i]<-St[i-1]* (1-haz[i])
  }
  
  St<-c(1, St)
  time<-c(0, time)
  plot(y=St,x=time, type="l",
       main="Figure 4: Survival function for Depression")

Survival function of depression diagnosis is plotted in Figure 4 and shows that the risk of developing depression among adults decreases to ~0.7 from 1 as the time from maltreatment incident increases. In this study Figure 4 shows that there is a steady drop in risk of depression diagnosis between year 1 and year 8 and this risk decreases sharply between year 9 and year 10.

 1/(1+exp(-coef(fit.0)))
 as.factor(year)1  as.factor(year)2  as.factor(year)3  as.factor(year)4 
     1.634304e-09      1.184270e-09      1.166959e-09      1.186124e-09 
 as.factor(year)5  as.factor(year)6  as.factor(year)7  as.factor(year)8 
     1.199174e-09      1.216161e-09      8.304811e-04      1.354036e-02 
 as.factor(year)9 as.factor(year)10 as.factor(year)11 
     1.777220e-02      3.214998e-02      9.356308e-02 
fit.1<-svyglm(deptran~as.factor(year)-1+physical+sexual+empstat+race_ethr+sex+educ,
              design=des,
              family=binomial(link="cloglog"))
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.1)

Call:
svyglm(formula = deptran ~ as.factor(year) - 1 + physical + sexual + 
    empstat + race_ethr + sex + educ, design = des, family = binomial(link = "cloglog"))

Survey design:
survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)

Coefficients:
                        Estimate Std. Error t value Pr(>|t|)    
as.factor(year)1       -22.00869    0.79693 -27.617  < 2e-16 ***
as.factor(year)2       -21.94888    0.28600 -76.744  < 2e-16 ***
as.factor(year)3       -21.93459    0.26734 -82.048  < 2e-16 ***
as.factor(year)4       -21.93106    0.26606 -82.429  < 2e-16 ***
as.factor(year)5       -21.91445    0.25745 -85.120  < 2e-16 ***
as.factor(year)6       -21.88078    0.25482 -85.868  < 2e-16 ***
as.factor(year)7        -7.45976    1.00327  -7.435 2.17e-11 ***
as.factor(year)8        -4.64993    0.32888 -14.139  < 2e-16 ***
as.factor(year)9        -4.36031    0.32023 -13.616  < 2e-16 ***
as.factor(year)10       -3.74404    0.28107 -13.321  < 2e-16 ***
as.factor(year)11       -2.57550    0.26369  -9.767  < 2e-16 ***
physicalPhysical abuse   0.38664    0.14444   2.677 0.008541 ** 
sexualsexually abused    0.01773    0.30892   0.057 0.954343    
empstatYes              -0.12243    0.15453  -0.792 0.429887    
race_ethrNH.Black       -0.37388    0.27680  -1.351 0.179481    
race_ethrNH.Other       -0.13764    0.48264  -0.285 0.776025    
race_ethrNH.White        0.57881    0.22746   2.545 0.012286 *  
sexMale                 -0.76204    0.15007  -5.078 1.52e-06 ***
educUpto HS              0.48996    0.12715   3.853 0.000194 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 0.6851029)

Number of Fisher Scoring iterations: 20

Table 1 shows the relationship between risk of being diagnosed with depression among adults who experienced childhood maltreatment (physical and sexual). Results show that as the risk of depression diagnosis increases between year 7 and year 11. Similarly, it was found that the childhood physical abuse increases the risk of getting depression by 32.6%. Similarly, adults with high school degree or less are at higher risk (41.3 %) of developing depression compared adults with HS degree or higher. Finally in terms of race /ethnicity, it was found that NH-Whites are at 55.0% higher risk of developing depression compared to other races. Sexual maltreatment and employment status did not show statistically significant association to risk of developing depression. It should be noted that interaction parameters did not affect risk od being diagnosed with depression.

fit.2<-svyglm(deptran~as.factor(year)-1+(physical*educ)+(physical*race_ethr)+(physical * sex),
              design=des,
              family=binomial(link="cloglog"))
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.2)

Call:
svyglm(formula = deptran ~ as.factor(year) - 1 + (physical * 
    educ) + (physical * race_ethr) + (physical * sex), design = des, 
    family = binomial(link = "cloglog"))

Survey design:
survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)

Coefficients:
                                          Estimate Std. Error t value Pr(>|t|)
as.factor(year)1                         -22.03905    0.76765 -28.710  < 2e-16
as.factor(year)2                         -21.84731    0.34349 -63.604  < 2e-16
as.factor(year)3                         -21.84273    0.32356 -67.507  < 2e-16
as.factor(year)4                         -21.84235    0.31610 -69.100  < 2e-16
as.factor(year)5                         -21.82815    0.30623 -71.281  < 2e-16
as.factor(year)6                         -21.79725    0.30298 -71.942  < 2e-16
as.factor(year)7                          -7.37677    1.01449  -7.271 5.52e-11
as.factor(year)8                          -4.56670    0.35170 -12.985  < 2e-16
as.factor(year)9                          -4.28012    0.36895 -11.601  < 2e-16
as.factor(year)10                         -3.66279    0.33252 -11.015  < 2e-16
as.factor(year)11                         -2.49188    0.31783  -7.840 3.11e-12
physicalPhysical abuse                    -0.07038    0.52073  -0.135   0.8927
educUpto HS                                0.39520    0.15642   2.526   0.0129
race_ethrNH.Black                         -0.66479    0.39833  -1.669   0.0980
race_ethrNH.Other                         -0.36610    0.62393  -0.587   0.5586
race_ethrNH.White                          0.45397    0.29286   1.550   0.1240
sexMale                                   -0.75057    0.18414  -4.076 8.68e-05
physicalPhysical abuse:educUpto HS         0.31599    0.28106   1.124   0.2633
physicalPhysical abuse:race_ethrNH.Black   0.81470    0.58131   1.401   0.1639
physicalPhysical abuse:race_ethrNH.Other   0.46994    0.97843   0.480   0.6320
physicalPhysical abuse:race_ethrNH.White   0.28035    0.47483   0.590   0.5561
physicalPhysical abuse:sexMale            -0.05257    0.31806  -0.165   0.8690
                                            
as.factor(year)1                         ***
as.factor(year)2                         ***
as.factor(year)3                         ***
as.factor(year)4                         ***
as.factor(year)5                         ***
as.factor(year)6                         ***
as.factor(year)7                         ***
as.factor(year)8                         ***
as.factor(year)9                         ***
as.factor(year)10                        ***
as.factor(year)11                        ***
physicalPhysical abuse                      
educUpto HS                              *  
race_ethrNH.Black                        .  
race_ethrNH.Other                           
race_ethrNH.White                           
sexMale                                  ***
physicalPhysical abuse:educUpto HS          
physicalPhysical abuse:race_ethrNH.Black    
physicalPhysical abuse:race_ethrNH.Other    
physicalPhysical abuse:race_ethrNH.White    
physicalPhysical abuse:sexMale              
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 0.7265251)

Number of Fisher Scoring iterations: 20

Linear and other models for time

fit.l<-svyglm(deptran~year,
                design=des ,
                family=binomial(link="cloglog"))
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.l)

Call:
svyglm(formula = deptran ~ year, design = des, family = binomial(link = "cloglog"))

Survey design:
survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -11.95757    0.70305  -17.01   <2e-16 ***
year          0.87728    0.06728   13.04   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 0.6882792)

Number of Fisher Scoring iterations: 8
fit.s<-svyglm(deptran~year+I(year^2),
                 design=des ,
                 family=binomial(link="cloglog"))
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.s)

Call:
svyglm(formula = deptran ~ year + I(year^2), design = des, family = binomial(link = "cloglog"))

Survey design:
survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -15.56395    2.43797  -6.384 2.85e-09 ***
year          1.67024    0.57934   2.883  0.00462 ** 
I(year^2)    -0.04264    0.03342  -1.276  0.20426    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 0.6794044)

Number of Fisher Scoring iterations: 10
fit.c<-svyglm(deptran~year+I(year^2)+I(year^3),
                design=des ,
                family=binomial(link="cloglog"))
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(fit.c)

Call:
svyglm(formula = deptran ~ year + I(year^2) + I(year^3), design = des, 
    family = binomial(link = "cloglog"))

Survey design:
survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -163.69571   45.59459  -3.590 0.000469 ***
year          51.40803   14.86910   3.457 0.000741 ***
I(year^2)     -5.53429    1.60459  -3.449 0.000762 ***
I(year^3)      0.19955    0.05728   3.484 0.000678 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 0.6174027)

Number of Fisher Scoring iterations: 13
fit.q<-svyglm(deptran~year+I(year^2)+I(year^3 )+I(year^4),
                design=des ,
                family=binomial(link="cloglog"))
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(fit.q)

Call:
svyglm(formula = deptran ~ year + I(year^2) + I(year^3) + I(year^4), 
    design = des, family = binomial(link = "cloglog"))

Survey design:
survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)

Coefficients:
              Estimate Std. Error t value Pr(>|t|)  
(Intercept) -897.22032  466.01339  -1.925   0.0564 .
year         380.78256  207.59323   1.834   0.0690 .
I(year^2)    -60.54975   34.48025  -1.756   0.0815 .
I(year^3)      4.25113    2.52988   1.680   0.0953 .
I(year^4)     -0.11102    0.06917  -1.605   0.1110  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 0.6599173)

Number of Fisher Scoring iterations: 17
#Spline
  library(splines)
  fit.sp<-svyglm(deptran~ns(year, df = 3),
                 design=des ,
                 family=binomial(link="cloglog"))
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
  summary(fit.sp)

Call:
svyglm(formula = deptran ~ ns(year, df = 3), design = des, family = binomial(link = "cloglog"))

Survey design:
survey::svydesign(ids = ~cluster2, data = subpp, weight = ~gswgt1)

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)   
(Intercept)        -112.10      37.42  -2.995  0.00329 **
ns(year, df = 3)1    75.36      26.19   2.878  0.00470 **
ns(year, df = 3)2   205.14      70.79   2.898  0.00442 **
ns(year, df = 3)3    45.74      15.18   3.013  0.00311 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 0.6607351)

Number of Fisher Scoring iterations: 14
dat<-expand.grid(year=seq(1,11,1))
  dat$genmod<-predict(fit.0, newdata=data.frame(year=as.factor(1:11)), type="response")
  dat$lin<-predict(fit.l, newdata=dat, type="response")
  dat$sq<-predict(fit.s, newdata=dat, type="response")
  dat$cub<-predict(fit.c, newdata=dat, type="response")
  dat$quart<-predict(fit.q, newdata=dat, type="response")
  dat$spline<-predict(fit.sp, newdata=expand.grid(year=seq(1,11,1)), type="response")
dat
   year       genmod          lin           sq          cub        quart
1     1 1.634304e-09 1.541298e-05 8.861688e-07 2.220446e-16 2.220446e-16
2     2 1.184270e-09 3.705785e-05 4.143183e-06 2.220446e-16 2.220446e-16
3     3 1.166959e-09 8.909783e-05 1.778740e-05 2.220446e-16 2.220446e-16
4     4 1.186124e-09 2.142092e-04 7.012052e-05 2.008621e-15 2.220446e-16
5     5 1.199174e-09 5.149571e-04 2.538120e-04 1.922533e-09 2.220446e-16
6     6 1.216161e-09 1.237691e-03 8.434395e-04 1.142023e-05 1.602831e-08
7     7 8.308260e-04 2.973260e-03 2.572235e-03 1.393081e-03 8.307595e-04
8     8 1.363245e-02 7.133847e-03 7.192813e-03 1.151167e-02 1.363257e-02
9     9 1.793106e-02 1.706645e-02 1.840754e-02 2.143213e-02 1.793093e-02
10   10 3.267228e-02 4.054316e-02 4.295814e-02 2.978745e-02 3.267236e-02
11   11 9.807214e-02 9.472022e-02 9.088713e-02 9.900232e-02 9.807212e-02
         spline
1  2.220446e-16
2  2.220446e-16
3  2.220446e-16
4  2.727256e-11
5  5.196107e-05
6  2.103455e-02
7  5.643636e-02
8  3.303804e-02
9  3.237207e-02
10 5.044205e-02
11 9.801707e-02
plot(genmod~year, dat, type="l", ylab="h(t)", xlab="Time", ylim=c(0, .12), xlim=c(0, 12))
title(main="Figure 5: Hazard function from different time parameterizations")
lines(lin~year, dat, col=2, lwd=2)
lines(sq~year, dat, col=3, lwd=2)
lines(cub~year, dat, col=4, lwd=2)
lines(quart~year, dat, col=5, lwd=2)
lines(spline~year, dat, col=6, lwd=2)
legend("topleft",
       legend=c("General Model", "Linear","Square", "Cubic", "Quartic", "Natural spline"),
       col=1:6, lwd=1.5)

Figure 5 shows the hazard function for different particularization of time (linear, quadratic, cubic, quartic and spline), it can be seen in the figure that other than natural spline fit all other parametrization showed similar trend in risk of being diagnosed with depression where there is an gradual increase in risk between year 7 and 9 and a sharp jump in risk around year 10 and year 11. Spline parametrization, on the other hand, show gradual increase from year 5 to 7 and drop in risk of depression diagnosis between year 7 and year 9 after which there is a spike. AIC analysis (Table 2) shows that spline fit have lowest aid hence is considered most accurate model.

#AIC table
  aic<-round(c(
    fit.l$deviance+2*length(fit.l$coefficients),
    fit.s$deviance+2*length(fit.s$coefficients),
    fit.c$deviance+2*length(fit.c$coefficients),
    fit.q$deviance+2*length(fit.q$coefficients),
    fit.sp$deviance+2*length(fit.sp$coefficients),
    fit.0$deviance+2*length(fit.0$coefficients)),2)
  #compare all aics to the one from the general model
  dif.aic<-round(aic-aic[6],2)
 t3<- data.frame(Model =c( "Linear","Quadratic", "Cubic", "Quartic","Spline", "General"), AIC=aic, AIC_dif=dif.aic)

 knitr::kable(t3)
Model AIC AIC_dif
Linear 2567.03 12.58
Quadratic 2567.21 12.76
Cubic 2544.45 -10.00
Quartic 2542.45 -12.00
Spline 2540.45 -14.00
General 2554.45 0.00
#AIC table
  aic<-round(c(
    fit.l$deviance+2*length(fit.l$coefficients),
    fit.s$deviance+2*length(fit.s$coefficients),
    fit.c$deviance+2*length(fit.c$coefficients),
    fit.q$deviance+2*length(fit.q$coefficients),
    fit.sp$deviance+2*length(fit.sp$coefficients),
    fit.0$deviance+2*length(fit.0$coefficients)),2)
  #compare all aics to the one from the general model
  dif.aic<-round(aic-aic[6],2)
  data.frame(model =c( "linear","quadratic", "cubic", "quartic","spline", "general"),
             aic=aic,
             aic_dif=dif.aic)
      model     aic aic_dif
1    linear 2567.03   12.58
2 quadratic 2567.21   12.76
3     cubic 2544.45  -10.00
4   quartic 2542.45  -12.00
5    spline 2540.45  -14.00
6   general 2554.45    0.00