library (haven)
df1<- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0001/da27021p1.dta")
df1w <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0025/da27021p25.dta")
wv1 <- merge(df1, df1w, by="aid")
wv3<-read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0003/da27021p3.dta")
wv2 <- merge(wv1, wv3, by="aid")
wave4a <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0012/da27021p12.dta")
wave4wgt <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0028/da27021p28.dta")
wave4b <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0017/da27021p17.dta")
wave4 <- merge(wave4a, wave4b, by="aid")
wave4totalwgt <- merge(wave4, wave4wgt, by="aid")
addhealth <-merge(wave4totalwgt, wv2, by="aid")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(questionr)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
summary(wv1$H1SU2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 7.000 7.000 6.137 7.000 8.000
addhealth<-addhealth%>%
select(aid,iyear,BIO_SEX,H4ID5H,iyear,imonth,iday,H1FV6,H4DS18,H4SE1,H4SE2,IMONTH4,IYEAR4,H1SU1,H4OD1M, H4OD1Y, H1FS6, H1GI1M, H1GI1Y, H3OD1Y,H3OD1M, IYEAR3, IMONTH3, H3TO130, H1SU2, H3TO131, H4SE2)
addhealth<-addhealth%>%
filter(complete.cases(.))
addhealth$suicidewave1<-recode(addhealth$H1SU1, recodes="0=0; 1=1; 6=NA; 8=NA; 9=NA")
is.numeric(addhealth$suicidewave1)
## [1] TRUE
table(addhealth$suicidewave1)
##
## 0 1
## 10026 1786
addhealth$suicidewave3<-recode(addhealth$H3TO130, recodes="0=0; 1=1; 6=NA; 8=NA; 9=NA")
is.numeric(addhealth$suicidewave3)
## [1] TRUE
table(addhealth$suicidewave3)
##
## 0 1 7
## 11043 559 136
addhealth$birthmonthw1 <- factor(ifelse(addhealth$H1GI1M=="1",01,
ifelse(addhealth$H1GI1M=="2", 02,
ifelse(addhealth$H1GI1M=="3", 03,
ifelse(addhealth$H1GI1M=="4", 04,
ifelse(addhealth$H1GI1M=="5", 05,
ifelse(addhealth$H1GI1M=="6", 06,
ifelse(addhealth$H1GI1M=="7", 07,
ifelse(addhealth$H1GI1M=="8", 08,
ifelse(addhealth$H1GI1M=="9r", 09,
ifelse(addhealth$H1GI1M=="10", 10,
ifelse(addhealth$H1GI1M=="11", 11,
ifelse(addhealth$H1GI1M=="12",12,
ifelse(addhealth$H1GI1M=="96", 6,NA))))))))))))))
addhealth$birthyearw1 <- factor(ifelse(addhealth$H1GI1Y=="74",1974,
ifelse(addhealth$H1GI1Y=="75",1975,
ifelse(addhealth$H1GI1Y=="76",1976,
ifelse(addhealth$H1GI1Y=="77",1977,
ifelse(addhealth$H1GI1Y=="78",1978,
ifelse(addhealth$H1GI1Y=="79",1979,
ifelse(addhealth$H1GI1Y=="80",1980,
ifelse(addhealth$H1GI1Y=="81",1981,
ifelse(addhealth$H1GI1Y=="82",1982,
ifelse(addhealth$H1GI1Y=="83",1983,1979)))))))))))
addhealth$birthdatew1 <- as.yearmon(paste(addhealth$birthyearw1, addhealth$birthmonthw1), "%Y %m")
addhealth$iyearfix <- Recode(addhealth$iyear,recodes="'95'='1995'")
addhealth$interviewdatew1 <- as.yearmon(paste(addhealth$iyearfix, addhealth$imonth), "%Y %m")
addhealth$age3<-(as.numeric(round(addhealth$interviewdatew1 - addhealth$birthdatew1)))
addhealth$age<-Recode(addhealth$age3, recode="12=12; 13=13; 14=14; 15=15; 16=16; 17=17; 18=18; 19=19; 20=20; else=NA")
table(addhealth$age)
##
## 12 13 14 15 16 17 18 19 20
## 37 576 1190 1446 2246 2039 2472 717 89
addhealth$birthdatew3<-as.yearmon(paste(addhealth$H3OD1Y,addhealth$H3OD1M),"%Y %m")
addhealth$interviewdatew3<-as.yearmon(paste(addhealth$IYEAR3,addhealth$IMONTH3 ),"%Y %m")
addhealth$agew3<-(as.numeric(round(addhealth$interviewdatew3 - addhealth$birthdatew3)))
table(addhealth$agew3)
##
## 18 19 20 21 22 23 24 25 26 27 28
## 27 314 1117 1484 2237 2141 2677 1516 331 40 3
addhealth$month<- Recode(addhealth$H4OD1M,recodes="1=1;2=2;3=3;4=4;5=5;6=6;7=7;8=8;9=9;10=10;11=11;12=12")
addhealth$year <- Recode(addhealth$H4OD1Y,recodes="1974=1974;1975=1975;1976=1976;1977=1977;1978=1978;1979=1979;1980=1980;1981=1981;1982=1982;1983=1983")
addhealth$date <- as.yearmon(paste(addhealth$year, addhealth$month), "%Y %m")
addhealth$intmon <- Recode(addhealth$IMONTH4,recodes="1=1;2=2;3=3;4=4;5=5;6=6;7=7;8=8;9=9;10=10;11=11;12=12")
addhealth$intyear <- Recode(addhealth$IYEAR4,recodes="2007=2007;2008=2008;2009=2009")
addhealth$intdate<-as.yearmon(paste(addhealth$intyear,addhealth$intmon),"%Y %m")
addhealth$agewv4<-(as.numeric(round(addhealth$intdate - addhealth$date )))
table(addhealth$agewv4)
##
## 24 25 26 27 28 29 30 31 32 33 34
## 4 78 758 1202 1974 2081 2683 2166 802 122 17
table(addhealth$age)
##
## 12 13 14 15 16 17 18 19 20
## 37 576 1190 1446 2246 2039 2472 717 89
addhealth$yessuicidethoughtW4<-Recode(addhealth$H4SE1,recodes="0='no thoughts';1='yes thoughts';else=NA")
addhealth$nosuicidethoughtW1<-Recode(addhealth$H1SU1,recodes="0='no thoughts';1='yes thoughts';else=NA")
addhealth$suicideevent<-ifelse(addhealth$nosuicidethoughtW1=='no thoughts' & addhealth$yessuicidethoughtW4=='yes thoughts', 1, 0)
table((addhealth$suicideevent))
##
## 0 1
## 11245 581
summary(addhealth$suicideevent)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00000 0.00000 0.00000 0.04913 0.00000 1.00000 61
addhealth$suicideevent.age<-ifelse(addhealth$suicideevent==1, (addhealth$age+addhealth$agewv4)/2,
addhealth$agewv4)
summary((addhealth$age+addhealth$agewv4)/2)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 18.00 21.50 23.00 22.79 24.00 27.00 1075
addhealth$depressed<-Recode(addhealth$H1FS6, recodes="0=0; 1:3=1; 6=NA; 8=NA")
table(addhealth$depressed)
##
## 0 1
## 6320 5553
Event - having suicide thoughts and/or attempt Duration - from wave 1 to wave 4 Censoring - if someone is depressed Predictors - biological sex, age
addhealthmini<-subset(addhealth, is.na(H1SU1)==F&is.na(H3TO130)==F&is.na(age)==F&is.na(agew3)==F&is.na(agewv4)==F&H1SU1!=1)
addhealth$W1.suicide.attempt<-Recode(addhealth$H1SU2, recodes="0='a.no'; 1:4='b.yes'; else=NA")
addhealth$W3.suicide.thought<-ifelse(addhealth$H3TO130==1& addhealth$H1SU1==0, 1, ifelse(addhealth$H4SE1==1& addhealth$H3TO130==0, 1, 0))
addhealth$transition.w1<-ifelse(addhealth$W1.suicide.attempt=="a.no"&addhealth$W3.suicide.thought==1, 1, 0)
table(addhealth$W3.suicide.thought)
##
## 0 1
## 10786 1101
addhealth$W1.suicide.thought<-Recode(addhealth$H1SU1, recodes="0='a.no'; 1:4='b.yes'; else=NA")
addhealth$W3.suicide.thought<-ifelse(addhealth$H3TO130==1& addhealth$H1SU1==0, 1, ifelse(addhealth$H4SE1==1& addhealth$H3TO130==0, 1, 0))
addhealth$transition.w1<-ifelse(addhealth$W1.suicide.thought=="a.no"&addhealth$W3.suicide.thought==1, 1, 0)
table(addhealth$W3.suicide.thought)
##
## 0 1
## 10786 1101
table(addhealth$W3.suicide.thought)
##
## 0 1
## 10786 1101
library(muhaz)
library (eha)
addhealthb<-addhealth%>%
select(age,agew3, agewv4, depressed, BIO_SEX, W3.suicide.thought, H1SU1, H3TO130, H4SE1)
#filter complete cases
addhealth<-addhealth%>%
filter(complete.cases(.))
adlong<-reshape(data.frame(addhealthb), idvar = 'aid', varying=list(c('age', 'agew3'), c('agew3','agewv4')),
v.names = c('age_enter', 'age_exit'),
times = 1:2, direction='long')
adlong<-adlong[order(adlong$aid, adlong$time),]
head(adlong)
## depressed BIO_SEX W3.suicide.thought H1SU1 H3TO130 H4SE1 time age_enter
## 1.1 1 1 1 0 1 0 1 16
## 1.2 1 1 1 0 1 0 2 22
## 2.1 1 1 1 0 1 0 1 16
## 2.2 1 1 1 0 1 0 2 22
## 3.1 0 2 0 0 0 0 1 14
## 3.2 0 2 0 0 0 0 2 20
## age_exit aid
## 1.1 22 1
## 1.2 29 1
## 2.1 22 2
## 2.2 29 2
## 3.1 20 3
## 3.2 26 3
adlong$povtran <-NA
adlong$povtran[adlong$H1SU1==0&adlong$H3TO130==1&adlong$time==1]<-1
adlong$povtran[adlong$H3TO130==0&adlong$H4SE1==1&adlong$time==2]<-1
adlong$povtran[adlong$H1SU1==0&adlong$H3TO130==0&adlong$time==1]<-0
adlong$povtran[adlong$H3TO130==0&adlong$H4SE1==0&adlong$time==2]<-0
failed1<-which(is.na(adlong$povtran)==T)
adlong<-adlong[-failed1,]
adlong$age1r<-round(adlong$age_enter, 0)
adlong$age2r<-round(adlong$age_exit,0)
head(adlong, n=10)
## depressed BIO_SEX W3.suicide.thought H1SU1 H3TO130 H4SE1 time age_enter
## 1.1 1 1 1 0 1 0 1 16
## 2.1 1 1 1 0 1 0 1 16
## 3.1 0 2 0 0 0 0 1 14
## 3.2 0 2 0 0 0 0 2 20
## 4.1 0 2 0 0 0 0 1 14
## 4.2 0 2 0 0 0 0 2 20
## 5.1 0 2 0 0 0 0 1 14
## 5.2 0 2 0 0 0 0 2 20
## 6.1 1 1 0 0 0 0 1 15
## 6.2 1 1 0 0 0 0 2 21
## age_exit aid povtran age1r age2r
## 1.1 22 1 1 16 22
## 2.1 22 2 1 16 22
## 3.1 20 3 0 14 20
## 3.2 26 3 0 20 26
## 4.1 20 4 0 14 20
## 4.2 26 4 0 20 26
## 5.1 20 5 0 14 20
## 5.2 26 5 0 20 26
## 6.1 21 6 0 15 21
## 6.2 27 6 0 21 27
library(survminer)
## Loading required package: ggplot2
## Loading required package: ggpubr
fit<-survfit(Surv(time = time, event = povtran)~1, adlong)
summary(fit)
## Call: survfit(formula = Surv(time = time, event = povtran) ~ 1, data = adlong)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 20806 373 0.982 0.00092 0.980 0.984
## 2 10996 728 0.917 0.00248 0.912 0.922
ggsurvplot(fit, conf.int=T, risk.table=F, title="Survival function for suicide thought transition",ylim=c(.85,1))
#Exponential
fitl1<-phreg((Surv(time = time, event = povtran)~1+depressed), data = adlong, dist = "weibull", shape=1)
summary(fitl1)
## Call:
## phreg(formula = (Surv(time = time, event = povtran) ~ 1 + depressed),
## data = adlong, dist = "weibull", shape = 1)
##
## Covariate W.mean Coef Exp(Coef) se(Coef) Wald p
## depressed 0.446 0.401 1.493 0.061 0.000
##
## log(scale) 3.561 0.045 0.000
##
## Shape is fixed at 1
##
## Events 1101
## Total time at risk 31771
## Max. log. likelihood -4780.9
## LR test statistic 44.12
## Degrees of freedom 1
## Overall p-value 3.08615e-11
fitl2<-phreg((Surv(time = time, event = povtran)~1+depressed), data = adlong, dist = "weibull")
summary(fitl2)
## Call:
## phreg(formula = (Surv(time = time, event = povtran) ~ 1 + depressed),
## data = adlong, dist = "weibull")
##
## Covariate W.mean Coef Exp(Coef) se(Coef) Wald p
## depressed 0.446 0.352 1.422 0.061 0.000
##
## log(scale) 1.226 0.017 0.000
## log(shape) 1.550 0.026 0.000
##
## Events 1101
## Total time at risk 31771
## Max. log. likelihood -3669.4
## LR test statistic 33.99
## Degrees of freedom 1
## Overall p-value 5.54183e-09
fitl3<-phreg((Surv(time = time, event = povtran)~1+depressed), data = adlong, dist = "pch", cuts = c(1))
summary(fitl3)
## Call:
## phreg(formula = (Surv(time = time, event = povtran) ~ 1 + depressed),
## data = adlong, dist = "pch", cuts = c(1))
##
## Covariate W.mean Coef Exp(Coef) se(Coef) Wald p
## depressed 0.446 0.375 1.454 0.061 0.000
##
##
## Events 1101
## Total time at risk 31771
## Max. log. likelihood -4557
## LR test statistic 38.50
## Degrees of freedom 1
## Overall p-value 5.48078e-10
-2*fitl1$loglik[2]+2*length(fitl1$coefficients)
## [1] 9565.74
-2*fitl2$loglik[2]+2*length(fitl2$coefficients)
## [1] 7344.865
-2*fitl3$loglik[2]+2*length(fitl3$coefficients)
## [1] 9116.035
fitle<-coxreg(Surv(time = time, event = povtran)~1+depressed, data = adlong)
check.dist(fitle, fitl1, main = "Exponential")
check.dist(fitle, fitl2, main = "Weibull")
check.dist(fitle, fitl3, main = "Piecewise Exponential")