library(haven)
library(foreign)
library(survival)
library(car)
## Loading required package: carData
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
df1<- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0001/da27021p1.dta")
df1w <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0025/da27021p25.dta")
wv1 <- merge(df1, df1w, by="aid")
wv3<-read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0003/da27021p3.dta")
wv2 <- merge(wv1, wv3, by="aid")
wave4a <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0012/da27021p12.dta")
wave4wgt <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0028/da27021p28.dta")
wave4b <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0017/da27021p17.dta")
wave4 <- merge(wave4a, wave4b, by="aid")
wave4totalwgt <- merge(wave4, wave4wgt, by="aid")
addhealth <-merge(wave4totalwgt, wv2, by="aid")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
library(questionr)
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

Complete Case

addhealth<-addhealth%>%
  select(aid,iyear,BIO_SEX,H4ID5H,iyear,imonth,iday,H1FV6,H4DS18,H4SE1,H4SE2,IMONTH4,IYEAR4,H1SU1,H4OD1M, H4OD1Y, H1FS6, H1GI1M, H1GI1Y, H3OD1Y,H3OD1M, IYEAR3, IMONTH3, H3TO130, H1SU2, H3TO131, H4SE2, H4WS4, H4WP24, H4WP38,H4SE1, H4WP24, H4WS4, GSWGT4,region.x,psuscid.x, H1GI6A, H1GI6B, H1GI4, H1GI6D, H1GI6C, H1GI6E)
addhealth<-addhealth%>%
 filter(complete.cases(.))

Suicidal Ideation

addhealth$suicidewave1<-recode(addhealth$H1SU1, recodes="0=0; 1=1; 6=NA; 8=NA; 9=NA")
## Warning in recode.numeric(addhealth$H1SU1, recodes = "0=0; 1=1; 6=NA; 8=NA;
## 9=NA"): NAs introduced by coercion
## Warning: Unreplaced values treated as NA as .x is not compatible. Please specify
## replacements exhaustively or supply .default
  is.numeric(addhealth$suicidewave1)                        
## [1] FALSE
table(addhealth$suicidewave1)
## < table of extent 0 >
addhealth$suicidewave3<-recode(addhealth$H3TO130, recodes="0=0; 1=1; 6=NA; 8=NA; 9=NA")
## Warning in recode.numeric(addhealth$H3TO130, recodes = "0=0; 1=1; 6=NA; 8=NA;
## 9=NA"): NAs introduced by coercion
## Warning: Unreplaced values treated as NA as .x is not compatible. Please specify
## replacements exhaustively or supply .default
  is.numeric(addhealth$suicidewave3)                        
## [1] FALSE
table(addhealth$suicidewave3)
## < table of extent 0 >
addhealth$suicidewave4<-recode(addhealth$H4SE1, recodes="0=0; 1=1; 6=NA; 8=NA; 9=NA")
## Warning in recode.numeric(addhealth$H4SE1, recodes = "0=0; 1=1; 6=NA; 8=NA;
## 9=NA"): NAs introduced by coercion
## Warning: Unreplaced values treated as NA as .x is not compatible. Please specify
## replacements exhaustively or supply .default
  is.numeric(addhealth$suicidewave4)                        
## [1] FALSE
table(addhealth$suicidewave4)
## < table of extent 0 >

Age for Wave 1, Wave 3, Wave 4

addhealth$birthmonthw1 <- factor(ifelse(addhealth$H1GI1M=="1",01, 
                                            ifelse(addhealth$H1GI1M=="2", 02, 
                                            ifelse(addhealth$H1GI1M=="3", 03,
                                                   ifelse(addhealth$H1GI1M=="4", 04,
                                                          ifelse(addhealth$H1GI1M=="5", 05,
                                                                 ifelse(addhealth$H1GI1M=="6", 06,
                                                                        ifelse(addhealth$H1GI1M=="7", 07,
                                                                               ifelse(addhealth$H1GI1M=="8", 08,
                                                                                      ifelse(addhealth$H1GI1M=="9r", 09,
                                                                                             ifelse(addhealth$H1GI1M=="10", 10,
                                                                                                    ifelse(addhealth$H1GI1M=="11", 11,
                                                                                                           ifelse(addhealth$H1GI1M=="12",12,
                                                                                                                  ifelse(addhealth$H1GI1M=="96", 6,NA))))))))))))))
addhealth$birthyearw1 <- factor(ifelse(addhealth$H1GI1Y=="74",1974,
                                             ifelse(addhealth$H1GI1Y=="75",1975,
                                                           ifelse(addhealth$H1GI1Y=="76",1976,
                                                                         ifelse(addhealth$H1GI1Y=="77",1977,
                                                                                       ifelse(addhealth$H1GI1Y=="78",1978,
                                                                                                     ifelse(addhealth$H1GI1Y=="79",1979,
                                                                                                                   ifelse(addhealth$H1GI1Y=="80",1980,
                                                                                                                                 ifelse(addhealth$H1GI1Y=="81",1981,
                                                                                                                                               ifelse(addhealth$H1GI1Y=="82",1982,
                                                                                                                                                             ifelse(addhealth$H1GI1Y=="83",1983,1979)))))))))))
addhealth$birthdatew1 <- as.yearmon(paste(addhealth$birthyearw1, addhealth$birthmonthw1), "%Y %m")
addhealth$iyearfix <- Recode(addhealth$iyear,recodes="'95'='1995'") 
addhealth$interviewdatew1 <- as.yearmon(paste(addhealth$iyearfix, addhealth$imonth), "%Y %m")
addhealth$age3<-(as.numeric(round(addhealth$interviewdatew1 - addhealth$birthdatew1)))
addhealth$age<-Recode(addhealth$age3, recode="12=12; 13=13; 14=14; 15=15; 16=16; 17=17; 18=18; 19=19; 20=20; else=NA")
table(addhealth$age)
## 
##   12   13   14   15   16   17   18   19   20 
##   16  469  944 1135 1770 1578 1218  270   30

Wave 3

addhealth$birthdatew3<-as.yearmon(paste(addhealth$H3OD1Y,addhealth$H3OD1M),"%Y %m")
addhealth$interviewdatew3<-as.yearmon(paste(addhealth$IYEAR3,addhealth$IMONTH3 ),"%Y %m")
addhealth$agew3<-(as.numeric(round(addhealth$interviewdatew3 - addhealth$birthdatew3)))
table(addhealth$agew3)
## 
##   18   19   20   21   22   23   24   25   26   27 
##   11  251  867 1176 1755 1721 1685  538  127   14

Wave 4

addhealth$month<- Recode(addhealth$H4OD1M,recodes="1=1;2=2;3=3;4=4;5=5;6=6;7=7;8=8;9=9;10=10;11=11;12=12")
addhealth$year <- Recode(addhealth$H4OD1Y,recodes="1974=1974;1975=1975;1976=1976;1977=1977;1978=1978;1979=1979;1980=1980;1981=1981;1982=1982;1983=1983")
addhealth$date <- as.yearmon(paste(addhealth$year, addhealth$month), "%Y %m")
addhealth$intmon <- Recode(addhealth$IMONTH4,recodes="1=1;2=2;3=3;4=4;5=5;6=6;7=7;8=8;9=9;10=10;11=11;12=12")

addhealth$intyear <- Recode(addhealth$IYEAR4,recodes="2007=2007;2008=2008;2009=2009")

addhealth$intdate<-as.yearmon(paste(addhealth$intyear,addhealth$intmon),"%Y %m")
addhealth$agewv4<-(as.numeric(round(addhealth$intdate - addhealth$date )))
table(addhealth$agewv4)
## 
##   24   25   26   27   28   29   30   31   32   33   34 
##    4   34  605  934 1563 1655 2027  974  309   33    7
table(addhealth$age)
## 
##   12   13   14   15   16   17   18   19   20 
##   16  469  944 1135 1770 1578 1218  270   30
table(addhealth$agew3)
## 
##   18   19   20   21   22   23   24   25   26   27 
##   11  251  867 1176 1755 1721 1685  538  127   14

Social Capital

table(addhealth$H4WP24)
## 
##    1    2    3    4    5    7 
##   65  223  825 1429 5062  541
addhealth$closemom<-(Recode(addhealth$H4WP24, recodes="1=0; 2=1; 3=2; 4=3; 5=4; else=NA"))
table(addhealth$closemom)
## 
##    0    1    2    3    4 
##   65  223  825 1429 5062
#race variables - Reference group is nhwhite
#nhwhite
addhealth$nhwhite <- ifelse(addhealth$H1GI6A==1,1,0)
addhealth$nhwhite<-Recode(addhealth$nhwhite, recodes="1='nhwhite'; 0='nonnhwhite'; 6:8=NA",as.factor=T)
#nhblack
addhealth$nhblack <- ifelse(addhealth$H1GI6B==1,1,0)
addhealth$nhblack<-Recode(addhealth$nhblack, recodes="1='nhblack'; 0='nonnhblack'; 6:8=NA",as.factor=T)
#Hispanic
addhealth$hispanic <- ifelse(addhealth$H1GI4==1,1,0)
addhealth$hispanic<-Recode(addhealth$hispanic, recodes="1='hispanic'; 0='nonhispanic'; 6:8=NA",as.factor=T)
#Asian
addhealth$asian <- ifelse(addhealth$H1GI6D==1,1,0)
addhealth$asian<-Recode(addhealth$asian, recodes="1='asian'; 0='nonasian'; 6:8=NA",as.factor=T)
#Native American
addhealth$native_american <- ifelse(addhealth$H1GI6C==1,1,0)
addhealth$native_american<-Recode(addhealth$native_american, recodes="1='native_american'; 0='nonnative_american'; 6:8=NA",as.factor=T)
#other
addhealth$other <- ifelse(addhealth$H1GI6E==1,1,0)
addhealth$other<-Recode(addhealth$other, recodes="1='other'; 0='nonother'; 6:8=NA",as.factor=T)
#combine race to one variable
addhealth$racethnic <- factor(ifelse(addhealth$nhwhite=="nhwhite","a-nhwhite", 
                                            ifelse(addhealth$nhblack=="nhblack", "b-nhblack", 
                                            ifelse(addhealth$hispanic=="hispanic", "c-hispanic",
                                                   ifelse(addhealth$asian=="asian","d-asian",
                                                          ifelse(addhealth$native_american=="native_american","e-native_american",
                                                                 ifelse(addhealth$other=="other","f-other",NA)))))))
table(addhealth$H4WS4)
## 
##    1    2    3    4    5   95   98 
##  333 2529 3508  888  775  109    3
addhealth$friends<-(Recode(addhealth$H4WS4, recodes="1=0; 2=1; 3=2; 4=3; 5=4; else=NA"))
table(addhealth$friends)
## 
##    0    1    2    3    4 
##  333 2529 3508  888  775
table(addhealth$BIO_SEX)
## 
##    1    2 
## 2910 5235
addhealth$sex<-Recode(addhealth$BIO_SEX, recodes="1=1; 2=0; else=NA")
table(addhealth$sex)
## 
##    0    1 
## 5235 2910
addhealth$yessuicidethoughtW4<-Recode(addhealth$H4SE1,recodes="0='no thoughts';1='yes thoughts';else=NA")
addhealth$nosuicidethoughtW1<-Recode(addhealth$H1SU1,recodes="0='no thoughts';1='yes thoughts';else=NA")

1. Event Variable

variables for having a no suicide thoughts to having suicide thoughts

addhealth$suicideevent<-ifelse(addhealth$nosuicidethoughtW1=='no thoughts' & addhealth$yessuicidethoughtW4=='yes thoughts', 1, 0) 
table((addhealth$suicideevent))
## 
##    0    1 
## 7685  419
addhealth$suicideevent.age<-ifelse(addhealth$suicideevent==1, (addhealth$age+addhealth$agewv4)/2,
                            addhealth$agewv4)
summary((addhealth$age+addhealth$agewv4)/2)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    18.0    21.5    22.5    22.5    23.5    26.5     715
addhealthmini<-subset(addhealth, is.na(H1SU1)==F&is.na(H3TO130)==F&is.na(age)==F&is.na(agew3)==F&is.na(agewv4)==F&H1SU1!=1)

event/transition

addhealth$W1.suicide.thought<-Recode(addhealth$H1SU1, recodes="0='a.no'; 1:4='b.yes'; else=NA")
addhealth$W3.suicide.thought<-ifelse(addhealth$H3TO130==1& addhealth$H1SU1==0, 1, ifelse(addhealth$H4SE1==1& addhealth$H3TO130==0, 1, 0))
addhealth$transition.w1<-ifelse(addhealth$W1.suicide.thought=="a.no"&addhealth$W3.suicide.thought==1, 1, 0)
table(addhealth$W3.suicide.thought)
## 
##    0    1 
## 7370  775
addhealth$depressed<-Recode(addhealth$H1FS6, recodes="0=0; 1:3=1; 6=NA; 8=NA")
table(addhealth$depressed)
## 
##    0    1 
## 4394 3739
addhealthb<-addhealth%>%
  select(age,agew3, agewv4, depressed, BIO_SEX, W3.suicide.thought, H1SU1, H3TO130, H4SE1, closemom, friends, GSWGT4,region.x,psuscid.x, racethnic)
#filter complete cases
addhealth<-addhealth%>%
 filter(complete.cases(.))
adlong<-reshape(data.frame(addhealthb), idvar = 'aid', varying=list(c('age', 'agew3'), c('agew3','agewv4')),
                v.names = c('age_enter', 'age_exit'), 
                times = 1:2, direction='long')

adlong<-adlong[order(adlong$aid, adlong$time),]
head(adlong)
##     depressed BIO_SEX W3.suicide.thought H1SU1 H3TO130 H4SE1 closemom friends
## 1.1         1       1                  1     0       1     0        4       1
## 1.2         1       1                  1     0       1     0        4       1
## 2.1         1       1                  1     0       1     0        4       1
## 2.2         1       1                  1     0       1     0        4       1
## 3.1         0       2                  0     0       0     0        4       2
## 3.2         0       2                  0     0       0     0        4       2
##       GSWGT4 region.x psuscid.x racethnic time age_enter age_exit aid
## 1.1 199.9222        1       371 b-nhblack    1        16       22   1
## 1.2 199.9222        1       371 b-nhblack    2        22       29   1
## 2.1 199.9222        1       371 b-nhblack    1        16       22   2
## 2.2 199.9222        1       371 b-nhblack    2        22       29   2
## 3.1 443.6169        3       044 a-nhwhite    1        14       20   3
## 3.2 443.6169        3       044 a-nhwhite    2        20       26   3
adlong$povtran <-NA

adlong$povtran[adlong$H1SU1==0&adlong$H3TO130==1&adlong$time==1]<-1
adlong$povtran[adlong$H3TO130==0&adlong$H4SE1==1&adlong$time==2]<-1
adlong$povtran[adlong$H1SU1==0&adlong$H3TO130==0&adlong$time==1]<-0
adlong$povtran[adlong$H3TO130==0&adlong$H4SE1==0&adlong$time==2]<-0
failed1<-which(is.na(adlong$povtran)==T)
adlong<-adlong[-failed1,]
adlong$age1r<-round(adlong$age_enter, 0)
adlong$age2r<-round(adlong$age_exit,0)
adlong$time_start<-adlong$time-1
head(adlong[, c("aid","time", "age_enter" , "age_exit")], n=20)
##      aid time age_enter age_exit
## 1.1    1    1        16       22
## 2.1    2    1        16       22
## 3.1    3    1        14       20
## 3.2    3    2        20       26
## 4.1    4    1        14       20
## 4.2    4    2        20       26
## 5.1    5    1        14       20
## 5.2    5    2        20       26
## 6.1    6    1        15       21
## 6.2    6    2        21       27
## 7.1    7    1        15       21
## 7.2    7    2        21       27
## 8.1    8    1        14       20
## 8.2    8    2        20       26
## 9.1    9    1        14       20
## 9.2    9    2        20       26
## 10.1  10    1        13       19
## 10.2  10    2        19       26
## 11.1  11    1        18       25
## 12.1  12    1        18       25
head(adlong[, c("aid","time", "age_enter" , "age_exit", "depressed", "closemom", "friends", "povtran")], n=20)
##      aid time age_enter age_exit depressed closemom friends povtran
## 1.1    1    1        16       22         1        4       1       1
## 2.1    2    1        16       22         1        4       1       1
## 3.1    3    1        14       20         0        4       2       0
## 3.2    3    2        20       26         0        4       2       0
## 4.1    4    1        14       20         0        4       2       0
## 4.2    4    2        20       26         0        4       2       0
## 5.1    5    1        14       20         0        4       2       0
## 5.2    5    2        20       26         0        4       2       0
## 6.1    6    1        15       21         1        3       3       0
## 6.2    6    2        21       27         1        3       3       0
## 7.1    7    1        15       21         1        3       3       0
## 7.2    7    2        21       27         1        3       3       0
## 8.1    8    1        14       20         0        4       2       0
## 8.2    8    2        20       26         0        4       2       0
## 9.1    9    1        14       20         0        4       2       0
## 9.2    9    2        20       26         0        4       2       0
## 10.1  10    1        13       19         0        4       0       0
## 10.2  10    2        19       26         0        4       0       0
## 11.1  11    1        18       25         0        2       3       1
## 12.1  12    1        18       25         0        2       3       1
library(dplyr)
adlong%>%
  group_by(time)%>%
  summarise(prop_event=mean(povtran, na.rm=T))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
##    time prop_event
##   <int>      <dbl>
## 1     1     0.0372
## 2     2     0.0697
adlong%>%
  filter(complete.cases(closemom))%>%
  group_by(time, closemom)%>%
  summarise(prop_event=mean(povtran, na.rm=T))
## `summarise()` regrouping output by 'time' (override with `.groups` argument)
## # A tibble: 10 x 3
## # Groups:   time [2]
##     time closemom prop_event
##    <int>    <dbl>      <dbl>
##  1     1        0     0     
##  2     1        1     0.105 
##  3     1        2     0.0514
##  4     1        3     0.0273
##  5     1        4     0.0375
##  6     2        0     0.0820
##  7     2        1     0.134 
##  8     2        2     0.118 
##  9     2        3     0.0859
## 10     2        4     0.0518
options(survey.lonely.psu = "adjust")
adlong<-adlong%>%
  filter(complete.cases(psuscid.x, racethnic, closemom))

GSWGT4,region.x,psuscid.x

des2<-svydesign(ids=~psuscid.x,
                strata = ~region.x,
                weights=~GSWGT4,
                data=adlong,
                nest=T)
fitl1<-svyglm(povtran~as.factor(time_start)+closemom+racethnic, design=des2, family=binomial(link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fitl1)
## 
## Call:
## svyglm(formula = povtran ~ as.factor(time_start) + closemom + 
##     racethnic, design = des2, family = binomial(link = "cloglog"))
## 
## Survey design:
## svydesign(ids = ~psuscid.x, strata = ~region.x, weights = ~GSWGT4, 
##     data = adlong, nest = T)
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                -2.23058    0.24993  -8.925 6.17e-15 ***
## as.factor(time_start)1      0.55179    0.16390   3.367  0.00102 ** 
## closemom                   -0.28607    0.06708  -4.265 4.01e-05 ***
## racethnicb-nhblack          0.26875    0.18925   1.420  0.15818    
## racethnicc-hispanic        -0.07166    0.34027  -0.211  0.83356    
## racethnicd-asian           -0.21735    0.57862  -0.376  0.70785    
## racethnice-native_american  0.54383    0.55616   0.978  0.33012    
## racethnicf-other           -1.12009    1.01629  -1.102  0.27261    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.009105)
## 
## Number of Fisher Scoring iterations: 6
sums<-data.frame(round(summary(fitl1)$coef, 3))
sums$HR<-round(exp(sums[,1]), 3)
sums
##                            Estimate Std..Error t.value Pr...t..    HR
## (Intercept)                  -2.231      0.250  -8.925    0.000 0.107
## as.factor(time_start)1        0.552      0.164   3.367    0.001 1.737
## closemom                     -0.286      0.067  -4.265    0.000 0.751
## racethnicb-nhblack            0.269      0.189   1.420    0.158 1.309
## racethnicc-hispanic          -0.072      0.340  -0.211    0.834 0.931
## racethnicd-asian             -0.217      0.579  -0.376    0.708 0.805
## racethnice-native_american    0.544      0.556   0.978    0.330 1.723
## racethnicf-other             -1.120      1.016  -1.102    0.273 0.326
dat3<-expand.grid(time_start=as.factor(c(0,1)), closemom=c(0, 1,2,3, 4), racethnic=levels(des2$variables$racethnic))
dat3$fitted<-as.numeric(predict(fitl1, dat3, type = "response"))
head(dat3)
##   time_start closemom racethnic     fitted
## 1          0        0 a-nhwhite 0.10189313
## 2          1        0 a-nhwhite 0.17022468
## 3          0        1 a-nhwhite 0.07755711
## 4          1        1 a-nhwhite 0.13079496
## 5          0        2 a-nhwhite 0.05884283
## 6          1        2 a-nhwhite 0.09994728
library(ggplot2)
dat3%>%
  mutate(socialcapitalmom=ifelse(closemom==1, "Not close to mom", "close to mom"),
         group=paste(socialcapitalmom, racethnic, sep = "-"),
         period=ifelse(time_start==1, "Period1", "Period2"))%>%
  ggplot()+
  geom_bar(aes(y=fitted, x=racethnic, fill=racethnic, group=racethnic), stat = "identity", position = "dodge")+facet_grid(~socialcapitalmom)+ggtitle(label = "Hazard of Having Suicide Thoughts by Race and low social Capital")

dat3%>%
  mutate(socialcapitalmom=ifelse(closemom==1, "Not close to mom", "close to mom"),
         group=paste(socialcapitalmom, racethnic, sep = "-"),
         period=ifelse(time_start==1, "Period1", "Period2"))%>%
  ggplot()+
  geom_bar(aes(y=fitted, x=racethnic, fill=racethnic, group=racethnic), stat = "identity", position = "dodge")+facet_wrap(~socialcapitalmom)+ggtitle(label = "Hazard of Having Suicide Thoughts by Race and low social Capital")