library(haven)
library(foreign)
library(survival)
library(car)
## Loading required package: carData
library(survey)
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
df1<- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0001/da27021p1.dta")
df1w <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0025/da27021p25.dta")
wv1 <- merge(df1, df1w, by="aid")
wv3<-read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0003/da27021p3.dta")
wv2 <- merge(wv1, wv3, by="aid")
wave4a <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0012/da27021p12.dta")
wave4wgt <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0028/da27021p28.dta")
wave4b <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0017/da27021p17.dta")
wave4 <- merge(wave4a, wave4b, by="aid")
wave4totalwgt <- merge(wave4, wave4wgt, by="aid")
addhealth <-merge(wave4totalwgt, wv2, by="aid")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
library(questionr)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
Complete Case
addhealth<-addhealth%>%
select(aid,iyear,BIO_SEX,H4ID5H,iyear,imonth,iday,H1FV6,H4DS18,H4SE1,H4SE2,IMONTH4,IYEAR4,H1SU1,H4OD1M, H4OD1Y, H1FS6, H1GI1M, H1GI1Y, H3OD1Y,H3OD1M, IYEAR3, IMONTH3, H3TO130, H1SU2, H3TO131, H4SE2, H4WS4, H4WP24, H4WP38,H4SE1, H4WP24, H4WS4, GSWGT4,region.x,psuscid.x, H1GI6A, H1GI6B, H1GI4, H1GI6D, H1GI6C, H1GI6E)
addhealth<-addhealth%>%
filter(complete.cases(.))
Suicidal Ideation
addhealth$suicidewave1<-recode(addhealth$H1SU1, recodes="0=0; 1=1; 6=NA; 8=NA; 9=NA")
## Warning in recode.numeric(addhealth$H1SU1, recodes = "0=0; 1=1; 6=NA; 8=NA;
## 9=NA"): NAs introduced by coercion
## Warning: Unreplaced values treated as NA as .x is not compatible. Please specify
## replacements exhaustively or supply .default
is.numeric(addhealth$suicidewave1)
## [1] FALSE
table(addhealth$suicidewave1)
## < table of extent 0 >
addhealth$suicidewave3<-recode(addhealth$H3TO130, recodes="0=0; 1=1; 6=NA; 8=NA; 9=NA")
## Warning in recode.numeric(addhealth$H3TO130, recodes = "0=0; 1=1; 6=NA; 8=NA;
## 9=NA"): NAs introduced by coercion
## Warning: Unreplaced values treated as NA as .x is not compatible. Please specify
## replacements exhaustively or supply .default
is.numeric(addhealth$suicidewave3)
## [1] FALSE
table(addhealth$suicidewave3)
## < table of extent 0 >
addhealth$suicidewave4<-recode(addhealth$H4SE1, recodes="0=0; 1=1; 6=NA; 8=NA; 9=NA")
## Warning in recode.numeric(addhealth$H4SE1, recodes = "0=0; 1=1; 6=NA; 8=NA;
## 9=NA"): NAs introduced by coercion
## Warning: Unreplaced values treated as NA as .x is not compatible. Please specify
## replacements exhaustively or supply .default
is.numeric(addhealth$suicidewave4)
## [1] FALSE
table(addhealth$suicidewave4)
## < table of extent 0 >
Age for Wave 1, Wave 3, Wave 4
addhealth$birthmonthw1 <- factor(ifelse(addhealth$H1GI1M=="1",01,
ifelse(addhealth$H1GI1M=="2", 02,
ifelse(addhealth$H1GI1M=="3", 03,
ifelse(addhealth$H1GI1M=="4", 04,
ifelse(addhealth$H1GI1M=="5", 05,
ifelse(addhealth$H1GI1M=="6", 06,
ifelse(addhealth$H1GI1M=="7", 07,
ifelse(addhealth$H1GI1M=="8", 08,
ifelse(addhealth$H1GI1M=="9r", 09,
ifelse(addhealth$H1GI1M=="10", 10,
ifelse(addhealth$H1GI1M=="11", 11,
ifelse(addhealth$H1GI1M=="12",12,
ifelse(addhealth$H1GI1M=="96", 6,NA))))))))))))))
addhealth$birthyearw1 <- factor(ifelse(addhealth$H1GI1Y=="74",1974,
ifelse(addhealth$H1GI1Y=="75",1975,
ifelse(addhealth$H1GI1Y=="76",1976,
ifelse(addhealth$H1GI1Y=="77",1977,
ifelse(addhealth$H1GI1Y=="78",1978,
ifelse(addhealth$H1GI1Y=="79",1979,
ifelse(addhealth$H1GI1Y=="80",1980,
ifelse(addhealth$H1GI1Y=="81",1981,
ifelse(addhealth$H1GI1Y=="82",1982,
ifelse(addhealth$H1GI1Y=="83",1983,1979)))))))))))
addhealth$birthdatew1 <- as.yearmon(paste(addhealth$birthyearw1, addhealth$birthmonthw1), "%Y %m")
addhealth$iyearfix <- Recode(addhealth$iyear,recodes="'95'='1995'")
addhealth$interviewdatew1 <- as.yearmon(paste(addhealth$iyearfix, addhealth$imonth), "%Y %m")
addhealth$age3<-(as.numeric(round(addhealth$interviewdatew1 - addhealth$birthdatew1)))
addhealth$age<-Recode(addhealth$age3, recode="12=12; 13=13; 14=14; 15=15; 16=16; 17=17; 18=18; 19=19; 20=20; else=NA")
table(addhealth$age)
##
## 12 13 14 15 16 17 18 19 20
## 16 469 944 1135 1770 1578 1218 270 30
Wave 3
addhealth$birthdatew3<-as.yearmon(paste(addhealth$H3OD1Y,addhealth$H3OD1M),"%Y %m")
addhealth$interviewdatew3<-as.yearmon(paste(addhealth$IYEAR3,addhealth$IMONTH3 ),"%Y %m")
addhealth$agew3<-(as.numeric(round(addhealth$interviewdatew3 - addhealth$birthdatew3)))
table(addhealth$agew3)
##
## 18 19 20 21 22 23 24 25 26 27
## 11 251 867 1176 1755 1721 1685 538 127 14
Wave 4
addhealth$month<- Recode(addhealth$H4OD1M,recodes="1=1;2=2;3=3;4=4;5=5;6=6;7=7;8=8;9=9;10=10;11=11;12=12")
addhealth$year <- Recode(addhealth$H4OD1Y,recodes="1974=1974;1975=1975;1976=1976;1977=1977;1978=1978;1979=1979;1980=1980;1981=1981;1982=1982;1983=1983")
addhealth$date <- as.yearmon(paste(addhealth$year, addhealth$month), "%Y %m")
addhealth$intmon <- Recode(addhealth$IMONTH4,recodes="1=1;2=2;3=3;4=4;5=5;6=6;7=7;8=8;9=9;10=10;11=11;12=12")
addhealth$intyear <- Recode(addhealth$IYEAR4,recodes="2007=2007;2008=2008;2009=2009")
addhealth$intdate<-as.yearmon(paste(addhealth$intyear,addhealth$intmon),"%Y %m")
addhealth$agewv4<-(as.numeric(round(addhealth$intdate - addhealth$date )))
table(addhealth$agewv4)
##
## 24 25 26 27 28 29 30 31 32 33 34
## 4 34 605 934 1563 1655 2027 974 309 33 7
table(addhealth$age)
##
## 12 13 14 15 16 17 18 19 20
## 16 469 944 1135 1770 1578 1218 270 30
table(addhealth$agew3)
##
## 18 19 20 21 22 23 24 25 26 27
## 11 251 867 1176 1755 1721 1685 538 127 14
Social Capital
table(addhealth$H4WP24)
##
## 1 2 3 4 5 7
## 65 223 825 1429 5062 541
addhealth$closemom<-(Recode(addhealth$H4WP24, recodes="1=0; 2=1; 3=2; 4=3; 5=4; else=NA"))
table(addhealth$closemom)
##
## 0 1 2 3 4
## 65 223 825 1429 5062
#race variables - Reference group is nhwhite
#nhwhite
addhealth$nhwhite <- ifelse(addhealth$H1GI6A==1,1,0)
addhealth$nhwhite<-Recode(addhealth$nhwhite, recodes="1='nhwhite'; 0='nonnhwhite'; 6:8=NA",as.factor=T)
#nhblack
addhealth$nhblack <- ifelse(addhealth$H1GI6B==1,1,0)
addhealth$nhblack<-Recode(addhealth$nhblack, recodes="1='nhblack'; 0='nonnhblack'; 6:8=NA",as.factor=T)
#Hispanic
addhealth$hispanic <- ifelse(addhealth$H1GI4==1,1,0)
addhealth$hispanic<-Recode(addhealth$hispanic, recodes="1='hispanic'; 0='nonhispanic'; 6:8=NA",as.factor=T)
#Asian
addhealth$asian <- ifelse(addhealth$H1GI6D==1,1,0)
addhealth$asian<-Recode(addhealth$asian, recodes="1='asian'; 0='nonasian'; 6:8=NA",as.factor=T)
#Native American
addhealth$native_american <- ifelse(addhealth$H1GI6C==1,1,0)
addhealth$native_american<-Recode(addhealth$native_american, recodes="1='native_american'; 0='nonnative_american'; 6:8=NA",as.factor=T)
#other
addhealth$other <- ifelse(addhealth$H1GI6E==1,1,0)
addhealth$other<-Recode(addhealth$other, recodes="1='other'; 0='nonother'; 6:8=NA",as.factor=T)
#combine race to one variable
addhealth$racethnic <- factor(ifelse(addhealth$nhwhite=="nhwhite","a-nhwhite",
ifelse(addhealth$nhblack=="nhblack", "b-nhblack",
ifelse(addhealth$hispanic=="hispanic", "c-hispanic",
ifelse(addhealth$asian=="asian","d-asian",
ifelse(addhealth$native_american=="native_american","e-native_american",
ifelse(addhealth$other=="other","f-other",NA)))))))
table(addhealth$H4WS4)
##
## 1 2 3 4 5 95 98
## 333 2529 3508 888 775 109 3
addhealth$friends<-(Recode(addhealth$H4WS4, recodes="1=0; 2=1; 3=2; 4=3; 5=4; else=NA"))
table(addhealth$friends)
##
## 0 1 2 3 4
## 333 2529 3508 888 775
table(addhealth$BIO_SEX)
##
## 1 2
## 2910 5235
addhealth$sex<-Recode(addhealth$BIO_SEX, recodes="1=1; 2=0; else=NA")
table(addhealth$sex)
##
## 0 1
## 5235 2910
addhealth$yessuicidethoughtW4<-Recode(addhealth$H4SE1,recodes="0='no thoughts';1='yes thoughts';else=NA")
addhealth$nosuicidethoughtW1<-Recode(addhealth$H1SU1,recodes="0='no thoughts';1='yes thoughts';else=NA")
1. Event Variable
variables for having a no suicide thoughts to having suicide thoughts
addhealth$suicideevent<-ifelse(addhealth$nosuicidethoughtW1=='no thoughts' & addhealth$yessuicidethoughtW4=='yes thoughts', 1, 0)
table((addhealth$suicideevent))
##
## 0 1
## 7685 419
addhealth$suicideevent.age<-ifelse(addhealth$suicideevent==1, (addhealth$age+addhealth$agewv4)/2,
addhealth$agewv4)
summary((addhealth$age+addhealth$agewv4)/2)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 18.0 21.5 22.5 22.5 23.5 26.5 715
addhealthmini<-subset(addhealth, is.na(H1SU1)==F&is.na(H3TO130)==F&is.na(age)==F&is.na(agew3)==F&is.na(agewv4)==F&H1SU1!=1)
event/transition
addhealth$W1.suicide.thought<-Recode(addhealth$H1SU1, recodes="0='a.no'; 1:4='b.yes'; else=NA")
addhealth$W3.suicide.thought<-ifelse(addhealth$H3TO130==1& addhealth$H1SU1==0, 1, ifelse(addhealth$H4SE1==1& addhealth$H3TO130==0, 1, 0))
addhealth$transition.w1<-ifelse(addhealth$W1.suicide.thought=="a.no"&addhealth$W3.suicide.thought==1, 1, 0)
table(addhealth$W3.suicide.thought)
##
## 0 1
## 7370 775
addhealth$depressed<-Recode(addhealth$H1FS6, recodes="0=0; 1:3=1; 6=NA; 8=NA")
table(addhealth$depressed)
##
## 0 1
## 4394 3739
addhealthb<-addhealth%>%
select(age,agew3, agewv4, depressed, BIO_SEX, W3.suicide.thought, H1SU1, H3TO130, H4SE1, closemom, friends, GSWGT4,region.x,psuscid.x, racethnic)
#filter complete cases
addhealth<-addhealth%>%
filter(complete.cases(.))
adlong<-reshape(data.frame(addhealthb), idvar = 'aid', varying=list(c('age', 'agew3'), c('agew3','agewv4')),
v.names = c('age_enter', 'age_exit'),
times = 1:2, direction='long')
adlong<-adlong[order(adlong$aid, adlong$time),]
head(adlong)
## depressed BIO_SEX W3.suicide.thought H1SU1 H3TO130 H4SE1 closemom friends
## 1.1 1 1 1 0 1 0 4 1
## 1.2 1 1 1 0 1 0 4 1
## 2.1 1 1 1 0 1 0 4 1
## 2.2 1 1 1 0 1 0 4 1
## 3.1 0 2 0 0 0 0 4 2
## 3.2 0 2 0 0 0 0 4 2
## GSWGT4 region.x psuscid.x racethnic time age_enter age_exit aid
## 1.1 199.9222 1 371 b-nhblack 1 16 22 1
## 1.2 199.9222 1 371 b-nhblack 2 22 29 1
## 2.1 199.9222 1 371 b-nhblack 1 16 22 2
## 2.2 199.9222 1 371 b-nhblack 2 22 29 2
## 3.1 443.6169 3 044 a-nhwhite 1 14 20 3
## 3.2 443.6169 3 044 a-nhwhite 2 20 26 3
adlong$povtran <-NA
adlong$povtran[adlong$H1SU1==0&adlong$H3TO130==1&adlong$time==1]<-1
adlong$povtran[adlong$H3TO130==0&adlong$H4SE1==1&adlong$time==2]<-1
adlong$povtran[adlong$H1SU1==0&adlong$H3TO130==0&adlong$time==1]<-0
adlong$povtran[adlong$H3TO130==0&adlong$H4SE1==0&adlong$time==2]<-0
failed1<-which(is.na(adlong$povtran)==T)
adlong<-adlong[-failed1,]
adlong$age1r<-round(adlong$age_enter, 0)
adlong$age2r<-round(adlong$age_exit,0)
adlong$time_start<-adlong$time-1
head(adlong[, c("aid","time", "age_enter" , "age_exit")], n=20)
## aid time age_enter age_exit
## 1.1 1 1 16 22
## 2.1 2 1 16 22
## 3.1 3 1 14 20
## 3.2 3 2 20 26
## 4.1 4 1 14 20
## 4.2 4 2 20 26
## 5.1 5 1 14 20
## 5.2 5 2 20 26
## 6.1 6 1 15 21
## 6.2 6 2 21 27
## 7.1 7 1 15 21
## 7.2 7 2 21 27
## 8.1 8 1 14 20
## 8.2 8 2 20 26
## 9.1 9 1 14 20
## 9.2 9 2 20 26
## 10.1 10 1 13 19
## 10.2 10 2 19 26
## 11.1 11 1 18 25
## 12.1 12 1 18 25
head(adlong[, c("aid","time", "age_enter" , "age_exit", "depressed", "closemom", "friends", "povtran")], n=20)
## aid time age_enter age_exit depressed closemom friends povtran
## 1.1 1 1 16 22 1 4 1 1
## 2.1 2 1 16 22 1 4 1 1
## 3.1 3 1 14 20 0 4 2 0
## 3.2 3 2 20 26 0 4 2 0
## 4.1 4 1 14 20 0 4 2 0
## 4.2 4 2 20 26 0 4 2 0
## 5.1 5 1 14 20 0 4 2 0
## 5.2 5 2 20 26 0 4 2 0
## 6.1 6 1 15 21 1 3 3 0
## 6.2 6 2 21 27 1 3 3 0
## 7.1 7 1 15 21 1 3 3 0
## 7.2 7 2 21 27 1 3 3 0
## 8.1 8 1 14 20 0 4 2 0
## 8.2 8 2 20 26 0 4 2 0
## 9.1 9 1 14 20 0 4 2 0
## 9.2 9 2 20 26 0 4 2 0
## 10.1 10 1 13 19 0 4 0 0
## 10.2 10 2 19 26 0 4 0 0
## 11.1 11 1 18 25 0 2 3 1
## 12.1 12 1 18 25 0 2 3 1
library(dplyr)
adlong%>%
group_by(time)%>%
summarise(prop_event=mean(povtran, na.rm=T))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
## time prop_event
## <int> <dbl>
## 1 1 0.0372
## 2 2 0.0697
adlong%>%
filter(complete.cases(closemom))%>%
group_by(time, closemom)%>%
summarise(prop_event=mean(povtran, na.rm=T))
## `summarise()` regrouping output by 'time' (override with `.groups` argument)
## # A tibble: 10 x 3
## # Groups: time [2]
## time closemom prop_event
## <int> <dbl> <dbl>
## 1 1 0 0
## 2 1 1 0.105
## 3 1 2 0.0514
## 4 1 3 0.0273
## 5 1 4 0.0375
## 6 2 0 0.0820
## 7 2 1 0.134
## 8 2 2 0.118
## 9 2 3 0.0859
## 10 2 4 0.0518
options(survey.lonely.psu = "adjust")
adlong<-adlong%>%
filter(complete.cases(psuscid.x, racethnic, closemom))
GSWGT4,region.x,psuscid.x
des2<-svydesign(ids=~psuscid.x,
strata = ~region.x,
weights=~GSWGT4,
data=adlong,
nest=T)
fitl1<-svyglm(povtran~as.factor(time_start)+closemom+racethnic, design=des2, family=binomial(link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fitl1)
##
## Call:
## svyglm(formula = povtran ~ as.factor(time_start) + closemom +
## racethnic, design = des2, family = binomial(link = "cloglog"))
##
## Survey design:
## svydesign(ids = ~psuscid.x, strata = ~region.x, weights = ~GSWGT4,
## data = adlong, nest = T)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.23058 0.24993 -8.925 6.17e-15 ***
## as.factor(time_start)1 0.55179 0.16390 3.367 0.00102 **
## closemom -0.28607 0.06708 -4.265 4.01e-05 ***
## racethnicb-nhblack 0.26875 0.18925 1.420 0.15818
## racethnicc-hispanic -0.07166 0.34027 -0.211 0.83356
## racethnicd-asian -0.21735 0.57862 -0.376 0.70785
## racethnice-native_american 0.54383 0.55616 0.978 0.33012
## racethnicf-other -1.12009 1.01629 -1.102 0.27261
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1.009105)
##
## Number of Fisher Scoring iterations: 6
sums<-data.frame(round(summary(fitl1)$coef, 3))
sums$HR<-round(exp(sums[,1]), 3)
sums
## Estimate Std..Error t.value Pr...t.. HR
## (Intercept) -2.231 0.250 -8.925 0.000 0.107
## as.factor(time_start)1 0.552 0.164 3.367 0.001 1.737
## closemom -0.286 0.067 -4.265 0.000 0.751
## racethnicb-nhblack 0.269 0.189 1.420 0.158 1.309
## racethnicc-hispanic -0.072 0.340 -0.211 0.834 0.931
## racethnicd-asian -0.217 0.579 -0.376 0.708 0.805
## racethnice-native_american 0.544 0.556 0.978 0.330 1.723
## racethnicf-other -1.120 1.016 -1.102 0.273 0.326
dat3<-expand.grid(time_start=as.factor(c(0,1)), closemom=c(0, 1,2,3, 4), racethnic=levels(des2$variables$racethnic))
dat3$fitted<-as.numeric(predict(fitl1, dat3, type = "response"))
head(dat3)
## time_start closemom racethnic fitted
## 1 0 0 a-nhwhite 0.10189313
## 2 1 0 a-nhwhite 0.17022468
## 3 0 1 a-nhwhite 0.07755711
## 4 1 1 a-nhwhite 0.13079496
## 5 0 2 a-nhwhite 0.05884283
## 6 1 2 a-nhwhite 0.09994728
library(ggplot2)
dat3%>%
mutate(socialcapitalmom=ifelse(closemom==1, "Not close to mom", "close to mom"),
group=paste(socialcapitalmom, racethnic, sep = "-"),
period=ifelse(time_start==1, "Period1", "Period2"))%>%
ggplot()+
geom_bar(aes(y=fitted, x=racethnic, fill=racethnic, group=racethnic), stat = "identity", position = "dodge")+facet_grid(~socialcapitalmom)+ggtitle(label = "Hazard of Having Suicide Thoughts by Race and low social Capital")

dat3%>%
mutate(socialcapitalmom=ifelse(closemom==1, "Not close to mom", "close to mom"),
group=paste(socialcapitalmom, racethnic, sep = "-"),
period=ifelse(time_start==1, "Period1", "Period2"))%>%
ggplot()+
geom_bar(aes(y=fitted, x=racethnic, fill=racethnic, group=racethnic), stat = "identity", position = "dodge")+facet_wrap(~socialcapitalmom)+ggtitle(label = "Hazard of Having Suicide Thoughts by Race and low social Capital")

Social Capital
1. Event Variable