library(haven)
library(foreign)
library(survival)
library(car)
## Loading required package: carData
library(survey)
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
df1<- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0001/da27021p1.dta")
df1w <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0025/da27021p25.dta")
wv1 <- merge(df1, df1w, by="aid")
wv3<-read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0003/da27021p3.dta")
wv2 <- merge(wv1, wv3, by="aid")
wave4a <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0012/da27021p12.dta")
wave4wgt <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0028/da27021p28.dta")
wave4b <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0017/da27021p17.dta")
wave4 <- merge(wave4a, wave4b, by="aid")
wave4totalwgt <- merge(wave4, wave4wgt, by="aid")
addhealth <-merge(wave4totalwgt, wv2, by="aid")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
library(questionr)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
addhealth<-addhealth%>%
select(aid,iyear,BIO_SEX,H4ID5H,iyear,imonth,iday,H1FV6,H4DS18,H4SE1,H4SE2,IMONTH4,IYEAR4,H1SU1,H4OD1M, H4OD1Y, H1FS6, H1GI1M, H1GI1Y, H3OD1Y,H3OD1M, IYEAR3, IMONTH3, H3TO130, H1SU2, H3TO131, H4SE2, H4WS4, H4WP24, H4WP38,H4SE1, H4WP24, H4WS4, GSWGT4,region.x,psuscid.x, H1GI6A, H1GI6B, H1GI4, H1GI6D, H1GI6C, H1GI6E)
addhealth<-addhealth%>%
filter(complete.cases(.))
addhealth$suicidewave1<-recode(addhealth$H1SU1, recodes="0=0; 1=1; 6=NA; 8=NA; 9=NA")
## Warning in recode.numeric(addhealth$H1SU1, recodes = "0=0; 1=1; 6=NA; 8=NA;
## 9=NA"): NAs introduced by coercion
## Warning: Unreplaced values treated as NA as .x is not compatible. Please specify
## replacements exhaustively or supply .default
is.numeric(addhealth$suicidewave1)
## [1] FALSE
table(addhealth$suicidewave1)
## < table of extent 0 >
addhealth$suicidewave3<-recode(addhealth$H3TO130, recodes="0=0; 1=1; 6=NA; 8=NA; 9=NA")
## Warning in recode.numeric(addhealth$H3TO130, recodes = "0=0; 1=1; 6=NA; 8=NA;
## 9=NA"): NAs introduced by coercion
## Warning: Unreplaced values treated as NA as .x is not compatible. Please specify
## replacements exhaustively or supply .default
is.numeric(addhealth$suicidewave3)
## [1] FALSE
table(addhealth$suicidewave3)
## < table of extent 0 >
addhealth$suicidewave4<-recode(addhealth$H4SE1, recodes="0=0; 1=1; 6=NA; 8=NA; 9=NA")
## Warning in recode.numeric(addhealth$H4SE1, recodes = "0=0; 1=1; 6=NA; 8=NA;
## 9=NA"): NAs introduced by coercion
## Warning: Unreplaced values treated as NA as .x is not compatible. Please specify
## replacements exhaustively or supply .default
is.numeric(addhealth$suicidewave4)
## [1] FALSE
table(addhealth$suicidewave4)
## < table of extent 0 >
addhealth$birthmonthw1 <- factor(ifelse(addhealth$H1GI1M=="1",01,
ifelse(addhealth$H1GI1M=="2", 02,
ifelse(addhealth$H1GI1M=="3", 03,
ifelse(addhealth$H1GI1M=="4", 04,
ifelse(addhealth$H1GI1M=="5", 05,
ifelse(addhealth$H1GI1M=="6", 06,
ifelse(addhealth$H1GI1M=="7", 07,
ifelse(addhealth$H1GI1M=="8", 08,
ifelse(addhealth$H1GI1M=="9r", 09,
ifelse(addhealth$H1GI1M=="10", 10,
ifelse(addhealth$H1GI1M=="11", 11,
ifelse(addhealth$H1GI1M=="12",12,
ifelse(addhealth$H1GI1M=="96", 6,NA))))))))))))))
addhealth$birthyearw1 <- factor(ifelse(addhealth$H1GI1Y=="74",1974,
ifelse(addhealth$H1GI1Y=="75",1975,
ifelse(addhealth$H1GI1Y=="76",1976,
ifelse(addhealth$H1GI1Y=="77",1977,
ifelse(addhealth$H1GI1Y=="78",1978,
ifelse(addhealth$H1GI1Y=="79",1979,
ifelse(addhealth$H1GI1Y=="80",1980,
ifelse(addhealth$H1GI1Y=="81",1981,
ifelse(addhealth$H1GI1Y=="82",1982,
ifelse(addhealth$H1GI1Y=="83",1983,1979)))))))))))
addhealth$birthdatew1 <- as.yearmon(paste(addhealth$birthyearw1, addhealth$birthmonthw1), "%Y %m")
addhealth$iyearfix <- Recode(addhealth$iyear,recodes="'95'='1995'")
addhealth$interviewdatew1 <- as.yearmon(paste(addhealth$iyearfix, addhealth$imonth), "%Y %m")
addhealth$age3<-(as.numeric(round(addhealth$interviewdatew1 - addhealth$birthdatew1)))
addhealth$age<-Recode(addhealth$age3, recode="12=12; 13=13; 14=14; 15=15; 16=16; 17=17; 18=18; 19=19; 20=20; else=NA")
table(addhealth$age)
##
## 12 13 14 15 16 17 18 19 20
## 16 469 944 1135 1770 1578 1218 270 30
addhealth$birthdatew3<-as.yearmon(paste(addhealth$H3OD1Y,addhealth$H3OD1M),"%Y %m")
addhealth$interviewdatew3<-as.yearmon(paste(addhealth$IYEAR3,addhealth$IMONTH3 ),"%Y %m")
addhealth$agew3<-(as.numeric(round(addhealth$interviewdatew3 - addhealth$birthdatew3)))
table(addhealth$agew3)
##
## 18 19 20 21 22 23 24 25 26 27
## 11 251 867 1176 1755 1721 1685 538 127 14
addhealth$month<- Recode(addhealth$H4OD1M,recodes="1=1;2=2;3=3;4=4;5=5;6=6;7=7;8=8;9=9;10=10;11=11;12=12")
addhealth$year <- Recode(addhealth$H4OD1Y,recodes="1974=1974;1975=1975;1976=1976;1977=1977;1978=1978;1979=1979;1980=1980;1981=1981;1982=1982;1983=1983")
addhealth$date <- as.yearmon(paste(addhealth$year, addhealth$month), "%Y %m")
addhealth$intmon <- Recode(addhealth$IMONTH4,recodes="1=1;2=2;3=3;4=4;5=5;6=6;7=7;8=8;9=9;10=10;11=11;12=12")
addhealth$intyear <- Recode(addhealth$IYEAR4,recodes="2007=2007;2008=2008;2009=2009")
addhealth$intdate<-as.yearmon(paste(addhealth$intyear,addhealth$intmon),"%Y %m")
addhealth$agewv4<-(as.numeric(round(addhealth$intdate - addhealth$date )))
table(addhealth$agewv4)
##
## 24 25 26 27 28 29 30 31 32 33 34
## 4 34 605 934 1563 1655 2027 974 309 33 7
table(addhealth$age)
##
## 12 13 14 15 16 17 18 19 20
## 16 469 944 1135 1770 1578 1218 270 30
table(addhealth$agew3)
##
## 18 19 20 21 22 23 24 25 26 27
## 11 251 867 1176 1755 1721 1685 538 127 14
addhealth$suicideevent<-ifelse(addhealth$nosuicidethoughtW1=='no thoughts' & addhealth$yessuicidethoughtW4=='yes thoughts', 1, 0)
table((addhealth$suicideevent))
##
## 0 1
## 7685 419
addhealth$suicideevent.age<-ifelse(addhealth$suicideevent==1, (addhealth$age+addhealth$agewv4)/2,
addhealth$agewv4)
summary((addhealth$age+addhealth$agewv4)/2)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 18.0 21.5 22.5 22.5 23.5 26.5 715
addhealthmini<-subset(addhealth, is.na(H1SU1)==F&is.na(H3TO130)==F&is.na(age)==F&is.na(agew3)==F&is.na(agewv4)==F&H1SU1!=1)
addhealth$W1.suicide.thought<-Recode(addhealth$H1SU1, recodes="0='a.no'; 1:4='b.yes'; else=NA")
addhealth$W3.suicide.thought<-ifelse(addhealth$H3TO130==1& addhealth$H1SU1==0, 1, ifelse(addhealth$H4SE1==1& addhealth$H3TO130==0, 1, 0))
addhealth$transition.w1<-ifelse(addhealth$W1.suicide.thought=="a.no"&addhealth$W3.suicide.thought==1, 1, 0)
table(addhealth$W3.suicide.thought)
##
## 0 1
## 7370 775
addhealth$depressed<-Recode(addhealth$H1FS6, recodes="0=0; 1:3=1; 6=NA; 8=NA")
table(addhealth$depressed)
##
## 0 1
## 4394 3739
addhealthb<-addhealth%>%
select(age,agew3, agewv4, depressed, BIO_SEX, W3.suicide.thought, H1SU1, H3TO130, H4SE1, closemom, friends, GSWGT4,region.x,psuscid.x, racethnic)
#filter complete cases
addhealth<-addhealth%>%
filter(complete.cases(.))
adlong<-reshape(data.frame(addhealthb), idvar = 'aid', varying=list(c('age', 'agew3'), c('agew3','agewv4')),
v.names = c('age_enter', 'age_exit'),
times = 1:2, direction='long')
adlong<-adlong[order(adlong$aid, adlong$time),]
head(adlong)
## depressed BIO_SEX W3.suicide.thought H1SU1 H3TO130 H4SE1 closemom friends
## 1.1 1 1 1 0 1 0 4 1
## 1.2 1 1 1 0 1 0 4 1
## 2.1 1 1 1 0 1 0 4 1
## 2.2 1 1 1 0 1 0 4 1
## 3.1 0 2 0 0 0 0 4 2
## 3.2 0 2 0 0 0 0 4 2
## GSWGT4 region.x psuscid.x racethnic time age_enter age_exit aid
## 1.1 199.9222 1 371 b-nhblack 1 16 22 1
## 1.2 199.9222 1 371 b-nhblack 2 22 29 1
## 2.1 199.9222 1 371 b-nhblack 1 16 22 2
## 2.2 199.9222 1 371 b-nhblack 2 22 29 2
## 3.1 443.6169 3 044 a-nhwhite 1 14 20 3
## 3.2 443.6169 3 044 a-nhwhite 2 20 26 3
adlong$povtran <-NA
adlong$povtran[adlong$H1SU1==0&adlong$H3TO130==1&adlong$time==1]<-1
adlong$povtran[adlong$H3TO130==0&adlong$H4SE1==1&adlong$time==2]<-1
adlong$povtran[adlong$H1SU1==0&adlong$H3TO130==0&adlong$time==1]<-0
adlong$povtran[adlong$H3TO130==0&adlong$H4SE1==0&adlong$time==2]<-0
failed1<-which(is.na(adlong$povtran)==T)
adlong<-adlong[-failed1,]
adlong$age1r<-round(adlong$age_enter, 0)
adlong$age2r<-round(adlong$age_exit,0)
adlong$time_start<-adlong$time-1
head(adlong[, c("aid","time", "age_enter" , "age_exit")], n=20)
## aid time age_enter age_exit
## 1.1 1 1 16 22
## 2.1 2 1 16 22
## 3.1 3 1 14 20
## 3.2 3 2 20 26
## 4.1 4 1 14 20
## 4.2 4 2 20 26
## 5.1 5 1 14 20
## 5.2 5 2 20 26
## 6.1 6 1 15 21
## 6.2 6 2 21 27
## 7.1 7 1 15 21
## 7.2 7 2 21 27
## 8.1 8 1 14 20
## 8.2 8 2 20 26
## 9.1 9 1 14 20
## 9.2 9 2 20 26
## 10.1 10 1 13 19
## 10.2 10 2 19 26
## 11.1 11 1 18 25
## 12.1 12 1 18 25
head(adlong[, c("aid","time", "age_enter" , "age_exit", "depressed", "closemom", "friends", "povtran")], n=20)
## aid time age_enter age_exit depressed closemom friends povtran
## 1.1 1 1 16 22 1 4 1 1
## 2.1 2 1 16 22 1 4 1 1
## 3.1 3 1 14 20 0 4 2 0
## 3.2 3 2 20 26 0 4 2 0
## 4.1 4 1 14 20 0 4 2 0
## 4.2 4 2 20 26 0 4 2 0
## 5.1 5 1 14 20 0 4 2 0
## 5.2 5 2 20 26 0 4 2 0
## 6.1 6 1 15 21 1 3 3 0
## 6.2 6 2 21 27 1 3 3 0
## 7.1 7 1 15 21 1 3 3 0
## 7.2 7 2 21 27 1 3 3 0
## 8.1 8 1 14 20 0 4 2 0
## 8.2 8 2 20 26 0 4 2 0
## 9.1 9 1 14 20 0 4 2 0
## 9.2 9 2 20 26 0 4 2 0
## 10.1 10 1 13 19 0 4 0 0
## 10.2 10 2 19 26 0 4 0 0
## 11.1 11 1 18 25 0 2 3 1
## 12.1 12 1 18 25 0 2 3 1
library(dplyr)
adlong%>%
group_by(time)%>%
summarise(prop_event=mean(povtran, na.rm=T))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
## time prop_event
## <int> <dbl>
## 1 1 0.0372
## 2 2 0.0697
adlong%>%
filter(complete.cases(closemom))%>%
group_by(time, closemom)%>%
summarise(prop_event=mean(povtran, na.rm=T))
## `summarise()` regrouping output by 'time' (override with `.groups` argument)
## # A tibble: 10 x 3
## # Groups: time [2]
## time closemom prop_event
## <int> <dbl> <dbl>
## 1 1 0 0
## 2 1 1 0.105
## 3 1 2 0.0514
## 4 1 3 0.0273
## 5 1 4 0.0375
## 6 2 0 0.0820
## 7 2 1 0.134
## 8 2 2 0.118
## 9 2 3 0.0859
## 10 2 4 0.0518
options(survey.lonely.psu = "adjust")
adlong<-adlong%>%
filter(complete.cases(psuscid.x, racethnic, closemom))
des2<-svydesign(ids=~psuscid.x,
strata = ~region.x,
weights=~GSWGT4,
data=adlong,
nest=T)
fitl1<-svyglm(povtran~as.factor(time_start)+closemom+racethnic, design=des2, family=binomial(link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fitl1)
##
## Call:
## svyglm(formula = povtran ~ as.factor(time_start) + closemom +
## racethnic, design = des2, family = binomial(link = "cloglog"))
##
## Survey design:
## svydesign(ids = ~psuscid.x, strata = ~region.x, weights = ~GSWGT4,
## data = adlong, nest = T)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.23058 0.24993 -8.925 6.17e-15 ***
## as.factor(time_start)1 0.55179 0.16390 3.367 0.00102 **
## closemom -0.28607 0.06708 -4.265 4.01e-05 ***
## racethnicb-nhblack 0.26875 0.18925 1.420 0.15818
## racethnicc-hispanic -0.07166 0.34027 -0.211 0.83356
## racethnicd-asian -0.21735 0.57862 -0.376 0.70785
## racethnice-native_american 0.54383 0.55616 0.978 0.33012
## racethnicf-other -1.12009 1.01629 -1.102 0.27261
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1.009105)
##
## Number of Fisher Scoring iterations: 6
sums<-data.frame(round(summary(fitl1)$coef, 3))
sums$HR<-round(exp(sums[,1]), 3)
sums
## Estimate Std..Error t.value Pr...t.. HR
## (Intercept) -2.231 0.250 -8.925 0.000 0.107
## as.factor(time_start)1 0.552 0.164 3.367 0.001 1.737
## closemom -0.286 0.067 -4.265 0.000 0.751
## racethnicb-nhblack 0.269 0.189 1.420 0.158 1.309
## racethnicc-hispanic -0.072 0.340 -0.211 0.834 0.931
## racethnicd-asian -0.217 0.579 -0.376 0.708 0.805
## racethnice-native_american 0.544 0.556 0.978 0.330 1.723
## racethnicf-other -1.120 1.016 -1.102 0.273 0.326
dat3<-expand.grid(time_start=as.factor(c(0,1)), closemom=c(0, 1,2,3, 4), racethnic=levels(des2$variables$racethnic))
dat3$fitted<-as.numeric(predict(fitl1, dat3, type = "response"))
head(dat3)
## time_start closemom racethnic fitted
## 1 0 0 a-nhwhite 0.10189313
## 2 1 0 a-nhwhite 0.17022468
## 3 0 1 a-nhwhite 0.07755711
## 4 1 1 a-nhwhite 0.13079496
## 5 0 2 a-nhwhite 0.05884283
## 6 1 2 a-nhwhite 0.09994728
library(ggplot2)
dat3%>%
mutate(socialcapitalmom=ifelse(closemom==1, "Not close to mom", "close to mom"),
group=paste(socialcapitalmom, racethnic, sep = "-"),
period=ifelse(time_start==1, "Period1", "Period2"))%>%
ggplot()+
geom_bar(aes(y=fitted, x=racethnic, fill=racethnic, group=racethnic), stat = "identity", position = "dodge")+facet_grid(~socialcapitalmom)+ggtitle(label = "Hazard of Having Suicide Thoughts by Race and low social Capital")
dat3%>%
mutate(socialcapitalmom=ifelse(closemom==1, "Not close to mom", "close to mom"),
group=paste(socialcapitalmom, racethnic, sep = "-"),
period=ifelse(time_start==1, "Period1", "Period2"))%>%
ggplot()+
geom_bar(aes(y=fitted, x=racethnic, fill=racethnic, group=racethnic), stat = "identity", position = "dodge")+facet_wrap(~socialcapitalmom)+ggtitle(label = "Hazard of Having Suicide Thoughts by Race and low social Capital")
m0<-svyglm(povtran~racethnic+closemom, design=des2, family=binomial (link = "logit"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
fitl1<-svyglm(povtran~as.factor(time_start)+closemom+racethnic, design=des2, family=binomial(link="cloglog"))
m1<-svyglm(povtran~factor(time_start)+racethnic+closemom, design=des2, family=binomial (link = "logit"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
m2<-svyglm(povtran~time_start+racethnic+closemom, design=des2, family=binomial (link = "logit"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
newdat<-expand.grid(time_start=unique(adlong$time_start), racethnic=unique(adlong$racethnic), closemom=mean(adlong$closemom))
newdat<-newdat%>%
dplyr::filter(complete.cases(.))
newdat$h0<-predict(m0, newdata = newdat, type="response")
newdat$h1<-predict(m1, newdata = newdat, type="response")
newdat$h2<-predict(m2, newdata = newdat, type="response")
head(newdat)
## time_start racethnic closemom h0 h1 h2
## 1 0 b-nhblack 3.488099 0.06908323 0.05066363 0.05066363
## 2 1 b-nhblack 3.488099 0.06908323 0.08595088 0.08595088
## 3 0 a-nhwhite 3.488099 0.05347506 0.03885161 0.03885161
## 4 1 a-nhwhite 3.488099 0.05347506 0.06648798 0.06648798
## 5 0 c-hispanic 3.488099 0.04952576 0.03630344 0.03630344
## 6 1 c-hispanic 3.488099 0.04952576 0.06224460 0.06224460
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(magrittr)
out<-melt(setDT(newdat), id=c("time_start", "racethnic", "closemom"), measure.vars = list(haz=c("h0", "h1", "h2")))
head(out, n=20)
## time_start racethnic closemom variable value
## 1: 0 b-nhblack 3.488099 h0 0.06908323
## 2: 1 b-nhblack 3.488099 h0 0.06908323
## 3: 0 a-nhwhite 3.488099 h0 0.05347506
## 4: 1 a-nhwhite 3.488099 h0 0.05347506
## 5: 0 c-hispanic 3.488099 h0 0.04952576
## 6: 1 c-hispanic 3.488099 h0 0.04952576
## 7: 0 d-asian 3.488099 h0 0.04330482
## 8: 1 d-asian 3.488099 h0 0.04330482
## 9: 0 f-other 3.488099 h0 0.01780261
## 10: 1 f-other 3.488099 h0 0.01780261
## 11: 0 e-native_american 3.488099 h0 0.09595879
## 12: 1 e-native_american 3.488099 h0 0.09595879
## 13: 0 b-nhblack 3.488099 h1 0.05066363
## 14: 1 b-nhblack 3.488099 h1 0.08595088
## 15: 0 a-nhwhite 3.488099 h1 0.03885161
## 16: 1 a-nhwhite 3.488099 h1 0.06648798
## 17: 0 c-hispanic 3.488099 h1 0.03630344
## 18: 1 c-hispanic 3.488099 h1 0.06224460
## 19: 0 d-asian 3.488099 h1 0.03139812
## 20: 1 d-asian 3.488099 h1 0.05403063
library(ggplot2)
library(rlang)
##
## Attaching package: 'rlang'
## The following object is masked from 'package:magrittr':
##
## set_names
## The following object is masked from 'package:data.table':
##
## :=
out%>%
dplyr::mutate(mod =dplyr::case_when(.$variable =="h0" ~"Constant", .$variable =="h1" ~ "General", .$variable == "h2" ~"Linear"))%>%
ggplot(aes(x=time_start, y=value))+geom_line(aes(group=racethnic, color=racethnic))+facet_wrap(~mod)
## Don't know how to automatically pick scale for object of type svystat. Defaulting to continuous.
AIC0<-AIC(m0)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC1<-AIC(m1)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC2<-AIC(m2)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
library(forcats)
AICS<-data.frame(AIC =c(AIC0["AIC"], AIC1["AIC"], AIC2["AIC"]), mod=factor(c("constant", "general", "linear")))
AICS$mod<-forcats::fct_relevel(AICS$mod, c("general", "constant", "linear"))
AICS$deltaAIC<-AICS$AIC-AICS$AIC[AICS$mod=="general"]
knitr::kable(AICS[, c("mod", "AIC", "deltaAIC")], caption = "Relative AIC for alternative time specifications")
| mod | AIC | deltaAIC |
|---|---|---|
| constant | 5840.892 | 44.78393 |
| general | 5796.108 | 0.00000 |
| linear | 5796.108 | 0.00000 |
Social Capital
1. Event Variable