library(haven)
df1<- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0001/da27021p1.dta")
df1w <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0025/da27021p25.dta")
wv1 <- merge(df1, df1w, by="aid")
library (car)
## Loading required package: carData
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(questionr)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
wave4a <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0012/da27021p12.dta")
wave4wgt <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0028/da27021p28.dta")
wave4b <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0017/da27021p17.dta")
wave4 <- merge(wave4a, wave4b, by="aid")
wave4totalwgt <- merge(wave4, wave4wgt, by="aid")
addhealth <-merge(wave4totalwgt, wv1, by="aid")
addhealth<-addhealth%>%
select(aid,iyear,BIO_SEX,H4ID5H,iyear,imonth,iday,H1FV6,H4DS18,H4SE1,H4SE2,IMONTH4,IYEAR4,H1SU1,H4OD1M, H4OD1Y, H1FS6, H1GI1M, H1GI1Y)
addhealth<-addhealth%>%
filter(complete.cases(.))
table(is.na(addhealth))
##
## FALSE
## 261900
addhealth$suicidewave1<-recode(addhealth$H1SU1, recodes="0=0; 1=1; 6=NA; 8=NA; 9=NA")
## Warning in recode.numeric(addhealth$H1SU1, recodes = "0=0; 1=1; 6=NA; 8=NA;
## 9=NA"): NAs introduced by coercion
## Warning: Unreplaced values treated as NA as .x is not compatible. Please specify
## replacements exhaustively or supply .default
is.numeric(addhealth$suicidewave1)
## [1] FALSE
table(addhealth$suicidewave1)
## < table of extent 0 >
addhealth$suicidewave4 <-Recode(addhealth$H4SE1, recodes="0=0; 1=1; else=NA")
table(addhealth$suicidewave4)
##
## 0 1
## 13394 1044
is.numeric(addhealth$suicidewave4)
## [1] TRUE
addhealth$birthmonthw1 <- factor(ifelse(addhealth$H1GI1M=="1",01,
ifelse(addhealth$H1GI1M=="2", 02,
ifelse(addhealth$H1GI1M=="3", 03,
ifelse(addhealth$H1GI1M=="4", 04,
ifelse(addhealth$H1GI1M=="5", 05,
ifelse(addhealth$H1GI1M=="6", 06,
ifelse(addhealth$H1GI1M=="7", 07,
ifelse(addhealth$H1GI1M=="8", 08,
ifelse(addhealth$H1GI1M=="9r", 09,
ifelse(addhealth$H1GI1M=="10", 10,
ifelse(addhealth$H1GI1M=="11", 11,
ifelse(addhealth$H1GI1M=="12",12,
ifelse(addhealth$H1GI1M=="96", 6,NA))))))))))))))
addhealth$birthyearw1 <- factor(ifelse(addhealth$H1GI1Y=="74",1974,
ifelse(addhealth$H1GI1Y=="75",1975,
ifelse(addhealth$H1GI1Y=="76",1976,
ifelse(addhealth$H1GI1Y=="77",1977,
ifelse(addhealth$H1GI1Y=="78",1978,
ifelse(addhealth$H1GI1Y=="79",1979,
ifelse(addhealth$H1GI1Y=="80",1980,
ifelse(addhealth$H1GI1Y=="81",1981,
ifelse(addhealth$H1GI1Y=="82",1982,
ifelse(addhealth$H1GI1Y=="83",1983,1979)))))))))))
addhealth$birthdatew1 <- as.yearmon(paste(addhealth$birthyearw1, addhealth$birthmonthw1), "%Y %m")
addhealth$iyearfix <- Recode(addhealth$iyear,recodes="'95'='1995'")
addhealth$interviewdatew1 <- as.yearmon(paste(addhealth$iyearfix, addhealth$imonth), "%Y %m")
addhealth$age3<-(as.numeric(round(addhealth$interviewdatew1 - addhealth$birthdatew1)))
addhealth$age<-Recode(addhealth$age3, recode="12=12; 13=13; 14=14; 15=15; 16=16; 17=17; 18=18; 19=19; 20=20; else=NA")
table(addhealth$age)
##
## 12 13 14 15 16 17 18 19 20
## 44 676 1439 1764 2794 2482 3046 909 109
addhealth$month<- Recode(addhealth$H4OD1M,recodes="1=1;2=2;3=3;4=4;5=5;6=6;7=7;8=8;9=9;10=10;11=11;12=12")
addhealth$year <- Recode(addhealth$H4OD1Y,recodes="1974=1974;1975=1975;1976=1976;1977=1977;1978=1978;1979=1979;1980=1980;1981=1981;1982=1982;1983=1983")
addhealth$date <- as.yearmon(paste(addhealth$year, addhealth$month), "%Y %m")
addhealth$intmon <- Recode(addhealth$IMONTH4,recodes="1=1;2=2;3=3;4=4;5=5;6=6;7=7;8=8;9=9;10=10;11=11;12=12")
addhealth$intyear <- Recode(addhealth$IYEAR4,recodes="2007=2007;2008=2008;2009=2009")
addhealth$intdate<-as.yearmon(paste(addhealth$intyear,addhealth$intmon),"%Y %m")
addhealth$agewv4<-(as.numeric(round(addhealth$intdate - addhealth$date )))
table(addhealth$agewv4)
##
## 24 25 26 27 28 29 30 31 32 33 34 35
## 4 91 881 1416 2392 2590 3255 2662 1080 154 24 1
table(addhealth$age)
##
## 12 13 14 15 16 17 18 19 20
## 44 676 1439 1764 2794 2482 3046 909 109
addhealth$yessuicidethoughtW4<-Recode(addhealth$H4SE1,recodes="0='no thoughts';1='yes thoughts';else=NA")
addhealth$nosuicidethoughtW1<-Recode(addhealth$H1SU1,recodes="0='no thoughts';1='yes thoughts';else=NA")
addhealth$suicideevent<-ifelse(addhealth$nosuicidethoughtW1=='no thoughts' & addhealth$yessuicidethoughtW4=='yes thoughts', 1, 0)
table((addhealth$suicideevent))
##
## 0 1
## 13747 697
summary(addhealth$suicideevent)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00000 0.00000 0.00000 0.04826 0.00000 1.00000 106
Age when had Suicide thoughts
addhealth$suicideevent.age<-ifelse(addhealth$suicideevent==1, (addhealth$age+addhealth$agewv4)/2,
addhealth$agewv4)
summary((addhealth$age+addhealth$agewv4)/2)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 18.00 21.50 23.00 22.82 24.50 27.00 1287
summary(addhealth$suicideevent.age)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 18.5 28.0 29.0 29.0 31.0 35.0 143
class(addhealth$agewv4)
## [1] "numeric"
table(addhealth$H1FS6)
##
## 0 1 2 3 6 8
## 7769 4790 1379 593 7 12
addhealth$depressed<-Recode(addhealth$H1FS6, recodes="0=0; 1:3=1; 6=NA; 8=NA")
table(addhealth$depressed)
##
## 0 1
## 7769 6762
library(survival)
library(ggplot2)
head(addhealth[,c("suicideevent.age", "suicideevent")], n=20)
## suicideevent.age suicideevent
## 1 29 0
## 2 29 0
## 3 26 0
## 4 26 0
## 5 26 0
## 6 27 0
## 7 27 0
## 8 29 0
## 9 29 0
## 10 29 0
## 11 26 0
## 12 26 0
## 13 26 0
## 14 31 0
## 15 31 0
## 16 27 0
## 17 27 0
## 18 27 0
## 19 27 0
## 20 27 0
survivaltime<-survfit(Surv(time = suicideevent.age,event = suicideevent)~1,data=addhealth)
summary(survivaltime)
## Call: survfit(formula = Surv(time = suicideevent.age, event = suicideevent) ~
## 1, data = addhealth)
##
## 143 observations deleted due to missingness
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 18.5 14407 3 1.000 0.000120 1.000 1.000
## 19.0 14404 4 1.000 0.000184 0.999 1.000
## 19.5 14400 27 0.998 0.000404 0.997 0.998
## 20.0 14373 22 0.996 0.000518 0.995 0.997
## 20.5 14351 70 0.991 0.000776 0.990 0.993
## 21.0 14281 16 0.990 0.000823 0.989 0.992
## 21.5 14265 92 0.984 0.001053 0.982 0.986
## 22.0 14173 38 0.981 0.001134 0.979 0.983
## 22.5 14135 76 0.976 0.001279 0.973 0.978
## 23.0 14059 34 0.973 0.001339 0.971 0.976
## 23.5 14025 99 0.967 0.001497 0.964 0.970
## 24.0 13926 26 0.965 0.001535 0.962 0.968
## 24.5 13896 85 0.959 0.001654 0.956 0.962
## 25.0 13811 25 0.957 0.001687 0.954 0.960
## 25.5 13702 30 0.955 0.001726 0.952 0.958
## 26.0 13672 5 0.955 0.001732 0.951 0.958
## 26.5 12841 8 0.954 0.001744 0.951 0.958
plot(survivaltime, ylim = c(0.95,1), xlim = c(17,30), main="Survival function for suicide thoughts")
survivaltime2<-survfit(Surv(time = age,time2 = agewv4,event = suicideevent)~1,data=addhealth)
summary(survivaltime)
## Call: survfit(formula = Surv(time = suicideevent.age, event = suicideevent) ~
## 1, data = addhealth)
##
## 143 observations deleted due to missingness
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 18.5 14407 3 1.000 0.000120 1.000 1.000
## 19.0 14404 4 1.000 0.000184 0.999 1.000
## 19.5 14400 27 0.998 0.000404 0.997 0.998
## 20.0 14373 22 0.996 0.000518 0.995 0.997
## 20.5 14351 70 0.991 0.000776 0.990 0.993
## 21.0 14281 16 0.990 0.000823 0.989 0.992
## 21.5 14265 92 0.984 0.001053 0.982 0.986
## 22.0 14173 38 0.981 0.001134 0.979 0.983
## 22.5 14135 76 0.976 0.001279 0.973 0.978
## 23.0 14059 34 0.973 0.001339 0.971 0.976
## 23.5 14025 99 0.967 0.001497 0.964 0.970
## 24.0 13926 26 0.965 0.001535 0.962 0.968
## 24.5 13896 85 0.959 0.001654 0.956 0.962
## 25.0 13811 25 0.957 0.001687 0.954 0.960
## 25.5 13702 30 0.955 0.001726 0.952 0.958
## 26.0 13672 5 0.955 0.001732 0.951 0.958
## 26.5 12841 8 0.954 0.001744 0.951 0.958
plot(survivaltime2, ylim = c(0.95,1), xlim = c(17,30), main="Survival function for suicide thoughts")
library(muhaz)
summary(addhealth$suicideevent.age)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 18.5 28.0 29.0 29.0 31.0 35.0 143
addhealth2<-addhealth%>%
filter(is.na(suicideevent.age)==F)
summary(addhealth2$suicideevent.age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.5 28.0 29.0 29.0 31.0 35.0
haz <- kphaz.fit(time = addhealth2$suicideevent.age, status = addhealth2$suicideevent, method = "product-limit")
kphaz.plot(haz, main="Hazard function plot")
data.frame(haz)
## time haz var
## 1 18.75 0.0005554784 7.713907e-08
## 2 19.25 0.0037535200 5.218117e-07
## 3 19.75 0.0030636408 4.266317e-07
## 4 20.25 0.0097792874 1.366209e-06
## 5 20.75 0.0022419956 3.141591e-07
## 6 21.25 0.0129404770 1.820180e-06
## 7 21.75 0.0053695101 7.587278e-07
## 8 22.25 0.0107824621 1.529760e-06
## 9 22.75 0.0048426174 6.897340e-07
## 10 23.25 0.0141677098 2.027524e-06
## 11 23.75 0.0037375956 5.372933e-07
## 12 24.25 0.0122713057 1.771593e-06
## 13 24.75 0.0036377865 5.293426e-07
## 14 25.25 0.0043837235 6.405680e-07
## 15 25.75 0.0007632837 1.165223e-07
## 16 26.25 0.0012463972 1.941882e-07
library(survival)
library(broom)
library(ggplot2)
library(ggpubr)
library(survminer)
ggsurvplot(survivaltime,data = addhealth2,risk.table = T,fun="cumhaz",title="Cumulative Hazard function for suicide thought",xlim=c(17,30))
fit.kaplan<-survfit(Surv(suicideevent.age,suicideevent)~1,data=addhealth2)
summary(fit.kaplan)
## Call: survfit(formula = Surv(suicideevent.age, suicideevent) ~ 1, data = addhealth2)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 18.5 14407 3 1.000 0.000120 1.000 1.000
## 19.0 14404 4 1.000 0.000184 0.999 1.000
## 19.5 14400 27 0.998 0.000404 0.997 0.998
## 20.0 14373 22 0.996 0.000518 0.995 0.997
## 20.5 14351 70 0.991 0.000776 0.990 0.993
## 21.0 14281 16 0.990 0.000823 0.989 0.992
## 21.5 14265 92 0.984 0.001053 0.982 0.986
## 22.0 14173 38 0.981 0.001134 0.979 0.983
## 22.5 14135 76 0.976 0.001279 0.973 0.978
## 23.0 14059 34 0.973 0.001339 0.971 0.976
## 23.5 14025 99 0.967 0.001497 0.964 0.970
## 24.0 13926 26 0.965 0.001535 0.962 0.968
## 24.5 13896 85 0.959 0.001654 0.956 0.962
## 25.0 13811 25 0.957 0.001687 0.954 0.960
## 25.5 13702 30 0.955 0.001726 0.952 0.958
## 26.0 13672 5 0.955 0.001732 0.951 0.958
## 26.5 12841 8 0.954 0.001744 0.951 0.958
Those who are depressed.
table(addhealth$H1FS6)
##
## 0 1 2 3 6 8
## 7769 4790 1379 593 7 12
The younger an individual is the more likely they will have suicide thoughts. Another option, younger individuals who are depressed are more likely to suffer from suicidal ideation.
fit.kaplan.2<-survfit(Surv(suicideevent.age,suicideevent)~depressed,data=addhealth2)
summary(fit.kaplan.2)
## Call: survfit(formula = Surv(suicideevent.age, suicideevent) ~ depressed,
## data = addhealth2)
##
## 19 observations deleted due to missingness
## depressed=0
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 19.0 7694 4 0.999 0.000260 0.999 1.000
## 19.5 7690 14 0.998 0.000551 0.997 0.999
## 20.0 7676 12 0.996 0.000710 0.995 0.997
## 20.5 7664 28 0.992 0.000986 0.991 0.994
## 21.0 7636 10 0.991 0.001067 0.989 0.993
## 21.5 7626 53 0.984 0.001418 0.981 0.987
## 22.0 7573 29 0.981 0.001576 0.977 0.984
## 22.5 7544 52 0.974 0.001823 0.970 0.977
## 23.0 7492 13 0.972 0.001879 0.968 0.976
## 23.5 7479 51 0.965 0.002083 0.961 0.970
## 24.0 7428 16 0.963 0.002142 0.959 0.968
## 24.5 7408 39 0.958 0.002280 0.954 0.963
## 25.0 7369 11 0.957 0.002317 0.952 0.961
## 25.5 7318 8 0.956 0.002344 0.951 0.960
## 26.0 7310 3 0.955 0.002353 0.951 0.960
##
## depressed=1
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 18.5 6694 3 1.000 0.000259 0.999 1.000
## 19.5 6691 13 0.998 0.000597 0.996 0.999
## 20.0 6678 10 0.996 0.000760 0.995 0.998
## 20.5 6668 42 0.990 0.001226 0.987 0.992
## 21.0 6626 6 0.989 0.001278 0.986 0.991
## 21.5 6620 39 0.983 0.001575 0.980 0.986
## 22.0 6581 9 0.982 0.001635 0.979 0.985
## 22.5 6572 24 0.978 0.001785 0.975 0.982
## 23.0 6548 21 0.975 0.001906 0.971 0.979
## 23.5 6527 48 0.968 0.002155 0.964 0.972
## 24.0 6479 10 0.966 0.002203 0.962 0.971
## 24.5 6469 46 0.960 0.002409 0.955 0.964
## 25.0 6423 14 0.957 0.002468 0.953 0.962
## 25.5 6365 22 0.954 0.002558 0.949 0.959
## 26.0 6343 2 0.954 0.002566 0.949 0.959
## 26.5 6039 8 0.953 0.002601 0.947 0.958
ggsurvplot(fit.kaplan.2,conf.int = T,risk.table = F,title="Survivorship Function for Suicide Thought",xlab="Wave of Survey",ylim=c(0.95,1),xlim=c(17,35))
```