library(haven)
df1<- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0001/da27021p1.dta")
df1w <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0025/da27021p25.dta")
wv1 <- merge(df1, df1w, by="aid")
library (car)
## Loading required package: carData
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(questionr)
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
wave4a <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0012/da27021p12.dta")
wave4wgt <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0028/da27021p28.dta")
wave4b <- read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0017/da27021p17.dta")
wave4 <- merge(wave4a, wave4b, by="aid")
wave4totalwgt <- merge(wave4, wave4wgt, by="aid")
addhealth <-merge(wave4totalwgt, wv1, by="aid")

Complete Case

addhealth<-addhealth%>%
  select(aid,iyear,BIO_SEX,H4ID5H,iyear,imonth,iday,H1FV6,H4DS18,H4SE1,H4SE2,IMONTH4,IYEAR4,H1SU1,H4OD1M, H4OD1Y, H1FS6, H1GI1M, H1GI1Y)
addhealth<-addhealth%>%
 filter(complete.cases(.))
table(is.na(addhealth))
## 
##  FALSE 
## 261900

Suicidal Ideation

addhealth$suicidewave1<-recode(addhealth$H1SU1, recodes="0=0; 1=1; 6=NA; 8=NA; 9=NA")
## Warning in recode.numeric(addhealth$H1SU1, recodes = "0=0; 1=1; 6=NA; 8=NA;
## 9=NA"): NAs introduced by coercion
## Warning: Unreplaced values treated as NA as .x is not compatible. Please specify
## replacements exhaustively or supply .default
  is.numeric(addhealth$suicidewave1)                        
## [1] FALSE
table(addhealth$suicidewave1)
## < table of extent 0 >
addhealth$suicidewave4 <-Recode(addhealth$H4SE1, recodes="0=0; 1=1; else=NA")
table(addhealth$suicidewave4)
## 
##     0     1 
## 13394  1044
is.numeric(addhealth$suicidewave4)
## [1] TRUE

Age for Wave 1 and Wave 4

addhealth$birthmonthw1 <- factor(ifelse(addhealth$H1GI1M=="1",01, 
                                            ifelse(addhealth$H1GI1M=="2", 02, 
                                            ifelse(addhealth$H1GI1M=="3", 03,
                                                   ifelse(addhealth$H1GI1M=="4", 04,
                                                          ifelse(addhealth$H1GI1M=="5", 05,
                                                                 ifelse(addhealth$H1GI1M=="6", 06,
                                                                        ifelse(addhealth$H1GI1M=="7", 07,
                                                                               ifelse(addhealth$H1GI1M=="8", 08,
                                                                                      ifelse(addhealth$H1GI1M=="9r", 09,
                                                                                             ifelse(addhealth$H1GI1M=="10", 10,
                                                                                                    ifelse(addhealth$H1GI1M=="11", 11,
                                                                                                           ifelse(addhealth$H1GI1M=="12",12,
                                                                                                                  ifelse(addhealth$H1GI1M=="96", 6,NA))))))))))))))
addhealth$birthyearw1 <- factor(ifelse(addhealth$H1GI1Y=="74",1974,
                                             ifelse(addhealth$H1GI1Y=="75",1975,
                                                           ifelse(addhealth$H1GI1Y=="76",1976,
                                                                         ifelse(addhealth$H1GI1Y=="77",1977,
                                                                                       ifelse(addhealth$H1GI1Y=="78",1978,
                                                                                                     ifelse(addhealth$H1GI1Y=="79",1979,
                                                                                                                   ifelse(addhealth$H1GI1Y=="80",1980,
                                                                                                                                 ifelse(addhealth$H1GI1Y=="81",1981,
                                                                                                                                               ifelse(addhealth$H1GI1Y=="82",1982,
                                                                                                                                                             ifelse(addhealth$H1GI1Y=="83",1983,1979)))))))))))
addhealth$birthdatew1 <- as.yearmon(paste(addhealth$birthyearw1, addhealth$birthmonthw1), "%Y %m")
addhealth$iyearfix <- Recode(addhealth$iyear,recodes="'95'='1995'") 
addhealth$interviewdatew1 <- as.yearmon(paste(addhealth$iyearfix, addhealth$imonth), "%Y %m")
addhealth$age3<-(as.numeric(round(addhealth$interviewdatew1 - addhealth$birthdatew1)))
addhealth$age<-Recode(addhealth$age3, recode="12=12; 13=13; 14=14; 15=15; 16=16; 17=17; 18=18; 19=19; 20=20; else=NA")
table(addhealth$age)
## 
##   12   13   14   15   16   17   18   19   20 
##   44  676 1439 1764 2794 2482 3046  909  109

Wave4 Age

addhealth$month<- Recode(addhealth$H4OD1M,recodes="1=1;2=2;3=3;4=4;5=5;6=6;7=7;8=8;9=9;10=10;11=11;12=12")
addhealth$year <- Recode(addhealth$H4OD1Y,recodes="1974=1974;1975=1975;1976=1976;1977=1977;1978=1978;1979=1979;1980=1980;1981=1981;1982=1982;1983=1983")
addhealth$date <- as.yearmon(paste(addhealth$year, addhealth$month), "%Y %m")
addhealth$intmon <- Recode(addhealth$IMONTH4,recodes="1=1;2=2;3=3;4=4;5=5;6=6;7=7;8=8;9=9;10=10;11=11;12=12")

addhealth$intyear <- Recode(addhealth$IYEAR4,recodes="2007=2007;2008=2008;2009=2009")

addhealth$intdate<-as.yearmon(paste(addhealth$intyear,addhealth$intmon),"%Y %m")
addhealth$agewv4<-(as.numeric(round(addhealth$intdate - addhealth$date )))
table(addhealth$agewv4)
## 
##   24   25   26   27   28   29   30   31   32   33   34   35 
##    4   91  881 1416 2392 2590 3255 2662 1080  154   24    1
table(addhealth$age)
## 
##   12   13   14   15   16   17   18   19   20 
##   44  676 1439 1764 2794 2482 3046  909  109

variables for having no suicide thoughts and then having suicide thoughts

addhealth$yessuicidethoughtW4<-Recode(addhealth$H4SE1,recodes="0='no thoughts';1='yes thoughts';else=NA")
addhealth$nosuicidethoughtW1<-Recode(addhealth$H1SU1,recodes="0='no thoughts';1='yes thoughts';else=NA")

1. Event Variable

variables for having a no suicide thoughts to having suicide thoughts

addhealth$suicideevent<-ifelse(addhealth$nosuicidethoughtW1=='no thoughts' & addhealth$yessuicidethoughtW4=='yes thoughts', 1, 0) 
table((addhealth$suicideevent))
## 
##     0     1 
## 13747   697
summary(addhealth$suicideevent)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## 0.00000 0.00000 0.00000 0.04826 0.00000 1.00000     106

2. Duration

Age when had Suicide thoughts

addhealth$suicideevent.age<-ifelse(addhealth$suicideevent==1, (addhealth$age+addhealth$agewv4)/2,
                            addhealth$agewv4)
summary((addhealth$age+addhealth$agewv4)/2)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   18.00   21.50   23.00   22.82   24.50   27.00    1287
summary(addhealth$suicideevent.age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    18.5    28.0    29.0    29.0    31.0    35.0     143
class(addhealth$agewv4)
## [1] "numeric"

3. Censoring Indicator

table(addhealth$H1FS6)
## 
##    0    1    2    3    6    8 
## 7769 4790 1379  593    7   12
addhealth$depressed<-Recode(addhealth$H1FS6, recodes="0=0; 1:3=1; 6=NA; 8=NA")
table(addhealth$depressed)
## 
##    0    1 
## 7769 6762

4. Estimate the following functions and plot them

A. Survival

library(survival)
library(ggplot2)
head(addhealth[,c("suicideevent.age", "suicideevent")], n=20)
##    suicideevent.age suicideevent
## 1                29            0
## 2                29            0
## 3                26            0
## 4                26            0
## 5                26            0
## 6                27            0
## 7                27            0
## 8                29            0
## 9                29            0
## 10               29            0
## 11               26            0
## 12               26            0
## 13               26            0
## 14               31            0
## 15               31            0
## 16               27            0
## 17               27            0
## 18               27            0
## 19               27            0
## 20               27            0
survivaltime<-survfit(Surv(time = suicideevent.age,event = suicideevent)~1,data=addhealth)
summary(survivaltime)
## Call: survfit(formula = Surv(time = suicideevent.age, event = suicideevent) ~ 
##     1, data = addhealth)
## 
## 143 observations deleted due to missingness 
##  time n.risk n.event survival  std.err lower 95% CI upper 95% CI
##  18.5  14407       3    1.000 0.000120        1.000        1.000
##  19.0  14404       4    1.000 0.000184        0.999        1.000
##  19.5  14400      27    0.998 0.000404        0.997        0.998
##  20.0  14373      22    0.996 0.000518        0.995        0.997
##  20.5  14351      70    0.991 0.000776        0.990        0.993
##  21.0  14281      16    0.990 0.000823        0.989        0.992
##  21.5  14265      92    0.984 0.001053        0.982        0.986
##  22.0  14173      38    0.981 0.001134        0.979        0.983
##  22.5  14135      76    0.976 0.001279        0.973        0.978
##  23.0  14059      34    0.973 0.001339        0.971        0.976
##  23.5  14025      99    0.967 0.001497        0.964        0.970
##  24.0  13926      26    0.965 0.001535        0.962        0.968
##  24.5  13896      85    0.959 0.001654        0.956        0.962
##  25.0  13811      25    0.957 0.001687        0.954        0.960
##  25.5  13702      30    0.955 0.001726        0.952        0.958
##  26.0  13672       5    0.955 0.001732        0.951        0.958
##  26.5  12841       8    0.954 0.001744        0.951        0.958
plot(survivaltime, ylim = c(0.95,1), xlim = c(17,30), main="Survival function for suicide thoughts")

survivaltime2<-survfit(Surv(time = age,time2 = agewv4,event = suicideevent)~1,data=addhealth)
summary(survivaltime)
## Call: survfit(formula = Surv(time = suicideevent.age, event = suicideevent) ~ 
##     1, data = addhealth)
## 
## 143 observations deleted due to missingness 
##  time n.risk n.event survival  std.err lower 95% CI upper 95% CI
##  18.5  14407       3    1.000 0.000120        1.000        1.000
##  19.0  14404       4    1.000 0.000184        0.999        1.000
##  19.5  14400      27    0.998 0.000404        0.997        0.998
##  20.0  14373      22    0.996 0.000518        0.995        0.997
##  20.5  14351      70    0.991 0.000776        0.990        0.993
##  21.0  14281      16    0.990 0.000823        0.989        0.992
##  21.5  14265      92    0.984 0.001053        0.982        0.986
##  22.0  14173      38    0.981 0.001134        0.979        0.983
##  22.5  14135      76    0.976 0.001279        0.973        0.978
##  23.0  14059      34    0.973 0.001339        0.971        0.976
##  23.5  14025      99    0.967 0.001497        0.964        0.970
##  24.0  13926      26    0.965 0.001535        0.962        0.968
##  24.5  13896      85    0.959 0.001654        0.956        0.962
##  25.0  13811      25    0.957 0.001687        0.954        0.960
##  25.5  13702      30    0.955 0.001726        0.952        0.958
##  26.0  13672       5    0.955 0.001732        0.951        0.958
##  26.5  12841       8    0.954 0.001744        0.951        0.958
plot(survivaltime2, ylim = c(0.95,1), xlim = c(17,30), main="Survival function for suicide thoughts")

b. Hazard-

library(muhaz)
summary(addhealth$suicideevent.age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    18.5    28.0    29.0    29.0    31.0    35.0     143
addhealth2<-addhealth%>%
filter(is.na(suicideevent.age)==F)
summary(addhealth2$suicideevent.age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    18.5    28.0    29.0    29.0    31.0    35.0
haz <- kphaz.fit(time = addhealth2$suicideevent.age, status = addhealth2$suicideevent, method = "product-limit")
kphaz.plot(haz, main="Hazard function plot")

data.frame(haz)
##     time          haz          var
## 1  18.75 0.0005554784 7.713907e-08
## 2  19.25 0.0037535200 5.218117e-07
## 3  19.75 0.0030636408 4.266317e-07
## 4  20.25 0.0097792874 1.366209e-06
## 5  20.75 0.0022419956 3.141591e-07
## 6  21.25 0.0129404770 1.820180e-06
## 7  21.75 0.0053695101 7.587278e-07
## 8  22.25 0.0107824621 1.529760e-06
## 9  22.75 0.0048426174 6.897340e-07
## 10 23.25 0.0141677098 2.027524e-06
## 11 23.75 0.0037375956 5.372933e-07
## 12 24.25 0.0122713057 1.771593e-06
## 13 24.75 0.0036377865 5.293426e-07
## 14 25.25 0.0043837235 6.405680e-07
## 15 25.75 0.0007632837 1.165223e-07
## 16 26.25 0.0012463972 1.941882e-07
library(survival)
library(broom)
library(ggplot2)
library(ggpubr)
library(survminer)

Cumulative Hazard

ggsurvplot(survivaltime,data = addhealth2,risk.table = T,fun="cumhaz",title="Cumulative Hazard function for suicide thought",xlim=c(17,30))

5. Analysis

A. Kaplan Meier

fit.kaplan<-survfit(Surv(suicideevent.age,suicideevent)~1,data=addhealth2)
summary(fit.kaplan)
## Call: survfit(formula = Surv(suicideevent.age, suicideevent) ~ 1, data = addhealth2)
## 
##  time n.risk n.event survival  std.err lower 95% CI upper 95% CI
##  18.5  14407       3    1.000 0.000120        1.000        1.000
##  19.0  14404       4    1.000 0.000184        0.999        1.000
##  19.5  14400      27    0.998 0.000404        0.997        0.998
##  20.0  14373      22    0.996 0.000518        0.995        0.997
##  20.5  14351      70    0.991 0.000776        0.990        0.993
##  21.0  14281      16    0.990 0.000823        0.989        0.992
##  21.5  14265      92    0.984 0.001053        0.982        0.986
##  22.0  14173      38    0.981 0.001134        0.979        0.983
##  22.5  14135      76    0.976 0.001279        0.973        0.978
##  23.0  14059      34    0.973 0.001339        0.971        0.976
##  23.5  14025      99    0.967 0.001497        0.964        0.970
##  24.0  13926      26    0.965 0.001535        0.962        0.968
##  24.5  13896      85    0.959 0.001654        0.956        0.962
##  25.0  13811      25    0.957 0.001687        0.954        0.960
##  25.5  13702      30    0.955 0.001726        0.952        0.958
##  26.0  13672       5    0.955 0.001732        0.951        0.958
##  26.5  12841       8    0.954 0.001744        0.951        0.958

B. Define a grouping variable

Those who are depressed.

table(addhealth$H1FS6)
## 
##    0    1    2    3    6    8 
## 7769 4790 1379  593    7   12

C. Hypothesis

The younger an individual is the more likely they will have suicide thoughts. Another option, younger individuals who are depressed are more likely to suffer from suicidal ideation.

D. Comparison of Kaplan-Meier survival across grouping variable in your data. Interpret your results.

fit.kaplan.2<-survfit(Surv(suicideevent.age,suicideevent)~depressed,data=addhealth2)
summary(fit.kaplan.2)
## Call: survfit(formula = Surv(suicideevent.age, suicideevent) ~ depressed, 
##     data = addhealth2)
## 
## 19 observations deleted due to missingness 
##                 depressed=0 
##  time n.risk n.event survival  std.err lower 95% CI upper 95% CI
##  19.0   7694       4    0.999 0.000260        0.999        1.000
##  19.5   7690      14    0.998 0.000551        0.997        0.999
##  20.0   7676      12    0.996 0.000710        0.995        0.997
##  20.5   7664      28    0.992 0.000986        0.991        0.994
##  21.0   7636      10    0.991 0.001067        0.989        0.993
##  21.5   7626      53    0.984 0.001418        0.981        0.987
##  22.0   7573      29    0.981 0.001576        0.977        0.984
##  22.5   7544      52    0.974 0.001823        0.970        0.977
##  23.0   7492      13    0.972 0.001879        0.968        0.976
##  23.5   7479      51    0.965 0.002083        0.961        0.970
##  24.0   7428      16    0.963 0.002142        0.959        0.968
##  24.5   7408      39    0.958 0.002280        0.954        0.963
##  25.0   7369      11    0.957 0.002317        0.952        0.961
##  25.5   7318       8    0.956 0.002344        0.951        0.960
##  26.0   7310       3    0.955 0.002353        0.951        0.960
## 
##                 depressed=1 
##  time n.risk n.event survival  std.err lower 95% CI upper 95% CI
##  18.5   6694       3    1.000 0.000259        0.999        1.000
##  19.5   6691      13    0.998 0.000597        0.996        0.999
##  20.0   6678      10    0.996 0.000760        0.995        0.998
##  20.5   6668      42    0.990 0.001226        0.987        0.992
##  21.0   6626       6    0.989 0.001278        0.986        0.991
##  21.5   6620      39    0.983 0.001575        0.980        0.986
##  22.0   6581       9    0.982 0.001635        0.979        0.985
##  22.5   6572      24    0.978 0.001785        0.975        0.982
##  23.0   6548      21    0.975 0.001906        0.971        0.979
##  23.5   6527      48    0.968 0.002155        0.964        0.972
##  24.0   6479      10    0.966 0.002203        0.962        0.971
##  24.5   6469      46    0.960 0.002409        0.955        0.964
##  25.0   6423      14    0.957 0.002468        0.953        0.962
##  25.5   6365      22    0.954 0.002558        0.949        0.959
##  26.0   6343       2    0.954 0.002566        0.949        0.959
##  26.5   6039       8    0.953 0.002601        0.947        0.958

E. Plot the hazard function for the analysis for each level of the group variable

ggsurvplot(fit.kaplan.2,conf.int = T,risk.table = F,title="Survivorship Function for Suicide Thought",xlab="Wave of Survey",ylim=c(0.95,1),xlim=c(17,35))

```