library(haven)
library(survival)
library(car)
## Loading required package: carData
library(muhaz)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(survival)
library(forcats)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ readr 2.1.2
## ✔ tibble 3.1.8 ✔ purrr 0.3.4
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ kableExtra::group_rows() masks dplyr::group_rows()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ purrr::some() masks car::some()
library(ggsurvfit)
library(gtsummary)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
##
## Attaching package: 'survey'
##
## The following object is masked from 'package:graphics':
##
## dotchart
library(rio)
library(haven)
library(eha)
X21600_0008_Data <- read_sav("21600-0008-Data.sav")
#View(X21600_0008_Data)
X21600_0022_Data <- read_sav("21600-0022-Data.sav")
#View(X21600_0022_Data)
X21600_0001_Data <- read_sav("21600-0001-Data.sav")
#View(X21600_0001_Data)
mergeddata<-dplyr::left_join( X21600_0008_Data, X21600_0001_Data, by=c("AID"= "AID"))
mergeddata2<-dplyr::left_join(mergeddata, X21600_0022_Data, by=c("AID"= "AID"))
rm(X21600_0008_Data,X21600_0022_Data,X21600_0001_Data,mergeddata);gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 2471041 132.0 4397940 234.9 4397940 234.9
## Vcells 31226673 238.3 89585662 683.5 89582238 683.5
##thoughts of suicide wave 1
mergeddata2$sui_1<-Recode(mergeddata2$H1SU1, recodes="1=1; 0=0; else=NA")
## age, if friend attempted suicide, if family attempted, if live alone, education, thoughts of suicide, sex, race, closeness to mentor, and thoughts on likelihood of divorce at wave 3
mergeddata2$age_3<-(mergeddata2$IYEAR3 - mergeddata2$H3OD1Y)
mergeddata2$fri_sui3<-Recode(mergeddata2$H3TO133, recodes="1=1; 0=0; else=NA")
mergeddata2$fam_sui3 <-Recode(mergeddata2$H3TO135, recodes="1=1; 0=0; else=NA")
mergeddata2$live_03<-Recode(mergeddata2$H3HR5, recodes="1=1; 2=0; else=NA")
mergeddata2$H3ED1 <- as.numeric(mergeddata2$H3ED1)
mergeddata2$educ_re3<-Recode(mergeddata2$H3ED1, recodes= "6:11='lesshigh';
12='high'; 13:15='somecol'; 16:22='col'; else=NA", as.factor=T)
mergeddata2$sui_3<-Recode(mergeddata2$H3TO130, recodes="1=1; 0=0; else=NA")
mergeddata2$male_3<-Recode(mergeddata2$BIO_SEX3, recodes="1=1; 2=0; else=NA")
mergeddata2$H3IR4 <- as.numeric(mergeddata2$H3IR4)
mergeddata2$race_3<-Recode(mergeddata2$H3IR4, recodes= "1='nwhite';
2='nhblack'; 3:5='other'; else=NA", as.factor=T)
mergeddata2$H3MN11 <- as.numeric(mergeddata2$H3MN11)
mergeddata2$clo_ment3<-Recode(mergeddata2$H3MN11, recodes= "0='no';
1:2='somewhat'; 3:4='very'; else=NA", as.factor=T)
mergeddata2$H3EC57 <- as.numeric(mergeddata2$H3EC57)
mergeddata2$div_poss3<-Recode(mergeddata2$H3EC57, recodes= "1='will';
2:3='morelikely'; 4:5='notlikely'; 6='already'; else=NA", as.factor=T)
## thoughts of suicide at wave four
mergeddata2$sui_4<-Recode(mergeddata2$H4SE1, recodes="1=1; 0=0; else=NA")
##race and divorce not sig may take out
myvars<-c( "AID", "age_3", "clo_ment3", "male_3", "fri_sui3", "educ_re3", "div_poss3",
"race_3", "fam_sui3", "sui_1", "sui_3", "sui_4")
mergeddata2<-mergeddata2[,myvars]
sam <- mergeddata2 %>%
filter(complete.cases(.))
sam<-sam %>%
mutate(suitran_1 =ifelse(sui_1==0 & sui_3 ==0, 0,1),
sui_tran2 =ifelse(suitran_1==1, NA,
ifelse(sui_3==0 & sui_4==0,0,1)))
#suicide thought transition based on self reported education at wave 3
fit<-survfit(formula=Surv(time=age_3, event =suitran_1)~educ_re3, data=sam)
summary(fit)
## Call: survfit(formula = Surv(time = age_3, event = suitran_1) ~ educ_re3,
## data = sam)
##
## educ_re3=col
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 20 503 1 0.998 0.00199 0.994 1.000
## 21 500 1 0.996 0.00281 0.991 1.000
## 22 492 12 0.972 0.00745 0.957 0.986
## 23 397 21 0.920 0.01300 0.895 0.946
## 24 239 17 0.855 0.01949 0.817 0.894
## 25 90 10 0.760 0.03320 0.698 0.828
## 26 12 3 0.570 0.09819 0.407 0.799
## 27 1 1 0.000 NaN NA NA
##
## educ_re3=high
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 19 811 12 0.985 0.00424 0.9769 0.994
## 20 739 14 0.967 0.00646 0.9540 0.979
## 21 635 33 0.916 0.01049 0.8960 0.937
## 22 491 32 0.857 0.01415 0.8293 0.885
## 23 335 32 0.775 0.01879 0.7388 0.812
## 24 217 21 0.700 0.02302 0.6561 0.746
## 25 101 6 0.658 0.02720 0.6070 0.714
## 26 34 4 0.581 0.04357 0.5014 0.673
## 27 2 1 0.290 0.20649 0.0721 1.000
##
## educ_re3=lesshigh
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 19 293 6 0.980 0.00827 0.963 0.996
## 20 270 11 0.940 0.01421 0.912 0.968
## 21 223 11 0.893 0.01919 0.856 0.932
## 22 163 13 0.822 0.02590 0.773 0.874
## 23 110 11 0.740 0.03311 0.678 0.808
## 24 61 6 0.667 0.04107 0.591 0.753
## 25 27 4 0.568 0.05748 0.466 0.693
## 26 6 1 0.474 0.09884 0.315 0.713
##
## educ_re3=somecol
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 19 1280 24 0.981 0.00379 0.974 0.989
## 20 1133 45 0.942 0.00676 0.929 0.956
## 21 888 42 0.898 0.00930 0.880 0.916
## 22 601 34 0.847 0.01219 0.823 0.871
## 23 406 50 0.743 0.01746 0.709 0.778
## 24 244 27 0.660 0.02153 0.620 0.704
## 25 86 17 0.530 0.03321 0.469 0.599
## 26 15 1 0.495 0.04610 0.412 0.594
fit%>%
ggsurvfit(conf.int = T,
risk.table = F,
title = "survivorship Function for Suicide Transition",
xlab = "Wave of survey")+
xlim(20,26)
## Warning: Ignoring unknown parameters: conf.int, risk.table, title, xlab
## Warning: Removed 14 row(s) containing missing values (geom_path).

##exponential
fit1<-phreg(Surv(time =age_3, event = suitran_1)~male_3+race_3+educ_re3+div_poss3+fam_sui3+fri_sui3+clo_ment3, data=sam, dist="weibull",
shape=1)
summary(fit1)
## Covariate Mean Coef Rel.Risk S.E. LR p
## male_3 0.432 -0.342 0.710 0.093 0.0002
## race_3 0.3907
## nhblack 0.226 0 1 (reference)
## nwhite 0.726 0.143 1.154 0.111
## other 0.047 0.199 1.220 0.220
## educ_re3 0.0061
## col 0.185 0 1 (reference)
## high 0.281 0.434 1.543 0.149
## lesshigh 0.101 0.500 1.649 0.180
## somecol 0.433 0.432 1.541 0.140
## div_poss3 0.2483
## already 0.013 0 1 (reference)
## morelikely 0.098 -0.168 0.845 0.343
## notlikely 0.876 -0.380 0.684 0.322
## will 0.013 -0.055 0.947 0.432
## fam_sui3 0.034 0.576 1.780 0.179 0.0026
## fri_sui3 0.073 0.467 1.595 0.138 0.0013
## clo_ment3 0.0027
## no 0.106 0 1 (reference)
## somewhat 0.315 -0.486 0.615 0.141
## very 0.579 -0.406 0.666 0.128
##
## Events 524
## Total time at risk 63985
## Max. log. likelihood -3004.7
## LR test statistic 74.25
## Degrees of freedom 13
## Overall p-value 1.31407e-10
##empirical (cox)
fitle<-coxreg(Surv(time =age_3, event = suitran_1)~male_3+race_3+educ_re3+div_poss3+fam_sui3+fri_sui3+clo_ment3, data=sam)
check.dist(fitle, fit1, main = "Exponential")

##weibull
fit2<-phreg(Surv(time =age_3, event = suitran_1)~male_3+race_3+educ_re3+div_poss3+fam_sui3+fri_sui3+clo_ment3, data=sam, dist="weibull")
summary(fit2)
## Covariate Mean Coef Rel.Risk S.E. LR p
## male_3 0.432 -0.467 0.627 0.093 0.0000
## race_3 0.3960
## nhblack 0.226 0 1 (reference)
## nwhite 0.726 0.143 1.154 0.111
## other 0.047 0.190 1.209 0.220
## educ_re3 0.0000
## col 0.185 0 1 (reference)
## high 0.281 0.840 2.316 0.149
## lesshigh 0.101 1.052 2.865 0.182
## somecol 0.433 1.067 2.905 0.140
## div_poss3 0.2849
## already 0.013 0 1 (reference)
## morelikely 0.098 0.071 1.073 0.343
## notlikely 0.876 -0.182 0.834 0.322
## will 0.013 -0.341 0.711 0.433
## fam_sui3 0.034 0.587 1.799 0.181 0.0023
## fri_sui3 0.073 0.710 2.034 0.140 0.0000
## clo_ment3 0.0100
## no 0.106 0 1 (reference)
## somewhat 0.315 -0.399 0.671 0.141
## very 0.579 -0.385 0.680 0.128
##
## Events 524
## Total time at risk 63985
## Max. log. likelihood -1900.4
## LR test statistic 154.46
## Degrees of freedom 13
## Overall p-value 0
check.dist(fitle, fit2, main = "Weibull")

##piecewise constant
fit3<-pchreg(Surv(time = age_3, event = suitran_1)~male_3+race_3+educ_re3+div_poss3+fam_sui3+fri_sui3+clo_ment3, data=sam, cuts = c(20, 22, 24,26))
summary(fit3)
## Covariate Mean Coef Rel.Risk S.E. LR p
## male_3 0.432 -0.445 0.641 0.105 0.0000
## race_3 0.1857
## nhblack 0.226 0 1 (reference)
## nwhite 0.726 0.225 1.253 0.129
## other 0.047 0.275 1.317 0.247
## educ_re3 0.0000
## col 0.185 0 1 (reference)
## high 0.281 0.727 2.069 0.156
## lesshigh 0.101 0.795 2.215 0.199
## somecol 0.433 0.817 2.265 0.148
## div_poss3 0.3218
## already 0.013 0 1 (reference)
## morelikely 0.098 -0.099 0.906 0.352
## notlikely 0.876 -0.362 0.696 0.323
## will 0.013 -0.277 0.758 0.442
## fam_sui3 0.034 0.627 1.871 0.214 0.0064
## fri_sui3 0.073 0.392 1.480 0.180 0.0368
## clo_ment3 0.0089
## no 0.106 0 1 (reference)
## somewhat 0.315 -0.497 0.608 0.159
## very 0.579 -0.373 0.689 0.143
##
## Events 524
## Total time at risk 63985
## Max. log. likelihood -1456.2
## LR test statistic 87.54
## Degrees of freedom 13
## Overall p-value 4.11671e-13
##
## Restricted mean survival: 4.919595 in (20, 26]
check.dist(fitle, fit3, main = "Piecewise Exponential")

##exponential with interactions
fit4<-phreg(Surv(time =age_3, event = suitran_1)~male_3+race_3+educ_re3+div_poss3+fam_sui3+fri_sui3+clo_ment3
+male_3*fri_sui3 + male_3*fam_sui3, data=sam, dist="weibull",
shape=1)
summary(fit4)
## Single term deletions
##
## Model:
## Surv(time = age_3, event = suitran_1) ~ male_3 + race_3 + educ_re3 +
## div_poss3 + fam_sui3 + fri_sui3 + clo_ment3 + male_3 * fri_sui3 +
## male_3 * fam_sui3
## Df AIC LRT Pr(>Chi)
## <none> 6037
## race_3 2 6035 1.94 0.3784
## educ_re3 3 6044 12.80 0.0051 **
## div_poss3 3 6035 4.28 0.2327
## clo_ment3 2 6045 11.70 0.0029 **
## male_3:fri_sui3 1 6038 2.57 0.1090
## male_3:fam_sui3 1 6035 0.38 0.5401
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Covariate Mean Coef Rel.Risk S.E. Wald p
## male_3 0.432 -0.420 0.657 0.101 0.0000
## race_3
## nhblack 0.226 0 1 (reference)
## nwhite 0.726 0.144 1.155 0.111 0.1947
## other 0.047 0.208 1.231 0.220 0.3449
## educ_re3
## col 0.185 0 1 (reference)
## high 0.281 0.443 1.557 0.149 0.0029
## lesshigh 0.101 0.506 1.659 0.180 0.0050
## somecol 0.433 0.438 1.549 0.140 0.0017
## div_poss3
## already 0.013 0 1 (reference)
## morelikely 0.098 -0.158 0.854 0.343 0.6444
## notlikely 0.876 -0.364 0.695 0.322 0.2577
## will 0.013 0.016 1.016 0.433 0.9714
## fam_sui3 0.034 0.482 1.620 0.226 0.0325
## fri_sui3 0.073 0.281 1.324 0.182 0.1235
## clo_ment3
## no 0.106 0 1 (reference)
## somewhat 0.315 -0.483 0.617 0.141 0.0006
## very 0.579 -0.404 0.667 0.128 0.0016
## male_3:fri_sui3
## : 0.459 1.582 0.284 0.1061
## male_3:fam_sui3
## : 0.231 1.260 0.375 0.5379
##
## Events 524
## Total time at risk 63985
## Max. log. likelihood -3002.5
## LR test statistic 78.45
## Degrees of freedom 15
## Overall p-value 1.33928e-10
##empirical (cox) with interactions
fitle2<-coxreg(Surv(time =age_3, event = suitran_1)~male_3+race_3+educ_re3+div_poss3+fam_sui3+fri_sui3+clo_ment3
+male_3*fri_sui3 +male_3*fam_sui3, data=sam)
check.dist(fitle2, fit4, main = "Exponential with interactions")

##weibull with interactions
fit5<-phreg(Surv(time =age_3, event=suitran_1)~male_3+race_3+educ_re3+div_poss3+fam_sui3+fri_sui3+clo_ment3
+male_3*fri_sui3 +male_3*fam_sui3, data=sam, dist="weibull")
summary(fit5)
## Single term deletions
##
## Model:
## Surv(time = age_3, event = suitran_1) ~ male_3 + race_3 + educ_re3 +
## div_poss3 + fam_sui3 + fri_sui3 + clo_ment3 + male_3 * fri_sui3 +
## male_3 * fam_sui3
## Df AIC LRT Pr(>Chi)
## <none> 3828
## race_3 2 3826 2.2 0.331
## educ_re3 3 3898 75.7 2.6e-16 ***
## div_poss3 3 3825 3.2 0.360
## clo_ment3 2 3833 8.9 0.012 *
## male_3:fri_sui3 1 3832 6.3 0.012 *
## male_3:fam_sui3 1 3829 2.8 0.097 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Covariate Mean Coef Rel.Risk S.E. Wald p
## male_3 0.432 -0.515 0.597 0.101 0.0000
## race_3
## nhblack 0.226 0 1 (reference)
## nwhite 0.726 0.156 1.168 0.111 0.1628
## other 0.047 0.213 1.237 0.220 0.3334
## educ_re3
## col 0.185 0 1 (reference)
## high 0.281 0.843 2.324 0.149 0.0000
## lesshigh 0.101 1.081 2.948 0.182 0.0000
## somecol 0.433 1.087 2.966 0.140 0.0000
## div_poss3
## already 0.013 0 1 (reference)
## morelikely 0.098 0.069 1.071 0.343 0.8416
## notlikely 0.876 -0.163 0.849 0.322 0.6119
## will 0.013 -0.315 0.730 0.435 0.4691
## fam_sui3 0.034 0.785 2.193 0.227 0.0005
## fri_sui3 0.073 0.456 1.578 0.183 0.0129
## clo_ment3
## no 0.106 0 1 (reference)
## somewhat 0.315 -0.396 0.673 0.141 0.0048
## very 0.579 -0.376 0.687 0.128 0.0034
## male_3:fri_sui3
## : 0.732 2.079 0.286 0.0106
## male_3:fam_sui3
## : -0.626 0.535 0.380 0.0997
##
## Events 524
## Total time at risk 63985
## Max. log. likelihood -1896.9
## LR test statistic 161.37
## Degrees of freedom 15
## Overall p-value 0
check.dist(fitle2, fit5, main = "Weibull with interactions")

##piecewise with interactions
fit6<-pchreg(Surv(time = age_3, event = suitran_1)~male_3+race_3+educ_re3+div_poss3+fam_sui3+fri_sui3+clo_ment3+male_3*fri_sui3 +male_3*fam_sui3, data=sam, cuts = c(20, 22, 24,26))
summary(fit6)
## Single term deletions
##
## Model:
## Surv(time = age_3, event = suitran_1) ~ male_3 + race_3 + educ_re3 +
## div_poss3 + fam_sui3 + fri_sui3 + clo_ment3 + male_3 * fri_sui3 +
## male_3 * fam_sui3
## Df AIC LRT Pr(>Chi)
## <none> 2941
## race_3 2 2941 3.6 0.1630
## educ_re3 3 2974 38.9 1.8e-08 ***
## div_poss3 3 2938 3.1 0.3696
## clo_ment3 2 2946 9.1 0.0107 *
## male_3:fri_sui3 1 2946 7.4 0.0067 **
## male_3:fam_sui3 1 2940 1.3 0.2632
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Covariate Mean Coef Rel.Risk S.E. Wald p
## male_3 0.432 -0.512 0.599 0.113 0.0000
## race_3
## nhblack 0.226 0 1 (reference)
## nwhite 0.726 0.232 1.261 0.129 0.0725
## other 0.047 0.296 1.345 0.247 0.2304
## educ_re3
## col 0.185 0 1 (reference)
## high 0.281 0.728 2.070 0.156 0.0000
## lesshigh 0.101 0.821 2.272 0.199 0.0000
## somecol 0.433 0.828 2.289 0.148 0.0000
## div_poss3
## already 0.013 0 1 (reference)
## morelikely 0.098 -0.098 0.907 0.352 0.7812
## notlikely 0.876 -0.344 0.709 0.323 0.2880
## will 0.013 -0.211 0.810 0.443 0.6342
## fam_sui3 0.034 0.775 2.171 0.271 0.0042
## fri_sui3 0.073 -0.022 0.978 0.257 0.9326
## clo_ment3
## no 0.106 0 1 (reference)
## somewhat 0.315 -0.488 0.614 0.159 0.0021
## very 0.579 -0.359 0.699 0.143 0.0120
## male_3:fri_sui3
## : 0.993 2.699 0.365 0.0065
## male_3:fam_sui3
## : -0.496 0.609 0.446 0.2663
##
## Events 524
## Total time at risk 63985
## Max. log. likelihood -1452.5
## LR test statistic 94.90
## Degrees of freedom 15
## Overall p-value 1.19682e-13
##
## Restricted mean survival: 4.93449 in (20, 26]
check.dist(fitle2, fit6, main = "Piecewise Exponential with interactions")

##AIC exponential
AIC(fit1)
## [1] 6037.3
##AIC wiebull
AIC(fit2)
## [1] 3830.753
##AIC piecewise constant
-2*fit3$loglik[2]+length(coef(fit3))
## [1] 2925.32
##AIC exponential with interactions
AIC(fit4)
## [1] 6037.097
##AIC wiebull with interactions
AIC(fit5)
## [1] 3827.846
##AIC piecewise constant with interactions
-2*fit3$loglik[2]+length(coef(fit6))
## [1] 2927.32
##Parametric models
##1) Carry out the following analysis: Define your outcome as in HW 1. Also consider what covariates are hypothesized to affect the outcome variable. Define these and construct a parametric model for your outcome. Fit the parametric model of your choosing to the data.
## The following homework made use of the variables thoughts of suicide at wave 1,2 and 3, highest level of education reached including college, some college, highschool, and less than highschool, age at time of survey, as commong demographic variables. Combing through the codebook it was decided to test for associations between covariates that might increase chances of suicidal thought including the binary variables of closeness to a mentor, if family or friend had recently attempted suicide in the past year, and if the respondent felt they would be divorced in the future in the latter case this was done via categorical variable.
##Did you choose an AFT or PH model and why?
##The following models will be based on the proportional hazards models, as similarly to logistic regression we want to see who is at greater or less risk of experiencing thoughts of suicide based on the chosen covariates. As our primary dependent variable is also a binary outcome, we assume this data will have a specific distribution and thus does not requre something similar to a linear model.
##Justify what parametric distribution you choose and Carry out model fit diagnostics for the model
## Based on the model fit statistics produced by the AIC the piecewise constant was the best fit for our model for predicting thoughts of suicide, following the wiebull, with the exponential being the least fit for our model.
##Include all main effects in the model and Provide tabular and graphical output to support your conclusions
fit3<-pchreg(Surv(time = age_3, event = suitran_1)~male_3+race_3+educ_re3+div_poss3+fam_sui3+fri_sui3+clo_ment3, data=sam, cuts = c(20, 22, 24,26))
summary(fit3)
## Covariate Mean Coef Rel.Risk S.E. LR p
## male_3 0.432 -0.445 0.641 0.105 0.0000
## race_3 0.1857
## nhblack 0.226 0 1 (reference)
## nwhite 0.726 0.225 1.253 0.129
## other 0.047 0.275 1.317 0.247
## educ_re3 0.0000
## col 0.185 0 1 (reference)
## high 0.281 0.727 2.069 0.156
## lesshigh 0.101 0.795 2.215 0.199
## somecol 0.433 0.817 2.265 0.148
## div_poss3 0.3218
## already 0.013 0 1 (reference)
## morelikely 0.098 -0.099 0.906 0.352
## notlikely 0.876 -0.362 0.696 0.323
## will 0.013 -0.277 0.758 0.442
## fam_sui3 0.034 0.627 1.871 0.214 0.0064
## fri_sui3 0.073 0.392 1.480 0.180 0.0368
## clo_ment3 0.0089
## no 0.106 0 1 (reference)
## somewhat 0.315 -0.497 0.608 0.159
## very 0.579 -0.373 0.689 0.143
##
## Events 524
## Total time at risk 63985
## Max. log. likelihood -1456.2
## LR test statistic 87.54
## Degrees of freedom 13
## Overall p-value 4.11671e-13
##
## Restricted mean survival: 4.919595 in (20, 26]
check.dist(fitle, fit3, main = "Piecewise Exponential")

exp(coef(fit3))
## male_3 race_3nwhite race_3other educ_re3high
## 0.6409949 1.2525085 1.3165892 2.0686538
## educ_re3lesshigh educ_re3somecol div_poss3morelikely div_poss3notlikely
## 2.2148815 2.2645180 0.9056136 0.6960879
## div_poss3will fam_sui3 fri_sui3 clo_ment3somewhat
## 0.7584214 1.8710617 1.4796049 0.6083156
## clo_ment3very
## 0.6887335
##Test for an interaction between at least two of the predictors
## These will be based on sex and family/friends suicide will also be fitted to another set of models
##see models 4,5,6
exp(coef(fit6))
## male_3 race_3nwhite race_3other educ_re3high
## 0.5990329 1.2608302 1.3450261 2.0702141
## educ_re3lesshigh educ_re3somecol div_poss3morelikely div_poss3notlikely
## 2.2716707 2.2894402 0.9069729 0.7092027
## div_poss3will fam_sui3 fri_sui3 clo_ment3somewhat
## 0.8100831 2.1707671 0.9784747 0.6135705
## clo_ment3very male_3:fri_sui3 male_3:fam_sui3
## 0.6985998 2.6985330 0.6091520
## The models were run again, as per fits 4,5,6 but this time included an interaction term wherein sex (1=male, female=0) was multiplied by if family/friends had attempted suicide in the previous year. Post this interaction term based models had their goodness of fit run, and checked against the prior models. It was largely striking, first once again the initial piecewise model was the best fit, second the AIC were nearly identical to the first set of models. Being within decimal points from each other which was striking. The results were largely similar to model 3. Interestingly, adding in the interaction terms actually decreased the magnitude of the association by at least .50. But, males with family or friends who attempted suicide were far more likely to have thoughts of suicide compared to those who did not, which was the opposite of the association in the base sex term. Along with this, friends who had attempted suicide, and both interaction terms were not significant associations, along with the family suicide barely being significant at all. It is likely that i will not be using these terms again. Holding all other variables in the model into account this also occurs with age increasing.
##Interpret your results and write them up
## According to the results compared to blacks, whites and those of the other racial category are at higher risk of having suicidal thoughts, with others having the highest risk of these thoughts having that risk of 1.31 times higher, whites was 1.25 times higher. Along with males actually having less of a chance compared to females. In terms of education, as college is the reference category i found it fascinating that having some college education was associated with risks upwards of 2.26 times, with just highschool having those lowest odds at 2.06 times more. It assumingly also meant that thoughts of divorce had no real affect on the model, as didn't matter if people were likely, not, or will get divorced in the future these were associated with less risk of actually thinking of suicide than being divorced. As expected if family or friends had attempted suicide in the last 12 months, these were associated with increased risk of thinking about suicide with risks being 1.87 and 1.47 times respectively. Interestingly, this association was higher for family member, there are some other family based variables so i might consider working with those. Lastly, feeling of closeness to a mentor were associated with comparable lower levels of risk of thoughts of suicide. Interestingly, race, and as previously states possible divorce in the future were not statistically significant for the model, and could possibly be removed for future assignments. With sex .001>, closeness of mentor, family and friend suicide being significantly associated with having thoughts of suicide.It is likely that i will not be using these terms again. Holding all other variables in the model into account this also occurs with age increasing.