library(haven)
#download datasets - AddHealth
wave1<-read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0001/da27021p1.dta")
wave3<-read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0003/da27021p3.dta")
wave4<-read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0012/da27021p12.dta")
addhealth.wghtW1<-read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0025/da27021p25.dta")
#addhealth<-merge(wave1,wave3,by="aid")
addhealth1<-merge(wave1,wave3,by="aid")
addhealth1<-merge(addhealth1,wave4,by="aid")
addhealth1<-merge(addhealth1,addhealth.wghtW1,by="aid")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#select variables to use
addhealth<-addhealth1%>%
select(GSWGT1,region,psuscid,BIO_SEX,H3SE13,H1GI6A,H1GI6B,H1GI4,H1GI6D,H1GI6E,H4ED2,iyear,imonth,H1GI1M,H1GI1Y,IYEAR3,IMONTH3,H3OD1Y,H3OD1M,H4OD1M,H4OD1Y,IMONTH4,IYEAR4,H4MH22,H3GH1,H4GH1,H1GI6C,H1FS6,H3SP9,H4DS18,H3DS18F,H4MH22,H4GH1,H3DS18G,H1FV6,H4LM1,H3HR2,H4OD4)
#complete cases
addhealth<-addhealth%>%
filter(complete.cases(.))
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
#UScitizen
addhealth$UScitizen<-Recode(addhealth$H4OD4,recodes="0='a_no';1='b_yes';else=NA",as.factor=T)
#residence
addhealth$w3.residence<-Recode(addhealth$H3HR2,recodes="1='a_withparents';2='b_withsomeone';3='c_myownplace';4='d_group.qrts';5='e_homeless';6='f_other';else=NA",as.factor=T)
#birth year w1
addhealth$birthyearw1 <- factor(ifelse(addhealth$H1GI1Y=="74",1974,
ifelse(addhealth$H1GI1Y=="75",1975,
ifelse(addhealth$H1GI1Y=="76",1976,
ifelse(addhealth$H1GI1Y=="77",1977,
ifelse(addhealth$H1GI1Y=="78",1978,
ifelse(addhealth$H1GI1Y=="79",1979,
ifelse(addhealth$H1GI1Y=="80",1980,
ifelse(addhealth$H1GI1Y=="81",1981,
ifelse(addhealth$H1GI1Y=="82",1982,
ifelse(addhealth$H1GI1Y=="83",1983,1979)))))))))))
#birth month w1
addhealth$birthmonthw1 <- Recode(addhealth$H1GI1M,recodes="1=1;2=2;3=3;4=4;5=5;6=6;7=7;8=8;9=9;10=10;11=11;12=12;else=NA")
#combine year and month for a birth date wave 1
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
addhealth$birthdatew1 <- as.yearmon(paste(addhealth$birthyearw1, addhealth$birthmonthw1), "%Y %m")
#birth month wave 4
addhealth$birthmonthw4 <- Recode(addhealth$H4OD1M,recodes="1=1;2=2;3=3;4=4;5=5;6=6;7=7;8=8;9=9;10=10;11=11;12=12")
#birth year wave 4
addhealth$birthyearw4<-addhealth$H4OD1Y
addhealth$birthyearw4 <- Recode(addhealth$H4OD1Y,recodes="1974=1974;1975=1975;1976=1976;1977=1977;1978=1978;1979=1979;1980=1980;1981=1981;1982=1982;1983=1983")
#combine year and month for birth date wave 4
addhealth$birthdatew4 <- as.yearmon(paste(addhealth$birthyearw4, addhealth$birthmonthw4), "%Y %m")
#birth date wave 3
addhealth$birthdatew3<-as.yearmon(paste(addhealth$H3OD1Y,addhealth$H3OD1M),"%Y %m")
#interview date w1
addhealth$iyearfix <- Recode(addhealth$iyear,recodes="'94'='1994';'95'='1995'")
addhealth$interviewdatew1 <- as.yearmon(paste(addhealth$iyearfix, addhealth$imonth), "%Y %m")
#interview date w4
addhealth$interviewmonthw4 <- Recode(addhealth$IMONTH4,recodes="1=1;2=2;3=3;4=4;5=5;6=6;7=7;8=8;9=9;10=10;11=11;12=12")
addhealth$interviewyearw4 <- Recode(addhealth$IYEAR4,recodes="2007=2007;2008=2008;2009=2009")
addhealth$interviewdatew4<-as.yearmon(paste(addhealth$interviewyearw4,addhealth$interviewmonthw4),"%Y %m")
#interview date w3
addhealth$interviewdatew3<-as.yearmon(paste(addhealth$IYEAR3,addhealth$IMONTH3 ),"%Y %m")
#age - derived from date of birth subtracted from wave interview date
addhealth$agew4<-(as.numeric(round(addhealth$interviewdatew4 - addhealth$birthdatew4)))
addhealth$agew3<-(as.numeric(round(addhealth$interviewdatew3 - addhealth$birthdatew3)))
addhealth$agew1<-(as.numeric(round(addhealth$interviewdatew1 - addhealth$birthdatew1)))
#sex variable - Male is reference group
addhealth$sex <- ifelse(addhealth$BIO_SEX==2,2,1)
addhealth$sex<-Recode(addhealth$sex, recodes="1='a_male'; 2='b_female'",as.factor=T)
#sexual orientation from wave 3. Reference group is straight people
addhealth$sexorient <- factor(ifelse(addhealth$H3SE13=="(1) 100% heterosexual (straight)",1,
ifelse(addhealth$H3SE13=="(2) Mostly #heterosexual.somewhat attracted to people of own",2,
ifelse(addhealth$H3SE13=="(3) Bisexual-attracted #to men and women equally",3,
ifelse(addhealth$H3SE13=="(4) Mostly #homosexual.somewhat attracted to opposite sex",4,
ifelse(addhealth$H3SE13=="(5) 100% #homosexual (gay)",5,NA))))))
addhealth$sexorient<-Recode(addhealth$H3SE13, recodes="1:2='a_straight'; 3='b_bisexual';4:5='c_LGB';else=NA",as.factor=T)
#race variables - Reference group is nhwhite
#nhwhite
addhealth$nhwhite <- ifelse(addhealth$H1GI6A==1,1,0)
addhealth$nhwhite<-Recode(addhealth$nhwhite, recodes="1='nhwhite'; 0='nonnhwhite'; 6:8=NA",as.factor=T)
#nhblack
addhealth$nhblack <- ifelse(addhealth$H1GI6B==1,1,0)
addhealth$nhblack<-Recode(addhealth$nhblack, recodes="1='nhblack'; 0='nonnhblack'; 6:8=NA",as.factor=T)
#Hispanic
addhealth$hispanic <- ifelse(addhealth$H1GI4==1,1,0)
addhealth$hispanic<-Recode(addhealth$hispanic, recodes="1='hispanic'; 0='nonhispanic'; 6:8=NA",as.factor=T)
#Asian
addhealth$asian <- ifelse(addhealth$H1GI6D==1,1,0)
addhealth$asian<-Recode(addhealth$asian, recodes="1='asian'; 0='nonasian'; 6:8=NA",as.factor=T)
#Native American
addhealth$native_american <- ifelse(addhealth$H1GI6C==1,1,0)
addhealth$native_american<-Recode(addhealth$native_american, recodes="1='native_american'; 0='nonnative_american'; 6:8=NA",as.factor=T)
#other
addhealth$other <- ifelse(addhealth$H1GI6E==1,1,0)
addhealth$other<-Recode(addhealth$other, recodes="1='other'; 0='nonother'; 6:8=NA",as.factor=T)
#combine race to one variable
addhealth$racethnic <- factor(ifelse(addhealth$nhwhite=="nhwhite","a-nhwhite",
ifelse(addhealth$nhblack=="nhblack", "b-nhblack",
ifelse(addhealth$hispanic=="hispanic", "c-hispanic",
ifelse(addhealth$asian=="asian","d-asian",
ifelse(addhealth$native_american=="native_american","e-native_american",
ifelse(addhealth$other=="other","f-other",NA)))))))
#Education. Less then high school is reference group
addhealth$educ<-Recode(addhealth$H4ED2,recodes="1:2='a_less_highschool';3:6='b_highschool_grad';7='c_college_bach';8:13='d_college+';else=NA",as.factor=T)
#transition
addhealth$W1.jumped.beatenup<-Recode(addhealth$H1FV6, recodes="0='a.no'; 1:2='b.yes';else=NA")
addhealth$W3.jumped1<-ifelse(addhealth$H3DS18F==1,1,
ifelse(addhealth$H3DS18F==0,0,NA))
addhealth$W3.jumped2<-ifelse(addhealth$H3DS18G==1,1,
ifelse(addhealth$H3DS18G==0,0,NA))
addhealth$transition.w1<-ifelse(addhealth$W1.jumped.beatenup=="a.no"&addhealth$W3.jumped1==1|addhealth$W3.jumped2==1,1,0)
addhealth$W4.jumped.beatenup<-Recode(addhealth$H4DS18, recodes="0='a.no'; 1='b.yes';else=NA")
addhealth$transition<-ifelse(addhealth$transition.w1==0&addhealth$W4.jumped.beatenup=="b.yes",1,0)
table(addhealth$transition)
##
## 0 1
## 10880 1364
#select variables to use
addhealth<-addhealth%>%
select(psuscid,region,GSWGT1,agew1,agew3, agew4, sex,sexorient,racethnic,educ,transition,transition.w1,w3.residence,UScitizen)
#filter complete cases
addhealth<-addhealth%>%
filter(complete.cases(.))
adlong<-reshape(data.frame(addhealth), idvar = 'aid', varying=list(c('agew1', 'agew3'), c('agew3','agew4')),
v.names = c('age_enter', 'age_exit'),
times = 1:2, direction='long')
adlong<-adlong[order(adlong$aid, adlong$time),]
head(adlong)
## psuscid region GSWGT1 sex sexorient racethnic
## 1.1 371 1 69.1244 a_male a_straight b-nhblack
## 1.2 371 1 69.1244 a_male a_straight b-nhblack
## 2.1 078 1 175.6404 a_male a_straight a-nhwhite
## 2.2 078 1 175.6404 a_male a_straight a-nhwhite
## 3.1 044 3 318.6804 b_female a_straight a-nhwhite
## 3.2 044 3 318.6804 b_female a_straight a-nhwhite
## educ transition transition.w1 w3.residence UScitizen
## 1.1 b_highschool_grad 0 0 a_withparents b_yes
## 1.2 b_highschool_grad 0 0 a_withparents b_yes
## 2.1 c_college_bach 0 0 c_myownplace b_yes
## 2.2 c_college_bach 0 0 c_myownplace b_yes
## 3.1 b_highschool_grad 0 0 c_myownplace b_yes
## 3.2 b_highschool_grad 0 0 c_myownplace b_yes
## time age_enter age_exit aid
## 1.1 1 16 22 1
## 1.2 2 22 29 1
## 2.1 1 14 20 2
## 2.2 2 20 27 2
## 3.1 1 14 20 3
## 3.2 2 20 26 3
adlong$trans<-NA
adlong$trans[adlong$transition.w1==0& adlong$time==1]<-0
adlong$trans[adlong$transition.w1==1& adlong$time==1]<-1
adlong$trans[adlong$transition==0& adlong$time==2]<-0
adlong$trans[adlong$transition==1& adlong$time==2]<-1
#recode race/ethnicity
adlong$racethnic_new<-as.factor(ifelse(adlong$racethnic%in%c("d-asian","e-native_american","f-other"), "other", adlong$racethnic))
adlong$racethnic_new<-Recode(adlong$racethnic_new, recodes = "1='a_white'; 2='b_black'; 3='c_hispanic'", as.factor=T)
library(survminer)
## Loading required package: ggplot2
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
## Loading required package: ggpubr
## Loading required package: magrittr
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(survival)
#Survey design
library(survey)
des2<-svydesign(ids=~psuscid,
strata = ~region,
weights=~GSWGT1,
data=adlong,
nest=T)
#fit to cox model
library(eha)
fit.cox<-coxreg(Surv(time = time,event = trans)~age_enter-1,data = adlong)
summary(fit.cox)
## Call:
## coxreg(formula = Surv(time = time, event = trans) ~ age_enter -
## 1, data = adlong)
##
## Covariate Mean Coef Rel.Risk S.E. Wald p
## age_enter 20.320 -0.101 0.904 0.012 0.000
##
## Events 1614
## Total time at risk 36150
## Max. log. likelihood -15235
## LR test statistic 76.92
## Degrees of freedom 1
## Overall p-value 0
#Cox model with all main effects in the model
fit1<-svycoxph(Surv(time=time,event=trans)~sex+sexorient+racethnic_new+educ+w3.residence+UScitizen ,design=des2)
summary(fit1)
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (132) clusters.
## svydesign(ids = ~psuscid, strata = ~region, weights = ~GSWGT1,
## data = adlong, nest = T)
## Call:
## svycoxph(formula = Surv(time = time, event = trans) ~ sex + sexorient +
## racethnic_new + educ + w3.residence + UScitizen, design = des2)
##
## n= 24100, number of events= 1614
##
## coef exp(coef) se(coef) z Pr(>|z|)
## sexb_female 0.02851 1.02892 0.07102 0.401 0.68808
## sexorientb_bisexual 0.34674 1.41445 0.19777 1.753 0.07956 .
## sexorientc_LGB 0.12652 1.13488 0.23405 0.541 0.58879
## racethnic_newb_black 0.32833 1.38865 0.07783 4.218 2.46e-05 ***
## racethnic_newc_hispanic 0.29407 1.34188 0.10934 2.690 0.00715 **
## racethnic_newother -0.01568 0.98444 0.20098 -0.078 0.93780
## educb_highschool_grad -0.21941 0.80300 0.12459 -1.761 0.07823 .
## educc_college_bach -0.45523 0.63430 0.15205 -2.994 0.00275 **
## educd_college+ -0.45818 0.63244 0.16971 -2.700 0.00694 **
## w3.residenceb_withsomeone 0.17733 1.19403 0.13674 1.297 0.19469
## w3.residencec_myownplace 0.06340 1.06546 0.06977 0.909 0.36348
## w3.residenced_group.qrts 0.09031 1.09451 0.17776 0.508 0.61142
## w3.residencee_homeless 0.88251 2.41696 0.85849 1.028 0.30396
## w3.residencef_other 0.13876 1.14885 0.30732 0.452 0.65161
## UScitizenb_yes 0.03497 1.03559 0.18184 0.192 0.84749
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## sexb_female 1.0289 0.9719 0.8952 1.1826
## sexorientb_bisexual 1.4144 0.7070 0.9599 2.0842
## sexorientc_LGB 1.1349 0.8812 0.7173 1.7954
## racethnic_newb_black 1.3887 0.7201 1.1922 1.6175
## racethnic_newc_hispanic 1.3419 0.7452 1.0830 1.6626
## racethnic_newother 0.9844 1.0158 0.6639 1.4597
## educb_highschool_grad 0.8030 1.2453 0.6290 1.0251
## educc_college_bach 0.6343 1.5765 0.4708 0.8545
## educd_college+ 0.6324 1.5812 0.4535 0.8820
## w3.residenceb_withsomeone 1.1940 0.8375 0.9133 1.5610
## w3.residencec_myownplace 1.0655 0.9386 0.9293 1.2216
## w3.residenced_group.qrts 1.0945 0.9136 0.7725 1.5507
## w3.residencee_homeless 2.4170 0.4137 0.4493 13.0023
## w3.residencef_other 1.1489 0.8704 0.6290 2.0982
## UScitizenb_yes 1.0356 0.9656 0.7251 1.4790
##
## Concordance= 0.561 (se = 0.011 )
## Likelihood ratio test= NA on 15 df, p=NA
## Wald test = 73.8 on 15 df, p=9e-10
## Score (logrank) test = NA on 15 df, p=NA
#Cox model with all main effects in the model and an interaction between sex and sexual orientation
fit.interact1<-svycoxph(Surv(time=time,event=trans)~sex*sexorient+racethnic_new+educ,design=des2)
summary(fit.interact1)
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (132) clusters.
## svydesign(ids = ~psuscid, strata = ~region, weights = ~GSWGT1,
## data = adlong, nest = T)
## Call:
## svycoxph(formula = Surv(time = time, event = trans) ~ sex * sexorient +
## racethnic_new + educ, design = des2)
##
## n= 24100, number of events= 1614
##
## coef exp(coef) se(coef) z
## sexb_female 0.012349 1.012426 0.071874 0.172
## sexorientb_bisexual -1.147407 0.317459 0.923974 -1.242
## sexorientc_LGB 0.007104 1.007129 0.311442 0.023
## racethnic_newb_black 0.333245 1.395490 0.078020 4.271
## racethnic_newc_hispanic 0.279851 1.322933 0.096535 2.899
## racethnic_newother -0.027401 0.972971 0.175507 -0.156
## educb_highschool_grad -0.226442 0.797365 0.124389 -1.820
## educc_college_bach -0.458449 0.632264 0.148205 -3.093
## educd_college+ -0.455843 0.633913 0.164004 -2.779
## sexb_female:sexorientb_bisexual 1.678033 5.355010 0.949777 1.767
## sexb_female:sexorientc_LGB 0.288495 1.334418 0.487847 0.591
## Pr(>|z|)
## sexb_female 0.86358
## sexorientb_bisexual 0.21430
## sexorientc_LGB 0.98180
## racethnic_newb_black 1.94e-05 ***
## racethnic_newc_hispanic 0.00374 **
## racethnic_newother 0.87593
## educb_highschool_grad 0.06869 .
## educc_college_bach 0.00198 **
## educd_college+ 0.00544 **
## sexb_female:sexorientb_bisexual 0.07727 .
## sexb_female:sexorientc_LGB 0.55428
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## sexb_female 1.0124 0.9877 0.8794 1.1656
## sexorientb_bisexual 0.3175 3.1500 0.0519 1.9417
## sexorientc_LGB 1.0071 0.9929 0.5470 1.8543
## racethnic_newb_black 1.3955 0.7166 1.1976 1.6261
## racethnic_newc_hispanic 1.3229 0.7559 1.0949 1.5985
## racethnic_newother 0.9730 1.0278 0.6898 1.3724
## educb_highschool_grad 0.7974 1.2541 0.6249 1.0175
## educc_college_bach 0.6323 1.5816 0.4729 0.8454
## educd_college+ 0.6339 1.5775 0.4597 0.8742
## sexb_female:sexorientb_bisexual 5.3550 0.1867 0.8324 34.4518
## sexb_female:sexorientc_LGB 1.3344 0.7494 0.5129 3.4717
##
## Concordance= 0.559 (se = 0.011 )
## Likelihood ratio test= NA on 11 df, p=NA
## Wald test = 96.71 on 11 df, p=8e-16
## Score (logrank) test = NA on 11 df, p=NA
AIC(fit1)
## eff.p AIC deltabar
## 23.296748 29591.390052 1.553117
AIC(fit.interact1)
## eff.p AIC deltabar
## 17.264477 29579.325510 1.569498
#Grambsch and Therneau
fit.test<-cox.zph(fit1)
fit.test
## rho chisq p
## sexb_female 0.16900 108.4905 2.10e-25
## sexorientb_bisexual 0.06253 12.4875 4.10e-04
## sexorientc_LGB -0.00320 0.0279 8.67e-01
## racethnic_newb_black 0.03770 4.6112 3.18e-02
## racethnic_newc_hispanic -0.03341 3.5483 5.96e-02
## racethnic_newother -0.00381 0.0889 7.66e-01
## educb_highschool_grad -0.04283 7.1561 7.47e-03
## educc_college_bach 0.03596 5.3255 2.10e-02
## educd_college+ 0.04033 6.5763 1.03e-02
## w3.residenceb_withsomeone 0.02434 1.9380 1.64e-01
## w3.residencec_myownplace 0.09095 24.5983 7.06e-07
## w3.residenced_group.qrts 0.01064 0.4029 5.26e-01
## w3.residencee_homeless -0.01593 0.9828 3.22e-01
## w3.residencef_other -0.05727 9.6886 1.85e-03
## UScitizenb_yes -0.05787 15.6383 7.67e-05
## GLOBAL NA 236.7351 6.63e-42
#I am going to stratify by the variables that showed significance.
fit.stratify<-svycoxph(Surv(time=time,event=trans)~sex+sexorient+strata(racethnic_new)+strata(educ),design=des2)
summary(fit.stratify)
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (132) clusters.
## svydesign(ids = ~psuscid, strata = ~region, weights = ~GSWGT1,
## data = adlong, nest = T)
## Call:
## svycoxph(formula = Surv(time = time, event = trans) ~ sex + sexorient +
## strata(racethnic_new) + strata(educ), design = des2)
##
## n= 24100, number of events= 1614
##
## coef exp(coef) se(coef) z Pr(>|z|)
## sexb_female 0.03360 1.03417 0.07128 0.471 0.6374
## sexorientb_bisexual 0.34289 1.40901 0.19754 1.736 0.0826 .
## sexorientc_LGB 0.14291 1.15363 0.23836 0.600 0.5488
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## sexb_female 1.034 0.9670 0.8993 1.189
## sexorientb_bisexual 1.409 0.7097 0.9567 2.075
## sexorientc_LGB 1.154 0.8668 0.7231 1.841
##
## Concordance= 0.495 (se = 0.013 )
## Likelihood ratio test= NA on 3 df, p=NA
## Wald test = 3.61 on 3 df, p=0.3
## Score (logrank) test = NA on 3 df, p=NA
fit.stratify
## Call:
## svycoxph(formula = Surv(time = time, event = trans) ~ sex + sexorient +
## strata(racethnic_new) + strata(educ), design = des2)
##
## coef exp(coef) se(coef) z p
## sexb_female 0.03360 1.03417 0.07128 0.471 0.6374
## sexorientb_bisexual 0.34289 1.40901 0.19754 1.736 0.0826
## sexorientc_LGB 0.14291 1.15363 0.23836 0.600 0.5488
##
## Likelihood ratio test= on 3 df, p=
## n= 24100, number of events= 1614
#Grambsch and Therneau
fit.test<-cox.zph(fit.stratify)
fit.test
## rho chisq p
## sexb_female 0.1804 112.12 3.37e-26
## sexorientb_bisexual 0.0275 2.11 1.46e-01
## sexorientc_LGB 0.0411 3.72 5.36e-02
## GLOBAL NA 116.08 5.39e-25
#linear term for time
fit.1<-svyglm(trans~factor(age_enter)-1,design=des2,family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
summary(fit.1)
##
## Call:
## svyglm(formula = trans ~ factor(age_enter) - 1, design = des2,
## family = "binomial")
##
## Survey design:
## svydesign(ids = ~psuscid, strata = ~region, weights = ~GSWGT1,
## data = adlong, nest = T)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## factor(age_enter)11 -12.10136 1.00001 -12.101 < 2e-16 ***
## factor(age_enter)12 -1.88615 1.07535 -1.754 0.0822 .
## factor(age_enter)13 -3.13709 0.21153 -14.830 < 2e-16 ***
## factor(age_enter)14 -3.77107 0.26933 -14.002 < 2e-16 ***
## factor(age_enter)15 -3.82761 0.21212 -18.045 < 2e-16 ***
## factor(age_enter)16 -3.58145 0.15886 -22.544 < 2e-16 ***
## factor(age_enter)17 -4.03806 0.23611 -17.102 < 2e-16 ***
## factor(age_enter)18 -4.47073 0.27626 -16.183 < 2e-16 ***
## factor(age_enter)19 -2.88874 0.16890 -17.104 < 2e-16 ***
## factor(age_enter)20 -2.20200 0.10179 -21.633 < 2e-16 ***
## factor(age_enter)21 -2.21029 0.11061 -19.983 < 2e-16 ***
## factor(age_enter)22 -2.19125 0.07782 -28.157 < 2e-16 ***
## factor(age_enter)23 -2.03204 0.10756 -18.892 < 2e-16 ***
## factor(age_enter)24 -2.03765 0.06571 -31.011 < 2e-16 ***
## factor(age_enter)25 -2.07051 0.12490 -16.577 < 2e-16 ***
## factor(age_enter)26 -1.72465 0.27309 -6.315 5.73e-09 ***
## factor(age_enter)27 -0.96099 0.56662 -1.696 0.0927 .
## factor(age_enter)28 -0.34386 1.29933 -0.265 0.7918
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.9999273)
##
## Number of Fisher Scoring iterations: 10
fit.00<-svyglm(trans~age_enter+sex+sexorient+racethnic_new+educ,design=des2,family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
summary(fit.00)
##
## Call:
## svyglm(formula = trans ~ age_enter + sex + sexorient + racethnic_new +
## educ, design = des2, family = "binomial")
##
## Survey design:
## svydesign(ids = ~psuscid, strata = ~region, weights = ~GSWGT1,
## data = adlong, nest = T)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.12221 0.27816 -22.009 < 2e-16 ***
## age_enter 0.18241 0.01177 15.501 < 2e-16 ***
## sexb_female 0.07078 0.07606 0.931 0.35398
## sexorientb_bisexual 0.43284 0.21752 1.990 0.04891 *
## sexorientc_LGB 0.08110 0.24846 0.326 0.74468
## racethnic_newb_black 0.29823 0.09274 3.216 0.00168 **
## racethnic_newc_hispanic 0.26559 0.10487 2.533 0.01264 *
## racethnic_newother -0.05640 0.18572 -0.304 0.76190
## educb_highschool_grad -0.27619 0.13058 -2.115 0.03652 *
## educc_college_bach -0.48279 0.15323 -3.151 0.00206 **
## educd_college+ -0.54858 0.16632 -3.298 0.00129 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1.00188)
##
## Number of Fisher Scoring iterations: 6
fit.000<-svyglm(trans~age_enter+sex+sexorient+racethnic_new+educ+w3.residence+UScitizen,design=des2,family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
summary(fit.000)
##
## Call:
## svyglm(formula = trans ~ age_enter + sex + sexorient + racethnic_new +
## educ + w3.residence + UScitizen, design = des2, family = "binomial")
##
## Survey design:
## svydesign(ids = ~psuscid, strata = ~region, weights = ~GSWGT1,
## data = adlong, nest = T)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.33712 0.32122 -19.728 < 2e-16 ***
## age_enter 0.18672 0.01173 15.924 < 2e-16 ***
## sexb_female 0.08540 0.07553 1.131 0.26061
## sexorientb_bisexual 0.43159 0.21684 1.990 0.04898 *
## sexorientc_LGB 0.09202 0.24700 0.373 0.71018
## racethnic_newb_black 0.28146 0.09306 3.025 0.00309 **
## racethnic_newc_hispanic 0.28979 0.11666 2.484 0.01447 *
## racethnic_newother -0.02816 0.20983 -0.134 0.89349
## educb_highschool_grad -0.27973 0.13022 -2.148 0.03386 *
## educc_college_bach -0.50535 0.15741 -3.210 0.00173 **
## educd_college+ -0.56602 0.17302 -3.271 0.00142 **
## w3.residenceb_withsomeone 0.07928 0.14139 0.561 0.57609
## w3.residencec_myownplace -0.12972 0.07367 -1.761 0.08101 .
## w3.residenced_group.qrts 0.24837 0.19233 1.291 0.19924
## w3.residencee_homeless 0.62435 1.01484 0.615 0.53966
## w3.residencef_other 0.09111 0.33589 0.271 0.78671
## UScitizenb_yes 0.18349 0.18828 0.975 0.33190
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.9970073)
##
## Number of Fisher Scoring iterations: 6
AIC(fit.00)
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
## eff.p AIC deltabar
## 16.996967 10939.567262 1.699697
AIC(fit.000)
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
## eff.p AIC deltabar
## 25.863595 10943.142811 1.616475
anova(fit.00,fit.000)
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
## Working (Rao-Scott+F) LRT for w3.residence UScitizen
## in svyglm(formula = trans ~ age_enter + sex + sexorient + racethnic_new +
## educ + w3.residence + UScitizen, design = des2, family = "binomial")
## Working 2logLR = 9.488531 p= 0.16091
## (scale factors: 1.3 1.3 1.1 0.96 0.7 0.65 ); denominator df= 112
logLik(fit.00)
## Warning in logLik.svyglm(fit.00): svyglm not fitted by maximum likelihood.
## [1] 10905.57
logLik(fit.000)
## Warning in logLik.svyglm(fit.000): svyglm not fitted by maximum likelihood.
## [1] 10891.42
fit.int<-svyglm(trans~age_enter+sex*sexorient+racethnic_new+educ,design=des2,family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
summary(fit.int)
##
## Call:
## svyglm(formula = trans ~ age_enter + sex * sexorient + racethnic_new +
## educ, design = des2, family = "binomial")
##
## Survey design:
## svydesign(ids = ~psuscid, strata = ~region, weights = ~GSWGT1,
## data = adlong, nest = T)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.10941 0.27713 -22.045 < 2e-16 ***
## age_enter 0.18224 0.01176 15.491 < 2e-16 ***
## sexb_female 0.05250 0.07645 0.687 0.49361
## sexorientb_bisexual -1.01910 0.95133 -1.071 0.28629
## sexorientc_LGB -0.05567 0.32965 -0.169 0.86619
## racethnic_newb_black 0.30217 0.09298 3.250 0.00151 **
## racethnic_newc_hispanic 0.26440 0.10446 2.531 0.01270 *
## racethnic_newother -0.05243 0.18525 -0.283 0.77765
## educb_highschool_grad -0.27806 0.12990 -2.141 0.03440 *
## educc_college_bach -0.48260 0.15227 -3.169 0.00195 **
## educd_college+ -0.54305 0.16636 -3.264 0.00144 **
## sexb_female:sexorientb_bisexual 1.62251 0.98314 1.650 0.10158
## sexb_female:sexorientc_LGB 0.31347 0.50503 0.621 0.53601
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1.000591)
##
## Number of Fisher Scoring iterations: 6
#hazard for sexual orientation
fit.kaplan.LGB<-survfit(Surv(time = time , event=trans)~sexorient,data=adlong)
summary(fit.kaplan.LGB)
## Call: survfit(formula = Surv(time = time, event = trans) ~ sexorient,
## data = adlong)
##
## sexorient=a_straight
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 23344 260 0.989 0.000687 0.988 0.990
## 2 11672 1289 0.880 0.002933 0.874 0.885
##
## sexorient=b_bisexual
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 410 8 0.980 0.00683 0.967 0.994
## 2 205 33 0.823 0.02581 0.774 0.875
##
## sexorient=c_LGB
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 346 1 0.997 0.00289 0.991 1.000
## 2 173 23 0.865 0.02586 0.815 0.917
ggsurvplot(fit.kaplan.LGB,risk.table = T,fun="cumhaz",title="Cumulative Hazard Function for violence transition by sexual orientation")
#hazard for race
fit.kaplan.race<-survfit(Surv(time = time , event=trans)~racethnic_new,data=adlong)
summary(fit.kaplan.race)
## Call: survfit(formula = Surv(time = time, event = trans) ~ racethnic_new,
## data = adlong)
##
## racethnic_new=a_white
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 15510 167 0.989 0.000829 0.988 0.991
## 2 7755 805 0.887 0.003506 0.880 0.893
##
## racethnic_new=b_black
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 4980 60 0.988 0.00155 0.985 0.991
## 2 2490 324 0.859 0.00680 0.846 0.873
##
## racethnic_new=c_hispanic
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 1838 30 0.984 0.00296 0.978 0.989
## 2 919 115 0.861 0.01104 0.839 0.883
##
## racethnic_new=other
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 1772 12 0.993 0.00195 0.989 0.997
## 2 886 101 0.880 0.01074 0.859 0.901
ggsurvplot(fit.kaplan.race,risk.table = T,fun="cumhaz",title="Cumulative Hazard Function for violence transition by race")
#hazard for education
fit.kaplan.educ<-survfit(Surv(time = time , event=trans)~educ,data=adlong)
summary(fit.kaplan.educ)
## Call: survfit(formula = Surv(time = time, event = trans) ~ educ, data = adlong)
##
## educ=a_less_highschool
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 1728 33 0.981 0.00329 0.974 0.987
## 2 864 125 0.839 0.01207 0.816 0.863
##
## educ=b_highschool_grad
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 14244 197 0.986 0.000979 0.984 0.988
## 2 7122 807 0.874 0.003804 0.867 0.882
##
## educ=c_college_bach
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 4972 29 0.994 0.00108 0.992 0.996
## 2 2486 257 0.891 0.00615 0.879 0.904
##
## educ=d_college+
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 3156 10 0.997 0.00100 0.995 0.999
## 2 1578 156 0.898 0.00754 0.884 0.913
ggsurvplot(fit.kaplan.educ,risk.table = T,fun="cumhaz",title="Hazard for violence transition by education")
#Kaplan-Meier survival analysis of the outcome
fit.kaplan<-survfit(Surv(time = time , event=trans)~1,data=adlong)
summary(fit.kaplan)
## Call: survfit(formula = Surv(time = time, event = trans) ~ 1, data = adlong)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 24100 269 0.989 0.000677 0.988 0.990
## 2 12050 1345 0.878 0.002900 0.873 0.884
mean(adlong$trans)
## [1] 0.06697095
fit0 <- coxph(Surv(time = time , event=trans) ~ factor(sexorient), data=adlong,
iter=0, na.action=na.exclude)
o.minus.e <- tapply(resid(fit0), adlong$sexorient, sum)
obs <- tapply(adlong$transition, adlong$sexorient, sum)
cbind(observed=obs, expected= obs- o.minus.e, "o-e"=o.minus.e)
## observed expected o-e
## a_straight 2578 2593.20571 -15.205706
## b_bisexual 66 51.82881 14.171190
## c_LGB 46 44.96548 1.034517
summary(adlong)
## psuscid region GSWGT1 sex
## Length:24100 Min. :1.000 Min. : 16.32 a_male :11000
## Class :character 1st Qu.:2.000 1st Qu.: 206.10 b_female:13100
## Mode :character Median :2.000 Median : 948.81
## Mean :2.382 Mean :1174.81
## 3rd Qu.:3.000 3rd Qu.:1780.76
## Max. :4.000 Max. :6649.36
## sexorient racethnic educ
## a_straight:23344 a-nhwhite :15510 a_less_highschool: 1728
## b_bisexual: 410 b-nhblack : 4980 b_highschool_grad:14244
## c_LGB : 346 c-hispanic : 1838 c_college_bach : 4972
## d-asian : 1470 d_college+ : 3156
## e-native_american: 126
## f-other : 176
## transition transition.w1 w3.residence UScitizen
## Min. :0.0000 Min. :0.00000 a_withparents: 9644 a_no : 1488
## 1st Qu.:0.0000 1st Qu.:0.00000 b_withsomeone: 1372 b_yes:22612
## Median :0.0000 Median :0.00000 c_myownplace :11784
## Mean :0.1116 Mean :0.02232 d_group.qrts : 1152
## 3rd Qu.:0.0000 3rd Qu.:0.00000 e_homeless : 8
## Max. :1.0000 Max. :1.00000 f_other : 140
## time age_enter age_exit aid
## Min. :1.0 Min. :11.00 Min. :18.00 Min. : 1
## 1st Qu.:1.0 1st Qu.:16.00 1st Qu.:22.00 1st Qu.: 3013
## Median :1.5 Median :19.00 Median :26.00 Median : 6026
## Mean :1.5 Mean :19.26 Mean :25.69 Mean : 6026
## 3rd Qu.:2.0 3rd Qu.:22.00 3rd Qu.:29.00 3rd Qu.: 9038
## Max. :2.0 Max. :28.00 Max. :34.00 Max. :12050
## trans racethnic_new
## Min. :0.00000 a_white :15510
## 1st Qu.:0.00000 b_black : 4980
## Median :0.00000 c_hispanic: 1838
## Mean :0.06697 other : 1772
## 3rd Qu.:0.00000
## Max. :1.00000
summary(fit.000)
##
## Call:
## svyglm(formula = trans ~ age_enter + sex + sexorient + racethnic_new +
## educ + w3.residence + UScitizen, design = des2, family = "binomial")
##
## Survey design:
## svydesign(ids = ~psuscid, strata = ~region, weights = ~GSWGT1,
## data = adlong, nest = T)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.33712 0.32122 -19.728 < 2e-16 ***
## age_enter 0.18672 0.01173 15.924 < 2e-16 ***
## sexb_female 0.08540 0.07553 1.131 0.26061
## sexorientb_bisexual 0.43159 0.21684 1.990 0.04898 *
## sexorientc_LGB 0.09202 0.24700 0.373 0.71018
## racethnic_newb_black 0.28146 0.09306 3.025 0.00309 **
## racethnic_newc_hispanic 0.28979 0.11666 2.484 0.01447 *
## racethnic_newother -0.02816 0.20983 -0.134 0.89349
## educb_highschool_grad -0.27973 0.13022 -2.148 0.03386 *
## educc_college_bach -0.50535 0.15741 -3.210 0.00173 **
## educd_college+ -0.56602 0.17302 -3.271 0.00142 **
## w3.residenceb_withsomeone 0.07928 0.14139 0.561 0.57609
## w3.residencec_myownplace -0.12972 0.07367 -1.761 0.08101 .
## w3.residenced_group.qrts 0.24837 0.19233 1.291 0.19924
## w3.residencee_homeless 0.62435 1.01484 0.615 0.53966
## w3.residencef_other 0.09111 0.33589 0.271 0.78671
## UScitizenb_yes 0.18349 0.18828 0.975 0.33190
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.9970073)
##
## Number of Fisher Scoring iterations: 6