#install.packages("SASxport")
#install.packages("foreign")
#install.packages('Hmisc')
#library(haven)
#library(Hmisc)
#library(SASxport)
library(foreign)
#download datasets - AddHealth
wave1<-read.dta("/projects/add_health/data/11613344/ICPSR_27021/DS0001/da27021p1.dta")
wave4<-read.dta("/projects/add_health/data/11613344/ICPSR_27021/DS0012/da27021p12.dta")
#wave4birth<-read.dta("/projects/add_health/data/11613344/ICPSR_27021/DS0016/da27021p16.dta")
#addhealth.wghtW1<-read_dta("/projects/add_health/data/11613344/ICPSR_27021/DS0025/da27021p25.dta")
#wave3<-read.dta("/projects/add_health/data/11613344/ICPSR_27021/DS0003/da27021p3.dta")
w4meds <- read.xport("/projects/add_health/data/wave_4/biomarkers/w4meds.xpt")
w4glu <- read.xport("/projects/add_health/data/wave_4/biomarkers/glu_a1c.xpt")
#w4barorecepsensitv<-read.xport("/projects/add_health/data/data12_18/W4SupplementalFiles-Biomarker/Baroreceptor Sensitivity/brs.xpt")
w4crp <- read.xport("/projects/add_health/data/wave_4/biomarkers/crp_ebv.xpt")
w4lipids <- read.xport("/projects/add_health/data/wave_4/biomarkers/lipids.xpt")
weightW4 <- read.dta("/projects/add_health/data/11613344/ICPSR_27021/DS0028/da27021p28.dta")
#w1context <- read.dta("/projects/add_health/data/11613344/ICPSR_27024/DS0001/waveIcontext.dta")
#addhealth<-merge(wave1,wave3,by="aid")
#addhealth1<-merge(wave1,wave3,by="aid")
#addhealth1<-merge(addhealth1,wave4,by="aid")
#merge wave1, wave 4, biomarkers, medication.
addhealth1<-merge(wave4,wave1,by="aid")
#addhealth1<-merge(addhealth1a,wave3,by="aid")
addhealth2<-merge(w4meds,w4glu,by="AID")
addhealth3<-merge(w4crp,w4lipids,by="AID")
#Change uppercase AID to lowercase aid in order to merge by AID/aid.
addhealth2$aid<-addhealth2$AID
addhealth3$aid<-addhealth3$AID
addhealthBiomerge<-merge(addhealth2,addhealth3,by="aid")
#merge into one dataset
addhealthmerge1<-merge(addhealth1,addhealthBiomerge,by="aid")
addhealth<-merge(addhealthmerge1,weightW4,by="aid")
#addhealth<-merge(addhealth,wave4birth,by="aid")
library(car)
## Loading required package: carData
#Date of birth variables; Wave 1: H1GI1M (month), H1GI1Y (year); Wave 3: H3OD1M (month), H3OD1Y (year); Wave 4: H4OD1M (month), H4OD1Y (year)
#addhealth$birthmonthw1 <- factor(ifelse(addhealth$H1GI1M=="1",01,
# ifelse(addhealth$H1GI1M=="2", 02,
# ifelse(addhealth$H1GI1M=="3", 03,
# ifelse(addhealth$H1GI1M=="4", 04,
# ifelse(addhealth$H1GI1M=="5", 05,
# ifelse(addhealth$H1GI1M=="6", 06,
# ifelse(addhealth$H1GI1M=="7", 07,
# ifelse(addhealth$H1GI1M=="8", 08,
# ifelse(addhealth$H1GI1M=="9r", 09,
# ifelse(addhealth$H1GI1M=="10", 10,
# ifelse(addhealth$H1GI1M=="11", 11,
# ifelse(addhealth$H1GI1M=="12",12,
# ifelse(addhealth$H1GI1M=="96", 6,NA))))))))))))))
#table(addhealth$H1GI1M)
#addhealth$birthyearw1 <- factor(ifelse(addhealth$H1GI1Y=="74",1974,
# ifelse(addhealth$H1GI1Y=="75",1975,
# ifelse(addhealth$H1GI1Y=="76",1976,
# ifelse(addhealth$H1GI1Y=="77",1977,
# ifelse(addhealth$H1GI1Y=="78",1978,
# ifelse(addhealth$H1GI1Y=="79",1979,
# ifelse(addhealth$H1GI1Y=="80",1980,
# ifelse(addhealth$H1GI1Y=="81",1981,
# ifelse(addhealth$H1GI1Y=="82",1982,
# #ifelse(addhealth$H1GI1Y=="83",1983,1979)))))))))))
#
#table(addhealth$birthyearw1)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
#addhealth$birthdatew1 <- as.yearmon(paste(addhealth$birthyearw1, addhealth$birthmonthw1), "%Y %m")
#table(addhealth$birthdatew1)
#interview date w1
#addhealth$iyearfix <- Recode(addhealth$iyear,recodes="'95'='1995'")
#addhealth$interviewdatew1 <- as.yearmon(paste(addhealth$iyearfix, addhealth$imonth), "%Y %m")
#table(addhealth$iyearfix)
#age - derived from date of birth subtracted from wave interview date
#addhealth$agew1<-(as.numeric(round(addhealth$interviewdatew1 - addhealth$birthdatew1)))
#table(addhealth$agew1)
#birth month wave 4
addhealth$birthmonthw4<-Recode(addhealth$H4OD1M,recodes="1=1;2=2;3=3;4=4;5=5;6=6;7=7;8=8;9=9;10=10;11=11;12=12")
#birth year wave 4
addhealth$birthyearw4<-addhealth$H4OD1Y
addhealth$birthyearw4 <- Recode(addhealth$H4OD1Y,recodes="1974=1974;1975=1975;1976=1976;1977=1977;1978=1978;1979=1979;1980=1980;1981=1981;1982=1982;1983=1983")
#combine year and month for birth date wave 4
addhealth$birthdatew4 <- as.yearmon(paste(addhealth$birthyearw4, addhealth$birthmonthw4), "%Y %m")
#interview date w4
addhealth$interviewmonthw4 <- Recode(addhealth$IMONTH4,recodes="1=1;2=2;3=3;4=4;5=5;6=6;7=7;8=8;9=9;10=10;11=11;12=12")
addhealth$interviewyearw4 <- Recode(addhealth$IYEAR4,recodes="2007=2007;2008=2008;2009=2009")
addhealth$interviewdatew4<-as.yearmon(paste(addhealth$interviewyearw4,addhealth$interviewmonthw4),"%Y %m")
#age - derived from date of birth subtracted from wave interview date
addhealth$agew4<-(as.numeric(round(addhealth$interviewdatew4 - addhealth$birthdatew4)))
table(addhealth$agew4)
##
## 24 25 26 27 28 29 30 31 32 33 34
## 1 84 972 1217 1903 1940 2170 1740 601 59 24
#sexual orientation from wave 3. Reference group is straight people
#addhealth$sexorient <- factor(ifelse(addhealth$H3SE13=="(1) 100% heterosexual (straight)",1,
# ifelse(addhealth$H3SE13=="(2) Mostly #heterosexual.somewhat attracted to people of own",2,
# ifelse(addhealth$H3SE13=="(3) Bisexual-attracted #to men and women equally",3,
# ifelse(addhealth$H3SE13=="(4) Mostly #homosexual.somewhat attracted to opposite sex",4,
# ifelse(addhealth$H3SE13=="(5) 100% #homosexual (gay)",5,NA))))))
#addhealth$sexorient<-Recode(addhealth$H3SE13, recodes="1:2='a_straight'; 3='b_bisexual';4:5='c_LGB';else=NA",as.factor=T)
addhealth$sexorient<-Recode(addhealth$H4SE31,recodes="1:2='a_straight';3='b_bisexual';4:5='c_LGB';else=NA",as.factor=T)
#addhealth$sexorient<-Recode(addhealth$H4SE31,recodes="1:2='a_straight';3='b_bisexual';4:5='c_LGB';else=NA")
#table(addhealth$sexorienta)
table(addhealth$sexorient)
##
## a_straight b_bisexual c_LGB
## 10060 221 285
#sex
addhealth$sex <- ifelse(addhealth$BIO_SEX4==2,'b_female','a_male')
#addhealth$sex_bio<-Recode(addhealth$sex, recodes="1='male'; 0='female'",as.factor=T)
#table(addhealth$sex_bio)
table(addhealth$sex)
##
## a_male b_female
## 3201 7510
#Education. Less then high school is reference group
addhealth$W4_educ<-Recode(addhealth$H4ED2,recodes="1:2='a_less_highschool';3:6='b_HS_grad';7:13='d_college_bach.plus';else=NA")
table(addhealth$W4_educ)
##
## a_less_highschool b_HS_grad d_college_bach.plus
## 711 6271 3727
#biomarkers
#systolic bp
addhealth$H4SBP<-Recode(addhealth$H4SBP,recodes="999=NA;997=NA;996=NA")
addhealth$systolicbp<-ifelse(addhealth$H4SBP>=140,1,0)
table(addhealth$systolicbp)
##
## 0 1
## 9190 1210
#diastolic bp
addhealth$H4DBP<-Recode(addhealth$H4DBP,recodes="999=NA;997=NA;996=NA")
addhealth$diastolicbp<-ifelse(addhealth$H4DBP>=90,1,0)
table(addhealth$diastolicbp)
##
## 0 1
## 8736 1664
#pulse rate
addhealth$H4PR<-Recode(addhealth$H4PR,recodes="999=NA;997=NA;996=NA")
addhealth$pulserate<-ifelse(addhealth$H4PR>=85,1,0)
#pulse pressure
addhealth$pulsepressure<-ifelse(addhealth$H4PP>=60,1,0)
table(addhealth$pulsepressure)
##
## 0 1
## 9953 682
#TOTAL CHOLESTEROL
addhealth$TC<-Recode(addhealth$TC,recodes="99=NA")
addhealth$totalchol<-ifelse(addhealth$TC>=8,1,0)
table(addhealth$totalchol)
##
## 0 1
## 6665 3131
#HDL, HIGH-DENSITY LIPOPROTEIN CHOLESTEROL
addhealth$HDL<-Recode(addhealth$HDL,recodes="99=NA")
addhealth$hidensity_lip<-ifelse(addhealth$HDL<=2,1,0)
table(addhealth$hidensity_lip)
##
## 0 1
## 7885 1779
#TRIGLYCERIDES
addhealth$TG<-Recode(addhealth$TG,recodes="99=NA")
addhealth$triglyceride<-ifelse(addhealth$TG>=8,1,0)
table(addhealth$triglyceride)
##
## 0 1
## 6351 3240
#HEMOGLOBIN
addhealth$HBA1C<-Recode(addhealth$HBA1C,recodes="99=NA")
addhealth$hemoglobin<-ifelse(addhealth$HBA1C>=6.4,1,0)
table(addhealth$hemoglobin)
##
## 0 1
## 9171 873
#BMI
addhealth$H4BMI<-Recode(addhealth$H4BMI,recodes="999=NA;888=NA;889=NA;996-NA;997=NA")
addhealth$BMI<-ifelse(addhealth$H4BMI>=30,1,0)
table(addhealth$BMI)
##
## 0 1
## 6194 4432
# high-sensitivity C-reactive protein test.
addhealth$CRPa<-Recode(addhealth$CRP,recodes="999=NA;998=NA")
addhealth$hsCRP<-ifelse(addhealth$CRPa>=3,1,0)
table(addhealth$hsCRP)
##
## 0 1
## 4615 5212
#EPSTEIN BARR
#quantile(addhealth$EBVa, c(.80), na.rm = T)
addhealth$EBVa<-Recode(addhealth$EBV,recodes="9999=NA;9998=NA")
addhealth$EpsteinBar<-ifelse(addhealth$EBVa>=243,1,0)
table(addhealth$EpsteinBar)
##
## 0 1
## 8021 1852
addhealth$score<-addhealth$EpsteinBar+addhealth$hsCRP+addhealth$BMI+addhealth$triglyceride+addhealth$hemoglobin+addhealth$hidensity_lip+addhealth$totalchol+addhealth$pulserate+addhealth$systolicbp+addhealth$pulsepressure
table(addhealth$score)
##
## 0 1 2 3 4 5 6 7 8 9
## 1039 1797 2019 1869 1351 704 261 100 13 8
#addhealth.pc<-prcomp(~EpsteinBar+hsCRP+BMI+waistcirm+hemoglobin+hidensity_lip+totalchol+pulserate+systolicbp+pulsepressure,data=addhealth, retx=T)
#screeplot(addhealth.pc, type = "l", main = "Scree Plot")
#abline(h=1)
#summary(addhealth.pc)
#addhealth.pc$rotation
#hist(addhealth.pc$x[,2])
#race variables
#nhwhite
addhealth$nhwhite <- ifelse(addhealth$H1GI6A==1,1,0)
addhealth$nhwhite<-Recode(addhealth$nhwhite, recodes="1='nhwhite'; 0='nonnhwhite'; 6:8=NA",as.factor=T)
#nhblack
addhealth$nhblack <- ifelse(addhealth$H1GI6B==1,1,0)
addhealth$nhblack<-Recode(addhealth$nhblack, recodes="1='nhblack'; 0='nonnhblack'; 6:8=NA",as.factor=T)
#Hispanic
addhealth$hispanic <- ifelse(addhealth$H1GI4==1,1,0)
addhealth$hispanic<-Recode(addhealth$hispanic, recodes="1='hispanic'; 0='nonhispanic'; 6:8=NA",as.factor=T)
#Asian
addhealth$asian <- ifelse(addhealth$H1GI6D==1,1,0)
addhealth$asian<-Recode(addhealth$asian, recodes="1='asian'; 0='nonasian'; 6:8=NA",as.factor=T)
#Native American
#addhealth$native_american <- ifelse(addhealth$H1GI6C==1,1,0)
#addhealth$native_american<-Recode(addhealth$native_american, recodes="1='native_american'; 0='nonnative_american'; 6:8=NA",as.factor=T)
#other
#addhealth$other <- ifelse(addhealth$H1GI6E==1&addhealth$H1GI6C==1,1,0)
#addhealth$other<-Recode(addhealth$other, recodes="1='other'; 0='nonother'; 6:8=NA",as.factor=T)
#combine race to one variable
addhealth$racethnic <- factor(ifelse(addhealth$nhwhite=="nhwhite","a-nhwhite",
ifelse(addhealth$nhblack=="nhblack", "b-nhblack",
ifelse(addhealth$hispanic=="hispanic", "c-hispanic",
ifelse(addhealth$asian=="asian","d-asian",NA)))))
# ifelse(addhealth$native_american=="native_american","e-native_american",NA))))))
#ifelse(addhealth$other=="other","f-other",NA)))))))
table(addhealth$racethnic)
##
## a-nhwhite b-nhblack c-hispanic d-asian
## 7736 1789 635 422
#income variable
addhealth$incomeW4<-Recode(addhealth$H4EC1,recodes="1:5='a_<$25,000';6:9='b_$25,000<$75,000';10:11='c_$75,000<$150,000';12='d_>$150,000';else=NA")
#addhealth$treated_lessrespectW4<-Recode(addhealth$H4MH28,recodes="0:1='never/rarely';2:3='sometimes/often';else=NA")
#addhealth$suicideW4think<-Recode(addhealth$H4SE1, recodes="0='no';1='yes';else=NA",as.factor=T)
#addhealth$suicideW4attempt<-Recode(addhealth$H4SE2, recodes="0='no';1='once';2:3='2-4_times';4='5+times';else=NA",as.factor=T)
#Citizenship in Wave 4
addhealth$W4_UScitizenship<-ifelse(addhealth$H4OD4==1,"a_UScitizen_birth",
ifelse(addhealth$H4OD4==0&addhealth$H4OD5==1,"b_UScitizen_naturalized",
ifelse(addhealth$H4OD4==0&addhealth$H4OD5==0,"c_foreignBorn_nonUScitizen",NA)))
#addhealth$US_born<-ifelse(addhealth$H4OD4==1,"a_UScitizen_birth","b_foreign_born_non_UScitizen")
#born in the USA
addhealth$born_US_foreign<-ifelse(addhealth$H4OD4==1,"a_born.USA",
ifelse(addhealth$H4OD4==0,"b_born.foreign",NA))
#number of times married
addhealth$W4.married<-Recode(addhealth$H4TR1,recodes="0='a_never married';1='b_married_once';2:4='c_married_twice+';else=NA")
#Self reported general health
addhealth$health<-Recode(addhealth$H4GH1,recodes="1:3='a_Good.health';4:5='b_Bad.health';else=NA")
table(addhealth$health)
##
## a_Good.health b_Bad.health
## 8741 1970
#allostatic score by group
addhealth$score.grp<-Recode(addhealth$score,recodes="0:1='0';2:3='1';4:10='2';else=NA")
addhealth$score.grp<-as.factor(addhealth$score.grp)
table(addhealth$score.grp)
##
## 0 1 2
## 2836 3888 2437
quantile(addhealth$score,na.rm=T)
## 0% 25% 50% 75% 100%
## 0 1 2 4 9
str(addhealth$score.grp)
## Factor w/ 3 levels "0","1","2": 1 1 1 NA NA 1 1 2 1 3 ...
addhealth$raceXsexorient<-interaction(addhealth$racethnic,addhealth$sexorient)
addhealth$sexXsexorient<-interaction(addhealth$sex,addhealth$sexorient)
table(addhealth$sexXsexorient)
##
## a_male.a_straight b_female.a_straight a_male.b_bisexual b_female.b_bisexual
## 2979 7081 31 190
## a_male.c_LGB b_female.c_LGB
## 142 143
addhealth$seXorientXrace<-interaction(addhealth$sex,addhealth$sexorient,addhealth$racethnic)
table(addhealth$seXorientXrace)
##
## a_male.a_straight.a-nhwhite b_female.a_straight.a-nhwhite
## 2224 5089
## a_male.b_bisexual.a-nhwhite b_female.b_bisexual.a-nhwhite
## 22 149
## a_male.c_LGB.a-nhwhite b_female.c_LGB.a-nhwhite
## 75 90
## a_male.a_straight.b-nhblack b_female.a_straight.b-nhblack
## 446 1203
## a_male.b_bisexual.b-nhblack b_female.b_bisexual.b-nhblack
## 3 27
## a_male.c_LGB.b-nhblack b_female.c_LGB.b-nhblack
## 40 26
## a_male.a_straight.c-hispanic b_female.a_straight.c-hispanic
## 164 424
## a_male.b_bisexual.c-hispanic b_female.b_bisexual.c-hispanic
## 2 8
## a_male.c_LGB.c-hispanic b_female.c_LGB.c-hispanic
## 14 17
## a_male.a_straight.d-asian b_female.a_straight.d-asian
## 125 277
## a_male.b_bisexual.d-asian b_female.b_bisexual.d-asian
## 2 1
## a_male.c_LGB.d-asian b_female.c_LGB.d-asian
## 6 9
#sexorient
#find the most common value
#mcv.lgb<-factor(names(which.max(table(addhealth$sexorient))), levels=levels(addhealth$sexorient))
#mcv.lgb
#impute the cases
#addhealth$lgb.imp<-as.factor(ifelse(is.na(addhealth$sexorient)==T,mcv.lgb, addhealth$sexorient))
#levels(addhealth$lgb.imp)<-levels(addhealth$sexorient)
#prop.table(table(addhealth$sexorient))
#prop.table(table(addhealth$lgb.imp))
#table(addhealth$sexorient)
#mom born in USA
addhealth$mom.born.usa<-Recode(addhealth$PA3,recodes="0='b_mom.born.foreign';1='a_mom.born.usa';else=NA")
#mom race
addhealth$mom.latino<-Recode(addhealth$PA4,recodes="0=0;1=1;6=6;else=NA")
addhealth$mom.white<-Recode(addhealth$PA6_1,recodes="0=0;1=1;6=6;else=NA")
addhealth$mom.black<-Recode(addhealth$PA6_2,recodes="0=0;1=1;6=6;else=NA")
addhealth$mom.asian<-Recode(addhealth$PA6_4,recodes="0=0;1=1;6=6;else=NA")
addhealth$mom.nativeamer<-Recode(addhealth$PA6_3,recodes="0=0;1=1;6=6;else=NA")
addhealth$mom.other<-Recode(addhealth$PA6_5,recodes="0=0;1=1;6=6;else=NA")
addhealth$mom.race<-ifelse(addhealth$mom.latino==1,"c_latino",
ifelse(addhealth$mom.asian==1,"d_asian",
ifelse(addhealth$mom.black==1,"b_black",
ifelse(addhealth$mom.white==1,"a_white",NA))))
table(addhealth$mom.race)
##
## a_white b_black c_latino d_asian
## 6431 1451 926 281
#mothers education
addhealth$mom_educ<-Recode(addhealth$PA12,recodes="1:2='a_less.HS';3:7='b_HSgrad/voca';8:9='c_col.grad';10='a_less.HS';else=NA")
#mothers employment
addhealth$mom_emp<-ifelse(addhealth$PA13==1,"b_yes.emp",
ifelse(addhealth$PA13==0,"a_no.emp",NA))
#mom disabled
addhealth$mom_disabiled<-Recode(addhealth$PA18,recodes="0='a_no';1='b_yes';else=NA")
#mom public assistance
addhealth$mom_welfare<-Recode(addhealth$PA21,recodes="0='a_no';1='b_yes';else=NA")
#neighborhood according to parent
addhealth$hood.litter<-Recode(addhealth$PA33,recodes="1=0;2:3=1;else=NA")
addhealth$hood.drugs<-Recode(addhealth$PA34,recodes="1=0;2:3=1;else=NA")
addhealth$hood.move<-Recode(addhealth$PA35,recodes="1=0;2:3=1;else=NA")
addhealth$good.neigh<-addhealth$hood.litter+addhealth$hood.drugs+addhealth$hood.move
addhealth$good.neigh<-as.factor(addhealth$good.neigh)
addhealth$good.neighood<-Recode(addhealth$good.neigh,recodes="0='0';1:2='1';3=3")
table(addhealth$good.neigh)
##
## 0 1 2 3
## 2678 2719 2086 1590
table(addhealth$good.neighood)
##
## 0 1 3
## 2678 4805 1590
#drinking
addhealth$mom.drinking<-Recode(addhealth$PA61,recodes="1='a_non.drinker';2:6='b_drinker';else=NA")
addhealth$mom.smoking<-Recode(addhealth$PA64,recodes="0='a_no';1='b_yes';else=NA")
#respondant smoking drinking
#addhealth$smoke<-Recode(addhealth$H4TO1,recodes="0=0;1=1;else=NA")
#addhealth$have.ever.smoke.regular<-Recode(addhealth$H4TO3,recodes="0=0;1=1;else=NA")
addhealth$smoke<-ifelse(addhealth$H4TO1==0,"a_non.smoker",
ifelse(addhealth$H4TO1==1&addhealth$H4TO3==1,"c_smoker",
ifelse(addhealth$H4TO1==1&addhealth$H4TO3==0,"b_tried.smoking",NA)))
#addhealth$H4TO5[addhealth$H4TO5==96] <- NA
#addhealth$H4TO5[addhealth$H4TO5==98] <- NA
#addhealth$H4TO6[addhealth$H4TO6==996] <- NA
#addhealth$H4TO6[addhealth$H4TO6==997] <- NA
#addhealth$H4TO6[addhealth$H4TO6==998] <- NA
#addhealth$number.smokes.daily<-addhealth$H4TO6
#addhealth$smoke.past.30days<-Recode(addhealth$H4TO5,recodes="0=0;1:30=1;else=NA")
#addhealth$smoke<-ifelse(addhealth$have.smoke==0&addhealth$smoke.daily.currently==0,"a_non.smoker",
# ifelse(addhealth$have.smoke==1&addhealth$smoke.daily.currently==0,"a_non.smoker",
# ifelse(addhealth$have.smoke==1&addhealth$smoke.daily.currently==1,"b_smoker",
# ifelse(addhealth$have.smoke==0&addhealth$smoke.daily.currently==1,"b_smoker",NA))))
#addhealth$smoke<-Recode(addhealth$H4TO5,recodes="0='a_non.smoker';1:30='b_smoker';else=NA")
#addhealth$smoke2<-Recode(addhealth$H4TO5,recodes="0=0;1:30=1;else=NA")
#addhealth$smoke3<-Recode(addhealth$H4TO6,recodes="1:24=0;25:100=1;else=NA")
#addhealth$smoke<-ifelse(addhealth$smoke1==1&addhealth$smoke2==1&addhealth$smoke3==1,"d_heavy.smoker",
# ifelse(addhealth$smoke1==1&addhealth$smoke2==1&addhealth$smoke3==0,"c_moderate.smoker",
# ifelse(addhealth$smoke1==0&addhealth$smoke2==0,"a_non.smoker",
# ifelse(addhealth$smoke1==1&addhealth$smoke2==0,"b_previous.smoker",NA))))
#addhealth$smoke<-addhealth$have.ever.smoke+addhealth$have.ever.smoke.regular+addhealth$smoke.past.30days
#drinking respondant
addhealth$drink.yes.no<-ifelse(addhealth$H4TO33==0,0,
ifelse(addhealth$H4TO33==1,1,NA))
addhealth$drink.yes.12months<-Recode(addhealth$H4TO35,recodes="0:3=1;4:6=2;else=NA")
#addhealth$drink.current<-ifelse(addhealth$drink.yes.no==1&addhealth$drink.yes.12month==1,1,0)
#addhealth$drink.binge<-Recode(addhealth$H4TO36,recodes="1:4=0;5:18=1;else=NA")
#addhealth$drink<-ifelse(addhealth$drink.yes.no==1&addhealth$drink.yes.12months==1&addhealth$drink.binge==1,"d_binge.drinker",
# ifelse(addhealth$drink.yes.no==1&addhealth$drink.yes.12months==1&addhealth$drink.binge==0,"c_current.drinker",
# ifelse(addhealth$drink.yes.no==1&addhealth$drink.yes.12months==0,"b_previous.drinker",
# ifelse(addhealth$drink.yes.no==0,"a_non.drinker",NA))))
addhealth$drink<-ifelse(addhealth$drink.yes.no==0,"a_non.drinker",
ifelse(addhealth$drink.yes.no==1&addhealth$drink.yes.12months==1,"b_lite.drinker",
ifelse(addhealth$drink.yes.no==1&addhealth$drink.yes.12months==2,"c_drinker",NA)))
table(addhealth$smoke)
##
## a_non.smoker b_tried.smoking c_smoker
## 3712 2109 4864
table(addhealth$drink)
##
## a_non.drinker b_lite.drinker c_drinker
## 2196 5699 2761
library(VGAM)
## Loading required package: stats4
## Loading required package: splines
##
## Attaching package: 'VGAM'
## The following object is masked from 'package:car':
##
## logit
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#select variables to use
addhealth.pro<-addhealth%>%
select(psuscid,GSWGT4_2,score.grp,sex,racethnic,mom_educ,mom_emp,mom_disabiled,mom_welfare,good.neigh,mom.drinking,mom.smoking,born_US_foreign,sexorient,W4_educ,raceXsexorient,agew4,W4.married,incomeW4,W4_UScitizenship,health,mom.race,good.neigh,smoke,drink,sexXsexorient,seXorientXrace)
#filter complete cases
#library(tidyverse)
addhealth.pro<-addhealth.pro%>%
filter(complete.cases(.))
#Survey design
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:VGAM':
##
## calibrate
## The following object is masked from 'package:graphics':
##
## dotchart
#addhealth %>% drop_na()
options(survey.lonely.psu = "adjust")
#des<-svydesign(ids=~psuscid,
# weights=~GSWGT4_2,
# nest=T,
# data=addhealth.pro[!is.na(addhealth.pro$GSWGT4_2),])
fit<-vglm(as.ordered(score.grp)~agew4+sex+racethnic+sexorient,addhealth.pro, weights = GSWGT4_2/mean(GSWGT4_2, na.rm=T), family=cumulative(parallel = T, reverse = T))
summary(fit)
##
## Call:
## vglm(formula = as.ordered(score.grp) ~ agew4 + sex + racethnic +
## sexorient, family = cumulative(parallel = T, reverse = T),
## data = addhealth.pro, weights = GSWGT4_2/mean(GSWGT4_2, na.rm = T))
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logitlink(P[Y>=2]) -3.892 -0.4934 0.2985 0.6572 2.317
## logitlink(P[Y>=3]) -2.259 -0.6582 -0.2994 0.5690 3.761
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -0.79025 0.55858 -1.415 0.15714
## (Intercept):2 -2.61788 0.56071 -4.669 3.03e-06 ***
## agew4 0.06330 0.01908 3.317 0.00091 ***
## sexb_female -0.22980 0.07349 -3.127 0.00176 **
## racethnicb-nhblack 0.30543 0.11339 2.694 0.00707 **
## racethnicc-hispanic 0.93873 0.22953 4.090 4.32e-05 ***
## racethnicd-asian -0.54934 0.47052 -1.167 0.24301
## sexorientb_bisexual 0.12931 0.23093 0.560 0.57550
## sexorientc_LGB 0.23960 0.22941 1.044 0.29629
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
##
## Residual deviance: 6419.869 on 5975 degrees of freedom
##
## Log-likelihood: -3209.934 on 5975 degrees of freedom
##
## Number of Fisher scoring iterations: 4
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Exponentiated coefficients:
## agew4 sexb_female racethnicb-nhblack racethnicc-hispanic
## 1.0653509 0.7946888 1.3572083 2.5567426
## racethnicd-asian sexorientb_bisexual sexorientc_LGB
## 0.5773334 1.1380469 1.2707465
fit2<-vglm(as.ordered(score.grp)~agew4+sex+racethnic+sexorient+W4_educ+born_US_foreign,addhealth.pro, weights = GSWGT4_2/mean(GSWGT4_2, na.rm=T), family=cumulative(parallel = T, reverse = T))
summary(fit2)
##
## Call:
## vglm(formula = as.ordered(score.grp) ~ agew4 + sex + racethnic +
## sexorient + W4_educ + born_US_foreign, family = cumulative(parallel = T,
## reverse = T), data = addhealth.pro, weights = GSWGT4_2/mean(GSWGT4_2,
## na.rm = T))
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logitlink(P[Y>=2]) -4.069 -0.4186 0.2867 0.6521 2.145
## logitlink(P[Y>=3]) -2.308 -0.6498 -0.2901 0.5501 3.541
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -0.67778 0.56708 -1.195 0.23201
## (Intercept):2 -2.53744 0.56909 -4.459 8.24e-06 ***
## agew4 0.06465 0.01916 3.374 0.00074 ***
## sexb_female -0.18150 0.07404 -2.451 0.01423 *
## racethnicb-nhblack 0.35217 0.11399 3.089 0.00201 **
## racethnicc-hispanic 0.92122 0.23170 3.976 7.01e-05 ***
## racethnicd-asian -0.44689 0.48064 -0.930 0.35248
## sexorientb_bisexual 0.18249 0.23252 0.785 0.43255
## sexorientc_LGB 0.21469 0.23233 0.924 0.35546
## W4_educb_HS_grad -0.04420 0.11387 -0.388 0.69789
## W4_educd_college_bach.plus -0.72001 0.13049 -5.518 3.43e-08 ***
## born_US_foreignb_born.foreign 0.53340 0.35720 1.493 0.13536
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
##
## Residual deviance: 6354.434 on 5972 degrees of freedom
##
## Log-likelihood: -3177.217 on 5972 degrees of freedom
##
## Number of Fisher scoring iterations: 4
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Exponentiated coefficients:
## agew4 sexb_female
## 1.0667835 0.8340151
## racethnicb-nhblack racethnicc-hispanic
## 1.4221527 2.5123446
## racethnicd-asian sexorientb_bisexual
## 0.6396158 1.2001988
## sexorientc_LGB W4_educb_HS_grad
## 1.2394715 0.9567615
## W4_educd_college_bach.plus born_US_foreignb_born.foreign
## 0.4867453 1.7047208
fit3<-vglm(as.ordered(score.grp)~agew4+sex+racethnic+sexorient+W4_educ+born_US_foreign+W4.married+incomeW4,addhealth.pro, weights = GSWGT4_2/mean(GSWGT4_2, na.rm=T), family=cumulative(parallel = T, reverse = T))
summary(fit3)
##
## Call:
## vglm(formula = as.ordered(score.grp) ~ agew4 + sex + racethnic +
## sexorient + W4_educ + born_US_foreign + W4.married + incomeW4,
## family = cumulative(parallel = T, reverse = T), data = addhealth.pro,
## weights = GSWGT4_2/mean(GSWGT4_2, na.rm = T))
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logitlink(P[Y>=2]) -4.187 -0.3982 0.2861 0.6342 2.035
## logitlink(P[Y>=3]) -2.379 -0.6448 -0.2792 0.5713 3.568
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -0.5157110 0.5787965 -0.891 0.37293
## (Intercept):2 -2.3961532 0.5805768 -4.127 3.67e-05 ***
## agew4 0.0641091 0.0197969 3.238 0.00120 **
## sexb_female -0.2171979 0.0748123 -2.903 0.00369 **
## racethnicb-nhblack 0.2765178 0.1155514 2.393 0.01671 *
## racethnicc-hispanic 0.9764435 0.2325777 4.198 2.69e-05 ***
## racethnicd-asian -0.1020357 0.4865065 -0.210 0.83388
## sexorientb_bisexual 0.0005403 0.2349976 0.002 0.99817
## sexorientc_LGB 0.2551538 0.2348169 1.087 0.27721
## W4_educb_HS_grad 0.1216029 0.1195553 1.017 0.30909
## W4_educd_college_bach.plus -0.4176369 0.1432194 -2.916 0.00354 **
## born_US_foreignb_born.foreign 0.5527920 0.3569473 1.549 0.12146
## W4.marriedb_married_once -0.0397952 0.0757492 -0.525 0.59934
## W4.marriedc_married_twice+ 0.1178139 0.1417997 0.831 0.40606
## incomeW4b_$25,000<$75,000 -0.2320017 0.0910808 -2.547 0.01086 *
## incomeW4c_$75,000<$150,000 -0.5881723 0.1177872 -4.994 5.93e-07 ***
## incomeW4d_>$150,000 -1.0316149 0.2112767 -4.883 1.05e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
##
## Residual deviance: 6310.984 on 5967 degrees of freedom
##
## Log-likelihood: -3155.492 on 5967 degrees of freedom
##
## Number of Fisher scoring iterations: 5
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Exponentiated coefficients:
## agew4 sexb_female
## 1.0662087 0.8047707
## racethnicb-nhblack racethnicc-hispanic
## 1.3185304 2.6549968
## racethnicd-asian sexorientb_bisexual
## 0.9029973 1.0005405
## sexorientc_LGB W4_educb_HS_grad
## 1.2906601 1.1293056
## W4_educd_college_bach.plus born_US_foreignb_born.foreign
## 0.6586013 1.7380990
## W4.marriedb_married_once W4.marriedc_married_twice+
## 0.9609863 1.1250347
## incomeW4b_$25,000<$75,000 incomeW4c_$75,000<$150,000
## 0.7929448 0.5553413
## incomeW4d_>$150,000
## 0.3564309
fit4<-vglm(as.ordered(score.grp)~agew4+sex+racethnic+sexorient+W4_educ+born_US_foreign+W4.married+incomeW4+drink+smoke+health,addhealth.pro, weights = GSWGT4_2/mean(GSWGT4_2, na.rm=T), family=cumulative(parallel = T, reverse = T))
summary(fit4)
##
## Call:
## vglm(formula = as.ordered(score.grp) ~ agew4 + sex + racethnic +
## sexorient + W4_educ + born_US_foreign + W4.married + incomeW4 +
## drink + smoke + health, family = cumulative(parallel = T,
## reverse = T), data = addhealth.pro, weights = GSWGT4_2/mean(GSWGT4_2,
## na.rm = T))
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logitlink(P[Y>=2]) -4.825 -0.4039 0.2637 0.5864 2.363
## logitlink(P[Y>=3]) -2.340 -0.6296 -0.2682 0.5212 4.163
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -0.56477 0.59059 -0.956 0.338925
## (Intercept):2 -2.52783 0.59257 -4.266 1.99e-05 ***
## agew4 0.07132 0.02004 3.559 0.000372 ***
## sexb_female -0.27825 0.07685 -3.621 0.000294 ***
## racethnicb-nhblack -0.02540 0.11982 -0.212 0.832117
## racethnicc-hispanic 0.85747 0.23722 3.615 0.000301 ***
## racethnicd-asian -0.12676 0.49092 -0.258 0.796238
## sexorientb_bisexual -0.13663 0.23961 -0.570 0.568513
## sexorientc_LGB 0.31002 0.23696 1.308 0.190758
## W4_educb_HS_grad 0.19287 0.12302 1.568 0.116931
## W4_educd_college_bach.plus -0.35639 0.14852 -2.400 0.016409 *
## born_US_foreignb_born.foreign 0.48942 0.36256 1.350 0.177051
## W4.marriedb_married_once -0.10131 0.07737 -1.310 0.190348
## W4.marriedc_married_twice+ -0.01967 0.14442 -0.136 0.891641
## incomeW4b_$25,000<$75,000 -0.09323 0.09352 -0.997 0.318834
## incomeW4c_$75,000<$150,000 -0.35287 0.12170 -2.900 0.003737 **
## incomeW4d_>$150,000 -0.87798 0.21857 -4.017 5.90e-05 ***
## drinkb_lite.drinker 0.07835 0.09567 0.819 0.412828
## drinkc_drinker -0.26021 0.11636 -2.236 0.025333 *
## smokeb_tried.smoking -0.18399 0.11496 -1.601 0.109480
## smokec_smoker -0.57485 0.08790 -6.540 6.14e-11 ***
## healthb_Bad.health 0.88025 0.09040 9.738 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
##
## Residual deviance: 6145.602 on 5962 degrees of freedom
##
## Log-likelihood: -3072.801 on 5962 degrees of freedom
##
## Number of Fisher scoring iterations: 5
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Exponentiated coefficients:
## agew4 sexb_female
## 1.0739272 0.7571107
## racethnicb-nhblack racethnicc-hispanic
## 0.9749189 2.3571997
## racethnicd-asian sexorientb_bisexual
## 0.8809412 0.8722896
## sexorientc_LGB W4_educb_HS_grad
## 1.3634502 1.2127309
## W4_educd_college_bach.plus born_US_foreignb_born.foreign
## 0.7001988 1.6313694
## W4.marriedb_married_once W4.marriedc_married_twice+
## 0.9036487 0.9805179
## incomeW4b_$25,000<$75,000 incomeW4c_$75,000<$150,000
## 0.9109878 0.7026689
## incomeW4d_>$150,000 drinkb_lite.drinker
## 0.4156200 1.0815002
## drinkc_drinker smokeb_tried.smoking
## 0.7708873 0.8319402
## smokec_smoker healthb_Bad.health
## 0.5627868 2.4115128
fit5<-vglm(as.ordered(score.grp)~agew4+sex+racethnic+sexorient+W4_educ+born_US_foreign+W4.married+incomeW4+drink+smoke+health+mom.race+mom_educ+mom_emp+good.neigh,addhealth.pro, weights = GSWGT4_2/mean(GSWGT4_2, na.rm=T), family=cumulative(parallel = T, reverse = T))
summary(fit5)
##
## Call:
## vglm(formula = as.ordered(score.grp) ~ agew4 + sex + racethnic +
## sexorient + W4_educ + born_US_foreign + W4.married + incomeW4 +
## drink + smoke + health + mom.race + mom_educ + mom_emp +
## good.neigh, family = cumulative(parallel = T, reverse = T),
## data = addhealth.pro, weights = GSWGT4_2/mean(GSWGT4_2, na.rm = T))
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logitlink(P[Y>=2]) -5.600 -0.3989 0.262 0.5872 2.368
## logitlink(P[Y>=3]) -2.325 -0.6257 -0.263 0.5141 4.093
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -0.70773 0.60465 -1.170 0.241807
## (Intercept):2 -2.68374 0.60674 -4.423 9.73e-06 ***
## agew4 0.07585 0.02022 3.751 0.000176 ***
## sexb_female -0.26559 0.07727 -3.437 0.000588 ***
## racethnicb-nhblack -1.30768 0.47058 -2.779 0.005455 **
## racethnicc-hispanic 0.56584 0.27704 2.042 0.041108 *
## racethnicd-asian -0.01438 0.57739 -0.025 0.980131
## sexorientb_bisexual -0.11537 0.24143 -0.478 0.632753
## sexorientc_LGB 0.36291 0.24049 1.509 0.131282
## W4_educb_HS_grad 0.19262 0.12532 1.537 0.124310
## W4_educd_college_bach.plus -0.33892 0.15304 -2.215 0.026790 *
## born_US_foreignb_born.foreign 0.22931 0.38781 0.591 0.554322
## W4.marriedb_married_once -0.11498 0.07840 -1.467 0.142500
## W4.marriedc_married_twice+ -0.01701 0.14704 -0.116 0.907921
## incomeW4b_$25,000<$75,000 -0.12557 0.09508 -1.321 0.186585
## incomeW4c_$75,000<$150,000 -0.35829 0.12360 -2.899 0.003746 **
## incomeW4d_>$150,000 -0.89742 0.22267 -4.030 5.57e-05 ***
## drinkb_lite.drinker 0.12623 0.09678 1.304 0.192136
## drinkc_drinker -0.18496 0.11884 -1.556 0.119618
## smokeb_tried.smoking -0.17160 0.11590 -1.481 0.138721
## smokec_smoker -0.57687 0.08858 -6.512 7.40e-11 ***
## healthb_Bad.health 0.86516 0.09091 9.517 < 2e-16 ***
## mom.raceb_black 1.35348 0.47651 2.840 0.004506 **
## mom.racec_latino 0.36171 0.18039 2.005 0.044944 *
## mom.raced_asian -0.16110 0.52888 -0.305 0.760665
## mom_educb_HSgrad/voca 0.04375 0.10384 0.421 0.673486
## mom_educc_col.grad -0.11633 0.13960 -0.833 0.404682
## mom_empb_yes.emp -0.12869 0.08312 -1.548 0.121565
## good.neigh1 -0.06173 0.09832 -0.628 0.530126
## good.neigh2 0.10286 0.10270 1.002 0.316549
## good.neigh3 0.18081 0.10801 1.674 0.094140 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
##
## Residual deviance: 6120.315 on 5953 degrees of freedom
##
## Log-likelihood: -3060.157 on 5953 degrees of freedom
##
## Number of Fisher scoring iterations: 5
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Exponentiated coefficients:
## agew4 sexb_female
## 1.0787964 0.7667512
## racethnicb-nhblack racethnicc-hispanic
## 0.2704469 1.7609191
## racethnicd-asian sexorientb_bisexual
## 0.9857233 0.8910397
## sexorientc_LGB W4_educb_HS_grad
## 1.4375076 1.2124169
## W4_educd_college_bach.plus born_US_foreignb_born.foreign
## 0.7125412 1.2577291
## W4.marriedb_married_once W4.marriedc_married_twice+
## 0.8913870 0.9831370
## incomeW4b_$25,000<$75,000 incomeW4c_$75,000<$150,000
## 0.8819923 0.6988689
## incomeW4d_>$150,000 drinkb_lite.drinker
## 0.4076211 1.1345409
## drinkc_drinker smokeb_tried.smoking
## 0.8311346 0.8423137
## smokec_smoker healthb_Bad.health
## 0.5616521 2.3753776
## mom.raceb_black mom.racec_latino
## 3.8708541 1.4357877
## mom.raced_asian mom_educb_HSgrad/voca
## 0.8512052 1.0447262
## mom_educc_col.grad mom_empb_yes.emp
## 0.8901859 0.8792477
## good.neigh1 good.neigh2
## 0.9401411 1.1083352
## good.neigh3
## 1.1981892
fit6<-vglm(as.ordered(score.grp)~agew4+sex+raceXsexorient+W4_educ+born_US_foreign+W4.married+incomeW4+drink+smoke+health+mom.race+mom_educ+mom_emp+good.neigh,addhealth.pro, weights = GSWGT4_2/mean(GSWGT4_2, na.rm=T), family=cumulative(parallel = T, reverse = T))
summary(fit6)
##
## Call:
## vglm(formula = as.ordered(score.grp) ~ agew4 + sex + raceXsexorient +
## W4_educ + born_US_foreign + W4.married + incomeW4 + drink +
## smoke + health + mom.race + mom_educ + mom_emp + good.neigh,
## family = cumulative(parallel = T, reverse = T), data = addhealth.pro,
## weights = GSWGT4_2/mean(GSWGT4_2, na.rm = T))
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logitlink(P[Y>=2]) -4.496 -0.3772 0.2629 0.5846 2.545
## logitlink(P[Y>=3]) -2.355 -0.6251 -0.2617 0.5149 4.091
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -0.614590 0.605527 -1.015 0.310122
## (Intercept):2 -2.595222 0.607515 -4.272 1.94e-05 ***
## agew4 0.072247 0.020256 3.567 0.000362 ***
## sexb_female -0.262080 0.077339 -3.389 0.000702 ***
## raceXsexorientb-nhblack.a_straight -0.834662 0.508433 -1.642 0.100666
## raceXsexorientc-hispanic.a_straight 0.588151 0.277362 2.121 0.033962 *
## raceXsexorientd-asian.a_straight 0.091666 0.580135 0.158 0.874451
## raceXsexorienta-nhwhite.b_bisexual -0.138386 0.249532 -0.555 0.579180
## raceXsexorientb-nhblack.b_bisexual -0.807316 1.014889 -0.795 0.426339
## raceXsexorientc-hispanic.b_bisexual 11.482490 445.366643 NA NA
## raceXsexorienta-nhwhite.c_LGB 0.637836 0.257122 2.481 0.013113 *
## raceXsexorientb-nhblack.c_LGB -3.011648 0.980250 -3.072 0.002124 **
## raceXsexorientc-hispanic.c_LGB 0.048186 8.130004 0.006 0.995271
## W4_educb_HS_grad 0.200265 0.125350 1.598 0.110120
## W4_educd_college_bach.plus -0.329276 0.153157 -2.150 0.031562 *
## born_US_foreignb_born.foreign 0.211629 0.387995 0.545 0.585450
## W4.marriedb_married_once -0.101772 0.078533 -1.296 0.195007
## W4.marriedc_married_twice+ -0.005375 0.147072 -0.037 0.970849
## incomeW4b_$25,000<$75,000 -0.143691 0.095274 -1.508 0.131507
## incomeW4c_$75,000<$150,000 -0.377490 0.123775 -3.050 0.002290 **
## incomeW4d_>$150,000 -0.912167 0.222845 -4.093 4.25e-05 ***
## drinkb_lite.drinker 0.132442 0.096874 1.367 0.171574
## drinkc_drinker -0.180091 0.118951 -1.514 0.130027
## smokeb_tried.smoking -0.179715 0.115970 -1.550 0.121220
## smokec_smoker -0.568655 0.088661 -6.414 1.42e-10 ***
## healthb_Bad.health 0.859048 0.091225 9.417 < 2e-16 ***
## mom.raceb_black 0.923416 0.508575 1.816 0.069417 .
## mom.racec_latino 0.350516 0.180524 1.942 0.052179 .
## mom.raced_asian -0.337405 0.533955 -0.632 0.527453
## mom_educb_HSgrad/voca 0.045835 0.103972 0.441 0.659330
## mom_educc_col.grad -0.109831 0.139696 -0.786 0.431742
## mom_empb_yes.emp -0.139201 0.083270 -1.672 0.094587 .
## good.neigh1 -0.059485 0.098377 -0.605 0.545407
## good.neigh2 0.114637 0.102823 1.115 0.264894
## good.neigh3 0.185512 0.108047 1.717 0.085986 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
##
## Residual deviance: 6108.857 on 5949 degrees of freedom
##
## Log-likelihood: -3054.428 on 5949 degrees of freedom
##
## Number of Fisher scoring iterations: 10
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## 'raceXsexorientc-hispanic.b_bisexual'
##
##
## Exponentiated coefficients:
## agew4 sexb_female
## 1.074920e+00 7.694497e-01
## raceXsexorientb-nhblack.a_straight raceXsexorientc-hispanic.a_straight
## 4.340210e-01 1.800657e+00
## raceXsexorientd-asian.a_straight raceXsexorienta-nhwhite.b_bisexual
## 1.095998e+00 8.707625e-01
## raceXsexorientb-nhblack.b_bisexual raceXsexorientc-hispanic.b_bisexual
## 4.460538e-01 9.700234e+04
## raceXsexorienta-nhwhite.c_LGB raceXsexorientb-nhblack.c_LGB
## 1.892382e+00 4.921052e-02
## raceXsexorientc-hispanic.c_LGB W4_educb_HS_grad
## 1.049366e+00 1.221727e+00
## W4_educd_college_bach.plus born_US_foreignb_born.foreign
## 7.194445e-01 1.235689e+00
## W4.marriedb_married_once W4.marriedc_married_twice+
## 9.032356e-01 9.946399e-01
## incomeW4b_$25,000<$75,000 incomeW4c_$75,000<$150,000
## 8.661551e-01 6.855802e-01
## incomeW4d_>$150,000 drinkb_lite.drinker
## 4.016530e-01 1.141613e+00
## drinkc_drinker smokeb_tried.smoking
## 8.351941e-01 8.355083e-01
## smokec_smoker healthb_Bad.health
## 5.662868e-01 2.360912e+00
## mom.raceb_black mom.racec_latino
## 2.517876e+00 1.419799e+00
## mom.raced_asian mom_educb_HSgrad/voca
## 7.136194e-01 1.046902e+00
## mom_educc_col.grad mom_empb_yes.emp
## 8.959856e-01 8.700535e-01
## good.neigh1 good.neigh2
## 9.422500e-01 1.121467e+00
## good.neigh3
## 1.203834e+00
fit7<-vglm(as.ordered(score.grp)~agew4+sexXsexorient+racethnic+W4_educ+born_US_foreign+W4.married+incomeW4+drink+smoke+health+mom.race+mom_educ+mom_emp+good.neigh,addhealth.pro, weights = GSWGT4_2/mean(GSWGT4_2, na.rm=T), family=cumulative(parallel = T, reverse = T))
summary(fit7)
##
## Call:
## vglm(formula = as.ordered(score.grp) ~ agew4 + sexXsexorient +
## racethnic + W4_educ + born_US_foreign + W4.married + incomeW4 +
## drink + smoke + health + mom.race + mom_educ + mom_emp +
## good.neigh, family = cumulative(parallel = T, reverse = T),
## data = addhealth.pro, weights = GSWGT4_2/mean(GSWGT4_2, na.rm = T))
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logitlink(P[Y>=2]) -5.198 -0.3955 0.2566 0.5778 3.435
## logitlink(P[Y>=3]) -2.282 -0.6098 -0.2629 0.5243 4.159
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -0.88220 0.60700 -1.453 0.146118
## (Intercept):2 -2.87393 0.60932 -4.717 2.40e-06 ***
## agew4 0.07975 0.02029 3.931 8.46e-05 ***
## sexXsexorientb_female.a_straight -0.26900 0.07877 -3.415 0.000638 ***
## sexXsexorienta_male.b_bisexual -3.00100 0.73691 -4.072 4.65e-05 ***
## sexXsexorientb_female.b_bisexual 0.02420 0.26772 0.090 0.927973
## sexXsexorienta_male.c_LGB 1.18652 0.36739 3.230 0.001240 **
## sexXsexorientb_female.c_LGB -0.52176 0.33186 -1.572 0.115891
## racethnicb-nhblack -1.14627 0.47399 -2.418 0.015593 *
## racethnicc-hispanic 0.60376 0.27763 2.175 0.029651 *
## racethnicd-asian 0.15355 0.58328 0.263 0.792348
## W4_educb_HS_grad 0.22785 0.12571 1.813 0.069900 .
## W4_educd_college_bach.plus -0.29153 0.15347 -1.900 0.057487 .
## born_US_foreignb_born.foreign 0.11716 0.39008 0.300 0.763915
## W4.marriedb_married_once -0.11372 0.07864 -1.446 0.148172
## W4.marriedc_married_twice+ -0.03773 0.14733 -0.256 0.797874
## incomeW4b_$25,000<$75,000 -0.12720 0.09532 -1.334 0.182048
## incomeW4c_$75,000<$150,000 -0.37705 0.12387 -3.044 0.002336 **
## incomeW4d_>$150,000 -0.93263 0.22374 -4.168 3.07e-05 ***
## drinkb_lite.drinker 0.11984 0.09700 1.235 0.216664
## drinkc_drinker -0.19891 0.11915 -1.669 0.095035 .
## smokeb_tried.smoking -0.18709 0.11619 -1.610 0.107369
## smokec_smoker -0.57351 0.08875 -6.462 1.03e-10 ***
## healthb_Bad.health 0.90387 0.09143 9.886 < 2e-16 ***
## mom.raceb_black 1.18418 0.47961 2.469 0.013547 *
## mom.racec_latino 0.33924 0.18076 1.877 0.060554 .
## mom.raced_asian -0.38318 0.53886 -0.711 0.477022
## mom_educb_HSgrad/voca 0.06032 0.10424 0.579 0.562810
## mom_educc_col.grad -0.11138 0.13999 -0.796 0.426234
## mom_empb_yes.emp -0.10517 0.08341 -1.261 0.207340
## good.neigh1 -0.01866 0.09874 -0.189 0.850113
## good.neigh2 0.11842 0.10290 1.151 0.249808
## good.neigh3 0.18117 0.10820 1.674 0.094051 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
##
## Residual deviance: 6089.806 on 5951 degrees of freedom
##
## Log-likelihood: -3044.903 on 5951 degrees of freedom
##
## Number of Fisher scoring iterations: 5
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Exponentiated coefficients:
## agew4 sexXsexorientb_female.a_straight
## 1.0830170 0.7641427
## sexXsexorienta_male.b_bisexual sexXsexorientb_female.b_bisexual
## 0.0497372 1.0244959
## sexXsexorienta_male.c_LGB sexXsexorientb_female.c_LGB
## 3.2756697 0.5934732
## racethnicb-nhblack racethnicc-hispanic
## 0.3178208 1.8289914
## racethnicd-asian W4_educb_HS_grad
## 1.1659718 1.2558984
## W4_educd_college_bach.plus born_US_foreignb_born.foreign
## 0.7471177 1.1242969
## W4.marriedb_married_once W4.marriedc_married_twice+
## 0.8925086 0.9629715
## incomeW4b_$25,000<$75,000 incomeW4c_$75,000<$150,000
## 0.8805601 0.6858822
## incomeW4d_>$150,000 drinkb_lite.drinker
## 0.3935177 1.1273191
## drinkc_drinker smokeb_tried.smoking
## 0.8196239 0.8293702
## smokec_smoker healthb_Bad.health
## 0.5635418 2.4691384
## mom.raceb_black mom.racec_latino
## 3.2679950 1.4038846
## mom.raced_asian mom_educb_HSgrad/voca
## 0.6816886 1.0621756
## mom_educc_col.grad mom_empb_yes.emp
## 0.8945973 0.9001714
## good.neigh1 good.neigh2
## 0.9815132 1.1257218
## good.neigh3
## 1.1986224
fit8<-vglm(as.ordered(score.grp)~agew4+seXorientXrace+W4_educ+born_US_foreign+W4.married+incomeW4+drink+smoke+health+mom.race+mom_educ+mom_emp+good.neigh,addhealth.pro, weights = GSWGT4_2/mean(GSWGT4_2, na.rm=T), family=cumulative(parallel = T, reverse = T))
summary(fit8)
##
## Call:
## vglm(formula = as.ordered(score.grp) ~ agew4 + seXorientXrace +
## W4_educ + born_US_foreign + W4.married + incomeW4 + drink +
## smoke + health + mom.race + mom_educ + mom_emp + good.neigh,
## family = cumulative(parallel = T, reverse = T), data = addhealth.pro,
## weights = GSWGT4_2/mean(GSWGT4_2, na.rm = T))
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logitlink(P[Y>=2]) -4.513 -0.3732 0.2545 0.5708 3.569
## logitlink(P[Y>=3]) -2.213 -0.6093 -0.2614 0.5035 4.104
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept):1 -0.87301 0.61038 -1.430
## (Intercept):2 -2.87170 0.61269 -4.687
## agew4 0.07805 0.02036 3.834
## seXorientXraceb_female.a_straight.a-nhwhite -0.22812 0.08381 -2.722
## seXorientXracea_male.b_bisexual.a-nhwhite -3.02175 0.74993 -4.029
## seXorientXraceb_female.b_bisexual.a-nhwhite 0.07862 0.27877 0.282
## seXorientXracea_male.c_LGB.a-nhwhite 1.36581 0.37924 3.601
## seXorientXraceb_female.c_LGB.a-nhwhite -0.17254 0.36222 -0.476
## seXorientXracea_male.a_straight.b-nhblack -0.22109 0.55781 -0.396
## seXorientXraceb_female.a_straight.b-nhblack -1.04318 0.52016 -2.006
## seXorientXraceb_female.b_bisexual.b-nhblack -0.93997 1.01541 -0.926
## seXorientXracea_male.c_LGB.b-nhblack -2.02676 1.89857 -1.068
## seXorientXraceb_female.c_LGB.b-nhblack -3.44318 1.15193 NA
## seXorientXracea_male.a_straight.c-hispanic 0.62865 0.36721 1.712
## seXorientXraceb_female.a_straight.c-hispanic 0.39855 0.36773 1.084
## seXorientXracea_male.b_bisexual.c-hispanic 11.53747 445.23473 NA
## seXorientXraceb_female.c_LGB.c-hispanic -0.02460 8.13736 -0.003
## seXorientXracea_male.a_straight.d-asian -0.53935 0.87494 -0.616
## seXorientXraceb_female.a_straight.d-asian 0.56885 0.72644 0.783
## W4_educb_HS_grad 0.25315 0.12624 2.005
## W4_educd_college_bach.plus -0.27401 0.15435 -1.775
## born_US_foreignb_born.foreign 0.07609 0.39185 0.194
## W4.marriedb_married_once -0.09691 0.07876 -1.230
## W4.marriedc_married_twice+ -0.03093 0.14747 -0.210
## incomeW4b_$25,000<$75,000 -0.15725 0.09572 -1.643
## incomeW4c_$75,000<$150,000 -0.39744 0.12415 -3.201
## incomeW4d_>$150,000 -0.97750 0.22566 -4.332
## drinkb_lite.drinker 0.14755 0.09756 1.512
## drinkc_drinker -0.17585 0.11944 -1.472
## smokeb_tried.smoking -0.17815 0.11664 -1.527
## smokec_smoker -0.57263 0.08891 -6.441
## healthb_Bad.health 0.90407 0.09194 9.834
## mom.raceb_black 0.75649 0.51290 1.475
## mom.racec_latino 0.33204 0.18101 1.834
## mom.raced_asian -0.64102 0.55564 -1.154
## mom_educb_HSgrad/voca 0.05305 0.10473 0.507
## mom_educc_col.grad -0.12370 0.14043 -0.881
## mom_empb_yes.emp -0.12521 0.08372 -1.495
## good.neigh1 -0.02508 0.09895 -0.253
## good.neigh2 0.12919 0.10314 1.253
## good.neigh3 0.18979 0.10842 1.751
## Pr(>|z|)
## (Intercept):1 0.152636
## (Intercept):2 2.77e-06 ***
## agew4 0.000126 ***
## seXorientXraceb_female.a_straight.a-nhwhite 0.006490 **
## seXorientXracea_male.b_bisexual.a-nhwhite 5.59e-05 ***
## seXorientXraceb_female.b_bisexual.a-nhwhite 0.777935
## seXorientXracea_male.c_LGB.a-nhwhite 0.000317 ***
## seXorientXraceb_female.c_LGB.a-nhwhite 0.633825
## seXorientXracea_male.a_straight.b-nhblack 0.691841
## seXorientXraceb_female.a_straight.b-nhblack 0.044909 *
## seXorientXraceb_female.b_bisexual.b-nhblack 0.354598
## seXorientXracea_male.c_LGB.b-nhblack 0.285737
## seXorientXraceb_female.c_LGB.b-nhblack NA
## seXorientXracea_male.a_straight.c-hispanic 0.086903 .
## seXorientXraceb_female.a_straight.c-hispanic 0.278449
## seXorientXracea_male.b_bisexual.c-hispanic NA
## seXorientXraceb_female.c_LGB.c-hispanic 0.997588
## seXorientXracea_male.a_straight.d-asian 0.537604
## seXorientXraceb_female.a_straight.d-asian 0.433586
## W4_educb_HS_grad 0.044936 *
## W4_educd_college_bach.plus 0.075862 .
## born_US_foreignb_born.foreign 0.846031
## W4.marriedb_married_once 0.218538
## W4.marriedc_married_twice+ 0.833868
## incomeW4b_$25,000<$75,000 0.100407
## incomeW4c_$75,000<$150,000 0.001368 **
## incomeW4d_>$150,000 1.48e-05 ***
## drinkb_lite.drinker 0.130421
## drinkc_drinker 0.140939
## smokeb_tried.smoking 0.126677
## smokec_smoker 1.19e-10 ***
## healthb_Bad.health < 2e-16 ***
## mom.raceb_black 0.140228
## mom.racec_latino 0.066600 .
## mom.raced_asian 0.248637
## mom_educb_HSgrad/voca 0.612451
## mom_educc_col.grad 0.378386
## mom_empb_yes.emp 0.134791
## good.neigh1 0.799910
## good.neigh2 0.210337
## good.neigh3 0.080025 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
##
## Residual deviance: 6073.927 on 5943 degrees of freedom
##
## Log-likelihood: -3036.964 on 5943 degrees of freedom
##
## Number of Fisher scoring iterations: 10
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## 'seXorientXraceb_female.c_LGB.b-nhblack', 'seXorientXracea_male.b_bisexual.c-hispanic'
##
##
## Exponentiated coefficients:
## agew4
## 1.081175e+00
## seXorientXraceb_female.a_straight.a-nhwhite
## 7.960265e-01
## seXorientXracea_male.b_bisexual.a-nhwhite
## 4.871612e-02
## seXorientXraceb_female.b_bisexual.a-nhwhite
## 1.081788e+00
## seXorientXracea_male.c_LGB.a-nhwhite
## 3.918895e+00
## seXorientXraceb_female.c_LGB.a-nhwhite
## 8.415229e-01
## seXorientXracea_male.a_straight.b-nhblack
## 8.016414e-01
## seXorientXraceb_female.a_straight.b-nhblack
## 3.523330e-01
## seXorientXraceb_female.b_bisexual.b-nhblack
## 3.906382e-01
## seXorientXracea_male.c_LGB.b-nhblack
## 1.317612e-01
## seXorientXraceb_female.c_LGB.b-nhblack
## 3.196273e-02
## seXorientXracea_male.a_straight.c-hispanic
## 1.875085e+00
## seXorientXraceb_female.a_straight.c-hispanic
## 1.489657e+00
## seXorientXracea_male.b_bisexual.c-hispanic
## 1.024852e+05
## seXorientXraceb_female.c_LGB.c-hispanic
## 9.756970e-01
## seXorientXracea_male.a_straight.d-asian
## 5.831271e-01
## seXorientXraceb_female.a_straight.d-asian
## 1.766235e+00
## W4_educb_HS_grad
## 1.288079e+00
## W4_educd_college_bach.plus
## 7.603254e-01
## born_US_foreignb_born.foreign
## 1.079062e+00
## W4.marriedb_married_once
## 9.076421e-01
## W4.marriedc_married_twice+
## 9.695431e-01
## incomeW4b_$25,000<$75,000
## 8.544909e-01
## incomeW4c_$75,000<$150,000
## 6.720356e-01
## incomeW4d_>$150,000
## 3.762513e-01
## drinkb_lite.drinker
## 1.158989e+00
## drinkc_drinker
## 8.387468e-01
## smokeb_tried.smoking
## 8.368194e-01
## smokec_smoker
## 5.640380e-01
## healthb_Bad.health
## 2.469639e+00
## mom.raceb_black
## 2.130791e+00
## mom.racec_latino
## 1.393804e+00
## mom.raced_asian
## 5.267549e-01
## mom_educb_HSgrad/voca
## 1.054487e+00
## mom_educc_col.grad
## 8.836417e-01
## mom_empb_yes.emp
## 8.823155e-01
## good.neigh1
## 9.752316e-01
## good.neigh2
## 1.137912e+00
## good.neigh3
## 1.209001e+00
AIC(fit)
## [1] 6437.869
AIC(fit2)
## [1] 6378.434
AIC(fit3)
## [1] 6344.984
AIC(fit4)
## [1] 6189.602
AIC(fit5)
## [1] 6182.315
#race x sexorient
AIC(fit6)
## [1] 6178.857
#sex X sexorient
AIC(fit7)
## [1] 6155.806
#sex x sexorient x race
AIC(fit8)
## [1] 6155.927
#fit<-svyolr(score.grp~sex+racethnic+sexorient,des)
#summary(fit)
#library(VGAM)
#Proportional odds
#fit.vgam<-vglm(as.ordered(score.grp)~sex+racethnic+sexorient+born_US_foreign+W4_educ+incomeW4,addhealth.pro, weights = GSWGT4_2/mean(GSWGT4_2, na.rm=T), family=cumulative(parallel = T, reverse = T)) #<-parallel = T == proportional odds
#summary(fit.vgam)
#continuation Ratio
#fit.crp<-vglm(as.ordered(score.grp)~sex+racethnic,addhealth.pro, weights = GSWGT4_2/mean(GSWGT4_2, na.rm=T), family=cratio( parallel = F, reverse = T))
#Non-proportional odds
#fit.vgam2<-vglm(as.ordered(score.grp)~sex+racethnic+sexorient+W4_UScitizenship+W4_educ+incomeW4,addhealth.pro, weights = GSWGT4_2/mean(GSWGT4_2, na.rm=T), family=cumulative(parallel = F, reverse = T)) #<-parallel = F == Nonproportional odds
is.parallel(fit5)
## (Intercept) agew4 sex racethnic sexorient
## FALSE TRUE TRUE TRUE TRUE
## W4_educ born_US_foreign W4.married incomeW4 drink
## TRUE TRUE TRUE TRUE TRUE
## smoke health mom.race mom_educ mom_emp
## TRUE TRUE TRUE TRUE TRUE
## good.neigh
## TRUE
table(addhealth.pro$raceXsexorient)
##
## a-nhwhite.a_straight b-nhblack.a_straight c-hispanic.a_straight
## 2318 381 99
## d-asian.a_straight a-nhwhite.b_bisexual b-nhblack.b_bisexual
## 41 61 7
## c-hispanic.b_bisexual d-asian.b_bisexual a-nhwhite.c_LGB
## 2 0 61
## b-nhblack.c_LGB c-hispanic.c_LGB d-asian.c_LGB
## 21 1 0
table(addhealth.pro$raceXsexorient,addhealth.pro$W4_educ)
##
## a_less_highschool b_HS_grad d_college_bach.plus
## a-nhwhite.a_straight 213 1588 517
## b-nhblack.a_straight 24 246 111
## c-hispanic.a_straight 10 70 19
## d-asian.a_straight 2 13 26
## a-nhwhite.b_bisexual 17 26 18
## b-nhblack.b_bisexual 1 6 0
## c-hispanic.b_bisexual 0 2 0
## d-asian.b_bisexual 0 0 0
## a-nhwhite.c_LGB 2 48 11
## b-nhblack.c_LGB 0 21 0
## c-hispanic.c_LGB 0 0 1
## d-asian.c_LGB 0 0 0
table(addhealth.pro$raceXsexorient,addhealth.pro$sex)
##
## a_male b_female
## a-nhwhite.a_straight 674 1644
## b-nhblack.a_straight 79 302
## c-hispanic.a_straight 37 62
## d-asian.a_straight 16 25
## a-nhwhite.b_bisexual 8 53
## b-nhblack.b_bisexual 0 7
## c-hispanic.b_bisexual 2 0
## d-asian.b_bisexual 0 0
## a-nhwhite.c_LGB 30 31
## b-nhblack.c_LGB 7 14
## c-hispanic.c_LGB 0 1
## d-asian.c_LGB 0 0