For this assignment, I will be fitting a Cox regression model to the ELS:2002 dataset, in an attempt to answer the following question:
To this end, I have chosen a set of independent variables that I hypothesize will affect the dependent variable (entry into community college out of high school). I will run a Cox regression on these variables to determine which of them increase the hazard of community college entry the most.
The variables are as follows:
Here, I use dplyr to create the variables that we are interested in:
# Standardize variable names: upper-case every column name of `els` so the
# variables can be referenced with one consistent spelling below.
names(els) <- toupper(names(els))
# College entry variables: keep the identifiers, design/weight columns and the
# raw items used below, then derive months-to-college for the F2 and F3 waves.
# Negative raw values are recoded to NA (presumably reserved missing codes --
# confirm against the ELS:2002 codebook).
entry_variables <- els %>%
  select(
    STU_ID, SCH_ID, STRAT_ID, PSU,
    F2HS2P_P, F2PS1SEC, F3HS2PS1, F3PS1SEC,
    BYQXDATP, F1RDTLFT, BYSES1QU, F3QWT,
    BYP33, BYSEX, BYRACE, BYPARED, BYGPARED
  ) %>%
  mutate(
    F2_months_to_college = ifelse(F2HS2P_P < 0, NA_real_, F2HS2P_P),
    F3_months_to_college = ifelse(F3HS2PS1 < 0, NA_real_, F3HS2PS1)
  )
# Event indicators built from the F2 and F3 sector variables:
#   negative (or missing) sector code -> NA
#   sector code 4                     -> 1
#   any other non-negative code       -> 0
# The original innermost branch, `ifelse(x <! 0 & x != 4, 0, 0)`, returned 0
# on both sides, so the recode reduces to the two-level form below.
# NOTE(review): code 4 is presumably the two-year sector and negative codes
# presumably mark missing / not-applicable responses -- confirm against the
# ELS:2002 codebook.
entry_variables <- entry_variables %>%
  mutate(
    F2_first_college_2_year = ifelse(
      F2PS1SEC < 0, NA_real_, as.numeric(F2PS1SEC == 4)
    ),
    F3_first_college_2_year = ifelse(
      F3PS1SEC < 0, NA_real_, as.numeric(F3PS1SEC == 4)
    )
  )

# Keep only respondents whose F3 indicator is defined.
# NOTE(review): rows with a negative F3PS1SEC are dropped here rather than
# kept as censored observations -- confirm this is the intended risk set.
entry_variables_complete <- entry_variables %>%
  filter(!is.na(F3_first_college_2_year))
# Dichotomize BYP33: values 0-3 -> 0, values 4-10 -> 1. Everything else
# (negative codes, values outside 0-10, missing) becomes NA.
entry_variables_complete <- entry_variables_complete %>%
  mutate(
    TV_ENTERTAIN_HRS = ifelse(
      BYP33 >= 0 & BYP33 <= 3,
      0,
      ifelse(BYP33 >= 4 & BYP33 <= 10, 1, NA)
    )
  )
# Label BYSEX: 1 -> "Male", 2 -> "Female"; any other value (including the
# negative codes and NA) has no match and becomes NA. This replaces a nested
# ifelse whose last level, `ifelse(BYSEX < 0, NA, NA)`, had identical branches.
entry_variables_complete <- entry_variables_complete %>%
  mutate(sex = c("Male", "Female")[match(BYSEX, c(1, 2))])
# Label BYPARED (codes 1-8) with the education level names used in the models.
# Any value outside 1-8 (negative codes, NA) has no match and becomes NA. This
# replaces a nine-deep ifelse chain whose last level,
# `ifelse(BYPARED < 0, NA, NA)`, had identical branches.
#
# PARENT_HS_OR_LOWER is 1 when the labelled level is high school or below,
# 0 for any other labelled level, and NA when the level itself is NA.
entry_variables_complete <- entry_variables_complete %>%
  mutate(
    parents_highest_ed = c(
      "Did.Not.Graduate.HS",
      "HS.Grad.or.GED",
      "2.Year.School.No.Deg",
      "2.Year.School.Awarded.Deg",
      "4.Year.School.No.Deg",
      "4.Year.School.Awarded.Deg",
      "Masters.Degree",
      "PhD.or.other.doctorate"
    )[match(BYPARED, 1:8)],
    PARENT_HS_OR_LOWER = ifelse(
      is.na(parents_highest_ed),
      NA,
      as.numeric(
        parents_highest_ed %in% c("Did.Not.Graduate.HS", "HS.Grad.or.GED")
      )
    )
  )
# Survival time: months to college, with missing durations set to 21.
# NOTE(review): 21 is presumably the end of the observation window used as
# the censoring time -- confirm.
entry_variables_complete <- entry_variables_complete %>%
  mutate(
    F3_UPDATED_MONTHS_TO_COLLEGE = replace(
      F3_months_to_college, is.na(F3_months_to_college), 21
    )
  )
# Low-SES indicator from BYSES1QU: codes 1 and 2 -> 1, negative codes -> NA,
# every other non-negative value -> 0. Missing input stays NA.
entry_variables_complete <- entry_variables_complete %>%
  mutate(
    BY_LOW_SES = ifelse(
      BYSES1QU < 0,
      NA_real_,
      as.numeric(BYSES1QU %in% c(1, 2))
    )
  )
# Label BYRACE (codes 1-7) by position in the lookup vector; any value outside
# 1-7 (including NA) has no match and becomes NA.
entry_variables_complete <- entry_variables_complete %>%
  mutate(
    race = c(
      "Am.Indian",
      "Asian/Pacific",
      "Afr.American",
      "Hispanic/NoRace",
      "Hispanic/withRace",
      "2orMoreRace",
      "White"
    )[match(BYRACE, 1:7)]
  )
# Label BYGPARED (codes 1-8) with the same education level names used for the
# parents. Any value outside 1-8 (negative codes, NA) has no match and becomes
# NA. This replaces a nine-deep ifelse chain whose last level,
# `ifelse(BYGPARED < 0, NA, NA)`, had identical branches.
entry_variables_complete <- entry_variables_complete %>%
  mutate(
    grandparents_highest_ed = c(
      "Did.Not.Graduate.HS",
      "HS.Grad.or.GED",
      "2.Year.School.No.Deg",
      "2.Year.School.Awarded.Deg",
      "4.Year.School.No.Deg",
      "4.Year.School.Awarded.Deg",
      "Masters.Degree",
      "PhD.or.other.doctorate"
    )[match(BYGPARED, 1:8)]
  )
# Drop rows missing any of the categorical covariates, then keep only the
# columns needed for modelling (ids, design variables, outcome, covariates,
# weight). BY_LOW_SES is not part of this missing-value filter.
entry_variables_complete <- entry_variables_complete %>%
  filter(
    !is.na(grandparents_highest_ed),
    !is.na(race),
    !is.na(PARENT_HS_OR_LOWER),
    !is.na(sex)
  )
entry_var_clean <- entry_variables_complete %>%
  select(
    STU_ID, STRAT_ID, PSU,
    F3_first_college_2_year, F3_UPDATED_MONTHS_TO_COLLEGE,
    sex, parents_highest_ed, grandparents_highest_ed,
    PARENT_HS_OR_LOWER, BY_LOW_SES, race,
    F3QWT
  )
# Convert the categorical covariates to factors and relevel them so that each
# model contrast is against the chosen reference category.
entry_var_clean <- entry_var_clean %>%
  mutate(
    sex = relevel(as.factor(sex), ref = "Male"),
    parents_highest_ed = relevel(
      as.factor(parents_highest_ed), ref = "HS.Grad.or.GED"
    ),
    grandparents_highest_ed = relevel(
      as.factor(grandparents_highest_ed), ref = "HS.Grad.or.GED"
    ),
    race = relevel(as.factor(race), ref = "White")
  )
For the first part of this assignment, I'll create a Cox regression model without the survey design included in the ELS:2002 data.
# Unweighted Cox model (no survey design): time is months to college entry,
# the event is entry into a two-year school as the first college.
fit.cox <- coxreg(
  Surv(time = F3_UPDATED_MONTHS_TO_COLLEGE, event = F3_first_college_2_year) ~
    BY_LOW_SES + sex + parents_highest_ed + grandparents_highest_ed + race,
  data = entry_var_clean
)
summary(fit.cox)
## Call:
## coxreg(formula = Surv(time = F3_UPDATED_MONTHS_TO_COLLEGE, event = F3_first_college_2_year) ~
## BY_LOW_SES + sex + parents_highest_ed + grandparents_highest_ed +
## race, data = entry_var_clean)
##
## Covariate Mean Coef Rel.Risk S.E. Wald p
## BY_LOW_SES 0.465 0.092 1.096 0.051 0.073
## sex
## Male 0.480 0 1 (reference)
## Female 0.520 0.025 1.025 0.039 0.515
## parents_highest_ed
## HS.Grad.or.GED 0.177 0 1 (reference)
## 2.Year.School.Aw 0.123 0.182 1.200 0.069 0.009
## 2.Year.School.No 0.122 0.079 1.082 0.070 0.260
## 4.Year.School.Aw 0.222 -0.092 0.912 0.073 0.206
## 4.Year.School.No 0.132 -0.012 0.988 0.073 0.872
## Did.Not.Graduate 0.048 -0.041 0.960 0.098 0.674
## Masters.Degree 0.111 -0.243 0.784 0.091 0.007
## PhD.or.other.doc 0.064 -0.551 0.576 0.121 0.000
## grandparents_highest_ed
## HS.Grad.or.GED 0.372 0 1 (reference)
## 2.Year.School.Aw 0.070 -0.085 0.919 0.079 0.285
## 2.Year.School.No 0.055 0.004 1.004 0.086 0.967
## 4.Year.School.Aw 0.154 -0.162 0.850 0.063 0.009
## 4.Year.School.No 0.049 -0.160 0.852 0.097 0.098
## Did.Not.Graduate 0.181 -0.054 0.948 0.058 0.357
## Masters.Degree 0.071 -0.223 0.800 0.087 0.010
## PhD.or.other.doc 0.048 -0.237 0.789 0.112 0.035
## race
## White 0.592 0 1 (reference)
## 2orMoreRace 0.049 -0.065 0.937 0.094 0.491
## Afr.American 0.132 -0.221 0.802 0.065 0.001
## Am.Indian 0.009 -0.226 0.798 0.226 0.317
## Asian/Pacific 0.062 0.261 1.298 0.076 0.001
## Hispanic/NoRace 0.071 0.198 1.219 0.075 0.008
## Hispanic/withRac 0.085 0.134 1.143 0.070 0.056
##
## Events 2723
## Total time at risk 48924
## Max. log. likelihood -21878
## LR test statistic 168.05
## Degrees of freedom 22
## Overall p-value 0
From this model, we see that Low SES is at the very edge of significance, while sex does not appear to be significant at all. African Americans are about twenty percent less likely to enter community college after high school than a white reference student, while Asian and Hispanic students are about thirty and twenty-two percent more likely to attend community college out of high school (respectively).
Parents' and grandparents' education levels are also associated with the risk of attending community college after high school. Children of parents who attended community college and got an award from that college were twenty percent more likely to attend community college out of high school than children of parents who only graduated high school (the reference group).
On the other hand, children of parents who have a master's degree or a doctorate are much less likely to attend community college out of high school (twenty-two percent and forty-two percent less likely, respectively).
Now I will run the same analysis, but with the survey design included in the ELS:2002 dataset:
# Now with the survey design: stratified by STRAT_ID and weighted by F3QWT.
# `nest = TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
# NOTE(review): `ids = ~STU_ID` makes every student their own sampling unit
# (the summary reports an independent sampling design), although a PSU column
# is kept in entry_var_clean. If PSU is the cluster identifier, `ids = ~PSU`
# may be what was intended -- confirm against the ELS:2002 documentation
# before changing, since it would alter the robust standard errors.
cox.des <- svydesign(
  ids = ~STU_ID,
  strata = ~STRAT_ID,
  weights = ~F3QWT,
  data = entry_var_clean,
  nest = TRUE
)

# Design-based Cox model with the same main effects as the unweighted fit.
fit.cox.surv <- svycoxph(
  Surv(time = F3_UPDATED_MONTHS_TO_COLLEGE, event = F3_first_college_2_year) ~
    BY_LOW_SES + parents_highest_ed + grandparents_highest_ed + race + sex,
  design = cox.des
)
summary(fit.cox.surv)
## Stratified Independent Sampling design (with replacement)
## svydesign(ids = ~STU_ID, strata = ~STRAT_ID, weights = ~F3QWT,
## data = entry_var_clean, nest = T)
## Call:
## svycoxph(formula = Surv(time = F3_UPDATED_MONTHS_TO_COLLEGE,
## event = F3_first_college_2_year) ~ BY_LOW_SES + parents_highest_ed +
## grandparents_highest_ed + race + sex, design = cox.des)
##
## n= 8692, number of events= 2723
##
## coef exp(coef) se(coef)
## BY_LOW_SES 0.057566 1.059255 0.047726
## parents_highest_ed2.Year.School.Awarded.Deg 0.143316 1.154094 0.063726
## parents_highest_ed2.Year.School.No.Deg 0.091500 1.095817 0.063668
## parents_highest_ed4.Year.School.Awarded.Deg -0.114875 0.891478 0.068016
## parents_highest_ed4.Year.School.No.Deg -0.013639 0.986453 0.067622
## parents_highest_edDid.Not.Graduate.HS -0.008439 0.991596 0.089173
## parents_highest_edMasters.Degree -0.195622 0.822323 0.086837
## parents_highest_edPhD.or.other.doctorate -0.436102 0.646552 0.121939
## grandparents_highest_ed2.Year.School.Awarded.Deg -0.151738 0.859214 0.074633
## grandparents_highest_ed2.Year.School.No.Deg -0.006071 0.993947 0.078877
## grandparents_highest_ed4.Year.School.Awarded.Deg -0.204067 0.815408 0.059962
## grandparents_highest_ed4.Year.School.No.Deg -0.183212 0.832592 0.095133
## grandparents_highest_edDid.Not.Graduate.HS -0.075278 0.927485 0.054479
## grandparents_highest_edMasters.Degree -0.312164 0.731861 0.084456
## grandparents_highest_edPhD.or.other.doctorate -0.140688 0.868761 0.106693
## race2orMoreRace -0.191099 0.826051 0.097445
## raceAfr.American -0.206223 0.813652 0.059259
## raceAm.Indian -0.264076 0.767915 0.202073
## raceAsian/Pacific 0.146274 1.157514 0.111296
## raceHispanic/NoRace 0.146389 1.157646 0.070106
## raceHispanic/withRace 0.162788 1.176787 0.063769
## sexFemale 0.071644 1.074273 0.036538
## robust se z Pr(>|z|)
## BY_LOW_SES 0.058986 0.976 0.32910
## parents_highest_ed2.Year.School.Awarded.Deg 0.081248 1.764 0.07774 .
## parents_highest_ed2.Year.School.No.Deg 0.080687 1.134 0.25679
## parents_highest_ed4.Year.School.Awarded.Deg 0.085864 -1.338 0.18094
## parents_highest_ed4.Year.School.No.Deg 0.085568 -0.159 0.87335
## parents_highest_edDid.Not.Graduate.HS 0.120597 -0.070 0.94421
## parents_highest_edMasters.Degree 0.107941 -1.812 0.06994 .
## parents_highest_edPhD.or.other.doctorate 0.142998 -3.050 0.00229 **
## grandparents_highest_ed2.Year.School.Awarded.Deg 0.092996 -1.632 0.10275
## grandparents_highest_ed2.Year.School.No.Deg 0.100456 -0.060 0.95181
## grandparents_highest_ed4.Year.School.Awarded.Deg 0.072395 -2.819 0.00482 **
## grandparents_highest_ed4.Year.School.No.Deg 0.114054 -1.606 0.10819
## grandparents_highest_edDid.Not.Graduate.HS 0.067392 -1.117 0.26398
## grandparents_highest_edMasters.Degree 0.102768 -3.038 0.00239 **
## grandparents_highest_edPhD.or.other.doctorate 0.139928 -1.005 0.31469
## race2orMoreRace 0.118253 -1.616 0.10609
## raceAfr.American 0.071698 -2.876 0.00402 **
## raceAm.Indian 0.235593 -1.121 0.26233
## raceAsian/Pacific 0.094418 1.549 0.12133
## raceHispanic/NoRace 0.091245 1.604 0.10864
## raceHispanic/withRace 0.083093 1.959 0.05010 .
## sexFemale 0.045328 1.581 0.11398
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95
## BY_LOW_SES 1.0593 0.9441 0.9436
## parents_highest_ed2.Year.School.Awarded.Deg 1.1541 0.8665 0.9842
## parents_highest_ed2.Year.School.No.Deg 1.0958 0.9126 0.9355
## parents_highest_ed4.Year.School.Awarded.Deg 0.8915 1.1217 0.7534
## parents_highest_ed4.Year.School.No.Deg 0.9865 1.0137 0.8341
## parents_highest_edDid.Not.Graduate.HS 0.9916 1.0085 0.7829
## parents_highest_edMasters.Degree 0.8223 1.2161 0.6655
## parents_highest_edPhD.or.other.doctorate 0.6466 1.5467 0.4885
## grandparents_highest_ed2.Year.School.Awarded.Deg 0.8592 1.1639 0.7160
## grandparents_highest_ed2.Year.School.No.Deg 0.9939 1.0061 0.8163
## grandparents_highest_ed4.Year.School.Awarded.Deg 0.8154 1.2264 0.7075
## grandparents_highest_ed4.Year.School.No.Deg 0.8326 1.2011 0.6658
## grandparents_highest_edDid.Not.Graduate.HS 0.9275 1.0782 0.8127
## grandparents_highest_edMasters.Degree 0.7319 1.3664 0.5983
## grandparents_highest_edPhD.or.other.doctorate 0.8688 1.1511 0.6604
## race2orMoreRace 0.8261 1.2106 0.6552
## raceAfr.American 0.8137 1.2290 0.7070
## raceAm.Indian 0.7679 1.3022 0.4839
## raceAsian/Pacific 1.1575 0.8639 0.9620
## raceHispanic/NoRace 1.1576 0.8638 0.9681
## raceHispanic/withRace 1.1768 0.8498 0.9999
## sexFemale 1.0743 0.9309 0.9829
## upper .95
## BY_LOW_SES 1.1891
## parents_highest_ed2.Year.School.Awarded.Deg 1.3533
## parents_highest_ed2.Year.School.No.Deg 1.2836
## parents_highest_ed4.Year.School.Awarded.Deg 1.0549
## parents_highest_ed4.Year.School.No.Deg 1.1666
## parents_highest_edDid.Not.Graduate.HS 1.2560
## parents_highest_edMasters.Degree 1.0161
## parents_highest_edPhD.or.other.doctorate 0.8557
## grandparents_highest_ed2.Year.School.Awarded.Deg 1.0310
## grandparents_highest_ed2.Year.School.No.Deg 1.2102
## grandparents_highest_ed4.Year.School.Awarded.Deg 0.9397
## grandparents_highest_ed4.Year.School.No.Deg 1.0412
## grandparents_highest_edDid.Not.Graduate.HS 1.0585
## grandparents_highest_edMasters.Degree 0.8952
## grandparents_highest_edPhD.or.other.doctorate 1.1429
## race2orMoreRace 1.0415
## raceAfr.American 0.9364
## raceAm.Indian 1.2186
## raceAsian/Pacific 1.3928
## raceHispanic/NoRace 1.3843
## raceHispanic/withRace 1.3849
## sexFemale 1.1741
##
## Concordance= 0.59 (se = 0.008 )
## Likelihood ratio test= NA on 22 df, p=NA
## Wald test = 95.76 on 22 df, p=4e-11
## Score (logrank) test = NA on 22 df, p=NA
##
## (Note: the likelihood ratio and score tests assume independence of
## observations within a cluster, the Wald and robust score tests do not).
As you can see, the results above are broadly similar to the main effects without the survey design. There are some important differences, however:
Contrary to the results above, I still believe that sex and SES may play a role in community college entry after high school. To test this hypothesis, I will need to introduce a SEX*SES interaction term into the model, as performed below:
# Now with an interaction term: same design-based model plus sex * BY_LOW_SES.
# The main effects are already listed, so the product contributes only the
# BY_LOW_SES:sex interaction coefficient.
fit.cox.surv2 <- svycoxph(
  Surv(time = F3_UPDATED_MONTHS_TO_COLLEGE, event = F3_first_college_2_year) ~
    BY_LOW_SES + parents_highest_ed + grandparents_highest_ed + race + sex +
    (sex * BY_LOW_SES),
  design = cox.des
)
summary(fit.cox.surv2)
## Stratified Independent Sampling design (with replacement)
## svydesign(ids = ~STU_ID, strata = ~STRAT_ID, weights = ~F3QWT,
## data = entry_var_clean, nest = T)
## Call:
## svycoxph(formula = Surv(time = F3_UPDATED_MONTHS_TO_COLLEGE,
## event = F3_first_college_2_year) ~ BY_LOW_SES + parents_highest_ed +
## grandparents_highest_ed + race + sex + (sex * BY_LOW_SES),
## design = cox.des)
##
## n= 8692, number of events= 2723
##
## coef exp(coef) se(coef)
## BY_LOW_SES 0.022376 1.022629 0.060880
## parents_highest_ed2.Year.School.Awarded.Deg 0.144442 1.155395 0.063754
## parents_highest_ed2.Year.School.No.Deg 0.092880 1.097330 0.063682
## parents_highest_ed4.Year.School.Awarded.Deg -0.114585 0.891736 0.068017
## parents_highest_ed4.Year.School.No.Deg -0.012728 0.987353 0.067641
## parents_highest_edDid.Not.Graduate.HS -0.008807 0.991232 0.089178
## parents_highest_edMasters.Degree -0.195876 0.822114 0.086850
## parents_highest_edPhD.or.other.doctorate -0.436381 0.646372 0.121944
## grandparents_highest_ed2.Year.School.Awarded.Deg -0.147770 0.862629 0.074762
## grandparents_highest_ed2.Year.School.No.Deg -0.006720 0.993303 0.078878
## grandparents_highest_ed4.Year.School.Awarded.Deg -0.202528 0.816663 0.059996
## grandparents_highest_ed4.Year.School.No.Deg -0.182744 0.832981 0.095136
## grandparents_highest_edDid.Not.Graduate.HS -0.075469 0.927309 0.054473
## grandparents_highest_edMasters.Degree -0.310963 0.732741 0.084472
## grandparents_highest_edPhD.or.other.doctorate -0.137887 0.871197 0.106750
## race2orMoreRace -0.191058 0.826085 0.097443
## raceAfr.American -0.205309 0.814396 0.059271
## raceAm.Indian -0.266780 0.765842 0.202106
## raceAsian/Pacific 0.146013 1.157211 0.111290
## raceHispanic/NoRace 0.147741 1.159213 0.070113
## raceHispanic/withRace 0.163123 1.177182 0.063770
## sexFemale 0.035803 1.036452 0.053083
## BY_LOW_SES:sexFemale 0.067992 1.070356 0.073031
## robust se z Pr(>|z|)
## BY_LOW_SES 0.076054 0.294 0.76859
## parents_highest_ed2.Year.School.Awarded.Deg 0.081292 1.777 0.07560 .
## parents_highest_ed2.Year.School.No.Deg 0.080603 1.152 0.24919
## parents_highest_ed4.Year.School.Awarded.Deg 0.085832 -1.335 0.18188
## parents_highest_ed4.Year.School.No.Deg 0.085563 -0.149 0.88175
## parents_highest_edDid.Not.Graduate.HS 0.120746 -0.073 0.94185
## parents_highest_edMasters.Degree 0.108025 -1.813 0.06979 .
## parents_highest_edPhD.or.other.doctorate 0.142926 -3.053 0.00226 **
## grandparents_highest_ed2.Year.School.Awarded.Deg 0.093092 -1.587 0.11243
## grandparents_highest_ed2.Year.School.No.Deg 0.100702 -0.067 0.94680
## grandparents_highest_ed4.Year.School.Awarded.Deg 0.072367 -2.799 0.00513 **
## grandparents_highest_ed4.Year.School.No.Deg 0.113988 -1.603 0.10889
## grandparents_highest_edDid.Not.Graduate.HS 0.067418 -1.119 0.26296
## grandparents_highest_edMasters.Degree 0.102789 -3.025 0.00248 **
## grandparents_highest_edPhD.or.other.doctorate 0.139637 -0.987 0.32341
## race2orMoreRace 0.118153 -1.617 0.10587
## raceAfr.American 0.071606 -2.867 0.00414 **
## raceAm.Indian 0.235598 -1.132 0.25749
## raceAsian/Pacific 0.094384 1.547 0.12186
## raceHispanic/NoRace 0.091404 1.616 0.10602
## raceHispanic/withRace 0.083234 1.960 0.05002 .
## sexFemale 0.064106 0.558 0.57650
## BY_LOW_SES:sexFemale 0.090243 0.753 0.45119
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95
## BY_LOW_SES 1.0226 0.9779 0.8810
## parents_highest_ed2.Year.School.Awarded.Deg 1.1554 0.8655 0.9852
## parents_highest_ed2.Year.School.No.Deg 1.0973 0.9113 0.9370
## parents_highest_ed4.Year.School.Awarded.Deg 0.8917 1.1214 0.7537
## parents_highest_ed4.Year.School.No.Deg 0.9874 1.0128 0.8349
## parents_highest_edDid.Not.Graduate.HS 0.9912 1.0088 0.7823
## parents_highest_edMasters.Degree 0.8221 1.2164 0.6652
## parents_highest_edPhD.or.other.doctorate 0.6464 1.5471 0.4885
## grandparents_highest_ed2.Year.School.Awarded.Deg 0.8626 1.1592 0.7188
## grandparents_highest_ed2.Year.School.No.Deg 0.9933 1.0067 0.8154
## grandparents_highest_ed4.Year.School.Awarded.Deg 0.8167 1.2245 0.7087
## grandparents_highest_ed4.Year.School.No.Deg 0.8330 1.2005 0.6662
## grandparents_highest_edDid.Not.Graduate.HS 0.9273 1.0784 0.8125
## grandparents_highest_edMasters.Degree 0.7327 1.3647 0.5990
## grandparents_highest_edPhD.or.other.doctorate 0.8712 1.1478 0.6626
## race2orMoreRace 0.8261 1.2105 0.6553
## raceAfr.American 0.8144 1.2279 0.7078
## raceAm.Indian 0.7658 1.3058 0.4826
## raceAsian/Pacific 1.1572 0.8641 0.9618
## raceHispanic/NoRace 1.1592 0.8627 0.9691
## raceHispanic/withRace 1.1772 0.8495 1.0000
## sexFemale 1.0365 0.9648 0.9141
## BY_LOW_SES:sexFemale 1.0704 0.9343 0.8968
## upper .95
## BY_LOW_SES 1.1870
## parents_highest_ed2.Year.School.Awarded.Deg 1.3550
## parents_highest_ed2.Year.School.No.Deg 1.2851
## parents_highest_ed4.Year.School.Awarded.Deg 1.0551
## parents_highest_ed4.Year.School.No.Deg 1.1676
## parents_highest_edDid.Not.Graduate.HS 1.2559
## parents_highest_edMasters.Degree 1.0160
## parents_highest_edPhD.or.other.doctorate 0.8553
## grandparents_highest_ed2.Year.School.Awarded.Deg 1.0353
## grandparents_highest_ed2.Year.School.No.Deg 1.2100
## grandparents_highest_ed4.Year.School.Awarded.Deg 0.9411
## grandparents_highest_ed4.Year.School.No.Deg 1.0415
## grandparents_highest_edDid.Not.Graduate.HS 1.0583
## grandparents_highest_edMasters.Degree 0.8963
## grandparents_highest_edPhD.or.other.doctorate 1.1454
## race2orMoreRace 1.0414
## raceAfr.American 0.9371
## raceAm.Indian 1.2153
## raceAsian/Pacific 1.3924
## raceHispanic/NoRace 1.3866
## raceHispanic/withRace 1.3858
## sexFemale 1.1752
## BY_LOW_SES:sexFemale 1.2774
##
## Concordance= 0.59 (se = 0.008 )
## Likelihood ratio test= NA on 23 df, p=NA
## Wald test = 96.29 on 23 df, p=6e-11
## Score (logrank) test = NA on 23 df, p=NA
##
## (Note: the likelihood ratio and score tests assume independence of
## observations within a cluster, the Wald and robust score tests do not).
Unfortunately, we see that the interaction between sex and Low SES is not significant, and furthermore the main effects of the model are largely unchanged.
Here, we see our standard plots (survival, cumulative hazard and smoothed hazard) for the model that we have created above:
# Survival curve for the design-based model, drawn without confidence bands.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
plot(survfit(fit.cox.surv, conf.int = FALSE), xlab = "Months", ylab = "S(t)")
title(main = "Survival Plot for CC Entry - Mean")

# Cumulative hazard H(t) = -log S(t) on the fitted curve's time grid.
# `times` is reused by the later hazard plots.
surv1 <- survfit(fit.cox.surv)
Hi <- -log(surv1$surv)
times <- surv1$time
plot(
  Hi ~ times,
  type = "s",
  ylab = "H(t)",
  xlab = "Months after HS",
  main = "Cumulative Hazard Plot for CC Entry"
)
axis(1, at = c(0, 1, 2))

# Hazard as the increment of H(t) between successive times. Despite the axis
# label, no smoothing is applied here; this is the raw first difference.
hsi <- diff(c(0, Hi))
plot(
  hsi ~ times,
  type = "l",
  ylab = "smoothed h(t)",
  xlab = "Months after HS",
  main = "Hazard Plot for CC Entry"
)
axis(1, at = c(0, 1, 2))
Here I've taken the data from our survey design model and created plots which reflect different risk profiles associated with community college entry that are found in the ELS:2002 data:
The primary difference between these two risk profiles is expected to be the differences between Hispanic community college entrance (higher than reference) and African American community college entrance (lower than reference).
Here, we see the typical survival fit for all three risk profiles:
# Covariate profiles to compare. They differ only in race; both are low-SES
# males whose parents and grandparents hold a two-year degree.
hispanic_profile <- data.frame(
  race = "Hispanic/withRace",
  BY_LOW_SES = 1,
  parents_highest_ed = "2.Year.School.Awarded.Deg",
  grandparents_highest_ed = "2.Year.School.Awarded.Deg",
  sex = "Male"
)
afr_american_profile <- data.frame(
  race = "Afr.American",
  BY_LOW_SES = 1,
  parents_highest_ed = "2.Year.School.Awarded.Deg",
  grandparents_highest_ed = "2.Year.School.Awarded.Deg",
  sex = "Male"
)

# Survival curve with no profile supplied (black), then one curve per profile.
# Confidence bands are off; `FALSE` is spelled out because `F` is reassignable.
plot(survfit(fit.cox.surv, conf.int = FALSE))
lines(
  survfit(fit.cox.surv, newdata = hispanic_profile, conf.int = FALSE),
  col = "red",
  lty = 1
)
lines(
  survfit(fit.cox.surv, newdata = afr_american_profile, conf.int = FALSE),
  col = "blue",
  lty = 2
)
title(
  main = c(
    "Survival function for CC Entry: Mean vs. Hispanic vs. Af. Am",
    "Low SES, Parents/Grandparents 2 Year Grad"
  )
)
legend(
  "bottomleft",
  legend = c(
    "mean",
    "Hispanic/withRace, Low SES, Parents/Gparents CC Award,Male",
    "AfricanAmerican, Low SES, Parents/Gparents CC Award,Male"
  ),
  col = c(1, "red", "blue"),
  lty = c(1, 1, 2)
)
As expected, Hispanic students are at higher risk of community college attendance after high school compared to the mean, while African American students are at lower risk overall.
This same general pattern is repeated with the cumulative hazard function:
# Fitted survival curves: no profile supplied (chz1), the Hispanic profile
# (chz2) and the African American profile (chz3).
chz1 <- survfit(fit.cox.surv)
chz2 <- survfit(
  fit.cox.surv,
  newdata = data.frame(
    race = "Hispanic/withRace",
    BY_LOW_SES = 1,
    parents_highest_ed = "2.Year.School.Awarded.Deg",
    grandparents_highest_ed = "2.Year.School.Awarded.Deg",
    sex = "Male"
  )
)
chz3 <- survfit(
  fit.cox.surv,
  newdata = data.frame(
    race = "Afr.American",
    BY_LOW_SES = 1,
    parents_highest_ed = "2.Year.School.Awarded.Deg",
    grandparents_highest_ed = "2.Year.School.Awarded.Deg",
    sex = "Male"
  )
)

# Cumulative hazard H(t) = -log S(t) for each curve.
chz_a <- -log(chz1$surv)
chz_b <- -log(chz2$surv)
chz_c <- -log(chz3$surv)

# `times` is the time grid taken from the baseline fit earlier in the script.
plot(chz_a ~ times, type = "s", ylab = "H(t)", xlab = "Months")
title(main = "Cumulative Hazard Plot")
lines(chz_b ~ times, col = "red", type = "s")
lines(chz_c ~ times, col = "blue", type = "s")
legend(
  "topright",
  legend = c(
    "Means of Covariates",
    "Hispanic/Low SES/Parents-Gparents CC Award,Male",
    "Afr.American/Low SES/Parent-Gparents CC Award,Male"
  ),
  lty = 1,
  col = c(1, "red", "blue")
)
Finally, I've created a smoothed hazard function, which also replicates the Hispanic and African American risk profiles as compared to the referent:
# Smoothed hazards: take the increments of each cumulative hazard and fit a
# local-linear loess (degree 1, span 0.25) against the shared time grid.
hz1 <- loess(diff(c(0, chz_a)) ~ times, degree = 1, span = 0.25)
hz2 <- loess(diff(c(0, chz_b)) ~ times, degree = 1, span = 0.25)
hz3 <- loess(diff(c(0, chz_c)) ~ times, degree = 1, span = 0.25)

# Plot the fitted loess values for the three curves on one set of axes.
plot(
  predict(hz1) ~ times,
  type = "l",
  ylab = "smoothed h(t)",
  xlab = "Months"
)
title(main = "Smoothed hazard plots")
lines(predict(hz2) ~ times, type = "l", col = "red")
lines(predict(hz3) ~ times, type = "l", col = "blue")
legend(
  "topright",
  legend = c(
    "Means of Covariates",
    "Hispanic/Low SES/Parents-Gparents CC Award,Male",
    "Afr.American/Low SES/Parent-Gparents CC Award,Male"
  ),
  lty = 1,
  col = c(1, "red", "blue")
)
As you can see, the smoothed hazard fit largely replicates the differences between the two main risk profiles and the referent.
From this exercise we've learned that there are real differences in the risk profiles of Hispanics and African Americans in terms of community college entry immediately after high school. In addition, the highest education level of the parent seems to be either not significant or barely so, while the grandparent's highest education level seems to predict entry into community college after high school (in general, the higher the grandparents' education, the lower the hazard of entry into community college).
This is an interesting finding as it suggests distant family traditions of university graduation may adversely affect community college entry. The mechanism and ramifications of this finding merit further exploration.