# Upper-case every column name so the variable codes used below match.
colnames(els) <- toupper(colnames(els))

# College entry variables: keep the identifiers, design variables, weight and
# covariates that the rest of this script recodes or models.
entry_variables <- select(
  els,
  STU_ID, SCH_ID, STRAT_ID, PSU,
  F2HS2P_P, F2PS1SEC, F3HS2PS1, F3PS1SEC,
  BYQXDATP, F1RDTLFT, BYSES1QU, F3QWT,
  BYP33, BYSEX, BYRACE, BYPARED, BYGPARED,
  F1RGPP2, BYMATHSE, BYSTEXP, BYPARASP
)
# Label the parent's educational expectation for the student (BYPARASP).
# Codes 1-7 get a label; negative codes, missing values and any other code
# become NA.
entry_variables <- entry_variables %>%
  mutate(
    PARENT_EXPECTATION = case_when(
      BYPARASP == 1 ~ "Will.Not.Graduate.HS",
      BYPARASP == 2 ~ "HS.Grad.or.GED",
      BYPARASP == 3 ~ "Expect.2.Yr.College",
      BYPARASP == 4 ~ "4.Year.School.No.Deg",
      BYPARASP == 5 ~ "Graduate.4.Year",
      BYPARASP == 6 ~ "Masters.Degree",
      BYPARASP == 7 ~ "PhD.Expected",
      TRUE ~ NA_character_
    )
  )
# Label the student's own educational expectation (BYSTEXP).
# Codes 1-7 get a label; negative codes, missing values and any other code
# become NA. The fallthrough depends only on BYSTEXP (the previous version
# tested BYPARED in its last condition, a copy-paste slip that was harmless
# only because both outcomes of that test were NA).
entry_variables <- entry_variables %>%
  mutate(
    STUDENT_EXPECTATION = case_when(
      BYSTEXP == 1 ~ "Will.Not.Graduate.HS",
      BYSTEXP == 2 ~ "HS.Grad.or.GED",
      BYSTEXP == 3 ~ "Expect.2.Yr.College",
      BYSTEXP == 4 ~ "4.Year.School.No.Deg",
      BYSTEXP == 5 ~ "Graduate.4.Year",
      BYSTEXP == 6 ~ "Masters.Degree",
      BYSTEXP == 7 ~ "PhD.Expected",
      TRUE ~ NA_character_
    )
  )
# Turn reserved (negative) codes into NA for the continuous measures.
# MATH_SELF uses a cut-off of -2 rather than 0, so values in [-2, 0) are kept.
entry_variables <- entry_variables %>%
  mutate(
    MATH_SELF = ifelse(BYMATHSE < (-2), NA_real_, BYMATHSE),
    HS_GPA = ifelse(F1RGPP2 < 0, NA_real_, F1RGPP2),
    F2_months_to_college = ifelse(F2HS2P_P < 0, NA_real_, F2HS2P_P),
    F3_months_to_college = ifelse(F3HS2PS1 < 0, NA_real_, F3HS2PS1)
  )
# Indicator for whether the first postsecondary institution had sector code 4
# (treated here as a 2-year college -- NOTE(review): confirm the sector coding
# against the codebook), from the F2 and F3 follow-ups respectively.
#   sector == 4      -> 1
#   negative code    -> NA
#   any other sector -> 0
#   missing          -> NA
# The former third ifelse() used `x <!0 & x != 4`, which R parses as
# `x < (!0)`, and returned 0 in both arms; it is replaced by a plain 0.
entry_variables <- entry_variables %>%
  mutate(
    F2_first_college_2_year = ifelse(
      F2PS1SEC == 4, 1,
      ifelse(F2PS1SEC < 0, NA_real_, 0)
    ),
    F3_first_college_2_year = ifelse(
      F3PS1SEC == 4, 1,
      ifelse(F3PS1SEC < 0, NA_real_, 0)
    )
  )
# Analysis sample: students with a known F3 first-college indicator.
entry_variables_complete <- filter(
  entry_variables,
  !is.na(F3_first_college_2_year)
)

# Dichotomise BYP33: codes 0-3 -> 0, codes 4-10 -> 1, everything else
# (negative, missing, out of range) -> NA.
entry_variables_complete <- entry_variables_complete %>%
  mutate(
    TV_ENTERTAIN_HRS = case_when(
      BYP33 >= 0 & BYP33 <= 3 ~ 0,
      BYP33 >= 4 & BYP33 <= 10 ~ 1,
      TRUE ~ NA_real_
    )
  )
# Label sex from BYSEX (1 = Male, 2 = Female); any other code becomes NA.
entry_variables_complete <- entry_variables_complete %>%
  mutate(
    sex = case_when(
      BYSEX == 1 ~ "Male",
      BYSEX == 2 ~ "Female",
      TRUE ~ NA_character_
    )
  )
# Label the parents' highest education level (BYPARED, codes 1-8).
# Negative codes, missing values and any other code become NA.
entry_variables_complete <- entry_variables_complete %>%
  mutate(
    parents_highest_ed = case_when(
      BYPARED == 1 ~ "Did.Not.Graduate.HS",
      BYPARED == 2 ~ "HS.Grad.or.GED",
      BYPARED == 3 ~ "2.Year.School.No.Deg",
      BYPARED == 4 ~ "2.Year.School.Awarded.Deg",
      BYPARED == 5 ~ "4.Year.School.No.Deg",
      BYPARED == 6 ~ "4.Year.School.Awarded.Deg",
      BYPARED == 7 ~ "Masters.Degree",
      BYPARED == 8 ~ "PhD.or.other.doctorate",
      TRUE ~ NA_character_
    )
  )
entry_variables_complete <- entry_variables_complete %>%
  mutate(
    # 1 when the parents' highest education is high school or less, else 0
    # (NA when the parental education label is NA).
    PARENT_HS_OR_LOWER = ifelse(
      parents_highest_ed == "Did.Not.Graduate.HS" |
        parents_highest_ed == "HS.Grad.or.GED",
      1,
      0
    ),
    # Students with no recorded months-to-college are assigned month 21.
    # NOTE(review): 21 looks like the censoring time -- confirm.
    F3_UPDATED_MONTHS_TO_COLLEGE = ifelse(
      is.na(F3_months_to_college),
      21,
      F3_months_to_college
    )
  )
# Low-SES indicator from the SES quartile (BYSES1QU): quartiles 1-2 -> 1,
# negative codes and missing -> NA, every other value -> 0.
entry_variables_complete <- entry_variables_complete %>%
  mutate(
    BY_LOW_SES = case_when(
      is.na(BYSES1QU) ~ NA_real_,
      BYSES1QU == 1 | BYSES1QU == 2 ~ 1,
      BYSES1QU < 0 ~ NA_real_,
      TRUE ~ 0
    )
  )
# Label race/ethnicity from BYRACE; codes 4 and 5 are merged into "Hispanic".
# Any other code (including negatives and missing) becomes NA.
entry_variables_complete <- entry_variables_complete %>%
  mutate(
    race = case_when(
      BYRACE == 1 ~ "Am.Indian",
      BYRACE == 2 ~ "Asian/Pacific",
      BYRACE == 3 ~ "Afr.American",
      BYRACE == 4 | BYRACE == 5 ~ "Hispanic",
      BYRACE == 6 ~ "2orMoreRace",
      BYRACE == 7 ~ "White",
      TRUE ~ NA_character_
    )
  )
# Label the grandparents' highest education level (BYGPARED, codes 1-8),
# using the same labels as parents_highest_ed. Other codes become NA.
entry_variables_complete <- entry_variables_complete %>%
  mutate(
    grandparents_highest_ed = case_when(
      BYGPARED == 1 ~ "Did.Not.Graduate.HS",
      BYGPARED == 2 ~ "HS.Grad.or.GED",
      BYGPARED == 3 ~ "2.Year.School.No.Deg",
      BYGPARED == 4 ~ "2.Year.School.Awarded.Deg",
      BYGPARED == 5 ~ "4.Year.School.No.Deg",
      BYGPARED == 6 ~ "4.Year.School.Awarded.Deg",
      BYGPARED == 7 ~ "Masters.Degree",
      BYGPARED == 8 ~ "PhD.or.other.doctorate",
      TRUE ~ NA_character_
    )
  )
# Complete-case restriction on the covariates used in the models.
entry_variables_complete <- entry_variables_complete %>%
  filter(
    !is.na(parents_highest_ed),
    !is.na(PARENT_EXPECTATION),
    !is.na(STUDENT_EXPECTATION),
    !is.na(race),
    !is.na(sex),
    !is.na(MATH_SELF),
    !is.na(HS_GPA)
  )

# Analysis file: one row per student with outcome, timing, design variables
# and covariates.
entry_var_clean <- select(
  entry_variables_complete,
  STU_ID, STRAT_ID, PSU,
  F3_first_college_2_year, F3_UPDATED_MONTHS_TO_COLLEGE,
  sex, parents_highest_ed, BY_LOW_SES, race, F3QWT,
  PARENT_EXPECTATION, STUDENT_EXPECTATION, MATH_SELF, HS_GPA
)
# 0/1 indicators derived from the labelled expectations.
entry_var_clean <- entry_var_clean %>%
  mutate(
    student_expects_2yr = ifelse(
      STUDENT_EXPECTATION == "Expect.2.Yr.College", 1, 0
    ),
    parent_expects_2yr = ifelse(
      PARENT_EXPECTATION == "Expect.2.Yr.College", 1, 0
    ),
    student_parent_exp_differ = ifelse(
      PARENT_EXPECTATION != STUDENT_EXPECTATION, 1, 0
    )
  )
# Map the expectation labels back to an ordinal 1-7 scale (0 for any label not
# listed, NA for a missing label), then flag students whose parent's
# expectation ranks strictly above their own.
entry_var_clean <- entry_var_clean %>%
  mutate(
    student_expectation_ordinal = case_when(
      is.na(STUDENT_EXPECTATION) ~ NA_real_,
      STUDENT_EXPECTATION == "Will.Not.Graduate.HS" ~ 1,
      STUDENT_EXPECTATION == "HS.Grad.or.GED" ~ 2,
      STUDENT_EXPECTATION == "Expect.2.Yr.College" ~ 3,
      STUDENT_EXPECTATION == "4.Year.School.No.Deg" ~ 4,
      STUDENT_EXPECTATION == "Graduate.4.Year" ~ 5,
      STUDENT_EXPECTATION == "Masters.Degree" ~ 6,
      STUDENT_EXPECTATION == "PhD.Expected" ~ 7,
      TRUE ~ 0
    ),
    parent_expectation_ordinal = case_when(
      is.na(PARENT_EXPECTATION) ~ NA_real_,
      PARENT_EXPECTATION == "Will.Not.Graduate.HS" ~ 1,
      PARENT_EXPECTATION == "HS.Grad.or.GED" ~ 2,
      PARENT_EXPECTATION == "Expect.2.Yr.College" ~ 3,
      PARENT_EXPECTATION == "4.Year.School.No.Deg" ~ 4,
      PARENT_EXPECTATION == "Graduate.4.Year" ~ 5,
      PARENT_EXPECTATION == "Masters.Degree" ~ 6,
      PARENT_EXPECTATION == "PhD.Expected" ~ 7,
      TRUE ~ 0
    ),
    parent_expectation_higher = ifelse(
      parent_expectation_ordinal > student_expectation_ordinal, 1, 0
    )
  )
# Build the person-period file.
# 1. Wide: one row per student, one column per observed value of
#    F3_UPDATED_MONTHS_TO_COLLEGE; the cell in the student's own month holds
#    F3_first_college_2_year, every other cell is NA and is set to 0.
#    reshape2:: is spelled out (as melt already was) because data.table,
#    attached later in this script, exports its own dcast/melt.
entry_var_wide <- reshape2::dcast(
  entry_var_clean,
  STU_ID ~ F3_UPDATED_MONTHS_TO_COLLEGE,
  value.var = "F3_first_college_2_year"
)
entry_var_wide[is.na(entry_var_wide)] <- 0

# 2. Long: one row per student x month, with cc_entry = 1 only in the month
#    of a 2-year-college entry.
# NOTE(review): every student keeps a row for every month, including months
# after their own entry/censoring month -- confirm this risk set is intended.
entry_var_long <- reshape2::melt(
  entry_var_wide,
  id.vars = "STU_ID",
  variable.name = "time",
  value.name = "cc_entry"
)
entry_var_long <- entry_var_long %>% arrange(STU_ID)

# 3. Attach all student-level columns. The previous call passed an extra
#    positional data frame that bound to merge()'s `by` argument and was
#    ignored because by.x/by.y were given; it is dropped here.
entry_var_long <- merge(entry_var_long, entry_var_clean, by = "STU_ID")
head(entry_var_long)
## STU_ID time cc_entry STRAT_ID PSU F3_first_college_2_year
## 1 101101 0 0 101 1 1
## 2 101101 1 0 101 1 1
## 3 101101 2 0 101 1 1
## 4 101101 3 0 101 1 1
## 5 101101 4 0 101 1 1
## 6 101101 5 0 101 1 1
## F3_UPDATED_MONTHS_TO_COLLEGE sex parents_highest_ed BY_LOW_SES race
## 1 18 Female 4.Year.School.No.Deg 1 Hispanic
## 2 18 Female 4.Year.School.No.Deg 1 Hispanic
## 3 18 Female 4.Year.School.No.Deg 1 Hispanic
## 4 18 Female 4.Year.School.No.Deg 1 Hispanic
## 5 18 Female 4.Year.School.No.Deg 1 Hispanic
## 6 18 Female 4.Year.School.No.Deg 1 Hispanic
## F3QWT PARENT_EXPECTATION STUDENT_EXPECTATION MATH_SELF HS_GPA
## 1 185.9702 Graduate.4.Year Expect.2.Yr.College -1.118 2
## 2 185.9702 Graduate.4.Year Expect.2.Yr.College -1.118 2
## 3 185.9702 Graduate.4.Year Expect.2.Yr.College -1.118 2
## 4 185.9702 Graduate.4.Year Expect.2.Yr.College -1.118 2
## 5 185.9702 Graduate.4.Year Expect.2.Yr.College -1.118 2
## 6 185.9702 Graduate.4.Year Expect.2.Yr.College -1.118 2
## student_expects_2yr parent_expects_2yr student_parent_exp_differ
## 1 1 0 1
## 2 1 0 1
## 3 1 0 1
## 4 1 0 1
## 5 1 0 1
## 6 1 0 1
## student_expectation_ordinal parent_expectation_ordinal
## 1 3 5
## 2 3 5
## 3 3 5
## 4 3 5
## 5 3 5
## 6 3 5
## parent_expectation_higher
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
# Survey design on the person-period file: students (STU_ID) are the sampling
# clusters, nested within STRAT_ID strata, weighted by F3QWT.
# NOTE(review): the PSU column is carried in the data but not used in `ids`;
# confirm that clustering on the student alone is the intended design.
des1<-svydesign(ids=~STU_ID,strata = ~STRAT_ID,weights = ~F3QWT,data = entry_var_long,nest=T)
# Strata with a single PSU use the "adjust" variance option of the survey package.
options(survey.lonely.psu = "adjust")
# Model of cc_entry with a complementary log-log link: a separate term for
# each month (time as a factor) plus the student-level covariates.
fit1<-svyglm(cc_entry~as.factor(time)+sex+race+BY_LOW_SES+MATH_SELF+HS_GPA+parent_expects_2yr+parent_expectation_higher,design=des1,family=binomial(link = cloglog))
summary(fit1)
##
## Call:
## svyglm(formula = cc_entry ~ as.factor(time) + sex + race + BY_LOW_SES +
## MATH_SELF + HS_GPA + parent_expects_2yr + parent_expectation_higher,
## design = des1, family = binomial(link = cloglog))
##
## Survey design:
## svydesign(ids = ~STU_ID, strata = ~STRAT_ID, weights = ~F3QWT,
## data = entry_var_long, nest = T)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.47686 0.17815 -19.516 < 2e-16 ***
## as.factor(time)1 -0.38991 0.20006 -1.949 0.05134 .
## as.factor(time)2 1.34392 0.14741 9.117 < 2e-16 ***
## as.factor(time)3 2.12685 0.13988 15.204 < 2e-16 ***
## as.factor(time)4 -0.06823 0.18763 -0.364 0.71615
## as.factor(time)5 -1.89125 0.38151 -4.957 7.34e-07 ***
## as.factor(time)6 -2.75217 0.46486 -5.920 3.38e-09 ***
## as.factor(time)7 -0.52886 0.21336 -2.479 0.01321 *
## as.factor(time)8 -0.19309 0.19050 -1.014 0.31082
## as.factor(time)9 -1.76037 0.33731 -5.219 1.86e-07 ***
## as.factor(time)10 -2.12619 0.37616 -5.652 1.65e-08 ***
## as.factor(time)11 -2.38282 0.39515 -6.030 1.73e-09 ***
## as.factor(time)12 -2.15487 0.41020 -5.253 1.54e-07 ***
## as.factor(time)13 -1.98497 0.35555 -5.583 2.47e-08 ***
## as.factor(time)14 -0.87962 0.21631 -4.067 4.83e-05 ***
## as.factor(time)15 -0.29698 0.19195 -1.547 0.12188
## as.factor(time)16 -1.70281 0.32318 -5.269 1.42e-07 ***
## as.factor(time)17 -3.21918 0.52968 -6.078 1.29e-09 ***
## as.factor(time)18 -2.95433 0.47830 -6.177 6.96e-10 ***
## as.factor(time)19 -1.42484 0.27986 -5.091 3.66e-07 ***
## as.factor(time)20 -1.59756 0.29570 -5.403 6.81e-08 ***
## as.factor(time)21 1.42330 0.14689 9.690 < 2e-16 ***
## sexMale -0.01753 0.04555 -0.385 0.70033
## raceAfr.American -0.26391 0.12508 -2.110 0.03490 *
## raceAm.Indian 0.24128 0.21817 1.106 0.26880
## raceAsian/Pacific -0.07066 0.12841 -0.550 0.58216
## raceHispanic 0.19248 0.11443 1.682 0.09262 .
## raceWhite 0.07773 0.10670 0.728 0.46635
## BY_LOW_SES 0.35358 0.04634 7.630 2.70e-14 ***
## MATH_SELF -0.12367 0.02385 -5.186 2.21e-07 ***
## HS_GPA -0.24020 0.01560 -15.395 < 2e-16 ***
## parent_expects_2yr 0.20520 0.07674 2.674 0.00751 **
## parent_expectation_higher 0.07946 0.04604 1.726 0.08445 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.8824798)
##
## Number of Fisher Scoring iterations: 9
# Logit model of the 2-year first-college indicator on a natural cubic spline
# (4 df) of the entry month, plus race and low-SES.
# quasibinomial() replaces family = "binomial": with sampling weights the
# plain binomial family emits "non-integer #successes in a binomial glm!",
# whereas the quasi family gives the same point estimates without the warning.
# NOTE(review): the outcome and the month are both constant within student
# while des1 is the person-period design -- confirm this is the intended fit.
fit.splines.ses.race <- svyglm(
  formula = F3_first_college_2_year ~
    ns(F3_UPDATED_MONTHS_TO_COLLEGE, df = 4) + race + BY_LOW_SES,
  design = des1,
  family = quasibinomial()
)

# Prediction grid: months 1-21 crossed with every observed race and SES value.
plot_ses_race <- expand.grid(
  month = seq(1, 21, 1),
  race = unique(entry_var_long$race),
  BY_LOW_SES = unique(entry_var_long$BY_LOW_SES)
)

# predict() needs the model's own variable name; add it temporarily and remove
# it again once the fitted probabilities are stored in `splines`.
plot_ses_race <- plot_ses_race %>%
  mutate(F3_UPDATED_MONTHS_TO_COLLEGE = month)
plot_ses_race$splines <- predict(
  fit.splines.ses.race,
  newdata = plot_ses_race,
  type = "response"
)
plot_ses_race$F3_UPDATED_MONTHS_TO_COLLEGE <- NULL
head(plot_ses_race, n = 20)
## month race BY_LOW_SES splines
## 1 1 Hispanic 1 0.6896508
## 2 2 Hispanic 1 0.6398162
## 3 3 Hispanic 1 0.5890601
## 4 4 Hispanic 1 0.5410242
## 5 5 Hispanic 1 0.4992894
## 6 6 Hispanic 1 0.4669457
## 7 7 Hispanic 1 0.4463286
## 8 8 Hispanic 1 0.4387361
## 9 9 Hispanic 1 0.4453221
## 10 10 Hispanic 1 0.4674825
## 11 11 Hispanic 1 0.5067164
## 12 12 Hispanic 1 0.5621109
## 13 13 Hispanic 1 0.6242297
## 14 14 Hispanic 1 0.6818507
## 15 15 Hispanic 1 0.7263109
## 16 16 Hispanic 1 0.7522464
## 17 17 Hispanic 1 0.7573968
## 18 18 Hispanic 1 0.7451009
## 19 19 Hispanic 1 0.7184153
## 20 20 Hispanic 1 0.6799970
library(data.table)
library(magrittr)
# Convert the prediction grid to a data.table in place, then stack the fitted
# values into long form (columns `variable` and `value`).
setDT(plot_ses_race)
out <- melt(
  plot_ses_race,
  id.vars = c("month", "race", "BY_LOW_SES"),
  measure.vars = list(haz = c("splines"))
)
head(out, n = 20)
## month race BY_LOW_SES variable value
## 1 1 Hispanic 1 splines 0.6896508
## 2 2 Hispanic 1 splines 0.6398162
## 3 3 Hispanic 1 splines 0.5890601
## 4 4 Hispanic 1 splines 0.5410242
## 5 5 Hispanic 1 splines 0.4992894
## 6 6 Hispanic 1 splines 0.4669457
## 7 7 Hispanic 1 splines 0.4463286
## 8 8 Hispanic 1 splines 0.4387361
## 9 9 Hispanic 1 splines 0.4453221
## 10 10 Hispanic 1 splines 0.4674825
## 11 11 Hispanic 1 splines 0.5067164
## 12 12 Hispanic 1 splines 0.5621109
## 13 13 Hispanic 1 splines 0.6242297
## 14 14 Hispanic 1 splines 0.6818507
## 15 15 Hispanic 1 splines 0.7263109
## 16 16 Hispanic 1 splines 0.7522464
## 17 17 Hispanic 1 splines 0.7573968
## 18 18 Hispanic 1 splines 0.7451009
## 19 19 Hispanic 1 splines 0.7184153
## 20 20 Hispanic 1 splines 0.6799970
# Running sum of the fitted values over months within each race x SES cell.
# The result stays grouped by race and BY_LOW_SES.
splines.ses.table <- dplyr::mutate(
  group_by(out, race, BY_LOW_SES),
  bigH = cumsum(value)
)

# One panel per race, one line per SES group.
ggplot(splines.ses.table, aes(x = month, y = bigH)) +
  geom_line(aes(group = BY_LOW_SES, color = BY_LOW_SES)) +
  facet_wrap(~race) +
  ggtitle("Figure 4 - Cumulative Hazard by SES and Race")

# Average the cumulative hazard over the two SES groups for each month x race.
# `.groups = "drop_last"` states the grouping of the result explicitly (still
# grouped by month, as before), so summarise() no longer prints its
# "regrouping output by 'month'" message.
splines.ses.table2 <- splines.ses.table %>%
  group_by(month, race) %>%
  summarize(hz = mean(bigH, na.rm = TRUE), .groups = "drop_last")

# One line per race.
ggplot(data = splines.ses.table2, aes(x = month, y = hz)) +
  geom_line(aes(group = race, color = race))

# Regression table for fit1; the month dummies are omitted from the display.
stargazer(
  fit1,
  title = "Figure 2",
  omit = "as.factor",
  type = "html"
)
## Figure 2
## =========================================================
##                                 Dependent variable:
##                             -----------------------------
##                                      cc_entry
## ---------------------------------------------------------
## sexMale                               -0.018
##                                       (0.046)
##
## raceAfr.American                     -0.264**
##                                       (0.125)
##
## raceAm.Indian                          0.241
##                                       (0.218)
##
## raceAsian/Pacific                     -0.071
##                                       (0.128)
##
## raceHispanic                          0.192*
##                                       (0.114)
##
## raceWhite                              0.078
##                                       (0.107)
##
## BY_LOW_SES                           0.354***
##                                       (0.046)
##
## MATH_SELF                            -0.124***
##                                       (0.024)
##
## HS_GPA                               -0.240***
##                                       (0.016)
##
## parent_expects_2yr                   0.205***
##                                       (0.077)
##
## parent_expectation_higher             0.079*
##                                       (0.046)
##
## Constant                             -3.477***
##                                       (0.178)
##
## ---------------------------------------------------------
## Observations                          145,508
## Log Likelihood                      -8,579.326
## Akaike Inf. Crit.                   17,224.650
## =========================================================
## Note:                         *p<0.1; **p<0.05; ***p<0.01
# Cumulative hazard curves for Hispanic students, by SES group.
# splines.ses.table is grouped by race and BY_LOW_SES, so dplyr keeps those
# columns even when they are not selected; naming them explicitly yields the
# same columns without the "Adding missing grouping variables" message.
hispanic_low_ses <- splines.ses.table %>%
  filter(race == "Hispanic", BY_LOW_SES == 1) %>%
  select(race, BY_LOW_SES, month, bigH)
hispanic_high_ses <- splines.ses.table %>%
  filter(race == "Hispanic", BY_LOW_SES == 0) %>%
  select(race, BY_LOW_SES, month, bigH)

# Step plot: low SES in colour 1, high SES in colour 2.
plot(bigH ~ month, hispanic_low_ses, col = 1, lwd = 2, type = "s")
lines(bigH ~ month, hispanic_high_ses, col = 2, lwd = 2, type = "s")
legend(
  "bottomright",
  legend = c("Hispanic Low SES", "Hispanic High SES"),
  lty = 1,
  col = c(1, 2)
)

#stargazer(entry_var_clean,title="Figure 1",type = "html")
# Descriptive table: numeric columns of the analysis file plus 0/1 dummy
# columns for each race and sex category.
figure1_code <- entry_var_clean %>%
  select(
    STU_ID, F3_first_college_2_year, F3_UPDATED_MONTHS_TO_COLLEGE,
    sex, race, BY_LOW_SES,
    student_expects_2yr, parent_expects_2yr, parent_expectation_higher
  )

# Race dummies: cast race into one column per category (the student's own
# category holds the label, the rest NA), then recode to 1 / 0.
# reshape2:: is explicit because data.table is attached by this point and
# exports its own dcast, which is written for data.tables, not data frames.
figure1_code2 <- reshape2::dcast(figure1_code, STU_ID ~ race, value.var = "race")
figure1_code2[2:7][!is.na(figure1_code2[2:7])] <- 1
figure1_code2[2:7][is.na(figure1_code2[2:7])] <- 0

# Sex dummies, built the same way.
figure1_code3 <- reshape2::dcast(figure1_code, STU_ID ~ sex, value.var = "sex")
figure1_code3[2:3][!is.na(figure1_code3[2:3])] <- 1
figure1_code3[2:3][is.na(figure1_code3[2:3])] <- 0

figure1_code <- merge(figure1_code, figure1_code2, by = "STU_ID")
figure1_code <- merge(figure1_code, figure1_code3, by = "STU_ID")

# The dummy columns (positions 10-17) were recoded inside character columns;
# convert each to numeric. lapply() keeps one vector per column instead of
# routing the values through sapply()'s simplified matrix.
figure1_code[10:17] <- lapply(figure1_code[10:17], as.numeric)
stargazer(figure1_code, title = "Figure 1", type = "html")
## Figure 1
## ====================================================================================================
## Statistic                        N       Mean      St. Dev.     Min   Pctl(25)  Pctl(75)    Max
## ----------------------------------------------------------------------------------------------------
## STU_ID                         6,614  282,299.200  108,452.200  101,101  184,110  386,213.8  461,234
## F3_first_college_2_year        6,614     0.296        0.456        0        0        1          1
## F3_UPDATED_MONTHS_TO_COLLEGE   6,614     5.451        6.149        0        2        4         21
## BY_LOW_SES                     6,614     0.371        0.483        0        0        1          1
## student_expects_2yr            6,614     0.045        0.207        0        0        0          1
## parent_expects_2yr             6,614     0.044        0.205        0        0        0          1
## parent_expectation_higher      6,614     0.294        0.456        0        0        1          1
## 2orMoreRace                    6,614     0.047        0.211        0        0        0          1
## Afr.American                   6,614     0.097        0.296        0        0        0          1
## Am.Indian                      6,614     0.005        0.070        0        0        0          1
## Asian/Pacific                  6,614     0.101        0.301        0        0        0          1
## Hispanic                       6,614     0.111        0.314        0        0        0          1
## White                          6,614     0.640        0.480        0        0        1          1
## Female                         6,614     0.571        0.495        0        0        1          1
## Male                           6,614     0.429        0.495        0        0        1          1
## ----------------------------------------------------------------------------------------------------
# exp(-running sum of the fitted values) within each race x SES cell. The
# grouping is stated here rather than relying on the grouping left on
# splines.ses.table by an earlier step.
splines.ses.table <- splines.ses.table %>%
  group_by(race, BY_LOW_SES) %>%
  mutate(surv = exp(-cumsum(value)))

# Mean fitted value per month (averaged over race) for each SES group.
# `.groups = "drop"` returns an ungrouped result, as before, without the
# summarise() ungrouping message.
plot.low_ses <- splines.ses.table %>%
  group_by(month) %>%
  filter(BY_LOW_SES == 1) %>%
  summarize(values = mean(value), .groups = "drop")
plot.high_ses <- splines.ses.table %>%
  group_by(month) %>%
  filter(BY_LOW_SES == 0) %>%
  summarize(values = mean(value), .groups = "drop")

# Step plot: low SES in colour 1, high SES in colour 2.
plot(
  values ~ month, plot.low_ses,
  col = 1, lwd = 2, type = "s",
  xlim = c(0, 21), ylim = c(0, 0.8),
  main = "Figure 3. Hazard for Community College Entry by SES",
  ylab = "Hazard"
)
lines(values ~ month, plot.high_ses, col = 2, lwd = 2, type = "s")
legend("bottomright", legend = c("Low SES", "High SES"), lty = 1, col = c(1, 2))

#plot.low_ses<-splines.ses.table %>% group_by(month) %>% filter(BY_LOW_SES==1) %>% summarize(values=mean(value))
#plot.high_ses<-splines.ses.table %>% group_by(month) %>% filter(BY_LOW_SES==0) %>% summarize(values=mean(value))
#plot(values~month,plot.high_ses,col=1,lwd=2,type="s")
#lines(values~month,plot.low_ses,col=2,lwd=2,type="s")
# Descriptive comparison of students whose first college was / was not a
# 2-year institution, with group-difference tests.
table1 <- CreateTableOne(
  data = entry_var_clean,
  vars = c(
    "sex", "race", "BY_LOW_SES",
    "parent_expects_2yr", "student_expects_2yr"
  ),
  strata = "F3_first_college_2_year",
  test = TRUE
)
print(table1, format = "p")
## Stratified by F3_first_college_2_year
## 0 1 p test
## n 4658 1956
## sex = Male (%) 42.7 43.1 0.782
## race (%) <0.001
## 2orMoreRace 4.6 4.8
## Afr.American 9.9 9.1
## Am.Indian 0.4 0.8
## Asian/Pacific 10.4 9.3
## Hispanic 8.9 16.4
## White 65.8 59.6
## BY_LOW_SES (mean (SD)) 0.31 (0.46) 0.53 (0.50) <0.001
## parent_expects_2yr (mean (SD)) 0.03 (0.17) 0.08 (0.27) <0.001
## student_expects_2yr (mean (SD)) 0.03 (0.16) 0.09 (0.28) <0.001
# Keep the value returned by print() so it can be passed to stargazer below
# (this prints the table a second time).
test1 <- print(table1, format = "p")
## Stratified by F3_first_college_2_year
## 0 1 p test
## n 4658 1956
## sex = Male (%) 42.7 43.1 0.782
## race (%) <0.001
## 2orMoreRace 4.6 4.8
## Afr.American 9.9 9.1
## Am.Indian 0.4 0.8
## Asian/Pacific 10.4 9.3
## Hispanic 8.9 16.4
## White 65.8 59.6
## BY_LOW_SES (mean (SD)) 0.31 (0.46) 0.53 (0.50) <0.001
## parent_expects_2yr (mean (SD)) 0.03 (0.17) 0.08 (0.27) <0.001
## student_expects_2yr (mean (SD)) 0.03 (0.16) 0.09 (0.28) <0.001
# Render the captured TableOne output as an HTML table.
stargazer(
  test1,
  type = "html",
  align = TRUE
)
## =====================================================================
##                                       0            1        p     test
## ---------------------------------------------------------------------
## n                                   4658         1956
## sex = Male (%)                      42.7         43.1      0.782
## race (%)                                                 < 0.001
## 2orMoreRace                          4.6          4.8
## Afr.American                         9.9          9.1
## Am.Indian                            0.4          0.8
## Asian/Pacific                       10.4          9.3
## Hispanic                             8.9         16.4
## White                               65.8         59.6
## BY_LOW_SES (mean (SD))           0.31 (0.46)  0.53 (0.50)  < 0.001
## parent_expects_2yr (mean (SD))   0.03 (0.17)  0.08 (0.27)  < 0.001
## student_expects_2yr (mean (SD))  0.03 (0.16)  0.09 (0.28)  < 0.001
## ---------------------------------------------------------------------
# Person-period file with presentation-ready column names, so the regression
# table shows readable labels. Selecting and renaming happen in one step.
entry_var_final <- entry_var_long %>%
  select(
    `Community College Entry` = cc_entry,
    `Time in Months` = time,
    Sex = sex,
    Race = race,
    `Low SES in Base Year` = BY_LOW_SES,
    `Math Self Efficacy` = MATH_SELF,
    `High School GPA` = HS_GPA,
    `Parent Expects 2 Year College` = parent_expects_2yr,
    `Parent Expectation is Higher Than Student's` = parent_expectation_higher,
    STU_ID,
    STRAT_ID,
    F3QWT,
    `First College Entry is CC` = F3_first_college_2_year
  )

# Same design as before (students as clusters within strata, F3QWT weights),
# built on the relabelled data.
des_final <- svydesign(
  ids = ~STU_ID,
  strata = ~STRAT_ID,
  weights = ~F3QWT,
  data = entry_var_final,
  nest = TRUE
)
options(survey.lonely.psu = "adjust")

# Complementary log-log model with a term per month plus the covariates.
fit_final <- svyglm(
  `Community College Entry` ~ as.factor(`Time in Months`) +
    Sex + Race + `Low SES in Base Year` + `Math Self Efficacy` +
    `High School GPA` + `Parent Expects 2 Year College` +
    `Parent Expectation is Higher Than Student's`,
  design = des_final,
  family = binomial(link = cloglog)
)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
# Student-level file with presentation-ready column names for the
# descriptive table.
entry_var_final2 <- entry_var_clean %>%
  select(
    `Time in Months` = F3_UPDATED_MONTHS_TO_COLLEGE,
    Sex = sex,
    Race = race,
    `Low SES in Base Year` = BY_LOW_SES,
    `Math Self Efficacy` = MATH_SELF,
    `High School GPA` = HS_GPA,
    `Parent Expects 2 Year College` = parent_expects_2yr,
    `Parent Expectation is Higher Than Student's` = parent_expectation_higher,
    STU_ID,
    STRAT_ID,
    F3QWT,
    `First College Entry is CC` = F3_first_college_2_year
  )

# Compare covariates between students whose first college was / was not a
# 2-year institution.
table1 <- CreateTableOne(
  data = entry_var_final2,
  vars = c(
    "Sex", "Race", "Low SES in Base Year",
    "Parent Expects 2 Year College", "High School GPA", "Math Self Efficacy"
  ),
  strata = "First College Entry is CC",
  test = TRUE
)
print(table1, format = "p")
## Stratified by First College Entry is CC
## 0 1 p
## n 4658 1956
## Sex = Male (%) 42.7 43.1 0.782
## Race (%) <0.001
## 2orMoreRace 4.6 4.8
## Afr.American 9.9 9.1
## Am.Indian 0.4 0.8
## Asian/Pacific 10.4 9.3
## Hispanic 8.9 16.4
## White 65.8 59.6
## Low SES in Base Year (mean (SD)) 0.31 (0.46) 0.53 (0.50) <0.001
## Parent Expects 2 Year College (mean (SD)) 0.03 (0.17) 0.08 (0.27) <0.001
## High School GPA (mean (SD)) 4.71 (1.27) 3.74 (1.32) <0.001
## Math Self Efficacy (mean (SD)) 0.23 (1.01) -0.11 (0.97) <0.001
## Stratified by First College Entry is CC
## test
## n
## Sex = Male (%)
## Race (%)
## 2orMoreRace
## Afr.American
## Am.Indian
## Asian/Pacific
## Hispanic
## White
## Low SES in Base Year (mean (SD))
## Parent Expects 2 Year College (mean (SD))
## High School GPA (mean (SD))
## Math Self Efficacy (mean (SD))
# Keep the value returned by print() so it can be passed to stargazer below
# (this prints the table a second time).
test1 <- print(table1, format = "p")
## Stratified by First College Entry is CC
## 0 1 p
## n 4658 1956
## Sex = Male (%) 42.7 43.1 0.782
## Race (%) <0.001
## 2orMoreRace 4.6 4.8
## Afr.American 9.9 9.1
## Am.Indian 0.4 0.8
## Asian/Pacific 10.4 9.3
## Hispanic 8.9 16.4
## White 65.8 59.6
## Low SES in Base Year (mean (SD)) 0.31 (0.46) 0.53 (0.50) <0.001
## Parent Expects 2 Year College (mean (SD)) 0.03 (0.17) 0.08 (0.27) <0.001
## High School GPA (mean (SD)) 4.71 (1.27) 3.74 (1.32) <0.001
## Math Self Efficacy (mean (SD)) 0.23 (1.01) -0.11 (0.97) <0.001
## Stratified by First College Entry is CC
## test
## n
## Sex = Male (%)
## Race (%)
## 2orMoreRace
## Afr.American
## Am.Indian
## Asian/Pacific
## Hispanic
## White
## Low SES in Base Year (mean (SD))
## Parent Expects 2 Year College (mean (SD))
## High School GPA (mean (SD))
## Math Self Efficacy (mean (SD))
# Render the captured TableOne output as an HTML table.
stargazer(
  test1,
  type = "html",
  align = TRUE
)
## ================================================================================
##                                                 0             1        p     test
## --------------------------------------------------------------------------------
## n                                             4658          1956
## Sex = Male (%)                                42.7          43.1      0.782
## Race (%)                                                            < 0.001
## 2orMoreRace                                    4.6           4.8
## Afr.American                                   9.9           9.1
## Am.Indian                                      0.4           0.8
## Asian/Pacific                                 10.4           9.3
## Hispanic                                       8.9          16.4
## White                                         65.8          59.6
## Low SES in Base Year (mean (SD))           0.31 (0.46)   0.53 (0.50)  < 0.001
## Parent Expects 2 Year College (mean (SD))  0.03 (0.17)   0.08 (0.27)  < 0.001
## High School GPA (mean (SD))                4.71 (1.27)   3.74 (1.32)  < 0.001
## Math Self Efficacy (mean (SD))             0.23 (1.01)  -0.11 (0.97)  < 0.001
## --------------------------------------------------------------------------------
# Regression table for the relabelled model; the month dummies are omitted
# from the display.
stargazer(
  fit_final,
  title = "Figure 2",
  omit = "as.factor",
  type = "html"
)
## Figure 2
## ===========================================================================
##                                                   Dependent variable:
##                                               -----------------------------
##                                                 Community College Entry
## ---------------------------------------------------------------------------
## SexMale                                                 -0.018
##                                                         (0.046)
##
## RaceAfr.American                                       -0.264**
##                                                         (0.125)
##
## RaceAm.Indian                                            0.241
##                                                         (0.218)
##
## RaceAsian/Pacific                                       -0.071
##                                                         (0.128)
##
## RaceHispanic                                            0.192*
##                                                         (0.114)
##
## RaceWhite                                                0.078
##                                                         (0.107)
##
## Low SES in Base Year                                   0.354***
##                                                         (0.046)
##
## Math Self Efficacy                                     -0.124***
##                                                         (0.024)
##
## High School GPA                                        -0.240***
##                                                         (0.016)
##
## Parent Expects 2 Year College                          0.205***
##                                                         (0.077)
##
## Parent Expectation is Higher Than Student's             0.079*
##                                                         (0.046)
##
## Constant                                               -3.477***
##                                                         (0.178)
##
## ---------------------------------------------------------------------------
## Observations                                            145,508
## Log Likelihood                                        -8,579.326
## Akaike Inf. Crit.                                     17,224.650
## ===========================================================================
## Note:                                           *p<0.1; **p<0.05; ***p<0.01