# Upper-case every column name of `els` so the names referenced below match.
names(els) <- toupper(names(els))

# college entry variables

# Keep only the columns this analysis reads: the student/school identifiers,
# the STRAT_ID/PSU design columns, the F3QWT weight, and the raw survey items
# that are recoded further down.
entry_variables <- els %>%
  select(
    STU_ID, SCH_ID, STRAT_ID, PSU,
    F2HS2P_P, F2PS1SEC, F3HS2PS1, F3PS1SEC,
    BYQXDATP, F1RDTLFT, BYSES1QU, F3QWT,
    BYP33, BYSEX, BYRACE, BYPARED, BYGPARED,
    F1RGPP2, BYMATHSE, BYSTEXP, BYPARASP
  )


# Recode the numeric parent-expectation item BYPARASP into a text label.
# Codes 1-7 map to the labels below; anything else (negative reserve codes,
# NA, unexpected values) becomes NA. case_when() replaces the nested ifelse()
# chain and always yields a character column.
entry_variables <- entry_variables %>%
  mutate(
    PARENT_EXPECTATION = case_when(
      BYPARASP == 1 ~ "Will.Not.Graduate.HS",
      BYPARASP == 2 ~ "HS.Grad.or.GED",
      BYPARASP == 3 ~ "Expect.2.Yr.College",
      BYPARASP == 4 ~ "4.Year.School.No.Deg",
      BYPARASP == 5 ~ "Graduate.4.Year",
      BYPARASP == 6 ~ "Masters.Degree",
      BYPARASP == 7 ~ "PhD.Expected",
      TRUE ~ NA_character_
    )
  )


# Recode the numeric student-expectation item BYSTEXP into a text label,
# using the same label set as PARENT_EXPECTATION so the two can be compared.
# Codes 1-7 map to the labels below; anything else becomes NA.
# The earlier nested ifelse() tested BYPARED (a different column) in its last
# branch; the fallback now depends on BYSTEXP only.
entry_variables <- entry_variables %>%
  mutate(
    STUDENT_EXPECTATION = case_when(
      BYSTEXP == 1 ~ "Will.Not.Graduate.HS",
      BYSTEXP == 2 ~ "HS.Grad.or.GED",
      BYSTEXP == 3 ~ "Expect.2.Yr.College",
      BYSTEXP == 4 ~ "4.Year.School.No.Deg",
      BYSTEXP == 5 ~ "Graduate.4.Year",
      BYSTEXP == 6 ~ "Masters.Degree",
      BYSTEXP == 7 ~ "PhD.Expected",
      TRUE ~ NA_character_
    )
  )

# Replace values below `cutoff` with NA and pass everything else through.
na_if_below <- function(x, cutoff) {
  ifelse(x < cutoff, NA_real_, x)
}

# Blank out the low-valued codes on the numeric measures. BYMATHSE uses a
# cutoff of -2; the other three treat any negative value as missing.
entry_variables <- entry_variables %>%
  mutate(
    MATH_SELF = na_if_below(BYMATHSE, (-2)),
    HS_GPA = na_if_below(F1RGPP2, 0),
    F2_months_to_college = na_if_below(F2HS2P_P, 0),
    F3_months_to_college = na_if_below(F3HS2PS1, 0)
  )

# Flag whether the first college sector recorded at F2 / F3 is code 4
# (treated here as a two-year institution): 1 when the sector code is 4,
# 0 for any other non-negative code, NA for negative codes and NA.
# The previous version ended in `x <!0 & x != 4`, a mistyped "not less than"
# whose two outcomes were both 0; the conditions are now spelled out.
entry_variables <- entry_variables %>%
  mutate(
    F2_first_college_2_year = case_when(
      F2PS1SEC == 4 ~ 1,
      F2PS1SEC >= 0 ~ 0,
      TRUE ~ NA_real_
    ),
    F3_first_college_2_year = case_when(
      F3PS1SEC == 4 ~ 1,
      F3PS1SEC >= 0 ~ 0,
      TRUE ~ NA_real_
    )
  )

# Keep students with a known F3 first-college flag, then add two recodes:
#   TV_ENTERTAIN_HRS - 0 for BYP33 in [0, 3], 1 for BYP33 in [4, 10], else NA
#   sex              - "Male" for BYSEX 1, "Female" for BYSEX 2, else NA
entry_variables_complete <- entry_variables %>%
  filter(!is.na(F3_first_college_2_year)) %>%
  mutate(
    TV_ENTERTAIN_HRS = ifelse(
      BYP33 >= 0 & BYP33 <= 3,
      0,
      ifelse(BYP33 >= 4 & BYP33 <= 10, 1, NA)
    ),
    sex = ifelse(
      BYSEX == 1,
      "Male",
      ifelse(BYSEX == 2, "Female", NA)
    )
  )


# Recode the parent education item BYPARED (codes 1-8) into text labels.
# Any other value (negative reserve codes, NA) becomes NA. case_when()
# replaces the nine-level nested ifelse() and always yields character.
entry_variables_complete <- entry_variables_complete %>%
  mutate(
    parents_highest_ed = case_when(
      BYPARED == 1 ~ "Did.Not.Graduate.HS",
      BYPARED == 2 ~ "HS.Grad.or.GED",
      BYPARED == 3 ~ "2.Year.School.No.Deg",
      BYPARED == 4 ~ "2.Year.School.Awarded.Deg",
      BYPARED == 5 ~ "4.Year.School.No.Deg",
      BYPARED == 6 ~ "4.Year.School.Awarded.Deg",
      BYPARED == 7 ~ "Masters.Degree",
      BYPARED == 8 ~ "PhD.or.other.doctorate",
      TRUE ~ NA_character_
    )
  )

# Derived covariates:
#   PARENT_HS_OR_LOWER           - 1 when the parent label is one of the two
#                                  high-school-or-below categories, else 0
#   F3_UPDATED_MONTHS_TO_COLLEGE - F3 months to college, with missing values
#                                  set to 21
#   BY_LOW_SES                   - 1 for BYSES1QU 1 or 2, NA for negative
#                                  codes, 0 for everything else
entry_variables_complete <- entry_variables_complete %>%
  mutate(
    PARENT_HS_OR_LOWER = ifelse(
      parents_highest_ed == "Did.Not.Graduate.HS" |
        parents_highest_ed == "HS.Grad.or.GED",
      1,
      0
    ),
    F3_UPDATED_MONTHS_TO_COLLEGE = replace(
      F3_months_to_college,
      is.na(F3_months_to_college),
      21
    ),
    BY_LOW_SES = ifelse(
      BYSES1QU < 0,
      NA_real_,
      ifelse(BYSES1QU == 1 | BYSES1QU == 2, 1, 0)
    )
  )

# Recode BYRACE into text labels. Codes 4 and 5 are both labelled "Hispanic";
# any value outside 1-7 (including NA) becomes NA. case_when() replaces the
# nested ifelse() chain and always yields a character column.
entry_variables_complete <- entry_variables_complete %>%
  mutate(
    race = case_when(
      BYRACE == 1 ~ "Am.Indian",
      BYRACE == 2 ~ "Asian/Pacific",
      BYRACE == 3 ~ "Afr.American",
      BYRACE == 4 | BYRACE == 5 ~ "Hispanic",
      BYRACE == 6 ~ "2orMoreRace",
      BYRACE == 7 ~ "White",
      TRUE ~ NA_character_
    )
  )

# Recode the grandparent education item BYGPARED (codes 1-8) into the same
# label set used for parents_highest_ed. Any other value becomes NA.
entry_variables_complete <- entry_variables_complete %>%
  mutate(
    grandparents_highest_ed = case_when(
      BYGPARED == 1 ~ "Did.Not.Graduate.HS",
      BYGPARED == 2 ~ "HS.Grad.or.GED",
      BYGPARED == 3 ~ "2.Year.School.No.Deg",
      BYGPARED == 4 ~ "2.Year.School.Awarded.Deg",
      BYGPARED == 5 ~ "4.Year.School.No.Deg",
      BYGPARED == 6 ~ "4.Year.School.Awarded.Deg",
      BYGPARED == 7 ~ "Masters.Degree",
      BYGPARED == 8 ~ "PhD.or.other.doctorate",
      TRUE ~ NA_character_
    )
  )


# Complete-case restriction: drop any student missing one of the covariates
# used in the models below.
entry_variables_complete <- entry_variables_complete %>%
  filter(
    !is.na(parents_highest_ed),
    !is.na(PARENT_EXPECTATION),
    !is.na(STUDENT_EXPECTATION),
    !is.na(race),
    !is.na(sex),
    !is.na(MATH_SELF),
    !is.na(HS_GPA)
  )

# Student-level analysis table: identifiers, design columns, outcome, timing
# and covariates, plus three 0/1 indicators built from the expectation labels.
entry_var_clean <- entry_variables_complete %>%
  select(
    STU_ID, STRAT_ID, PSU,
    F3_first_college_2_year, F3_UPDATED_MONTHS_TO_COLLEGE,
    sex, parents_highest_ed, BY_LOW_SES, race, F3QWT,
    PARENT_EXPECTATION, STUDENT_EXPECTATION, MATH_SELF, HS_GPA
  ) %>%
  mutate(
    student_expects_2yr = ifelse(
      STUDENT_EXPECTATION == "Expect.2.Yr.College", 1, 0
    ),
    parent_expects_2yr = ifelse(
      PARENT_EXPECTATION == "Expect.2.Yr.College", 1, 0
    ),
    student_parent_exp_differ = ifelse(
      PARENT_EXPECTATION != STUDENT_EXPECTATION, 1, 0
    )
  )

# Map an expectation label to its rank (1 = lowest, 7 = highest).
# NA stays NA; a label outside the seven known values maps to 0, as before.
expectation_to_ordinal <- function(label) {
  case_when(
    is.na(label) ~ NA_real_,
    label == "Will.Not.Graduate.HS" ~ 1,
    label == "HS.Grad.or.GED" ~ 2,
    label == "Expect.2.Yr.College" ~ 3,
    label == "4.Year.School.No.Deg" ~ 4,
    label == "Graduate.4.Year" ~ 5,
    label == "Masters.Degree" ~ 6,
    label == "PhD.Expected" ~ 7,
    TRUE ~ 0
  )
}

# Rank both expectation columns with the same mapping, then flag students
# whose parent's expectation ranks strictly above their own.
entry_var_clean <- entry_var_clean %>%
  mutate(
    student_expectation_ordinal = expectation_to_ordinal(STUDENT_EXPECTATION),
    parent_expectation_ordinal = expectation_to_ordinal(PARENT_EXPECTATION),
    parent_expectation_higher = ifelse(
      parent_expectation_ordinal > student_expectation_ordinal, 1, 0
    )
  )
# Build the person-period table.
# 1. Spread to one row per student and one column per value of
#    F3_UPDATED_MONTHS_TO_COLLEGE, holding F3_first_college_2_year in the
#    student's own month column. reshape2::dcast is named explicitly so the
#    call does not depend on which package's dcast() is attached.
entry_var_wide <- reshape2::dcast(
  entry_var_clean,
  STU_ID ~ F3_UPDATED_MONTHS_TO_COLLEGE,
  value.var = "F3_first_college_2_year"
)
# 2. Every other month cell is empty after the cast; treat it as "no entry".
entry_var_wide[is.na(entry_var_wide)] <- 0

# 3. Stack back to long form: one row per student per month column.
entry_var_long <- reshape2::melt(
  entry_var_wide,
  id.vars = "STU_ID",
  variable.name = "time",
  value.name = "cc_entry"
)
entry_var_long <- entry_var_long %>% arrange(STU_ID)

# 4. Attach all student-level columns. The previous call passed
#    entry_var_clean[c(1:3, 6:12)] as a third positional argument; merge()
#    bound it to `by` and never used it because by.x/by.y were supplied, so
#    the full table was merged. That is kept here, stated explicitly.
entry_var_long <- merge(entry_var_long, entry_var_clean, by = "STU_ID")

head(entry_var_long)
##   STU_ID time cc_entry STRAT_ID PSU F3_first_college_2_year
## 1 101101    0        0      101   1                       1
## 2 101101    1        0      101   1                       1
## 3 101101    2        0      101   1                       1
## 4 101101    3        0      101   1                       1
## 5 101101    4        0      101   1                       1
## 6 101101    5        0      101   1                       1
##   F3_UPDATED_MONTHS_TO_COLLEGE    sex   parents_highest_ed BY_LOW_SES     race
## 1                           18 Female 4.Year.School.No.Deg          1 Hispanic
## 2                           18 Female 4.Year.School.No.Deg          1 Hispanic
## 3                           18 Female 4.Year.School.No.Deg          1 Hispanic
## 4                           18 Female 4.Year.School.No.Deg          1 Hispanic
## 5                           18 Female 4.Year.School.No.Deg          1 Hispanic
## 6                           18 Female 4.Year.School.No.Deg          1 Hispanic
##      F3QWT PARENT_EXPECTATION STUDENT_EXPECTATION MATH_SELF HS_GPA
## 1 185.9702    Graduate.4.Year Expect.2.Yr.College    -1.118      2
## 2 185.9702    Graduate.4.Year Expect.2.Yr.College    -1.118      2
## 3 185.9702    Graduate.4.Year Expect.2.Yr.College    -1.118      2
## 4 185.9702    Graduate.4.Year Expect.2.Yr.College    -1.118      2
## 5 185.9702    Graduate.4.Year Expect.2.Yr.College    -1.118      2
## 6 185.9702    Graduate.4.Year Expect.2.Yr.College    -1.118      2
##   student_expects_2yr parent_expects_2yr student_parent_exp_differ
## 1                   1                  0                         1
## 2                   1                  0                         1
## 3                   1                  0                         1
## 4                   1                  0                         1
## 5                   1                  0                         1
## 6                   1                  0                         1
##   student_expectation_ordinal parent_expectation_ordinal
## 1                           3                          5
## 2                           3                          5
## 3                           3                          5
## 4                           3                          5
## 5                           3                          5
## 6                           3                          5
##   parent_expectation_higher
## 1                         1
## 2                         1
## 3                         1
## 4                         1
## 5                         1
## 6                         1
# Survey design over the person-period table: strata from STRAT_ID, weights
# from F3QWT, and STU_ID as the cluster id.
# NOTE(review): PSU is carried in the data but not used as the cluster id
# here - confirm that clustering on STU_ID is intended.
des1 <- svydesign(
  ids = ~STU_ID,
  strata = ~STRAT_ID,
  weights = ~F3QWT,
  data = entry_var_long,
  nest = TRUE
)
options(survey.lonely.psu = "adjust")

# Model of cc_entry with a complementary log-log link: one dummy per value of
# `time`, plus the student-level covariates.
fit1 <- svyglm(
  cc_entry ~ as.factor(time) + sex + race + BY_LOW_SES + MATH_SELF + HS_GPA +
    parent_expects_2yr + parent_expectation_higher,
  design = des1,
  family = binomial(link = cloglog)
)
summary(fit1)
## 
## Call:
## svyglm(formula = cc_entry ~ as.factor(time) + sex + race + BY_LOW_SES + 
##     MATH_SELF + HS_GPA + parent_expects_2yr + parent_expectation_higher, 
##     design = des1, family = binomial(link = cloglog))
## 
## Survey design:
## svydesign(ids = ~STU_ID, strata = ~STRAT_ID, weights = ~F3QWT, 
##     data = entry_var_long, nest = T)
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -3.47686    0.17815 -19.516  < 2e-16 ***
## as.factor(time)1          -0.38991    0.20006  -1.949  0.05134 .  
## as.factor(time)2           1.34392    0.14741   9.117  < 2e-16 ***
## as.factor(time)3           2.12685    0.13988  15.204  < 2e-16 ***
## as.factor(time)4          -0.06823    0.18763  -0.364  0.71615    
## as.factor(time)5          -1.89125    0.38151  -4.957 7.34e-07 ***
## as.factor(time)6          -2.75217    0.46486  -5.920 3.38e-09 ***
## as.factor(time)7          -0.52886    0.21336  -2.479  0.01321 *  
## as.factor(time)8          -0.19309    0.19050  -1.014  0.31082    
## as.factor(time)9          -1.76037    0.33731  -5.219 1.86e-07 ***
## as.factor(time)10         -2.12619    0.37616  -5.652 1.65e-08 ***
## as.factor(time)11         -2.38282    0.39515  -6.030 1.73e-09 ***
## as.factor(time)12         -2.15487    0.41020  -5.253 1.54e-07 ***
## as.factor(time)13         -1.98497    0.35555  -5.583 2.47e-08 ***
## as.factor(time)14         -0.87962    0.21631  -4.067 4.83e-05 ***
## as.factor(time)15         -0.29698    0.19195  -1.547  0.12188    
## as.factor(time)16         -1.70281    0.32318  -5.269 1.42e-07 ***
## as.factor(time)17         -3.21918    0.52968  -6.078 1.29e-09 ***
## as.factor(time)18         -2.95433    0.47830  -6.177 6.96e-10 ***
## as.factor(time)19         -1.42484    0.27986  -5.091 3.66e-07 ***
## as.factor(time)20         -1.59756    0.29570  -5.403 6.81e-08 ***
## as.factor(time)21          1.42330    0.14689   9.690  < 2e-16 ***
## sexMale                   -0.01753    0.04555  -0.385  0.70033    
## raceAfr.American          -0.26391    0.12508  -2.110  0.03490 *  
## raceAm.Indian              0.24128    0.21817   1.106  0.26880    
## raceAsian/Pacific         -0.07066    0.12841  -0.550  0.58216    
## raceHispanic               0.19248    0.11443   1.682  0.09262 .  
## raceWhite                  0.07773    0.10670   0.728  0.46635    
## BY_LOW_SES                 0.35358    0.04634   7.630 2.70e-14 ***
## MATH_SELF                 -0.12367    0.02385  -5.186 2.21e-07 ***
## HS_GPA                    -0.24020    0.01560 -15.395  < 2e-16 ***
## parent_expects_2yr         0.20520    0.07674   2.674  0.00751 ** 
## parent_expectation_higher  0.07946    0.04604   1.726  0.08445 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.8824798)
## 
## Number of Fisher Scoring iterations: 9
# Logistic model with a 4-df natural spline in months-to-college plus race and
# the low-SES flag, fitted on des1.
# NOTE(review): the response is the student-level F3_first_college_2_year and
# the time term is F3_UPDATED_MONTHS_TO_COLLEGE, while des1 is built from the
# person-period table (cc_entry / time) - confirm this is the intended outcome.
fit.splines.ses.race <- svyglm(
  formula = F3_first_college_2_year ~
    ns(F3_UPDATED_MONTHS_TO_COLLEGE, df = 4) + race + BY_LOW_SES,
  design = des1,
  family = "binomial"
)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
# Prediction grid: every combination of month 1-21, observed race label and
# observed BY_LOW_SES value. The model's time column is added under the name
# the fit expects so predict() can find it.
plot_ses_race <- expand.grid(
  month = seq(1, 21, 1),
  race = unique(entry_var_long$race),
  BY_LOW_SES = unique(entry_var_long$BY_LOW_SES)
) %>%
  mutate(F3_UPDATED_MONTHS_TO_COLLEGE = month)

# Fitted values on the response scale for each grid row.
plot_ses_race$splines <- predict(
  fit.splines.ses.race,
  newdata = plot_ses_race,
  type = "response"
)

# The duplicate time column was only needed for predict(); drop it again.
plot_ses_race$F3_UPDATED_MONTHS_TO_COLLEGE <- NULL

head(plot_ses_race, n = 20)
##    month     race BY_LOW_SES   splines
## 1      1 Hispanic          1 0.6896508
## 2      2 Hispanic          1 0.6398162
## 3      3 Hispanic          1 0.5890601
## 4      4 Hispanic          1 0.5410242
## 5      5 Hispanic          1 0.4992894
## 6      6 Hispanic          1 0.4669457
## 7      7 Hispanic          1 0.4463286
## 8      8 Hispanic          1 0.4387361
## 9      9 Hispanic          1 0.4453221
## 10    10 Hispanic          1 0.4674825
## 11    11 Hispanic          1 0.5067164
## 12    12 Hispanic          1 0.5621109
## 13    13 Hispanic          1 0.6242297
## 14    14 Hispanic          1 0.6818507
## 15    15 Hispanic          1 0.7263109
## 16    16 Hispanic          1 0.7522464
## 17    17 Hispanic          1 0.7573968
## 18    18 Hispanic          1 0.7451009
## 19    19 Hispanic          1 0.7184153
## 20    20 Hispanic          1 0.6799970
library(data.table)
library(magrittr)

# Reshape the prediction grid to long form with melt(): month, race and
# BY_LOW_SES are kept as id columns and the `splines` column is stacked into
# the variable/value pair shown in the preview below.
# setDT() is applied to plot_ses_race directly; per the data.table docs it
# converts the object to a data.table by reference rather than copying it.
# NOTE(review): `id=` is presumably partially matched to melt()'s `id.vars`
# argument - confirm and consider spelling it out.
out<-melt(setDT(plot_ses_race),id=c("month","race","BY_LOW_SES"),measure.vars = list(haz=c("splines")))
# Preview the first 20 rows of the reshaped table.
head(out,n=20)
##    month     race BY_LOW_SES variable     value
## 1      1 Hispanic          1  splines 0.6896508
## 2      2 Hispanic          1  splines 0.6398162
## 3      3 Hispanic          1  splines 0.5890601
## 4      4 Hispanic          1  splines 0.5410242
## 5      5 Hispanic          1  splines 0.4992894
## 6      6 Hispanic          1  splines 0.4669457
## 7      7 Hispanic          1  splines 0.4463286
## 8      8 Hispanic          1  splines 0.4387361
## 9      9 Hispanic          1  splines 0.4453221
## 10    10 Hispanic          1  splines 0.4674825
## 11    11 Hispanic          1  splines 0.5067164
## 12    12 Hispanic          1  splines 0.5621109
## 13    13 Hispanic          1  splines 0.6242297
## 14    14 Hispanic          1  splines 0.6818507
## 15    15 Hispanic          1  splines 0.7263109
## 16    16 Hispanic          1  splines 0.7522464
## 17    17 Hispanic          1  splines 0.7573968
## 18    18 Hispanic          1  splines 0.7451009
## 19    19 Hispanic          1  splines 0.7184153
## 20    20 Hispanic          1  splines 0.6799970
# Running total of the fitted values within each race x SES cell, in the row
# order of `out`. The result stays grouped by race and BY_LOW_SES.
splines.ses.table <- out %>%
  group_by(race, BY_LOW_SES) %>%
  dplyr::mutate(bigH = cumsum(value))

# Figure 4: one panel per race, one line per BY_LOW_SES value.
ggplot(splines.ses.table, aes(x = month, y = bigH)) +
  geom_line(aes(group = BY_LOW_SES, color = BY_LOW_SES)) +
  facet_wrap(~race) +
  ggtitle("Figure 4 - Cumulative Hazard by SES and Race")

# Average the running total over the SES values for each month and race.
splines.ses.table2 <- splines.ses.table %>%
  group_by(month, race) %>%
  summarize(hz = mean(bigH, na.rm = TRUE))
## `summarise()` has grouped output by 'month'. You can override using the `.groups` argument.
# One line per race of the SES-averaged running total by month.
ggplot(
  data = splines.ses.table2,
  mapping = aes(x = month, y = hz, group = race, color = race)
) +
  geom_line()

# HTML regression table for fit1; the as.factor(time) dummies are omitted.
stargazer(
  fit1,
  type = "html",
  title = "Figure 2",
  omit = "as.factor"
)
Figure 2
Dependent variable:
cc_entry
sexMale -0.018
(0.046)
raceAfr.American -0.264**
(0.125)
raceAm.Indian 0.241
(0.218)
raceAsian/Pacific -0.071
(0.128)
raceHispanic 0.192*
(0.114)
raceWhite 0.078
(0.107)
BY_LOW_SES 0.354***
(0.046)
MATH_SELF -0.124***
(0.024)
HS_GPA -0.240***
(0.016)
parent_expects_2yr 0.205***
(0.077)
parent_expectation_higher 0.079*
(0.046)
Constant -3.477***
(0.178)
Observations 145,508
Log Likelihood -8,579.326
Akaike Inf. Crit. 17,224.650
Note: *p<0.1; **p<0.05; ***p<0.01
# Running-total curves for the "Hispanic" rows, split by the BY_LOW_SES flag.
# splines.ses.table is still grouped by race and BY_LOW_SES, which is why
# select() re-adds those two columns (see the messages echoed below).
hispanic_low_ses <- splines.ses.table %>%
  filter(race == "Hispanic" & BY_LOW_SES == 1) %>%
  select(month, bigH)
## Adding missing grouping variables: `race`, `BY_LOW_SES`
hispanic_high_ses <- splines.ses.table %>%
  filter(race == "Hispanic" & BY_LOW_SES == 0) %>%
  select(month, bigH)
## Adding missing grouping variables: `race`, `BY_LOW_SES`

# Step plot: low SES in colour 1, high SES overlaid in colour 2.
plot(bigH ~ month, data = hispanic_low_ses, col = 1, lwd = 2, type = "s")
lines(bigH ~ month, data = hispanic_high_ses, col = 2, lwd = 2, type = "s")
legend(
  "bottomright",
  legend = c("Hispanic Low SES", "Hispanic High SES"),
  lty = 1,
  col = c(1, 2)
)

#stargazer(entry_var_clean,title="Figure 1",type = "html")

# Descriptive-statistics table (Figure 1): the numeric student-level columns
# plus 0/1 indicator columns for every race and sex category.
figure1_code <- entry_var_clean %>%
  select(
    STU_ID, F3_first_college_2_year, F3_UPDATED_MONTHS_TO_COLLEGE,
    sex, race, BY_LOW_SES,
    student_expects_2yr, parent_expects_2yr, parent_expectation_higher
  )

# Turn a wide cast (STU_ID plus one column per category) into numeric 0/1
# indicators: 1 where the cell holds a value, 0 where it is NA. Columns are
# picked by name, so this does not depend on how many categories exist.
to_indicator <- function(wide) {
  dummy_cols <- setdiff(names(wide), "STU_ID")
  wide[dummy_cols] <- lapply(
    wide[dummy_cols],
    function(cell) as.numeric(!is.na(cell))
  )
  wide
}

# reshape2::dcast is named explicitly: data.table is attached earlier in this
# script, so a bare dcast() would resolve to data.table's generic even though
# figure1_code is a plain data frame.
figure1_code2 <- to_indicator(
  reshape2::dcast(figure1_code, STU_ID ~ race, value.var = "race")
)
figure1_code3 <- to_indicator(
  reshape2::dcast(figure1_code, STU_ID ~ sex, value.var = "sex")
)

figure1_code <- merge(figure1_code, figure1_code2, by = "STU_ID")
figure1_code <- merge(figure1_code, figure1_code3, by = "STU_ID")

stargazer(figure1_code, title = "Figure 1", type = "html")
Figure 1
Statistic N Mean St. Dev. Min Pctl(25) Pctl(75) Max
STU_ID 6,614 282,299.200 108,452.200 101,101 184,110 386,213.8 461,234
F3_first_college_2_year 6,614 0.296 0.456 0 0 1 1
F3_UPDATED_MONTHS_TO_COLLEGE 6,614 5.451 6.149 0 2 4 21
BY_LOW_SES 6,614 0.371 0.483 0 0 1 1
student_expects_2yr 6,614 0.045 0.207 0 0 0 1
parent_expects_2yr 6,614 0.044 0.205 0 0 0 1
parent_expectation_higher 6,614 0.294 0.456 0 0 1 1
2orMoreRace 6,614 0.047 0.211 0 0 0 1
Afr.American 6,614 0.097 0.296 0 0 0 1
Am.Indian 6,614 0.005 0.070 0 0 0 1
Asian/Pacific 6,614 0.101 0.301 0 0 0 1
Hispanic 6,614 0.111 0.314 0 0 0 1
White 6,614 0.640 0.480 0 0 1 1
Female 6,614 0.571 0.495 0 0 1 1
Male 6,614 0.429 0.495 0 0 1 1
# Add exp(-running total of value) as `surv`.
splines.ses.table <- splines.ses.table %>%
  mutate(surv = exp(-cumsum(value)))

# Mean fitted value per month, separately for the two BY_LOW_SES values.
plot.low_ses <- splines.ses.table %>%
  filter(BY_LOW_SES == 1) %>%
  group_by(month) %>%
  summarize(values = mean(value))
plot.high_ses <- splines.ses.table %>%
  filter(BY_LOW_SES == 0) %>%
  group_by(month) %>%
  summarize(values = mean(value))

# Figure 3: step plot with low SES in colour 1 and high SES in colour 2.
plot(
  values ~ month,
  data = plot.low_ses,
  col = 1,
  lwd = 2,
  type = "s",
  xlim = c(0, 21),
  ylim = c(0, 0.8),
  main = "Figure 3. Hazard for Community College Entry by SES",
  ylab = "Hazard"
)
lines(values ~ month, data = plot.high_ses, col = 2, lwd = 2, type = "s")
legend(
  "bottomright",
  legend = c("Low SES", "High SES"),
  lty = 1,
  col = c(1, 2)
)

#plot.low_ses<-splines.ses.table %>% group_by(month) %>% filter(BY_LOW_SES==1) %>% summarize(values=mean(value))
#plot.high_ses<-splines.ses.table %>% group_by(month) %>% filter(BY_LOW_SES==0) %>% summarize(values=mean(value))

#plot(values~month,plot.high_ses,col=1,lwd=2,type="s")
#lines(values~month,plot.low_ses,col=2,lwd=2,type="s")
# Descriptive table of student characteristics, stratified by whether the
# first college entered was a two-year school, with group-comparison tests.
table1 <- CreateTableOne(
  vars = c(
    "sex", "race", "BY_LOW_SES",
    "parent_expects_2yr", "student_expects_2yr"
  ),
  strata = "F3_first_college_2_year",
  test = TRUE,
  data = entry_var_clean
)

print(table1, format = "p")
##                                  Stratified by F3_first_college_2_year
##                                   0           1           p      test
##   n                               4658        1956                   
##   sex = Male (%)                  42.7        43.1         0.782     
##   race (%)                                                <0.001     
##      2orMoreRace                   4.6         4.8                   
##      Afr.American                  9.9         9.1                   
##      Am.Indian                     0.4         0.8                   
##      Asian/Pacific                10.4         9.3                   
##      Hispanic                      8.9        16.4                   
##      White                        65.8        59.6                   
##   BY_LOW_SES (mean (SD))          0.31 (0.46) 0.53 (0.50) <0.001     
##   parent_expects_2yr (mean (SD))  0.03 (0.17) 0.08 (0.27) <0.001     
##   student_expects_2yr (mean (SD)) 0.03 (0.16) 0.09 (0.28) <0.001
# Keep the value returned by print() so the table can be passed to stargazer.
test1 <- print(table1, format = "p")
##                                  Stratified by F3_first_college_2_year
##                                   0           1           p      test
##   n                               4658        1956                   
##   sex = Male (%)                  42.7        43.1         0.782     
##   race (%)                                                <0.001     
##      2orMoreRace                   4.6         4.8                   
##      Afr.American                  9.9         9.1                   
##      Am.Indian                     0.4         0.8                   
##      Asian/Pacific                10.4         9.3                   
##      Hispanic                      8.9        16.4                   
##      White                        65.8        59.6                   
##   BY_LOW_SES (mean (SD))          0.31 (0.46) 0.53 (0.50) <0.001     
##   parent_expects_2yr (mean (SD))  0.03 (0.17) 0.08 (0.27) <0.001     
##   student_expects_2yr (mean (SD)) 0.03 (0.16) 0.09 (0.28) <0.001
# Render the captured table as HTML.
stargazer(test1, align = TRUE, type = "html")
0 1 p test
n 4658 1956
sex = Male (%) 42.7 43.1 0.782
race (%) < 0.001
2orMoreRace 4.6 4.8
Afr.American 9.9 9.1
Am.Indian 0.4 0.8
Asian/Pacific 10.4 9.3
Hispanic 8.9 16.4
White 65.8 59.6
BY_LOW_SES (mean (SD)) 0.31 (0.46) 0.53 (0.50) < 0.001
parent_expects_2yr (mean (SD)) 0.03 (0.17) 0.08 (0.27) < 0.001
student_expects_2yr (mean (SD)) 0.03 (0.16) 0.09 (0.28) < 0.001
# Presentation copy of the person-period table: the model columns are renamed
# to readable labels while selecting; the design columns keep their names.
entry_var_final <- entry_var_long %>%
  select(
    `Community College Entry` = cc_entry,
    `Time in Months` = time,
    Sex = sex,
    Race = race,
    `Low SES in Base Year` = BY_LOW_SES,
    `Math Self Efficacy` = MATH_SELF,
    `High School GPA` = HS_GPA,
    `Parent Expects 2 Year College` = parent_expects_2yr,
    `Parent Expectation is Higher Than Student's` = parent_expectation_higher,
    STU_ID,
    STRAT_ID,
    F3QWT,
    `First College Entry is CC` = F3_first_college_2_year
  )

# Same design specification as des1, built on the relabelled table.
des_final <- svydesign(
  ids = ~STU_ID,
  strata = ~STRAT_ID,
  weights = ~F3QWT,
  data = entry_var_final,
  nest = TRUE
)
options(survey.lonely.psu = "adjust")

# Same model as fit1, written with the readable column names.
fit_final <- svyglm(
  `Community College Entry` ~ as.factor(`Time in Months`) + Sex + Race +
    `Low SES in Base Year` + `Math Self Efficacy` + `High School GPA` +
    `Parent Expects 2 Year College` +
    `Parent Expectation is Higher Than Student's`,
  design = des_final,
  family = binomial(link = cloglog)
)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
# Student-level table with readable column labels, used for the descriptive
# tables below.
entry_var_final2 <- entry_var_clean %>%
  select(
    `Time in Months` = F3_UPDATED_MONTHS_TO_COLLEGE,
    Sex = sex,
    Race = race,
    `Low SES in Base Year` = BY_LOW_SES,
    `Math Self Efficacy` = MATH_SELF,
    `High School GPA` = HS_GPA,
    `Parent Expects 2 Year College` = parent_expects_2yr,
    `Parent Expectation is Higher Than Student's` = parent_expectation_higher,
    STU_ID,
    STRAT_ID,
    F3QWT,
    `First College Entry is CC` = F3_first_college_2_year
  )

# Descriptive table stratified by the first-college-is-CC flag.
table1 <- CreateTableOne(
  vars = c(
    "Sex", "Race", "Low SES in Base Year",
    "Parent Expects 2 Year College", "High School GPA", "Math Self Efficacy"
  ),
  strata = "First College Entry is CC",
  test = TRUE,
  data = entry_var_final2
)

print(table1, format = "p")
##                                            Stratified by First College Entry is CC
##                                             0           1            p     
##   n                                         4658         1956              
##   Sex = Male (%)                            42.7         43.1         0.782
##   Race (%)                                                           <0.001
##      2orMoreRace                             4.6          4.8              
##      Afr.American                            9.9          9.1              
##      Am.Indian                               0.4          0.8              
##      Asian/Pacific                          10.4          9.3              
##      Hispanic                                8.9         16.4              
##      White                                  65.8         59.6              
##   Low SES in Base Year (mean (SD))          0.31 (0.46)  0.53 (0.50) <0.001
##   Parent Expects 2 Year College (mean (SD)) 0.03 (0.17)  0.08 (0.27) <0.001
##   High School GPA (mean (SD))               4.71 (1.27)  3.74 (1.32) <0.001
##   Math Self Efficacy (mean (SD))            0.23 (1.01) -0.11 (0.97) <0.001
##                                            Stratified by First College Entry is CC
##                                             test
##   n                                             
##   Sex = Male (%)                                
##   Race (%)                                      
##      2orMoreRace                                
##      Afr.American                               
##      Am.Indian                                  
##      Asian/Pacific                              
##      Hispanic                                   
##      White                                      
##   Low SES in Base Year (mean (SD))              
##   Parent Expects 2 Year College (mean (SD))     
##   High School GPA (mean (SD))                   
##   Math Self Efficacy (mean (SD))
# Keep the value returned by print() so the relabelled table can be exported.
test1 <- print(table1, format = "p")
##                                            Stratified by First College Entry is CC
##                                             0           1            p     
##   n                                         4658         1956              
##   Sex = Male (%)                            42.7         43.1         0.782
##   Race (%)                                                           <0.001
##      2orMoreRace                             4.6          4.8              
##      Afr.American                            9.9          9.1              
##      Am.Indian                               0.4          0.8              
##      Asian/Pacific                          10.4          9.3              
##      Hispanic                                8.9         16.4              
##      White                                  65.8         59.6              
##   Low SES in Base Year (mean (SD))          0.31 (0.46)  0.53 (0.50) <0.001
##   Parent Expects 2 Year College (mean (SD)) 0.03 (0.17)  0.08 (0.27) <0.001
##   High School GPA (mean (SD))               4.71 (1.27)  3.74 (1.32) <0.001
##   Math Self Efficacy (mean (SD))            0.23 (1.01) -0.11 (0.97) <0.001
##                                            Stratified by First College Entry is CC
##                                             test
##   n                                             
##   Sex = Male (%)                                
##   Race (%)                                      
##      2orMoreRace                                
##      Afr.American                               
##      Am.Indian                                  
##      Asian/Pacific                              
##      Hispanic                                   
##      White                                      
##   Low SES in Base Year (mean (SD))              
##   Parent Expects 2 Year College (mean (SD))     
##   High School GPA (mean (SD))                   
##   Math Self Efficacy (mean (SD))
# Render the captured, relabelled table as HTML.
stargazer(test1, align = TRUE, type = "html")
0 1 p test
n 4658 1956
Sex = Male (%) 42.7 43.1 0.782
Race (%) < 0.001
2orMoreRace 4.6 4.8
Afr.American 9.9 9.1
Am.Indian 0.4 0.8
Asian/Pacific 10.4 9.3
Hispanic 8.9 16.4
White 65.8 59.6
Low SES in Base Year (mean (SD)) 0.31 (0.46) 0.53 (0.50) < 0.001
Parent Expects 2 Year College (mean (SD)) 0.03 (0.17) 0.08 (0.27) < 0.001
High School GPA (mean (SD)) 4.71 (1.27) 3.74 (1.32) < 0.001
Math Self Efficacy (mean (SD)) 0.23 (1.01) -0.11 (0.97) < 0.001
# HTML regression table for fit_final; the time dummies are omitted.
stargazer(
  fit_final,
  type = "html",
  title = "Figure 2",
  omit = "as.factor"
)
Figure 2
Dependent variable:
Community College Entry
SexMale -0.018
(0.046)
RaceAfr.American -0.264**
(0.125)
RaceAm.Indian 0.241
(0.218)
RaceAsian/Pacific -0.071
(0.128)
RaceHispanic 0.192*
(0.114)
RaceWhite 0.078
(0.107)
Low SES in Base Year 0.354***
(0.046)
Math Self Efficacy -0.124***
(0.024)
High School GPA -0.240***
(0.016)
Parent Expects 2 Year College 0.205***
(0.077)
Parent Expectation is Higher Than Student's 0.079*
(0.046)
Constant -3.477***
(0.178)
Observations 145,508
Log Likelihood -8,579.326
Akaike Inf. Crit. 17,224.650
Note: *p<0.1; **p<0.05; ***p<0.01
# Grab early and late community-college (CC) adopters.
#
# Both groups keep only rows where `First College Entry is CC` is 1 and
# `Time in Months` falls in the group's window (1-3 = early, 14-16 = late),
# then tag the rows with a GROUP label.
#
# The month windows are written with %in% and the CC condition is a separate
# filter argument. Chaining `==` with `&` and `|` made the CC condition apply
# only to month 14, because `&` binds tighter than `|`.

early_entry <- entry_var_final2 %>%
  filter(`First College Entry is CC` == 1, `Time in Months` %in% 1:3) %>%
  mutate(GROUP = "Early")

late_entry <- entry_var_final2 %>%
  filter(`First College Entry is CC` == 1, `Time in Months` %in% 14:16) %>%
  mutate(GROUP = "Late")
# Add dummy variables.
# Stack the early and late groups, then derive 1/0 indicator columns for
# sex (female) and for three race categories (White, Hispanic, Afr.American).

early_and_late <- rbind(early_entry, late_entry) %>%
  mutate(
    SEX_DUMMY_FEMALE = ifelse(Sex == "Female", 1, 0),
    race_DUMMY_white = ifelse(Race == "White", 1, 0),
    race_DUMMY_hispanic = ifelse(Race == "Hispanic", 1, 0),
    race_DUMMY_black = ifelse(Race == "Afr.American", 1, 0)
  )
#for (i in c(4:8,12,14:17)) {
#  boxplot(early_and_late[,i]~early_and_late$GROUP,
#          ylab=names(early_and_late[i]),
#          xlab="Group"
#          )
#t.test(early_and_late[,i]~early_and_late$GROUP)
#}

# Label each row whose first college entry is a CC as "Early" (months 1-3)
# or "Late" (months 14-16); every other row gets NA.
entry_var_final2 <- entry_var_final2 %>%
  mutate(
    early_or_late = ifelse(
      `First College Entry is CC` == 1 & `Time in Months` %in% 1:3,
      "Early",
      ifelse(
        `First College Entry is CC` == 1 & `Time in Months` %in% 14:16,
        "Late",
        NA_character_
      )
    )
  )



# Descriptive table of the listed covariates, stratified by early_or_late,
# with group-comparison tests requested.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
table2 <- CreateTableOne(
  vars = c(
    "Sex",
    "Race",
    "Low SES in Base Year",
    "Parent Expects 2 Year College",
    "High School GPA",
    "Math Self Efficacy",
    "Parent Expectation is Higher Than Student's"
  ),
  strata = "early_or_late",
  test = TRUE,
  data = entry_var_final2
)

# Display the stratified table; with format = "p" the categorical rows are
# shown as percentages.
print(
  table2,
  format = "p"
)
##                                                          Stratified by early_or_late
##                                                           Early       
##   n                                                        1099       
##   Sex = Male (%)                                           42.9       
##   Race (%)                                                            
##      2orMoreRace                                            4.5       
##      Afr.American                                           8.6       
##      Am.Indian                                              0.5       
##      Asian/Pacific                                         10.5       
##      Hispanic                                              15.0       
##      White                                                 61.0       
##   Low SES in Base Year (mean (SD))                         0.49 (0.50)
##   Parent Expects 2 Year College (mean (SD))                0.06 (0.25)
##   High School GPA (mean (SD))                              3.94 (1.23)
##   Math Self Efficacy (mean (SD))                          -0.12 (0.96)
##   Parent Expectation is Higher Than Student's (mean (SD))  0.34 (0.47)
##                                                          Stratified by early_or_late
##                                                           Late         p     
##   n                                                         126              
##   Sex = Male (%)                                           51.6         0.076
##   Race (%)                                                              0.042
##      2orMoreRace                                            5.6              
##      Afr.American                                          13.5              
##      Am.Indian                                              2.4              
##      Asian/Pacific                                          7.9              
##      Hispanic                                              16.7              
##      White                                                 54.0              
##   Low SES in Base Year (mean (SD))                         0.67 (0.47) <0.001
##   Parent Expects 2 Year College (mean (SD))                0.08 (0.27)  0.528
##   High School GPA (mean (SD))                              3.42 (1.32) <0.001
##   Math Self Efficacy (mean (SD))                          -0.14 (0.95)  0.800
##   Parent Expectation is Higher Than Student's (mean (SD))  0.44 (0.50)  0.027
##                                                          Stratified by early_or_late
##                                                           test
##   n                                                           
##   Sex = Male (%)                                              
##   Race (%)                                                    
##      2orMoreRace                                              
##      Afr.American                                             
##      Am.Indian                                                
##      Asian/Pacific                                            
##      Hispanic                                                 
##      White                                                    
##   Low SES in Base Year (mean (SD))                            
##   Parent Expects 2 Year College (mean (SD))                   
##   High School GPA (mean (SD))                                 
##   Math Self Efficacy (mean (SD))                              
##   Parent Expectation is Higher Than Student's (mean (SD))
# Print the table again and rebind table2 to the value print() returns, so
# the stargazer() call further down receives that printed form rather than
# the CreateTableOne object.
table2 <- print(
  table2,
  format = "p"
)
##                                                          Stratified by early_or_late
##                                                           Early       
##   n                                                        1099       
##   Sex = Male (%)                                           42.9       
##   Race (%)                                                            
##      2orMoreRace                                            4.5       
##      Afr.American                                           8.6       
##      Am.Indian                                              0.5       
##      Asian/Pacific                                         10.5       
##      Hispanic                                              15.0       
##      White                                                 61.0       
##   Low SES in Base Year (mean (SD))                         0.49 (0.50)
##   Parent Expects 2 Year College (mean (SD))                0.06 (0.25)
##   High School GPA (mean (SD))                              3.94 (1.23)
##   Math Self Efficacy (mean (SD))                          -0.12 (0.96)
##   Parent Expectation is Higher Than Student's (mean (SD))  0.34 (0.47)
##                                                          Stratified by early_or_late
##                                                           Late         p     
##   n                                                         126              
##   Sex = Male (%)                                           51.6         0.076
##   Race (%)                                                              0.042
##      2orMoreRace                                            5.6              
##      Afr.American                                          13.5              
##      Am.Indian                                              2.4              
##      Asian/Pacific                                          7.9              
##      Hispanic                                              16.7              
##      White                                                 54.0              
##   Low SES in Base Year (mean (SD))                         0.67 (0.47) <0.001
##   Parent Expects 2 Year College (mean (SD))                0.08 (0.27)  0.528
##   High School GPA (mean (SD))                              3.42 (1.32) <0.001
##   Math Self Efficacy (mean (SD))                          -0.14 (0.95)  0.800
##   Parent Expectation is Higher Than Student's (mean (SD))  0.44 (0.50)  0.027
##                                                          Stratified by early_or_late
##                                                           test
##   n                                                           
##   Sex = Male (%)                                              
##   Race (%)                                                    
##      2orMoreRace                                              
##      Afr.American                                             
##      Am.Indian                                                
##      Asian/Pacific                                            
##      Hispanic                                                 
##      White                                                    
##   Low SES in Base Year (mean (SD))                            
##   Parent Expects 2 Year College (mean (SD))                   
##   High School GPA (mean (SD))                                 
##   Math Self Efficacy (mean (SD))                              
##   Parent Expectation is Higher Than Student's (mean (SD))
# Render the early/late comparison table as aligned HTML with a title.
stargazer(
  table2,
  align = TRUE,
  type = "html",
  title = "Early and Late CC Adopters by Variable (t-test)",
  dep.var.caption = "Early CC Adopters = 0, Late CC Adopters = 1"
)
Early and Late CC Adopters by Variable (t-test)
Early Late p test
n 1099 126
Sex = Male (%) 42.9 51.6 0.076
Race (%) 0.042
2orMoreRace 4.5 5.6
Afr.American 8.6 13.5
Am.Indian 0.5 2.4
Asian/Pacific 10.5 7.9
Hispanic 15.0 16.7
White 61.0 54.0
Low SES in Base Year (mean (SD)) 0.49 (0.50) 0.67 (0.47) < 0.001
Parent Expects 2 Year College (mean (SD)) 0.06 (0.25) 0.08 (0.27) 0.528
High School GPA (mean (SD)) 3.94 (1.23) 3.42 (1.32) < 0.001
Math Self Efficacy (mean (SD)) -0.12 (0.96) -0.14 (0.95) 0.800
Parent Expectation is Higher Than Student’s (mean (SD)) 0.34 (0.47) 0.44 (0.50) 0.027
# Render the fit1 model, including its as.factor(time) terms, as an aligned
# HTML table.
stargazer(
  fit1,
  align = TRUE,
  type = "html"
)
Dependent variable:
cc_entry
as.factor(time)1 -0.390*
(0.200)
as.factor(time)2 1.344***
(0.147)
as.factor(time)3 2.127***
(0.140)
as.factor(time)4 -0.068
(0.188)
as.factor(time)5 -1.891***
(0.382)
as.factor(time)6 -2.752***
(0.465)
as.factor(time)7 -0.529**
(0.213)
as.factor(time)8 -0.193
(0.190)
as.factor(time)9 -1.760***
(0.337)
as.factor(time)10 -2.126***
(0.376)
as.factor(time)11 -2.383***
(0.395)
as.factor(time)12 -2.155***
(0.410)
as.factor(time)13 -1.985***
(0.356)
as.factor(time)14 -0.880***
(0.216)
as.factor(time)15 -0.297
(0.192)
as.factor(time)16 -1.703***
(0.323)
as.factor(time)17 -3.219***
(0.530)
as.factor(time)18 -2.954***
(0.478)
as.factor(time)19 -1.425***
(0.280)
as.factor(time)20 -1.598***
(0.296)
as.factor(time)21 1.423***
(0.147)
sexMale -0.018
(0.046)
raceAfr.American -0.264**
(0.125)
raceAm.Indian 0.241
(0.218)
raceAsian/Pacific -0.071
(0.128)
raceHispanic 0.192*
(0.114)
raceWhite 0.078
(0.107)
BY_LOW_SES 0.354***
(0.046)
MATH_SELF -0.124***
(0.024)
HS_GPA -0.240***
(0.016)
parent_expects_2yr 0.205***
(0.077)
parent_expectation_higher 0.079*
(0.046)
Constant -3.477***
(0.178)
Observations 145,508
Log Likelihood -8,579.326
Akaike Inf. Crit. 17,224.650
Note: *p<0.1; **p<0.05; ***p<0.01