This analysis uses the ELS public database and keeps only Hispanic students; all non-Hispanic students are filtered out.

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 3.6.2

library(haven)

## Warning: package 'haven' was built under R version 3.6.2

library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer

library(survey)

## Loading required package: grid

## Loading required package: Matrix

## Loading required package: survival

## Warning: package 'survival' was built under R version 3.6.2

## 
## Attaching package: 'survey'

## The following object is masked from 'package:graphics':
## 
##     dotchart

library(questionr)

# Load the ELS student-level extract.
# The file location can be overridden with the ELS_DATA_PATH environment
# variable; when that variable is unset the original hard-coded path is used,
# so the default behaviour is unchanged.
els_path <- Sys.getenv(
  "ELS_DATA_PATH",
  unset = "C:/Users/PCMcC/Documents/DISSERTATION/CHAPTER 1 ASPIRATIONS & SOCIAL CAPITAL/Data/els_02_12_byf3pststu_v1_0.rdata"
)
if (!file.exists(els_path)) {
  stop("ELS data file not found: ", els_path, call. = FALSE)
}

# load() restores the saved object under its stored name
# (els_02_12_byf3pststu_v1_0); copy it to a plain data frame and drop the
# original so only one copy stays in memory.
load(els_path)
data1 <- as.data.frame(els_02_12_byf3pststu_v1_0)
rm(els_02_12_byf3pststu_v1_0, els_path)

# Print numbers in fixed rather than scientific notation.
options(scipen = 999)

#Rename columns

library(dplyr)

## Warning: package 'dplyr' was built under R version 3.6.2

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Keep only the identifier, survey-design, and analysis columns.
# Design and weight variables:
#   bystuwt   - student weight at baseline
#   F3QWT     - weight at the third round of the survey
#   psu       - primary sampling unit
#   STRAT_ID  - sampling strata
#   F3BYPNLWT - panel weight used when the analysis spans more than one round
#               of data collection. Need to clarify whether this is more
#               appropriate, considering some of my control variables come
#               from the baseline survey (e.g. parent's income, parent's race).
data2 <- select(
  data1,
  STU_ID, psu, bystuwt, F3QWT, F3BYPNLWT, STRAT_ID,
  BYS14, byrace, bypared, byparacr, bystexp, BYP85,
  F3ATTAINMENT, F3BYEDEXPFF, F3A17, F3FURTHERED, F3STEXP, F3HIIMPACT,
  F3A14A, F3A14B, F3A14C, F3A14D, F3A14E, F3A14F,
  F3D39, F3D40, F3D42
)

# Rename the raw ELS column names to descriptive analysis names.
# Each entry reads old_name = "new_name"; columns that are not listed keep
# their original names. The F3A17 -> "expectEduLevel" rename is disabled in
# this script, so F3A17 is deliberately absent from the lookup.
new_names <- c(
  STU_ID       = "studentID",
  BYS14        = "sex",
  byrace       = "race",
  bypared      = "parentsed",
  bystexp      = "firstExpectEdu",
  BYP85        = "familyInc",
  F3ATTAINMENT = "eduAttain",
  F3BYEDEXPFF  = "expectEduStatus",
  F3FURTHERED  = "expectEduAttained",
  F3STEXP      = "thirdExpectEdu",
  F3HIIMPACT   = "ImpactActivities",
  F3A14A       = "internship",
  F3A14B       = "research",
  F3A14C       = "abroad",
  F3A14D       = "community",
  F3A14E       = "seniorExperience",
  F3A14F       = "mentoring",
  F3D39        = "voted",
  F3D40        = "unpaidWork",
  F3D42        = "volunteeringFreq",
  STRAT_ID     = "strataID",
  F3QWT        = "studentWeight",
  F3BYPNLWT    = "panelStudentWeight"
)
to_rename <- names(data2) %in% names(new_names)
names(data2)[to_rename] <- unname(new_names[names(data2)[to_rename]])
rm(new_names, to_rename)

# Keep only Hispanic students
# (4 = Hispanic, no race specified; 5 = Hispanic, race specified).
data2 <- data2 %>% filter(race %in% c("4", "5"))




#Recode Variables
library(car)

## Warning: package 'car' was built under R version 3.6.2

## Loading required package: carData

## Warning: package 'carData' was built under R version 3.6.1

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

# Recode the raw survey items into analysis variables with car::Recode().
# In every recode below, any value not listed explicitly is mapped by the
# `else` term (to missing, unless stated otherwise).

# Sex: numeric indicator (1 = female) and a labelled factor.
data2$Female <- Recode(data2$sex, recodes = "1=0; 2=1; else =NA")
data2$sex2 <- Recode(
  data2$sex,
  recodes = "1='Male'; 2='Female'; else =NA",
  as.factor = TRUE
)

# Parents' education: detailed factor and a bachelor's-or-more indicator.
data2$parentsed2 <- Recode(
  data2$parentsed,
  recodes = "1='LessthanHS';2='HighSchool'; 3='LessthanAssociate'; 4='Associate'; 5='LessthanBach';6='Bachelors'; 7:8='Graduate'; else=NA ",
  as.factor = TRUE
)
data2$parentsBachelorOrMore <- Recode(
  data2$parentsed,
  recodes = "1:5=0; 6:8=1; else=NA ",
  as.factor = TRUE
)

# First-survey academic expectation.
data2$firstExpectEdu2 <- Recode(
  data2$firstExpectEdu,
  recodes = "1='LessthanHS';2='HighSchool'; 3='Associate'; 4='SomeCollege'; 5='Bachelors'; 6:7='Graduate'; else=NA ",
  as.factor = TRUE
)

# Family income during the first survey.
# The range for '20kto50k' was previously written as "7:9:=" (stray colon);
# it is now the well-formed "7:9=" and maps the same codes 7 through 9.
data2$familyInc2 <- Recode(
  data2$familyInc,
  recodes = "1:6='Lessthan20k';7:9='20kto50k'; 10:11='50kto100k'; 12:13='Morethan100k'; else=NA ",
  as.factor = TRUE
)

# Educational attainment at the F3 survey.
data2$eduAttain2 <- Recode(
  data2$eduAttain,
  recodes = "1='LessthanHS';2='HighSchool'; 3='SomeCollege'; 4='Certificate'; 5='Associates'; 6='Bachelors'; 7='PostBacCert'; 8:10='Graduate';else= NA",
  as.factor = TRUE
)

# Fulfillment of BY educational expectations as of F3.
# `else` now yields a real missing value; the previous else='NA' produced a
# factor level literally named "NA", which is.na()/complete.cases() do not
# treat as missing.
data2$expectEduStatus2 <- Recode(
  data2$expectEduStatus,
  recodes = "1:2='MetExpectations';3='DidNotMeetExpectations'; 4:5='NotApplicable';else=NA",
  as.factor = TRUE
)

# Highest level of education respondent expects to complete - composite
# (DEPENDENT VARIABLE). "HighSchool" is set as the reference level.
data2$thirdExpectEdu2 <- Recode(
  data2$thirdExpectEdu,
  recodes = "1='LessthanHS';2:3='HighSchool'; 4='Certificate'; 5='Associates'; 6='Bachelors'; 7:10='Graduate'; 11='dontknow';else= NA",
  as.factor = TRUE
)
data2$thirdExpectEdu2 <- relevel(data2$thirdExpectEdu2, ref = "HighSchool")

# Binary outcome: 1 when the expected level is coded 7-10 (the same codes that
# are labelled 'Graduate' above), 0 for codes 1-6 and for 11 ('dontknow').
data2$GradExpect <- Recode(
  data2$thirdExpectEdu,
  recodes = "1:6=0; 7:10=1; 11=0; else=NA "
)

# High-impact PS activities (0/1 indicators; anything else becomes missing).
# Internship/co-op/field experience/student teaching/clinical assignment
data2$internship2 <- Recode(data2$internship, recodes = "1=1; 0=0; else=NA ")
# Research project with faculty member outside course/program requirements
data2$research2 <- Recode(data2$research, recodes = "1=1; 0=0; else=NA ")
# Study abroad
data2$abroad2 <- Recode(data2$abroad, recodes = "1=1; 0=0; else=NA ")
# Community-based project
data2$community2 <- Recode(data2$community, recodes = "1=1; 0=0; else= NA ")
# Culminating senior experience
data2$seniorExperience2 <- Recode(
  data2$seniorExperience,
  recodes = "1=1; 0=0; else= NA "
)
# Mentoring
data2$mentoring2 <- Recode(data2$mentoring, recodes = "1=1; 0=0; else=NA ")

# Sum of high-impact activities; negative codes are treated as missing.
data2$ImpactActivities2 <- ifelse(
  data2$ImpactActivities < 0,
  NA,
  data2$ImpactActivities
)

# Whether voted in any local/state/national election during 2009, 2010, or 2011
data2$voted2 <- Recode(data2$voted, recodes = "1=1; 0=0; else= NA ")
# Performed unpaid volunteer work during the last 2 years
data2$unpaidWork2 <- Recode(data2$unpaidWork, recodes = "1=1; 0=0; else=NA ")

# Frequency of volunteer service.
data2$volunteeringFreq2 <- Recode(
  data2$volunteeringFreq,
  recodes = "1='lessthanonceamonth'; 2='onceamonth'; 3='onceaweek';else=NA ",
  as.factor = TRUE
)
# Any volunteering: 1 for frequency codes 1-3, 0 for every other value.
# NOTE(review): because of else=0, any code outside 1-3 (including whatever
# codes mark non-response, if present) is counted as "did not volunteer"
# rather than missing - confirm this is intended.
data2$volunteering <- Recode(data2$volunteeringFreq, recodes = "1:3=1;else=0 ")

SELECT RECODED VARIABLES AND KEEP COMPLETE CASES

# Keep the identifier, design variables, and recoded analysis variables, then
# drop every row with a missing value in any selected column.
# (The complete-case filter used to be applied twice in a row; the second pass
# could not remove anything, so it is applied once here.)
data3 <- data2 %>%
  select(
    studentID, psu, studentWeight, panelStudentWeight, strataID,
    Female, sex2, parentsed2, familyInc2, thirdExpectEdu2, GradExpect,
    internship2, research2, abroad2, community2, seniorExperience2, mentoring2,
    ImpactActivities2, voted2, volunteering, parentsBachelorOrMore
  ) %>%
  filter(complete.cases(.))
#Filter out students who have not indicated clearly what their education expectations were
#data3<-filter(data3,thirdExpectEdu2!="NA")

CREATE SURVEY DESIGN

# Strata left with a single PSU use the survey package's "adjust" handling
# instead of raising an error.
options(survey.lonely.psu = "adjust")

# Complex-survey design: PSUs as clusters nested within sampling strata,
# weighted by the third-round student weight (studentWeight).
# NOTE(review): the design is built on data3, which has already been filtered
# to Hispanic complete cases. The survey package documentation generally
# recommends declaring the design on the full sample and then using subset()
# for subpopulation estimates - confirm which approach is intended here.
des <- svydesign(
  ids = ~psu,
  strata = ~strataID,
  weights = ~studentWeight,
  data = data3,
  nest = TRUE
)

DESCRIPTIVE STATISTICS BY SEX AND EXPECTED LEVEL OF EDUCATION

Proportion of men and women at each expected level of education. Standard errors are adjusted for the survey design.

library(survey)
# Survey-weighted distribution of the expected education level within each
# sex, with design-based standard errors; printed transposed so that each
# sex is a column.
sv.table <- as.data.frame(
  svyby(
    formula = ~thirdExpectEdu2,
    by = ~sex2,
    design = des,
    FUN = svymean,
    na.rm = TRUE
  )
)
t(sv.table)

##                               Female        Male         
## sex2                          "Female"      "Male"       
## thirdExpectEdu2HighSchool     "0.08700098"  "0.09732707" 
## thirdExpectEdu2Associates     "0.10602566"  "0.08119032" 
## thirdExpectEdu2Bachelors      "0.2726824"   "0.3331056"  
## thirdExpectEdu2Certificate    "0.03994399"  "0.07473933" 
## thirdExpectEdu2dontknow       "0.06774006"  "0.07054863" 
## thirdExpectEdu2Graduate       "0.4240965"   "0.3430890"  
## thirdExpectEdu2LessthanHS     "0.002510407" "0.000000000"
## se.thirdExpectEdu2HighSchool  "0.01451037"  "0.01765306" 
## se.thirdExpectEdu2Associates  "0.01677427"  "0.01551446" 
## se.thirdExpectEdu2Bachelors   "0.02232773"  "0.02608747" 
## se.thirdExpectEdu2Certificate "0.01035604"  "0.01399850" 
## se.thirdExpectEdu2dontknow    "0.01534381"  "0.01550985" 
## se.thirdExpectEdu2Graduate    "0.02440294"  "0.02428952" 
## se.thirdExpectEdu2LessthanHS  "0.002507866" "0.000000000"

DESCRIPTIVE STATISTICS BY EXPECTATION OF GRADUATE SCHOOL OR NOT AND NUMBER OF IMPACT ACTIVITIES

Proportion of participants who expect to attend graduate school, by number of high-impact activities. Standard errors are adjusted for the survey design.

library(survey)
# Survey-weighted mean of GradExpect for each number of high-impact
# activities, with design-based standard errors; printed transposed.
sv.table2 <- as.data.frame(
  svyby(
    formula = ~GradExpect,
    by = ~ImpactActivities2,
    design = des,
    FUN = svymean,
    na.rm = TRUE
  )
)
t(sv.table2)

##                            0          1          2          3          4
## ImpactActivities2 0.00000000 1.00000000 2.00000000 3.00000000 4.00000000
## GradExpect        0.25012207 0.45847323 0.58051790 0.64972493 0.68846406
## se                0.01916117 0.04404977 0.05742597 0.06696498 0.07324312
##                           5         6
## ImpactActivities2 5.0000000 6.0000000
## GradExpect        0.6559168 0.8849742
## se                0.1879861 0.1128552

GENERATE PLOTS FROM MY DATA AND CHI-SQUARE ANALYSES

https://uc-r.github.io/descriptives_categorical

Post-Graduate Expectations by Sex

library(ggplot2)
library(ggthemes)

## Warning: package 'ggthemes' was built under R version 3.6.2

library(viridis)

## Warning: package 'viridis' was built under R version 3.6.2

## Loading required package: viridisLite

# Survey-weighted share of each sex within each level of GradExpect, with
# design-based standard errors.
chi1 <- svyby(formula = ~sex2, by = ~GradExpect, design = des, FUN = svymean, na.rm = TRUE)
# Rao-Scott adjusted chi-squared test of association (sex x GradExpect).
svychisq(~sex2+GradExpect, design = des)

## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~sex2 + GradExpect, design = des)
## F = 4.7378, ndf = 1, ddf = 123, p-value = 0.03142

# Share of females in each GradExpect group with approximate 95% intervals
# (estimate +/- 1.96 * SE). The error bars previously used the estimate for
# both ymin and ymax, so they had zero height and showed no uncertainty.
# The estimates are proportions, so the y axis is formatted as percentages.
ggplot(chi1, aes(x = GradExpect, y = sex2Female)) +
  geom_point() +
  geom_errorbar(
    aes(
      ymin = sex2Female - 1.96 * se.sex2Female,
      ymax = sex2Female + 1.96 * se.sex2Female
    ),
    width = .25
  ) +
  scale_y_continuous(labels = scales::percent) +
  xlab("Post-Graduate Expectations 1=yes, 0=no") +
  ylab("percent of participants who are female") +
  ggtitle(label = "% of Females with Post-Graduate Expectations")

Post-Graduate Expectations by Voting Status

The chi-squared test of association for survey data shows that students who voted and students who did not vote differ significantly in their expectations of attending post-graduate school.

# Survey-weighted share of voters within each level of GradExpect, with
# design-based standard errors.
chi2 <- svyby(formula = ~voted2, by = ~GradExpect, design = des, FUN = svymean, na.rm = TRUE)
# Rao-Scott adjusted chi-squared test of association (voted x GradExpect).
svychisq(~voted2+GradExpect, design = des)

## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~voted2 + GradExpect, design = des)
## F = 10.614, ndf = 1, ddf = 123, p-value = 0.001451

# Point estimates with approximate 95% intervals (estimate +/- 1.96 * SE).
# The error bars previously used the estimate for both ymin and ymax, so they
# had zero height. For a single numeric variable svyby() names the
# standard-error column `se`.
ggplot(chi2, aes(x = GradExpect, y = voted2)) +
  geom_point() +
  geom_errorbar(
    aes(ymin = voted2 - 1.96 * se, ymax = voted2 + 1.96 * se),
    width = .25
  ) +
  scale_y_continuous(labels = scales::percent) +
  xlab("Post-Graduate Expectations 1=yes, 0=no") +
  ylab("percent of participants who voted") +
  ggtitle(label = "% of Participants who Voted and Post-Graduate Expectations")

Post-Graduate Expectations by Volunteering Status

# Survey-weighted share of volunteers within each level of GradExpect, with
# design-based standard errors.
chi3 <- svyby(formula = ~volunteering, by = ~GradExpect, design = des, FUN = svymean, na.rm = TRUE)
# Rao-Scott adjusted chi-squared test of association
# (volunteering x GradExpect).
svychisq(~volunteering+GradExpect, design = des)

## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~volunteering + GradExpect, design = des)
## F = 45.008, ndf = 1, ddf = 123, p-value = 0.0000000006374

# Point estimates with approximate 95% intervals (estimate +/- 1.96 * SE).
# The error bars previously used the estimate for both ymin and ymax, so they
# had zero height.
ggplot(chi3, aes(x = GradExpect, y = volunteering)) +
  geom_point() +
  geom_errorbar(
    aes(ymin = volunteering - 1.96 * se, ymax = volunteering + 1.96 * se),
    width = .25
  ) +
  scale_y_continuous(labels = scales::percent) +
  xlab("Post-Graduate Expectations 1=yes, 0=no") +
  ylab("percent of participants who volunteered") +
  ggtitle(label = "% of Participants who Volunteered and Post-Graduate Expectations")

Post-Graduate Expectations by having received Mentoring

# Survey-weighted share of mentored respondents within each level of
# GradExpect, with design-based standard errors.
chi4 <- svyby(formula = ~mentoring2, by = ~GradExpect, design = des, FUN = svymean, na.rm = TRUE)
# Rao-Scott adjusted chi-squared test of association
# (mentoring x GradExpect).
svychisq(~mentoring2+GradExpect, design = des)

## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~mentoring2 + GradExpect, design = des)
## F = 19.276, ndf = 1, ddf = 123, p-value = 0.00002409

# Point estimates with approximate 95% intervals (estimate +/- 1.96 * SE).
# The error bars previously used the estimate for both ymin and ymax, so they
# had zero height.
ggplot(chi4, aes(x = GradExpect, y = mentoring2)) +
  geom_point() +
  geom_errorbar(
    aes(ymin = mentoring2 - 1.96 * se, ymax = mentoring2 + 1.96 * se),
    width = .25
  ) +
  scale_y_continuous(labels = scales::percent) +
  xlab("Post-Graduate Expectations 1=yes, 0=no") +
  ylab("percent of participants who received mentoring") +
  ggtitle(label = "% of Participants who Received Mentoring and Post-Graduate Expectations")

## Post-Graduate Expectations by Parent’s Education (bachelor’s degree or more)

# Survey-weighted distribution of parentsBachelorOrMore (a 0/1 factor) within
# each level of GradExpect, with design-based standard errors.
chi5 <- svyby(formula = ~parentsBachelorOrMore, by = ~GradExpect, design = des, FUN = svymean, na.rm = TRUE)
# Rao-Scott adjusted chi-squared test of association
# (parents' education x GradExpect).
svychisq(~parentsBachelorOrMore+GradExpect, design = des)

## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~parentsBachelorOrMore + GradExpect, design = des)
## F = 29.709, ndf = 1, ddf = 123, p-value = 0.0000002629

# Share of respondents whose parents hold a bachelor's degree or more
# (factor level "1"), with approximate 95% intervals (estimate +/- 1.96 * SE).
# The error bars previously used the estimate for both ymin and ymax, so they
# had zero height. For a factor, svyby() names the standard-error columns
# "se.<variable><level>".
ggplot(chi5, aes(x = GradExpect, y = parentsBachelorOrMore1)) +
  geom_point() +
  geom_errorbar(
    aes(
      ymin = parentsBachelorOrMore1 - 1.96 * se.parentsBachelorOrMore1,
      ymax = parentsBachelorOrMore1 + 1.96 * se.parentsBachelorOrMore1
    ),
    width = .25
  ) +
  scale_y_continuous(labels = scales::percent) +
  xlab("Survey Participant's Post-Graduate Expectations 1=yes, 0=no") +
  ylab("Percent of Parents with a Bachelors or More") +
  ggtitle(label = "% of Parents with a Bachelors or More and Participant's Post-Graduate Expectations")

Post-Graduate Expectations and ALL Impact Activities

# Survey-weighted mean number of high-impact activities within each level of
# GradExpect, with design-based standard errors.
chi6 <- svyby(formula = ~ImpactActivities2, by = ~GradExpect, design = des, FUN = svymean, na.rm = TRUE)
# Rao-Scott adjusted chi-squared test of association
# (number of impact activities x GradExpect).
svychisq(~ImpactActivities2+GradExpect, design = des)

## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~ImpactActivities2 + GradExpect, design = des)
## F = 14.957, ndf = 5.2665, ddf = 647.7797, p-value = 0.00000000000001723

# Group means with approximate 95% intervals (estimate +/- 1.96 * SE).
# The error bars previously used the estimate for both ymin and ymax, so they
# had zero height. The y-axis label previously read "Percent of Parents with a
# Bachelors or More" (copied from another plot); the plotted value is the mean
# number of impact activities.
ggplot(chi6, aes(x = GradExpect, y = ImpactActivities2)) +
  geom_point() +
  geom_errorbar(
    aes(
      ymin = ImpactActivities2 - 1.96 * se,
      ymax = ImpactActivities2 + 1.96 * se
    ),
    width = .25
  ) +
  xlab("Survey Participant's Post-Graduate Expectations 1=yes, 0=no") +
  ylab("Mean Number of Impact Activities") +
  ggtitle(label = "Impact Activities and Participant's Post-Graduate Expectations")

## Post-Graduate Expectations and Parent’s Income

# Survey-weighted distribution of family income bands within each level of
# GradExpect, with design-based standard errors.
chi7 <- svyby(formula = ~familyInc2, by = ~GradExpect, design = des, FUN = svymean, na.rm = TRUE)
# Rao-Scott adjusted chi-squared test of association
# (family income x GradExpect).
svychisq(~familyInc2+GradExpect, design = des)

## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~familyInc2 + GradExpect, design = des)
## F = 9.8226, ndf = 2.9101, ddf = 357.9370, p-value = 0.000004031

# Share of respondents in the 'Morethan100k' income band, with approximate 95%
# intervals (estimate +/- 1.96 * SE). The error bars previously used the
# estimate for both ymin and ymax, so they had zero height.
ggplot(chi7, aes(x = GradExpect, y = familyInc2Morethan100k)) +
  geom_point() +
  geom_errorbar(
    aes(
      ymin = familyInc2Morethan100k - 1.96 * se.familyInc2Morethan100k,
      ymax = familyInc2Morethan100k + 1.96 * se.familyInc2Morethan100k
    ),
    width = .25
  ) +
  scale_y_continuous(labels = scales::percent) +
  xlab("Survey Participant's Post-Graduate Expectations 1=yes, 0=no") +
  ylab("Family Income") +
  ggtitle(label = "Parent's Income > 100k at Baseline and Post-Graduate Expectations")

ADDITIONAL DESCRIPTIVE INFORMATION

Impact Activities

# Weighted percentage of respondents by number of high-impact activities.
prop.table(questionr::wtd.table(data3$ImpactActivities2, weights = data3$studentWeight)) * 100

##          0          1          2          3          4          5          6 
## 54.5999218 19.4464412 13.3472575  6.5446368  4.8153862  0.6541071  0.5922493

# Bar chart of the share of respondents per number of impact activities.
# Bar heights are raw row shares (count / total count) and do not use the
# survey weights, so they can differ from the weighted table above.
# theme_hc() is a complete theme, so it must be added BEFORE the theme()
# tweak; in the previous order it discarded the rotated x-axis labels.
ggplot(data3, aes(x = `ImpactActivities2`)) +
  geom_bar(aes(y = (..count..) / sum(..count..))) +
  xlab("Number of Impact Activities") +
  scale_y_continuous(labels = scales::percent, name = "Percent of Respondents") +
  theme_hc() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Percent of Participants by Number of Impact Activities")

Parents Education

# Weighted percentage of respondents by parents' education.
prop.table(questionr::wtd.table(data3$parentsed2, weights = data3$studentWeight)) * 100

##         Associate         Bachelors          Graduate        HighSchool 
##          11.03330          16.69473          10.43277          19.14218 
## LessthanAssociate      LessthanBach        LessthanHS 
##          12.12562          12.39418          18.17722

# Bar chart of the share of respondents per parental education level.
# Bar heights are raw row shares (count / total count) and do not use the
# survey weights. Bars appear in factor-level order; the x-axis label used to
# claim "(ordered by size)" although no reordering is applied, so that claim
# has been removed from the label.
# theme_hc() is a complete theme, so it must be added BEFORE the theme()
# tweak; in the previous order it discarded the rotated x-axis labels.
ggplot(data3, aes(x = `parentsed2`)) +
  geom_bar(aes(y = (..count..) / sum(..count..))) +
  xlab("Participants Parents Education") +
  scale_y_continuous(labels = scales::percent, name = "Percent of Respondents") +
  theme_hc() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Percent of Parent's Educational Attainment of Survey Respondents")

## Education Expectations

# Weighted percentage of respondents at each expected level of education.
100 * prop.table(
  questionr::wtd.table(
    data3[["thirdExpectEdu2"]],
    weights = data3[["studentWeight"]]
  )
)

##  HighSchool  Associates   Bachelors Certificate    dontknow    Graduate 
##   9.1549656   9.5085599  29.9299075   5.5271465   6.8977248  38.8412393 
##  LessthanHS 
##   0.1404564

# Turn `x` into a factor whose levels are ordered from the most frequent
# value to the least frequent one (frequencies are taken from table(x)).
# Returns a factor of the same length as `x`.
reorder_size <- function(x) {
  counts <- table(x)
  by_frequency <- sort(counts, decreasing = TRUE)
  factor(x, levels = names(by_frequency))
}

# Bar chart of the share of respondents per expected education level, with
# bars ordered from most to least frequent via reorder_size().
# Bar heights are raw row shares (count / total count) and do not use the
# survey weights.
# theme_hc() is a complete theme, so it must be added BEFORE the theme()
# tweak; in the previous order it discarded the rotated x-axis labels.
ggplot(data3, aes(x = reorder_size(`thirdExpectEdu2`))) +
  geom_bar(aes(y = (..count..) / sum(..count..))) +
  xlab("Education Expectations (ordered by size)") +
  scale_y_continuous(labels = scales::percent, name = "Percent of Respondents") +
  theme_hc() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Percent of Survey Respondents by Educational Expectations")

## Educational Expectation by Sex

# Weighted cell percentages (of the grand total) for expected education level
# crossed with sex.
100 * prop.table(
  questionr::wtd.table(
    data3[["thirdExpectEdu2"]],
    data3[["sex2"]],
    weights = data3[["studentWeight"]]
  )
)

##                 Female       Male
## HighSchool   4.8676727  4.2872929
## Associates   5.9320967  3.5764633
## Bachelors   15.2564825 14.6734250
## Certificate  2.2348514  3.2922951
## dontknow     3.7900318  3.1076930
## Graduate    23.7280424 15.1131969
## LessthanHS   0.1404564  0.0000000

# Side-by-side bars per sex; heights are raw row shares of all respondents
# (count / total count * 100), without survey weights.
ggplot(
  data3,
  aes(x = thirdExpectEdu2, y = (..count..) / sum(..count..) * 100)
) +
  geom_bar(aes(fill = sex2), position = "dodge") +
  labs(
    x = "Education Expectations",
    y = "Percent of Respondents",
    title = "Percent of Survey Respondents by Educational Expectations by Sex"
  )

# Weighted cell percentages (of the grand total) for GradExpect crossed with
# sex.
100 * prop.table(
  questionr::wtd.table(
    data3[["GradExpect"]],
    data3[["sex2"]],
    weights = data3[["studentWeight"]]
  )
)

##     Female     Male
## 0 32.22159 28.93717
## 1 23.72804 15.11320

# Side-by-side bars per sex; heights are raw row shares of all respondents
# (count / total count * 100), without survey weights.
ggplot(
  data3,
  aes(x = GradExpect, y = (..count..) / sum(..count..) * 100)
) +
  geom_bar(aes(fill = sex2), position = "dodge") +
  labs(
    x = "Post-Graduate Expectations",
    y = "Percent of Respondents",
    title = "Percent of Survey Respondents by Post-Graduate vs. Non Post-Graduate Expectations by Sex"
  )

CLEAN UP ENV.

# Remove the intermediate svyby tables; they are not needed for the
# regression models that follow.
rm(
  list = c(
    "chi1", "chi2", "chi3", "chi4", "chi5", "chi6", "chi7",
    "sv.table", "sv.table2"
  )
)

REGRESSION ANALYSIS

The outcome of interest is whether a survey participant expects to enroll in graduate school (master's, PhD, or other doctoral program) or not. Because the outcome is binary, a logistic regression is appropriate.

Logit model

library(survey)

#Average of Participants Expecting to go to Graduate School (1) 
svymean(~GradExpect, design=des)

##               mean     SE
## GradExpect 0.38841 0.0162

#LOGIT MODEL
# Survey-weighted logistic regression of graduate-school expectations on sex,
# the individual high-impact activities, civic engagement, mentoring, parents'
# education, and family income.
# family = quasibinomial() replaces family = binomial: with sampling weights
# the binomial family emits "non-integer #successes in a binomial glm!".
# The survey package documentation recommends the quasi family for svyglm();
# it gives the same point estimates and standard errors without the warning.
fit.logit <- svyglm(
  as.numeric(GradExpect) ~ Female + seniorExperience2 + community2 +
    research2 + internship2 + volunteering + voted2 + mentoring2 +
    parentsBachelorOrMore + familyInc2,
  design = des,
  family = quasibinomial()
)

# NOTE: the rendered summary that follows was produced with family = binomial,
# so its "Call:" and dispersion lines still mention the binomial family.
summary(fit.logit)

## 
## Call:
## svyglm(formula = as.numeric(GradExpect) ~ Female + seniorExperience2 + 
##     community2 + research2 + internship2 + volunteering + voted2 + 
##     mentoring2 + parentsBachelorOrMore + familyInc2, design = des, 
##     family = binomial)
## 
## Survey design:
## svydesign(ids = ~psu, strata = ~strataID, weights = ~studentWeight, 
##     data = data3, nest = T)
## 
## Coefficients:
##                        Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)            -1.74605    0.17882  -9.764 < 0.0000000000000002 ***
## Female                  0.17107    0.17232   0.993             0.323013    
## seniorExperience2       0.48684    0.25865   1.882             0.062424 .  
## community2              0.13993    0.31787   0.440             0.660645    
## research2               0.68328    0.24011   2.846             0.005279 ** 
## internship2             0.76278    0.22458   3.396             0.000948 ***
## volunteering            0.67384    0.18481   3.646             0.000407 ***
## voted2                  0.41344    0.16995   2.433             0.016585 *  
## mentoring2              0.15738    0.25722   0.612             0.541888    
## parentsBachelorOrMore1  0.63658    0.19625   3.244             0.001559 ** 
## familyInc250kto100k    -0.03141    0.22949  -0.137             0.891377    
## familyInc2Lessthan20k   0.22649    0.24171   0.937             0.350782    
## familyInc2Morethan100k  1.38415    0.33683   4.109            0.0000762 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.021202)
## 
## Number of Fisher Scoring iterations: 4

#Short version of logit model
#LOGIT MODEL
# Survey-weighted logistic regression of graduate-school expectations on the
# total number of high-impact activities and family income.
# family = quasibinomial() replaces family = binomial: with sampling weights
# the binomial family emits "non-integer #successes in a binomial glm!".
# The survey package documentation recommends the quasi family for svyglm();
# it gives the same point estimates and standard errors without the warning.
fit.logit2 <- svyglm(
  as.numeric(GradExpect) ~ ImpactActivities2 + familyInc2,
  design = des,
  family = quasibinomial()
)

# NOTE: the rendered summary that follows was produced with family = binomial,
# so its "Call:" and dispersion lines still mention the binomial family.
summary(fit.logit2)

## 
## Call:
## svyglm(formula = as.numeric(GradExpect) ~ ImpactActivities2 + 
##     familyInc2, design = des, family = binomial)
## 
## Survey design:
## svydesign(ids = ~psu, strata = ~strataID, weights = ~studentWeight, 
##     data = data3, nest = T)
## 
## Coefficients:
##                        Estimate Std. Error t value           Pr(>|t|)    
## (Intercept)            -1.12975    0.12905  -8.754 0.0000000000000164 ***
## ImpactActivities2       0.53486    0.06583   8.125 0.0000000000004749 ***
## familyInc250kto100k     0.11895    0.22601   0.526              0.600    
## familyInc2Lessthan20k   0.09553    0.22838   0.418              0.676    
## familyInc2Morethan100k  1.55779    0.30825   5.054 0.0000015849870031 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.009485)
## 
## Number of Fisher Scoring iterations: 4

TEST FOR MULTICOLLINEARITY

The variance inflation factor (VIF) quantifies the extent of correlation between one predictor and the other predictors in a model. It is used for diagnosing collinearity/multicollinearity. Higher values signify that it is difficult to impossible to assess accurately the contribution of predictors to a model.

library(rms)

## Warning: package 'rms' was built under R version 3.6.2

## Loading required package: Hmisc

## Warning: package 'Hmisc' was built under R version 3.6.2

## Loading required package: lattice

## Loading required package: Formula

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:dplyr':
## 
##     src, summarize

## The following objects are masked from 'package:questionr':
## 
##     describe, wtd.mean, wtd.table, wtd.var

## The following object is masked from 'package:survey':
## 
##     deff

## The following objects are masked from 'package:base':
## 
##     format.pval, units

## Loading required package: SparseM

## Warning: package 'SparseM' was built under R version 3.6.2

## 
## Attaching package: 'SparseM'

## The following object is masked from 'package:base':
## 
##     backsolve

## 
## Attaching package: 'rms'

## The following objects are masked from 'package:car':
## 
##     Predict, vif

## The following object is masked from 'package:survey':
## 
##     calibrate

# Variance inflation factors for the full logit model.
# Both car and rms export vif(); rms is attached last and masks car's version,
# so the call is namespace-qualified to make the function in use explicit.
rms::vif(fit.logit)

##                 Female      seniorExperience2             community2 
##               1.290482               1.251171               1.432380 
##              research2            internship2           volunteering 
##               1.169167               1.605373               1.335499 
##                 voted2             mentoring2 parentsBachelorOrMore1 
##               1.297728               1.382488               1.218255 
##    familyInc250kto100k  familyInc2Lessthan20k familyInc2Morethan100k 
##               1.350884               1.407225               1.349944

FITTED VALUES: IMPACT ACTIVITIES AND FAMILY INCOME

Dissertation Chapter: Aspirations to Post-Graduate Education of Hispanics & Social Capital

Paulina Cano McC

January/February 2020