All the variables in this dataset were compiled from a sample of the National Survey of College Graduates through the IPUMS-HigherEd site.
For this assignment, I will define education as my ordinal variable by recoding it as a progression from Bachelor followed by Masters or other post-graduate education which refers to Doctoral degrees or Professional degrees.
In this analysis, I attempt to understand whether number of children, gender or ethnicity have an impact in the transition from one level to another. I expect to see a lower rate of women, minorities and those with two or more children transitioning into doctoral/professional level of education when compared to men, white and asians, and those with one child.
The data used for this analysis consists of a sample from the National Survey of College Graduates gathered through IPUMS-Higher Ed.
library(haven)
library(car)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.1. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(questionr)
cpsipums2<-read_dta("highered_00005.dta")
names(cpsipums2) #print the column names
## [1] "personid" "year" "weight" "sample" "surid" "gender"
## [7] "raceth" "chtot" "dgrdg" "salary"
In this sample, there are 38,626 females and 45,830 males.
#income grouping
cpsipums2$salary2<-ifelse(cpsipums2$salary==9999998:9999999, NA, cpsipums2$salary)
summary (cpsipums2$salary2)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0 44000 71000 906935 108000 9999998 6519
#number of children living in the household
cpsipums2$chtot<-recode(cpsipums2$chtot, recodes="00='no children'; 01='one child'; 02='one to three children'; 03='two or more children'; 04='more than 3 children'; 98=NA", as.factor.result=T)
cpsipums2$chtot<-relevel(cpsipums2$chtot, ref = "one child")
table(cpsipums2$chtot)
##
## one child two or more children
## 14472 20517
#gender
cpsipums2$female<-recode(cpsipums2$gender, recodes=1)
cpsipums2$male<-recode(cpsipums2$gender, recodes=2)
cpsipums2$gender<-recode(cpsipums2$gender, recodes="1='female'; 2='male'", as.factor.result=T)
table(cpsipums2$gender)
##
## female male
## 38626 45830
cpsipums2$gender<-relevel(cpsipums2$gender, ref = "female")
#race/ethnicity
#There are no entries in this data set under "other"
cpsipums2$asian<-recode(cpsipums2$raceth, recodes=1)
cpsipums2$white<-recode(cpsipums2$raceth, recodes=2)
cpsipums2$minorities<-recode(cpsipums2$raceth, recodes=3)
cpsipums2$other<-recode(cpsipums2$raceth, recodes=4)
cpsipums2$raceth<-recode(cpsipums2$raceth, recodes="1='asian'; 2='white'; 3='minorities'", as.factor.result=T)
cpsipums2$raceth<-relevel(cpsipums2$raceth, ref = "white")
table(cpsipums2$raceth)
##
## white asian minorities
## 51846 13868 18742
Education level ordinal coding from Bachelors (1) followed by Masters (2) or other post-graduate education (3) which refers to Doctoral degrees or Professional degrees.
cpsipums2$education<-recode(cpsipums2$dgrdg,recodes="1=1; 2=2; 3:4=3; else=NA", as.factor.result=T)
cpsipums2$education<-relevel(cpsipums2$education, ref="1")
cpsipums2$education2<-car::recode(cpsipums2$dgrdg, recodes="1=1; 2=2; 3:4=3; else=NA", as.factor.result=F)
options(survey.lonely.psu = "adjust")
table(cpsipums2$education)
##
## 1 2 3
## 44199 33341 6916
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
sub<-cpsipums2%>%
select(education,education2,gender,chtot,raceth,salary,weight,sample) %>%
filter( complete.cases(.))
#Survey Design
options(survey.lonely.psu = "adjust")
des2<-svydesign(ids=~1, strata=~sample, weights=~weight, data =sub)
The ordinal regression shows that males are slightly more likely than females to make a transition into post-graduate education. The same trend is found amongst White and Asian survey respondents when compared to minorities. Finally, those with two or more children as slightly less likely to enter post-graduate education.
#Nested models for the education outcome
fit.solr1<-svyolr(education~gender+raceth+gender+chtot,des2)
summary(fit.solr1)
## Call:
## svyolr(education ~ gender + raceth + gender + chtot, des2)
##
## Coefficients:
## Value Std. Error t value
## gendermale 0.04402464 0.04147118 1.0615720
## racethasian 0.19785049 0.06131927 3.2265630
## racethminorities -0.15725858 0.05759723 -2.7303148
## chtottwo or more children -0.02727343 0.04209209 -0.6479466
##
## Intercepts:
## Value Std. Error t value
## 1|2 0.3331 0.0407 8.1941
## 2|3 2.0897 0.0465 44.9392
#Calculate the AIC ourself
fit.solr1$deviance+2*length(fit.solr1$coefficients)
## [1] 64440.7
A comparison of betas or proportional odds assumpption shows that they are NOT consistent across transitions. The only area where they appear consistent is along the race variable.
ex1<-svyglm(I(education2>1)~raceth+education+chtot+gender,des2, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
## Warning: glm.fit: algorithm did not converge
ex2<-svyglm(I(education2>2)~raceth+education+chtot+gender,des2, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
## Warning in eval(family$initialize): glm.fit: algorithm did not converge
plot(coef(ex1)[-1], ylim=c(-3, 3), type="l",xaxt="n",
ylab="Beta", main=c("Comparison of betas for", " proportional odds assumption"))
lines(coef(ex2)[-1], col=1, lty=2)
axis(side=1, at=1:12, labels=F)
text(x=1:12, y=-4, srt = 90, pos = 1, xpd = TRUE,cex=.8,
labels = c( "asian", "minorities","masters" ,"doctorate/prof",
"Two children or more","male"))
legend("bottomright",col=c(1,1),lty=c(1,2), legend=c(">1", ">2"))
lines(coef(fit.solr1)[c(-13:-16)], col=4, lwd=3)
#Printing odds ratios,
round(exp(rbind(coef(ex1)[-1], coef(ex2)[-1])),3)
## racethasian racethminorities education2 education3
## [1,] 0.980 1.036 1.777857e+23 2.147777e+23
## [2,] 1.159 1.129 1.168000e+00 2.139375e+23
## chtottwo or more children gendermale
## [1,] 0.991 1.037
## [2,] 0.967 1.063
In using a non-proportial assumption we find some of the same trends, minorities are less likely to pursue education beyond the bachelors level when compared to white and asian survey respondents. Males are slighly more likely than women as well however, in the transition between bachelors and masters, men are show a slight negative direction when compared to women. This is consistent with previous findings in assignments 1 and 2. An interesting finding is that it is less likely for those with two or more childre to transition between bachelors and masters while this finding reverses for those with two children or more transitioning between masters and doctoral/professional degrees.
library(VGAM)
## Loading required package: stats4
## Loading required package: splines
##
## Attaching package: 'VGAM'
## The following object is masked from 'package:survey':
##
## calibrate
## The following object is masked from 'package:car':
##
## logit
#Proportional odds
fit.vgam<-vglm(as.ordered(education)~gender+chtot+raceth,
cpsipums2, weights =weight/mean(weight, na.rm=T),
family=cumulative(parallel = T, reverse = T)) #<-parallel = T == proportional odds
summary(fit.vgam)
##
## Call:
## vglm(formula = as.ordered(education) ~ gender + chtot + raceth,
## family = cumulative(parallel = T, reverse = T), data = cpsipums2,
## weights = weight/mean(weight, na.rm = T))
##
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logit(P[Y>=2]) -4.446 -0.5833 -0.1434 0.7137 6.908
## logit(P[Y>=3]) -2.971 -0.3736 -0.2105 -0.1076 11.771
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -0.33312 0.02076 -16.047 < 2e-16 ***
## (Intercept):2 -2.08974 0.02427 -86.086 < 2e-16 ***
## gendermale 0.04402 0.02020 2.179 0.0293 *
## chtottwo or more children -0.02728 0.02081 -1.311 0.1899
## racethasian 0.19785 0.02962 6.680 2.39e-11 ***
## racethminorities -0.15726 0.02796 -5.624 1.86e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of linear predictors: 2
##
## Names of linear predictors: logit(P[Y>=2]), logit(P[Y>=3])
##
## Residual deviance: 70567.94 on 69972 degrees of freedom
##
## Log-likelihood: -35283.97 on 69972 degrees of freedom
##
## Number of iterations: 3
##
## No Hauck-Donner effect found in any of the estimates
##
## Exponentiated coefficients:
## gendermale chtottwo or more children
## 1.0450069 0.9730928
## racethasian racethminorities
## 1.2187741 0.8544806
#Nagelkerke R^2
fit.null<-vglm(as.ordered(education)~1,
cpsipums2, weights =weight/mean(weight, na.rm=T),
family=cumulative(parallel = T, reverse = T))
(1-exp((fit.vgam@criterion$deviance - fit.null@criterion$deviance)/485742))/(1-exp(-fit.null@criterion$deviance/485742))
## [1] 0.5783867
#Non-proportional odds
fit.vgam2<-vglm(as.ordered(education)~gender+chtot+raceth,cpsipums2,
weights =weight/mean(weight, na.rm=T),
family=cumulative(parallel = F, reverse = T)) #<-parallel = F == Nonproportional odds
summary(fit.vgam2)
##
## Call:
## vglm(formula = as.ordered(education) ~ gender + chtot + raceth,
## family = cumulative(parallel = F, reverse = T), data = cpsipums2,
## weights = weight/mean(weight, na.rm = T))
##
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logit(P[Y>=2]) -4.502 -0.5823 -0.1427 0.7196 7.25
## logit(P[Y>=3]) -3.394 -0.3714 -0.2094 -0.1075 12.53
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -0.29124 0.02118 -13.750 < 2e-16 ***
## (Intercept):2 -2.32287 0.03500 -66.376 < 2e-16 ***
## gendermale:1 -0.01585 0.02079 -0.762 0.445790
## gendermale:2 0.35982 0.03315 10.855 < 2e-16 ***
## chtottwo or more children:1 -0.04367 0.02142 -2.039 0.041450 *
## chtottwo or more children:2 0.05733 0.03383 1.695 0.090153 .
## racethasian:1 0.18877 0.03077 6.135 8.51e-10 ***
## racethasian:2 0.22861 0.04543 5.032 4.84e-07 ***
## racethminorities:1 -0.15812 0.02859 -5.530 3.20e-08 ***
## racethminorities:2 -0.16264 0.04722 -3.445 0.000572 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of linear predictors: 2
##
## Names of linear predictors: logit(P[Y>=2]), logit(P[Y>=3])
##
## Residual deviance: 70402.36 on 69968 degrees of freedom
##
## Log-likelihood: -35201.18 on 69968 degrees of freedom
##
## Number of iterations: 4
##
## No Hauck-Donner effect found in any of the estimates
##
## Exponentiated coefficients:
## gendermale:1 gendermale:2
## 0.9842730 1.4330709
## chtottwo or more children:1 chtottwo or more children:2
## 0.9572718 1.0590081
## racethasian:1 racethasian:2
## 1.2077629 1.2568506
## racethminorities:1 racethminorities:2
## 0.8537498 0.8498970
fit.null2<-vglm(as.ordered(education)~1,
cpsipums2, weights =weight/mean(weight, na.rm=T),
family=cumulative(parallel = F, reverse = T))
(1-exp((fit.vgam2@criterion$deviance - fit.null2@criterion$deviance)/485742))/(1-exp(-fit.null2@criterion$deviance/485742))
## [1] 0.5794494
AIC(fit.vgam)
## [1] 70579.94
AIC(fit.vgam2)
## [1] 70422.36
The best fitting model appears to be when using the non-proportional assumptions.
A calculation of the odds rations and confidence levels confirms our findings:
In further analysis, I would like to exclude professional degrees completely to see if the trends remain the same. I would also like to replace masters degrees with professional degrees to see if there are any changes if the person transitions from bachelors to a professional degree.
round(exp(coef(fit.vgam2)), 3)
## (Intercept):1 (Intercept):2
## 0.747 0.098
## gendermale:1 gendermale:2
## 0.984 1.433
## chtottwo or more children:1 chtottwo or more children:2
## 0.957 1.059
## racethasian:1 racethasian:2
## 1.208 1.257
## racethminorities:1 racethminorities:2
## 0.854 0.850
round(exp(confint(fit.vgam2)), 3)
## 2.5 % 97.5 %
## (Intercept):1 0.717 0.779
## (Intercept):2 0.091 0.105
## gendermale:1 0.945 1.025
## gendermale:2 1.343 1.529
## chtottwo or more children:1 0.918 0.998
## chtottwo or more children:2 0.991 1.132
## racethasian:1 1.137 1.283
## racethasian:2 1.150 1.374
## racethminorities:1 0.807 0.903
## racethminorities:2 0.775 0.932