Assignment3

All the variables in this dataset were compiled from a sample of the National Survey of College Graduates through the IPUMS-HigherEd site.

For this assignment, I will define education as my ordinal variable by recoding it as a progression from Bachelor followed by Masters or other post-graduate education which refers to Doctoral degrees or Professional degrees.

In this analysis, I attempt to understand whether number of children, gender or ethnicity have an impact in the transition from one level to another. I expect to see a lower rate of women, minorities and those with two or more children transitioning into doctoral/professional level of education when compared to men, white and asians, and those with one child.

The data used for this analysis consists of a sample from the National Survey of College Graduates gathered through IPUMS-Higher Ed.

library(haven)
library(car)
library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.1. https://CRAN.R-project.org/package=stargazer

library(survey)

## Loading required package: grid

## Loading required package: Matrix

## Loading required package: survival

## 
## Attaching package: 'survey'

## The following object is masked from 'package:graphics':
## 
##     dotchart

library(questionr)

# Load the IPUMS-HigherEd extract (Stata .dta file) and list its variables.
cpsipums2 <- haven::read_dta(file = "highered_00005.dta")
print(names(cpsipums2)) # print the column names

##  [1] "personid" "year"     "weight"   "sample"   "surid"    "gender"  
##  [7] "raceth"   "chtot"    "dgrdg"    "salary"

In this sample, there are 38,626 females and 45,830 males.

Recoding of Variables

#income grouping
# Set the two codes 9999998 and 9999999 to NA in a copy of salary.
# `%in%` tests every row against both codes. The previous
# `== 9999998:9999999` recycled the two values down the column, so each row
# was compared with only one of them and some coded values were left in place
# (the maximum stayed at 9999998).
# as.numeric() drops the Stata label attributes before comparing.
# NOTE: the summary printed below was rendered before this fix; re-knit to
# refresh it.
cpsipums2$salary2 <- ifelse(
  as.numeric(cpsipums2$salary) %in% c(9999998, 9999999),
  NA,
  as.numeric(cpsipums2$salary)
)

summary(cpsipums2$salary2)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##       0   44000   71000  906935  108000 9999998    6519

#number of children living in the household
# car::recode is called by its full name because dplyr, attached further down,
# exports its own recode() that masks car's. `as.factor` is the current name
# of the argument formerly spelled `as.factor.result`.
# Code 98 is set to NA; the result is a factor.
cpsipums2$chtot <- car::recode(
  cpsipums2$chtot,
  recodes = "00='no children'; 01='one child'; 02='one to three children'; 03='two or more children'; 04='more than 3 children'; 98=NA",
  as.factor = TRUE
)
# "one child" is the reference category in the models.
cpsipums2$chtot <- relevel(cpsipums2$chtot, ref = "one child")
table(cpsipums2$chtot)

## 
##            one child two or more children 
##                14472                20517

#gender
# 0/1 indicators built from the numeric codes (1 = female, 2 = male, as in the
# recode below). They must be created before gender is turned into a factor.
# The previous recode(..., recodes = 1) calls carried no "old=new" pair and so
# did not produce indicators. Missing gender stays NA.
cpsipums2$female <- as.integer(as.numeric(cpsipums2$gender) == 1)
cpsipums2$male <- as.integer(as.numeric(cpsipums2$gender) == 2)
# car::recode is named explicitly because dplyr (attached later) masks recode().
cpsipums2$gender <- car::recode(
  cpsipums2$gender,
  recodes = "1='female'; 2='male'",
  as.factor = TRUE
)
table(cpsipums2$gender)

## 
## female   male 
##  38626  45830

# Use "female" as the reference category for gender.
cpsipums2[["gender"]] <- relevel(cpsipums2[["gender"]], ref = "female")

#race/ethnicity
#There are no entries in this data set under "other"
# 0/1 indicators built from the numeric codes (1 = asian, 2 = white,
# 3 = minorities, 4 = other) before raceth is turned into a factor. The
# previous recode(..., recodes = <number>) calls carried no "old=new" pair and
# so did not produce indicators. Missing raceth stays NA.
cpsipums2$asian <- as.integer(as.numeric(cpsipums2$raceth) == 1)
cpsipums2$white <- as.integer(as.numeric(cpsipums2$raceth) == 2)
cpsipums2$minorities <- as.integer(as.numeric(cpsipums2$raceth) == 3)
cpsipums2$other <- as.integer(as.numeric(cpsipums2$raceth) == 4)

# car::recode is named explicitly because dplyr (attached later) masks recode().
cpsipums2$raceth <- car::recode(
  cpsipums2$raceth,
  recodes = "1='asian'; 2='white'; 3='minorities'",
  as.factor = TRUE
)
# "white" is the reference category in the models.
cpsipums2$raceth <- relevel(cpsipums2$raceth, ref = "white")

table(cpsipums2$raceth)

## 
##      white      asian minorities 
##      51846      13868      18742

Recoding of Ordinal Variable: Education

Education level ordinal coding from Bachelors (1) followed by Masters (2) or other post-graduate education (3) which refers to Doctoral degrees or Professional degrees.

# Ordinal outcome from dgrdg: 1 stays 1, 2 stays 2, 3 and 4 are merged into 3,
# anything else becomes NA.
# `education` is the factor version (used by svyolr and vglm); `education2` is
# the non-factor version (used for the > 1 and > 2 splits).
# Both calls name car::recode explicitly because dplyr, attached further down,
# masks recode(); `as.factor` is the current name of `as.factor.result`.
cpsipums2$education <- car::recode(
  cpsipums2$dgrdg,
  recodes = "1=1; 2=2; 3:4=3; else=NA",
  as.factor = TRUE
)
cpsipums2$education <- relevel(cpsipums2$education, ref = "1")
cpsipums2$education2 <- car::recode(
  cpsipums2$dgrdg,
  recodes = "1=1; 2=2; 3:4=3;  else=NA",
  as.factor = FALSE
)
options(survey.lonely.psu = "adjust")

table(cpsipums2$education)

## 
##     1     2     3 
## 44199 33341  6916

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:car':
## 
##     recode

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Analysis file: keep the model variables, the weight and the stratum
# identifier, then drop every row that has a missing value in any of them.
sub <- local({
  kept_columns <- select(
    cpsipums2,
    education, education2, gender, chtot, raceth, salary, weight, sample
  )
  filter(kept_columns, complete.cases(kept_columns))
})

#Survey Design
# survey.lonely.psu controls how the survey package treats strata that contain
# a single sampling unit when it estimates variances.
options(survey.lonely.psu = "adjust")
# No cluster ids (ids = ~1), stratified by `sample`, weighted by `weight`.
des2 <- svydesign(
  ids = ~1,
  strata = ~sample,
  weights = ~weight,
  data = sub
)

Ordinal Regression

The ordinal regression shows that males are slightly more likely than females to make a transition into post-graduate education. The same trend is found amongst White and Asian survey respondents when compared to minorities. Finally, those with two or more children are slightly less likely to enter post-graduate education.

#Proportional odds model for the education outcome
# Survey-weighted ordinal logit of education on gender, race/ethnicity and
# number of children. `gender` was listed twice in the formula; the duplicate
# term is removed (the fitted model is the same, only the recorded call
# changes).
fit.solr1 <- svyolr(education ~ gender + raceth + chtot, design = des2)
summary(fit.solr1)

## Call:
## svyolr(education ~ gender + raceth + gender + chtot, des2)
## 
## Coefficients:
##                                 Value Std. Error    t value
## gendermale                 0.04402464 0.04147118  1.0615720
## racethasian                0.19785049 0.06131927  3.2265630
## racethminorities          -0.15725858 0.05759723 -2.7303148
## chtottwo or more children -0.02727343 0.04209209 -0.6479466
## 
## Intercepts:
##     Value   Std. Error t value
## 1|2  0.3331  0.0407     8.1941
## 2|3  2.0897  0.0465    44.9392

#Calculate the AIC ourself
# AIC = deviance + 2 * (number of estimated parameters). An ordinal logit
# estimates the cutpoints (fit.solr1$zeta, printed as "Intercepts" by
# summary()) as well as the slopes, so both are counted. Counting only the
# slopes understated the penalty by 2 per cutpoint.
# NOTE: the value printed below was rendered before this fix; re-knit to
# refresh it.
n_parameters <- length(fit.solr1$coefficients) + length(fit.solr1$zeta)
fit.solr1$deviance + 2 * n_parameters

## [1] 64440.7

Proportional odds assumption

A comparison of the betas, which serves as a check of the proportional odds assumption, shows that they are NOT consistent across transitions. The only area where they appear consistent is along the race variable.

# Proportional odds check: fit one survey-weighted binary logit for each
# cumulative split of the outcome (education2 > 1 and education2 > 2) and
# compare the slopes with those of the ordinal model fit.solr1.
#
# The predictors are the same as in fit.solr1. `education` is left off the
# right-hand side: it is the factor version of the outcome being split, and
# using it as a predictor made the fits fail to converge and produced odds
# ratios on the order of 1e+23.
# quasibinomial() gives the same point estimates as "binomial" without the
# "non-integer #successes" warning that sampling weights trigger.
# NOTE: warnings and tables rendered from the earlier specification are stale
# until the document is re-knit.
ex1 <- svyglm(I(education2 > 1) ~ gender + raceth + chtot,
              design = des2, family = quasibinomial())

ex2 <- svyglm(I(education2 > 2) ~ gender + raceth + chtot,
              design = des2, family = quasibinomial())

# Slopes only (intercept dropped). The ordinal-model slopes are picked by name
# so that all three lines are plotted in the same term order.
beta_gt1 <- coef(ex1)[-1]
beta_gt2 <- coef(ex2)[-1]
beta_po <- fit.solr1$coefficients[names(beta_gt1)]
term_pos <- seq_along(beta_gt1)

plot(beta_gt1, ylim = c(-3, 3), type = "l", xaxt = "n",
     ylab = "Beta",
     main = c("Comparison of betas for", " proportional odds assumption"))
lines(beta_gt2, col = 1, lty = 2)
lines(beta_po, col = 4, lwd = 3)
# One tick and one label per plotted coefficient.
axis(side = 1, at = term_pos, labels = FALSE)
text(x = term_pos, y = -4, srt = 90, pos = 1, xpd = TRUE, cex = .8,
     labels = c("male", "asian", "minorities", "Two children or more"))
legend("bottomright", col = c(1, 1, 4), lty = c(1, 2, 1), lwd = c(1, 1, 3),
       legend = c(">1", ">2", "proportional odds"))

#Printing odds ratios, one row per cumulative split
round(exp(rbind(">1" = beta_gt1, ">2" = beta_gt2)), 3)

##      racethasian racethminorities   education2   education3
## [1,]       0.980            1.036 1.777857e+23 2.147777e+23
## [2,]       1.159            1.129 1.168000e+00 2.139375e+23
##      chtottwo or more children gendermale
## [1,]                     0.991      1.037
## [2,]                     0.967      1.063

Non proportional assumption

Using a non-proportional odds assumption, we find some of the same trends: minorities are less likely to pursue education beyond the bachelors level when compared to white and asian survey respondents. Males are also slightly more likely than women to do so; however, in the transition between bachelors and masters, men show a slight negative direction when compared to women. This is consistent with previous findings in assignments 1 and 2. An interesting finding is that those with two or more children are less likely to transition between bachelors and masters, while this finding reverses for those with two or more children transitioning between masters and doctoral/professional degrees.

library(VGAM)

## Loading required package: stats4

## Loading required package: splines

## 
## Attaching package: 'VGAM'

## The following object is masked from 'package:survey':
## 
##     calibrate

## The following object is masked from 'package:car':
## 
##     logit

#Proportional odds
# Cumulative logit model with the slopes constrained equal across the two
# cutpoints (parallel = TRUE). reverse = TRUE models P(Y >= 2) and P(Y >= 3).
# The weights are the survey weights divided by their mean over cpsipums2.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
fit.vgam <- vglm(
  formula = as.ordered(education) ~ gender + chtot + raceth,
  family = cumulative(parallel = TRUE, reverse = TRUE),
  data = cpsipums2,
  weights = weight / mean(weight, na.rm = TRUE)
)
summary(fit.vgam)

## 
## Call:
## vglm(formula = as.ordered(education) ~ gender + chtot + raceth, 
##     family = cumulative(parallel = T, reverse = T), data = cpsipums2, 
##     weights = weight/mean(weight, na.rm = T))
## 
## 
## Pearson residuals:
##                   Min      1Q  Median      3Q    Max
## logit(P[Y>=2]) -4.446 -0.5833 -0.1434  0.7137  6.908
## logit(P[Y>=3]) -2.971 -0.3736 -0.2105 -0.1076 11.771
## 
## Coefficients: 
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1             -0.33312    0.02076 -16.047  < 2e-16 ***
## (Intercept):2             -2.08974    0.02427 -86.086  < 2e-16 ***
## gendermale                 0.04402    0.02020   2.179   0.0293 *  
## chtottwo or more children -0.02728    0.02081  -1.311   0.1899    
## racethasian                0.19785    0.02962   6.680 2.39e-11 ***
## racethminorities          -0.15726    0.02796  -5.624 1.86e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Number of linear predictors:  2 
## 
## Names of linear predictors: logit(P[Y>=2]), logit(P[Y>=3])
## 
## Residual deviance: 70567.94 on 69972 degrees of freedom
## 
## Log-likelihood: -35283.97 on 69972 degrees of freedom
## 
## Number of iterations: 3 
## 
## No Hauck-Donner effect found in any of the estimates
## 
## Exponentiated coefficients:
##                gendermale chtottwo or more children 
##                 1.0450069                 0.9730928 
##               racethasian          racethminorities 
##                 1.2187741                 0.8544806

#Nagelkerke R^2
# The intercept-only model must be fit to the same rows, with the same weights,
# as fit.vgam; otherwise the two deviances are not comparable. fit.vgam drops
# rows with a missing outcome, predictor or weight, so the same rows are
# selected here. Fitting the null model to all of cpsipums2 used far more rows
# than fit.vgam and inflated the R^2.
null_rows <- complete.cases(
  cpsipums2[, c("education", "gender", "chtot", "raceth", "weight")]
)
null_data <- cpsipums2[null_rows, ]
# Same scaling as in fit.vgam: weight divided by its mean over cpsipums2.
null_data$w_std <- null_data$weight / mean(cpsipums2$weight, na.rm = TRUE)

fit.null <- vglm(
  formula = as.ordered(education) ~ 1,
  family = cumulative(parallel = TRUE, reverse = TRUE),
  data = null_data,
  weights = w_std
)

# Sample size for the formula: the sum of the weights behind the deviances,
# which equals the number of rows when the weights average 1. The previous
# hard-coded 485742 does not correspond to this data set.
# NOTE: the value printed below was rendered before this fix; re-knit to
# refresh it.
n_eff <- sum(null_data$w_std)
dev_model <- fit.vgam@criterion$deviance
dev_null <- fit.null@criterion$deviance
(1 - exp((dev_model - dev_null) / n_eff)) / (1 - exp(-dev_null / n_eff))

## [1] 0.5783867

#Non-proportional odds
# Same cumulative logit model, but every predictor gets a separate slope for
# each cutpoint (parallel = FALSE). reverse = TRUE models P(Y >= 2) and
# P(Y >= 3). The weights are the survey weights divided by their mean over
# cpsipums2.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
fit.vgam2 <- vglm(
  formula = as.ordered(education) ~ gender + chtot + raceth,
  family = cumulative(parallel = FALSE, reverse = TRUE),
  data = cpsipums2,
  weights = weight / mean(weight, na.rm = TRUE)
)
summary(fit.vgam2)

## 
## Call:
## vglm(formula = as.ordered(education) ~ gender + chtot + raceth, 
##     family = cumulative(parallel = F, reverse = T), data = cpsipums2, 
##     weights = weight/mean(weight, na.rm = T))
## 
## 
## Pearson residuals:
##                   Min      1Q  Median      3Q   Max
## logit(P[Y>=2]) -4.502 -0.5823 -0.1427  0.7196  7.25
## logit(P[Y>=3]) -3.394 -0.3714 -0.2094 -0.1075 12.53
## 
## Coefficients: 
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1               -0.29124    0.02118 -13.750  < 2e-16 ***
## (Intercept):2               -2.32287    0.03500 -66.376  < 2e-16 ***
## gendermale:1                -0.01585    0.02079  -0.762 0.445790    
## gendermale:2                 0.35982    0.03315  10.855  < 2e-16 ***
## chtottwo or more children:1 -0.04367    0.02142  -2.039 0.041450 *  
## chtottwo or more children:2  0.05733    0.03383   1.695 0.090153 .  
## racethasian:1                0.18877    0.03077   6.135 8.51e-10 ***
## racethasian:2                0.22861    0.04543   5.032 4.84e-07 ***
## racethminorities:1          -0.15812    0.02859  -5.530 3.20e-08 ***
## racethminorities:2          -0.16264    0.04722  -3.445 0.000572 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Number of linear predictors:  2 
## 
## Names of linear predictors: logit(P[Y>=2]), logit(P[Y>=3])
## 
## Residual deviance: 70402.36 on 69968 degrees of freedom
## 
## Log-likelihood: -35201.18 on 69968 degrees of freedom
## 
## Number of iterations: 4 
## 
## No Hauck-Donner effect found in any of the estimates
## 
## Exponentiated coefficients:
##                gendermale:1                gendermale:2 
##                   0.9842730                   1.4330709 
## chtottwo or more children:1 chtottwo or more children:2 
##                   0.9572718                   1.0590081 
##               racethasian:1               racethasian:2 
##                   1.2077629                   1.2568506 
##          racethminorities:1          racethminorities:2 
##                   0.8537498                   0.8498970

# Nagelkerke R^2 for the non-proportional odds model.
# The intercept-only model must be fit to the same rows, with the same weights,
# as fit.vgam2; otherwise the two deviances are not comparable. fit.vgam2 drops
# rows with a missing outcome, predictor or weight, so the same rows are
# selected here.
null_rows2 <- complete.cases(
  cpsipums2[, c("education", "gender", "chtot", "raceth", "weight")]
)
null_data2 <- cpsipums2[null_rows2, ]
# Same scaling as in fit.vgam2: weight divided by its mean over cpsipums2.
null_data2$w_std <- null_data2$weight / mean(cpsipums2$weight, na.rm = TRUE)

fit.null2 <- vglm(
  formula = as.ordered(education) ~ 1,
  family = cumulative(parallel = FALSE, reverse = TRUE),
  data = null_data2,
  weights = w_std
)

# Sample size for the formula: the sum of the weights behind the deviances,
# which equals the number of rows when the weights average 1. The previous
# hard-coded 485742 does not correspond to this data set.
# NOTE: the value printed below was rendered before this fix; re-knit to
# refresh it.
n_eff2 <- sum(null_data2$w_std)
dev_model2 <- fit.vgam2@criterion$deviance
dev_null2 <- fit.null2@criterion$deviance
(1 - exp((dev_model2 - dev_null2) / n_eff2)) / (1 - exp(-dev_null2 / n_eff2))

## [1] 0.5794494

# AIC of the proportional odds model (lower is better).
fit.vgam %>% AIC()

## [1] 70579.94

# AIC of the non-proportional odds model (lower is better).
fit.vgam2 %>% AIC()

## [1] 70422.36

The best fitting model appears to be when using the non-proportional assumptions.

A calculation of the odds ratios and confidence intervals confirms our findings:

1. Males have 43% higher odds of transitioning from a masters into a doctoral/professional level of education.
2. Females have about 2% higher odds of transitioning from a bachelors into a masters degree.
3. Survey participants with two or more children have about 4% lower odds of transitioning from a bachelors into a masters degree.
4. Survey participants with two or more children have about 6% higher odds of transitioning from a masters degree into a doctoral/professional degree.
5. Asians have about 21% and 26% higher odds of transitioning into a masters degree and a doctoral/professional degree, respectively.
6. Minorities have about 15% lower odds of transitioning from a bachelors to a masters degree or from a masters degree to a doctoral/professional degree.

In further analysis, I would like to exclude professional degrees completely to see if the trends remain the same. I would also like to replace masters degrees with professional degrees to see if there are any changes if the person transitions from bachelors to a professional degree.

# Odds ratios: exponentiated coefficients of the non-proportional odds model,
# rounded to three decimals.
fit.vgam2 %>%
  coef() %>%
  exp() %>%
  round(digits = 3)

##               (Intercept):1               (Intercept):2 
##                       0.747                       0.098 
##                gendermale:1                gendermale:2 
##                       0.984                       1.433 
## chtottwo or more children:1 chtottwo or more children:2 
##                       0.957                       1.059 
##               racethasian:1               racethasian:2 
##                       1.208                       1.257 
##          racethminorities:1          racethminorities:2 
##                       0.854                       0.850

# Confidence intervals for the odds ratios: exponentiated interval limits of
# the non-proportional odds model, rounded to three decimals.
fit.vgam2 %>%
  confint() %>%
  exp() %>%
  round(digits = 3)

##                             2.5 % 97.5 %
## (Intercept):1               0.717  0.779
## (Intercept):2               0.091  0.105
## gendermale:1                0.945  1.025
## gendermale:2                1.343  1.529
## chtottwo or more children:1 0.918  0.998
## chtottwo or more children:2 0.991  1.132
## racethasian:1               1.137  1.283
## racethasian:2               1.150  1.374
## racethminorities:1          0.807  0.903
## racethminorities:2          0.775  0.932