All the variables in this dataset were compiled from a sample of the National Survey of College Graduates through the IPUMS-HigherEd site.

The data used for this analysis consists of a sample from the National Survey of College Graduates gathered through IPUMS-Higher Ed.

library(car)
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.1. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(ggplot2)
library(pander)
library(knitr)
library(haven)
cpsipums4<-read_dta("~/Statistics II/Data for project/highered/highered_00010.dta")
names(cpsipums4) #print the column names
##  [1] "personid" "year"     "weight"   "sample"   "surid"    "age"     
##  [7] "gender"   "minrty"   "raceth"   "bthus"    "chtot"    "dgrdg"   
## [13] "lfstat"   "hrswkgr"  "jobins"   "jobpens"  "jobvac"   "ocedrlp" 
## [19] "prmbr"    "salary"

Recoding of Variables

#income grouping. NA's and 407 individuals who earned zero dollars have been removed from this variable.
cpsipums4$salary3<-ifelse(cpsipums4$salary<=0, NA, cpsipums4$salary)
cpsipums4$salary4<-ifelse(cpsipums4$salary3==9999998, NA, cpsipums4$salary3)
summary(cpsipums4$salary4)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    1000   42000   67000   71284   96000  150000   13559
#number of children living in the household
cpsipums4$onechild<-recode(cpsipums4$chtot, recodes=01)
cpsipums4$twoormore<-recode(cpsipums4$chtot, recodes=03)
cpsipums4$chtot<-recode(cpsipums4$chtot, recodes="00='no children'; 01='one child'; 02='one to three children'; 03='two or more children'; 04='more than 3 children'; 98=NA", as.factor.result=T)
cpsipums4$chtot<-relevel(cpsipums4$chtot, ref = "one child")
table(cpsipums4$chtot)
## 
##            one child two or more children 
##                14472                20517
#Do they have two or more children?
cpsipums4$twochildrenx<-recode(cpsipums4$chtot, recodes="3=1; 1=0; 98=NA")

table(cpsipums4$twochildrenx)
## 
##            one child two or more children 
##                14472                20517
#gender
cpsipums4$female<-recode(cpsipums4$gender, recodes=1)
cpsipums4$male<-recode(cpsipums4$gender, recodes=2)
cpsipums4$gender<-recode(cpsipums4$gender, recodes="1='female'; 2='male'", as.factor.result=T)
table(cpsipums4$gender)
## 
## female   male 
##  38626  45830
cpsipums4$gender<-relevel(cpsipums4$gender, ref = "female")

#race/ethnicity 
#There are no entries in this data set under "other"
cpsipums4$asian<-recode(cpsipums4$raceth, recodes=1)
cpsipums4$white<-recode(cpsipums4$raceth, recodes=2)
cpsipums4$minorities<-recode(cpsipums4$raceth, recodes=3)
cpsipums4$other<-recode(cpsipums4$raceth, recodes=4)

cpsipums4$raceth<-recode(cpsipums4$raceth, recodes="1='asian'; 2='white'; 3='minorities'", as.factor.result=T)
cpsipums4$raceth<-relevel(cpsipums4$raceth, ref = "white")

table(cpsipums4$raceth)
## 
##      white      asian minorities 
##      51846      13868      18742
#education level
cpsipums4$dgrdg<-recode(cpsipums4$dgrdg, recodes="1='0bachelors'; 2='1masters'; 3='2doctorate'; 4='3professional'", as.factor.result=T)
table(cpsipums4$dgrdg, cpsipums4$gender)
##                
##                 female  male
##   0bachelors     18930 25269
##   1masters       16756 16585
##   2doctorate      1103  1611
##   3professional   1837  2365
#place of birth (There are no NAs)
cpsipums4$birth<-recode (cpsipums4$bthus, recodes="00='not in the US'; 01='in the US'; 99='NA'", as.factor.result=T)
table(cpsipums4$birth, cpsipums4$gender)
##                
##                 female  male
##   in the US      29873 34003
##   not in the US   8753 11827
summary(cpsipums4$birth)
##     in the US not in the US 
##         63876         20580
#age
cpsipums4$agex<-ifelse(cpsipums4$age>=49, NA, cpsipums4$age)
cpsipums4$agex<-cut(cpsipums4$agex, breaks=c(22,29,39,49))
summary(cpsipums4$agex)
## (22,29] (29,39] (39,49]    NA's 
##   19733   20888   13499   30336

Descriptive Analysis

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
analysisf<-cpsipums4%>%
  select(chtot,twochildrenx, gender,dgrdg, salary4, raceth,birth, agex, weight, sample) %>%
  filter( complete.cases(.))
#DESIGN
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~sample, weights=~weight, data =analysisf)

Percentages of Children Living in the Household by Gender

The percentage of males with two children or more is higher than the observed percentage on the females. The opposite pattern is observed amongst the males with one child with a lower percentage than the females with one child.

library(ggplot2)
chi2<-svyby(formula = ~gender, by = ~chtot, design = des, FUN = svymean, na.rm=T)
svychisq(~gender+chtot, design = des)
## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~gender + chtot, design = des)
## F = 26.213, ndf = 1, ddf = 21628, p-value = 3.084e-07
qplot(x=chi2$chtot,y=chi2$gendermale, data=chi2 ,xlab="children", ylab="males" )+
geom_errorbar(aes(x=chtot, ymin=gendermale,ymax=gendermale), width=.25)+
ggtitle(label = "% of Children for Males")

qplot(x=chi2$chtot,y=chi2$genderfemale, data=chi2 ,xlab="children", ylab="females" )+
geom_errorbar(aes(x=chtot, ymin=genderfemale,ymax=genderfemale), width=.25)+
ggtitle(label = "% of Children for Females")

###Percentage of Children Living in the Household by Level of Education A further comparison of genders confirms the pattern observed particularly when comparing men and women with a doctorate degree and a masters degree. However, this trend reverses when looking at participants with a professional degree; women are more likely to report having 2 or more children in the household than men.

fert2<-svyby(formula = ~gender, by = ~chtot+dgrdg, design = des, FUN = svymean, 
na.rm=T)
fert2
##                                                   chtot         dgrdg
## one child.0bachelors                          one child    0bachelors
## two or more children.0bachelors    two or more children    0bachelors
## one child.1masters                            one child      1masters
## two or more children.1masters      two or more children      1masters
## one child.2doctorate                          one child    2doctorate
## two or more children.2doctorate    two or more children    2doctorate
## one child.3professional                       one child 3professional
## two or more children.3professional two or more children 3professional
##                                    genderfemale gendermale se.genderfemale
## one child.0bachelors                  0.5122710  0.4877290      0.01674961
## two or more children.0bachelors       0.4433727  0.5566273      0.01199109
## one child.1masters                    0.5806455  0.4193545      0.01748147
## two or more children.1masters         0.4917566  0.5082434      0.01367640
## one child.2doctorate                  0.3925541  0.6074459      0.04423637
## two or more children.2doctorate       0.3249362  0.6750638      0.04600357
## one child.3professional               0.4894034  0.5105966      0.04431615
## two or more children.3professional    0.4336513  0.5663487      0.02804066
##                                    se.gendermale
## one child.0bachelors                  0.01674961
## two or more children.0bachelors       0.01199109
## one child.1masters                    0.01748147
## two or more children.1masters         0.01367640
## one child.2doctorate                  0.04423637
## two or more children.2doctorate       0.04600357
## one child.3professional               0.04431615
## two or more children.3professional    0.02804066
fert2$chtot_rec<-rep(c("one child","two or more children"),2)
fert2$dgrdg_rec<-factor(c(rep("0bachelors", 2), rep("1masters", 2), rep("2doctorate", 2), rep("3professional", 2)), ordered = T)
                      
#fix the order of the education factor levels
fert2$dgrdg_rec<-factor(fert2$dgrdg_rec, levels(fert2$dgrdg_rec)[c(4,3,2,1)])
#FEMALES
A<-ggplot(fert2, aes(dgrdg_rec,fert2$genderfemale),xlab="education", ylab="% gender")
A<-A+geom_point(aes(colour=chtot_rec))
A<-A+geom_line(aes(colour=chtot_rec,group=chtot_rec))

A<-A+ylab("female")
A<-A+xlab("Education Level")
A+ggtitle("Percentage of Children in the Household for Females by Level of Education")

#MALES
B<-ggplot(fert2, aes(dgrdg_rec,fert2$gendermale),xlab="education", ylab="% gender")
B<-B+geom_point(aes(colour=chtot_rec))
B<-B+geom_line(aes(colour=chtot_rec,group=chtot_rec))

B<-B+ylab("male")
B<-B+xlab("Education Level")
B+ggtitle("Percentage of Children in the Household for Males by Level of Education")

###Percentage of Children Living in the Household by Gender by Place of Birth A comparison by gender and place of birth shows that females with a degree or higher who were born in the US are more likely to have one child than two or more. For women born outside of the US this trend is also found however, they are less likely to have one or two children in comparison to women born in the US.

A percentage comparison of males shows the opposite result. Men born outside the US are more likely to report having 2 or more children than men born in the US.

summary (cpsipums4$birth)
##     in the US not in the US 
##         63876         20580
fert3<-svyby(formula = ~gender, by = ~chtot+birth, design = des, FUN = svymean, 
na.rm=T)
fert3
##                                                   chtot         birth
## one child.in the US                           one child     in the US
## two or more children.in the US     two or more children     in the US
## one child.not in the US                       one child not in the US
## two or more children.not in the US two or more children not in the US
##                                    genderfemale gendermale se.genderfemale
## one child.in the US                   0.5644567  0.4355433     0.013810481
## two or more children.in the US        0.4699535  0.5300465     0.009779827
## one child.not in the US               0.4355965  0.5644035     0.022156964
## two or more children.not in the US    0.4000004  0.5999996     0.017313588
##                                    se.gendermale
## one child.in the US                  0.013810481
## two or more children.in the US       0.009779827
## one child.not in the US              0.022156964
## two or more children.not in the US   0.017313588
fert3$chtot_rec<-rep(c("one child","two or more children"),2)
fert3$birth_rec<-factor(c(rep("intheUS", 2), rep("notintheUS", 2)), ordered = T)
                        

#FEMALES
A<-ggplot(fert3, aes(birth_rec,fert3$genderfemale),xlab="place of birth", ylab="% gender")
A<-A+geom_point(aes(colour=chtot_rec))
A<-A+geom_line(aes(colour=chtot_rec,group=chtot_rec))

A<-A+ylab("female")
A<-A+xlab("place of birth")
A+ggtitle("Percentage of Children in the Household for Females by Place of Birth")

#MALES
A<-ggplot(fert3, aes(birth_rec,fert3$gendermale),xlab="place of birth", ylab="% gender")
A<-A+geom_point(aes(colour=chtot_rec))
A<-A+geom_line(aes(colour=chtot_rec,group=chtot_rec))

A<-A+ylab("male")
A<-A+xlab("place of birth")
A+ggtitle("Percentage of Children in the Household for Males by Place of Birth")

summary(cpsipums4$raceth)
##      white      asian minorities 
##      51846      13868      18742

Percentage of Children Living in the Household by level of Education and Race/Ethnicity

A look by race and ethnicity shows that the white participants are more likely to report having two or more children accross all level of education than one child, although the gap closes for those with a professional degree.For Asian participants, the trends reverses; they are more likely to report having one child accross all levels of education. Finally, the minority group shows some interesting findings. At the bacherlor’s level, the percentages between having one child or two or more are the same. When we look at minorities with a doctoral or master’s degree, they are more likely to report having one child however, those with a professional degree report having two or more children more often.

fert4<-svyby(formula = ~raceth, by = ~chtot+dgrdg, design = des, FUN = svymean, 
na.rm=T)
fert4
##                                                   chtot         dgrdg
## one child.0bachelors                          one child    0bachelors
## two or more children.0bachelors    two or more children    0bachelors
## one child.1masters                            one child      1masters
## two or more children.1masters      two or more children      1masters
## one child.2doctorate                          one child    2doctorate
## two or more children.2doctorate    two or more children    2doctorate
## one child.3professional                       one child 3professional
## two or more children.3professional two or more children 3professional
##                                    racethwhite racethasian
## one child.0bachelors                 0.6368831  0.15196181
## two or more children.0bachelors      0.7241036  0.09812742
## one child.1masters                   0.6358618  0.19516156
## two or more children.1masters        0.7053533  0.14118711
## one child.2doctorate                 0.4874854  0.38138179
## two or more children.2doctorate      0.5680071  0.32588310
## one child.3professional              0.6150045  0.26478968
## two or more children.3professional   0.7593696  0.10904173
##                                    racethminorities se.racethwhite
## one child.0bachelors                      0.2111551     0.01599005
## two or more children.0bachelors           0.1777690     0.01072046
## one child.1masters                        0.1689766     0.01680647
## two or more children.1masters             0.1534596     0.01170554
## one child.2doctorate                      0.1311328     0.04451820
## two or more children.2doctorate           0.1061098     0.04715143
## one child.3professional                   0.1202059     0.04536123
## two or more children.3professional        0.1315886     0.02278542
##                                    se.racethasian se.racethminorities
## one child.0bachelors                  0.011924534          0.01337396
## two or more children.0bachelors       0.006751630          0.00943298
## one child.1masters                    0.011246382          0.01450926
## two or more children.1masters         0.007410258          0.01002523
## one child.2doctorate                  0.042774071          0.03404169
## two or more children.2doctorate       0.045438820          0.02763948
## one child.3professional               0.044542535          0.02699844
## two or more children.3professional    0.016540220          0.01762423
fert4$chtot_rec<-rep(c("one child","two or more children"), 2)
fert4$dgrdg_rec<-factor(c(rep("0bachelors", 2), rep("1masters", 2), rep("2doctorate", 2), rep("3professional", 2)), ordered = T)

#WHITE
A<-ggplot(fert4, aes(dgrdg_rec,fert4$racethwhite),xlab="Education", ylab="% raceth")
A<-A+geom_point(aes(colour=chtot_rec))
A<-A+geom_line(aes(colour=chtot_rec,group=chtot_rec))

A<-A+ylab("white")
A<-A+xlab("education")
A+ggtitle("Percentage of Children in the Household for White Participants")

#ASIAN
A<-ggplot(fert4, aes(dgrdg_rec,fert4$racethasian),xlab="Education", ylab="% raceth")
A<-A+geom_point(aes(colour=chtot_rec))
A<-A+geom_line(aes(colour=chtot_rec,group=chtot_rec))

A<-A+ylab("asian")
A<-A+xlab("education")
A+ggtitle("Percentage of Children in the Household for Asian Participants")

#MINORITIES
A<-ggplot(fert4, aes(dgrdg_rec,fert4$racethminorities),xlab="Education", ylab="% raceth")
A<-A+geom_point(aes(colour=chtot_rec))
A<-A+geom_line(aes(colour=chtot_rec,group=chtot_rec))

A<-A+ylab("minorities")
A<-A+xlab("education")
A+ggtitle("Percentage of Children in the Household for Minority Participants")

###Percentage of Children Living in the Household by Level of Education and Age

fert5<-svyby(formula = ~agex, by = ~chtot+dgrdg, design = des, FUN = svymean, 
na.rm=T)
fert5
##                                                   chtot         dgrdg
## one child.0bachelors                          one child    0bachelors
## two or more children.0bachelors    two or more children    0bachelors
## one child.1masters                            one child      1masters
## two or more children.1masters      two or more children      1masters
## one child.2doctorate                          one child    2doctorate
## two or more children.2doctorate    two or more children    2doctorate
## one child.3professional                       one child 3professional
## two or more children.3professional two or more children 3professional
##                                    agex(22,29] agex(29,39] agex(39,49]
## one child.0bachelors               0.148093592   0.4759955   0.3759109
## two or more children.0bachelors    0.036317760   0.4033073   0.5603750
## one child.1masters                 0.070532515   0.5478367   0.3816308
## two or more children.1masters      0.020351141   0.4163619   0.5632869
## one child.2doctorate               0.045660760   0.4762022   0.4781371
## two or more children.2doctorate    0.016365149   0.2432663   0.7403686
## one child.3professional            0.072336281   0.5753288   0.3523349
## two or more children.3professional 0.002768426   0.4292026   0.5680290
##                                    se.agex(22,29] se.agex(29,39]
## one child.0bachelors                  0.010497389     0.01677959
## two or more children.0bachelors       0.004750570     0.01171078
## one child.1masters                    0.006615392     0.01813714
## two or more children.1masters         0.004669316     0.01368568
## one child.2doctorate                  0.012954357     0.04470383
## two or more children.2doctorate       0.009681062     0.03403752
## one child.3professional               0.021884803     0.04371710
## two or more children.3professional    0.001558626     0.02736714
##                                    se.agex(39,49]
## one child.0bachelors                   0.01673825
## two or more children.0bachelors        0.01184973
## one child.1masters                     0.01787279
## two or more children.1masters          0.01375369
## one child.2doctorate                   0.04446329
## two or more children.2doctorate        0.03518223
## one child.3professional                0.04219639
## two or more children.3professional     0.02738916
fert5$chtot_rec<-rep(c("one child","two or more children"), 2)
fert5$dgrdg_rec<-factor(c(rep("0bachelors", 2), rep("1masters", 2), rep("2doctorate", 2), rep("3professional", 2)), ordered = T)

#22-29
A<-ggplot(fert5, aes(dgrdg_rec,fert5$`agex(22,29]`),xlab="Education", ylab="% agex")
A<-A+geom_point(aes(colour=chtot_rec))
A<-A+geom_line(aes(colour=chtot_rec,group=chtot_rec))

A<-A+ylab("22-29")
A<-A+xlab("education")
A+ggtitle("Percentage of Children in the Household for Participants Ages 22-29")

#29-39
A<-ggplot(fert5, aes(dgrdg_rec,fert5$`agex(29,39]`),xlab="Education", ylab="% agex")
A<-A+geom_point(aes(colour=chtot_rec))
A<-A+geom_line(aes(colour=chtot_rec,group=chtot_rec))

A<-A+ylab("29-39")
A<-A+xlab("education")
A+ggtitle("Percentage of Children in the Household for Participants Ages 29-39")

#39-49
A<-ggplot(fert5, aes(dgrdg_rec,fert5$`agex(39,49]`),xlab="Education", ylab="% agex")
A<-A+geom_point(aes(colour=chtot_rec))
A<-A+geom_line(aes(colour=chtot_rec,group=chtot_rec))

A<-A+ylab("39-49")
A<-A+xlab("education")
A+ggtitle("Percentage of Children in the Household for Participants Ages 39-49")

###Basic Chi-Square Test of Indendendence We compute chi-square analyses to observe all the various interactions with the number of children. Only complete cases are considered below. All interactions show significant effects.

#column percentages of number of children by gender
prop.table(table(analysisf$chtot, analysisf$gender), margin=2)
##                       
##                           female      male
##   one child            0.4073771 0.3356697
##   two or more children 0.5926229 0.6643303
chisq.test(table(analysisf$chtot, analysisf$gender))
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table(analysisf$chtot, analysisf$gender)
## X-squared = 118.14, df = 1, p-value < 2.2e-16
#column percentages of number of children by race/ethcnitiy
prop.table(table(analysisf$chtot, analysisf$raceth), margin=2)
##                       
##                            white     asian minorities
##   one child            0.3387135 0.4274234  0.3958755
##   two or more children 0.6612865 0.5725766  0.6041245
chisq.test(table(analysisf$chtot, analysisf$raceth))
## 
##  Pearson's Chi-squared test
## 
## data:  table(analysisf$chtot, analysisf$raceth)
## X-squared = 123.8, df = 2, p-value < 2.2e-16
#column percentages of number of children by place of birth
prop.table(table(analysisf$chtot, analysisf$birth), margin=2)
##                       
##                        in the US not in the US
##   one child            0.3559038     0.3990526
##   two or more children 0.6440962     0.6009474
chisq.test(table(analysisf$chtot, analysisf$birth))
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table(analysisf$chtot, analysisf$birth)
## X-squared = 34.947, df = 1, p-value = 3.389e-09
#column percentages of number of children by level of education
prop.table(table(analysisf$chtot, analysisf$dgrdg), margin=2)
##                       
##                        0bachelors  1masters 2doctorate 3professional
##   one child             0.3553617 0.3832552  0.4287812     0.3198421
##   two or more children  0.6446383 0.6167448  0.5712188     0.6801579
chisq.test(table(analysisf$chtot, analysisf$dgrdg))
## 
##  Pearson's Chi-squared test
## 
## data:  table(analysisf$chtot, analysisf$dgrdg)
## X-squared = 37.55, df = 3, p-value = 3.52e-08
#column percentages of number of children by level of age
prop.table(table(analysisf$chtot, analysisf$agex), margin=2)
##                       
##                          (22,29]   (29,39]   (39,49]
##   one child            0.6624258 0.4075370 0.2506744
##   two or more children 0.3375742 0.5924630 0.7493256
chisq.test(table(analysisf$chtot, analysisf$agex))
## 
##  Pearson's Chi-squared test
## 
## data:  table(analysisf$chtot, analysisf$agex)
## X-squared = 1494.4, df = 2, p-value < 2.2e-16
#column percentages of education level by age
prop.table(table(analysisf$dgrdg, analysisf$agex), margin=2)
##                
##                    (22,29]    (29,39]    (39,49]
##   0bachelors    0.66497031 0.43532587 0.49897486
##   1masters      0.30619169 0.48900440 0.40725154
##   2doctorate    0.01696353 0.02718912 0.03981871
##   3professional 0.01187447 0.04848061 0.05395489
chisq.test(table(analysisf$dgrdg, analysisf$agex))
## 
##  Pearson's Chi-squared test
## 
## data:  table(analysisf$dgrdg, analysisf$agex)
## X-squared = 498.64, df = 6, p-value < 2.2e-16

Logit Regression Analysis

A logit analysis shows similar patterns than those mentioned during the descriptive analysis: 1. Men are more likely than women to have two children or more 2. Men and Women with a masters and doctoral degree are less likely to have two or more children than those with a bachelors. However, men and women with a professional degree are more likely to have two children or more. 3.Both Asian and Minority groups are less likely to report having two or more children when compared to the white group. 4. Men and women who were not born in the US are most likely as group to report having two or more children when compared to those born in the US.

These findings are grouping men and women together.

#Logit model
fit.logit<-svyglm(twochildrenx~gender+dgrdg+raceth+birth+agex, design= des, family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
#probit model (added for comparison)
fit.probit<-svyglm(twochildrenx~gender+dgrdg+raceth+birth+agex, design=des, family=binomial(link= "probit"))
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
summary(analysisf$agex)
## (22,29] (29,39] (39,49] 
##    2358   10004    9267
stargazer(fit.logit, type = "html", style="demography",covariate.labels=c("male","1masters","2doctorate","3professional", "asian","minorities", "not in the US","(29,39]","(39,49]"))
twochildrenx
male 0.258***
(0.062)
1masters -0.089
(0.066)
2doctorate -0.192
(0.144)
3professional 0.027
(0.112)
asian -0.497***
(0.105)
minorities -0.158
(0.088)
not in the US -0.135
(0.094)
(29,39] 1.221***
(0.137)
(39,49] 1.827***
(0.139)
Constant -0.611***
(0.136)
N 21,629
Log Likelihood -11,721.520
AIC 23,463.040
p < .05; p < .01; p < .001

The marginal effects allows us to take a closer look at the differences between the groups discussed previously.

  1. Males are 3% more likely than females to have two or more children
  2. Those with a master’s degree and doctoral degree are 1% and 3% less likely respectively, than those with a bacherlos degree to have two or more children. Those with a professional degree are 2% more likely to report having two or more children.
  3. Both Asian and Minorities are 5% and .8% less likely to report having two or more children
  4. Those who are not born the US are .4% less likely than those who were born in the US to report having two or more children in their household.
#Logit marginal effects
log.marg<-coef(fit.logit)*mean(dlogis(predict(fit.logit)), na.rm=T)
#Probit marginal effects
prob.marg<-coef(fit.probit)*mean(dnorm(predict(fit.probit)), na.rm=T)

plot(log.marg[-1], ylab="Marginal Effects", axes=T,xaxt="n", main="Marginal Effects from Logit and Probit models", ylim=c(-.25, .2))
axis(side=1, at=1:13, labels=F)
text(x=1:13, y=-.3,  srt = 45, pos= 1, xpd = TRUE,
     labels = c("male","1masters","2doctorate","3professional", "asian","minorities", "not in the US","age"))
points(prob.marg[-1], col=2)
abline(h=0, col=2)
legend("bottomright", legend=c("Logit Model", "Probit Model"), col=c("black", "red"),pch=1)

data.frame(m.logit=log.marg, m.probit=prob.marg)
##                         m.logit     m.probit
## (Intercept)        -0.127261999 -0.131314490
## gendermale          0.053702703  0.054439928
## dgrdg1masters      -0.018464069 -0.018030026
## dgrdg2doctorate    -0.039997902 -0.039731210
## dgrdg3professional  0.005685848  0.007273044
## racethasian        -0.103486652 -0.104015466
## racethminorities   -0.032819810 -0.032700668
## birthnot in the US -0.028146499 -0.027670429
## agex(29,39]         0.254310933  0.261031465
## agex(39,49]         0.380432965  0.386166460

Fitted Values

\[\text{Pr(twochildren)} = \frac{1}{1+exp({\beta_0 + \beta_1*gender + \beta_2*race /ethnicity+\beta_3*education+\beta_4*age+\beta_5*place of birth})}\]

#get a series of predicted probabilites for different "types" of people for each model
#expand.grid will generate all possible combinations of values you specify



dat<-expand.grid(gender=levels(factor(analysisf$gender)),dgrdg=levels(factor(analysisf$dgrdg)), twochildrenx=levels(factor(analysisf$twochildrenx)),raceth=levels(factor(analysisf$raceth)),birth=levels(factor(analysisf$birth)),agex=levels(factor(analysisf$agex)))
summary (dat)
##     gender              dgrdg                  twochildrenx
##  female:144   0bachelors   :72   one child           :144  
##  male  :144   1masters     :72   two or more children:144  
##               2doctorate   :72                             
##               3professional:72                             
##         raceth             birth          agex   
##  white     :96   in the US    :144   (22,29]:96  
##  asian     :96   not in the US:144   (29,39]:96  
##  minorities:96                       (39,49]:96  
## 
fit<-predict(fit.logit, newdata=dat, type="response")
dat$fitted.prob.lrm<-round(fit, 3)

head(dat, n=48)
##    gender         dgrdg         twochildrenx     raceth     birth    agex
## 1  female    0bachelors            one child      white in the US (22,29]
## 2    male    0bachelors            one child      white in the US (22,29]
## 3  female      1masters            one child      white in the US (22,29]
## 4    male      1masters            one child      white in the US (22,29]
## 5  female    2doctorate            one child      white in the US (22,29]
## 6    male    2doctorate            one child      white in the US (22,29]
## 7  female 3professional            one child      white in the US (22,29]
## 8    male 3professional            one child      white in the US (22,29]
## 9  female    0bachelors two or more children      white in the US (22,29]
## 10   male    0bachelors two or more children      white in the US (22,29]
## 11 female      1masters two or more children      white in the US (22,29]
## 12   male      1masters two or more children      white in the US (22,29]
## 13 female    2doctorate two or more children      white in the US (22,29]
## 14   male    2doctorate two or more children      white in the US (22,29]
## 15 female 3professional two or more children      white in the US (22,29]
## 16   male 3professional two or more children      white in the US (22,29]
## 17 female    0bachelors            one child      asian in the US (22,29]
## 18   male    0bachelors            one child      asian in the US (22,29]
## 19 female      1masters            one child      asian in the US (22,29]
## 20   male      1masters            one child      asian in the US (22,29]
## 21 female    2doctorate            one child      asian in the US (22,29]
## 22   male    2doctorate            one child      asian in the US (22,29]
## 23 female 3professional            one child      asian in the US (22,29]
## 24   male 3professional            one child      asian in the US (22,29]
## 25 female    0bachelors two or more children      asian in the US (22,29]
## 26   male    0bachelors two or more children      asian in the US (22,29]
## 27 female      1masters two or more children      asian in the US (22,29]
## 28   male      1masters two or more children      asian in the US (22,29]
## 29 female    2doctorate two or more children      asian in the US (22,29]
## 30   male    2doctorate two or more children      asian in the US (22,29]
## 31 female 3professional two or more children      asian in the US (22,29]
## 32   male 3professional two or more children      asian in the US (22,29]
## 33 female    0bachelors            one child minorities in the US (22,29]
## 34   male    0bachelors            one child minorities in the US (22,29]
## 35 female      1masters            one child minorities in the US (22,29]
## 36   male      1masters            one child minorities in the US (22,29]
## 37 female    2doctorate            one child minorities in the US (22,29]
## 38   male    2doctorate            one child minorities in the US (22,29]
## 39 female 3professional            one child minorities in the US (22,29]
## 40   male 3professional            one child minorities in the US (22,29]
## 41 female    0bachelors two or more children minorities in the US (22,29]
## 42   male    0bachelors two or more children minorities in the US (22,29]
## 43 female      1masters two or more children minorities in the US (22,29]
## 44   male      1masters two or more children minorities in the US (22,29]
## 45 female    2doctorate two or more children minorities in the US (22,29]
## 46   male    2doctorate two or more children minorities in the US (22,29]
## 47 female 3professional two or more children minorities in the US (22,29]
## 48   male 3professional two or more children minorities in the US (22,29]
##    fitted.prob.lrm
## 1            0.352
## 2            0.413
## 3            0.332
## 4            0.391
## 5            0.309
## 6            0.367
## 7            0.358
## 8            0.419
## 9            0.352
## 10           0.413
## 11           0.332
## 12           0.391
## 13           0.309
## 14           0.367
## 15           0.358
## 16           0.419
## 17           0.248
## 18           0.299
## 19           0.232
## 20           0.281
## 21           0.214
## 22           0.261
## 23           0.253
## 24           0.305
## 25           0.248
## 26           0.299
## 27           0.232
## 28           0.281
## 29           0.214
## 30           0.261
## 31           0.253
## 32           0.305
## 33           0.317
## 34           0.375
## 35           0.298
## 36           0.354
## 37           0.277
## 38           0.331
## 39           0.323
## 40           0.381
## 41           0.317
## 42           0.375
## 43           0.298
## 44           0.354
## 45           0.277
## 46           0.331
## 47           0.323
## 48           0.381

We explore a few of the interesting cases presented in the fitted values. ##Professional Degree

#Professional Degree - Ages 22-29
dat[which(dat$gender=="female"&dat$twochildrenx=="two or more children"&dat$dgrdg=="3professional"&dat$agex=="(22,29]"),]
##    gender         dgrdg         twochildrenx     raceth         birth
## 15 female 3professional two or more children      white     in the US
## 31 female 3professional two or more children      asian     in the US
## 47 female 3professional two or more children minorities     in the US
## 63 female 3professional two or more children      white not in the US
## 79 female 3professional two or more children      asian not in the US
## 95 female 3professional two or more children minorities not in the US
##       agex fitted.prob.lrm
## 15 (22,29]           0.358
## 31 (22,29]           0.253
## 47 (22,29]           0.323
## 63 (22,29]           0.328
## 79 (22,29]           0.229
## 95 (22,29]           0.294
dat[which(dat$gender=="male"&dat$twochildrenx=="two or more children"&dat$dgrdg=="3professional"&dat$agex=="(22,29]"),]
##    gender         dgrdg         twochildrenx     raceth         birth
## 16   male 3professional two or more children      white     in the US
## 32   male 3professional two or more children      asian     in the US
## 48   male 3professional two or more children minorities     in the US
## 64   male 3professional two or more children      white not in the US
## 80   male 3professional two or more children      asian not in the US
## 96   male 3professional two or more children minorities not in the US
##       agex fitted.prob.lrm
## 16 (22,29]           0.419
## 32 (22,29]           0.305
## 48 (22,29]           0.381
## 64 (22,29]           0.387
## 80 (22,29]           0.277
## 96 (22,29]           0.350
#Professional Degree - Ages 39-49
dat[which(dat$gender=="female"&dat$twochildrenx=="two or more children"&dat$dgrdg=="3professional"&dat$agex=="(39,49]"),]
##     gender         dgrdg         twochildrenx     raceth         birth
## 207 female 3professional two or more children      white     in the US
## 223 female 3professional two or more children      asian     in the US
## 239 female 3professional two or more children minorities     in the US
## 255 female 3professional two or more children      white not in the US
## 271 female 3professional two or more children      asian not in the US
## 287 female 3professional two or more children minorities not in the US
##        agex fitted.prob.lrm
## 207 (39,49]           0.776
## 223 (39,49]           0.678
## 239 (39,49]           0.748
## 255 (39,49]           0.752
## 271 (39,49]           0.648
## 287 (39,49]           0.721
dat[which(dat$gender=="male"&dat$twochildrenx=="two or more children"&dat$dgrdg=="3professional"&dat$agex=="(39,49]"),]
##     gender         dgrdg         twochildrenx     raceth         birth
## 208   male 3professional two or more children      white     in the US
## 224   male 3professional two or more children      asian     in the US
## 240   male 3professional two or more children minorities     in the US
## 256   male 3professional two or more children      white not in the US
## 272   male 3professional two or more children      asian not in the US
## 288   male 3professional two or more children minorities not in the US
##        agex fitted.prob.lrm
## 208 (39,49]           0.818
## 224 (39,49]           0.732
## 240 (39,49]           0.793
## 256 (39,49]           0.797
## 272 (39,49]           0.704
## 288 (39,49]           0.770

Doctoral Degrees

#Doctoral Degree - Ages 22-29
dat[which(dat$gender=="female"&dat$twochildrenx=="two or more children"&dat$dgrdg=="2doctorate"&dat$agex=="(22,29]"),]
##    gender      dgrdg         twochildrenx     raceth         birth    agex
## 13 female 2doctorate two or more children      white     in the US (22,29]
## 29 female 2doctorate two or more children      asian     in the US (22,29]
## 45 female 2doctorate two or more children minorities     in the US (22,29]
## 61 female 2doctorate two or more children      white not in the US (22,29]
## 77 female 2doctorate two or more children      asian not in the US (22,29]
## 93 female 2doctorate two or more children minorities not in the US (22,29]
##    fitted.prob.lrm
## 13           0.309
## 29           0.214
## 45           0.277
## 61           0.281
## 77           0.192
## 93           0.251
dat[which(dat$gender=="male"&dat$twochildrenx=="two or more children"&dat$dgrdg=="2doctorate"&dat$agex=="(22,29]"),]
##    gender      dgrdg         twochildrenx     raceth         birth    agex
## 14   male 2doctorate two or more children      white     in the US (22,29]
## 30   male 2doctorate two or more children      asian     in the US (22,29]
## 46   male 2doctorate two or more children minorities     in the US (22,29]
## 62   male 2doctorate two or more children      white not in the US (22,29]
## 78   male 2doctorate two or more children      asian not in the US (22,29]
## 94   male 2doctorate two or more children minorities not in the US (22,29]
##    fitted.prob.lrm
## 14           0.367
## 30           0.261
## 46           0.331
## 62           0.336
## 78           0.236
## 94           0.302
#Doctoral Degree - Ages 39-49
dat[which(dat$gender=="female"&dat$twochildrenx=="two or more children"&dat$dgrdg=="2doctorate"&dat$agex=="(39,49]"),]
##     gender      dgrdg         twochildrenx     raceth         birth
## 205 female 2doctorate two or more children      white     in the US
## 221 female 2doctorate two or more children      asian     in the US
## 237 female 2doctorate two or more children minorities     in the US
## 253 female 2doctorate two or more children      white not in the US
## 269 female 2doctorate two or more children      asian not in the US
## 285 female 2doctorate two or more children minorities not in the US
##        agex fitted.prob.lrm
## 205 (39,49]           0.736
## 221 (39,49]           0.629
## 237 (39,49]           0.704
## 253 (39,49]           0.709
## 269 (39,49]           0.597
## 285 (39,49]           0.675
dat[which(dat$gender=="male"&dat$twochildrenx=="two or more children"&dat$dgrdg=="2doctorate"&dat$agex=="(39,49]"),]
##     gender      dgrdg         twochildrenx     raceth         birth
## 206   male 2doctorate two or more children      white     in the US
## 222   male 2doctorate two or more children      asian     in the US
## 238   male 2doctorate two or more children minorities     in the US
## 254   male 2doctorate two or more children      white not in the US
## 270   male 2doctorate two or more children      asian not in the US
## 286   male 2doctorate two or more children minorities not in the US
##        agex fitted.prob.lrm
## 206 (39,49]           0.783
## 222 (39,49]           0.687
## 238 (39,49]           0.755
## 254 (39,49]           0.759
## 270 (39,49]           0.657
## 286 (39,49]           0.729

Nested model comparison (two or more children)

options(survey.lonely.psu = "adjust")
des1<-svydesign(ids=~1, strata=~sample, weights=~weight,data= analysisf, nest=T)
#Logit models
fit.logit1<-svyglm(twochildrenx~gender+agex,design= des1, family=binomial) #gender and age only
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
fit.logit2<-svyglm(twochildrenx~gender+agex+dgrdg,design= des1, family=binomial) #gender+age+education
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
fit.logit3<-svyglm(twochildrenx~gender+agex+dgrdg+raceth+birth,design= des1, family=binomial)#gender+age+education+racen and place of birth
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!

Now, let’s see if, by controlling for education and age, some of these differences go away, or are reduced. The fancy word for when an effect is reduced is “attenuated”. We will also do a test to see if the model with the education term significantly improves the model fit. Traditionally this would be done using a likelihood ratio test, but in survey models, that’s not kosherload

summary(fit.logit2)
## 
## Call:
## svyglm(formula = twochildrenx ~ gender + agex + dgrdg, design = des1, 
##     family = binomial)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~sample, weights = ~weight, data = analysisf, 
##     nest = T)
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -0.69054    0.13434  -5.140 2.77e-07 ***
## gendermale          0.23457    0.06173   3.800 0.000145 ***
## agex(29,39]         1.18487    0.13710   8.643  < 2e-16 ***
## agex(39,49]         1.80074    0.13856  12.996  < 2e-16 ***
## dgrdg1masters      -0.11318    0.06509  -1.739 0.082071 .  
## dgrdg2doctorate    -0.35920    0.13523  -2.656 0.007907 ** 
## dgrdg3professional  0.01188    0.11386   0.104 0.916875    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9985291)
## 
## Number of Fisher Scoring iterations: 4
regTermTest(fit.logit2, test.terms = ~dgrdg, method="Wald", df = NULL)
## Wald test for dgrdg
##  in svyglm(formula = twochildrenx ~ gender + agex + dgrdg, design = des1, 
##     family = binomial)
## F =  3.001236  on  3  and  21622  df: p= 0.029264

A comparison of the third model

summary(fit.logit3)
## 
## Call:
## svyglm(formula = twochildrenx ~ gender + agex + dgrdg + raceth + 
##     birth, design = des1, family = binomial)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~sample, weights = ~weight, data = analysisf, 
##     nest = T)
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -0.61110    0.13563  -4.506 6.66e-06 ***
## gendermale          0.25787    0.06190   4.166 3.11e-05 ***
## agex(29,39]         1.22117    0.13716   8.903  < 2e-16 ***
## agex(39,49]         1.82680    0.13898  13.145  < 2e-16 ***
## dgrdg1masters      -0.08866    0.06601  -1.343   0.1792    
## dgrdg2doctorate    -0.19207    0.14405  -1.333   0.1824    
## dgrdg3professional  0.02730    0.11221   0.243   0.8078    
## racethasian        -0.49693    0.10501  -4.732 2.23e-06 ***
## racethminorities   -0.15760    0.08752  -1.801   0.0718 .  
## birthnot in the US -0.13516    0.09430  -1.433   0.1518    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.999826)
## 
## Number of Fisher Scoring iterations: 4
regTermTest(fit.logit3, test.terms=~birth+raceth, method="Wald", df = NULL)
## Wald test for birth raceth
##  in svyglm(formula = twochildrenx ~ gender + agex + dgrdg + raceth + 
##     birth, design = des1, family = binomial)
## F =  19.92706  on  3  and  21619  df: p= 6.8128e-13

Salary

super1<-lm(salary4~dgrdg+gender+birth+twochildrenx, data=analysisf)
library(broom)
tidy(super1)
##                               term  estimate std.error statistic
## 1                      (Intercept) 49853.147  539.7450 92.364261
## 2                    dgrdg1masters  9781.622  494.9543 19.762676
## 3                  dgrdg2doctorate  7383.630 1388.4585  5.317861
## 4               dgrdg3professional 36248.481 1137.9632 31.853827
## 5                       gendermale 26285.296  476.5761 55.154461
## 6               birthnot in the US  7093.896  532.7124 13.316559
## 7 twochildrenxtwo or more children  5741.066  489.5883 11.726314
##         p.value
## 1  0.000000e+00
## 2  3.597379e-86
## 3  1.060368e-07
## 4 1.229952e-217
## 5  0.000000e+00
## 6  2.673429e-40
## 7  1.165330e-31

After evaluating for normality of errors, we find some discrepancies.

qqnorm(rstudent(super1), main="Q-Q Plot for Model Residuals")
qqline(rstudent(super1), col="red")

We apply a log transformation to address the anormalities. We see that the second graph “Q-Q Plot for Model Residuals Version 2” does the best job however, there is still some skewness possibly from some extreme values in the data set. The rest of the transformation are not helpful.

super12<-lm( log (salary4)~dgrdg+gender+birth+twochildrenx, data=analysisf)
qqnorm(rstudent(super12), main="Q-Q Plot for Model Residuals Version 1")
qqline(rstudent(super12), col="red")

super13<-lm(sqrt(salary4)~dgrdg, data=analysisf)
qqnorm(rstudent(super13), main="Q-Q Plot for Model Residuals Version 2")
qqline(rstudent(super13), col="red")

super14<-lm(I(1/salary4)~dgrdg, data=analysisf)
qqnorm(rstudent(super14), main="Q-Q Plot for Model Residuals Version 3")

#qqline(rstudent(super14), col="red")

Analysis of Variance Table with Five Models & Anova Test- salary as continous variable

Each model under this analysis increases in complexity. The first model considers number of children and gender. The second model includes number of children, gender and level of education. The third model includes number of children, gender, education and adds race/ethnicity. The fourth model, includes number of children, gender, education, race and age and the last model includes number of children, gender, education, race, age and place of birth.

model1<-lm(scale(sqrt(salary4))~chtot+gender, data=analysisf)
model2<-lm(scale(sqrt(salary4))~chtot+gender+dgrdg, data=analysisf)
model3<-lm(scale(sqrt(salary4))~chtot+gender+dgrdg+raceth, data=analysisf)
model4<-lm(scale(sqrt(salary4))~chtot+gender+dgrdg+raceth+agex, data=analysisf)
model5<-lm(scale(sqrt(salary4))~chtot+gender+dgrdg+raceth+agex+birth, data=analysisf)
anova(model1,model2, model3, model4, model5)
## Analysis of Variance Table
## 
## Model 1: scale(sqrt(salary4)) ~ chtot + gender
## Model 2: scale(sqrt(salary4)) ~ chtot + gender + dgrdg
## Model 3: scale(sqrt(salary4)) ~ chtot + gender + dgrdg + raceth
## Model 4: scale(sqrt(salary4)) ~ chtot + gender + dgrdg + raceth + agex
## Model 5: scale(sqrt(salary4)) ~ chtot + gender + dgrdg + raceth + agex + 
##     birth
##   Res.Df   RSS Df Sum of Sq        F Pr(>F)    
## 1  21626 18908                                 
## 2  21623 18036  3    871.96 367.5254 <2e-16 ***
## 3  21621 17802  2    233.81 147.8213 <2e-16 ***
## 4  21619 17098  2    703.70 444.9083 <2e-16 ***
## 5  21618 17096  1      1.93   2.4364 0.1186    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AICs<-AIC(model1, model2, model3, model4, model5)
AICs$diff<-AICs$AIC-AICs$AIC[1]
AICs
##        df      AIC      diff
## model1  4 58480.14     0.000
## model2  7 57464.95 -1015.187
## model3  9 57186.73 -1293.406
## model4 11 56318.39 -2161.745
## model5 12 56317.96 -2162.182

Nested Models for Salary

library(stargazer)
summary(model1)

Call: lm(formula = scale(sqrt(salary4)) ~ chtot + gender, data = analysisf)

Residuals: Min 1Q Median 3Q Max -3.4859 -0.5746 0.0327 0.6497 2.0871

Coefficients: Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.45786 0.01228 -37.297 <2e-16 chtottwo or more children 0.12367 0.01322 9.355 <2e-16 gendermale 0.69354 0.01281 54.146 <2e-16 *** — Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ‘’ 1

Residual standard error: 0.935 on 21626 degrees of freedom Multiple R-squared: 0.1258, Adjusted R-squared: 0.1257 F-statistic: 1556 on 2 and 21626 DF, p-value: < 2.2e-16

summary(model2)

Call: lm(formula = scale(sqrt(salary4)) ~ chtot + gender + dgrdg, data = analysisf)

Residuals: Min 1Q Median 3Q Max -4.0068 -0.5361 0.0390 0.6091 2.2653

Coefficients: Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.63605 0.01392 -45.691 < 2e-16 chtottwo or more children 0.12352 0.01292 9.559 < 2e-16 gendermale 0.71687 0.01256 57.072 < 2e-16 dgrdg1masters 0.26914 0.01301 20.691 < 2e-16 dgrdg2doctorate 0.28031 0.03613 7.759 8.96e-15 dgrdg3professional 0.85112 0.03006 28.317 < 2e-16 — Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ‘’ 1

Residual standard error: 0.9133 on 21623 degrees of freedom Multiple R-squared: 0.1661, Adjusted R-squared: 0.1659 F-statistic: 861.3 on 5 and 21623 DF, p-value: < 2.2e-16

summary(model3)

Call: lm(formula = scale(sqrt(salary4)) ~ chtot + gender + dgrdg + raceth, data = analysisf)

Residuals: Min 1Q Median 3Q Max -3.8689 -0.5276 0.0412 0.6108 2.3800

Coefficients: Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.63100 0.01503 -41.991 < 2e-16 chtottwo or more children 0.13048 0.01287 10.138 < 2e-16 gendermale 0.69922 0.01254 55.745 < 2e-16 dgrdg1masters 0.24989 0.01298 19.254 < 2e-16 dgrdg2doctorate 0.23283 0.03603 6.462 1.05e-10 dgrdg3professional 0.83856 0.02987 28.072 < 2e-16 racethasian 0.20656 0.01662 12.428 < 2e-16 racethminorities -0.11969 0.01529 -7.829 5.13e-15 — Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ‘’ 1

Residual standard error: 0.9074 on 21621 degrees of freedom Multiple R-squared: 0.1769, Adjusted R-squared: 0.1766 F-statistic: 663.8 on 7 and 21621 DF, p-value: < 2.2e-16

summary(model4)

Call: lm(formula = scale(sqrt(salary4)) ~ chtot + gender + dgrdg + raceth + agex, data = analysisf)

Residuals: Min 1Q Median 3Q Max -3.7478 -0.5060 0.0656 0.5984 2.6203

Coefficients: Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.99106 0.02100 -47.184 < 2e-16 chtottwo or more children 0.02817 0.01307 2.155 0.0312
gendermale 0.67260 0.01233 54.564 < 2e-16
dgrdg1masters 0.22592 0.01283 17.606 < 2e-16 dgrdg2doctorate 0.17467 0.03537 4.939 7.92e-07 dgrdg3professional 0.78090 0.02937 26.589 < 2e-16 racethasian 0.17762 0.01632 10.882 < 2e-16 racethminorities -0.10133 0.01500 -6.758 1.44e-11 agex(29,39] 0.40719 0.02092 19.467 < 2e-16 agex(39,49] 0.62319 0.02148 29.010 < 2e-16 ** — Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ‘’ 1

Residual standard error: 0.8893 on 21619 degrees of freedom Multiple R-squared: 0.2094, Adjusted R-squared: 0.2091 F-statistic: 636.4 on 9 and 21619 DF, p-value: < 2.2e-16

summary(model5)

Call: lm(formula = scale(sqrt(salary4)) ~ chtot + gender + dgrdg + raceth + agex + birth, data = analysisf)

Residuals: Min 1Q Median 3Q Max -3.7415 -0.5048 0.0679 0.5990 2.6207

Coefficients: Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.99142 0.02100 -47.200 < 2e-16 chtottwo or more children 0.02853 0.01307 2.182 0.0291
gendermale 0.67149 0.01235 54.385 < 2e-16
dgrdg1masters 0.22474 0.01285 17.484 < 2e-16 dgrdg2doctorate 0.16626 0.03577 4.648 3.38e-06 dgrdg3professional 0.78140 0.02937 26.605 < 2e-16 racethasian 0.15759 0.02076 7.591 3.30e-14 racethminorities -0.10598 0.01529 -6.932 4.25e-12 agex(29,39] 0.40612 0.02093 19.406 < 2e-16 agex(39,49] 0.62088 0.02153 28.835 < 2e-16 ** birthnot in the US 0.02733 0.01751 1.561 0.1186
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ‘’ 1

Residual standard error: 0.8893 on 21618 degrees of freedom Multiple R-squared: 0.2095, Adjusted R-squared: 0.2092 F-statistic: 573 on 10 and 21618 DF, p-value: < 2.2e-16

stargazer(model1, model2, model3, model4, model5, type = "html", style = "demography", ci = T)
scale(sqrt(salary4))
Model 1 Model 2 Model 3 Model 4 Model 5
chtottwo or more children 0.124*** 0.124*** 0.130*** 0.028* 0.029*
(0.098, 0.150) (0.098, 0.149) (0.105, 0.156) (0.003, 0.054) (0.003, 0.054)
gendermale 0.694*** 0.717*** 0.699*** 0.673*** 0.671***
(0.668, 0.719) (0.692, 0.741) (0.675, 0.724) (0.648, 0.697) (0.647, 0.696)
dgrdg1masters 0.269*** 0.250*** 0.226*** 0.225***
(0.244, 0.295) (0.224, 0.275) (0.201, 0.251) (0.200, 0.250)
dgrdg2doctorate 0.280*** 0.233*** 0.175*** 0.166***
(0.210, 0.351) (0.162, 0.303) (0.105, 0.244) (0.096, 0.236)
dgrdg3professional 0.851*** 0.839*** 0.781*** 0.781***
(0.792, 0.910) (0.780, 0.897) (0.723, 0.838) (0.724, 0.839)
racethasian 0.207*** 0.178*** 0.158***
(0.174, 0.239) (0.146, 0.210) (0.117, 0.198)
racethminorities -0.120*** -0.101*** -0.106***
(-0.150, -0.090) (-0.131, -0.072) (-0.136, -0.076)
agex(29,39] 0.407*** 0.406***
(0.366, 0.448) (0.365, 0.447)
agex(39,49] 0.623*** 0.621***
(0.581, 0.665) (0.579, 0.663)
birthnot in the US 0.027
(-0.007, 0.062)
Constant -0.458*** -0.636*** -0.631*** -0.991*** -0.991***
(-0.482, -0.434) (-0.663, -0.609) (-0.660, -0.602) (-1.032, -0.950) (-1.033, -0.950)
N 21,629 21,629 21,629 21,629 21,629
R2 0.126 0.166 0.177 0.209 0.210
Adjusted R2 0.126 0.166 0.177 0.209 0.209
Residual Std. Error 0.935 (df = 21626) 0.913 (df = 21623) 0.907 (df = 21621) 0.889 (df = 21619) 0.889 (df = 21618)
F Statistic 1,555.663*** (df = 2; 21626) 861.336*** (df = 5; 21623) 663.829*** (df = 7; 21621) 636.373*** (df = 9; 21619) 573.018*** (df = 10; 21618)
p < .05; p < .01; p < .001

Overall the model that best fits our data is Model 4 which includes age.

Gamma Regression Model

library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
fit.gr<-glm(salary4~factor(gender)+factor(raceth)+factor(dgrdg)+factor(agex)+factor(birth)+factor(chtot), data=analysisf, family= Gamma(link=log))
summary(fit.gr)
## 
## Call:
## glm(formula = salary4 ~ factor(gender) + factor(raceth) + factor(dgrdg) + 
##     factor(agex) + factor(birth) + factor(chtot), family = Gamma(link = log), 
##     data = analysisf)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6512  -0.3573  -0.0256   0.2601   1.6075  
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)                       10.647910   0.010902 976.734  < 2e-16
## factor(gender)male                 0.340854   0.006408  53.191  < 2e-16
## factor(raceth)asian                0.084212   0.010775   7.816 5.71e-15
## factor(raceth)minorities          -0.060384   0.007935  -7.610 2.85e-14
## factor(dgrdg)1masters              0.118459   0.006671  17.757  < 2e-16
## factor(dgrdg)2doctorate            0.091119   0.018567   4.908 9.28e-07
## factor(dgrdg)3professional         0.405232   0.015243  26.584  < 2e-16
## factor(agex)(29,39]                0.260923   0.010861  24.023  < 2e-16
## factor(agex)(39,49]                0.379967   0.011175  34.001  < 2e-16
## factor(birth)not in the US         0.027851   0.009087   3.065  0.00218
## factor(chtot)two or more children  0.014510   0.006786   2.138  0.03251
##                                      
## (Intercept)                       ***
## factor(gender)male                ***
## factor(raceth)asian               ***
## factor(raceth)minorities          ***
## factor(dgrdg)1masters             ***
## factor(dgrdg)2doctorate           ***
## factor(dgrdg)3professional        ***
## factor(agex)(29,39]               ***
## factor(agex)(39,49]               ***
## factor(birth)not in the US        ** 
## factor(chtot)two or more children *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.2130266)
## 
##     Null deviance: 7839.3  on 21628  degrees of freedom
## Residual deviance: 6576.1  on 21618  degrees of freedom
## AIC: 515093
## 
## Number of Fisher Scoring iterations: 5
##Percentage increase in the Mean for Each Coefficient 
round(exp(summary(fit.gr)$coef[-1,1]), 3)
##                factor(gender)male               factor(raceth)asian 
##                             1.406                             1.088 
##          factor(raceth)minorities             factor(dgrdg)1masters 
##                             0.941                             1.126 
##           factor(dgrdg)2doctorate        factor(dgrdg)3professional 
##                             1.095                             1.500 
##               factor(agex)(29,39]               factor(agex)(39,49] 
##                             1.298                             1.462 
##        factor(birth)not in the US factor(chtot)two or more children 
##                             1.028                             1.015