All the variables in this dataset were compiled from a sample of the National Survey of College Graduates through the IPUMS-HigherEd site.
The data used for this analysis consists of a sample from the National Survey of College Graduates gathered through IPUMS-Higher Ed.
library(car)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.1. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(ggplot2)
library(pander)
library(knitr)
library(haven)
cpsipums4<-read_dta("~/Statistics II/Data for project/highered/highered_00010.dta")
names(cpsipums4) #print the column names
## [1] "personid" "year" "weight" "sample" "surid" "age"
## [7] "gender" "minrty" "raceth" "bthus" "chtot" "dgrdg"
## [13] "lfstat" "hrswkgr" "jobins" "jobpens" "jobvac" "ocedrlp"
## [19] "prmbr" "salary"
#income grouping. NA's and 407 individuals who earned zero dollars have been removed from this variable.
cpsipums4$salary3<-ifelse(cpsipums4$salary<=0, NA, cpsipums4$salary)
cpsipums4$salary4<-ifelse(cpsipums4$salary3==9999998, NA, cpsipums4$salary3)
summary(cpsipums4$salary4)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1000 42000 67000 71284 96000 150000 13559
#number of children living in the household
cpsipums4$onechild<-recode(cpsipums4$chtot, recodes=01)
cpsipums4$twoormore<-recode(cpsipums4$chtot, recodes=03)
cpsipums4$chtot<-recode(cpsipums4$chtot, recodes="00='no children'; 01='one child'; 02='one to three children'; 03='two or more children'; 04='more than 3 children'; 98=NA", as.factor.result=T)
cpsipums4$chtot<-relevel(cpsipums4$chtot, ref = "one child")
table(cpsipums4$chtot)
##
## one child two or more children
## 14472 20517
#Do they have two or more children?
cpsipums4$twochildrenx<-recode(cpsipums4$chtot, recodes="3=1; 1=0; 98=NA")
table(cpsipums4$twochildrenx)
##
## one child two or more children
## 14472 20517
#gender
cpsipums4$female<-recode(cpsipums4$gender, recodes=1)
cpsipums4$male<-recode(cpsipums4$gender, recodes=2)
cpsipums4$gender<-recode(cpsipums4$gender, recodes="1='female'; 2='male'", as.factor.result=T)
table(cpsipums4$gender)
##
## female male
## 38626 45830
cpsipums4$gender<-relevel(cpsipums4$gender, ref = "female")
#race/ethnicity
#There are no entries in this data set under "other"
cpsipums4$asian<-recode(cpsipums4$raceth, recodes=1)
cpsipums4$white<-recode(cpsipums4$raceth, recodes=2)
cpsipums4$minorities<-recode(cpsipums4$raceth, recodes=3)
cpsipums4$other<-recode(cpsipums4$raceth, recodes=4)
cpsipums4$raceth<-recode(cpsipums4$raceth, recodes="1='asian'; 2='white'; 3='minorities'", as.factor.result=T)
cpsipums4$raceth<-relevel(cpsipums4$raceth, ref = "white")
table(cpsipums4$raceth)
##
## white asian minorities
## 51846 13868 18742
#education level
cpsipums4$dgrdg<-recode(cpsipums4$dgrdg, recodes="1='0bachelors'; 2='1masters'; 3='2doctorate'; 4='3professional'", as.factor.result=T)
table(cpsipums4$dgrdg, cpsipums4$gender)
##
## female male
## 0bachelors 18930 25269
## 1masters 16756 16585
## 2doctorate 1103 1611
## 3professional 1837 2365
#place of birth (There are no NAs)
cpsipums4$birth<-recode (cpsipums4$bthus, recodes="00='not in the US'; 01='in the US'; 99='NA'", as.factor.result=T)
table(cpsipums4$birth, cpsipums4$gender)
##
## female male
## in the US 29873 34003
## not in the US 8753 11827
summary(cpsipums4$birth)
## in the US not in the US
## 63876 20580
#age
cpsipums4$agex<-ifelse(cpsipums4$age>=49, NA, cpsipums4$age)
cpsipums4$agex<-cut(cpsipums4$agex, breaks=c(22,29,39,49))
summary(cpsipums4$agex)
## (22,29] (29,39] (39,49] NA's
## 19733 20888 13499 30336
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
analysisf<-cpsipums4%>%
select(chtot,twochildrenx, gender,dgrdg, salary4, raceth,birth, agex, weight, sample) %>%
filter( complete.cases(.))
#DESIGN
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~sample, weights=~weight, data =analysisf)
The percentage of males with two children or more is higher than the observed percentage on the females. The opposite pattern is observed amongst the males with one child with a lower percentage than the females with one child.
library(ggplot2)
chi2<-svyby(formula = ~gender, by = ~chtot, design = des, FUN = svymean, na.rm=T)
svychisq(~gender+chtot, design = des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~gender + chtot, design = des)
## F = 26.213, ndf = 1, ddf = 21628, p-value = 3.084e-07
qplot(x=chi2$chtot,y=chi2$gendermale, data=chi2 ,xlab="children", ylab="males" )+
geom_errorbar(aes(x=chtot, ymin=gendermale,ymax=gendermale), width=.25)+
ggtitle(label = "% of Children for Males")
qplot(x=chi2$chtot,y=chi2$genderfemale, data=chi2 ,xlab="children", ylab="females" )+
geom_errorbar(aes(x=chtot, ymin=genderfemale,ymax=genderfemale), width=.25)+
ggtitle(label = "% of Children for Females")
###Percentage of Children Living in the Household by Level of Education A further comparison of genders confirms the pattern observed particularly when comparing men and women with a doctorate degree and a masters degree. However, this trend reverses when looking at participants with a professional degree; women are more likely to report having 2 or more children in the household than men.
fert2<-svyby(formula = ~gender, by = ~chtot+dgrdg, design = des, FUN = svymean,
na.rm=T)
fert2
## chtot dgrdg
## one child.0bachelors one child 0bachelors
## two or more children.0bachelors two or more children 0bachelors
## one child.1masters one child 1masters
## two or more children.1masters two or more children 1masters
## one child.2doctorate one child 2doctorate
## two or more children.2doctorate two or more children 2doctorate
## one child.3professional one child 3professional
## two or more children.3professional two or more children 3professional
## genderfemale gendermale se.genderfemale
## one child.0bachelors 0.5122710 0.4877290 0.01674961
## two or more children.0bachelors 0.4433727 0.5566273 0.01199109
## one child.1masters 0.5806455 0.4193545 0.01748147
## two or more children.1masters 0.4917566 0.5082434 0.01367640
## one child.2doctorate 0.3925541 0.6074459 0.04423637
## two or more children.2doctorate 0.3249362 0.6750638 0.04600357
## one child.3professional 0.4894034 0.5105966 0.04431615
## two or more children.3professional 0.4336513 0.5663487 0.02804066
## se.gendermale
## one child.0bachelors 0.01674961
## two or more children.0bachelors 0.01199109
## one child.1masters 0.01748147
## two or more children.1masters 0.01367640
## one child.2doctorate 0.04423637
## two or more children.2doctorate 0.04600357
## one child.3professional 0.04431615
## two or more children.3professional 0.02804066
fert2$chtot_rec<-rep(c("one child","two or more children"),2)
fert2$dgrdg_rec<-factor(c(rep("0bachelors", 2), rep("1masters", 2), rep("2doctorate", 2), rep("3professional", 2)), ordered = T)
#fix the order of the education factor levels
fert2$dgrdg_rec<-factor(fert2$dgrdg_rec, levels(fert2$dgrdg_rec)[c(4,3,2,1)])
#FEMALES
A<-ggplot(fert2, aes(dgrdg_rec,fert2$genderfemale),xlab="education", ylab="% gender")
A<-A+geom_point(aes(colour=chtot_rec))
A<-A+geom_line(aes(colour=chtot_rec,group=chtot_rec))
A<-A+ylab("female")
A<-A+xlab("Education Level")
A+ggtitle("Percentage of Children in the Household for Females by Level of Education")
#MALES
B<-ggplot(fert2, aes(dgrdg_rec,fert2$gendermale),xlab="education", ylab="% gender")
B<-B+geom_point(aes(colour=chtot_rec))
B<-B+geom_line(aes(colour=chtot_rec,group=chtot_rec))
B<-B+ylab("male")
B<-B+xlab("Education Level")
B+ggtitle("Percentage of Children in the Household for Males by Level of Education")
###Percentage of Children Living in the Household by Gender by Place of Birth A comparison by gender and place of birth shows that females with a degree or higher who were born in the US are more likely to have one child than two or more. For women born outside of the US this trend is also found however, they are less likely to have one or two children in comparison to women born in the US.
A percentage comparison of males shows the opposite result. Men born outside the US are more likely to report having 2 or more children than men born in the US.
summary (cpsipums4$birth)
## in the US not in the US
## 63876 20580
fert3<-svyby(formula = ~gender, by = ~chtot+birth, design = des, FUN = svymean,
na.rm=T)
fert3
## chtot birth
## one child.in the US one child in the US
## two or more children.in the US two or more children in the US
## one child.not in the US one child not in the US
## two or more children.not in the US two or more children not in the US
## genderfemale gendermale se.genderfemale
## one child.in the US 0.5644567 0.4355433 0.013810481
## two or more children.in the US 0.4699535 0.5300465 0.009779827
## one child.not in the US 0.4355965 0.5644035 0.022156964
## two or more children.not in the US 0.4000004 0.5999996 0.017313588
## se.gendermale
## one child.in the US 0.013810481
## two or more children.in the US 0.009779827
## one child.not in the US 0.022156964
## two or more children.not in the US 0.017313588
fert3$chtot_rec<-rep(c("one child","two or more children"),2)
fert3$birth_rec<-factor(c(rep("intheUS", 2), rep("notintheUS", 2)), ordered = T)
#FEMALES
A<-ggplot(fert3, aes(birth_rec,fert3$genderfemale),xlab="place of birth", ylab="% gender")
A<-A+geom_point(aes(colour=chtot_rec))
A<-A+geom_line(aes(colour=chtot_rec,group=chtot_rec))
A<-A+ylab("female")
A<-A+xlab("place of birth")
A+ggtitle("Percentage of Children in the Household for Females by Place of Birth")
#MALES
A<-ggplot(fert3, aes(birth_rec,fert3$gendermale),xlab="place of birth", ylab="% gender")
A<-A+geom_point(aes(colour=chtot_rec))
A<-A+geom_line(aes(colour=chtot_rec,group=chtot_rec))
A<-A+ylab("male")
A<-A+xlab("place of birth")
A+ggtitle("Percentage of Children in the Household for Males by Place of Birth")
summary(cpsipums4$raceth)
## white asian minorities
## 51846 13868 18742
A look by race and ethnicity shows that the white participants are more likely to report having two or more children accross all level of education than one child, although the gap closes for those with a professional degree.For Asian participants, the trends reverses; they are more likely to report having one child accross all levels of education. Finally, the minority group shows some interesting findings. At the bacherlor’s level, the percentages between having one child or two or more are the same. When we look at minorities with a doctoral or master’s degree, they are more likely to report having one child however, those with a professional degree report having two or more children more often.
fert4<-svyby(formula = ~raceth, by = ~chtot+dgrdg, design = des, FUN = svymean,
na.rm=T)
fert4
## chtot dgrdg
## one child.0bachelors one child 0bachelors
## two or more children.0bachelors two or more children 0bachelors
## one child.1masters one child 1masters
## two or more children.1masters two or more children 1masters
## one child.2doctorate one child 2doctorate
## two or more children.2doctorate two or more children 2doctorate
## one child.3professional one child 3professional
## two or more children.3professional two or more children 3professional
## racethwhite racethasian
## one child.0bachelors 0.6368831 0.15196181
## two or more children.0bachelors 0.7241036 0.09812742
## one child.1masters 0.6358618 0.19516156
## two or more children.1masters 0.7053533 0.14118711
## one child.2doctorate 0.4874854 0.38138179
## two or more children.2doctorate 0.5680071 0.32588310
## one child.3professional 0.6150045 0.26478968
## two or more children.3professional 0.7593696 0.10904173
## racethminorities se.racethwhite
## one child.0bachelors 0.2111551 0.01599005
## two or more children.0bachelors 0.1777690 0.01072046
## one child.1masters 0.1689766 0.01680647
## two or more children.1masters 0.1534596 0.01170554
## one child.2doctorate 0.1311328 0.04451820
## two or more children.2doctorate 0.1061098 0.04715143
## one child.3professional 0.1202059 0.04536123
## two or more children.3professional 0.1315886 0.02278542
## se.racethasian se.racethminorities
## one child.0bachelors 0.011924534 0.01337396
## two or more children.0bachelors 0.006751630 0.00943298
## one child.1masters 0.011246382 0.01450926
## two or more children.1masters 0.007410258 0.01002523
## one child.2doctorate 0.042774071 0.03404169
## two or more children.2doctorate 0.045438820 0.02763948
## one child.3professional 0.044542535 0.02699844
## two or more children.3professional 0.016540220 0.01762423
fert4$chtot_rec<-rep(c("one child","two or more children"), 2)
fert4$dgrdg_rec<-factor(c(rep("0bachelors", 2), rep("1masters", 2), rep("2doctorate", 2), rep("3professional", 2)), ordered = T)
#WHITE
A<-ggplot(fert4, aes(dgrdg_rec,fert4$racethwhite),xlab="Education", ylab="% raceth")
A<-A+geom_point(aes(colour=chtot_rec))
A<-A+geom_line(aes(colour=chtot_rec,group=chtot_rec))
A<-A+ylab("white")
A<-A+xlab("education")
A+ggtitle("Percentage of Children in the Household for White Participants")
#ASIAN
A<-ggplot(fert4, aes(dgrdg_rec,fert4$racethasian),xlab="Education", ylab="% raceth")
A<-A+geom_point(aes(colour=chtot_rec))
A<-A+geom_line(aes(colour=chtot_rec,group=chtot_rec))
A<-A+ylab("asian")
A<-A+xlab("education")
A+ggtitle("Percentage of Children in the Household for Asian Participants")
#MINORITIES
A<-ggplot(fert4, aes(dgrdg_rec,fert4$racethminorities),xlab="Education", ylab="% raceth")
A<-A+geom_point(aes(colour=chtot_rec))
A<-A+geom_line(aes(colour=chtot_rec,group=chtot_rec))
A<-A+ylab("minorities")
A<-A+xlab("education")
A+ggtitle("Percentage of Children in the Household for Minority Participants")
###Percentage of Children Living in the Household by Level of Education and Age
fert5<-svyby(formula = ~agex, by = ~chtot+dgrdg, design = des, FUN = svymean,
na.rm=T)
fert5
## chtot dgrdg
## one child.0bachelors one child 0bachelors
## two or more children.0bachelors two or more children 0bachelors
## one child.1masters one child 1masters
## two or more children.1masters two or more children 1masters
## one child.2doctorate one child 2doctorate
## two or more children.2doctorate two or more children 2doctorate
## one child.3professional one child 3professional
## two or more children.3professional two or more children 3professional
## agex(22,29] agex(29,39] agex(39,49]
## one child.0bachelors 0.148093592 0.4759955 0.3759109
## two or more children.0bachelors 0.036317760 0.4033073 0.5603750
## one child.1masters 0.070532515 0.5478367 0.3816308
## two or more children.1masters 0.020351141 0.4163619 0.5632869
## one child.2doctorate 0.045660760 0.4762022 0.4781371
## two or more children.2doctorate 0.016365149 0.2432663 0.7403686
## one child.3professional 0.072336281 0.5753288 0.3523349
## two or more children.3professional 0.002768426 0.4292026 0.5680290
## se.agex(22,29] se.agex(29,39]
## one child.0bachelors 0.010497389 0.01677959
## two or more children.0bachelors 0.004750570 0.01171078
## one child.1masters 0.006615392 0.01813714
## two or more children.1masters 0.004669316 0.01368568
## one child.2doctorate 0.012954357 0.04470383
## two or more children.2doctorate 0.009681062 0.03403752
## one child.3professional 0.021884803 0.04371710
## two or more children.3professional 0.001558626 0.02736714
## se.agex(39,49]
## one child.0bachelors 0.01673825
## two or more children.0bachelors 0.01184973
## one child.1masters 0.01787279
## two or more children.1masters 0.01375369
## one child.2doctorate 0.04446329
## two or more children.2doctorate 0.03518223
## one child.3professional 0.04219639
## two or more children.3professional 0.02738916
fert5$chtot_rec<-rep(c("one child","two or more children"), 2)
fert5$dgrdg_rec<-factor(c(rep("0bachelors", 2), rep("1masters", 2), rep("2doctorate", 2), rep("3professional", 2)), ordered = T)
#22-29
A<-ggplot(fert5, aes(dgrdg_rec,fert5$`agex(22,29]`),xlab="Education", ylab="% agex")
A<-A+geom_point(aes(colour=chtot_rec))
A<-A+geom_line(aes(colour=chtot_rec,group=chtot_rec))
A<-A+ylab("22-29")
A<-A+xlab("education")
A+ggtitle("Percentage of Children in the Household for Participants Ages 22-29")
#29-39
A<-ggplot(fert5, aes(dgrdg_rec,fert5$`agex(29,39]`),xlab="Education", ylab="% agex")
A<-A+geom_point(aes(colour=chtot_rec))
A<-A+geom_line(aes(colour=chtot_rec,group=chtot_rec))
A<-A+ylab("29-39")
A<-A+xlab("education")
A+ggtitle("Percentage of Children in the Household for Participants Ages 29-39")
#39-49
A<-ggplot(fert5, aes(dgrdg_rec,fert5$`agex(39,49]`),xlab="Education", ylab="% agex")
A<-A+geom_point(aes(colour=chtot_rec))
A<-A+geom_line(aes(colour=chtot_rec,group=chtot_rec))
A<-A+ylab("39-49")
A<-A+xlab("education")
A+ggtitle("Percentage of Children in the Household for Participants Ages 39-49")
###Basic Chi-Square Test of Indendendence We compute chi-square analyses to observe all the various interactions with the number of children. Only complete cases are considered below. All interactions show significant effects.
#column percentages of number of children by gender
prop.table(table(analysisf$chtot, analysisf$gender), margin=2)
##
## female male
## one child 0.4073771 0.3356697
## two or more children 0.5926229 0.6643303
chisq.test(table(analysisf$chtot, analysisf$gender))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(analysisf$chtot, analysisf$gender)
## X-squared = 118.14, df = 1, p-value < 2.2e-16
#column percentages of number of children by race/ethcnitiy
prop.table(table(analysisf$chtot, analysisf$raceth), margin=2)
##
## white asian minorities
## one child 0.3387135 0.4274234 0.3958755
## two or more children 0.6612865 0.5725766 0.6041245
chisq.test(table(analysisf$chtot, analysisf$raceth))
##
## Pearson's Chi-squared test
##
## data: table(analysisf$chtot, analysisf$raceth)
## X-squared = 123.8, df = 2, p-value < 2.2e-16
#column percentages of number of children by place of birth
prop.table(table(analysisf$chtot, analysisf$birth), margin=2)
##
## in the US not in the US
## one child 0.3559038 0.3990526
## two or more children 0.6440962 0.6009474
chisq.test(table(analysisf$chtot, analysisf$birth))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(analysisf$chtot, analysisf$birth)
## X-squared = 34.947, df = 1, p-value = 3.389e-09
#column percentages of number of children by level of education
prop.table(table(analysisf$chtot, analysisf$dgrdg), margin=2)
##
## 0bachelors 1masters 2doctorate 3professional
## one child 0.3553617 0.3832552 0.4287812 0.3198421
## two or more children 0.6446383 0.6167448 0.5712188 0.6801579
chisq.test(table(analysisf$chtot, analysisf$dgrdg))
##
## Pearson's Chi-squared test
##
## data: table(analysisf$chtot, analysisf$dgrdg)
## X-squared = 37.55, df = 3, p-value = 3.52e-08
#column percentages of number of children by level of age
prop.table(table(analysisf$chtot, analysisf$agex), margin=2)
##
## (22,29] (29,39] (39,49]
## one child 0.6624258 0.4075370 0.2506744
## two or more children 0.3375742 0.5924630 0.7493256
chisq.test(table(analysisf$chtot, analysisf$agex))
##
## Pearson's Chi-squared test
##
## data: table(analysisf$chtot, analysisf$agex)
## X-squared = 1494.4, df = 2, p-value < 2.2e-16
#column percentages of education level by age
prop.table(table(analysisf$dgrdg, analysisf$agex), margin=2)
##
## (22,29] (29,39] (39,49]
## 0bachelors 0.66497031 0.43532587 0.49897486
## 1masters 0.30619169 0.48900440 0.40725154
## 2doctorate 0.01696353 0.02718912 0.03981871
## 3professional 0.01187447 0.04848061 0.05395489
chisq.test(table(analysisf$dgrdg, analysisf$agex))
##
## Pearson's Chi-squared test
##
## data: table(analysisf$dgrdg, analysisf$agex)
## X-squared = 498.64, df = 6, p-value < 2.2e-16
A logit analysis shows similar patterns than those mentioned during the descriptive analysis: 1. Men are more likely than women to have two children or more 2. Men and Women with a masters and doctoral degree are less likely to have two or more children than those with a bachelors. However, men and women with a professional degree are more likely to have two children or more. 3.Both Asian and Minority groups are less likely to report having two or more children when compared to the white group. 4. Men and women who were not born in the US are most likely as group to report having two or more children when compared to those born in the US.
These findings are grouping men and women together.
#Logit model
fit.logit<-svyglm(twochildrenx~gender+dgrdg+raceth+birth+agex, design= des, family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
#probit model (added for comparison)
fit.probit<-svyglm(twochildrenx~gender+dgrdg+raceth+birth+agex, design=des, family=binomial(link= "probit"))
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
summary(analysisf$agex)
## (22,29] (29,39] (39,49]
## 2358 10004 9267
stargazer(fit.logit, type = "html", style="demography",covariate.labels=c("male","1masters","2doctorate","3professional", "asian","minorities", "not in the US","(29,39]","(39,49]"))
| twochildrenx | |
| male | 0.258*** |
| (0.062) | |
| 1masters | -0.089 |
| (0.066) | |
| 2doctorate | -0.192 |
| (0.144) | |
| 3professional | 0.027 |
| (0.112) | |
| asian | -0.497*** |
| (0.105) | |
| minorities | -0.158 |
| (0.088) | |
| not in the US | -0.135 |
| (0.094) | |
| (29,39] | 1.221*** |
| (0.137) | |
| (39,49] | 1.827*** |
| (0.139) | |
| Constant | -0.611*** |
| (0.136) | |
| N | 21,629 |
| Log Likelihood | -11,721.520 |
| AIC | 23,463.040 |
| p < .05; p < .01; p < .001 | |
The marginal effects allows us to take a closer look at the differences between the groups discussed previously.
#Logit marginal effects
log.marg<-coef(fit.logit)*mean(dlogis(predict(fit.logit)), na.rm=T)
#Probit marginal effects
prob.marg<-coef(fit.probit)*mean(dnorm(predict(fit.probit)), na.rm=T)
plot(log.marg[-1], ylab="Marginal Effects", axes=T,xaxt="n", main="Marginal Effects from Logit and Probit models", ylim=c(-.25, .2))
axis(side=1, at=1:13, labels=F)
text(x=1:13, y=-.3, srt = 45, pos= 1, xpd = TRUE,
labels = c("male","1masters","2doctorate","3professional", "asian","minorities", "not in the US","age"))
points(prob.marg[-1], col=2)
abline(h=0, col=2)
legend("bottomright", legend=c("Logit Model", "Probit Model"), col=c("black", "red"),pch=1)
data.frame(m.logit=log.marg, m.probit=prob.marg)
## m.logit m.probit
## (Intercept) -0.127261999 -0.131314490
## gendermale 0.053702703 0.054439928
## dgrdg1masters -0.018464069 -0.018030026
## dgrdg2doctorate -0.039997902 -0.039731210
## dgrdg3professional 0.005685848 0.007273044
## racethasian -0.103486652 -0.104015466
## racethminorities -0.032819810 -0.032700668
## birthnot in the US -0.028146499 -0.027670429
## agex(29,39] 0.254310933 0.261031465
## agex(39,49] 0.380432965 0.386166460
\[\text{Pr(twochildren)} = \frac{1}{1+exp({\beta_0 + \beta_1*gender + \beta_2*race /ethnicity+\beta_3*education+\beta_4*age+\beta_5*place of birth})}\]
#get a series of predicted probabilites for different "types" of people for each model
#expand.grid will generate all possible combinations of values you specify
dat<-expand.grid(gender=levels(factor(analysisf$gender)),dgrdg=levels(factor(analysisf$dgrdg)), twochildrenx=levels(factor(analysisf$twochildrenx)),raceth=levels(factor(analysisf$raceth)),birth=levels(factor(analysisf$birth)),agex=levels(factor(analysisf$agex)))
summary (dat)
## gender dgrdg twochildrenx
## female:144 0bachelors :72 one child :144
## male :144 1masters :72 two or more children:144
## 2doctorate :72
## 3professional:72
## raceth birth agex
## white :96 in the US :144 (22,29]:96
## asian :96 not in the US:144 (29,39]:96
## minorities:96 (39,49]:96
##
fit<-predict(fit.logit, newdata=dat, type="response")
dat$fitted.prob.lrm<-round(fit, 3)
head(dat, n=48)
## gender dgrdg twochildrenx raceth birth agex
## 1 female 0bachelors one child white in the US (22,29]
## 2 male 0bachelors one child white in the US (22,29]
## 3 female 1masters one child white in the US (22,29]
## 4 male 1masters one child white in the US (22,29]
## 5 female 2doctorate one child white in the US (22,29]
## 6 male 2doctorate one child white in the US (22,29]
## 7 female 3professional one child white in the US (22,29]
## 8 male 3professional one child white in the US (22,29]
## 9 female 0bachelors two or more children white in the US (22,29]
## 10 male 0bachelors two or more children white in the US (22,29]
## 11 female 1masters two or more children white in the US (22,29]
## 12 male 1masters two or more children white in the US (22,29]
## 13 female 2doctorate two or more children white in the US (22,29]
## 14 male 2doctorate two or more children white in the US (22,29]
## 15 female 3professional two or more children white in the US (22,29]
## 16 male 3professional two or more children white in the US (22,29]
## 17 female 0bachelors one child asian in the US (22,29]
## 18 male 0bachelors one child asian in the US (22,29]
## 19 female 1masters one child asian in the US (22,29]
## 20 male 1masters one child asian in the US (22,29]
## 21 female 2doctorate one child asian in the US (22,29]
## 22 male 2doctorate one child asian in the US (22,29]
## 23 female 3professional one child asian in the US (22,29]
## 24 male 3professional one child asian in the US (22,29]
## 25 female 0bachelors two or more children asian in the US (22,29]
## 26 male 0bachelors two or more children asian in the US (22,29]
## 27 female 1masters two or more children asian in the US (22,29]
## 28 male 1masters two or more children asian in the US (22,29]
## 29 female 2doctorate two or more children asian in the US (22,29]
## 30 male 2doctorate two or more children asian in the US (22,29]
## 31 female 3professional two or more children asian in the US (22,29]
## 32 male 3professional two or more children asian in the US (22,29]
## 33 female 0bachelors one child minorities in the US (22,29]
## 34 male 0bachelors one child minorities in the US (22,29]
## 35 female 1masters one child minorities in the US (22,29]
## 36 male 1masters one child minorities in the US (22,29]
## 37 female 2doctorate one child minorities in the US (22,29]
## 38 male 2doctorate one child minorities in the US (22,29]
## 39 female 3professional one child minorities in the US (22,29]
## 40 male 3professional one child minorities in the US (22,29]
## 41 female 0bachelors two or more children minorities in the US (22,29]
## 42 male 0bachelors two or more children minorities in the US (22,29]
## 43 female 1masters two or more children minorities in the US (22,29]
## 44 male 1masters two or more children minorities in the US (22,29]
## 45 female 2doctorate two or more children minorities in the US (22,29]
## 46 male 2doctorate two or more children minorities in the US (22,29]
## 47 female 3professional two or more children minorities in the US (22,29]
## 48 male 3professional two or more children minorities in the US (22,29]
## fitted.prob.lrm
## 1 0.352
## 2 0.413
## 3 0.332
## 4 0.391
## 5 0.309
## 6 0.367
## 7 0.358
## 8 0.419
## 9 0.352
## 10 0.413
## 11 0.332
## 12 0.391
## 13 0.309
## 14 0.367
## 15 0.358
## 16 0.419
## 17 0.248
## 18 0.299
## 19 0.232
## 20 0.281
## 21 0.214
## 22 0.261
## 23 0.253
## 24 0.305
## 25 0.248
## 26 0.299
## 27 0.232
## 28 0.281
## 29 0.214
## 30 0.261
## 31 0.253
## 32 0.305
## 33 0.317
## 34 0.375
## 35 0.298
## 36 0.354
## 37 0.277
## 38 0.331
## 39 0.323
## 40 0.381
## 41 0.317
## 42 0.375
## 43 0.298
## 44 0.354
## 45 0.277
## 46 0.331
## 47 0.323
## 48 0.381
We explore a few of the interesting cases presented in the fitted values. ##Professional Degree
#Professional Degree - Ages 22-29
dat[which(dat$gender=="female"&dat$twochildrenx=="two or more children"&dat$dgrdg=="3professional"&dat$agex=="(22,29]"),]
## gender dgrdg twochildrenx raceth birth
## 15 female 3professional two or more children white in the US
## 31 female 3professional two or more children asian in the US
## 47 female 3professional two or more children minorities in the US
## 63 female 3professional two or more children white not in the US
## 79 female 3professional two or more children asian not in the US
## 95 female 3professional two or more children minorities not in the US
## agex fitted.prob.lrm
## 15 (22,29] 0.358
## 31 (22,29] 0.253
## 47 (22,29] 0.323
## 63 (22,29] 0.328
## 79 (22,29] 0.229
## 95 (22,29] 0.294
dat[which(dat$gender=="male"&dat$twochildrenx=="two or more children"&dat$dgrdg=="3professional"&dat$agex=="(22,29]"),]
## gender dgrdg twochildrenx raceth birth
## 16 male 3professional two or more children white in the US
## 32 male 3professional two or more children asian in the US
## 48 male 3professional two or more children minorities in the US
## 64 male 3professional two or more children white not in the US
## 80 male 3professional two or more children asian not in the US
## 96 male 3professional two or more children minorities not in the US
## agex fitted.prob.lrm
## 16 (22,29] 0.419
## 32 (22,29] 0.305
## 48 (22,29] 0.381
## 64 (22,29] 0.387
## 80 (22,29] 0.277
## 96 (22,29] 0.350
#Professional Degree - Ages 39-49
dat[which(dat$gender=="female"&dat$twochildrenx=="two or more children"&dat$dgrdg=="3professional"&dat$agex=="(39,49]"),]
## gender dgrdg twochildrenx raceth birth
## 207 female 3professional two or more children white in the US
## 223 female 3professional two or more children asian in the US
## 239 female 3professional two or more children minorities in the US
## 255 female 3professional two or more children white not in the US
## 271 female 3professional two or more children asian not in the US
## 287 female 3professional two or more children minorities not in the US
## agex fitted.prob.lrm
## 207 (39,49] 0.776
## 223 (39,49] 0.678
## 239 (39,49] 0.748
## 255 (39,49] 0.752
## 271 (39,49] 0.648
## 287 (39,49] 0.721
dat[which(dat$gender=="male"&dat$twochildrenx=="two or more children"&dat$dgrdg=="3professional"&dat$agex=="(39,49]"),]
## gender dgrdg twochildrenx raceth birth
## 208 male 3professional two or more children white in the US
## 224 male 3professional two or more children asian in the US
## 240 male 3professional two or more children minorities in the US
## 256 male 3professional two or more children white not in the US
## 272 male 3professional two or more children asian not in the US
## 288 male 3professional two or more children minorities not in the US
## agex fitted.prob.lrm
## 208 (39,49] 0.818
## 224 (39,49] 0.732
## 240 (39,49] 0.793
## 256 (39,49] 0.797
## 272 (39,49] 0.704
## 288 (39,49] 0.770
#Doctoral Degree - Ages 22-29
dat[which(dat$gender=="female"&dat$twochildrenx=="two or more children"&dat$dgrdg=="2doctorate"&dat$agex=="(22,29]"),]
## gender dgrdg twochildrenx raceth birth agex
## 13 female 2doctorate two or more children white in the US (22,29]
## 29 female 2doctorate two or more children asian in the US (22,29]
## 45 female 2doctorate two or more children minorities in the US (22,29]
## 61 female 2doctorate two or more children white not in the US (22,29]
## 77 female 2doctorate two or more children asian not in the US (22,29]
## 93 female 2doctorate two or more children minorities not in the US (22,29]
## fitted.prob.lrm
## 13 0.309
## 29 0.214
## 45 0.277
## 61 0.281
## 77 0.192
## 93 0.251
dat[which(dat$gender=="male"&dat$twochildrenx=="two or more children"&dat$dgrdg=="2doctorate"&dat$agex=="(22,29]"),]
## gender dgrdg twochildrenx raceth birth agex
## 14 male 2doctorate two or more children white in the US (22,29]
## 30 male 2doctorate two or more children asian in the US (22,29]
## 46 male 2doctorate two or more children minorities in the US (22,29]
## 62 male 2doctorate two or more children white not in the US (22,29]
## 78 male 2doctorate two or more children asian not in the US (22,29]
## 94 male 2doctorate two or more children minorities not in the US (22,29]
## fitted.prob.lrm
## 14 0.367
## 30 0.261
## 46 0.331
## 62 0.336
## 78 0.236
## 94 0.302
#Doctoral Degree - Ages 39-49
dat[which(dat$gender=="female"&dat$twochildrenx=="two or more children"&dat$dgrdg=="2doctorate"&dat$agex=="(39,49]"),]
## gender dgrdg twochildrenx raceth birth
## 205 female 2doctorate two or more children white in the US
## 221 female 2doctorate two or more children asian in the US
## 237 female 2doctorate two or more children minorities in the US
## 253 female 2doctorate two or more children white not in the US
## 269 female 2doctorate two or more children asian not in the US
## 285 female 2doctorate two or more children minorities not in the US
## agex fitted.prob.lrm
## 205 (39,49] 0.736
## 221 (39,49] 0.629
## 237 (39,49] 0.704
## 253 (39,49] 0.709
## 269 (39,49] 0.597
## 285 (39,49] 0.675
dat[which(dat$gender=="male"&dat$twochildrenx=="two or more children"&dat$dgrdg=="2doctorate"&dat$agex=="(39,49]"),]
## gender dgrdg twochildrenx raceth birth
## 206 male 2doctorate two or more children white in the US
## 222 male 2doctorate two or more children asian in the US
## 238 male 2doctorate two or more children minorities in the US
## 254 male 2doctorate two or more children white not in the US
## 270 male 2doctorate two or more children asian not in the US
## 286 male 2doctorate two or more children minorities not in the US
## agex fitted.prob.lrm
## 206 (39,49] 0.783
## 222 (39,49] 0.687
## 238 (39,49] 0.755
## 254 (39,49] 0.759
## 270 (39,49] 0.657
## 286 (39,49] 0.729
options(survey.lonely.psu = "adjust")
des1<-svydesign(ids=~1, strata=~sample, weights=~weight,data= analysisf, nest=T)
#Logit models
fit.logit1<-svyglm(twochildrenx~gender+agex,design= des1, family=binomial) #gender and age only
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
fit.logit2<-svyglm(twochildrenx~gender+agex+dgrdg,design= des1, family=binomial) #gender+age+education
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
fit.logit3<-svyglm(twochildrenx~gender+agex+dgrdg+raceth+birth,design= des1, family=binomial)#gender+age+education+racen and place of birth
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
Now, let’s see if, by controlling for education and age, some of these differences go away, or are reduced. The fancy word for when an effect is reduced is “attenuated”. We will also do a test to see if the model with the education term significantly improves the model fit. Traditionally this would be done using a likelihood ratio test, but in survey models, that’s not kosherload
summary(fit.logit2)
##
## Call:
## svyglm(formula = twochildrenx ~ gender + agex + dgrdg, design = des1,
## family = binomial)
##
## Survey design:
## svydesign(ids = ~1, strata = ~sample, weights = ~weight, data = analysisf,
## nest = T)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.69054 0.13434 -5.140 2.77e-07 ***
## gendermale 0.23457 0.06173 3.800 0.000145 ***
## agex(29,39] 1.18487 0.13710 8.643 < 2e-16 ***
## agex(39,49] 1.80074 0.13856 12.996 < 2e-16 ***
## dgrdg1masters -0.11318 0.06509 -1.739 0.082071 .
## dgrdg2doctorate -0.35920 0.13523 -2.656 0.007907 **
## dgrdg3professional 0.01188 0.11386 0.104 0.916875
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.9985291)
##
## Number of Fisher Scoring iterations: 4
regTermTest(fit.logit2, test.terms = ~dgrdg, method="Wald", df = NULL)
## Wald test for dgrdg
## in svyglm(formula = twochildrenx ~ gender + agex + dgrdg, design = des1,
## family = binomial)
## F = 3.001236 on 3 and 21622 df: p= 0.029264
A comparison of the third model
summary(fit.logit3)
##
## Call:
## svyglm(formula = twochildrenx ~ gender + agex + dgrdg + raceth +
## birth, design = des1, family = binomial)
##
## Survey design:
## svydesign(ids = ~1, strata = ~sample, weights = ~weight, data = analysisf,
## nest = T)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.61110 0.13563 -4.506 6.66e-06 ***
## gendermale 0.25787 0.06190 4.166 3.11e-05 ***
## agex(29,39] 1.22117 0.13716 8.903 < 2e-16 ***
## agex(39,49] 1.82680 0.13898 13.145 < 2e-16 ***
## dgrdg1masters -0.08866 0.06601 -1.343 0.1792
## dgrdg2doctorate -0.19207 0.14405 -1.333 0.1824
## dgrdg3professional 0.02730 0.11221 0.243 0.8078
## racethasian -0.49693 0.10501 -4.732 2.23e-06 ***
## racethminorities -0.15760 0.08752 -1.801 0.0718 .
## birthnot in the US -0.13516 0.09430 -1.433 0.1518
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.999826)
##
## Number of Fisher Scoring iterations: 4
regTermTest(fit.logit3, test.terms=~birth+raceth, method="Wald", df = NULL)
## Wald test for birth raceth
## in svyglm(formula = twochildrenx ~ gender + agex + dgrdg + raceth +
## birth, design = des1, family = binomial)
## F = 19.92706 on 3 and 21619 df: p= 6.8128e-13
super1<-lm(salary4~dgrdg+gender+birth+twochildrenx, data=analysisf)
library(broom)
tidy(super1)
## term estimate std.error statistic
## 1 (Intercept) 49853.147 539.7450 92.364261
## 2 dgrdg1masters 9781.622 494.9543 19.762676
## 3 dgrdg2doctorate 7383.630 1388.4585 5.317861
## 4 dgrdg3professional 36248.481 1137.9632 31.853827
## 5 gendermale 26285.296 476.5761 55.154461
## 6 birthnot in the US 7093.896 532.7124 13.316559
## 7 twochildrenxtwo or more children 5741.066 489.5883 11.726314
## p.value
## 1 0.000000e+00
## 2 3.597379e-86
## 3 1.060368e-07
## 4 1.229952e-217
## 5 0.000000e+00
## 6 2.673429e-40
## 7 1.165330e-31
After evaluating for normality of errors, we find some discrepancies.
qqnorm(rstudent(super1), main="Q-Q Plot for Model Residuals")
qqline(rstudent(super1), col="red")
We apply a log transformation to address the anormalities. We see that the second graph “Q-Q Plot for Model Residuals Version 2” does the best job however, there is still some skewness possibly from some extreme values in the data set. The rest of the transformation are not helpful.
super12<-lm( log (salary4)~dgrdg+gender+birth+twochildrenx, data=analysisf)
qqnorm(rstudent(super12), main="Q-Q Plot for Model Residuals Version 1")
qqline(rstudent(super12), col="red")
super13<-lm(sqrt(salary4)~dgrdg, data=analysisf)
qqnorm(rstudent(super13), main="Q-Q Plot for Model Residuals Version 2")
qqline(rstudent(super13), col="red")
super14<-lm(I(1/salary4)~dgrdg, data=analysisf)
qqnorm(rstudent(super14), main="Q-Q Plot for Model Residuals Version 3")
#qqline(rstudent(super14), col="red")
Each model under this analysis increases in complexity. The first model considers number of children and gender. The second model includes number of children, gender and level of education. The third model includes number of children, gender, education and adds race/ethnicity. The fourth model, includes number of children, gender, education, race and age and the last model includes number of children, gender, education, race, age and place of birth.
model1<-lm(scale(sqrt(salary4))~chtot+gender, data=analysisf)
model2<-lm(scale(sqrt(salary4))~chtot+gender+dgrdg, data=analysisf)
model3<-lm(scale(sqrt(salary4))~chtot+gender+dgrdg+raceth, data=analysisf)
model4<-lm(scale(sqrt(salary4))~chtot+gender+dgrdg+raceth+agex, data=analysisf)
model5<-lm(scale(sqrt(salary4))~chtot+gender+dgrdg+raceth+agex+birth, data=analysisf)
anova(model1,model2, model3, model4, model5)
## Analysis of Variance Table
##
## Model 1: scale(sqrt(salary4)) ~ chtot + gender
## Model 2: scale(sqrt(salary4)) ~ chtot + gender + dgrdg
## Model 3: scale(sqrt(salary4)) ~ chtot + gender + dgrdg + raceth
## Model 4: scale(sqrt(salary4)) ~ chtot + gender + dgrdg + raceth + agex
## Model 5: scale(sqrt(salary4)) ~ chtot + gender + dgrdg + raceth + agex +
## birth
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 21626 18908
## 2 21623 18036 3 871.96 367.5254 <2e-16 ***
## 3 21621 17802 2 233.81 147.8213 <2e-16 ***
## 4 21619 17098 2 703.70 444.9083 <2e-16 ***
## 5 21618 17096 1 1.93 2.4364 0.1186
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AICs<-AIC(model1, model2, model3, model4, model5)
AICs$diff<-AICs$AIC-AICs$AIC[1]
AICs
## df AIC diff
## model1 4 58480.14 0.000
## model2 7 57464.95 -1015.187
## model3 9 57186.73 -1293.406
## model4 11 56318.39 -2161.745
## model5 12 56317.96 -2162.182
library(stargazer)
summary(model1)
Call: lm(formula = scale(sqrt(salary4)) ~ chtot + gender, data = analysisf)
Residuals: Min 1Q Median 3Q Max -3.4859 -0.5746 0.0327 0.6497 2.0871
Coefficients: Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.45786 0.01228 -37.297 <2e-16 chtottwo or more children 0.12367 0.01322 9.355 <2e-16 gendermale 0.69354 0.01281 54.146 <2e-16 *** — Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ‘’ 1
Residual standard error: 0.935 on 21626 degrees of freedom Multiple R-squared: 0.1258, Adjusted R-squared: 0.1257 F-statistic: 1556 on 2 and 21626 DF, p-value: < 2.2e-16
summary(model2)
Call: lm(formula = scale(sqrt(salary4)) ~ chtot + gender + dgrdg, data = analysisf)
Residuals: Min 1Q Median 3Q Max -4.0068 -0.5361 0.0390 0.6091 2.2653
Coefficients: Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.63605 0.01392 -45.691 < 2e-16 chtottwo or more children 0.12352 0.01292 9.559 < 2e-16 gendermale 0.71687 0.01256 57.072 < 2e-16 dgrdg1masters 0.26914 0.01301 20.691 < 2e-16 dgrdg2doctorate 0.28031 0.03613 7.759 8.96e-15 dgrdg3professional 0.85112 0.03006 28.317 < 2e-16 — Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ‘’ 1
Residual standard error: 0.9133 on 21623 degrees of freedom Multiple R-squared: 0.1661, Adjusted R-squared: 0.1659 F-statistic: 861.3 on 5 and 21623 DF, p-value: < 2.2e-16
summary(model3)
Call: lm(formula = scale(sqrt(salary4)) ~ chtot + gender + dgrdg + raceth, data = analysisf)
Residuals: Min 1Q Median 3Q Max -3.8689 -0.5276 0.0412 0.6108 2.3800
Coefficients: Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.63100 0.01503 -41.991 < 2e-16 chtottwo or more children 0.13048 0.01287 10.138 < 2e-16 gendermale 0.69922 0.01254 55.745 < 2e-16 dgrdg1masters 0.24989 0.01298 19.254 < 2e-16 dgrdg2doctorate 0.23283 0.03603 6.462 1.05e-10 dgrdg3professional 0.83856 0.02987 28.072 < 2e-16 racethasian 0.20656 0.01662 12.428 < 2e-16 racethminorities -0.11969 0.01529 -7.829 5.13e-15 — Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ‘’ 1
Residual standard error: 0.9074 on 21621 degrees of freedom Multiple R-squared: 0.1769, Adjusted R-squared: 0.1766 F-statistic: 663.8 on 7 and 21621 DF, p-value: < 2.2e-16
summary(model4)
Call: lm(formula = scale(sqrt(salary4)) ~ chtot + gender + dgrdg + raceth + agex, data = analysisf)
Residuals: Min 1Q Median 3Q Max -3.7478 -0.5060 0.0656 0.5984 2.6203
Coefficients: Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.99106 0.02100 -47.184 < 2e-16 chtottwo or more children 0.02817 0.01307 2.155 0.0312
gendermale 0.67260 0.01233 54.564 < 2e-16 dgrdg1masters 0.22592 0.01283 17.606 < 2e-16 dgrdg2doctorate 0.17467 0.03537 4.939 7.92e-07 dgrdg3professional 0.78090 0.02937 26.589 < 2e-16 racethasian 0.17762 0.01632 10.882 < 2e-16 racethminorities -0.10133 0.01500 -6.758 1.44e-11 agex(29,39] 0.40719 0.02092 19.467 < 2e-16 agex(39,49] 0.62319 0.02148 29.010 < 2e-16 ** — Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ‘’ 1
Residual standard error: 0.8893 on 21619 degrees of freedom Multiple R-squared: 0.2094, Adjusted R-squared: 0.2091 F-statistic: 636.4 on 9 and 21619 DF, p-value: < 2.2e-16
summary(model5)
Call: lm(formula = scale(sqrt(salary4)) ~ chtot + gender + dgrdg + raceth + agex + birth, data = analysisf)
Residuals: Min 1Q Median 3Q Max -3.7415 -0.5048 0.0679 0.5990 2.6207
Coefficients: Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.99142 0.02100 -47.200 < 2e-16 chtottwo or more children 0.02853 0.01307 2.182 0.0291
gendermale 0.67149 0.01235 54.385 < 2e-16 dgrdg1masters 0.22474 0.01285 17.484 < 2e-16 dgrdg2doctorate 0.16626 0.03577 4.648 3.38e-06 dgrdg3professional 0.78140 0.02937 26.605 < 2e-16 racethasian 0.15759 0.02076 7.591 3.30e-14 racethminorities -0.10598 0.01529 -6.932 4.25e-12 agex(29,39] 0.40612 0.02093 19.406 < 2e-16 agex(39,49] 0.62088 0.02153 28.835 < 2e-16 ** birthnot in the US 0.02733 0.01751 1.561 0.1186
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ‘’ 1
Residual standard error: 0.8893 on 21618 degrees of freedom Multiple R-squared: 0.2095, Adjusted R-squared: 0.2092 F-statistic: 573 on 10 and 21618 DF, p-value: < 2.2e-16
stargazer(model1, model2, model3, model4, model5, type = "html", style = "demography", ci = T)
| scale(sqrt(salary4)) | |||||
| Model 1 | Model 2 | Model 3 | Model 4 | Model 5 | |
| chtottwo or more children | 0.124*** | 0.124*** | 0.130*** | 0.028* | 0.029* |
| (0.098, 0.150) | (0.098, 0.149) | (0.105, 0.156) | (0.003, 0.054) | (0.003, 0.054) | |
| gendermale | 0.694*** | 0.717*** | 0.699*** | 0.673*** | 0.671*** |
| (0.668, 0.719) | (0.692, 0.741) | (0.675, 0.724) | (0.648, 0.697) | (0.647, 0.696) | |
| dgrdg1masters | 0.269*** | 0.250*** | 0.226*** | 0.225*** | |
| (0.244, 0.295) | (0.224, 0.275) | (0.201, 0.251) | (0.200, 0.250) | ||
| dgrdg2doctorate | 0.280*** | 0.233*** | 0.175*** | 0.166*** | |
| (0.210, 0.351) | (0.162, 0.303) | (0.105, 0.244) | (0.096, 0.236) | ||
| dgrdg3professional | 0.851*** | 0.839*** | 0.781*** | 0.781*** | |
| (0.792, 0.910) | (0.780, 0.897) | (0.723, 0.838) | (0.724, 0.839) | ||
| racethasian | 0.207*** | 0.178*** | 0.158*** | ||
| (0.174, 0.239) | (0.146, 0.210) | (0.117, 0.198) | |||
| racethminorities | -0.120*** | -0.101*** | -0.106*** | ||
| (-0.150, -0.090) | (-0.131, -0.072) | (-0.136, -0.076) | |||
| agex(29,39] | 0.407*** | 0.406*** | |||
| (0.366, 0.448) | (0.365, 0.447) | ||||
| agex(39,49] | 0.623*** | 0.621*** | |||
| (0.581, 0.665) | (0.579, 0.663) | ||||
| birthnot in the US | 0.027 | ||||
| (-0.007, 0.062) | |||||
| Constant | -0.458*** | -0.636*** | -0.631*** | -0.991*** | -0.991*** |
| (-0.482, -0.434) | (-0.663, -0.609) | (-0.660, -0.602) | (-1.032, -0.950) | (-1.033, -0.950) | |
| N | 21,629 | 21,629 | 21,629 | 21,629 | 21,629 |
| R2 | 0.126 | 0.166 | 0.177 | 0.209 | 0.210 |
| Adjusted R2 | 0.126 | 0.166 | 0.177 | 0.209 | 0.209 |
| Residual Std. Error | 0.935 (df = 21626) | 0.913 (df = 21623) | 0.907 (df = 21621) | 0.889 (df = 21619) | 0.889 (df = 21618) |
| F Statistic | 1,555.663*** (df = 2; 21626) | 861.336*** (df = 5; 21623) | 663.829*** (df = 7; 21621) | 636.373*** (df = 9; 21619) | 573.018*** (df = 10; 21618) |
| p < .05; p < .01; p < .001 | |||||
Overall the model that best fits our data is Model 4 which includes age.
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
fit.gr<-glm(salary4~factor(gender)+factor(raceth)+factor(dgrdg)+factor(agex)+factor(birth)+factor(chtot), data=analysisf, family= Gamma(link=log))
summary(fit.gr)
##
## Call:
## glm(formula = salary4 ~ factor(gender) + factor(raceth) + factor(dgrdg) +
## factor(agex) + factor(birth) + factor(chtot), family = Gamma(link = log),
## data = analysisf)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6512 -0.3573 -0.0256 0.2601 1.6075
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.647910 0.010902 976.734 < 2e-16
## factor(gender)male 0.340854 0.006408 53.191 < 2e-16
## factor(raceth)asian 0.084212 0.010775 7.816 5.71e-15
## factor(raceth)minorities -0.060384 0.007935 -7.610 2.85e-14
## factor(dgrdg)1masters 0.118459 0.006671 17.757 < 2e-16
## factor(dgrdg)2doctorate 0.091119 0.018567 4.908 9.28e-07
## factor(dgrdg)3professional 0.405232 0.015243 26.584 < 2e-16
## factor(agex)(29,39] 0.260923 0.010861 24.023 < 2e-16
## factor(agex)(39,49] 0.379967 0.011175 34.001 < 2e-16
## factor(birth)not in the US 0.027851 0.009087 3.065 0.00218
## factor(chtot)two or more children 0.014510 0.006786 2.138 0.03251
##
## (Intercept) ***
## factor(gender)male ***
## factor(raceth)asian ***
## factor(raceth)minorities ***
## factor(dgrdg)1masters ***
## factor(dgrdg)2doctorate ***
## factor(dgrdg)3professional ***
## factor(agex)(29,39] ***
## factor(agex)(39,49] ***
## factor(birth)not in the US **
## factor(chtot)two or more children *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.2130266)
##
## Null deviance: 7839.3 on 21628 degrees of freedom
## Residual deviance: 6576.1 on 21618 degrees of freedom
## AIC: 515093
##
## Number of Fisher Scoring iterations: 5
##Percentage increase in the Mean for Each Coefficient
round(exp(summary(fit.gr)$coef[-1,1]), 3)
## factor(gender)male factor(raceth)asian
## 1.406 1.088
## factor(raceth)minorities factor(dgrdg)1masters
## 0.941 1.126
## factor(dgrdg)2doctorate factor(dgrdg)3professional
## 1.095 1.500
## factor(agex)(29,39] factor(agex)(39,49]
## 1.298 1.462
## factor(birth)not in the US factor(chtot)two or more children
## 1.028 1.015