The beginning steps are the same steps from assignment 2, in which I create an age at first marriage variable.
#set up age at first marriage variable
library(car)
## Loading required package: carData
mdata$status <- Recode(mdata$fmarital,
recodes= "1 = 1; 5 = 0; 2 = 2; 3=2; 4=2")
mdata$evermarried<-Recode(mdata$fmarital,"1:4 = 1; 5= 0")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##here I use nformwife and numwife as a way to point out if the respondent has been married more than once
##I make 4 groups: those who are married and this in their first marriage, those who are married and this is a previous marriage than their first, those who are divorce/separated or widowed, and the never married
marriedsetnfw <- filter(mdata, mdata$status==1 & numwife==1)
marriedsetfw<-filter(mdata, status==1 & nformwife>0)
nmarriedset <- filter(mdata, status ==0)
marriedinpast<-filter(mdata, status==2 & nformwife>0)
marriedsetnfw$ageevent <-marriedsetnfw$hisagem
marriedsetfw$ageevent<-marriedsetfw$agemarrn
nmarriedset$ageevent <- nmarriedset$ager
marriedinpast$ageevent<-marriedinpast$agemarrn
#after making the age of event variable the same (Ageevent) for all the groups I bind them together
atrisk <- rbind(marriedsetnfw, nmarriedset, marriedsetfw, marriedinpast)
#filter out NAs
atrisk<- atrisk%>%
filter(ageevent!="NA")%>%
filter(ageevent!=98)
Next, I filter my risk set for NH white and NH black men.
#filter for non-Hispanic white and non-Hispanic black men only
library(dplyr)
atrisk2<-atrisk%>%
filter(hispanic==2)%>%
filter(race!=3)
#show table of age of event and whether ever married
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
tabyl(atrisk2, ageevent, evermarried)
## ageevent 0 1
## 15 110 0
## 16 102 2
## 17 128 7
## 18 127 27
## 19 119 42
## 20 76 59
## 21 85 100
## 22 78 87
## 23 80 94
## 24 92 103
## 25 77 114
## 26 75 88
## 27 73 94
## 28 94 72
## 29 61 81
## 30 72 60
## 31 65 46
## 32 64 40
## 33 69 43
## 34 41 33
## 35 43 22
## 36 42 22
## 37 37 14
## 38 36 12
## 39 33 5
## 40 32 11
## 41 27 6
## 42 33 4
## 43 21 2
## 44 20 1
## 45 22 4
## 46 22 1
## 47 28 0
## 48 21 0
## 49 27 0
#note if married ever married=1 and not married=0
#show this table by race
tabyl(atrisk2, ageevent, evermarried, race)
## $`1`
## ageevent 0 1
## 15 32 0
## 16 32 1
## 17 52 2
## 18 45 6
## 19 38 11
## 20 27 15
## 21 27 15
## 22 25 7
## 23 27 15
## 24 24 26
## 25 30 23
## 26 30 7
## 27 28 14
## 28 39 11
## 29 25 17
## 30 25 17
## 31 19 7
## 32 10 5
## 33 19 8
## 34 19 6
## 35 11 4
## 36 12 4
## 37 13 6
## 38 8 2
## 39 14 1
## 40 12 2
## 41 9 1
## 42 10 2
## 43 6 0
## 44 6 0
## 45 8 1
## 46 7 0
## 47 10 0
## 48 8 0
## 49 11 0
##
## $`2`
## ageevent 0 1
## 15 78 0
## 16 70 1
## 17 76 5
## 18 82 21
## 19 81 31
## 20 49 44
## 21 58 85
## 22 53 80
## 23 53 79
## 24 68 77
## 25 47 91
## 26 45 81
## 27 45 80
## 28 55 61
## 29 36 64
## 30 47 43
## 31 46 39
## 32 54 35
## 33 50 35
## 34 22 27
## 35 32 18
## 36 30 18
## 37 24 8
## 38 28 10
## 39 19 4
## 40 20 9
## 41 18 5
## 42 23 2
## 43 15 2
## 44 14 1
## 45 14 3
## 46 15 1
## 47 18 0
## 48 13 0
## 49 16 0
#note 1= black race and 2=white race
Here I create a variable for age at interview, then I use this to create age at birth. Next I calculate age at bachelors degree by comparing age to year earned bachelors.
#convert date of interview from century months to year
atrisk2$yrint <-((atrisk2$cmintvw-1)/12) + 1900
#calculate birth year
atrisk2$yrbirth<-atrisk2$intvwyear-atrisk2$age_r
#select columns needed for analysis
atrisk3<-select(atrisk2, ageevent, evermarried, earnba_y, race, caseid, yrint, yrbirth, wgt2017_2019, havedeg, age_r, secu, sest)
#calculate age at bachelors
atrisk3$agebach<-atrisk3$earnba_y-atrisk3$yrbirth%>%
as.integer()
Next I make my risk set into a person-year file for discrete time analysis.
#create person-year file
library(survival)
pydata<-survSplit(atrisk3, cut=c(17:50),end="ageevent", event="evermarried")
#create a time varying bachelors variable
pydata$bacheventtv1<-ifelse(pydata$ageevent>pydata$agebach,1,0)
#replace NA's (individuals who did not get a bachelor's degree) with 0
pydata$bacheventtv1 <- pydata$bacheventtv1 %>% replace(is.na(.), 0)
head(pydata[, c("ageevent", "evermarried", "caseid", "agebach", "bacheventtv1", "race")], n=60)
## ageevent evermarried caseid agebach bacheventtv1 race
## 1 17 0 80734 21 0 2
## 2 18 0 80734 21 0 2
## 3 19 0 80734 21 0 2
## 4 20 0 80734 21 0 2
## 5 21 0 80734 21 0 2
## 6 22 0 80734 21 1 2
## 7 23 0 80734 21 1 2
## 8 24 0 80734 21 1 2
## 9 25 0 80734 21 1 2
## 10 26 0 80734 21 1 2
## 11 27 1 80734 21 1 2
## 12 17 0 80739 23 0 2
## 13 18 0 80739 23 0 2
## 14 19 0 80739 23 0 2
## 15 20 0 80739 23 0 2
## 16 21 0 80739 23 0 2
## 17 22 0 80739 23 0 2
## 18 23 0 80739 23 0 2
## 19 24 0 80739 23 1 2
## 20 25 0 80739 23 1 2
## 21 26 0 80739 23 1 2
## 22 27 0 80739 23 1 2
## 23 28 0 80739 23 1 2
## 24 29 0 80739 23 1 2
## 25 30 0 80739 23 1 2
## 26 31 1 80739 23 1 2
## 27 17 0 80745 NA 0 2
## 28 18 0 80745 NA 0 2
## 29 19 0 80745 NA 0 2
## 30 20 0 80745 NA 0 2
## 31 21 0 80745 NA 0 2
## 32 22 0 80745 NA 0 2
## 33 23 1 80745 NA 0 2
## 34 17 0 80770 NA 0 2
## 35 18 0 80770 NA 0 2
## 36 19 0 80770 NA 0 2
## 37 20 0 80770 NA 0 2
## 38 21 0 80770 NA 0 2
## 39 22 0 80770 NA 0 2
## 40 23 0 80770 NA 0 2
## 41 24 0 80770 NA 0 2
## 42 25 0 80770 NA 0 2
## 43 26 0 80770 NA 0 2
## 44 27 0 80770 NA 0 2
## 45 28 0 80770 NA 0 2
## 46 29 0 80770 NA 0 2
## 47 30 0 80770 NA 0 2
## 48 31 0 80770 NA 0 2
## 49 32 0 80770 NA 0 2
## 50 33 0 80770 NA 0 2
## 51 34 0 80770 NA 0 2
## 52 35 0 80770 NA 0 2
## 53 36 1 80770 NA 0 2
## 54 17 0 80775 NA 0 2
## 55 18 0 80775 NA 0 2
## 56 19 0 80775 NA 0 2
## 57 20 0 80775 NA 0 2
## 58 21 0 80775 NA 0 2
## 59 22 0 80775 NA 0 2
## 60 23 0 80775 NA 0 2
#recodes for simplicity
pydata$bachevent <- car::Recode(pydata$bacheventtv, recodes="1='BA/BS Degree'; 0='No College Degree'")
pydata$race <- car::Recode(pydata$race, recodes="1='NH Black Men'; 2='NH White men'")
#employ survey design
library(survey)
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
design<-svydesign(ids = ~secu,
strata = ~sest,
weights=~wgt2017_2019,
data=pydata,
nest=T)
Now I add weights to the data and create a visual for comparison.
#Create a graph based on your PY file showing how the probability of marrying at each age differs by whether or not respondent has a degree for non-Hispanic white and black men.
library(ggplot2)
raceprobs <- svyby(~evermarried, by = ~ageevent + race+ bachevent, design = design,
FUN = svymean, na.rm = TRUE)
raceprobs$lim <- 2*raceprobs$se
ggplot(data =raceprobs, aes(x = ageevent, y = evermarried, ymin=evermarried-lim, ymax=evermarried+lim, fill=bachevent ,color = bachevent, group = bachevent)) +
geom_line() + geom_ribbon(alpha=0.3,aes(color=NULL)) +
labs(title="Adjusted First Marriage Rates by Age and Race",
subtitle="Men Ages 18 to 49",
caption="Source: 2017-2019 NSFG Male Respondent File") +
xlab(label="Age at First Marriage") + facet_grid(~race)
#visual for first marriage rates by race and age
raceprobs <- svyby(~evermarried, by = ~ageevent + race, design = design,
FUN = svymean, na.rm = TRUE)
raceprobs$lim <- 2*raceprobs$se
ggplot(data =raceprobs, aes(x = ageevent, y = evermarried, ymin=evermarried-lim, ymax=evermarried+lim, fill=race,color = race, group = race)) +
geom_line() + geom_ribbon(alpha=0.3,aes(color=NULL)) +
labs(title="Adjusted First Marriage Rates by Age and Race",
subtitle="Men Ages 18 to 49",
caption="Source: 2017-2019 NSFG Male Respondent File") +
xlab(label="Age at First Marriage") +
ylab(label="Adjusted First Marriage Rate")
#add weights
pydata$wt<-pydata$wgt2017_2019/mean(pydata$wgt2017_2019)
#age group as a categorical method
pydata <- pydata %>%
mutate(agegrp = (case_when(ageevent %in% c(15:19) ~ '15-19',
ageevent %in% c(20:24) ~ '20-24',
ageevent %in% c(24:29) ~ '25-29',
ageevent %in% c(30:34) ~ '30-34',
ageevent %in% c(35:39) ~ '35-39',
ageevent %in% c(40:44) ~ '40-44',
ageevent %in% c(45:49) ~ '45-49'
) )
)
library(survey)
#filter for whites
pydatawhite<-pydata%>%
filter(race=='NH White men')
#employ survey design
library(survey)
designwhite<-svydesign(ids = ~secu,
strata = ~sest,
weights=~wgt2017_2019,
data=pydatawhite,
nest=T)
whitemodelcat <- svyglm(evermarried ~ agegrp+ bacheventtv1, family = "binomial", design=designwhite)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(whitemodelcat)
##
## Call:
## svyglm(formula = evermarried ~ agegrp + bacheventtv1, design = designwhite,
## family = "binomial")
##
## Survey design:
## svydesign(ids = ~secu, strata = ~sest, weights = ~wgt2017_2019,
## data = pydatawhite, nest = T)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.58366 0.18380 -24.938 < 2e-16 ***
## agegrp20-24 1.49366 0.19297 7.740 6.24e-10 ***
## agegrp25-29 1.97069 0.20205 9.754 7.12e-13 ***
## agegrp30-34 1.89582 0.20612 9.198 4.43e-12 ***
## agegrp35-39 1.57547 0.23656 6.660 2.68e-08 ***
## agegrp40-44 1.26562 0.38232 3.310 0.0018 **
## agegrp45-49 0.25017 0.82707 0.302 0.7636
## bacheventtv1 0.53492 0.08995 5.947 3.24e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.9999374)
##
## Number of Fisher Scoring iterations: 7
#filter for blacks
pydatablack<-pydata%>%
filter(race=='NH Black Men')
#employ survey design
library(survey)
designwblack<-svydesign(ids = ~secu,
strata = ~sest,
weights=~wgt2017_2019,
data=pydatablack,
nest=T)
blackmodelcat <- svyglm(evermarried ~ agegrp+ bacheventtv1, family = "binomial", design=designwblack)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(blackmodelcat)
##
## Call:
## svyglm(formula = evermarried ~ agegrp + bacheventtv1, design = designwblack,
## family = "binomial")
##
## Survey design:
## svydesign(ids = ~secu, strata = ~sest, weights = ~wgt2017_2019,
## data = pydatablack, nest = T)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.9226 0.3093 -15.917 < 2e-16 ***
## agegrp20-24 1.4852 0.3572 4.158 0.000142 ***
## agegrp25-29 1.7220 0.3554 4.845 1.54e-05 ***
## agegrp30-34 1.9484 0.3929 4.959 1.05e-05 ***
## agegrp35-39 1.6788 0.5012 3.349 0.001647 **
## agegrp40-44 0.7085 0.5931 1.195 0.238540
## agegrp45-49 -0.6999 1.1003 -0.636 0.527927
## bacheventtv1 0.6258 0.2877 2.175 0.034932 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1.00177)
##
## Number of Fisher Scoring iterations: 7
whitemodel <- svyglm(evermarried ~ factor(ageevent) + bacheventtv1, family = "binomial", design=designwhite)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(whitemodel)
##
## Call:
## svyglm(formula = evermarried ~ factor(ageevent) + bacheventtv1,
## design = designwhite, family = "binomial")
##
## Survey design:
## svydesign(ids = ~secu, strata = ~sest, weights = ~wgt2017_2019,
## data = pydatawhite, nest = T)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -15.64300 0.19395 -80.656 < 2e-16 ***
## factor(ageevent)16 10.92166 1.02500 10.655 1.88e-09 ***
## factor(ageevent)17 8.63370 0.56531 15.272 4.01e-12 ***
## factor(ageevent)18 11.42800 0.35387 32.294 < 2e-16 ***
## factor(ageevent)19 11.50596 0.30248 38.038 < 2e-16 ***
## factor(ageevent)20 12.02367 0.26334 45.659 < 2e-16 ***
## factor(ageevent)21 12.62631 0.26189 48.212 < 2e-16 ***
## factor(ageevent)22 12.54735 0.22750 55.153 < 2e-16 ***
## factor(ageevent)23 12.79541 0.23764 53.844 < 2e-16 ***
## factor(ageevent)24 12.71617 0.26189 48.556 < 2e-16 ***
## factor(ageevent)25 12.81299 0.24388 52.538 < 2e-16 ***
## factor(ageevent)26 13.08326 0.27405 47.740 < 2e-16 ***
## factor(ageevent)27 13.28442 0.22515 59.002 < 2e-16 ***
## factor(ageevent)28 13.03679 0.23938 54.461 < 2e-16 ***
## factor(ageevent)29 13.04545 0.23305 55.977 < 2e-16 ***
## factor(ageevent)30 12.97293 0.24721 52.477 < 2e-16 ***
## factor(ageevent)31 13.00493 0.30950 42.020 < 2e-16 ***
## factor(ageevent)32 13.02529 0.30928 42.115 < 2e-16 ***
## factor(ageevent)33 12.93682 0.23564 54.900 < 2e-16 ***
## factor(ageevent)34 12.91439 0.30934 41.748 < 2e-16 ***
## factor(ageevent)35 12.53168 0.40267 31.121 < 2e-16 ***
## factor(ageevent)36 13.17635 0.37589 35.054 < 2e-16 ***
## factor(ageevent)37 12.31615 0.44722 27.539 < 2e-16 ***
## factor(ageevent)38 12.67968 0.45761 27.709 < 2e-16 ***
## factor(ageevent)39 11.86523 0.62037 19.126 7.16e-14 ***
## factor(ageevent)40 13.10090 0.52609 24.902 5.73e-16 ***
## factor(ageevent)41 12.33916 0.52207 23.635 1.50e-15 ***
## factor(ageevent)42 11.27280 0.78906 14.286 1.29e-11 ***
## factor(ageevent)43 11.83152 0.82894 14.273 1.31e-11 ***
## factor(ageevent)44 11.09421 1.01228 10.960 1.18e-09 ***
## factor(ageevent)45 12.22257 0.78635 15.543 2.94e-12 ***
## factor(ageevent)46 10.77307 1.00789 10.689 1.78e-09 ***
## factor(ageevent)47 -0.04757 0.23116 -0.206 0.839
## factor(ageevent)48 -0.02009 0.25124 -0.080 0.937
## factor(ageevent)49 0.10060 0.24905 0.404 0.691
## bacheventtv1 0.48406 0.09439 5.128 5.98e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.9944435)
##
## Number of Fisher Scoring iterations: 14
blackmodel <- svyglm(evermarried ~ factor(ageevent)+ bacheventtv1, family = "binomial", design=designwblack)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(blackmodel)
##
## Call:
## svyglm(formula = evermarried ~ factor(ageevent) + bacheventtv1,
## design = designwblack, family = "binomial")
##
## Survey design:
## svydesign(ids = ~secu, strata = ~sest, weights = ~wgt2017_2019,
## data = pydatablack, nest = T)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.58689 0.32186 -54.641 < 2e-16 ***
## factor(ageevent)16 14.18362 1.08741 13.044 2.78e-10 ***
## factor(ageevent)17 11.02850 0.79506 13.871 1.06e-10 ***
## factor(ageevent)18 12.35023 0.54553 22.639 3.91e-14 ***
## factor(ageevent)19 13.41308 0.52304 25.644 4.98e-15 ***
## factor(ageevent)20 14.02314 0.57234 24.502 1.06e-14 ***
## factor(ageevent)21 13.56901 0.40557 33.457 < 2e-16 ***
## factor(ageevent)22 13.87240 0.62272 22.277 5.10e-14 ***
## factor(ageevent)23 13.89547 0.46695 29.758 4.18e-16 ***
## factor(ageevent)24 14.95257 0.46269 32.317 < 2e-16 ***
## factor(ageevent)25 14.49797 0.36159 40.095 < 2e-16 ***
## factor(ageevent)26 13.89443 0.63818 21.772 7.44e-14 ***
## factor(ageevent)27 14.86185 0.45085 32.964 < 2e-16 ***
## factor(ageevent)28 13.57922 0.52624 25.804 4.49e-15 ***
## factor(ageevent)29 14.69234 0.42682 34.423 < 2e-16 ***
## factor(ageevent)30 15.19558 0.44793 33.924 < 2e-16 ***
## factor(ageevent)31 13.84265 0.74841 18.496 1.07e-12 ***
## factor(ageevent)32 14.49105 0.60298 24.032 1.46e-14 ***
## factor(ageevent)33 14.25222 0.51736 27.548 1.51e-15 ***
## factor(ageevent)34 14.73877 0.52875 27.875 1.24e-15 ***
## factor(ageevent)35 13.93540 0.62836 22.177 5.49e-14 ***
## factor(ageevent)36 14.67667 0.79554 18.449 1.12e-12 ***
## factor(ageevent)37 15.03104 0.68001 22.104 5.80e-14 ***
## factor(ageevent)38 13.87241 0.73702 18.822 8.05e-13 ***
## factor(ageevent)39 13.03937 1.05620 12.345 6.51e-10 ***
## factor(ageevent)40 13.43071 0.77989 17.221 3.40e-12 ***
## factor(ageevent)41 13.71458 1.09438 12.532 5.17e-10 ***
## factor(ageevent)42 14.17699 0.92140 15.386 2.07e-11 ***
## factor(ageevent)43 -0.13174 0.38807 -0.339 0.738
## factor(ageevent)44 -0.13211 0.39274 -0.336 0.741
## factor(ageevent)45 13.07587 1.00834 12.968 3.05e-10 ***
## factor(ageevent)46 -0.14548 0.42649 -0.341 0.737
## factor(ageevent)47 -0.13258 0.44133 -0.300 0.768
## factor(ageevent)48 -0.04527 0.52971 -0.085 0.933
## factor(ageevent)49 -0.06686 0.50545 -0.132 0.896
## bacheventtv1 0.55673 0.30222 1.842 0.083 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.9728703)
##
## Number of Fisher Scoring iterations: 16
Here I calculate the predicted probabilities using weighted means. I calculate the phats for NH white and black men using their weighted means, then I swap the means using the white means for the black model and the black means for the white model.
#weighted predicted probabilities
newwhitedata <- pydatawhite %>%
group_by(ageevent) %>%
dplyr::summarize(bacheventtv1=weighted.mean(bacheventtv1,wgt2017_2019,na.rm = TRUE),n=n())
newwhitedata$phat <- predict(whitemodel, newwhitedata, type="response")
as.data.frame(newwhitedata)
## ageevent bacheventtv1 n phat
## 1 15 0.000000000 78 1.608163e-07
## 2 16 0.000000000 71 8.824615e-03
## 3 17 0.000000000 2325 9.026185e-04
## 4 18 0.000000000 2244 1.455723e-02
## 5 19 0.000000000 2141 1.571901e-02
## 6 20 0.002463456 2029 2.613138e-02
## 7 21 0.004149484 1936 4.676671e-02
## 8 22 0.048133357 1793 4.426214e-02
## 9 23 0.186357539 1660 5.967092e-02
## 10 24 0.275414376 1528 5.767535e-02
## 11 25 0.324431202 1383 6.458727e-02
## 12 26 0.345045659 1245 8.372925e-02
## 13 27 0.352063404 1119 1.008187e-01
## 14 28 0.342936438 994 8.015738e-02
## 15 29 0.343754279 878 8.082734e-02
## 16 30 0.358808404 778 7.611190e-02
## 17 31 0.345299320 688 7.792150e-02
## 18 32 0.333120468 603 7.896741e-02
## 19 33 0.321120336 514 7.237697e-02
## 20 34 0.316520014 429 7.073863e-02
## 21 35 0.313681261 380 4.929024e-02
## 22 36 0.335014456 330 9.075221e-02
## 23 37 0.287579928 282 3.963316e-02
## 24 38 0.274675930 250 5.570514e-02
## 25 39 0.256396389 212 2.524252e-02
## 26 40 0.280950682 189 8.270807e-02
## 27 41 0.267958041 160 4.015072e-02
## 28 42 0.264705208 137 1.417403e-02
## 29 43 0.280608899 112 2.470694e-02
## 30 44 0.294314448 95 1.205291e-02
## 31 45 0.287390482 80 3.621777e-02
## 32 46 0.274051554 63 8.686374e-03
## 33 47 0.206228594 47 1.694428e-07
## 34 48 0.102148101 29 1.656079e-07
## 35 49 0.034895322 16 1.808647e-07
newblackdata <- pydatablack %>%
group_by(ageevent) %>%
dplyr::summarize(bacheventtv1=weighted.mean(bacheventtv1,wgt2017_2019,na.rm = TRUE),n=n())
newblackdata$phat <- predict(blackmodel, newblackdata, type="response", se.fit = FALSE)
as.data.frame(newblackdata)
## ageevent bacheventtv1 n phat
## 1 15 0.00000000 32 2.302022e-08
## 2 16 0.00000000 33 3.219336e-02
## 3 17 0.00000000 889 1.416152e-03
## 4 18 0.00000000 835 5.289852e-03
## 5 19 0.00000000 784 1.516014e-02
## 6 20 0.00000000 735 2.755170e-02
## 7 21 0.00255432 693 1.769769e-02
## 8 22 0.02295590 651 2.408684e-02
## 9 23 0.06730008 619 2.523528e-02
## 10 24 0.11854169 577 7.120493e-02
## 11 25 0.14027244 527 4.693925e-02
## 12 26 0.15837934 474 2.648625e-02
## 13 27 0.17693694 437 6.745017e-02
## 14 28 0.19117968 395 1.981633e-02
## 15 29 0.19137694 345 5.797592e-02
## 16 30 0.17733780 303 9.173941e-02
## 17 31 0.18778020 261 2.558834e-02
## 18 32 0.18419645 235 4.772985e-02
## 19 33 0.20278867 220 3.835443e-02
## 20 34 0.16934375 193 5.987017e-02
## 21 35 0.16013393 168 2.758949e-02
## 22 36 0.15682518 153 5.609860e-02
## 23 37 0.12472773 137 7.681583e-02
## 24 38 0.13839131 118 2.564471e-02
## 25 39 0.14402949 108 1.134753e-02
## 26 40 0.12585182 93 1.652659e-02
## 27 41 0.16001604 79 2.224305e-02
## 28 42 0.16918474 69 3.503622e-02
## 29 43 0.16713494 57 2.214653e-08
## 30 44 0.17198014 51 2.219820e-08
## 31 45 0.17956506 45 1.199675e-02
## 32 46 0.14694798 36 2.160031e-08
## 33 47 0.12601381 29 2.162713e-08
## 34 48 0.08247742 19 2.303516e-08
## 35 49 0.15668918 11 2.349410e-08
#swap
#BLACK DATA WITH WHITE MODEL
whitemodel_blackdata<-pydatablack%>%
group_by(ageevent) %>%
dplyr::summarize(bacheventtv1=weighted.mean(bacheventtv1,wgt2017_2019,na.rm = TRUE),n=n())
whitemodel_blackdata$phat <- predict(whitemodel, newblackdata, type="response")
as.data.frame(whitemodel_blackdata)
## ageevent bacheventtv1 n phat
## 1 15 0.00000000 32 1.608163e-07
## 2 16 0.00000000 33 8.824615e-03
## 3 17 0.00000000 889 9.026185e-04
## 4 18 0.00000000 835 1.455723e-02
## 5 19 0.00000000 784 1.571901e-02
## 6 20 0.00000000 735 2.610105e-02
## 7 21 0.00255432 693 4.673230e-02
## 8 22 0.02295590 651 4.374943e-02
## 9 23 0.06730008 619 5.651812e-02
## 10 24 0.11854169 577 5.368431e-02
## 11 25 0.14027244 527 5.940614e-02
## 12 26 0.15837934 474 7.705283e-02
## 13 27 0.17693694 437 9.338965e-02
## 14 28 0.19117968 395 7.490543e-02
## 15 29 0.19137694 345 7.551411e-02
## 16 30 0.17733780 303 7.016037e-02
## 17 31 0.18778020 261 7.261638e-02
## 18 32 0.18419645 235 7.388094e-02
## 19 33 0.20278867 220 6.862426e-02
## 20 34 0.16934375 193 6.619640e-02
## 21 35 0.16013393 168 4.592168e-02
## 22 36 0.15682518 153 8.388165e-02
## 23 37 0.12472773 137 3.673923e-02
## 24 38 0.13839131 118 5.233500e-02
## 25 39 0.14402949 108 2.393819e-02
## 26 40 0.12585182 93 7.718773e-02
## 27 41 0.16001604 79 3.818475e-02
## 28 42 0.16918474 69 1.354225e-02
## 29 43 0.16713494 57 2.341736e-02
## 30 44 0.17198014 51 1.136778e-02
## 31 45 0.17956506 45 3.443935e-02
## 32 46 0.14694798 36 8.172285e-03
## 33 47 0.12601381 29 1.629897e-07
## 34 48 0.08247742 19 1.640385e-07
## 35 49 0.15668918 11 1.918482e-07
#WHITE DATA WITH BLACK MODEL
blackmodel_whitedata <- pydatawhite %>%
group_by(ageevent) %>%
dplyr::summarize(bacheventtv1=weighted.mean(bacheventtv1,wgt2017_2019,na.rm = TRUE),n=n())
blackmodel_whitedata$phat <- predict(blackmodel, newwhitedata, type="response")
as.data.frame(blackmodel_whitedata)
## ageevent bacheventtv1 n phat
## 1 15 0.000000000 78 2.302022e-08
## 2 16 0.000000000 71 3.219336e-02
## 3 17 0.000000000 2325 1.416152e-03
## 4 18 0.000000000 2244 5.289852e-03
## 5 19 0.000000000 2141 1.516014e-02
## 6 20 0.002463456 2029 2.758847e-02
## 7 21 0.004149484 1936 1.771314e-02
## 8 22 0.048133357 1793 2.441854e-02
## 9 23 0.186357539 1660 2.691807e-02
## 10 24 0.275414376 1528 7.720159e-02
## 11 25 0.324431202 1383 5.174487e-02
## 12 26 0.345045659 1245 2.930188e-02
## 13 27 0.352063404 1119 7.384757e-02
## 14 28 0.342936438 994 2.152571e-02
## 15 29 0.343754279 878 6.278653e-02
## 16 30 0.358808404 778 1.005119e-01
## 17 31 0.345299320 688 2.786829e-02
## 18 32 0.333120468 603 5.164274e-02
## 19 33 0.321120336 514 4.085953e-02
## 20 34 0.316520014 429 6.465182e-02
## 21 35 0.313681261 380 2.997788e-02
## 22 36 0.335014456 330 6.158881e-02
## 23 37 0.287579928 282 8.349707e-02
## 24 38 0.274675930 250 2.761036e-02
## 25 39 0.256396389 212 1.207124e-02
## 26 40 0.280950682 189 1.799023e-02
## 27 41 0.267958041 160 2.358822e-02
## 28 42 0.264705208 137 3.687927e-02
## 29 43 0.280608899 112 2.359075e-08
## 30 44 0.294314448 95 2.376273e-08
## 31 45 0.287390482 80 1.272952e-02
## 32 46 0.274051554 63 2.318417e-08
## 33 47 0.206228594 47 2.261484e-08
## 34 48 0.102148101 29 2.328881e-08
## 35 49 0.034895322 16 2.195386e-08
I calculate the probability of marrying before 50 (the sum of dx, highlighted in yellow) and carry out a decomposition in excel to show how much of the difference in first marriage rate between NH white men and NH black men is due to differences in whether or not they have a bachelor’s degree.