Assignment 3

Author

Jacob M. Souch

Published

November 20, 2023

Pre-processing

library(tidyverse)
Warning: package 'readr' was built under R version 4.3.2
Warning: package 'purrr' was built under R version 4.3.2
Warning: package 'dplyr' was built under R version 4.3.2
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.3     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.2     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)

Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test
library(survival)
NSFGM1719 <- read_csv("C:/Users/jacob/Downloads/2017_2019_Male_Data.csv")
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
  dat <- vroom(...)
  problems(dat)
Rows: 5206 Columns: 3009
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl (1939): caseid, rscrninf, rscrage, rscrhisp, rscrrace, age_a, age_r, age...
lgl (1070): sedbclc3, sedbclc4, sedwhlc3, sedwhlc4, sedconlc4, p2currwife, p...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data <- NSFGM1719  %>% select(caseid, age_r, hisagem,havedeg,fmarital, evrmarry,agemarr,agemarrn,agemarrn2,agemarrn3,agemarrn4, expschl, hisprace2,rscrrace, fmarno, earnba_y, intvwyear,sest,secu, wgt2017_2019)
data0 <- data
tabyl(data0,evrmarry,hisprace2, show_na = TRUE)
 evrmarry   1    2   3   4
        0 905 1353 659 328
        1 463 1074 240 184
data0 <- data0 %>%   
  mutate(ageevent = pmin(hisagem, agemarrn, agemarrn2, agemarrn3, agemarrn4, na.rm=TRUE))


data0$ageevent <- ifelse(data0$evrmarry == 0, data0$age_r,data0$ageevent)


data0 <- data0 %>% filter((!ageevent %in%(c(98,NA))))
print("Age Event")
[1] "Age Event"
tabyl(data0$ageevent)
 data0$ageevent   n     percent
             15 186 0.036399217
             16 192 0.037573386
             17 234 0.045792564
             18 271 0.053033268
             19 276 0.054011742
             20 199 0.038943249
             21 264 0.051663405
             22 248 0.048532290
             23 251 0.049119374
             24 267 0.052250489
             25 284 0.055577299
             26 240 0.046966732
             27 234 0.045792564
             28 239 0.046771037
             29 206 0.040313112
             30 197 0.038551859
             31 165 0.032289628
             32 162 0.031702544
             33 146 0.028571429
             34 108 0.021135029
             35  92 0.018003914
             36  82 0.016046967
             37  73 0.014285714
             38  64 0.012524462
             39  57 0.011154599
             40  58 0.011350294
             41  44 0.008610568
             42  50 0.009784736
             43  32 0.006262231
             44  31 0.006066536
             45  30 0.005870841
             46  35 0.006849315
             47  33 0.006457926
             48  29 0.005675147
             49  31 0.006066536
hist(data0$ageevent)

print("Ever Married")
[1] "Ever Married"
tabyl(data0$evrmarry)
 data0$evrmarry    n   percent
              0 3245 0.6350294
              1 1865 0.3649706
table(data0$ageevent, data0$evrmarry)
    
       0   1
  15 186   0
  16 186   6
  17 223  11
  18 229  42
  19 206  70
  20 115  84
  21 128 136
  22 127 121
  23 120 131
  24 128 139
  25 121 163
  26 104 136
  27 117 117
  28 130 109
  29  89 117
  30 109  88
  31  95  70
  32  99  63
  33  89  57
  34  61  47
  35  63  29
  36  51  31
  37  46  27
  38  49  15
  39  50   7
  40  42  16
  41  35   9
  42  42   8
  43  27   5
  44  27   4
  45  26   4
  46  32   3
  47  33   0
  48  29   0
  49  31   0
data0 <- data0 %>% filter(hisprace2 %in% c(2,3))
#How many years before interview year did the respondent earn a bachelor's degree?

 data0$ageba <- data0$age_r - (data0$intvwyear - data0$earnba_y)
 

hist(data0$ageba)

tabyl(data0$ageba)
 data0$ageba    n     percent valid_percent
          19    3 0.000921093   0.003726708
          20    7 0.002149217   0.008695652
          21   97 0.029782008   0.120496894
          22  253 0.077678846   0.314285714
          23  169 0.051888241   0.209937888
          24   87 0.026711698   0.108074534
          25   32 0.009824992   0.039751553
          26   31 0.009517961   0.038509317
          27   16 0.004912496   0.019875776
          28   15 0.004605465   0.018633540
          29   15 0.004605465   0.018633540
          30   14 0.004298434   0.017391304
          31   15 0.004605465   0.018633540
          32   12 0.003684372   0.014906832
          33   11 0.003377341   0.013664596
          34    4 0.001228124   0.004968944
          35    7 0.002149217   0.008695652
          36    2 0.000614062   0.002484472
          37    4 0.001228124   0.004968944
          39    1 0.000307031   0.001242236
          40    4 0.001228124   0.004968944
          44    3 0.000921093   0.003726708
          45    2 0.000614062   0.002484472
          46    1 0.000307031   0.001242236
          NA 2452 0.752840037            NA
summary(data0$ageba)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
  19.00   22.00   23.00   24.06   24.00   46.00    2452 

The histogram shows that the median age to earn a bachelor’s degree was 23 years. This is valid and an expected result.

Marriage BA/BS Configuration Pseudocode TVBA Value
Never Married No BA/BS 1 evrmarry = 0; NA 0
Never Married BA/BS 2 evrmarry = 0; ageba 1
Married No BA/BS 3 evrmarry = 0; NA 0
Married Married, BA/BS 4 evrmarry = 1; ageevent < ageba 0
Married BA/BS, Married 5 evrmarry = 1; ageevent > ageba 1
Married Married = BA/BS 6 evrmarry = 1; ageevent == ageba 0
#degmar assigns configurations 

data0 <- data0 %>%
  mutate(degmar = case_when(( evrmarry == 0) & (is.na(ageba)) ~ 1,
                             (evrmarry == 0) & (!is.na(ageba)) ~ 2,
                             (evrmarry == 1) & (is.na(ageba)) ~ 3,
                             (evrmarry == 1) & (!is.na(ageba)) & (ageevent < ageba) ~ 4,
                             (evrmarry == 1) & (!is.na(ageba)) & (ageevent > ageba) ~ 5,
                             (evrmarry == 1) & (!is.na(ageba)) & (ageevent == ageba) ~ 6))
pydata<-survSplit(data0, cut=c(15:49),end="ageevent",event="evrmarry")

pydata$babs <- is.na(pydata$ageba)
degmar Description Temporal Order Event Value
1 Unmarried, No BA/BS N/A 0
2 Unmarried, BA/BS ageevent(age_r) >= ageba 1
2 Unmarried, BA/BS ageevent(age_r) < ageba 0
3 Married, No BA/BS Married, N/A 0
4 Married, BA/BS Married, BA/BS 0
5 Married, BA/BS BA/BS, Married 1
5 Married, BA/BS Married, BA/BS 0
6 Married, BA/BS ageevent >= ageba 1
6 Married, BA/BS ageevent < ageba 0
pydata$tvba <- case_when(pydata$degmar == 1 ~ 0,
                               pydata$degmar == 3 ~ 0,
                               pydata$degmar == 4 ~ 0,
                               pydata$degmar == 2 & (pydata$ageevent >= pydata$ageba) ~ 1,
                               pydata$degmar == 2 & (pydata$ageevent < pydata$ageba) ~ 0,
                               pydata$degmar == 5 & (pydata$ageevent >= pydata$ageba) ~ 1,
                               pydata$degmar == 5 & (pydata$ageevent < pydata$ageba) ~ 0,
                               pydata$degmar == 6 & (pydata$ageevent >= pydata$ageba) ~ 1,
                               pydata$degmar == 6 & (pydata$ageevent < pydata$ageba) ~ 0)
pydata$tvba_test <- case_when(pydata$ageevent >= pydata$ageba ~ 1,
                              pydata$ageevent < pydata$ageba ~  0,
                             # pydata$ageevent == pydata$ageba ~ 0,
                              is.na(pydata$ageba) ~0)
raceprobs<- pydata %>%
    dplyr::group_by(ageevent, hisprace2) %>%
    dplyr::summarize(p=weighted.mean(evrmarry,wgt2017_2019,na.rm = TRUE),n=n())
`summarise()` has grouped output by 'ageevent'. You can override using the
`.groups` argument.
raceprobs$num <- raceprobs$p*(1-raceprobs$p)

raceprobs$sep <- sqrt(raceprobs$num/raceprobs$n)

raceprobs$me <- 2*raceprobs$sep
  
raceprobs$hisprace2 <- car::Recode(raceprobs$hisprace2, recodes="2='White Men'; 3='Black Men'; else=NA",
                       as.factor=T)

ggplot(data =raceprobs, aes(x = ageevent, y = p, ymin=p-me, ymax=p+me, fill=hisprace2,color = hisprace2, group = hisprace2)) +
     geom_line() + geom_ribbon(alpha=0.3,aes(color=NULL)) +
labs(title="Adjusted First Marriage Rates by Age and Race", 
       subtitle="Men Ages 15 to 49", 
       caption="Source: 2017-2019 NSFG Male Respondent File") +
       xlab(label="Age at First Marriage") +
  ylab(label="Adjusted First Marriage Rate")


White men marry at higher rates than Black at most ages.

educprobs<- pydata %>%
    group_by(ageevent, tvba) %>%
    dplyr::summarize(p=weighted.mean(evrmarry,wgt2017_2019,na.rm = TRUE),n=n())
`summarise()` has grouped output by 'ageevent'. You can override using the
`.groups` argument.
educprobs<- pydata %>%
    group_by(ageevent, tvba) %>%
    dplyr::summarize(p=weighted.mean(evrmarry,wgt2017_2019,na.rm = TRUE),n=n())
`summarise()` has grouped output by 'ageevent'. You can override using the
`.groups` argument.
educprobs$num <- educprobs$p*(1-educprobs$p)

educprobs$sep <- sqrt(educprobs$num/educprobs$n)

educprobs$me <- 2*educprobs$sep

educprobs$babs <- case_when(educprobs$tvba == 1 ~ "BA/BS Degree",educprobs$tvba  == 0 ~ "No College Degree")

educprobs$tvba <- as.factor(educprobs$tvba)

ggplot(data =educprobs, aes(x = ageevent, y = p, ymin=p-me, ymax=p+me, fill=tvba,color = tvba, group = tvba)) +
     geom_line() + geom_ribbon(alpha=0.3,aes(color=NULL)) +
labs(title="Adjusted First Marriage Rates by Age and Education (Time-Varying Education )", 
       subtitle="Men Ages 15 to 49", 
       caption="Source: 2017-2019 NSFG Male Respondent File") +
       xlab(label="Age at First Marriage") +
  ylab(label="Adjusted First Marriage Rate")

Across lifespan, those with bachelor’s degrees tend to marry at higher rates than those without.

educprobs<- pydata %>%
    group_by(ageevent, babs) %>%
    dplyr::summarize(p=weighted.mean(evrmarry,wgt2017_2019,na.rm = TRUE),n=n())
`summarise()` has grouped output by 'ageevent'. You can override using the
`.groups` argument.
educprobs$babs <- case_when(educprobs$babs == "FALSE" ~ "BA/BS Degree",educprobs$babs == "TRUE" ~ "No College Degree")

educprobs$num <- educprobs$p*(1-educprobs$p)

educprobs$sep <- sqrt(educprobs$num/educprobs$n)

educprobs$me <- 2*educprobs$sep


ggplot(data =educprobs, aes(x = ageevent, y = p, ymin=p-me, ymax=p+me, fill= babs,color = babs, group = babs)) +
     geom_line() + geom_ribbon(alpha=0.3,aes(color=NULL)) +
labs(title="Adjusted First Marriage Rates by Age and Education", 
       subtitle="Men Ages 15 to 49", 
       caption="Source: 2017-2019 NSFG Male Respondent File") +
       xlab(label="Age at First Marriage") +
  ylab(label="Adjusted First Marriage Rate")

raceeducprobs<- pydata %>%
    group_by(ageevent, hisprace2, tvba) %>%
    dplyr::summarize(p=weighted.mean(evrmarry,wgt2017_2019,na.rm = TRUE),n=n())
`summarise()` has grouped output by 'ageevent', 'hisprace2'. You can override
using the `.groups` argument.
raceeducprobs$num <- raceeducprobs$p*(1-raceeducprobs$p)

raceeducprobs$sep <- sqrt(raceeducprobs$num/raceeducprobs$n)

raceeducprobs$me <- 2*raceeducprobs$sep
  
raceeducprobs$tvba <- case_when(raceeducprobs$tvba == 1 ~ "BA/BS Degree",raceeducprobs$tvba == 0 ~ "No College Degree")

raceeducprobs$hisprace2 <- case_when(raceeducprobs$hisprace2 == 2 ~ "White Men",raceeducprobs$hisprace2 == 3 ~ "Black Men")

ggplot(data =raceeducprobs, aes(x = ageevent, y = p, ymin=p-me, ymax=p+me, fill=tvba,color = tvba, group = tvba)) +
     geom_line() + geom_ribbon(alpha=0.3,aes(color=NULL)) +
labs(title="Adjusted First Marriage Rates by Race and Education (Time Varying)", 
       subtitle="Men Ages 15 to 49", 
       caption="Source: 2017-2019 NSFG Male Respondent File") +
       xlab(label="Age at First Marriage") +
  ylab(label="Ajusted First Marriage Rate") + facet_grid(~hisprace2)

Discrete-Time Logit Models

pydata$new_weights <- pydata$wgt2017_2019/mean(pydata$wgt2017_2019)  
pydata$age_cat <-
  car::Recode(pydata$ageevent, recodes = 
              "15:19 = '15-19';
              20:24 = '20-24';
              25:29 = '25-29';
              30:34 = '30-34';
              35:39 = '35-39';
              40:44 = '40-44';
              45:49 = '45-49';
              else=NA", as.factor = T)

Model 1: First Marriage by Age, Race/Ethnicity

m1 <- glm(evrmarry~ age_cat + factor(hisprace2), family = "binomial", weights=new_weights, data = pydata) 
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(m1)

Call:
glm(formula = evrmarry ~ age_cat + factor(hisprace2), family = "binomial", 
    data = pydata, weights = new_weights)

Coefficients:
                   Estimate Std. Error z value Pr(>|z|)    
(Intercept)        -5.09536    0.10624 -47.962  < 2e-16 ***
age_cat20-24        2.06148    0.11493  17.937  < 2e-16 ***
age_cat25-29        2.69533    0.11442  23.556  < 2e-16 ***
age_cat30-34        2.64754    0.12348  21.440  < 2e-16 ***
age_cat35-39        2.32274    0.15183  15.298  < 2e-16 ***
age_cat40-44        2.10117    0.21108   9.954  < 2e-16 ***
age_cat45-49        0.90428    0.55708   1.623    0.105    
factor(hisprace2)3 -0.47912    0.07779  -6.159 7.33e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 13433  on 41591  degrees of freedom
Residual deviance: 12321  on 41584  degrees of freedom
AIC: 12429

Number of Fisher Scoring iterations: 7
exp(coef(m1))
       (Intercept)       age_cat20-24       age_cat25-29       age_cat30-34 
       0.006125076        7.857607557       14.810466192       14.119241199 
      age_cat35-39       age_cat40-44       age_cat45-49 factor(hisprace2)3 
      10.203621111        8.175702971        2.470153696        0.619328163 

Model 2: First Marriage by Age, Race/Ethnicity and Degree Status

m2 <- glm(evrmarry ~ age_cat + factor(hisprace2) + babs, family = "binomial", weights=new_weights, data = pydata) 
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(m2)

Call:
glm(formula = evrmarry ~ age_cat + factor(hisprace2) + babs, 
    family = "binomial", data = pydata, weights = new_weights)

Coefficients:
                   Estimate Std. Error z value Pr(>|z|)    
(Intercept)        -5.00103    0.11139 -44.895  < 2e-16 ***
age_cat20-24        2.05080    0.11500  17.834  < 2e-16 ***
age_cat25-29        2.68264    0.11452  23.425  < 2e-16 ***
age_cat30-34        2.64021    0.12352  21.375  < 2e-16 ***
age_cat35-39        2.32296    0.15184  15.299  < 2e-16 ***
age_cat40-44        2.10505    0.21110   9.972  < 2e-16 ***
age_cat45-49        0.91829    0.55711   1.648  0.09929 .  
factor(hisprace2)3 -0.44926    0.07859  -5.716 1.09e-08 ***
babsTRUE           -0.14676    0.05346  -2.745  0.00605 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 13433  on 41591  degrees of freedom
Residual deviance: 12313  on 41583  degrees of freedom
AIC: 12422

Number of Fisher Scoring iterations: 7
exp(coef(m2))
       (Intercept)       age_cat20-24       age_cat25-29       age_cat30-34 
       0.006730984        7.774121804       14.623583829       14.016204719 
      age_cat35-39       age_cat40-44       age_cat45-49 factor(hisprace2)3 
      10.205875841        8.207497601        2.505010107        0.638102549 
          babsTRUE 
       0.863503660 

Model 3: Model 2: First Marriage by Age, Race/Ethnicity and Time-Varying Degree

m3 <- glm(evrmarry ~ age_cat + factor(hisprace2) + tvba, family = "binomial", weights=new_weights, data = pydata) 
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(m3)

Call:
glm(formula = evrmarry ~ age_cat + factor(hisprace2) + tvba, 
    family = "binomial", data = pydata, weights = new_weights)

Coefficients:
                   Estimate Std. Error z value Pr(>|z|)    
(Intercept)        -5.10609    0.10631 -48.031  < 2e-16 ***
age_cat20-24        1.96850    0.11561  17.027  < 2e-16 ***
age_cat25-29        2.49191    0.11730  21.243  < 2e-16 ***
age_cat30-34        2.44622    0.12614  19.392  < 2e-16 ***
age_cat35-39        2.13813    0.15377  13.904  < 2e-16 ***
age_cat40-44        1.92692    0.21247   9.069  < 2e-16 ***
age_cat45-49        0.76649    0.55756   1.375    0.169    
factor(hisprace2)3 -0.39209    0.07855  -4.992 5.99e-07 ***
tvba                0.51762    0.05798   8.928  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 13433  on 41591  degrees of freedom
Residual deviance: 12244  on 41583  degrees of freedom
AIC: 12345

Number of Fisher Scoring iterations: 7
exp(coef(m3))
       (Intercept)       age_cat20-24       age_cat25-29       age_cat30-34 
       0.006059707        7.159947125       12.084303960       11.544610597 
      age_cat35-39       age_cat40-44       age_cat45-49 factor(hisprace2)3 
       8.483570054        6.868331984        2.152208945        0.675644327 
              tvba 
       1.678025123 
pydatarace <- split.data.frame(pydata, f = pydata$hisprace2)


whitepy <- pydatarace$`2`
blackpy <- pydatarace$`3`

Model 4: White Model, White Data

m4 <- glm(evrmarry ~ age_cat + tvba, family = "binomial", weights=new_weights, data = whitepy) 
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(m4)

Call:
glm(formula = evrmarry ~ age_cat + tvba, family = "binomial", 
    data = whitepy, weights = new_weights)

Coefficients:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept)  -5.10186    0.11292 -45.180   <2e-16 ***
age_cat20-24  1.95864    0.12346  15.864   <2e-16 ***
age_cat25-29  2.50161    0.12535  19.957   <2e-16 ***
age_cat30-34  2.42261    0.13543  17.888   <2e-16 ***
age_cat35-39  2.09886    0.16723  12.551   <2e-16 ***
age_cat40-44  2.03891    0.22448   9.083   <2e-16 ***
age_cat45-49  0.91816    0.58801   1.561    0.118    
tvba          0.51497    0.06087   8.461   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 11566  on 30309  degrees of freedom
Residual deviance: 10546  on 30302  degrees of freedom
AIC: 10722

Number of Fisher Scoring iterations: 7
exp(coef(m4))
 (Intercept) age_cat20-24 age_cat25-29 age_cat30-34 age_cat35-39 age_cat40-44 
 0.006085393  7.089658521 12.202168051 11.275287371  8.156887756  7.682265715 
age_cat45-49         tvba 
 2.504681283  1.673595082 

Model 5: Black Model, Black Data

m5 <- glm(evrmarry ~ age_cat + tvba, family = "binomial", weights=new_weights, data = blackpy) 
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(m5)

Call:
glm(formula = evrmarry ~ age_cat + tvba, family = "binomial", 
    data = blackpy, weights = new_weights)

Coefficients:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept)  -5.52834    0.30422 -18.172  < 2e-16 ***
age_cat20-24  2.04001    0.32991   6.184 6.27e-10 ***
age_cat25-29  2.42096    0.33390   7.250 4.15e-13 ***
age_cat30-34  2.59231    0.34856   7.437 1.03e-13 ***
age_cat35-39  2.34178    0.39916   5.867 4.44e-09 ***
age_cat40-44  1.21153    0.67723   1.789   0.0736 .  
age_cat45-49 -0.05683    1.77633  -0.032   0.9745    
tvba          0.54577    0.19258   2.834   0.0046 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1829.7  on 11281  degrees of freedom
Residual deviance: 1693.2  on 11274  degrees of freedom
AIC: 1631.3

Number of Fisher Scoring iterations: 8
exp(coef(m5))
 (Intercept) age_cat20-24 age_cat25-29 age_cat30-34 age_cat35-39 age_cat40-44 
 0.003972563  7.690692989 11.256714024 13.360631549 10.399740898  3.358625046 
age_cat45-49         tvba 
 0.944750012  1.725939661 

Matched Predicted Probabilities

PP1: White Model, White Data

pprobs_w <- expand.grid(age_cat=c("15-19","20-24","25-29","30-34","35-39","40-44","45-49"),tvba=0:1)

pprobs_w$phat <- predict(m4, pprobs_w, type="response")

pprobs_w
   age_cat tvba        phat
1    15-19    0 0.006048585
2    20-24    0 0.041358991
3    25-29    0 0.069122309
4    30-34    0 0.064208888
5    35-39    0 0.047290468
6    40-44    0 0.044661688
7    45-49    0 0.015013139
8    15-19    1 0.010081805
9    20-24    1 0.067342106
10   25-29    1 0.110536144
11   30-34    1 0.103004646
12   35-39    1 0.076701787
13   40-44    1 0.072562615
14   45-49    1 0.024874367

PP2: Black Model, Black Data

pprobs_b <- expand.grid(age_cat=c("15-19","20-24","25-29","30-34","35-39","40-44","45-49"),tvba=0:1)

pprobs_b$phat <- predict(m5, pprobs_b, type="response")

pprobs_b
   age_cat tvba        phat
1    15-19    0 0.003956844
2    20-24    0 0.029646024
3    25-29    0 0.042803900
4    30-34    0 0.050400876
5    35-39    0 0.039674527
6    40-44    0 0.013166675
7    45-49    0 0.003739046
8    15-19    1 0.006809714
9    20-24    1 0.050089266
10   25-29    1 0.071650548
11   30-34    1 0.083918463
12   35-39    1 0.066558858
13   40-44    1 0.022509734
14   45-49    1 0.006435899
pprobs_b$tvba <- recode(pprobs_b$tvba, `1` = "Bachelor's Degree", `0`=  "No Bachelor's Degree")

ggplot(pprobs_b, aes(x = age_cat, y = phat, group=tvba, fill=tvba,color = tvba)) +
  geom_point() +
  geom_line() +
  labs(x = "Age", y = "Predicted First Marriage Probability", title = "Predicted First Marriage Probabilities by Education for Black Men")

PP3: Weighted White Probabilities

wtddata_w <- whitepy %>%                           
  group_by(age_cat) %>% 
  dplyr::summarize(tvba=weighted.mean(tvba,wgt2017_2019,na.rm = TRUE),n=n())

wtddata_w$phat <- predict(m4, wtddata_w, type="response")

as.data.frame(wtddata_w)
  age_cat         tvba     n        phat
1   15-19 0.0004530251 11139 0.006049987
2   20-24 0.1590766209  8611 0.044731842
3   25-29 0.3684261830  5423 0.082373970
4   30-34 0.3642379080  2891 0.076443677
5   35-39 0.3322900790  1391 0.055625528
6   40-44 0.3039027132   646 0.051835760
7   45-49 0.2210554539   209 0.016792882

PP4: Weighted Black Probabilities

wtddata_b <- blackpy %>%                           
  group_by(age_cat) %>% 
  dplyr::summarize(tvba=weighted.mean(tvba,wgt2017_2019,na.rm = TRUE),n=n())

wtddata_b$phat <- predict(m5, wtddata_b, type="response")

as.data.frame(wtddata_b)
  age_cat       tvba    n        phat
1   15-19 0.00000000 4028 0.003956844
2   20-24 0.06555702 3021 0.030692792
3   25-29 0.16775968 2009 0.046716248
4   30-34 0.16704397 1114 0.054947438
5   35-39 0.13738011  639 0.042631917
6   40-44 0.15986562  336 0.014349850
7   45-49 0.14881902  135 0.004054127

PP5: Crossed Predicted Probabilities: Black Model, White Data

wtddata_bxw <- whitepy %>%                           
  group_by(age_cat) %>% 
  dplyr::summarize(tvba=weighted.mean(tvba,wgt2017_2019,na.rm = TRUE),n=n())

wtddata_bxw$phat <- predict(m5, wtddata_bxw, type="response")

as.data.frame(wtddata_bxw)
  age_cat         tvba     n        phat
1   15-19 0.0004530251 11139 0.003957819
2   20-24 0.1590766209  8611 0.032248201
3   25-29 0.3684261830  5423 0.051842886
4   30-34 0.3642379080  2891 0.060811308
5   35-39 0.3322900790  1391 0.047191128
6   40-44 0.3039027132   646 0.015505238
7   45-49 0.2210554539   209 0.004216464

PP6: Crossed Predicted Probabilities: White Model, Black Data

wtddata_wxb <- blackpy %>%                           
  group_by(age_cat) %>% 
  dplyr::summarize(tvba=weighted.mean(tvba,wgt2017_2019,na.rm = TRUE),n=n())

wtddata_wxb$phat <- predict(m4, wtddata_wxb, type="response")

as.data.frame(wtddata_wxb)
  age_cat       tvba    n        phat
1   15-19 0.00000000 4028 0.006048585
2   20-24 0.06555702 3021 0.042718449
3   25-29 0.16775968 2009 0.074892341
4   30-34 0.16704397 1114 0.069575572
5   35-39 0.13738011  639 0.050581964
6   40-44 0.15986562  336 0.048308985
7   45-49 0.14881902  135 0.016189593
dfs <- list(wtddata_w,wtddata_b,wtddata_wxb,wtddata_bxw)

Life Table: White Model, White Data

d = wtddata_w

d$Lx[1] <- 1
Warning: Unknown or uninitialised column: `Lx`.
d$Dx[1] <- d$Lx[1] * d$phat[1]
Warning: Unknown or uninitialised column: `Dx`.
for (i in 2:nrow(d)) {
  d$Dx[i] <- d$Lx[i] * d$phat[i]
  d$Lx[i] <- d$Lx[i-1] * d$Dx[i-1]
  
}
LT_wxw <- as.data.frame(d)
LT_wxw
  age_cat         tvba     n        phat           Lx          Dx
1   15-19 0.0004530251 11139 0.006049987 1.000000e+00 0.006049987
2   20-24 0.1590766209  8611 0.044731842 6.049987e-03 0.044731842
3   25-29 0.3684261830  5423 0.082373970 2.706271e-04 0.082373970
4   30-34 0.3642379080  2891 0.076443677 2.229263e-05 0.076443677
5   35-39 0.3322900790  1391 0.055625528 1.704130e-06 0.055625528
6   40-44 0.3039027132   646 0.051835760 9.479315e-08 0.051835760
7   45-49 0.2210554539   209 0.016792882 4.913675e-09 0.016792882

Life Table: Black Model, Black Data

d = wtddata_b

d$Lx[1] <- 1
Warning: Unknown or uninitialised column: `Lx`.
d$Dx[1] <- d$Lx[1] * d$phat[1]
Warning: Unknown or uninitialised column: `Dx`.
for (i in 2:nrow(d)) {
  d$Dx[i] <- d$Lx[i] * d$phat[i]
  d$Lx[i] <- d$Lx[i-1] * d$Dx[i-1]
  
}
LT_bxb <- as.data.frame(d)
LT_bxb
  age_cat       tvba    n        phat           Lx          Dx
1   15-19 0.00000000 4028 0.003956844 1.000000e+00 0.003956844
2   20-24 0.06555702 3021 0.030692792 3.956844e-03 0.030692792
3   25-29 0.16775968 2009 0.046716248 1.214466e-04 0.046716248
4   30-34 0.16704397 1114 0.054947438 5.673529e-06 0.054947438
5   35-39 0.13738011  639 0.042631917 3.117459e-07 0.042631917
6   40-44 0.15986562  336 0.014349850 1.329033e-08 0.014349850
7   45-49 0.14881902  135 0.004054127 1.907142e-10 0.004054127

Life Table: White Model, Black Data

d = wtddata_wxb


d$Lx[1] <- 1
Warning: Unknown or uninitialised column: `Lx`.
d$Lx[1] <- 1

d$Dx[1] <- d$Lx[1] * d$phat[1]
Warning: Unknown or uninitialised column: `Dx`.
for (i in 2:nrow(d)) {
  d$Dx[i] <- d$Lx[i] * d$phat[i]
  d$Lx[i] <- d$Lx[i-1] * d$Dx[i-1]
  
}
LT_wxb <- as.data.frame(d)
LT_wxb
  age_cat       tvba    n        phat           Lx          Dx
1   15-19 0.00000000 4028 0.006048585 1.000000e+00 0.006048585
2   20-24 0.06555702 3021 0.042718449 6.048585e-03 0.042718449
3   25-29 0.16775968 2009 0.074892341 2.583862e-04 0.074892341
4   30-34 0.16704397 1114 0.069575572 1.935114e-05 0.069575572
5   35-39 0.13738011  639 0.050581964 1.346367e-06 0.050581964
6   40-44 0.15986562  336 0.048308985 6.810188e-08 0.048308985
7   45-49 0.14881902  135 0.016189593 3.289933e-09 0.016189593

Life Table: Black Model, White Data

d = wtddata_bxw

d$Lx[1] <- 1
Warning: Unknown or uninitialised column: `Lx`.
d$Dx[1] <- d$Lx[1] * d$phat[1]
Warning: Unknown or uninitialised column: `Dx`.
for (i in 2:nrow(d)) {
  d$Dx[i] <- d$Lx[i] * d$phat[i]
  d$Lx[i] <- d$Lx[i-1] * d$Dx[i-1]
  
}
LT_bxw <- as.data.frame(d)
LT_bxw
  age_cat         tvba     n        phat           Lx          Dx
1   15-19 0.0004530251 11139 0.003957819 1.000000e+00 0.003957819
2   20-24 0.1590766209  8611 0.032248201 3.957819e-03 0.032248201
3   25-29 0.3684261830  5423 0.051842886 1.276325e-04 0.051842886
4   30-34 0.3642379080  2891 0.060811308 6.616839e-06 0.060811308
5   35-39 0.3322900790  1391 0.047191128 4.023786e-07 0.047191128
6   40-44 0.3039027132   646 0.015505238 1.898870e-08 0.015505238
7   45-49 0.2210554539   209 0.004216464 2.944243e-10 0.004216464
v1 = (sum(LT_bxw$Dx) - sum(LT_bxb$Dx)) / (sum(LT_wxw$Dx) - sum(LT_bxb$Dx))

v2 = (sum(LT_wxw$Dx)-sum(LT_wxb$Dx)) / (sum(LT_wxw$Dx)-sum(LT_bxb$Dx))
gap <-mean(v1,v2)

1-gap
[1] 0.8650313

Decomposition analysis showed that 13.5% of the difference in age-adjusted first marriage rates was due to differences in educational attainment of bachelor’s degree.

Single-Year Age Groups

Model 1a: First Marriage by Age, Race/Ethnicity

m1a <- glm(evrmarry~ factor(ageevent) + factor(hisprace2), family = "binomial", weights=new_weights, data = pydata) 
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(m1a)

Call:
glm(formula = evrmarry ~ factor(ageevent) + factor(hisprace2), 
    family = "binomial", data = pydata, weights = new_weights)

Coefficients:
                    Estimate Std. Error z value Pr(>|z|)    
(Intercept)         -18.6195   120.0266  -0.155    0.877    
factor(ageevent)16    9.9576   120.0346   0.083    0.934    
factor(ageevent)17   11.8227   120.0279   0.098    0.922    
factor(ageevent)18   14.3925   120.0267   0.120    0.905    
factor(ageevent)19   14.5123   120.0267   0.121    0.904    
factor(ageevent)20   15.1136   120.0267   0.126    0.900    
factor(ageevent)21   15.5132   120.0267   0.129    0.897    
factor(ageevent)22   15.5458   120.0267   0.130    0.897    
factor(ageevent)23   15.7665   120.0267   0.131    0.895    
factor(ageevent)24   15.9591   120.0267   0.133    0.894    
factor(ageevent)25   16.0144   120.0267   0.133    0.894    
factor(ageevent)26   16.2365   120.0267   0.135    0.892    
factor(ageevent)27   16.4904   120.0267   0.137    0.891    
factor(ageevent)28   16.0918   120.0267   0.134    0.893    
factor(ageevent)29   16.2436   120.0267   0.135    0.892    
factor(ageevent)30   16.2934   120.0267   0.136    0.892    
factor(ageevent)31   16.1718   120.0267   0.135    0.893    
factor(ageevent)32   16.2523   120.0267   0.135    0.892    
factor(ageevent)33   15.9882   120.0268   0.133    0.894    
factor(ageevent)34   15.9980   120.0268   0.133    0.894    
factor(ageevent)35   15.6497   120.0268   0.130    0.896    
factor(ageevent)36   16.3547   120.0268   0.136    0.892    
factor(ageevent)37   15.7793   120.0269   0.131    0.895    
factor(ageevent)38   15.8053   120.0269   0.132    0.895    
factor(ageevent)39   14.9812   120.0274   0.125    0.901    
factor(ageevent)40   16.0764   120.0270   0.134    0.893    
factor(ageevent)41   15.4957   120.0273   0.129    0.897    
factor(ageevent)42   14.7309   120.0283   0.123    0.902    
factor(ageevent)43   16.1002   120.0272   0.134    0.893    
factor(ageevent)44   14.1291   120.0310   0.118    0.906    
factor(ageevent)45   15.3775   120.0281   0.128    0.898    
factor(ageevent)46   13.8339   120.0351   0.115    0.908    
factor(ageevent)47    0.1860   894.1825   0.000    1.000    
factor(ageevent)48    0.1807  1144.0146   0.000    1.000    
factor(ageevent)49    0.2201  1591.8960   0.000    1.000    
factor(hisprace2)3   -0.4794     0.0779  -6.155 7.53e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 13433  on 41591  degrees of freedom
Residual deviance: 12091  on 41556  degrees of freedom
AIC: 12250

Number of Fisher Scoring iterations: 17

Model 2a: First Marriage by Age, Race/Ethnicity and Degree Status

m2a <- glm(evrmarry ~ factor(ageevent) + factor(hisprace2) + babs, family = "binomial", weights=new_weights, data = pydata) 
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(m2a)

Call:
glm(formula = evrmarry ~ factor(ageevent) + factor(hisprace2) + 
    babs, family = "binomial", data = pydata, weights = new_weights)

Coefficients:
                     Estimate Std. Error z value Pr(>|z|)    
(Intercept)         -18.53173  119.98899  -0.154   0.8773    
factor(ageevent)16    9.95583  119.99694   0.083   0.9339    
factor(ageevent)17   11.82008  119.99025   0.099   0.9215    
factor(ageevent)18   14.38865  119.98909   0.120   0.9045    
factor(ageevent)19   14.50642  119.98908   0.121   0.9038    
factor(ageevent)20   15.10553  119.98904   0.126   0.8998    
factor(ageevent)21   15.50254  119.98903   0.129   0.8972    
factor(ageevent)22   15.53192  119.98903   0.129   0.8970    
factor(ageevent)23   15.75120  119.98903   0.131   0.8956    
factor(ageevent)24   15.94309  119.98902   0.133   0.8943    
factor(ageevent)25   15.99864  119.98903   0.133   0.8939    
factor(ageevent)26   16.22078  119.98902   0.135   0.8925    
factor(ageevent)27   16.47671  119.98902   0.137   0.8908    
factor(ageevent)28   16.07921  119.98904   0.134   0.8934    
factor(ageevent)29   16.23114  119.98904   0.135   0.8924    
factor(ageevent)30   16.28093  119.98905   0.136   0.8921    
factor(ageevent)31   16.16085  119.98906   0.135   0.8929    
factor(ageevent)32   16.24404  119.98907   0.135   0.8923    
factor(ageevent)33   15.98059  119.98911   0.133   0.8940    
factor(ageevent)34   15.99265  119.98913   0.133   0.8940    
factor(ageevent)35   15.64530  119.98921   0.130   0.8963    
factor(ageevent)36   16.34769  119.98912   0.136   0.8916    
factor(ageevent)37   15.77880  119.98927   0.132   0.8954    
factor(ageevent)38   15.80559  119.98930   0.132   0.8952    
factor(ageevent)39   14.98249  119.98978   0.125   0.9006    
factor(ageevent)40   16.07684  119.98932   0.134   0.8934    
factor(ageevent)41   15.49679  119.98967   0.129   0.8972    
factor(ageevent)42   14.73134  119.99064   0.123   0.9023    
factor(ageevent)43   16.09888  119.98954   0.134   0.8933    
factor(ageevent)44   14.13154  119.99337   0.118   0.9063    
factor(ageevent)45   15.38115  119.99048   0.128   0.8980    
factor(ageevent)46   13.84071  119.99747   0.115   0.9082    
factor(ageevent)47    0.19562  893.98009   0.000   0.9998    
factor(ageevent)48    0.20364 1144.02293   0.000   0.9999    
factor(ageevent)49    0.24773 1592.89234   0.000   0.9999    
factor(hisprace2)3   -0.45242    0.07870  -5.749    9e-09 ***
babsTRUE             -0.13180    0.05357  -2.460   0.0139 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 13433  on 41591  degrees of freedom
Residual deviance: 12085  on 41555  degrees of freedom
AIC: 12246

Number of Fisher Scoring iterations: 17
exp(coef(m2a))
       (Intercept) factor(ageevent)16 factor(ageevent)17 factor(ageevent)18 
      8.948951e-09       2.107479e+04       1.359546e+05       1.773832e+06 
factor(ageevent)19 factor(ageevent)20 factor(ageevent)21 factor(ageevent)22 
      1.995531e+06       3.632870e+06       5.403389e+06       5.564515e+06 
factor(ageevent)23 factor(ageevent)24 factor(ageevent)25 factor(ageevent)26 
      6.928833e+06       8.394528e+06       8.874046e+06       1.108142e+07 
factor(ageevent)27 factor(ageevent)28 factor(ageevent)29 factor(ageevent)30 
      1.431348e+07       9.618654e+06       1.119679e+07       1.176846e+07 
factor(ageevent)31 factor(ageevent)32 factor(ageevent)33 factor(ageevent)34 
      1.043682e+07       1.134224e+07       8.715278e+06       8.821029e+06 
factor(ageevent)35 factor(ageevent)36 factor(ageevent)37 factor(ageevent)38 
      6.232574e+06       1.258089e+07       7.122698e+06       7.316149e+06 
factor(ageevent)39 factor(ageevent)40 factor(ageevent)41 factor(ageevent)42 
      3.212278e+06       9.595869e+06       5.372425e+06       2.498848e+06 
factor(ageevent)43 factor(ageevent)44 factor(ageevent)45 factor(ageevent)46 
      9.809634e+06       1.371674e+06       4.785716e+06       1.025522e+06 
factor(ageevent)47 factor(ageevent)48 factor(ageevent)49 factor(hisprace2)3 
      1.216068e+00       1.225863e+00       1.281118e+00       6.360873e-01 
          babsTRUE 
      8.765192e-01 

Model 3a: First Marriage by Age, Race/Ethnicity and Time-Varying Degree

m3a <- glm(evrmarry ~ factor(ageevent) + factor(hisprace2) + tvba, family = "binomial", weights=new_weights, data = pydata) 
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(m3a)

Call:
glm(formula = evrmarry ~ factor(ageevent) + factor(hisprace2) + 
    tvba, family = "binomial", data = pydata, weights = new_weights)

Coefficients:
                     Estimate Std. Error z value Pr(>|z|)    
(Intercept)         -18.63092  120.13100  -0.155    0.877    
factor(ageevent)16    9.95935  120.13894   0.083    0.934    
factor(ageevent)17   11.82451  120.13226   0.098    0.922    
factor(ageevent)18   14.39439  120.13110   0.120    0.905    
factor(ageevent)19   14.51299  120.13109   0.121    0.904    
factor(ageevent)20   15.11254  120.13105   0.126    0.900    
factor(ageevent)21   15.48975  120.13104   0.129    0.897    
factor(ageevent)22   15.45139  120.13104   0.129    0.898    
factor(ageevent)23   15.62664  120.13104   0.130    0.897    
factor(ageevent)24   15.79390  120.13104   0.131    0.895    
factor(ageevent)25   15.84079  120.13104   0.132    0.895    
factor(ageevent)26   16.05401  120.13104   0.134    0.894    
factor(ageevent)27   16.31220  120.13103   0.136    0.892    
factor(ageevent)28   15.91655  120.13105   0.132    0.895    
factor(ageevent)29   16.06296  120.13105   0.134    0.894    
factor(ageevent)30   16.11083  120.13106   0.134    0.893    
factor(ageevent)31   15.99173  120.13107   0.133    0.894    
factor(ageevent)32   16.08123  120.13108   0.134    0.894    
factor(ageevent)33   15.81470  120.13112   0.132    0.895    
factor(ageevent)34   15.83119  120.13114   0.132    0.895    
factor(ageevent)35   15.48311  120.13122   0.129    0.897    
factor(ageevent)36   16.18123  120.13114   0.135    0.893    
factor(ageevent)37   15.62435  120.13128   0.130    0.897    
factor(ageevent)38   15.65359  120.13131   0.130    0.896    
factor(ageevent)39   14.83283  120.13179   0.123    0.902    
factor(ageevent)40   15.92477  120.13134   0.133    0.895    
factor(ageevent)41   15.34555  120.13168   0.128    0.898    
factor(ageevent)42   14.57782  120.13265   0.121    0.903    
factor(ageevent)43   15.94312  120.13155   0.133    0.894    
factor(ageevent)44   13.98206  120.13538   0.116    0.907    
factor(ageevent)45   15.23601  120.13249   0.127    0.899    
factor(ageevent)46   13.70341  120.13948   0.114    0.909    
factor(ageevent)47    0.06866  892.42352   0.000    1.000    
factor(ageevent)48    0.11204 1144.23553   0.000    1.000    
factor(ageevent)49    0.17248 1593.94306   0.000    1.000    
factor(hisprace2)3   -0.39936    0.07868  -5.076 3.86e-07 ***
tvba                  0.46280    0.05883   7.866 3.65e-15 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 13433  on 41591  degrees of freedom
Residual deviance: 12031  on 41555  degrees of freedom
AIC: 12184

Number of Fisher Scoring iterations: 17
exp(coef(m3a))
       (Intercept) factor(ageevent)16 factor(ageevent)17 factor(ageevent)18 
      8.103882e-09       2.114908e+04       1.365589e+05       1.784032e+06 
factor(ageevent)19 factor(ageevent)20 factor(ageevent)21 factor(ageevent)22 
      2.008678e+06       3.658410e+06       5.334734e+06       5.133986e+06 
factor(ageevent)23 factor(ageevent)24 factor(ageevent)25 factor(ageevent)26 
      6.117322e+06       7.231077e+06       7.578254e+06       9.379282e+06 
factor(ageevent)27 factor(ageevent)28 factor(ageevent)29 factor(ageevent)30 
      1.214221e+07       8.174656e+06       9.463574e+06       9.927566e+06 
factor(ageevent)31 factor(ageevent)32 factor(ageevent)33 factor(ageevent)34 
      8.812917e+06       9.638084e+06       7.383106e+06       7.505794e+06 
factor(ageevent)35 factor(ageevent)36 factor(ageevent)37 factor(ageevent)38 
      5.299453e+06       1.065174e+07       6.103384e+06       6.284451e+06 
factor(ageevent)39 factor(ageevent)40 factor(ageevent)41 factor(ageevent)42 
      2.765771e+06       8.242171e+06       4.618366e+06       2.143222e+06 
factor(ageevent)43 factor(ageevent)44 factor(ageevent)45 factor(ageevent)46 
      8.394773e+06       1.181224e+06       4.139182e+06       8.939582e+05 
factor(ageevent)47 factor(ageevent)48 factor(ageevent)49 factor(hisprace2)3 
      1.071070e+00       1.118559e+00       1.188249e+00       6.707498e-01 
              tvba 
      1.588512e+00 

Model 4a: White Model, White Data

m4a <- glm(evrmarry ~ factor(ageevent) + tvba, family = "binomial", weights=new_weights, data = whitepy) 
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(m4a)

Call:
glm(formula = evrmarry ~ factor(ageevent) + tvba, family = "binomial", 
    data = whitepy, weights = new_weights)

Coefficients:
                     Estimate Std. Error z value Pr(>|z|)    
(Intercept)         -19.72851  222.33722  -0.089    0.929    
factor(ageevent)16   -0.00145  316.76068   0.000    1.000    
factor(ageevent)17   12.75003  222.33814   0.057    0.954    
factor(ageevent)18   15.54549  222.33728   0.070    0.944    
factor(ageevent)19   15.58621  222.33728   0.070    0.944    
factor(ageevent)20   16.13354  222.33725   0.073    0.942    
factor(ageevent)21   16.64275  222.33724   0.075    0.940    
factor(ageevent)22   16.57899  222.33724   0.075    0.941    
factor(ageevent)23   16.80343  222.33724   0.076    0.940    
factor(ageevent)24   16.75254  222.33724   0.075    0.940    
factor(ageevent)25   16.90658  222.33724   0.076    0.939    
factor(ageevent)26   17.21326  222.33724   0.077    0.938    
factor(ageevent)27   17.39459  222.33724   0.078    0.938    
factor(ageevent)28   17.09496  222.33725   0.077    0.939    
factor(ageevent)29   17.13826  222.33725   0.077    0.939    
factor(ageevent)30   17.08453  222.33726   0.077    0.939    
factor(ageevent)31   17.16046  222.33726   0.077    0.938    
factor(ageevent)32   17.15441  222.33727   0.077    0.939    
factor(ageevent)33   16.91174  222.33729   0.076    0.939    
factor(ageevent)34   16.95007  222.33731   0.076    0.939    
factor(ageevent)35   16.58379  222.33736   0.075    0.941    
factor(ageevent)36   17.27385  222.33730   0.078    0.938    
factor(ageevent)37   16.42750  222.33745   0.074    0.941    
factor(ageevent)38   16.79473  222.33741   0.076    0.940    
factor(ageevent)39   15.97975  222.33771   0.072    0.943    
factor(ageevent)40   17.15887  222.33741   0.077    0.938    
factor(ageevent)41   16.47147  222.33766   0.074    0.941    
factor(ageevent)42   15.41032  222.33863   0.069    0.945    
factor(ageevent)43   17.26967  222.33752   0.078    0.938    
factor(ageevent)44   15.31609  222.33960   0.069    0.945    
factor(ageevent)45   16.46869  222.33813   0.074    0.941    
factor(ageevent)46   15.05585  222.34181   0.068    0.946    
factor(ageevent)47    0.00309 1815.29897   0.000    1.000    
factor(ageevent)48    0.04428 2315.59401   0.000    1.000    
factor(ageevent)49    0.13484 3227.17461   0.000    1.000    
tvba                  0.46300    0.06186   7.485 7.15e-14 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 11566  on 30309  degrees of freedom
Residual deviance: 10350  on 30274  degrees of freedom
AIC: 10574

Number of Fisher Scoring iterations: 18
exp(coef(m4a))
       (Intercept) factor(ageevent)16 factor(ageevent)17 factor(ageevent)18 
      2.704058e-09       9.985512e-01       3.445608e+05       5.640545e+06 
factor(ageevent)19 factor(ageevent)20 factor(ageevent)21 factor(ageevent)22 
      5.874951e+06       1.015565e+07       1.689870e+07       1.585497e+07 
factor(ageevent)23 factor(ageevent)24 factor(ageevent)25 factor(ageevent)26 
      1.984442e+07       1.885970e+07       2.200067e+07       2.989681e+07 
factor(ageevent)27 factor(ageevent)28 factor(ageevent)29 factor(ageevent)30 
      3.584040e+07       2.656126e+07       2.773642e+07       2.628547e+07 
factor(ageevent)31 factor(ageevent)32 factor(ageevent)33 factor(ageevent)34 
      2.835922e+07       2.818819e+07       2.211433e+07       2.297852e+07 
factor(ageevent)35 factor(ageevent)36 factor(ageevent)37 factor(ageevent)38 
      1.593128e+07       3.176404e+07       1.362611e+07       1.967241e+07 
factor(ageevent)39 factor(ageevent)40 factor(ageevent)41 factor(ageevent)42 
      8.708008e+06       2.831399e+07       1.423863e+07       4.927371e+06 
factor(ageevent)43 factor(ageevent)44 factor(ageevent)45 factor(ageevent)46 
      3.163156e+07       4.484293e+06       1.419910e+07       3.456797e+06 
factor(ageevent)47 factor(ageevent)48 factor(ageevent)49               tvba 
      1.003095e+00       1.045276e+00       1.144349e+00       1.588831e+00 

Model 5: Black Model, Black Data

m5a <- glm(evrmarry ~ factor(ageevent) + tvba, family = "binomial", weights=new_weights, data = blackpy) 
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(m5a)

Call:
glm(formula = evrmarry ~ factor(ageevent) + tvba, family = "binomial", 
    data = blackpy, weights = new_weights)

Coefficients:
                     Estimate Std. Error z value Pr(>|z|)  
(Intercept)        -1.956e+01  4.425e+02  -0.044   0.9647  
factor(ageevent)16  1.257e+01  4.425e+02   0.028   0.9773  
factor(ageevent)17  1.310e+01  4.425e+02   0.030   0.9764  
factor(ageevent)18  1.443e+01  4.425e+02   0.033   0.9740  
factor(ageevent)19  1.520e+01  4.425e+02   0.034   0.9726  
factor(ageevent)20  1.607e+01  4.425e+02   0.036   0.9710  
factor(ageevent)21  1.550e+01  4.425e+02   0.035   0.9721  
factor(ageevent)22  1.572e+01  4.425e+02   0.036   0.9717  
factor(ageevent)23  1.525e+01  4.425e+02   0.034   0.9725  
factor(ageevent)24  1.700e+01  4.425e+02   0.038   0.9693  
factor(ageevent)25  1.657e+01  4.425e+02   0.037   0.9701  
factor(ageevent)26  1.596e+01  4.425e+02   0.036   0.9712  
factor(ageevent)27  1.693e+01  4.425e+02   0.038   0.9695  
factor(ageevent)28  1.562e+01  4.425e+02   0.035   0.9718  
factor(ageevent)29  1.672e+01  4.425e+02   0.038   0.9699  
factor(ageevent)30  1.721e+01  4.425e+02   0.039   0.9690  
factor(ageevent)31  1.584e+01  4.425e+02   0.036   0.9714  
factor(ageevent)32  1.674e+01  4.425e+02   0.038   0.9698  
factor(ageevent)33  1.634e+01  4.425e+02   0.037   0.9705  
factor(ageevent)34  1.622e+01  4.425e+02   0.037   0.9708  
factor(ageevent)35  1.598e+01  4.425e+02   0.036   0.9712  
factor(ageevent)36  1.673e+01  4.425e+02   0.038   0.9698  
factor(ageevent)37  1.701e+01  4.425e+02   0.038   0.9693  
factor(ageevent)38  1.591e+01  4.425e+02   0.036   0.9713  
factor(ageevent)39  1.507e+01  4.425e+02   0.034   0.9728  
factor(ageevent)40  1.542e+01  4.425e+02   0.035   0.9722  
factor(ageevent)41  1.572e+01  4.425e+02   0.036   0.9717  
factor(ageevent)42  1.582e+01  4.425e+02   0.036   0.9715  
factor(ageevent)43 -2.641e-02  1.870e+03   0.000   1.0000  
factor(ageevent)44 -2.836e-02  1.982e+03   0.000   1.0000  
factor(ageevent)45  1.507e+01  4.425e+02   0.034   0.9728  
factor(ageevent)46 -4.111e-02  2.322e+03   0.000   1.0000  
factor(ageevent)47 -2.976e-02  2.557e+03   0.000   1.0000  
factor(ageevent)48  4.226e-02  3.306e+03   0.000   1.0000  
factor(ageevent)49  6.551e-03  4.577e+03   0.000   1.0000  
tvba                5.011e-01  1.948e-01   2.572   0.0101 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1829.7  on 11281  degrees of freedom
Residual deviance: 1623.7  on 11246  degrees of freedom
AIC: 1624.7

Number of Fisher Scoring iterations: 18
exp(coef(m5a))
       (Intercept) factor(ageevent)16 factor(ageevent)17 factor(ageevent)18 
      3.202855e-09       2.873626e+05       4.904275e+05       1.842931e+06 
factor(ageevent)19 factor(ageevent)20 factor(ageevent)21 factor(ageevent)22 
      3.996196e+06       9.489346e+06       5.406412e+06       6.737424e+06 
factor(ageevent)23 factor(ageevent)24 factor(ageevent)25 factor(ageevent)26 
      4.177191e+06       2.424413e+07       1.576138e+07       8.547354e+06 
factor(ageevent)27 factor(ageevent)28 factor(ageevent)29 factor(ageevent)30 
      2.252777e+07       6.055712e+06       1.819132e+07       2.989955e+07 
factor(ageevent)31 factor(ageevent)32 factor(ageevent)33 factor(ageevent)34 
      7.603682e+06       1.869114e+07       1.243902e+07       1.102356e+07 
factor(ageevent)35 factor(ageevent)36 factor(ageevent)37 factor(ageevent)38 
      8.742010e+06       1.841535e+07       2.451658e+07       8.100441e+06 
factor(ageevent)39 factor(ageevent)40 factor(ageevent)41 factor(ageevent)42 
      3.516491e+06       4.973179e+06       6.741998e+06       7.403488e+06 
factor(ageevent)43 factor(ageevent)44 factor(ageevent)45 factor(ageevent)46 
      9.739352e-01       9.720428e-01       3.521256e+06       9.597214e-01 
factor(ageevent)47 factor(ageevent)48 factor(ageevent)49               tvba 
      9.706792e-01       1.043164e+00       1.006573e+00       1.650575e+00 

Predicted Probabilities (Single-Year)

pprobs_w1 <- expand.grid(ageevent = 15:49,tvba=0:1)

pprobs_w1$phat <- predict(m4a, pprobs_w1, type="response")

pprobs_w1
   ageevent tvba         phat
1        15    0 2.704058e-09
2        16    0 2.700140e-09
3        17    0 9.308451e-04
4        18    0 1.502322e-02
5        19    0 1.563778e-02
6        20    0 2.672749e-02
7        21    0 4.369827e-02
8        22    0 4.111024e-02
9        23    0 5.092767e-02
10       24    0 4.852314e-02
11       25    0 5.615062e-02
12       26    0 7.479599e-02
13       27    0 8.835194e-02
14       28    0 6.701030e-02
15       29    0 6.976821e-02
16       30    0 6.636069e-02
17       31    0 7.122323e-02
18       32    0 7.082411e-02
19       33    0 5.642434e-02
20       34    0 5.850031e-02
21       35    0 4.129993e-02
22       36    0 7.909795e-02
23       37    0 3.553643e-02
24       38    0 5.050853e-02
25       39    0 2.300526e-02
26       40    0 7.111772e-02
27       41    0 3.707463e-02
28       42    0 1.314871e-02
29       43    0 7.879403e-02
30       44    0 1.198052e-02
31       45    0 3.697552e-02
32       46    0 9.260816e-03
33       47    0 2.712426e-09
34       48    0 2.826486e-09
35       49    0 3.094386e-09
36       15    1 4.296292e-09
37       16    1 4.290068e-09
38       17    1 1.478146e-03
39       18    1 2.366007e-02
40       19    1 2.461911e-02
41       20    1 4.180751e-02
42       21    1 6.768753e-02
43       22    1 6.377348e-02
44       23    1 7.855965e-02
45       24    1 7.495353e-02
46       25    1 8.635857e-02
47       26    1 1.138251e-01
48       27    1 1.334345e-01
49       28    1 1.024266e-01
50       29    1 1.064757e-01
51       30    1 1.014710e-01
52       31    1 1.086069e-01
53       32    1 1.080227e-01
54       33    1 8.676602e-02
55       34    1 8.985202e-02
56       35    1 6.406076e-02
57       36    1 1.200805e-01
58       37    1 5.530416e-02
59       38    1 7.793177e-02
60       39    1 3.606296e-02
61       40    1 1.084525e-01
62       41    1 5.764687e-02
63       42    1 2.073057e-02
64       43    1 1.196396e-01
65       44    1 1.890168e-02
66       45    1 5.749604e-02
67       46    1 1.463408e-02
68       47    1 4.309588e-09
69       48    1 4.490810e-09
70       49    1 4.916458e-09
pprobs_w1$tvba <- recode(pprobs_w$tvba, `1` = "Bachelor's Degree", `0`=  "No Bachelor's Degree")

pprobs_w1$tvba
 [1] "No Bachelor's Degree" "No Bachelor's Degree" "No Bachelor's Degree"
 [4] "No Bachelor's Degree" "No Bachelor's Degree" "No Bachelor's Degree"
 [7] "No Bachelor's Degree" "Bachelor's Degree"    "Bachelor's Degree"   
[10] "Bachelor's Degree"    "Bachelor's Degree"    "Bachelor's Degree"   
[13] "Bachelor's Degree"    "Bachelor's Degree"    "No Bachelor's Degree"
[16] "No Bachelor's Degree" "No Bachelor's Degree" "No Bachelor's Degree"
[19] "No Bachelor's Degree" "No Bachelor's Degree" "No Bachelor's Degree"
[22] "Bachelor's Degree"    "Bachelor's Degree"    "Bachelor's Degree"   
[25] "Bachelor's Degree"    "Bachelor's Degree"    "Bachelor's Degree"   
[28] "Bachelor's Degree"    "No Bachelor's Degree" "No Bachelor's Degree"
[31] "No Bachelor's Degree" "No Bachelor's Degree" "No Bachelor's Degree"
[34] "No Bachelor's Degree" "No Bachelor's Degree" "Bachelor's Degree"   
[37] "Bachelor's Degree"    "Bachelor's Degree"    "Bachelor's Degree"   
[40] "Bachelor's Degree"    "Bachelor's Degree"    "Bachelor's Degree"   
[43] "No Bachelor's Degree" "No Bachelor's Degree" "No Bachelor's Degree"
[46] "No Bachelor's Degree" "No Bachelor's Degree" "No Bachelor's Degree"
[49] "No Bachelor's Degree" "Bachelor's Degree"    "Bachelor's Degree"   
[52] "Bachelor's Degree"    "Bachelor's Degree"    "Bachelor's Degree"   
[55] "Bachelor's Degree"    "Bachelor's Degree"    "No Bachelor's Degree"
[58] "No Bachelor's Degree" "No Bachelor's Degree" "No Bachelor's Degree"
[61] "No Bachelor's Degree" "No Bachelor's Degree" "No Bachelor's Degree"
[64] "Bachelor's Degree"    "Bachelor's Degree"    "Bachelor's Degree"   
[67] "Bachelor's Degree"    "Bachelor's Degree"    "Bachelor's Degree"   
[70] "Bachelor's Degree"   
ggplot(pprobs_w1, aes(x = ageevent, y = phat, group=tvba, fill=tvba,color = tvba)) +
  geom_point() +
  geom_line() +
  labs(x = "Age", y = "Predicted First Marriage Probability", title = "Predicted First Marriage Probabilities by Education for White Men, Single-Years")

pprobs_b1 <- expand.grid(ageevent = 15:49,tvba=0:1)

pprobs_b1$phat <- predict(m5a, pprobs_b1, type="response")

pprobs_b1
   ageevent tvba         phat
1        15    0 3.202854e-09
2        16    0 9.195343e-04
3        17    0 1.568304e-03
4        18    0 5.868004e-03
5        19    0 1.263748e-02
6        20    0 2.949651e-02
7        21    0 1.702121e-02
8        22    0 2.112317e-02
9        23    0 1.320230e-02
10       24    0 7.205531e-02
11       25    0 4.805551e-02
12       26    0 2.664646e-02
13       27    0 6.729744e-02
14       28    0 1.902653e-02
15       29    0 5.505634e-02
16       30    0 8.739464e-02
17       31    0 2.377450e-02
18       32    0 5.648360e-02
19       33    0 3.831392e-02
20       34    0 3.410280e-02
21       35    0 2.723677e-02
22       36    0 5.569659e-02
23       37    0 7.280608e-02
24       38    0 2.528844e-02
25       39    0 1.113737e-02
26       40    0 1.567863e-02
27       41    0 2.113721e-02
28       42    0 2.316305e-02
29       43    0 3.119373e-09
30       44    0 3.113312e-09
31       45    0 1.115229e-02
32       46    0 3.073848e-09
33       47    0 3.108944e-09
34       48    0 3.341103e-09
35       49    0 3.223905e-09
36       15    1 5.286551e-09
37       16    1 1.516853e-03
38       17    1 2.585965e-03
39       18    1 9.648744e-03
40       19    1 2.068901e-02
41       20    1 4.776950e-02
42       21    1 2.778708e-02
43       22    1 3.439274e-02
44       23    1 2.160581e-02
45       24    1 1.136071e-01
46       25    1 7.691457e-02
47       26    1 4.323251e-02
48       27    1 1.064202e-01
49       28    1 3.102073e-02
50       29    1 8.773219e-02
51       30    1 1.364910e-01
52       31    1 3.864387e-02
53       32    1 8.992591e-02
54       33    1 6.170199e-02
55       34    1 5.506747e-02
56       35    1 4.417359e-02
57       36    1 8.871675e-02
58       37    1 1.147372e-01
59       38    1 4.106486e-02
60       39    1 1.825082e-02
61       40    1 2.561745e-02
62       41    1 3.441529e-02
63       42    1 3.766476e-02
64       43    1 5.148758e-09
65       44    1 5.138753e-09
66       45    1 1.827510e-02
67       46    1 5.073616e-09
68       47    1 5.131545e-09
69       48    1 5.514739e-09
70       49    1 5.321297e-09
pprobs_b1$tvba <- recode(pprobs_b1$tvba, `1` = "Bachelor's Degree", `0`=  "No Bachelor's Degree")


ggplot(pprobs_b1, aes(x = ageevent, y = phat, group=tvba, fill=tvba,color = tvba)) +
  geom_point() +
  geom_line() +
  labs(x = "Age", y = "Predicted First Marriage Probability", title = "Predicted First Marriage Probabilities by Education for Black Men, Single-Years")

wtddata_w <- whitepy %>%                           
  group_by(ageevent) %>% 
  dplyr::summarize(tvba=weighted.mean(tvba,wgt2017_2019,na.rm = TRUE),n=n())

wtddata_w$phat <- predict(m4a, wtddata_w, type="response")

as.data.frame(wtddata_w)
   ageevent        tvba    n         phat
1        15 0.000000000 2380 2.704058e-09
2        16 0.000000000 2304 2.700140e-09
3        17 0.000000000 2237 9.308451e-04
4        18 0.000000000 2157 1.502322e-02
5        19 0.002428361 2061 1.565510e-02
6        20 0.005297571 1952 2.679137e-02
7        21 0.047316961 1863 4.462297e-02
8        22 0.189208265 1728 4.470582e-02
9        23 0.284691573 1598 5.768911e-02
10       24 0.341288049 1470 5.636141e-02
11       25 0.358237618 1332 6.561616e-02
12       26 0.379966910 1201 8.791797e-02
13       27 0.372049788 1082 1.032460e-01
14       28 0.359087372  958 7.818327e-02
15       29 0.374112816  850 8.188239e-02
16       30 0.380913362  751 7.815934e-02
17       31 0.373048581  661 8.352963e-02
18       32 0.353703912  579 8.238806e-02
19       33 0.353321221  490 6.579275e-02
20       34 0.345667529  410 6.796368e-02
21       35 0.338799975  365 4.797764e-02
22       36 0.363354048  317 9.225272e-02
23       37 0.323024579  270 4.103403e-02
24       38 0.312784728  238 5.792338e-02
25       39 0.300853072  201 2.635307e-02
26       40 0.309045212  178 8.116981e-02
27       41 0.299360273  150 4.235302e-02
28       42 0.301740970  127 1.509041e-02
29       43 0.318906446  104 9.020019e-02
30       44 0.284796965   87 1.364612e-02
31       45 0.268934231   72 4.167416e-02
32       46 0.247657361   55 1.037431e-02
33       47 0.229820737   42 3.016961e-09
34       48 0.116523293   26 2.983164e-09
35       49 0.037364360   14 3.148384e-09
wtddata_b <- blackpy %>%                           
  group_by(ageevent) %>% 
  dplyr::summarize(tvba=weighted.mean(tvba,wgt2017_2019,na.rm = TRUE),n=n())

wtddata_b$phat <- predict(m5a, wtddata_b, type="response")

as.data.frame(wtddata_b)
   ageevent        tvba   n         phat
1        15 0.000000000 877 3.202854e-09
2        16 0.000000000 847 9.195343e-04
3        17 0.000000000 816 1.568304e-03
4        18 0.000000000 767 5.868004e-03
5        19 0.000000000 721 1.263748e-02
6        20 0.002624811 677 2.953418e-02
7        21 0.023728775 640 1.722132e-02
8        22 0.073157382 601 2.189467e-02
9        23 0.116686997 570 1.398620e-02
10       24 0.136580915 533 7.676789e-02
11       25 0.156156175 486 5.176459e-02
12       26 0.162062118 437 2.883580e-02
13       27 0.168785583 404 7.280483e-02
14       28 0.181246226 363 2.079794e-02
15       29 0.177101380 319 5.986010e-02
16       30 0.167478635 279 9.432433e-02
17       31 0.170248749 239 2.583716e-02
18       32 0.163370484 217 6.100842e-02
19       33 0.187288675 202 4.192595e-02
20       34 0.142605360 177 3.653678e-02
21       35 0.163411863 155 2.949250e-02
22       36 0.152309185 141 5.984973e-02
23       37 0.111721003 128 7.667700e-02
24       38 0.121223603 112 2.682979e-02
25       39 0.124673791 103 1.184689e-02
26       40 0.147941113  90 1.686482e-02
27       41 0.158716216  76 2.284708e-02
28       42 0.167953463  66 2.514582e-02
29       43 0.163520995  55 3.385752e-09
30       44 0.167978543  49 3.386731e-09
31       45 0.181798896  44 1.220305e-02
32       46 0.149242492  35 3.312553e-09
33       47 0.128414827  28 3.315588e-09
34       48 0.085261317  18 3.486950e-09
35       49 0.167051479  10 3.505408e-09
wtddata_bxw <- whitepy %>%                           
  group_by(ageevent) %>% 
  dplyr::summarize(tvba=weighted.mean(tvba,wgt2017_2019,na.rm = TRUE),n=n())

wtddata_bxw$phat <- predict(m5a, wtddata_bxw, type="response")

as.data.frame(wtddata_bxw)
   ageevent        tvba    n         phat
1        15 0.000000000 2380 3.202854e-09
2        16 0.000000000 2304 9.195343e-04
3        17 0.000000000 2237 1.568304e-03
4        18 0.000000000 2157 5.868004e-03
5        19 0.002428361 2061 1.265268e-02
6        20 0.005297571 1952 2.957260e-02
7        21 0.047316961 1863 1.742252e-02
8        22 0.189208265 1728 2.317534e-02
9        23 0.284691573 1598 1.519603e-02
10       24 0.341288049 1470 8.436148e-02
11       25 0.358237618 1332 5.696703e-02
12       26 0.379966910 1201 3.205628e-02
13       27 0.372049788 1082 7.998718e-02
14       28 0.359087372  958 2.269257e-02
15       29 0.374112816  850 6.566362e-02
16       30 0.380913362  751 1.038663e-01
17       31 0.373048581  661 2.852217e-02
18       32 0.353703912  579 6.670671e-02
19       33 0.353321221  490 4.539839e-02
20       34 0.345667529  410 4.029279e-02
21       35 0.338799975  365 3.211494e-02
22       36 0.363354048  317 6.608502e-02
23       37 0.323024579  270 8.451785e-02
24       38 0.312784728  238 2.945339e-02
25       39 0.300853072  201 1.292625e-02
26       40 0.309045212  178 1.825695e-02
27       41 0.299360273  150 2.447461e-02
28       42 0.301740970  127 2.684269e-02
29       43 0.318906446  104 3.659928e-09
30       44 0.284796965   87 3.590909e-09
31       45 0.268934231   72 1.274077e-02
32       46 0.247657361   55 3.480017e-09
33       47 0.229820737   42 3.488430e-09
34       48 0.116523293   26 3.542007e-09
35       49 0.037364360   14 3.284839e-09
wtddata_wxb <- blackpy %>%                           
  group_by(ageevent) %>% 
  dplyr::summarize(tvba=weighted.mean(tvba,wgt2017_2019,na.rm = TRUE),n=n())

wtddata_wxb$phat <- predict(m4a, wtddata_wxb, type="response")

as.data.frame(wtddata_wxb)
   ageevent        tvba   n         phat
1        15 0.000000000 877 2.704058e-09
2        16 0.000000000 847 2.700140e-09
3        17 0.000000000 816 9.308451e-04
4        18 0.000000000 767 1.502322e-02
5        19 0.000000000 721 1.563778e-02
6        20 0.002624811 677 2.675912e-02
7        21 0.023728775 640 4.415969e-02
8        22 0.073157382 601 4.246642e-02
9        23 0.116686997 570 5.360323e-02
10       24 0.136580915 533 5.152747e-02
11       25 0.156156175 486 6.010762e-02
12       26 0.162062118 437 8.015703e-02
13       27 0.168785583 404 9.485220e-02
14       28 0.181246226 363 7.245128e-02
15       29 0.177101380 319 7.528131e-02
16       30 0.167478635 279 7.132957e-02
17       31 0.170248749 239 7.661705e-02
18       32 0.163370484 217 7.596634e-02
19       33 0.187288675 202 6.122263e-02
20       34 0.142605360 177 6.224469e-02
21       35 0.163411863 155 4.440178e-02
22       36 0.152309185 141 8.438953e-02
23       37 0.111721003 128 3.735251e-02
24       38 0.121223603 112 5.326912e-02
25       39 0.124673791 103 2.433901e-02
26       40 0.147941113  90 7.577767e-02
27       41 0.158716216  76 3.978920e-02
28       42 0.167953463  66 1.419689e-02
29       43 0.163520995  55 8.446771e-02
30       44 0.167978543  49 1.293694e-02
31       45 0.181798896  44 4.009240e-02
32       46 0.149242492  35 9.916786e-03
33       47 0.128414827  28 2.878586e-09
34       48 0.085261317  18 2.940296e-09
35       49 0.167051479  10 3.343219e-09

Life Table: White Model, White Data (Single-Year)

d = wtddata_w

d$Lx[1] <- 1
Warning: Unknown or uninitialised column: `Lx`.
d$Dx[1] <- d$Lx[1] * d$phat[1]
Warning: Unknown or uninitialised column: `Dx`.
for (i in 2:nrow(d)) {
  d$Dx[i] <- d$Lx[i] * d$phat[i]
  d$Lx[i] <- d$Lx[i-1] * d$Dx[i-1]
  
}
LT_wxw <- as.data.frame(d)
LT_wxw
   ageevent        tvba    n         phat           Lx           Dx
1        15 0.000000000 2380 2.704058e-09 1.000000e+00 2.704058e-09
2        16 0.000000000 2304 2.700140e-09 2.704058e-09 2.700140e-09
3        17 0.000000000 2237 9.308451e-04 7.301336e-18 9.308451e-04
4        18 0.000000000 2157 1.502322e-02 6.796413e-21 1.502322e-02
5        19 0.002428361 2061 1.565510e-02 1.021040e-22 1.565510e-02
6        20 0.005297571 1952 2.679137e-02 1.598448e-24 2.679137e-02
7        21 0.047316961 1863 4.462297e-02 4.282462e-26 4.462297e-02
8        22 0.189208265 1728 4.470582e-02 1.910962e-27 4.470582e-02
9        23 0.284691573 1598 5.768911e-02 8.543112e-29 5.768911e-02
10       24 0.341288049 1470 5.636141e-02 4.928446e-30 5.636141e-02
11       25 0.358237618 1332 6.561616e-02 2.777742e-31 6.561616e-02
12       26 0.379966910 1201 8.791797e-02 1.822647e-32 8.791797e-02
13       27 0.372049788 1082 1.032460e-01 1.602435e-33 1.032460e-01
14       28 0.359087372  958 7.818327e-02 1.654449e-34 7.818327e-02
15       29 0.374112816  850 8.188239e-02 1.293503e-35 8.188239e-02
16       30 0.380913362  751 7.815934e-02 1.059151e-36 7.815934e-02
17       31 0.373048581  661 8.352963e-02 8.278253e-38 8.352963e-02
18       32 0.353703912  579 8.238806e-02 6.914794e-39 8.238806e-02
19       33 0.353321221  490 6.579275e-02 5.696965e-40 6.579275e-02
20       34 0.345667529  410 6.796368e-02 3.748190e-41 6.796368e-02
21       35 0.338799975  365 4.797764e-02 2.547408e-42 4.797764e-02
22       36 0.363354048  317 9.225272e-02 1.222186e-43 9.225272e-02
23       37 0.323024579  270 4.103403e-02 1.127500e-44 4.103403e-02
24       38 0.312784728  238 5.792338e-02 4.626588e-46 5.792338e-02
25       39 0.300853072  201 2.635307e-02 2.679876e-47 2.635307e-02
26       40 0.309045212  178 8.116981e-02 7.062296e-49 8.116981e-02
27       41 0.299360273  150 4.235302e-02 5.732453e-50 4.235302e-02
28       42 0.301740970  127 1.509041e-02 2.427867e-51 1.509041e-02
29       43 0.318906446  104 9.020019e-02 3.663751e-53 9.020019e-02
30       44 0.284796965   87 1.364612e-02 3.304710e-54 1.364612e-02
31       45 0.268934231   72 4.167416e-02 4.509647e-56 4.167416e-02
32       46 0.247657361   55 1.037431e-02 1.879357e-57 1.037431e-02
33       47 0.229820737   42 3.016961e-09 1.949704e-59 3.016961e-09
34       48 0.116523293   26 2.983164e-09 5.882181e-68 2.983164e-09
35       49 0.037364360   14 3.148384e-09 1.754751e-76 3.148384e-09

Life Table: Black Model, Black Data (Single-Year)

d = wtddata_b

d$Lx[1] <- 1
Warning: Unknown or uninitialised column: `Lx`.
d$Dx[1] <- d$Lx[1] * d$phat[1]
Warning: Unknown or uninitialised column: `Dx`.
for (i in 2:nrow(d)) {
  d$Dx[i] <- d$Lx[i] * d$phat[i]
  d$Lx[i] <- d$Lx[i-1] * d$Dx[i-1]
  
}
LT_bxb <- as.data.frame(d)
LT_bxb
   ageevent        tvba   n         phat           Lx           Dx
1        15 0.000000000 877 3.202854e-09 1.000000e+00 3.202854e-09
2        16 0.000000000 847 9.195343e-04 3.202854e-09 9.195343e-04
3        17 0.000000000 816 1.568304e-03 2.945134e-12 1.568304e-03
4        18 0.000000000 767 5.868004e-03 4.618868e-15 5.868004e-03
5        19 0.000000000 721 1.263748e-02 2.710353e-17 1.263748e-02
6        20 0.002624811 677 2.953418e-02 3.425205e-19 2.953418e-02
7        21 0.023728775 640 1.722132e-02 1.011606e-20 1.722132e-02
8        22 0.073157382 601 2.189467e-02 1.742119e-22 2.189467e-02
9        23 0.116686997 570 1.398620e-02 3.814311e-24 1.398620e-02
10       24 0.136580915 533 7.676789e-02 5.334772e-26 7.676789e-02
11       25 0.156156175 486 5.176459e-02 4.095392e-27 5.176459e-02
12       26 0.162062118 437 2.883580e-02 2.119963e-28 2.883580e-02
13       27 0.168785583 404 7.280483e-02 6.113082e-30 7.280483e-02
14       28 0.181246226 363 2.079794e-02 4.450620e-31 2.079794e-02
15       29 0.177101380 319 5.986010e-02 9.256371e-33 5.986010e-02
16       30 0.167478635 279 9.432433e-02 5.540873e-34 9.432433e-02
17       31 0.170248749 239 2.583716e-02 5.226391e-35 2.583716e-02
18       32 0.163370484 217 6.100842e-02 1.350351e-36 6.100842e-02
19       33 0.187288675 202 4.192595e-02 8.238280e-38 4.192595e-02
20       34 0.142605360 177 3.653678e-02 3.453977e-39 3.653678e-02
21       35 0.163411863 155 2.949250e-02 1.261972e-40 2.949250e-02
22       36 0.152309185 141 5.984973e-02 3.721871e-42 5.984973e-02
23       37 0.111721003 128 7.667700e-02 2.227529e-43 7.667700e-02
24       38 0.121223603 112 2.682979e-02 1.708003e-44 2.682979e-02
25       39 0.124673791 103 1.184689e-02 4.582535e-46 1.184689e-02
26       40 0.147941113  90 1.686482e-02 5.428879e-48 1.686482e-02
27       41 0.158716216  76 2.284708e-02 9.155707e-50 2.284708e-02
28       42 0.167953463  66 2.514582e-02 2.091811e-51 2.514582e-02
29       43 0.163520995  55 3.385752e-09 5.260031e-53 3.385752e-09
30       44 0.167978543  49 3.386731e-09 1.780916e-61 3.386731e-09
31       45 0.181798896  44 1.220305e-02 6.031484e-70 1.220305e-02
32       46 0.149242492  35 3.312553e-09 7.360249e-72 3.312553e-09
33       47 0.128414827  28 3.315588e-09 2.438122e-80 3.315588e-09
34       48 0.085261317  18 3.486950e-09 8.083806e-89 3.486950e-09
35       49 0.167051479  10 3.505408e-09 2.818782e-97 3.505408e-09

Life Table: White Model, Black Data (Single-Year)

d = wtddata_wxb


d$Lx[1] <- 1
Warning: Unknown or uninitialised column: `Lx`.
d$Lx[1] <- 1

d$Dx[1] <- d$Lx[1] * d$phat[1]
Warning: Unknown or uninitialised column: `Dx`.
for (i in 2:nrow(d)) {
  d$Dx[i] <- d$Lx[i] * d$phat[i]
  d$Lx[i] <- d$Lx[i-1] * d$Dx[i-1]
  
}
LT_wxb <- as.data.frame(d)
LT_wxb
   ageevent        tvba   n         phat           Lx           Dx
1        15 0.000000000 877 2.704058e-09 1.000000e+00 2.704058e-09
2        16 0.000000000 847 2.700140e-09 2.704058e-09 2.700140e-09
3        17 0.000000000 816 9.308451e-04 7.301336e-18 9.308451e-04
4        18 0.000000000 767 1.502322e-02 6.796413e-21 1.502322e-02
5        19 0.000000000 721 1.563778e-02 1.021040e-22 1.563778e-02
6        20 0.002624811 677 2.675912e-02 1.596680e-24 2.675912e-02
7        21 0.023728775 640 4.415969e-02 4.272577e-26 4.415969e-02
8        22 0.073157382 601 4.246642e-02 1.886756e-27 4.246642e-02
9        23 0.116686997 570 5.360323e-02 8.012380e-29 5.360323e-02
10       24 0.136580915 533 5.152747e-02 4.294894e-30 5.152747e-02
11       25 0.156156175 486 6.010762e-02 2.213050e-31 6.010762e-02
12       26 0.162062118 437 8.015703e-02 1.330212e-32 8.015703e-02
13       27 0.168785583 404 9.485220e-02 1.066258e-33 9.485220e-02
14       28 0.181246226 363 7.245128e-02 1.011370e-34 7.245128e-02
15       29 0.177101380 319 7.528131e-02 7.327502e-36 7.528131e-02
16       30 0.167478635 279 7.132957e-02 5.516240e-37 7.132957e-02
17       31 0.170248749 239 7.661705e-02 3.934710e-38 7.661705e-02
18       32 0.163370484 217 7.596634e-02 3.014659e-39 7.596634e-02
19       33 0.187288675 202 6.122263e-02 2.290126e-40 6.122263e-02
20       34 0.142605360 177 6.224469e-02 1.402075e-41 6.224469e-02
21       35 0.163411863 155 4.440178e-02 8.727174e-43 4.440178e-02
22       36 0.152309185 141 8.438953e-02 3.875021e-44 8.438953e-02
23       37 0.111721003 128 3.735251e-02 3.270112e-45 3.735251e-02
24       38 0.121223603 112 5.326912e-02 1.221469e-46 5.326912e-02
25       39 0.124673791 103 2.433901e-02 6.506658e-48 2.433901e-02
26       40 0.147941113  90 7.577767e-02 1.583656e-49 7.577767e-02
27       41 0.158716216  76 3.978920e-02 1.200058e-50 3.978920e-02
28       42 0.167953463  66 1.419689e-02 4.774933e-52 1.419689e-02
29       43 0.163520995  55 8.446771e-02 6.778920e-54 8.446771e-02
30       44 0.167978543  49 1.293694e-02 5.725998e-55 1.293694e-02
31       45 0.181798896  44 4.009240e-02 7.407691e-57 4.009240e-02
32       46 0.149242492  35 9.916786e-03 2.969921e-58 9.916786e-03
33       47 0.128414827  28 2.878586e-09 2.945208e-60 2.878586e-09
34       48 0.085261317  18 2.940296e-09 8.478034e-69 2.940296e-09
35       49 0.167051479  10 3.343219e-09 2.492793e-77 3.343219e-09

Life Table: Black Model, White Data (Single-Year)

d = wtddata_bxw

d$Lx[1] <- 1
Warning: Unknown or uninitialised column: `Lx`.
d$Dx[1] <- d$Lx[1] * d$phat[1]
Warning: Unknown or uninitialised column: `Dx`.
for (i in 2:nrow(d)) {
  d$Dx[i] <- d$Lx[i] * d$phat[i]
  d$Lx[i] <- d$Lx[i-1] * d$Dx[i-1]
  
}
LT_bxw <- as.data.frame(d)
LT_bxw
   ageevent        tvba    n         phat           Lx           Dx
1        15 0.000000000 2380 3.202854e-09 1.000000e+00 3.202854e-09
2        16 0.000000000 2304 9.195343e-04 3.202854e-09 9.195343e-04
3        17 0.000000000 2237 1.568304e-03 2.945134e-12 1.568304e-03
4        18 0.000000000 2157 5.868004e-03 4.618868e-15 5.868004e-03
5        19 0.002428361 2061 1.265268e-02 2.710353e-17 1.265268e-02
6        20 0.005297571 1952 2.957260e-02 3.429322e-19 2.957260e-02
7        21 0.047316961 1863 1.742252e-02 1.014140e-20 1.742252e-02
8        22 0.189208265 1728 2.317534e-02 1.766887e-22 2.317534e-02
9        23 0.284691573 1598 1.519603e-02 4.094820e-24 1.519603e-02
10       24 0.341288049 1470 8.436148e-02 6.222503e-26 8.436148e-02
11       25 0.358237618 1332 5.696703e-02 5.249396e-27 5.696703e-02
12       26 0.379966910 1201 3.205628e-02 2.990425e-28 3.205628e-02
13       27 0.372049788 1082 7.998718e-02 9.586191e-30 7.998718e-02
14       28 0.359087372  958 2.269257e-02 7.667723e-31 2.269257e-02
15       29 0.374112816  850 6.566362e-02 1.740003e-32 6.566362e-02
16       30 0.380913362  751 1.038663e-01 1.142549e-33 1.038663e-01
17       31 0.373048581  661 2.852217e-02 1.186724e-34 2.852217e-02
18       32 0.353703912  579 6.670671e-02 3.384794e-36 6.670671e-02
19       33 0.353321221  490 4.539839e-02 2.257884e-37 4.539839e-02
20       34 0.345667529  410 4.029279e-02 1.025043e-38 4.029279e-02
21       35 0.338799975  365 3.211494e-02 4.130185e-40 3.211494e-02
22       36 0.363354048  317 6.608502e-02 1.326406e-41 6.608502e-02
23       37 0.323024579  270 8.451785e-02 8.765560e-43 8.451785e-02
24       38 0.312784728  238 2.945339e-02 7.408463e-44 2.945339e-02
25       39 0.300853072  201 1.292625e-02 2.182043e-45 1.292625e-02
26       40 0.309045212  178 1.825695e-02 2.820564e-47 1.825695e-02
27       41 0.299360273  150 2.447461e-02 5.149489e-49 2.447461e-02
28       42 0.301740970  127 2.684269e-02 1.260317e-50 2.684269e-02
29       43 0.318906446  104 3.659928e-09 3.383031e-52 3.659928e-09
30       44 0.284796965   87 3.590909e-09 1.238165e-60 3.590909e-09
31       45 0.268934231   72 1.274077e-02 4.446138e-69 1.274077e-02
32       46 0.247657361   55 3.480017e-09 5.664721e-71 3.480017e-09
33       47 0.229820737   42 3.488430e-09 1.971333e-79 3.488430e-09
34       48 0.116523293   26 3.542007e-09 6.876856e-88 3.542007e-09
35       49 0.037364360   14 3.284839e-09 2.435787e-96 3.284839e-09
v1 = (sum(LT_bxw$Dx) - sum(LT_bxb$Dx)) / (sum(LT_wxw$Dx) - sum(LT_bxb$Dx))

v2 = (sum(LT_wxw$Dx)-sum(LT_wxb$Dx)) / (sum(LT_wxw$Dx)-sum(LT_bxb$Dx))
gap <-mean(v1,v2)
gap
[1] 0.12783
1-gap
[1] 0.87217