library(readr, quietly = T)
library(tidyverse, quietly = T)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ purrr 1.0.2
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(viridis, quietly = T)
library(purrr, quietly = T)
library(haven, quietly = T)
library(janitor, quietly = T)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(plyr, quietly = T)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
##
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following object is masked from 'package:purrr':
##
## compact
library(ggplot2, quietly = T)
library(scales, quietly = T)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:viridis':
##
## viridis_pal
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(sur, quietly = T)
library(summarytools, quietly = T)
##
## Attaching package: 'summarytools'
##
## The following object is masked from 'package:tibble':
##
## view
library(Rmisc, quietly = T)
library(car, quietly = T)
##
## Attaching package: 'carData'
##
## The following objects are masked from 'package:sur':
##
## Anscombe, States
##
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(survival, quietly = T)
library(readr)
X2017_2019_Male_Data <-read_csv("C:/Users/bryan/OneDrive/Desktop/2017_2019_Male_Data.csv")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 5206 Columns: 3009
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (1939): caseid, rscrninf, rscrage, rscrhisp, rscrrace, age_a, age_r, age...
## lgl (1070): sedbclc3, sedbclc4, sedwhlc3, sedwhlc4, sedconlc4, p2currwife, p...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(X2017_2019_Male_Data)
# Assuming 'X2017_2019_Male_Data' is your original data frame
# You can replace the column names inside 'select' with the actual column names you want
maledat <- X2017_2019_Male_Data %>%
select(
"caseid","age_r", "fmarital", "agemarr", "agemarrn4", "agemarrn2", "agemarrn3", "hisagem", "evrmarry", "agemarrn", "hisprace2", "fmarno", "nformwife", "hieduc", "wgt2017_2019", "earnba_y", "intvwyear", "ager"
)
# Now, 'maledat' contains the selected columns from 'X2017_2019_Male_Data'
maledat <- maledat %>%
select(caseid, ager, fmarital, hisagem, agemarrn, agemarrn2, agemarrn3, agemarrn4, hisprace2, earnba_y, wgt2017_2019, intvwyear, age_r, fmarno)
nrow(maledat)
## [1] 5206
maledat <- maledat %>%
filter(hisprace2 %in% c(2, 3)) # Selecting for non-Hispanic white and black men
tabyl(maledat$hisprace2)
## maledat$hisprace2 n percent
## 2 2427 0.7297054
## 3 899 0.2702946
maledat$raceeth = recode(maledat$hisprace2, recodes = "2='White Men'; 3='Black Men'")
tabyl(maledat$raceeth)
## maledat$raceeth n percent
## Black Men 899 0.2702946
## White Men 2427 0.7297054
nrow(maledat)
## [1] 3326
maledat <- maledat %>%
mutate(agemarry1 = pmin(hisagem, agemarrn, agemarrn2, agemarrn3, agemarrn4, na.rm = TRUE))
tabyl(maledat$agemarry1)
## maledat$agemarry1 n percent valid_percent
## 16 1 0.0003006615 0.0008019246
## 17 7 0.0021046302 0.0056134723
## 18 27 0.0081178593 0.0216519647
## 19 38 0.0114251353 0.0304731355
## 20 56 0.0168370415 0.0449077787
## 21 93 0.0279615153 0.0745789896
## 22 84 0.0252555622 0.0673616680
## 23 88 0.0264582081 0.0705693665
## 24 100 0.0300661455 0.0801924619
## 25 111 0.0333734215 0.0890136327
## 26 85 0.0255562237 0.0681635926
## 27 94 0.0282621768 0.0753809142
## 28 68 0.0204449790 0.0545308741
## 29 79 0.0237522550 0.0633520449
## 30 59 0.0177390259 0.0473135525
## 31 45 0.0135297655 0.0360866079
## 32 42 0.0126277811 0.0336808340
## 33 40 0.0120264582 0.0320769848
## 34 28 0.0084185207 0.0224538893
## 35 21 0.0063138906 0.0168404170
## 36 21 0.0063138906 0.0168404170
## 37 13 0.0039085989 0.0104250200
## 38 12 0.0036079375 0.0096230954
## 39 5 0.0015033073 0.0040096231
## 40 10 0.0030066146 0.0080192462
## 41 6 0.0018039687 0.0048115477
## 42 3 0.0009019844 0.0024057739
## 43 3 0.0009019844 0.0024057739
## 44 1 0.0003006615 0.0008019246
## 45 4 0.0012026458 0.0032076985
## 46 1 0.0003006615 0.0008019246
## 98 2 0.0006013229 0.0016038492
## NA 2079 0.6250751654 NA
data1 <- maledat %>%
mutate(evmarried = ifelse(fmarital == 1 & fmarital == 1, 1,
ifelse(fmarital == 0 & fmarital == 5, 0,
ifelse(fmarital %in% c(1:5), 1, NA))))
#data1 <- data1[, c("caseid","age_r", "fmarital", "agemarr", "agemarrn4", "agemarrn2", "agemarrn3", "hisagem", "evrmarry", "agemarrn", "hisprace2", "fmarno", "nformwife", "hieduc", "wgt2017_2019", "earnba_y", "intvwyear")]
data1 <- data1 %>%
mutate(evmarried = ifelse(fmarno == 1 & fmarital == 1, 1,
ifelse(fmarno == 0 & fmarital == 5, 0,
ifelse(fmarno %in% c(1:5), 1, NA))))
data1 %>%
group_by(evmarried) %>%
dplyr::summarize(n=n()) %>%
dplyr::mutate(pct = n / sum(n))
## # A tibble: 2 × 3
## evmarried n pct
## <dbl> <int> <dbl>
## 1 0 2012 0.605
## 2 1 1314 0.395
#ever married age event
data1$ageevent<- pmin(data1$hisagem,data1$agemarrn, na.rm=T)
#never married age
data1$ageevent <- ifelse(data1$fmarital == 5, data1$age_r, data1$ageevent)
# filter for NA values
data1 <- data1 %>%
filter(ageevent !=98 & !is.na(ageevent))
maledat <- maledat %>%
mutate(age_bach = earnba_y - (intvwyear - ager))
tabyl(maledat$age_bach)
## maledat$age_bach n percent valid_percent
## 19 3 0.0009019844 0.003640777
## 20 7 0.0021046302 0.008495146
## 21 100 0.0300661455 0.121359223
## 22 258 0.0775706554 0.313106796
## 23 172 0.0517137703 0.208737864
## 24 88 0.0264582081 0.106796117
## 25 33 0.0099218280 0.040048544
## 26 32 0.0096211666 0.038834951
## 27 16 0.0048105833 0.019417476
## 28 17 0.0051112447 0.020631068
## 29 16 0.0048105833 0.019417476
## 30 14 0.0042092604 0.016990291
## 31 15 0.0045099218 0.018203883
## 32 12 0.0036079375 0.014563107
## 33 11 0.0033072760 0.013349515
## 34 4 0.0012026458 0.004854369
## 35 7 0.0021046302 0.008495146
## 36 2 0.0006013229 0.002427184
## 37 4 0.0012026458 0.004854369
## 39 1 0.0003006615 0.001213592
## 40 5 0.0015033073 0.006067961
## 44 3 0.0009019844 0.003640777
## 45 2 0.0006013229 0.002427184
## 46 1 0.0003006615 0.001213592
## 8015 1 0.0003006615 0.001213592
## NA 2502 0.7522549609 NA
maledat <- maledat %>%
mutate(evermarried = ifelse(fmarital %in% 1:4, 1, 0))
evermarry <- maledat %>%
filter(evermarried == 1) %>%
select(age = agemarry1, evermarried, age_bach, raceeth, wgt2017_2019, caseid)
head(evermarry)
## # A tibble: 6 × 6
## age evermarried age_bach raceeth wgt2017_2019 caseid
## <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 18 1 NA White Men 3872. 80724
## 2 27 1 21 White Men 46706. 80734
## 3 NA 1 NA White Men 5582. 80738
## 4 31 1 23 White Men 32543. 80739
## 5 23 1 NA White Men 10445. 80745
## 6 24 1 NA White Men 8080. 80762
nrow(evermarry)
## [1] 1314
evermarry <- evermarry %>%
filter(!is.na(age), age %in% c(16:46))
age_tab <- evermarry %>%
tabyl(age)
nevermarry <- maledat %>%
filter(evermarried==0) %>%
select(age = ager, evermarried, age_bach, raceeth, caseid, wgt2017_2019)
head(nevermarry)
## # A tibble: 6 × 6
## age evermarried age_bach raceeth caseid wgt2017_2019
## <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 31 0 22 White Men 80717 11404.
## 2 17 0 NA Black Men 80721 12670.
## 3 39 0 22 White Men 80732 6390.
## 4 17 0 NA White Men 80735 3052.
## 5 40 0 NA White Men 80736 9588.
## 6 31 0 23 White Men 80740 11234.
nrow(nevermarry)
## [1] 2012
allmarry <- rbind(evermarry, nevermarry)
head(allmarry, n = 20)
## # A tibble: 20 × 6
## age evermarried age_bach raceeth wgt2017_2019 caseid
## <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 18 1 NA White Men 3872. 80724
## 2 27 1 21 White Men 46706. 80734
## 3 31 1 23 White Men 32543. 80739
## 4 23 1 NA White Men 10445. 80745
## 5 24 1 NA White Men 8080. 80762
## 6 27 1 NA White Men 68186. 80769
## 7 36 1 NA White Men 11764. 80770
## 8 26 1 NA White Men 6810. 80775
## 9 29 1 NA Black Men 17327. 80777
## 10 27 1 25 White Men 9543. 80780
## 11 28 1 21 White Men 3607. 80794
## 12 21 1 NA White Men 4888. 80825
## 13 31 1 23 White Men 19400. 80837
## 14 18 1 NA White Men 14372. 80840
## 15 20 1 NA White Men 76325. 80850
## 16 27 1 23 White Men 27728. 80856
## 17 19 1 NA White Men 4281. 80865
## 18 22 1 NA White Men 8125. 80868
## 19 28 1 NA White Men 6060. 80875
## 20 24 1 NA White Men 12023. 80884
tail(allmarry, n = 20)
## # A tibble: 20 × 6
## age evermarried age_bach raceeth wgt2017_2019 caseid
## <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 19 0 NA White Men 10001. 91933
## 2 15 0 NA White Men 3188. 91940
## 3 22 0 NA White Men 13226. 91945
## 4 30 0 NA Black Men 7316. 91957
## 5 24 0 22 White Men 5128. 91962
## 6 28 0 NA White Men 36151. 91963
## 7 25 0 23 White Men 5870. 91972
## 8 15 0 NA White Men 11718. 91974
## 9 27 0 NA Black Men 10512. 91986
## 10 27 0 NA White Men 24137. 91987
## 11 29 0 NA Black Men 10929. 91988
## 12 15 0 NA Black Men 4644. 92007
## 13 45 0 NA Black Men 16157. 92009
## 14 21 0 NA White Men 17928. 92025
## 15 16 0 NA White Men 27677. 92029
## 16 45 0 NA White Men 5719. 92040
## 17 19 0 NA White Men 16452. 92050
## 18 46 0 NA White Men 4895. 92051
## 19 39 0 NA Black Men 7012. 92059
## 20 37 0 22 White Men 3114. 92060
nrow(allmarry)
## [1] 3257
tabyl(allmarry$age)
## allmarry$age n percent
## 15 106 0.032545287
## 16 98 0.030089039
## 17 129 0.039607000
## 18 142 0.043598403
## 19 153 0.046975745
## 20 126 0.038685907
## 21 174 0.053423396
## 22 161 0.049431993
## 23 165 0.050660117
## 24 185 0.056800737
## 25 180 0.055265582
## 26 152 0.046668714
## 27 165 0.050660117
## 28 152 0.046668714
## 29 139 0.042677310
## 30 130 0.039914031
## 31 104 0.031931225
## 32 104 0.031931225
## 33 105 0.032238256
## 34 67 0.020571078
## 35 62 0.019035923
## 36 60 0.018421861
## 37 48 0.014737488
## 38 46 0.014123426
## 39 36 0.011053116
## 40 42 0.012895302
## 41 33 0.010132023
## 42 34 0.010439054
## 43 23 0.007061713
## 44 20 0.006140620
## 45 26 0.007982806
## 46 20 0.006140620
## 47 26 0.007982806
## 48 20 0.006140620
## 49 24 0.007368744
pydata1 <- survSplit(allmarry, cut=c(15:49), end="age", event="evermarried")
head(pydata1[, c("age", "evermarried", "age_bach", "raceeth", "caseid")], n=20)
## age evermarried age_bach raceeth caseid
## 1 15 0 NA White Men 80724
## 2 16 0 NA White Men 80724
## 3 17 0 NA White Men 80724
## 4 18 1 NA White Men 80724
## 5 15 0 21 White Men 80734
## 6 16 0 21 White Men 80734
## 7 17 0 21 White Men 80734
## 8 18 0 21 White Men 80734
## 9 19 0 21 White Men 80734
## 10 20 0 21 White Men 80734
## 11 21 0 21 White Men 80734
## 12 22 0 21 White Men 80734
## 13 23 0 21 White Men 80734
## 14 24 0 21 White Men 80734
## 15 25 0 21 White Men 80734
## 16 26 0 21 White Men 80734
## 17 27 1 21 White Men 80734
## 18 15 0 23 White Men 80739
## 19 16 0 23 White Men 80739
## 20 17 0 23 White Men 80739
tail(pydata1[, c("age", "evermarried", "age_bach", "raceeth", "caseid")], n=20)
## age evermarried age_bach raceeth caseid
## 41573 18 0 22 White Men 92060
## 41574 19 0 22 White Men 92060
## 41575 20 0 22 White Men 92060
## 41576 21 0 22 White Men 92060
## 41577 22 0 22 White Men 92060
## 41578 23 0 22 White Men 92060
## 41579 24 0 22 White Men 92060
## 41580 25 0 22 White Men 92060
## 41581 26 0 22 White Men 92060
## 41582 27 0 22 White Men 92060
## 41583 28 0 22 White Men 92060
## 41584 29 0 22 White Men 92060
## 41585 30 0 22 White Men 92060
## 41586 31 0 22 White Men 92060
## 41587 32 0 22 White Men 92060
## 41588 33 0 22 White Men 92060
## 41589 34 0 22 White Men 92060
## 41590 35 0 22 White Men 92060
## 41591 36 0 22 White Men 92060
## 41592 37 0 22 White Men 92060
# Define the time intervals for splitting
cut_times <- c(15, 20, 25, 30, 35, 40, 45, 50)
# Split the data into intervals
pydata1 <- survSplit(data = pydata1, cut = cut_times, end = "age", event = "evermarried")
# Inspect the resulting data
head(pydata1)
## age_bach raceeth wgt2017_2019 caseid tstart age evermarried
## 1 NA White Men 3872.331 80724 0 15 0
## 2 NA White Men 3872.331 80724 15 16 0
## 3 NA White Men 3872.331 80724 16 17 0
## 4 NA White Men 3872.331 80724 17 18 1
## 5 21 White Men 46706.359 80734 0 15 0
## 6 21 White Men 46706.359 80734 15 16 0
table(pydata1$age,pydata1$evermarried)
##
## 0 1
## 15 3257 0
## 16 3150 1
## 17 3046 7
## 18 2897 27
## 19 2744 38
## 20 2573 56
## 21 2410 93
## 22 2245 84
## 23 2080 88
## 24 1903 100
## 25 1707 111
## 26 1553 85
## 27 1392 94
## 28 1253 68
## 29 1090 79
## 30 971 59
## 31 855 45
## 32 754 42
## 33 652 40
## 34 559 28
## 35 499 21
## 36 437 21
## 37 385 13
## 38 338 12
## 39 299 5
## 40 258 10
## 41 220 6
## 42 190 3
## 43 156 3
## 44 135 1
## 45 112 4
## 46 89 1
## 47 70 0
## 48 44 0
## 49 24 0
head(pydata1[, c("age", "evermarried", "age_bach", "raceeth")], n=50)
## age evermarried age_bach raceeth
## 1 15 0 NA White Men
## 2 16 0 NA White Men
## 3 17 0 NA White Men
## 4 18 1 NA White Men
## 5 15 0 21 White Men
## 6 16 0 21 White Men
## 7 17 0 21 White Men
## 8 18 0 21 White Men
## 9 19 0 21 White Men
## 10 20 0 21 White Men
## 11 21 0 21 White Men
## 12 22 0 21 White Men
## 13 23 0 21 White Men
## 14 24 0 21 White Men
## 15 25 0 21 White Men
## 16 26 0 21 White Men
## 17 27 1 21 White Men
## 18 15 0 23 White Men
## 19 16 0 23 White Men
## 20 17 0 23 White Men
## 21 18 0 23 White Men
## 22 19 0 23 White Men
## 23 20 0 23 White Men
## 24 21 0 23 White Men
## 25 22 0 23 White Men
## 26 23 0 23 White Men
## 27 24 0 23 White Men
## 28 25 0 23 White Men
## 29 26 0 23 White Men
## 30 27 0 23 White Men
## 31 28 0 23 White Men
## 32 29 0 23 White Men
## 33 30 0 23 White Men
## 34 31 1 23 White Men
## 35 15 0 NA White Men
## 36 16 0 NA White Men
## 37 17 0 NA White Men
## 38 18 0 NA White Men
## 39 19 0 NA White Men
## 40 20 0 NA White Men
## 41 21 0 NA White Men
## 42 22 0 NA White Men
## 43 23 1 NA White Men
## 44 15 0 NA White Men
## 45 16 0 NA White Men
## 46 17 0 NA White Men
## 47 18 0 NA White Men
## 48 19 0 NA White Men
## 49 20 0 NA White Men
## 50 21 0 NA White Men
tail(pydata1[, c("age", "evermarried", "age_bach", "raceeth")], n=50)
## age evermarried age_bach raceeth
## 41543 45 0 NA White Men
## 41544 46 0 NA White Men
## 41545 15 0 NA Black Men
## 41546 16 0 NA Black Men
## 41547 17 0 NA Black Men
## 41548 18 0 NA Black Men
## 41549 19 0 NA Black Men
## 41550 20 0 NA Black Men
## 41551 21 0 NA Black Men
## 41552 22 0 NA Black Men
## 41553 23 0 NA Black Men
## 41554 24 0 NA Black Men
## 41555 25 0 NA Black Men
## 41556 26 0 NA Black Men
## 41557 27 0 NA Black Men
## 41558 28 0 NA Black Men
## 41559 29 0 NA Black Men
## 41560 30 0 NA Black Men
## 41561 31 0 NA Black Men
## 41562 32 0 NA Black Men
## 41563 33 0 NA Black Men
## 41564 34 0 NA Black Men
## 41565 35 0 NA Black Men
## 41566 36 0 NA Black Men
## 41567 37 0 NA Black Men
## 41568 38 0 NA Black Men
## 41569 39 0 NA Black Men
## 41570 15 0 22 White Men
## 41571 16 0 22 White Men
## 41572 17 0 22 White Men
## 41573 18 0 22 White Men
## 41574 19 0 22 White Men
## 41575 20 0 22 White Men
## 41576 21 0 22 White Men
## 41577 22 0 22 White Men
## 41578 23 0 22 White Men
## 41579 24 0 22 White Men
## 41580 25 0 22 White Men
## 41581 26 0 22 White Men
## 41582 27 0 22 White Men
## 41583 28 0 22 White Men
## 41584 29 0 22 White Men
## 41585 30 0 22 White Men
## 41586 31 0 22 White Men
## 41587 32 0 22 White Men
## 41588 33 0 22 White Men
## 41589 34 0 22 White Men
## 41590 35 0 22 White Men
## 41591 36 0 22 White Men
## 41592 37 0 22 White Men
table(pydata1$age,pydata1$evermarried)
##
## 0 1
## 15 3257 0
## 16 3150 1
## 17 3046 7
## 18 2897 27
## 19 2744 38
## 20 2573 56
## 21 2410 93
## 22 2245 84
## 23 2080 88
## 24 1903 100
## 25 1707 111
## 26 1553 85
## 27 1392 94
## 28 1253 68
## 29 1090 79
## 30 971 59
## 31 855 45
## 32 754 42
## 33 652 40
## 34 559 28
## 35 499 21
## 36 437 21
## 37 385 13
## 38 338 12
## 39 299 5
## 40 258 10
## 41 220 6
## 42 190 3
## 43 156 3
## 44 135 1
## 45 112 4
## 46 89 1
## 47 70 0
## 48 44 0
## 49 24 0
# View the first 50 rows of selected columns
head(pydata1[, c("age", "evermarried", "caseid", "age_bach","raceeth")], n = 50)
## age evermarried caseid age_bach raceeth
## 1 15 0 80724 NA White Men
## 2 16 0 80724 NA White Men
## 3 17 0 80724 NA White Men
## 4 18 1 80724 NA White Men
## 5 15 0 80734 21 White Men
## 6 16 0 80734 21 White Men
## 7 17 0 80734 21 White Men
## 8 18 0 80734 21 White Men
## 9 19 0 80734 21 White Men
## 10 20 0 80734 21 White Men
## 11 21 0 80734 21 White Men
## 12 22 0 80734 21 White Men
## 13 23 0 80734 21 White Men
## 14 24 0 80734 21 White Men
## 15 25 0 80734 21 White Men
## 16 26 0 80734 21 White Men
## 17 27 1 80734 21 White Men
## 18 15 0 80739 23 White Men
## 19 16 0 80739 23 White Men
## 20 17 0 80739 23 White Men
## 21 18 0 80739 23 White Men
## 22 19 0 80739 23 White Men
## 23 20 0 80739 23 White Men
## 24 21 0 80739 23 White Men
## 25 22 0 80739 23 White Men
## 26 23 0 80739 23 White Men
## 27 24 0 80739 23 White Men
## 28 25 0 80739 23 White Men
## 29 26 0 80739 23 White Men
## 30 27 0 80739 23 White Men
## 31 28 0 80739 23 White Men
## 32 29 0 80739 23 White Men
## 33 30 0 80739 23 White Men
## 34 31 1 80739 23 White Men
## 35 15 0 80745 NA White Men
## 36 16 0 80745 NA White Men
## 37 17 0 80745 NA White Men
## 38 18 0 80745 NA White Men
## 39 19 0 80745 NA White Men
## 40 20 0 80745 NA White Men
## 41 21 0 80745 NA White Men
## 42 22 0 80745 NA White Men
## 43 23 1 80745 NA White Men
## 44 15 0 80762 NA White Men
## 45 16 0 80762 NA White Men
## 46 17 0 80762 NA White Men
## 47 18 0 80762 NA White Men
## 48 19 0 80762 NA White Men
## 49 20 0 80762 NA White Men
## 50 21 0 80762 NA White Men
# View the last 50 rows of selected columns
tail(pydata1[, c("age", "evermarried", "caseid", "age_bach","raceeth")], n = 50)
## age evermarried caseid age_bach raceeth
## 41543 45 0 92051 NA White Men
## 41544 46 0 92051 NA White Men
## 41545 15 0 92059 NA Black Men
## 41546 16 0 92059 NA Black Men
## 41547 17 0 92059 NA Black Men
## 41548 18 0 92059 NA Black Men
## 41549 19 0 92059 NA Black Men
## 41550 20 0 92059 NA Black Men
## 41551 21 0 92059 NA Black Men
## 41552 22 0 92059 NA Black Men
## 41553 23 0 92059 NA Black Men
## 41554 24 0 92059 NA Black Men
## 41555 25 0 92059 NA Black Men
## 41556 26 0 92059 NA Black Men
## 41557 27 0 92059 NA Black Men
## 41558 28 0 92059 NA Black Men
## 41559 29 0 92059 NA Black Men
## 41560 30 0 92059 NA Black Men
## 41561 31 0 92059 NA Black Men
## 41562 32 0 92059 NA Black Men
## 41563 33 0 92059 NA Black Men
## 41564 34 0 92059 NA Black Men
## 41565 35 0 92059 NA Black Men
## 41566 36 0 92059 NA Black Men
## 41567 37 0 92059 NA Black Men
## 41568 38 0 92059 NA Black Men
## 41569 39 0 92059 NA Black Men
## 41570 15 0 92060 22 White Men
## 41571 16 0 92060 22 White Men
## 41572 17 0 92060 22 White Men
## 41573 18 0 92060 22 White Men
## 41574 19 0 92060 22 White Men
## 41575 20 0 92060 22 White Men
## 41576 21 0 92060 22 White Men
## 41577 22 0 92060 22 White Men
## 41578 23 0 92060 22 White Men
## 41579 24 0 92060 22 White Men
## 41580 25 0 92060 22 White Men
## 41581 26 0 92060 22 White Men
## 41582 27 0 92060 22 White Men
## 41583 28 0 92060 22 White Men
## 41584 29 0 92060 22 White Men
## 41585 30 0 92060 22 White Men
## 41586 31 0 92060 22 White Men
## 41587 32 0 92060 22 White Men
## 41588 33 0 92060 22 White Men
## 41589 34 0 92060 22 White Men
## 41590 35 0 92060 22 White Men
## 41591 36 0 92060 22 White Men
## 41592 37 0 92060 22 White Men
pydata1<- pydata1%>%
mutate(ba_obtained = case_when(age > age_bach ~ 1,
TRUE ~ 0
)
)
head(pydata1[, c("age", "evermarried", "caseid", "age_bach", "raceeth", "ba_obtained")], n=50)
## age evermarried caseid age_bach raceeth ba_obtained
## 1 15 0 80724 NA White Men 0
## 2 16 0 80724 NA White Men 0
## 3 17 0 80724 NA White Men 0
## 4 18 1 80724 NA White Men 0
## 5 15 0 80734 21 White Men 0
## 6 16 0 80734 21 White Men 0
## 7 17 0 80734 21 White Men 0
## 8 18 0 80734 21 White Men 0
## 9 19 0 80734 21 White Men 0
## 10 20 0 80734 21 White Men 0
## 11 21 0 80734 21 White Men 0
## 12 22 0 80734 21 White Men 1
## 13 23 0 80734 21 White Men 1
## 14 24 0 80734 21 White Men 1
## 15 25 0 80734 21 White Men 1
## 16 26 0 80734 21 White Men 1
## 17 27 1 80734 21 White Men 1
## 18 15 0 80739 23 White Men 0
## 19 16 0 80739 23 White Men 0
## 20 17 0 80739 23 White Men 0
## 21 18 0 80739 23 White Men 0
## 22 19 0 80739 23 White Men 0
## 23 20 0 80739 23 White Men 0
## 24 21 0 80739 23 White Men 0
## 25 22 0 80739 23 White Men 0
## 26 23 0 80739 23 White Men 0
## 27 24 0 80739 23 White Men 1
## 28 25 0 80739 23 White Men 1
## 29 26 0 80739 23 White Men 1
## 30 27 0 80739 23 White Men 1
## 31 28 0 80739 23 White Men 1
## 32 29 0 80739 23 White Men 1
## 33 30 0 80739 23 White Men 1
## 34 31 1 80739 23 White Men 1
## 35 15 0 80745 NA White Men 0
## 36 16 0 80745 NA White Men 0
## 37 17 0 80745 NA White Men 0
## 38 18 0 80745 NA White Men 0
## 39 19 0 80745 NA White Men 0
## 40 20 0 80745 NA White Men 0
## 41 21 0 80745 NA White Men 0
## 42 22 0 80745 NA White Men 0
## 43 23 1 80745 NA White Men 0
## 44 15 0 80762 NA White Men 0
## 45 16 0 80762 NA White Men 0
## 46 17 0 80762 NA White Men 0
## 47 18 0 80762 NA White Men 0
## 48 19 0 80762 NA White Men 0
## 49 20 0 80762 NA White Men 0
## 50 21 0 80762 NA White Men 0
tail(pydata1[, c("age", "evermarried", "caseid", "age_bach", "raceeth", "ba_obtained")], n=50)
## age evermarried caseid age_bach raceeth ba_obtained
## 41543 45 0 92051 NA White Men 0
## 41544 46 0 92051 NA White Men 0
## 41545 15 0 92059 NA Black Men 0
## 41546 16 0 92059 NA Black Men 0
## 41547 17 0 92059 NA Black Men 0
## 41548 18 0 92059 NA Black Men 0
## 41549 19 0 92059 NA Black Men 0
## 41550 20 0 92059 NA Black Men 0
## 41551 21 0 92059 NA Black Men 0
## 41552 22 0 92059 NA Black Men 0
## 41553 23 0 92059 NA Black Men 0
## 41554 24 0 92059 NA Black Men 0
## 41555 25 0 92059 NA Black Men 0
## 41556 26 0 92059 NA Black Men 0
## 41557 27 0 92059 NA Black Men 0
## 41558 28 0 92059 NA Black Men 0
## 41559 29 0 92059 NA Black Men 0
## 41560 30 0 92059 NA Black Men 0
## 41561 31 0 92059 NA Black Men 0
## 41562 32 0 92059 NA Black Men 0
## 41563 33 0 92059 NA Black Men 0
## 41564 34 0 92059 NA Black Men 0
## 41565 35 0 92059 NA Black Men 0
## 41566 36 0 92059 NA Black Men 0
## 41567 37 0 92059 NA Black Men 0
## 41568 38 0 92059 NA Black Men 0
## 41569 39 0 92059 NA Black Men 0
## 41570 15 0 92060 22 White Men 0
## 41571 16 0 92060 22 White Men 0
## 41572 17 0 92060 22 White Men 0
## 41573 18 0 92060 22 White Men 0
## 41574 19 0 92060 22 White Men 0
## 41575 20 0 92060 22 White Men 0
## 41576 21 0 92060 22 White Men 0
## 41577 22 0 92060 22 White Men 0
## 41578 23 0 92060 22 White Men 1
## 41579 24 0 92060 22 White Men 1
## 41580 25 0 92060 22 White Men 1
## 41581 26 0 92060 22 White Men 1
## 41582 27 0 92060 22 White Men 1
## 41583 28 0 92060 22 White Men 1
## 41584 29 0 92060 22 White Men 1
## 41585 30 0 92060 22 White Men 1
## 41586 31 0 92060 22 White Men 1
## 41587 32 0 92060 22 White Men 1
## 41588 33 0 92060 22 White Men 1
## 41589 34 0 92060 22 White Men 1
## 41590 35 0 92060 22 White Men 1
## 41591 36 0 92060 22 White Men 1
## 41592 37 0 92060 22 White Men 1
ageprobs <- pydata1%>%
group_by(age, ba_obtained) %>%
dplyr::summarise(q=mean(evermarried, na.rm = TRUE), wgt2017_2019, n=n())
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'age', 'ba_obtained'. You can override
## using the `.groups` argument.
head(ageprobs)
## # A tibble: 6 × 5
## # Groups: age, ba_obtained [1]
## age ba_obtained q wgt2017_2019 n
## <dbl> <dbl> <dbl> <dbl> <int>
## 1 15 0 0 3872. 3257
## 2 15 0 0 46706. 3257
## 3 15 0 0 32543. 3257
## 4 15 0 0 10445. 3257
## 5 15 0 0 8080. 3257
## 6 15 0 0 68186. 3257
# Create the plot
# Calculate meq
ageprobs$numq <- ageprobs$q * (1 - ageprobs$q)
ageprobs$seq <- sqrt(ageprobs$numq / ageprobs$n)
ageprobs$meq <- 1.96 * ageprobs$seq
# Create the plot
library(ggplot2)
ggplot(data = ageprobs, aes(x = age, y = q, ymin = q - meq, ymax = q + meq, fill = as.factor(ba_obtained), color = as.factor(ba_obtained), group = as.factor(ba_obtained))) +
geom_line() +
geom_ribbon(alpha = 0.3, aes(color = NULL)) +
labs(
title = "First Marriage Rates by Age and Attainment of BA Prior to Marriage",
subtitle = "US-Born Men Ages 15 to 49",
caption = "Source: NSFG, 2017-2019",
color = "Degree Obtained Before Marriage",
fill = "Degree Obtained Before Marriage"
) +
xlab("Age at Risk") +
ylab("First Marriage Rate") +
ylim(0, 0.15)
## Warning: Removed 3 rows containing missing values (`geom_line()`).

raceeducprobs <- pydata1%>%
group_by(age, raceeth, ba_obtained) %>%
dplyr::summarize(p=weighted.mean(evermarried, wgt2017_2019, na.rm = TRUE), n=n()) # add weight here?
## `summarise()` has grouped output by 'age', 'raceeth'. You can override using
## the `.groups` argument.
raceeducprobs
## # A tibble: 129 × 5
## # Groups: age, raceeth [70]
## age raceeth ba_obtained p n
## <dbl> <chr> <dbl> <dbl> <int>
## 1 15 Black Men 0 0 877
## 2 15 White Men 0 0 2380
## 3 16 Black Men 0 0.000920 847
## 4 16 White Men 0 0 2304
## 5 17 Black Men 0 0.00157 816
## 6 17 White Men 0 0.000931 2237
## 7 18 Black Men 0 0.00587 767
## 8 18 White Men 0 0.0150 2157
## 9 19 Black Men 0 0.0126 721
## 10 19 White Men 0 0.0157 2061
## # ℹ 119 more rows
raceeducprobs$num <- raceeducprobs$p * (1 - raceeducprobs$p)
raceeducprobs$sep <- sqrt(raceeducprobs$num / raceeducprobs$n)
raceeducprobs$me <- 2 * raceeducprobs$sep
ggplot(data = raceeducprobs, aes(x = age, y = p, ymin = p - me, ymax = p + me, fill = as.factor(ba_obtained), color = as.factor(ba_obtained), group = as.factor(ba_obtained))) +
geom_line() +
geom_ribbon(alpha = 0.3, aes(color = NULL)) +
labs(
title = "Adjusted First Marriage Rates by Age and Education, Grouped by Race/Ethnicity",
subtitle = "US-Born Men Ages 15 to 49",
caption = "Source: NSFG, 2017-2019",
color = "Degree Obtained Before Marriage",
fill = "Degree Obtained Before Marriage"
) +
xlab("Age at Survey") +
ylab("Adjusted First Marriage Rate") +
facet_grid(~ raceeth)

pydata1<- pydata1%>%
mutate(agegrp = case_when(
age %in% c(15:19) ~ '15-19',
age %in% c(20:24) ~ '20-24',
age %in% c(25:29) ~ '25-29',
age %in% c(30:34) ~ '30-34',
age %in% c(35:39) ~ '35-39',
age %in% c(40:44) ~ '40-44',
age %in% c(45:49) ~ '45-49'
)) %>%
mutate(newweight = wgt2017_2019 / mean(wgt2017_2019))
pywhite1<- pydata1%>%
filter(raceeth == "White Men")
pyblack1<- pydata1%>%
filter(raceeth == "Black Men")
pywhite1<- pydata1%>%
filter(raceeth=="White Men")
head(pywhite1)
## age_bach raceeth wgt2017_2019 caseid tstart age evermarried ba_obtained
## 1 NA White Men 3872.331 80724 0 15 0 0
## 2 NA White Men 3872.331 80724 15 16 0 0
## 3 NA White Men 3872.331 80724 16 17 0 0
## 4 NA White Men 3872.331 80724 17 18 1 0
## 5 21 White Men 46706.359 80734 0 15 0 0
## 6 21 White Men 46706.359 80734 15 16 0 0
## agegrp newweight
## 1 15-19 0.2686633
## 2 15-19 0.2686633
## 3 15-19 0.2686633
## 4 15-19 0.2686633
## 5 15-19 3.2404984
## 6 15-19 3.2404984
nrow(pywhite1)
## [1] 30310
pyblack1<- pydata1%>%
filter(raceeth=="Black Men")
head(pyblack1)
## age_bach raceeth wgt2017_2019 caseid tstart age evermarried ba_obtained
## 1 NA Black Men 17327.5 80777 0 15 0 0
## 2 NA Black Men 17327.5 80777 15 16 0 0
## 3 NA Black Men 17327.5 80777 16 17 0 0
## 4 NA Black Men 17327.5 80777 17 18 0 0
## 5 NA Black Men 17327.5 80777 18 19 0 0
## 6 NA Black Men 17327.5 80777 19 20 0 0
## agegrp newweight
## 1 15-19 1.202186
## 2 15-19 1.202186
## 3 15-19 1.202186
## 4 15-19 1.202186
## 5 15-19 1.202186
## 6 20-24 1.202186
nrow(pyblack1)
## [1] 11282
pywhite1<- pywhite1%>%
mutate(ba_obtained = case_when(age > age_bach ~ 1, TRUE ~ 0))
modelwhite1<- glm(evermarried ~ agegrp + ba_obtained, family = "binomial", weights = newweight, data = pywhite1)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(modelwhite1)
##
## Call:
## glm(formula = evermarried ~ agegrp + ba_obtained, family = "binomial",
## data = pywhite1, weights = newweight)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.10156 0.11292 -45.177 < 2e-16 ***
## agegrp20-24 1.99871 0.12299 16.251 < 2e-16 ***
## agegrp25-29 2.51341 0.12540 20.044 < 2e-16 ***
## agegrp30-34 2.42901 0.13562 17.911 < 2e-16 ***
## agegrp35-39 2.10153 0.16743 12.551 < 2e-16 ***
## agegrp40-44 2.04126 0.22461 9.088 < 2e-16 ***
## agegrp45-49 0.91960 0.58803 1.564 0.118
## ba_obtained 0.50947 0.06397 7.964 1.67e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 11566 on 30309 degrees of freedom
## Residual deviance: 10554 on 30302 degrees of freedom
## AIC: 10728
##
## Number of Fisher Scoring iterations: 7
pyblack <- pyblack1 # Use the filtered dataset pyblack1
# Fit the logistic regression model
modelblack1 <- glm(evermarried ~ agegrp + ba_obtained, family = "binomial", weights = newweight, data = pyblack)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
# Summary of the model
summary(modelblack1)
##
## Call:
## glm(formula = evermarried ~ agegrp + ba_obtained, family = "binomial",
## data = pyblack, weights = newweight)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.52834 0.30422 -18.172 < 2e-16 ***
## agegrp20-24 2.05299 0.32956 6.230 4.68e-10 ***
## agegrp25-29 2.40216 0.33422 7.187 6.60e-13 ***
## agegrp30-34 2.56740 0.34920 7.352 1.95e-13 ***
## agegrp35-39 2.32108 0.39962 5.808 6.31e-09 ***
## agegrp40-44 1.18957 0.67758 1.756 0.07915 .
## agegrp45-49 -0.08247 1.77657 -0.046 0.96298
## ba_obtained 0.65240 0.19857 3.285 0.00102 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1829.7 on 11281 degrees of freedom
## Residual deviance: 1690.8 on 11274 degrees of freedom
## AIC: 1628.1
##
## Number of Fisher Scoring iterations: 8
whitedata1<- expand.grid(agegrp=c("15-19","20-24","25-29","30-34","35-39","40-44","45-49"), ba_obtained=0:1)
whitedata1$phat <- predict(modelwhite1, whitedata1, type="response")
whitedata1
## agegrp ba_obtained phat
## 1 15-19 0 0.006050412
## 2 20-24 0 0.042989983
## 3 25-29 0 0.069905033
## 4 30-34 0 0.064612617
## 5 35-39 0 0.047424390
## 6 40-44 0 0.044774886
## 7 45-49 0 0.015038984
## 8 15-19 1 0.010030004
## 9 20-24 1 0.069565676
## 10 25-29 1 0.111186097
## 11 30-34 1 0.103114837
## 12 35-39 1 0.076522168
## 13 40-44 1 0.072370535
## 14 45-49 1 0.024783297
blackdata1<- expand.grid(agegrp=c("15-19","20-24","25-29","30-34","35-39","40-44","45-49"), ba_obtained=0:1)
blackdata1$phat <- predict(modelblack1, blackdata1,type="response")
blackdata1
## agegrp ba_obtained phat
## 1 15-19 0 0.003956844
## 2 20-24 0 0.030021684
## 3 25-29 0 0.042040170
## 4 30-34 0 0.049221906
## 5 35-39 0 0.038893279
## 6 40-44 0 0.012884381
## 7 45-49 0 0.003644772
## 8 15-19 1 0.007570159
## 9 20-24 1 0.056096407
## 10 25-29 1 0.077716953
## 11 30-34 1 0.090418115
## 12 35-39 1 0.072100498
## 13 40-44 1 0.024450032
## 14 45-49 1 0.006975104
# Calculate the probability of marrying before 50 for each group
probability_white_with_ba <- sum(whitedata1$phat[whitedata1$ba_obtained == 1 & whitedata1$agegrp == "45-49"])
probability_white_without_ba <- sum(whitedata1$phat[whitedata1$ba_obtained == 0 & whitedata1$agegrp == "45-49"])
probability_black_with_ba <- sum(blackdata1$phat[blackdata1$ba_obtained == 1 & blackdata1$agegrp == "45-49"])
probability_black_without_ba <- sum(blackdata1$phat[blackdata1$ba_obtained == 0 & blackdata1$agegrp == "45-49"])
# Sum up the probabilities for all four sets
probability_all_groups <- probability_white_with_ba + probability_white_without_ba + probability_black_with_ba + probability_black_without_ba
# Print the overall probability
cat("Overall probability of marrying before 50 based on all four sets:", probability_all_groups, "\n")
## Overall probability of marrying before 50 based on all four sets: 0.05044216
# Calculate the overall probability of marrying before 50 for White and Black men
probability_white <- probability_white_with_ba + probability_white_without_ba
probability_black <- probability_black_with_ba + probability_black_without_ba
# Calculate the raw difference between White and Black men
raw_difference <- probability_white - probability_black
# Calculate the difference due to educational attainment
difference_education <- (probability_white_with_ba - probability_black_with_ba) + (probability_white_without_ba - probability_black_without_ba)
# Calculate the difference due to model coefficients
difference_coefficients <- raw_difference - difference_education
# Print the results
cat("Raw difference in probability:", raw_difference, "\n")
## Raw difference in probability: 0.0292024
cat("Difference due to educational attainment:", difference_education, "\n")
## Difference due to educational attainment: 0.0292024
cat("Difference due to model coefficients:", difference_coefficients, "\n")
## Difference due to model coefficients: 3.469447e-18
whitedata1$ba_obtained <- car::Recode(whitedata1$ba_obtained, recodes="1 = 'BA_Degree'; 0 = 'NO COLLEGE DEGREE'; else=NA",
as.factor=T)
# Rest of your code
ggplot(whitedata1, aes(x = agegrp, y = phat, group = ba_obtained, fill = ba_obtained, color = ba_obtained)) +
geom_point() +
geom_line() +
labs(x = "Age", y = "Predicted First Marriage Probability", title = "Predicted First Marriage Probabilities by Education for White Men")

blackdata1$ba_obtained <- car::Recode(blackdata1$ba_obtained, recodes="1 = 'BA_Degree'; 0 = 'NO COLLEGE DEGREE'; else=NA",
as.factor=T)
ggplot(blackdata1, aes(x = agegrp, y = phat, group = ba_obtained, fill = ba_obtained, color = ba_obtained)) +
geom_point() +
geom_line() +
labs(x = "Age", y = "Predicted First Marriage Probability", title = "Predicted First Marriage Probabilities by Education for Black Men")

newwhitedata <- pywhite1 %>%
group_by(agegrp) %>%
dplyr::summarize(ba_obtained=weighted.mean(ba_obtained,wgt2017_2019,na.rm = TRUE),n=n())
newwhitedata$phat <- predict(modelwhite1, newwhitedata, type="response")
as.data.frame(newwhitedata)
## agegrp ba_obtained n phat
## 1 15-19 0.00000000 11139 0.006050412
## 2 20-24 0.09415799 8611 0.045007413
## 3 25-29 0.35065029 5423 0.082450799
## 4 30-34 0.35652896 2891 0.076497780
## 5 35-39 0.33126816 1391 0.055657988
## 6 40-44 0.30316262 646 0.051865210
## 7 45-49 0.22105545 209 0.016801614
newblackdata <- pyblack1 %>%
group_by(agegrp) %>%
dplyr::summarize(ba_obtained=weighted.mean(ba_obtained,wgt2017_2019,na.rm = TRUE),n=n())
newblackdata$phat <- predict(modelblack1, newblackdata, type="response")
as.data.frame(newblackdata)
## agegrp ba_obtained n phat
## 1 15-19 0.00000000 4028 0.003956844
## 2 20-24 0.03699129 3021 0.030732479
## 3 25-29 0.15704990 2009 0.046365684
## 4 30-34 0.16452680 1114 0.054495301
## 5 35-39 0.13474843 639 0.042315947
## 6 40-44 0.15355195 336 0.014222663
## 7 45-49 0.14881902 135 0.004014899
whitemodel_blackdata <- pyblack1 %>%
group_by(agegrp) %>%
dplyr::summarize(ba_obtained=weighted.mean(ba_obtained,wgt2017_2019,na.rm = TRUE),n=n())
whitemodel_blackdata$phat <- predict(modelwhite1,whitemodel_blackdata, type="response")
as.data.frame(whitemodel_blackdata)
## agegrp ba_obtained n phat
## 1 15-19 0.00000000 4028 0.006050412
## 2 20-24 0.03699129 3021 0.043772048
## 3 25-29 0.15704990 2009 0.075289685
## 4 30-34 0.16452680 1114 0.069867260
## 5 35-39 0.13474843 639 0.050623814
## 6 40-44 0.15355195 336 0.048242493
## 7 45-49 0.14881902 135 0.016204358
blackmodel_whitedata <- pywhite1 %>%
group_by(agegrp) %>%
dplyr::summarize(ba_obtained=weighted.mean(ba_obtained,wgt2017_2019,na.rm = TRUE),n=n())
blackmodel_whitedata$phat <- predict(modelblack1, blackmodel_whitedata, type="response")
as.data.frame(blackmodel_whitedata)
## agegrp ba_obtained n phat
## 1 15-19 0.00000000 11139 0.003956844
## 2 20-24 0.09415799 8611 0.031863098
## 3 25-29 0.35065029 5423 0.052281473
## 4 30-34 0.35652896 2891 0.061321537
## 5 35-39 0.33126816 1391 0.047827558
## 6 40-44 0.30316262 646 0.015658059
## 7 45-49 0.22105545 209 0.004207823
data1$agegrp <- car::Recode(data1$ageevent, recodes = "15:19 = '15-19'; 20:24 = '20-24'; 25:29 = '25-29'; 30:34 = '30-34'; 35:39 = '35-39'; 40:44 = '40-44'; 45:49 = '45-49'; else=NA", as.factor = T)
fprobs <- pydata1 %>%
dplyr::group_by(agegrp) %>%
dplyr::summarize(p = weighted.mean(evermarried, wgt2017_2019, na.rm = TRUE), n = n())
library(ggplot2) # Load the ggplot2 library if not already loaded
ggplot(fprobs, aes(x = agegrp, y = p, group = 1)) +
geom_point() +
geom_line() +
labs(x = "Age", y = "Adjusted First Marriage Rate", title = "Adjusted First Marriage Rate by Age")

data1 <- data1 %>%
mutate(nh_wh_marr = ifelse(evmarried == 1 & hisprace2 == 2,1,0),
nh_bl_marr = ifelse(evmarried == 1 & hisprace2 == 3,1,0),
marr_by_race = ifelse(evmarried == 1 & hisprace2 == 2,1,
ifelse(evmarried == 1 & hisprace2 == 3,2,0))) #1 is for NH whites marrying, 2 is for NH Blacks marrying, 0 is for no marriages whatsoever regardless of race)
nh_wh_probs<- data1 %>%
group_by(agegrp) %>%
dplyr::summarize(p=weighted.mean(nh_wh_marr,wgt2017_2019,na.rm = TRUE),n=n())
ggplot(nh_wh_probs, aes(x = factor(agegrp), y = p, group = 1)) +
geom_point() +
geom_line() +
labs(x = "Age", y = "Adjusted First Marriage Rate", title = "Adjusted First Marriage Rate (NH White) by Age")

nh_bl_probs<- data1 %>%
group_by(agegrp) %>%
dplyr::summarize(p=weighted.mean(nh_bl_marr,wgt2017_2019,na.rm = TRUE),n=n())
ggplot(nh_bl_probs, aes(x = agegrp, y = p, group =1 )) +
geom_point() +
geom_line() +
labs(x = "Age", y = "Adjusted First Marriage Rate", title = "Adjusted First Marriage Rate (NH Black) by Age")

library(nnet)
compriskmodel <-multinom(formula = marr_by_race ~ agegrp, data = data1)
## # weights: 24 (14 variable)
## initial value 3578.180224
## iter 10 value 2532.030674
## iter 20 value 2486.812645
## final value 2486.783619
## converged
summary(compriskmodel)
## Call:
## multinom(formula = marr_by_race ~ agegrp, data = data1)
##
## Coefficients:
## (Intercept) agegrp20-24 agegrp25-29 agegrp30-34 agegrp35-39 agegrp40-44
## 1 -2.311634 2.209127 2.356211 1.780331 1.143977 0.39628878
## 2 -3.428630 1.696617 1.816311 1.427148 1.008216 -0.04545012
## agegrp45-49
## 1 -1.011440
## 2 -1.280696
##
## Std. Errors:
## (Intercept) agegrp20-24 agegrp25-29 agegrp30-34 agegrp35-39 agegrp40-44
## 1 0.1413633 0.1593380 0.1598667 0.1706138 0.2083191 0.2834871
## 2 0.2394978 0.2727925 0.2729364 0.2928099 0.3541421 0.5614702
## agegrp45-49
## 1 0.5281609
## 2 1.0325556
##
## Residual Deviance: 4973.567
## AIC: 5001.567
compriskmodel <-multinom(formula = marr_by_race ~ factor(evmarried), data = data1)
## # weights: 9 (4 variable)
## initial value 3578.180224
## iter 10 value 591.840457
## final value 577.578838
## converged
summary(compriskmodel)
## Call:
## multinom(formula = marr_by_race ~ factor(evmarried), data = data1)
##
## Coefficients:
## (Intercept) factor(evmarried)1
## 1 -12.70518 24.57202
## 2 -11.03884 21.35572
##
## Std. Errors:
## (Intercept) factor(evmarried)1
## 1 12.79633 17.39152
## 2 5.56221 13.02539
##
## Residual Deviance: 1155.158
## AIC: 1163.158
data1$new_weight <- data1$wgt2017_2019/mean(data1$wgt2017_2019)
data1$new_weight = data1$wgt2017_2019/mean(data1$wgt2017_2019) #creating new weight
anymodel <- glm(evmarried ~ agegrp, new_weight, family = "binomial", data = data1)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(anymodel)
##
## Call:
## glm(formula = evmarried ~ agegrp, family = "binomial", data = data1,
## weights = new_weight)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.6079 0.1171 -13.731 < 2e-16 ***
## agegrp20-24 1.8421 0.1347 13.671 < 2e-16 ***
## agegrp25-29 2.1603 0.1363 15.846 < 2e-16 ***
## agegrp30-34 1.6993 0.1474 11.527 < 2e-16 ***
## agegrp35-39 1.0591 0.1783 5.940 2.85e-09 ***
## agegrp40-44 0.6287 0.2421 2.597 0.00941 **
## agegrp45-49 -1.5946 0.5728 -2.784 0.00537 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4505.8 on 3256 degrees of freedom
## Residual deviance: 4051.8 on 3250 degrees of freedom
## AIC: 3995.5
##
## Number of Fisher Scoring iterations: 5
anymodel <- glm(evmarried ~ factor(ageevent), weights=new_weight, family = "binomial", data = data1)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(anymodel)
##
## Call:
## glm(formula = evmarried ~ factor(ageevent), family = "binomial",
## data = data1, weights = new_weight)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -16.63186 261.51882 -0.064 0.949
## factor(ageevent)16 11.76824 261.52258 0.045 0.964
## factor(ageevent)17 13.27880 261.51943 0.051 0.960
## factor(ageevent)18 15.71972 261.51888 0.060 0.952
## factor(ageevent)19 15.92562 261.51888 0.061 0.951
## factor(ageevent)20 16.45440 261.51886 0.063 0.950
## factor(ageevent)21 16.60451 261.51885 0.063 0.949
## factor(ageevent)22 16.82407 261.51886 0.064 0.949
## factor(ageevent)23 17.22629 261.51886 0.066 0.947
## factor(ageevent)24 17.25150 261.51886 0.066 0.947
## factor(ageevent)25 17.15481 261.51886 0.066 0.948
## factor(ageevent)26 17.49893 261.51887 0.067 0.947
## factor(ageevent)27 17.47596 261.51886 0.067 0.947
## factor(ageevent)28 16.65639 261.51886 0.064 0.949
## factor(ageevent)29 17.09184 261.51887 0.065 0.948
## factor(ageevent)30 17.05200 261.51888 0.065 0.948
## factor(ageevent)31 17.07489 261.51890 0.065 0.948
## factor(ageevent)32 16.52822 261.51888 0.063 0.950
## factor(ageevent)33 16.15155 261.51890 0.062 0.951
## factor(ageevent)34 16.75322 261.51895 0.064 0.949
## factor(ageevent)35 16.06973 261.51897 0.061 0.951
## factor(ageevent)36 16.42463 261.51892 0.063 0.950
## factor(ageevent)37 16.15067 261.51902 0.062 0.951
## factor(ageevent)38 15.94210 261.51902 0.061 0.951
## factor(ageevent)39 15.13919 261.51926 0.058 0.954
## factor(ageevent)40 16.13955 261.51905 0.062 0.951
## factor(ageevent)41 15.59706 261.51923 0.060 0.952
## factor(ageevent)42 14.64536 261.51968 0.056 0.955
## factor(ageevent)43 15.98953 261.51918 0.061 0.951
## factor(ageevent)44 14.32893 261.52106 0.055 0.956
## factor(ageevent)45 14.91363 261.51961 0.057 0.955
## factor(ageevent)46 13.34649 261.52292 0.051 0.959
## factor(ageevent)47 0.05916 588.53811 0.000 1.000
## factor(ageevent)48 0.03428 662.14810 0.000 1.000
## factor(ageevent)49 0.10625 647.05364 0.000 1.000
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4505.8 on 3256 degrees of freedom
## Residual deviance: 3869.1 on 3222 degrees of freedom
## AIC: 3865.4
##
## Number of Fisher Scoring iterations: 15
as.data.frame(fprobs)
## agegrp p n
## 1 15-19 0.005686679 15167
## 2 20-24 0.042987314 11632
## 3 25-29 0.077805920 7432
## 4 30-34 0.073962657 4005
## 5 35-39 0.054000719 2030
## 6 40-44 0.043089447 982
## 7 45-49 0.013173333 344