HW 2 B. Solomon

library(purrr)
library(haven)
library(janitor)

## 
## Attaching package: 'janitor'

## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

library(plyr)

## 
## Attaching package: 'plyr'

## The following object is masked from 'package:purrr':
## 
##     compact

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
library(scales)

## 
## Attaching package: 'scales'

## The following object is masked from 'package:purrr':
## 
##     discard

library(sur)
library(summarytools)
library(Rmisc)

## Loading required package: lattice

library(car)

## Loading required package: carData

## 
## Attaching package: 'carData'

## The following objects are masked from 'package:sur':
## 
##     Anscombe, States

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

## The following object is masked from 'package:purrr':
## 
##     some

library(usmap)
library(ggthemes)
library(tigris)

## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ readr     2.1.4     ✔ tidyr     1.3.0

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::arrange()    masks plyr::arrange()
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ plyr::compact()     masks purrr::compact()
## ✖ dplyr::count()      masks plyr::count()
## ✖ dplyr::desc()       masks plyr::desc()
## ✖ scales::discard()   masks purrr::discard()
## ✖ dplyr::failwith()   masks plyr::failwith()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::id()         masks plyr::id()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ dplyr::mutate()     masks plyr::mutate()
## ✖ car::recode()       masks dplyr::recode()
## ✖ dplyr::rename()     masks plyr::rename()
## ✖ car::some()         masks purrr::some()
## ✖ dplyr::summarise()  masks plyr::summarise()
## ✖ dplyr::summarize()  masks plyr::summarize()
## ✖ tibble::view()      masks summarytools::view()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(tidycensus)
library("ggpubr")

## 
## Attaching package: 'ggpubr'
## 
## The following object is masked from 'package:plyr':
## 
##     mutate

library(lme4)

## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

library("writexl")
library(readxl)
library("tinytex")
library("lubridate")
library(splitstackshape)
library(tinytex)
library(psych)

## 
## Attaching package: 'psych'
## 
## The following object is masked from 'package:car':
## 
##     logit
## 
## The following object is masked from 'package:sur':
## 
##     skew
## 
## The following objects are masked from 'package:scales':
## 
##     alpha, rescale
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

library(glmmTMB)
library(readxl)
library(openxlsx)

# theme for nice plotting
theme_nice <- theme_classic()+
                theme(
                  axis.line.y.left = element_line(colour = "black"),
                  axis.line.y.right = element_line(colour = "black"),
                  axis.line.x.bottom = element_line(colour = "black"),
                  axis.line.x.top = element_line(colour = "black"),
                  axis.text.y = element_text(colour = "black", size = 12),
                  axis.text.x = element_text(color = "black", size = 12),
                  axis.ticks = element_line(color = "black")) +
                theme(
                  axis.ticks.length = unit(-0.25, "cm"), 
                  axis.text.x = element_text(margin=unit(c(0.5,0.5,0.5,0.5), "cm")), 
                  axis.text.y = element_text(margin=unit(c(0.5,0.5,0.5,0.5), "cm")))
gc()

##           used  (Mb) gc trigger  (Mb) max used  (Mb)
## Ncells 2678457 143.1    4736740 253.0  4736740 253.0
## Vcells 4503635  34.4   10146329  77.5  7110372  54.3

library(purrr)
library(haven)
library(janitor)
library(plyr)
library(dplyr)
library(ggplot2)
library(scales)
library(sur)
library(summarytools)
library(Rmisc)
library(car)
library(usmap)
library(ggthemes)
library(tigris)
library(tidyverse)
library(tidycensus)
library("ggpubr")
library(lme4)
library("writexl")
library(readxl)
library("tinytex")
library("lubridate")
library(splitstackshape)
library(tinytex)
library(psych)
library(glmmTMB)
library(readxl)
library(openxlsx)

library(devtools)

## Warning: package 'devtools' was built under R version 4.3.2

## Loading required package: usethis

install_github("ajdamico/lodown" , dependencies = TRUE)

## Skipping install of 'lodown' from a github remote, the SHA1 (3e305dea) has not changed since last install.
##   Use `force = TRUE` to force installation

library(lodown)

lodown( "nsfg" , output_dir = file.path( path.expand( "~" ) , "NSFG" ) )

## building catalog for nsfg
## 
## locally downloading nsfg
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/1973FemRespSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/b2d5880fdfd7724a8a7ad32ff1bb9acd.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/1973NSFGData.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/fc61d4a8883fa0c6b1f99f0512563585.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'
## 
## nsfg catalog entry 1 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/1973NSFGData.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/1976PregSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/edaf9e1d1085b4d6f8f5e52665ae7b94.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/1976NSFGData.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/2895ec3203307ca4bebd13229ccdeae9.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'

## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)

## nsfg catalog entry 2 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/1976FemPreg.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/1976FemRespSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/dd3a5960e93c090d40e18c153c2a41c8.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/1976NSFGData.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/2895ec3203307ca4bebd13229ccdeae9.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'

## New names:
## • `missing` -> `missing...401`
## • `missing` -> `missing...466`

## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)

## nsfg catalog entry 3 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/1976FemResp.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/1982PregSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/78c60ec2dfdc9ed452072118e18e4620.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/1982NSFGData.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/7ebc6bef9f4e1cab43b86deb6809bab4.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'

## New names:
## • `missing` -> `missing...10`
## • `missing` -> `missing...157`
## • `missing` -> `missing...167`
## • `missing` -> `missing...171`
## • `missing` -> `missing...173`
## • `missing` -> `missing...175`
## • `missing` -> `missing...179`

## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)

## nsfg catalog entry 4 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/1982FemPreg.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/1982FemRespSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/54322fc0acf2db0a01adcfd89051607d.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/1982NSFGData.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/7ebc6bef9f4e1cab43b86deb6809bab4.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'

## New names:
## • `missing` -> `missing...4`
## • `missing` -> `missing...338`
## • `missing` -> `missing...347`
## • `missing` -> `missing...350`
## • `missing` -> `missing...354`
## • `missing` -> `missing...356`
## • `missing` -> `missing...359`
## • `missing` -> `missing...361`
## • `missing` -> `missing...365`
## • `missing` -> `missing...367`
## • `missing` -> `missing...369`
## • `missing` -> `missing...371`
## • `missing` -> `missing...373`
## • `missing` -> `missing...377`
## • `missing` -> `missing...379`
## • `missing` -> `missing...381`
## • `missing` -> `missing...383`
## • `missing` -> `missing...435`
## • `missing` -> `missing...441`
## • `missing` -> `missing...466`
## • `missing` -> `missing...468`
## • `missing` -> `missing...470`
## • `missing` -> `missing...472`
## • `missing` -> `missing...476`
## • `missing` -> `missing...502`
## • `missing` -> `missing...528`
## • `missing` -> `missing...562`
## • `missing` -> `missing...586`
## • `missing` -> `missing...590`
## • `missing` -> `missing...620`
## • `missing` -> `missing...636`
## • `missing` -> `missing...639`
## • `missing` -> `missing...650`
## • `missing` -> `missing...723`
## • `missing` -> `missing...767`
## • `missing` -> `missing...776`

## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)

## nsfg catalog entry 5 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/1982FemResp.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/1988FemRespSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/c30bff014c69fd6b24f0d2af4ce1c0be.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/1988FemRespData.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/f00251748fc6e6e99b92ddf4c99b27d6.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'

## New names:
## • `missing` -> `missing...3`
## • `missing` -> `missing...6`
## • `missing` -> `missing...10`
## • `missing` -> `missing...59`
## • `missing` -> `missing...70`
## • `missing` -> `missing...89`
## • `missing` -> `missing...99`
## • `missing` -> `missing...172`
## • `missing` -> `missing...198`
## • `missing` -> `missing...202`
## • `missing` -> `missing...204`
## • `missing` -> `missing...208`
## • `missing` -> `missing...274`
## • `missing` -> `missing...422`
## • `missing` -> `missing...426`
## • `missing` -> `missing...467`
## • `missing` -> `missing...541`
## • `missing` -> `missing...556`
## • `missing` -> `missing...613`
## • `missing` -> `missing...639`
## • `missing` -> `missing...641`
## • `missing` -> `missing...650`
## • `missing` -> `missing...652`
## • `missing` -> `missing...661`
## • `missing` -> `missing...687`
## • `missing` -> `missing...694`
## • `missing` -> `missing...705`
## • `missing` -> `missing...718`
## • `missing` -> `missing...744`
## • `missing` -> `missing...750`
## • `missing` -> `missing...760`
## • `missing` -> `missing...767`
## • `missing` -> `missing...779`
## • `missing` -> `missing...784`
## • `missing` -> `missing...788`
## • `missing` -> `missing...797`
## • `missing` -> `missing...814`
## • `missing` -> `missing...1026`
## • `missing` -> `missing...1028`
## • `missing` -> `missing...1030`
## • `missing` -> `missing...1059`
## • `missing` -> `missing...1069`
## • `missing` -> `missing...1071`
## • `missing` -> `missing...1105`
## • `missing` -> `missing...1112`
## • `missing` -> `missing...1114`
## • `missing` -> `missing...1116`
## • `missing` -> `missing...1127`
## • `missing` -> `missing...1137`
## • `missing` -> `missing...1152`
## • `missing` -> `missing...1165`
## • `missing` -> `missing...1167`
## • `missing` -> `missing...1170`
## • `missing` -> `missing...1193`
## • `missing` -> `missing...1213`
## • `missing` -> `missing...1220`
## • `missing` -> `missing...1225`
## • `missing` -> `missing...1227`
## • `missing` -> `missing...1234`
## • `missing` -> `missing...1262`
## • `missing` -> `missing...1268`
## • `missing` -> `missing...1292`
## • `missing` -> `missing...1309`
## • `missing` -> `missing...1311`
## • `missing` -> `missing...1318`
## • `missing` -> `missing...1335`
## • `missing` -> `missing...1361`
## • `missing` -> `missing...1363`
## • `missing` -> `missing...1366`
## • `missing` -> `missing...1399`
## • `missing` -> `missing...1404`

## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)

## nsfg catalog entry 6 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/1988FemRespData.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/1988PregSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/d50830e9b1653f985203457bde14071d.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/1988PregData.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/ec0fc4289d710d6d280411c7daaca343.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'

## New names:
## • `missing` -> `missing...3`
## • `missing` -> `missing...13`
## • `missing` -> `missing...54`
## • `missing` -> `missing...188`

## nsfg catalog entry 7 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/1988PregData.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/1995FemRespSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/5e41aa9083b6e159a156c387beaf6c60.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/1995FemRespData.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/9e8dfc70b36e88e53e289e4c494f93c2.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'

## New names:
## • `missing` -> `missing...1930`
## • `missing` -> `missing...2270`
## • `missing` -> `missing...2441`
## • `missing` -> `missing...2524`
## • `missing` -> `missing...2698`
## • `FILLER` -> `FILLER...3913`
## • `FILLER` -> `FILLER...3917`
## • `FILLER` -> `FILLER...3921`
## • `FILLER` -> `FILLER...3925`
## • `FILLER` -> `FILLER...3966`
## • `FILLER` -> `FILLER...4051`
## • `FILLER` -> `FILLER...4056`
## • `FILLER` -> `FILLER...4061`
## • `FILLER` -> `FILLER...4066`
## • `FILLER` -> `FILLER...4071`
## • `FILLER` -> `FILLER...4076`
## • `FILLER` -> `FILLER...4081`
## • `FILLER` -> `FILLER...4086`
## • `FILLER` -> `FILLER...4091`
## • `FILLER` -> `FILLER...4096`
## • `FILLER` -> `FILLER...4101`
## • `FILLER` -> `FILLER...4106`
## • `FILLER` -> `FILLER...4111`
## • `FILLER` -> `FILLER...4116`
## • `FILLER` -> `FILLER...4137`
## • `missing` -> `missing...4687`
## • `missing` -> `missing...4700`
## • `missing` -> `missing...4909`
## • `missing` -> `missing...4914`
## • `missing` -> `missing...4947`
## • `missing` -> `missing...4950`
## • `missing` -> `missing...4953`
## • `missing` -> `missing...4956`
## • `missing` -> `missing...4959`
## • `missing` -> `missing...4962`
## • `missing` -> `missing...4965`
## • `missing` -> `missing...4968`
## • `missing` -> `missing...4971`
## • `missing` -> `missing...4974`
## • `missing` -> `missing...4977`
## • `missing` -> `missing...4980`
## • `missing` -> `missing...4983`
## • `missing` -> `missing...4986`
## • `missing` -> `missing...4989`
## • `missing` -> `missing...5037`
## • `missing` -> `missing...5040`
## • `missing` -> `missing...5043`
## • `missing` -> `missing...5046`
## • `missing` -> `missing...5049`
## • `missing` -> `missing...5052`
## • `missing` -> `missing...5055`
## • `missing` -> `missing...5058`
## • `missing` -> `missing...5061`
## • `missing` -> `missing...5064`
## • `missing` -> `missing...5067`
## • `missing` -> `missing...5070`
## • `missing` -> `missing...5073`
## • `missing` -> `missing...5076`
## • `missing` -> `missing...5079`
## • `missing` -> `missing...5708`
## • `missing` -> `missing...5794`

## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)

## New names:
## • `FILLER...3913` -> `FILLER...3908`
## • `FILLER...3917` -> `FILLER...3912`
## • `FILLER...3921` -> `FILLER...3916`
## • `FILLER...3925` -> `FILLER...3920`
## • `FILLER...3966` -> `FILLER...3961`
## • `FILLER...4051` -> `FILLER...4046`
## • `FILLER...4056` -> `FILLER...4051`
## • `FILLER...4061` -> `FILLER...4056`
## • `FILLER...4066` -> `FILLER...4061`
## • `FILLER...4071` -> `FILLER...4066`
## • `FILLER...4076` -> `FILLER...4071`
## • `FILLER...4081` -> `FILLER...4076`
## • `FILLER...4086` -> `FILLER...4081`
## • `FILLER...4091` -> `FILLER...4086`
## • `FILLER...4096` -> `FILLER...4091`
## • `FILLER...4101` -> `FILLER...4096`
## • `FILLER...4106` -> `FILLER...4101`
## • `FILLER...4111` -> `FILLER...4106`
## • `FILLER...4116` -> `FILLER...4111`
## • `FILLER...4137` -> `FILLER...4132`

## nsfg catalog entry 8 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/1995FemRespData.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/1995FemPregFile.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/3a6bcc7d37615947bc9b405f74ed6fd2.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/1995PregData.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/001f6cd6c1354d02b5db9aab54756109.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'
## 
## nsfg catalog entry 9 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/1995PregData.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/2002FemPregInput.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/42c613f71cc1a6988fe38eb44becd104.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/2002FemPreg.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/e64ee1d25b16988eab330db74a156441.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'
## 
## nsfg catalog entry 10 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/2002FemPreg.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/2002FemRespInput.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/d44d0aceca7a3c7d823ee396014042a2.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/2002FemResp.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/0809fbb3f03f020da771f37713d4986a.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'
## 
## nsfg catalog entry 11 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/2002FemResp.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/2002HHvars.SAS'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/d0ddf027fb9a80c42960564b111988c8.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/2002HHvars.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/85de92022c28af61549cc37380b53bb2.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'
## 
## nsfg catalog entry 12 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/2002HHvars.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/2002MaleInput.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/c9fcd6df1aae3249ee261b8f473ae187.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/2002Male.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/e6782c34daffc542f4ef009a5fb6c64a.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'
## 
## nsfg catalog entry 13 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/2002Male.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/2006_2010_FemPregSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/759c5d7e675a89598cf66af396d8be48.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/2006_2010_FemPreg.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/bb8877ea2dbc53155e66cb97e4aa9412.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'
## 
## nsfg catalog entry 14 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/2006_2010_FemPreg.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/2006_2010_FemRespSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/6a5e2c1d47347466f667abc73f729176.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/2006_2010_FemResp.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/7647ca952428443b06fe57d303fe0a13.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'
## 
## nsfg catalog entry 15 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/2006_2010_FemResp.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/2006_2010_MaleSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/de82da3af116aac5fdac9bcc97a9b8ce.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/2006_2010_Male.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/0c6e844b49bc7d82b70dd95210b2cd4d.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'
## 
## nsfg catalog entry 16 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/2006_2010_Male.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/2011_2013_FemPregSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/3b8aec088e2721ed7d2e6f8157afbe64.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/2011_2013_FemPregData.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/cdf42f7c3c1fa2045b72281e58f64605.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'
## 
## nsfg catalog entry 17 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/2011_2013_FemPregData.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/2011_2013_FemRespSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/eee5a9f11dce789ece27081bcd973fd0.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/2011_2013_FemRespData.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/b66265a493f8a6a64318321cad579e31.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'
## 
## nsfg catalog entry 18 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/2011_2013_FemRespData.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/2011_2013_MaleSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/d9f17dafcad8823bc16f4639442cf55e.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/2011_2013_MaleData.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/84130ab66f17ee93d35821d6e9522c82.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'
## 
## nsfg catalog entry 19 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/2011_2013_MaleData.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/2011_2019_FemaleWgtSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/4e17e7382b072fa470752cff9d403b57.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/2011_2019_FemaleWgtData.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/4f9c2dd8a02ace5af3d8582e7f3a7024.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'
## 
## nsfg catalog entry 20 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/2011_2019_FemaleWgtData.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/2011_2019_MaleWgtSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/693b83b5bbb5f38b49e83d50878faf2a.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/2011_2019_MaleWgtData.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/f354654329986be1059c22b5ed067f60.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'
## 
## nsfg catalog entry 21 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/2011_2019_MaleWgtData.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/2013_2015_FemPregSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/44625bbccf161a02771ae7c1b831f033.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/2013_2015_FemPregData.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/cb3ea4f57d41c9408df7458e9f7aa708.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'
## 
## nsfg catalog entry 22 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/2013_2015_FemPregData.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/2013_2015_FemRespSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/e057d6aab9116ccd41e6680394d754d0.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/2013_2015_FemRespData.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/834a86460d1f27de4188c198fbe26683.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'
## 
## nsfg catalog entry 23 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/2013_2015_FemRespData.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/2013_2015_MaleSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/6ca6e9436a14713b9463996a0604c6d4.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/2013_2015_MaleData.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/51a8d97378d82a697cff8050ec9012ec.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'
## 
## nsfg catalog entry 24 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/2013_2015_MaleData.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/2015_2017_FemPregSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/a2f79c1a83b8979573e0fcdaa5c7276e.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/2015_2017_FemPregData.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/a6e9bca9622e3a2a0dfa200ef08722c5.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'
## 
## nsfg catalog entry 25 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/2015_2017_FemPregData.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/2015_2017_FemRespSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/1c53bc0318b5961e7300f85106d2ab54.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/2015_2017_FemRespData.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/53b43a4eac4779351e94c1a4adcaf0a2.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'
## 
## nsfg catalog entry 26 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/2015_2017_FemRespData.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/2015_2017_MaleSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/8a63827811e40807a7714f86b781bab2.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/2015_2017_MaleData.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/f373221505adf9521fe4c89960e106a8.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'
## 
## nsfg catalog entry 27 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/2015_2017_MaleData.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/2017_2019_FemPregSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/a7a9a2b279e80de61e56d24c0fa94578.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/2017_2019_FemPregData.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/a7a51d63a471368a2ffd7d5a8468b4b4.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'
## 
## nsfg catalog entry 28 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/2017_2019_FemPregData.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/2017_2019_FemRespSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/5bd7e4e6fc1fc787c36922b923c4c8ec.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/2017_2019_FemRespData.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/bff084db456c4616a8fb213e61793e19.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'
## 
## nsfg catalog entry 29 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/2017_2019_FemRespData.rds'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/sas/2017_2019_MaleSetup.sas'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/7d22e01ffb919e61c321c6a6c36ec747.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c1471523e5c'
## 
## 'https://ftp.cdc.gov/pub/Health_Statistics/NCHS/Datasets/NSFG/2017_2019_MaleData.dat'
## cached in
## 'C:/Users/bryan/AppData/Local/Temp/5958c3707dcf573e03c74ecb3602c1a7.Rcache'
## copying to
## 'C:\Users\bryan\AppData\Local\Temp\RtmpEr45yQ\file7c147bd718c2'
## 
## nsfg catalog entry 30 of 30 stored at 'C:/Users/bryan/OneDrive/Documents/NSFG/2017_2019_MaleData.rds'
## 
## nsfg local download completed

nsfg_df <- readRDS( file.path( path.expand( "~" ) , "NSFG" , "2017_2019_MaleData.rds" ) )

dat <- nsfg_df[, c("caseid","age_r", "fmarit", "agemarrn", "agemarrn4", "agemarrn2", "agemarrn3", "hisagem", "evrmarry", "agemarrn", "hisprace2", "fmarno", "nformwife", "hieduc", "wgt2017_2019", "earnba_y", "intvwyear", "intact18", "hisp", "secu", "sest")]

dat <- dat %>%
  mutate(evmarried = ifelse(fmarno == 1 & fmarit == 1, 1, 
                                     ifelse(fmarno == 0 & fmarit == 5, 0, 
                                            ifelse(fmarno %in% c(1:5), 1, NA))))
dat %>%
  group_by(evmarried) %>%
  dplyr::summarize(n=n()) %>%
  dplyr::mutate(pct = n / sum(n))

## # A tibble: 3 × 3
##   evmarried     n     pct
##       <dbl> <int>   <dbl>
## 1         0  3239 0.622  
## 2         1  1961 0.377  
## 3        NA     6 0.00115

#Age at event

#ever married age event
dat$ageevent<- pmin(dat$hisagem,dat$agemarrn, na.rm=T)



#never married age 
dat$ageevent <- ifelse(dat$fmarit == 5, dat$age_r, dat$ageevent)

# filter for NA values 
dat <- dat %>% 
  filter(ageevent !=98 & !is.na(ageevent))

tabyl(dat$ageevent)

##  dat$ageevent   n     percent
##            15 186 0.036442006
##            16 192 0.037617555
##            17 234 0.045846395
##            18 271 0.053095611
##            19 276 0.054075235
##            20 199 0.038989028
##            21 264 0.051724138
##            22 248 0.048589342
##            23 251 0.049177116
##            24 267 0.052311912
##            25 284 0.055642633
##            26 240 0.047021944
##            27 234 0.045846395
##            28 238 0.046630094
##            29 206 0.040360502
##            30 197 0.038597179
##            31 164 0.032131661
##            32 162 0.031739812
##            33 146 0.028605016
##            34 108 0.021159875
##            35  91 0.017829154
##            36  82 0.016065831
##            37  73 0.014302508
##            38  64 0.012539185
##            39  57 0.011167712
##            40  57 0.011167712
##            41  44 0.008620690
##            42  50 0.009796238
##            43  32 0.006269592
##            44  31 0.006073668
##            45  30 0.005877743
##            46  35 0.006857367
##            47  33 0.006465517
##            48  28 0.005485893
##            49  30 0.005877743

#Filter data for NH White and NH Black

dat <- dat %>% 
  filter(hisprace2 == 2 | hisprace2 == 3)

#Determine the age they got their bachelor’s degree

dat <- dat %>%
  mutate(
    bach_degree = ifelse(hieduc %in% c(5:11), 0,
                  ifelse(hieduc %in% c(12:15), 1, NA)),
    birth_year = intvwyear - age_r,
    age_bach_degree = earnba_y - birth_year)


dat %>%
  group_by(evrmarry, ageevent, age_bach_degree) %>%
  dplyr::summarize(n=n()) %>%
  mutate(pct = n / sum(n))

## `summarise()` has grouped output by 'evrmarry', 'ageevent'. You can override
## using the `.groups` argument.

## # A tibble: 379 × 5
## # Groups:   evrmarry, ageevent [66]
##    evrmarry ageevent age_bach_degree     n    pct
##       <dbl>    <dbl>           <dbl> <int>  <dbl>
##  1        0       15              NA   106 1     
##  2        0       16              NA    97 1     
##  3        0       17              NA   122 1     
##  4        0       18              NA   115 1     
##  5        0       19              NA   115 1     
##  6        0       20              NA    70 1     
##  7        0       21              NA    81 1     
##  8        0       22              21     4 0.0519
##  9        0       22              22     3 0.0390
## 10        0       22              NA    70 0.909 
## # ℹ 369 more rows

#Create survey design

library(survey)

## Warning: package 'survey' was built under R version 4.3.2

## Loading required package: grid

## Loading required package: survival

## 
## Attaching package: 'survey'

## The following object is masked from 'package:graphics':
## 
##     dotchart

des<-svydesign(ids = ~secu,
                strata = ~sest,
                weights=~wgt2017_2019, 
                data=dat,
                nest=T)

#Percent with a bachelor’s degree at each age for both race

bach_age_race <- svyby(~bach_degree, by = ~hisprace2 + age_r, design = des, FUN = svymean) %>%
  mutate(Race = ifelse(hisprace2 == 3, "percent of  Black with bach", "& White with bach")) %>%
  select(age_r, Race, Percent_with_college_degree = bach_degree) %>%
  spread(Race, Percent_with_college_degree)

# Print the result
print(bach_age_race)

##    age_r & White with bach percent of  Black with bach
## 1     15       0.000000000                 0.000000000
## 2     16       0.000000000                 0.000000000
## 3     17       0.000000000                 0.000000000
## 4     18       0.000000000                 0.000000000
## 5     19       0.000000000                 0.000000000
## 6     20       0.000000000                 0.000000000
## 7     21       0.008250833                 0.000000000
## 8     22       0.114112456                 0.007067989
## 9     23       0.301123729                 0.153831478
## 10    24       0.394126772                 0.271459435
## 11    25       0.360019297                 0.256943867
## 12    26       0.458598748                 0.101478745
## 13    27       0.385167571                 0.006186328
## 14    28       0.379778158                 0.143184877
## 15    29       0.328598010                 0.209892625
## 16    30       0.349032816                 0.112009745
## 17    31       0.469542786                 0.148690892
## 18    32       0.407440425                 0.092208286
## 19    33       0.341284328                 0.330142638
## 20    34       0.387126825                 0.068720672
## 21    35       0.366626178                 0.307217765
## 22    36       0.479001848                 0.073041272
## 23    37       0.426800075                 0.200632472
## 24    38       0.489537256                 0.460298863
## 25    39       0.283914810                 0.171186043
## 26    40       0.599124196                 0.256314847
## 27    41       0.556878984                 0.098169769
## 28    42       0.413063878                 0.231067351
## 29    43       0.297915871                 0.557450719
## 30    44       0.406574667                 0.213307189
## 31    45       0.453818304                 0.357997355
## 32    46       0.448505331                 0.506315460
## 33    47       0.313390039                 0.270052526
## 34    48       0.377430624                 0.310944449
## 35    49       0.307178148                 0.174127747
## 36    50       1.000000000                          NA

#Determine ever married status

dat <- dat %>%
  mutate(marriage_babs = case_when((evmarried == 0) & (is.na(age_bach_degree)) ~ 1,
                             (evmarried == 0) & (!is.na(age_bach_degree)) ~ 2,
                             (evmarried == 1) & (is.na(age_bach_degree)) ~ 3,
                             (evmarried == 1) & (!is.na(age_bach_degree)) & (ageevent < age_bach_degree) ~ 4,
                             (evmarried == 1) & (!is.na(age_bach_degree)) & (ageevent > age_bach_degree) ~ 5,
                             (evmarried == 1) & (!is.na(age_bach_degree)) & (ageevent == age_bach_degree) ~ 6))

dat %>%
  group_by(evmarried, ageevent, age_bach_degree, marriage_babs) %>%
  dplyr::summarize(n=n()) %>%
  mutate(pct = n / sum(n))

## `summarise()` has grouped output by 'evmarried', 'ageevent', 'age_bach_degree'.
## You can override using the `.groups` argument.

## # A tibble: 379 × 6
## # Groups:   evmarried, ageevent, age_bach_degree [379]
##    evmarried ageevent age_bach_degree marriage_babs     n   pct
##        <dbl>    <dbl>           <dbl>         <dbl> <int> <dbl>
##  1         0       15              NA             1   106     1
##  2         0       16              NA             1    97     1
##  3         0       17              NA             1   122     1
##  4         0       18              NA             1   115     1
##  5         0       19              NA             1   115     1
##  6         0       20              NA             1    70     1
##  7         0       21              NA             1    81     1
##  8         0       22              21             2     4     1
##  9         0       22              22             2     3     1
## 10         0       22              NA             1    70     1
## # ℹ 369 more rows

dat %>%
  group_by(marriage_babs) %>%
  dplyr::summarize(n=n()) %>%
  dplyr::mutate(pct = n / sum(n))

## # A tibble: 6 × 3
##   marriage_babs     n    pct
##           <dbl> <int>  <dbl>
## 1             1  1667 0.512 
## 2             2   343 0.105 
## 3             3   783 0.241 
## 4             4    79 0.0243
## 5             5   346 0.106 
## 6             6    37 0.0114

#conditional probabilities

library(ggsurvfit)

## 
## Attaching package: 'ggsurvfit'

## The following object is masked from 'package:psych':
## 
##     %+%

library(relsurv)

## Warning: package 'relsurv' was built under R version 4.3.2

## Loading required package: date

## 
## Attaching package: 'relsurv'

## The following object is masked from 'package:lubridate':
## 
##     years

pydata<-survSplit(dat, cut=c(15:49),end="ageevent",event="evmarried")


pydata$babs_event <- case_when(pydata$marriage_babs == 1 ~ 0,
                               pydata$marriage_babs == 3 ~ 0,
                               pydata$marriage_babs == 4 ~ 0,
                               pydata$marriage_babs == 2 & (pydata$ageevent >= pydata$age_bach_degree) ~ 1,
                               pydata$marriage_babs == 2 & (pydata$ageevent < pydata$age_bach_degree) ~ 0,
                               pydata$marriage_babs == 5 & (pydata$ageevent >= pydata$age_bach_degree) ~ 1,
                               pydata$marriage_babs == 5 & (pydata$ageevent < pydata$age_bach_degree) ~ 0,
                               pydata$marriage_babs == 6 & (pydata$ageevent >= pydata$age_bach_degree) ~ 1,
                               pydata$marriage_babs == 6 & (pydata$ageevent < pydata$age_bach_degree) ~ 0)
head(pydata[, c("caseid", "ageevent", "age_bach_degree", "marriage_babs")], n=50)

##    caseid ageevent age_bach_degree marriage_babs
## 1   80717       15              22             2
## 2   80717       16              22             2
## 3   80717       17              22             2
## 4   80717       18              22             2
## 5   80717       19              22             2
## 6   80717       20              22             2
## 7   80717       21              22             2
## 8   80717       22              22             2
## 9   80717       23              22             2
## 10  80717       24              22             2
## 11  80717       25              22             2
## 12  80717       26              22             2
## 13  80717       27              22             2
## 14  80717       28              22             2
## 15  80717       29              22             2
## 16  80717       30              22             2
## 17  80717       31              22             2
## 18  80721       15              NA             1
## 19  80721       16              NA             1
## 20  80721       17              NA             1
## 21  80724       15              NA             3
## 22  80724       16              NA             3
## 23  80724       17              NA             3
## 24  80724       18              NA             3
## 25  80732       15              22             2
## 26  80732       16              22             2
## 27  80732       17              22             2
## 28  80732       18              22             2
## 29  80732       19              22             2
## 30  80732       20              22             2
## 31  80732       21              22             2
## 32  80732       22              22             2
## 33  80732       23              22             2
## 34  80732       24              22             2
## 35  80732       25              22             2
## 36  80732       26              22             2
## 37  80732       27              22             2
## 38  80732       28              22             2
## 39  80732       29              22             2
## 40  80732       30              22             2
## 41  80732       31              22             2
## 42  80732       32              22             2
## 43  80732       33              22             2
## 44  80732       34              22             2
## 45  80732       35              22             2
## 46  80732       36              22             2
## 47  80732       37              22             2
## 48  80732       38              22             2
## 49  80732       39              22             2
## 50  80734       15              21             5

#Age and Race

library(survey)
des2<-svydesign(ids = ~secu,
                strata = ~sest,
                weights=~wgt2017_2019, 
                data=pydata,
                nest=T)

dat %>%
  group_by(evmarried) %>%
  dplyr::summarize(n=n()) %>%
  dplyr::mutate(pct = n / sum(n))

## # A tibble: 2 × 3
##   evmarried     n   pct
##       <dbl> <int> <dbl>
## 1         0  2010 0.618
## 2         1  1245 0.382

# Use svyby to calculate weighted means and sample sizes by ageevent and hisprace2
raceprobs <- svyby(~evmarried, by = ~ageevent + hisprace2, design = des2, 
                   FUN = svymean, na.rm = TRUE)

raceprobs$me <- 2*raceprobs$se
  
raceprobs$hisprace2 <- car::Recode(raceprobs$hisprace2, recodes="2='NH white Men'; 3='NH black Men'; else=NA",
                       as.factor=T)

ggplot(data =raceprobs, aes(x = ageevent, y = evmarried, ymin=evmarried-me, ymax=evmarried+me, fill=hisprace2,color = hisprace2, group = hisprace2)) +
     geom_line() + geom_ribbon(alpha=0.3,aes(color=NULL)) +
labs(title="Adjusted First Marriage Rates by Age and Race", 
       subtitle="Men Ages 18 to 49", 
       caption="Source: 2017-2019 NSFG Male Respondent File") +
       xlab(label="Age at First Marriage") +
  ylab(label="Adjusted First Marriage Rate") +theme_nice

#Filter data for NH White and NH Black

educprobs<- pydata %>%
    group_by(ageevent, bach_degree) %>%
    dplyr::summarize(p=weighted.mean(evmarried,wgt2017_2019,na.rm = TRUE),n=n())

## `summarise()` has grouped output by 'ageevent'. You can override using the
## `.groups` argument.

educprobs$num <- educprobs$p*(1-educprobs$p)

educprobs$sep <- sqrt(educprobs$num/educprobs$n)

educprobs$me <- 2*educprobs$sep

educprobs$babs <- case_when(educprobs$bach_degree == 1 ~ "BA/BS Degree",educprobs$bach_degree == 0 ~ "No College Degree")

ggplot(data =educprobs, aes(x = ageevent, y = p, ymin=p-me, ymax=p+me, fill=babs,color = babs, group = babs)) +
     geom_line() + geom_ribbon(alpha=0.3,aes(color=NULL)) +
labs(title="Adjusted First Marriage Rates by Age and Education", 
       subtitle="Men Ages 18 to 49", 
       caption="Source: 2017-2019 NSFG Male Respondent File") +
       xlab(label="Age at First Marriage") +
  ylab(label="Adjusted First Marriage Rate")+theme_nice

#Race and Education

raceeducprobs<- pydata %>%
    group_by(ageevent, hisprace2, babs_event) %>%
    dplyr::summarize(p=weighted.mean(evmarried,wgt2017_2019,na.rm = TRUE),n=n())

## `summarise()` has grouped output by 'ageevent', 'hisprace2'. You can override
## using the `.groups` argument.

raceeducprobs$num <- raceeducprobs$p*(1-raceeducprobs$p)

raceeducprobs$sep <- sqrt(raceeducprobs$num/raceeducprobs$n)

raceeducprobs$me <- 2*raceeducprobs$sep
  
raceeducprobs$babs_event <- case_when(raceeducprobs$babs_event == 1 ~ "BA/BS Degree",raceeducprobs$babs_event == 0 ~ "No College Degree")

raceeducprobs$hisprace2 <- case_when(raceeducprobs$hisprace2 == 2 ~ "NH white Men",raceeducprobs$hisprace2 == 3 ~ "NH black Men")

ggplot(data =raceeducprobs, aes(x = ageevent, y = p, ymin=p-me, ymax=p+me, fill=babs_event,color = babs_event, group = babs_event)) +
     geom_line() + geom_ribbon(alpha=0.3,aes(color=NULL)) +
labs(title="Adjusted First Marriage Rates by Race and Education (Time Varying)", 
       subtitle="Men Ages 18 to 49", 
       caption="Source: 2017-2019 NSFG Male Respondent File") +
       xlab(label="Age at First Marriage") +
  ylab(label="Ajusted First Marriage Rate") + facet_grid(~hisprace2)+theme_nice

#Filter data for NH White and NH Black

raceeducprobs<- pydata %>%
    group_by(ageevent, hisprace2, bach_degree) %>%
    dplyr::summarize(p=weighted.mean(evmarried,wgt2017_2019,na.rm = TRUE),n=n())

## `summarise()` has grouped output by 'ageevent', 'hisprace2'. You can override
## using the `.groups` argument.

raceeducprobs$num <- raceeducprobs$p*(1-raceeducprobs$p)

raceeducprobs$sep <- sqrt(raceeducprobs$num/raceeducprobs$n)

raceeducprobs$me <- 2*raceeducprobs$sep
  
raceeducprobs$bach_degree <- case_when(raceeducprobs$bach_degree == 1 ~ "BA/BS Degree",raceeducprobs$bach_degree == 0 ~ "No College Degree")

raceeducprobs$hisprace2 <- case_when(raceeducprobs$hisprace2 == 2 ~ "NH white Men",raceeducprobs$hisprace2 == 3 ~ "NH black Men")

ggplot(data =raceeducprobs, aes(x = ageevent, y = p, ymin=p-me, ymax=p+me, fill=bach_degree,color = bach_degree, group = bach_degree)) +
     geom_line() + geom_ribbon(alpha=0.3,aes(color=NULL)) +
labs(title="Adjusted First Marriage Rates by Race and Education", 
       subtitle="Men Ages 18 to 49", 
       caption="Source: 2017-2019 NSFG Male Respondent File") +
       xlab(label="Age at First Marriage") +
  ylab(label="Ajusted First Marriage Rate") + facet_grid(~hisprace2)+theme_nice

pydata$age_cat <- car::Recode(pydata$ageevent,recodes = "15:19 = '15-19'; 20:24 = '20-24'; 25:29 = '25-29'; 30:34 = '30-34'; 35:39 = '35-39'; 40:44 = '40-44'; 45:49 = '45-49'; else=NA", as.factor=T)

pydata$new_weight <- pydata$wgt2017_2019/mean(pydata$wgt2017_2019) #standize the weight 

pydata$black <- ifelse(pydata$hisprace2==3,1,0)

#Model

library(survey)

des2<-svydesign(ids = ~secu,
                strata = ~sest,
                weights=~wgt2017_2019, 
                data=pydata,
                nest=T)

model1 <- svyglm(evmarried~ age_cat + black, family = "binomial", design= des2)

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

model_2 <- svyglm(evmarried ~ age_cat + black + babs_event, family = "binomial", design=des2)

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

summary(model_2)

## 
## Call:
## svyglm(formula = evmarried ~ age_cat + black + babs_event, design = des2, 
##     family = "binomial")
## 
## Survey design:
## svydesign(ids = ~secu, strata = ~sest, weights = ~wgt2017_2019, 
##     data = pydata, nest = T)
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -5.10615    0.17365 -29.405  < 2e-16 ***
## age_cat20-24  1.96890    0.17854  11.028 1.66e-14 ***
## age_cat25-29  2.49285    0.18931  13.168  < 2e-16 ***
## age_cat30-34  2.44778    0.20017  12.228 4.67e-16 ***
## age_cat35-39  2.14114    0.23097   9.270 4.28e-12 ***
## age_cat40-44  1.93352    0.34543   5.597 1.16e-06 ***
## age_cat45-49  0.78355    0.74760   1.048 0.300078    
## black        -0.38895    0.10693  -3.638 0.000693 ***
## babs_event    0.51620    0.08398   6.147 1.74e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9991504)
## 
## Number of Fisher Scoring iterations: 7

#NH white Model

pydata_whites <-filter(pydata, black == 0)


des3<-svydesign(ids = ~secu,
                strata = ~sest,
                weights=~wgt2017_2019, 
                data=pydata_whites,
                nest=T)


whitemodel <- svyglm(evmarried ~ age_cat + babs_event, family = "binomial", design=des3)

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

summary(whitemodel)

## 
## Call:
## svyglm(formula = evmarried ~ age_cat + babs_event, design = des3, 
##     family = "binomial")
## 
## Survey design:
## svydesign(ids = ~secu, strata = ~sest, weights = ~wgt2017_2019, 
##     data = pydata_whites, nest = T)
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -5.10169    0.18889 -27.009  < 2e-16 ***
## age_cat20-24  1.95890    0.19621   9.984 3.38e-13 ***
## age_cat25-29  2.50222    0.20863  11.993 6.62e-16 ***
## age_cat30-34  2.42357    0.21295  11.381 4.19e-15 ***
## age_cat35-39  2.10065    0.24341   8.630 2.97e-11 ***
## age_cat40-44  2.04284    0.36191   5.645 9.25e-07 ***
## age_cat45-49  0.92728    0.81409   1.139     0.26    
## babs_event    0.51402    0.09162   5.611 1.04e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.000346)
## 
## Number of Fisher Scoring iterations: 7

#Black Model

pydata_blacks <-filter(pydata, black == 1)

des4<-svydesign(ids = ~secu,
                strata = ~sest,
                weights=~wgt2017_2019, 
                data=pydata_blacks,
                nest=T)

blackmodel<- svyglm(evmarried ~ age_cat + babs_event, family = "binomial", design =des4)

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

summary(blackmodel)

## 
## Call:
## svyglm(formula = evmarried ~ age_cat + babs_event, design = des4, 
##     family = "binomial")
## 
## Survey design:
## svydesign(ids = ~secu, strata = ~sest, weights = ~wgt2017_2019, 
##     data = pydata_blacks, nest = T)
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -5.52691    0.31219 -17.703  < 2e-16 ***
## age_cat20-24  2.04100    0.36753   5.553 1.63e-06 ***
## age_cat25-29  2.42370    0.34832   6.958 1.48e-08 ***
## age_cat30-34  2.59751    0.41106   6.319 1.26e-07 ***
## age_cat35-39  2.35165    0.49229   4.777 2.09e-05 ***
## age_cat40-44  1.23035    0.60168   2.045   0.0470 *  
## age_cat45-49 -0.01125    1.14445  -0.010   0.9922    
## babs_event    0.54022    0.29384   1.838   0.0729 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.000857)
## 
## Number of Fisher Scoring iterations: 8

whitedata <- expand.grid(age_cat=c("15-19","20-24","25-29","30-34","35-39","40-44","45-49"),babs_event=0:1)

whitedata$phat <- predict(whitemodel, whitedata, type="response")

whitedata

##    age_cat babs_event        phat
## 1    15-19          0 0.006049628
## 2    20-24          0 0.041376157
## 3    25-29          0 0.069172623
## 4    30-34          0 0.064277120
## 5    35-39          0 0.047378708
## 6    40-44          0 0.044836988
## 7    45-49          0 0.015151180
## 8    15-19          1 0.010074008
## 9    20-24          1 0.067309312
## 10   25-29          1 0.110519094
## 11   30-34          1 0.103021270
## 12   35-39          1 0.076772756
## 13   40-44          1 0.072774602
## 14   45-49          1 0.025077405

blackdata <- expand.grid(age_cat=c("15-19","20-24","25-29","30-34","35-39","40-44","45-49"),babs_event=0:1)

blackdata$phat <- predict(blackmodel, blackdata, type="response")

blackdata

##    age_cat babs_event        phat
## 1    15-19          0 0.003962483
## 2    20-24          0 0.029715618
## 3    25-29          0 0.042974696
## 4    30-34          0 0.050719096
## 5    35-39          0 0.040107194
## 6    40-44          0 0.013432325
## 7    45-49          0 0.003918328
## 8    15-19          1 0.006781872
## 9    20-24          1 0.049940159
## 10   25-29          1 0.071557880
## 11   30-34          1 0.084001097
## 12   35-39          1 0.066916511
## 13   40-44          1 0.022835229
## 14   45-49          1 0.006706511

library(survey)
library(dplyr)
library(ggplot2)

# Assuming your survey design is named des2

prop_marrying <- svyby(~evrmarry, by = ~hisprace2 + age_r + bach_degree, design = des2, FUN = svymean) %>%
  mutate(me = 2 * se,
         babs_event = case_when(bach_degree == 1 ~ "BA/BS Degree", bach_degree == 0 ~ "No College Degree"),
         hisprace2 = case_when(hisprace2 == 2 ~ "NH white Men", hisprace2 == 3 ~ "NH black Men"))

# Create the plot
ggplot(data = prop_marrying, aes(x = age_r, y = evrmarry, ymin = evrmarry - me, ymax = evrmarry + me, fill = babs_event, color = babs_event, group = babs_event)) +
  geom_line() +
  geom_ribbon(alpha = 0.3, aes(color = NULL)) +
  labs(
    title = "Probability of Marrying by Race and Education (Time Varying)",
    subtitle = "Men Ages 18 to 49",
    caption = "Source: 2017-2019 NSFG Male Respondent File"
  ) +
  xlab(label = "Age at First Marriage") +
  ylab(label = "Adjusted First Marriage Rate") +
  facet_grid(~hisprace2)

whitedata$babs_event<- recode(whitedata$babs_event, "1='BA/BS Degree'; 0='No College Degree'")

ggplot(whitedata, aes(x = age_cat, y = phat, group=babs_event, fill=babs_event,color = babs_event)) +
  geom_point() +
  geom_line() +
  labs(x = "Age", y = "Predicted First Marriage Probability", title = "Predicted First Marriage Probabilities by Education for NH white Men")

## Don't know how to automatically pick scale for object of type <svystat>.
## Defaulting to continuous.

blackdata$babs_event<- recode(blackdata$babs_event, "1='BA/BS Degree'; 0='No College Degree'")

ggplot(blackdata, aes(x = age_cat, y = phat, group=babs_event, fill=babs_event,color = babs_event)) +
  geom_point() +
  geom_line() +
  labs(x = "Age", y = "Predicted First Marriage Probability", title = "Predicted First Marriage Probabilities by Education for NH black Men")

## Don't know how to automatically pick scale for object of type <svystat>.
## Defaulting to continuous.

#number of each of marriage, the kinds of fmarit variable

newwhitedata <- pydata_whites %>%                           
  group_by(age_cat) %>% 
  dplyr::summarize(babs_event=weighted.mean(babs_event,wgt2017_2019,na.rm = TRUE),n=n())

newwhitedata$phat <- predict(whitemodel, newwhitedata, type="response")

as.data.frame(newwhitedata)

##   age_cat  babs_event     n        phat
## 1   15-19 0.000453103 11134 0.006051029
## 2   20-24 0.159112071  8606 0.044744627
## 3   25-29 0.368562026  5418 0.082411744
## 4   30-34 0.364518254  2886 0.076509448
## 5   35-39 0.332869707  1386 0.055727388
## 6   40-44 0.305171735   641 0.052055547
## 7   45-49 0.223301683   205 0.016962730

newblackdata <- pydata_blacks %>%                           
  group_by(age_cat) %>% 
  dplyr::summarize(babs_event=weighted.mean(babs_event,wgt2017_2019,na.rm = TRUE),n=n())

newblackdata$phat <- predict(blackmodel, newblackdata, type="response")

as.data.frame(newblackdata)

##   age_cat babs_event    n        phat
## 1   15-19 0.00000000 4023 0.003962483
## 2   20-24 0.06567915 3016 0.030755875
## 3   25-29 0.16825533 2004 0.046872212
## 4   30-34 0.16796750 1109 0.055270289
## 5   35-39 0.13884223  634 0.043096492
## 6   40-44 0.16320965  331 0.014652272
## 7   45-49 0.15652366  130 0.004262586

whitemodel_blackdata <- pydata_blacks %>%                           
  group_by(age_cat) %>% 
  dplyr::summarize(babs_event=weighted.mean(babs_event,wgt2017_2019,na.rm = TRUE),n=n())

whitemodel_blackdata$phat <- predict(whitemodel, whitemodel_blackdata, type="response")

as.data.frame(whitemodel_blackdata)

##   age_cat babs_event    n        phat
## 1   15-19 0.00000000 4023 0.006049628
## 2   20-24 0.06567915 3016 0.042736160
## 3   25-29 0.16825533 2004 0.074953072
## 4   30-34 0.16796750 1109 0.069669508
## 5   35-39 0.13884223  634 0.050705871
## 6   40-44 0.16320965  331 0.048570199
## 7   45-49 0.15652366  130 0.016399747

dat$age_cat <- car::Recode(dat$ageevent,recodes = "15:19 = '15-19'; 20:24 = '20-24'; 25:29 = '25-29'; 30:34 = '30-34'; 35:39 = '35-39'; 40:44 = '40-44'; 45:49 = '45-49'; else=NA", as.factor=T)


fprobs<- dat %>%
    group_by(age_cat) %>%
    dplyr::summarize(p=weighted.mean(evmarried,wgt2017_2019,na.rm = TRUE),n=n())

ggplot(fprobs, aes(x = age_cat, y = p, group =1 )) +
  geom_point() +
  geom_line() +
  labs(x = "Age", y = "Adjusted First Marriage Rate", title = "Adjusted First Marriage Rate by Age")

dat <- dat %>%
  mutate(nh_wh_marr = ifelse(evmarried == 1 & hisprace2 == 2,1,0),
         nh_bl_marr = ifelse(evmarried == 1 & hisprace2 == 3,1,0),
         marr_by_race = ifelse(evmarried == 1 & hisprace2 == 2,1,
                               ifelse(evmarried == 1 & hisprace2 == 3,2,0))) #1 is for NH whites marrying, 2 is for NH Blacks marrying, 0 is for no marriages whatsoever regardless of race)

nh_wh_probs<- dat %>%
  group_by(age_cat) %>%
  dplyr::summarize(p=weighted.mean(nh_wh_marr,wgt2017_2019,na.rm = TRUE),n=n())

ggplot(nh_wh_probs, aes(x = factor(age_cat), y = p, group = 1)) +
  geom_point() +
  geom_line() +
  labs(x = "Age", y = "Adjusted First Marriage Rate", title = "Adjusted First Marriage Rate (NH white) by Age")

nh_bl_probs<- dat %>%
  group_by(age_cat) %>%
  dplyr::summarize(p=weighted.mean(nh_bl_marr,wgt2017_2019,na.rm = TRUE),n=n())

ggplot(nh_bl_probs, aes(x = age_cat, y = p, group =1 )) +
  geom_point() +
  geom_line() + 
  labs(x = "Age", y = "Adjusted First Marriage Rate", title = "Adjusted First Marriage Rate (NH black) by Age")

library(nnet)
compriskmodel <-multinom(formula = marr_by_race ~ age_cat, data = dat)

## # weights:  24 (14 variable)
## initial  value 3575.983000 
## iter  10 value 2531.703939
## iter  20 value 2486.719504
## final  value 2486.694713 
## converged

summary(compriskmodel)

## Call:
## multinom(formula = marr_by_race ~ age_cat, data = dat)
## 
## Coefficients:
##   (Intercept) age_cat20-24 age_cat25-29 age_cat30-34 age_cat35-39 age_cat40-44
## 1   -2.311628     2.209119     2.356203     1.780325     1.143975   0.39628588
## 2   -3.428632     1.696611     1.816302     1.427142     1.008207  -0.04542083
##   age_cat45-49
## 1   -0.9933175
## 2   -1.2625512
## 
## Std. Errors:
##   (Intercept) age_cat20-24 age_cat25-29 age_cat30-34 age_cat35-39 age_cat40-44
## 1   0.1413630    0.1593377    0.1598664    0.1706136    0.2083188    0.2834868
## 2   0.2394981    0.2727929    0.2729369    0.2928105    0.3541433    0.5614646
##   age_cat45-49
## 1    0.5283297
## 2    1.0326542
## 
## Residual Deviance: 4973.389 
## AIC: 5001.389

dat$new_weight <- dat$wgt2017_2019/mean(dat$wgt2017_2019)

dat$new_weight = dat$wgt2017_2019/mean(dat$wgt2017_2019) #creating new weight


des5<-svydesign(ids = ~secu,
                strata = ~sest,
                weights=~new_weight, 
                data=dat,
                nest=T)

anymodel <- svyglm(evmarried ~ age_cat, design = des5) 

summary(anymodel)

## 
## Call:
## svyglm(formula = evmarried ~ age_cat, design = des5)
## 
## Survey design:
## svydesign(ids = ~secu, strata = ~sest, weights = ~new_weight, 
##     data = dat, nest = T)
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.16688    0.02647   6.303 8.62e-08 ***
## age_cat20-24  0.39140    0.04738   8.260 8.92e-11 ***
## age_cat25-29  0.46781    0.03813  12.268  < 2e-16 ***
## age_cat30-34  0.35595    0.04737   7.515 1.20e-09 ***
## age_cat35-39  0.19926    0.05698   3.497  0.00102 ** 
## age_cat40-44  0.10616    0.07693   1.380  0.17398    
## age_cat45-49 -0.12724    0.03763  -3.381  0.00144 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.2177061)
## 
## Number of Fisher Scoring iterations: 2

library(gbm)

## Loaded gbm 2.1.8.1

compriskmodel <-multinom(formula = marr_by_race ~ factor(evmarried), data = dat)

## # weights:  9 (4 variable)
## initial  value 3575.983000 
## iter  10 value 591.721666
## final  value 577.574873 
## converged

summary(compriskmodel)

## Call:
## multinom(formula = marr_by_race ~ factor(evmarried), data = dat)
## 
## Coefficients:
##   (Intercept) factor(evmarried)1
## 1   -12.86357           24.57186
## 2   -11.18074           21.33906
## 
## Std. Errors:
##   (Intercept) factor(evmarried)1
## 1    13.85782           17.61870
## 2     5.97414           12.41263
## 
## Residual Deviance: 1155.15 
## AIC: 1163.15

anymodel <- svyglm(evmarried ~ factor(ageevent), weights=new_weight, design = des5)
summary(anymodel)

## 
## Call:
## svyglm(formula = evmarried ~ factor(ageevent), design = des5, 
##     weights = new_weight)
## 
## Survey design:
## svydesign(ids = ~secu, strata = ~sest, weights = ~new_weight, 
##     data = dat, nest = T)
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -5.861e-16  1.442e-15  -0.406 0.688773    
## factor(ageevent)16  3.475e-03  3.563e-03   0.975 0.341016    
## factor(ageevent)17  1.208e-02  6.350e-03   1.902 0.071635 .  
## factor(ageevent)18  3.762e-01  1.089e-01   3.454 0.002510 ** 
## factor(ageevent)19  4.492e-01  1.269e-01   3.540 0.002055 ** 
## factor(ageevent)20  4.319e-01  1.103e-01   3.914 0.000859 ***
## factor(ageevent)21  3.980e-01  1.271e-01   3.130 0.005270 ** 
## factor(ageevent)22  5.402e-01  1.300e-01   4.155 0.000490 ***
## factor(ageevent)23  7.643e-01  6.390e-02  11.961 1.44e-10 ***
## factor(ageevent)24  7.437e-01  6.603e-02  11.264 4.13e-10 ***
## factor(ageevent)25  5.961e-01  9.722e-02   6.132 5.43e-06 ***
## factor(ageevent)26  7.963e-01  7.623e-02  10.446 1.51e-09 ***
## factor(ageevent)27  7.834e-01  5.894e-02  13.293 2.19e-11 ***
## factor(ageevent)28  5.408e-01  9.901e-02   5.463 2.39e-05 ***
## factor(ageevent)29  6.583e-01  9.477e-02   6.946 9.60e-07 ***
## factor(ageevent)30  6.409e-01  1.138e-01   5.631 1.64e-05 ***
## factor(ageevent)31  7.304e-01  9.434e-02   7.742 1.93e-07 ***
## factor(ageevent)32  4.712e-01  1.156e-01   4.074 0.000591 ***
## factor(ageevent)33  2.932e-01  9.756e-02   3.005 0.006989 ** 
## factor(ageevent)34  5.723e-01  1.553e-01   3.686 0.001466 ** 
## factor(ageevent)35  3.778e-01  2.041e-01   1.851 0.078950 .  
## factor(ageevent)36  4.855e-01  1.533e-01   3.167 0.004846 ** 
## factor(ageevent)37  5.103e-01  1.387e-01   3.679 0.001487 ** 
## factor(ageevent)38  3.918e-01  1.280e-01   3.061 0.006171 ** 
## factor(ageevent)39  1.229e-01  1.038e-01   1.184 0.250220    
## factor(ageevent)40  6.368e-01  2.014e-01   3.162 0.004905 ** 
## factor(ageevent)41  3.626e-01  1.495e-01   2.425 0.024887 *  
## factor(ageevent)42  1.208e-01  7.206e-02   1.677 0.109119    
## factor(ageevent)43  6.798e-01  2.121e-01   3.206 0.004440 ** 
## factor(ageevent)44  1.298e-01  1.164e-01   1.115 0.278089    
## factor(ageevent)45  1.540e-01  1.065e-01   1.445 0.163895    
## factor(ageevent)46  1.820e-02  1.921e-02   0.947 0.354719    
## factor(ageevent)47  5.885e-16  1.413e-15   0.417 0.681432    
## factor(ageevent)48  6.855e-16  1.373e-15   0.499 0.622984    
## factor(ageevent)49  5.210e-16  1.363e-15   0.382 0.706404    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.4258782)
## 
## Number of Fisher Scoring iterations: 2

as.data.frame(fprobs)

##   age_cat          p   n
## 1   15-19 0.16687517 628
## 2   20-24 0.55827326 811
## 3   25-29 0.63468717 788
## 4   30-34 0.52282340 510
## 5   35-39 0.36613989 252
## 6   40-44 0.27303553 152
## 7   45-49 0.03963077 114

#Total pop hisprace2( NH black)
#Total pop hisprace2(NH white)
#Group evmarried and hisprace2, 
#babs_event is they have bachelor's degree or not 



pydata %>%
  group_by(hisprace2, babs_event) %>% # 3 is black 
  summarise(Count = n())

## `summarise()` has grouped output by 'hisprace2'. You can override using the
## `.groups` argument.

## # A tibble: 4 × 3
## # Groups:   hisprace2 [2]
##   hisprace2 babs_event Count
##       <dbl>      <dbl> <int>
## 1         2          0 25620
## 2         2          1  4656
## 3         3          0 10382
## 4         3          1   865

# To get those who experienced event
pydata %>%
  group_by(hisprace2, babs_event) %>%
  summarise(Count = sum(evmarried == 1))

## `summarise()` has grouped output by 'hisprace2'. You can override using the
## `.groups` argument.

## # A tibble: 4 × 3
## # Groups:   hisprace2 [2]
##   hisprace2 babs_event Count
##       <dbl>      <dbl> <int>
## 1         2          0   682
## 2         2          1   345
## 3         3          0   180
## 4         3          1    38

HW 2 B. Solomon

Bryan Solomon

2023-09-11