Uganda Survival Analysis

Author

R.Luttinen

Event history analysis: Uganda PMA 2020-2022

#read in data from IPUMS: long version

library(ipumsr)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#person-period dataset


pmauglong <- read_ipums_micro(
  ddi = "C:/Users/Rebecca/Downloads/pma_00018.xml",
  data = "C:/Users/Rebecca/Downloads/pma_00018.dat.gz"
)
Use of data from IPUMS PMA is subject to conditions including that users should cite the data appropriately. Use command `ipums_conditions()` for more details.
#descriptive info on year and month of interviews
library(janitor)

Attaching package: 'janitor'
The following objects are masked from 'package:stats':

    chisq.test, fisher.test
tabyl(pmauglong, INTFQMON, INTFQYEAR)
 INTFQMON 2020 2021 9999
        9 3890  422    0
       10  100 4322    0
       11    0  228    0
       99    0    0 5216
tabyl(pmauglong, SEX)
 SEX     n percent
   2 14178       1
#recoding

#fertility preferences: time-varying

pmauglongfilter<-pmauglong%>%
  mutate(FERTPREF=as.factor(FERTPREF))%>%
mutate(wantanotherchild= recode(FERTPREF, '1' = "have another child" ,'2' = "no more children", '3'="infertile", .default = NA_character_))

tabyl(pmauglongfilter, wantanotherchild)
   wantanotherchild     n    percent valid_percent
 have another child 10043 0.70835097     0.7378049
   no more children  3360 0.23698688     0.2468410
          infertile   209 0.01474115     0.0153541
               <NA>   566 0.03992100            NA
#pregant: time-varying

pmauglongfilter<-pmauglongfilter%>%
  mutate(PREGNANT=as.factor(PREGNANT))%>%
mutate(pregnant= recode(PREGNANT, '1' = "pregnant" ,'0' = "not pregnant",.default = NA_character_))

tabyl(pmauglongfilter, pregnant)
     pregnant     n    percent valid_percent
 not pregnant 12709 0.89638877    0.90785056
     pregnant  1290 0.09098603    0.09214944
         <NA>   179 0.01262519            NA
#marital status time-varying

pmauglongfilter<-pmauglongfilter%>%
    mutate(MARSTAT=as.factor(MARSTAT))%>%
mutate(maritalstatus= recode(MARSTAT, '10' = "never married" ,'20' = "married or living together", '21'= "currently married", '22'= "currently living with partner", '31'="formerly in union",'32'='widow or widower',.default = NA_character_))

tabyl(pmauglongfilter, maritalstatus)
                 maritalstatus    n      percent valid_percent
                 never married 3506 0.2472845253    0.24735431
             currently married 3416 0.2409366624    0.24100466
 currently living with partner 4980 0.3512484130    0.35134754
             formerly in union 1947 0.1373254338    0.13736419
              widow or widower  325 0.0229228382    0.02292931
                          <NA>    4 0.0002821272            NA
#age-group

pmauglongfilter<-pmauglongfilter %>%
  mutate(agegroup = case_when(
      AGE >= 15 & AGE <= 19 ~ "15-19",
      AGE >= 20 & AGE <= 25 ~ "20-25",
    AGE >= 26 & AGE <= 30 ~ "26-30",
    AGE >= 31 & AGE <= 35 ~ "31-35",
     AGE >= 36 & AGE <= 40 ~ "36-40",
     AGE >= 41 & AGE <= 44 ~ "41-44",
    AGE >= 45 & AGE <= 49 ~ "45-49"
    ))

#contraceptive use: time-varying

pmauglongfilter<-pmauglongfilter%>%
  mutate(MCP=as.factor(MCP))%>%
mutate(usingmoderncon= recode(MCP, '1' = "yes" ,'0' = "no",.default = NA_character_))

tabyl(pmauglongfilter, usingmoderncon)
 usingmoderncon    n    percent valid_percent
             no 9307 0.65643955      0.666261
            yes 4662 0.32881930      0.333739
           <NA>  209 0.01474115            NA
#education: time-varying

pmauglongfilter<-pmauglongfilter%>%
  mutate(EDUCATTGEN=as.factor(EDUCATTGEN))%>%
mutate(educationlevel= recode(EDUCATTGEN, '1' = "none" ,'2' = "primary/middle school", '3'= "secondary/post-primary", '4'= 'tertiary/ post-secondary',.default = NA_character_))

tabyl(pmauglongfilter, educationlevel)
           educationlevel    n      percent valid_percent
                     none  843 0.0594583157     0.0594667
    primary/middle school 7905 0.5575539568     0.5576326
   secondary/post-primary 4269 0.3011002962     0.3011428
 tertiary/ post-secondary 1159 0.0817463676     0.0817579
                     <NA>    2 0.0001410636            NA
#decision-making:medical- time-varying

pmauglongfilter<-pmauglongfilter%>%
  mutate(HHDECMED=as.factor(HHDECMED))%>%
   mutate(decidemedical= recode(HHDECMED, '1' = "respondent" ,'2' = "husband/partner", '3'="someone else", '98'="no response", .default = NA_character_))

tabyl(pmauglongfilter, decidemedical)
   decidemedical    n      percent valid_percent
      respondent 2130 0.1502327550  0.2571842550
 husband/partner 3826 0.2698547045  0.4619657088
    someone else 2319 0.1635632670  0.2800048298
     no response    7 0.0004937227  0.0008452065
            <NA> 5896 0.4158555509            NA
#decision-making:family planning use- time-varying

pmauglongfilter<-pmauglongfilter%>%
  mutate(FPDECIDER=as.factor(FPDECIDER))%>%
   mutate(decidefp= recode(FPDECIDER, '1' = "respondent" ,'2' = "husband/partner", '3'="joint decision", '9'="other", .default = NA_character_))

tabyl(pmauglongfilter, decidefp)
        decidefp    n     percent valid_percent
      respondent 1887 0.133093525   0.379144063
 husband/partner  563 0.039709409   0.113120354
  joint decision 2499 0.176258993   0.502109705
           other   28 0.001974891   0.005625879
            <NA> 9201 0.648963182            NA
#what is censored: Women who are pregnant at wave 1 

pmauglongfilter<-pmauglongfilter %>%
  mutate(censored = case_when(
      pregnant == 'pregnant' & PHASE == 1~ "1",
      .default="0"))

#pivot wide

selecting<-select(pmauglongfilter, PHASE, BIRTHEVENT)

pmaugwide <- pmauglongfilter %>%
  pivot_wider(
    id_cols=FQINSTID,
    names_from = PHASE,
    values_from = BIRTHEVENT
  )

pmaugwide<-pmaugwide%>%
  dplyr::rename('PHASE1'= '1')

pmaugwide<-pmaugwide%>%
  dplyr::rename('PHASE2'= '2')

pmaugwide<-pmaugwide%>%
  dplyr::rename('PHASE3'= '3')

pmaugwide<-pmaugwide%>%
  filter(PHASE1<97)

pmaugwide<-pmaugwide%>%
  filter(PHASE2<97)

pmaugwide<-pmaugwide%>%
  filter(PHASE3<97)



pmaugbind1to2<-pmaugwide%>%
  select(FQINSTID, PHASE1, PHASE2)

pmaugbind2to3<-pmaugwide%>%
  select(FQINSTID, PHASE2, PHASE3)


pmaugbind1to2 <- pmaugbind1to2 %>%
  mutate(birtheventvar = case_when(PHASE1 < PHASE2 ~ "1",
                                   .default = "0" ))

pmaugbind2to3 <-pmaugbind2to3 %>%
  mutate(birtheventvar = case_when(PHASE2 < PHASE3 ~ "1",
                                   .default = "0" ))


pmaugbindlong1to2 <- pmaugbind1to2 %>%
  pivot_longer(
    cols = starts_with("PHASE"),
    values_to= "parity",
    names_to = "wave",
  )

pmaugbindlong2to3 <- pmaugbind2to3 %>%
  pivot_longer(
    cols = starts_with("PHASE"),
    values_to= "parity",
    names_to = "wave",
  )

bindedlong<-rbind(pmaugbindlong2to3, pmaugbindlong1to2)

bindedlong<-bindedlong%>%
  dplyr::rename(PHASE=wave)

bindedlong <- bindedlong %>%
  mutate(PHASE = case_when(
    PHASE == "PHASE1" ~ 1,
    PHASE == "PHASE2" ~ 2,
    PHASE == "PHASE3" ~ 3,
    TRUE ~ NA_real_  # Handles any unexpected values
  ))

joined<-merge(pmauglongfilter, bindedlong, by=c("FQINSTID", "PHASE"))

#drop any duplicates

cleaned_joined <- joined %>%
  distinct()

#drop censored cases

cleaned_joined_dropcensored<-cleaned_joined%>%
  filter(censored==0)


cleaned_joined_dropcensored$birtheventvar<-as.numeric(cleaned_joined_dropcensored$birtheventvar)


survivalanalysis<-cleaned_joined_dropcensored%>%
  filter(PHASE!=1)


survivalanalysisselect<-survivalanalysis%>%
  select(1:2, wantanotherchild, 85:94)

head(survivalanalysisselect, n=20)
                                    FQINSTID PHASE   wantanotherchild
1  uuid:007ecf46-4611-43be-8bb1-cec5b1e55e78     2 have another child
2  uuid:007ecf46-4611-43be-8bb1-cec5b1e55e78     3 have another child
3  uuid:00accc49-4193-445f-be05-b0225ceac763     2 have another child
4  uuid:00accc49-4193-445f-be05-b0225ceac763     3 have another child
5  uuid:00c445c9-4dc7-4461-a854-263e0bd9ba9f     2 have another child
6  uuid:00c445c9-4dc7-4461-a854-263e0bd9ba9f     2 have another child
7  uuid:00c445c9-4dc7-4461-a854-263e0bd9ba9f     3 have another child
8  uuid:00e2e893-8187-48dc-8b26-0b6191ea189a     2 have another child
9  uuid:00e2e893-8187-48dc-8b26-0b6191ea189a     3 have another child
10 uuid:0107c7a0-2d79-4f50-af90-bd865afe1d14     2          infertile
11 uuid:0107c7a0-2d79-4f50-af90-bd865afe1d14     3               <NA>
12 uuid:01393ddb-660c-4a62-811e-d26dc50b184e     2 have another child
13 uuid:01393ddb-660c-4a62-811e-d26dc50b184e     2 have another child
14 uuid:01393ddb-660c-4a62-811e-d26dc50b184e     3 have another child
15 uuid:01403d5f-e872-42cd-8ca3-3eb226390c3b     2 have another child
16 uuid:01403d5f-e872-42cd-8ca3-3eb226390c3b     2 have another child
17 uuid:01403d5f-e872-42cd-8ca3-3eb226390c3b     3 have another child
18 uuid:016f1991-c6b5-4b9c-913b-554b5652ab2a     2   no more children
19 uuid:016f1991-c6b5-4b9c-913b-554b5652ab2a     2   no more children
20 uuid:016f1991-c6b5-4b9c-913b-554b5652ab2a     3 have another child
       pregnant                 maritalstatus agegroup usingmoderncon
1  not pregnant currently living with partner    20-25            yes
2      pregnant currently living with partner    20-25             no
3  not pregnant                 never married    15-19             no
4  not pregnant                 never married    15-19             no
5      pregnant             formerly in union    26-30            yes
6      pregnant             formerly in union    26-30            yes
7  not pregnant             formerly in union    26-30            yes
8  not pregnant             currently married    20-25             no
9  not pregnant             currently married    20-25             no
10 not pregnant             currently married    31-35            yes
11 not pregnant             currently married    31-35            yes
12     pregnant currently living with partner    26-30             no
13     pregnant currently living with partner    26-30             no
14 not pregnant currently living with partner    31-35             no
15     pregnant currently living with partner    20-25             no
16     pregnant currently living with partner    20-25             no
17 not pregnant             currently married    20-25            yes
18 not pregnant currently living with partner    26-30            yes
19 not pregnant currently living with partner    26-30            yes
20 not pregnant currently living with partner    26-30            yes
           educationlevel   decidemedical       decidefp censored birtheventvar
1  secondary/post-primary      respondent     respondent        0             0
2  secondary/post-primary      respondent           <NA>        0             0
3  secondary/post-primary            <NA>           <NA>        0             0
4  secondary/post-primary            <NA>           <NA>        0             0
5   primary/middle school            <NA>           <NA>        0             1
6   primary/middle school            <NA>           <NA>        0             0
7   primary/middle school            <NA>           <NA>        0             1
8   primary/middle school husband/partner           <NA>        0             0
9   primary/middle school husband/partner           <NA>        0             0
10  primary/middle school      respondent joint decision        0             0
11  primary/middle school    someone else          other        0             0
12  primary/middle school    someone else           <NA>        0             1
13  primary/middle school    someone else           <NA>        0             0
14  primary/middle school      respondent           <NA>        0             1
15  primary/middle school husband/partner           <NA>        0             1
16  primary/middle school husband/partner           <NA>        0             0
17  primary/middle school husband/partner joint decision        0             1
18  primary/middle school husband/partner joint decision        0             0
19  primary/middle school husband/partner joint decision        0             1
20  primary/middle school husband/partner     respondent        0             0

Now I will lag the following predictor variables by one wave: fertility preferences, marital status, family planning decision maker, and medical decision maker.

#first for fertility preferences

pmaugwidefertpref<- pmauglongfilter %>%
  pivot_wider(
    id_cols=FQINSTID,
    names_from = PHASE,
    values_from = wantanotherchild
  )

fertprefwave1<-pmaugwidefertpref%>%
select(FQINSTID,`1`)

fertprefwave2<-pmaugwidefertpref%>%
select(FQINSTID,`2`)

survivalanalysisPHASE2<-survivalanalysis%>%
  filter(PHASE==2)


survivalanalysisPHASE3<-survivalanalysis%>%
  filter(PHASE==3)
  
fertpref1wave2<-merge(fertprefwave1, survivalanalysisPHASE2, by= "FQINSTID")

fertpref2wave3<-merge(fertprefwave2, survivalanalysisPHASE3, by= "FQINSTID")


fertpref1wave2<-fertpref1wave2%>%
  dplyr::rename('laggedfertpref'= '1')

fertpref2wave3<-fertpref2wave3%>%
  dplyr::rename('laggedfertpref'= '2')

fertpreflagged<-rbind(fertpref1wave2,fertpref2wave3)



#add the fertpref lagged column to the other

allsurvival<-left_join(survivalanalysis, fertpreflagged)
Joining with `by = join_by(FQINSTID, PHASE, SAMPLE, COUNTRY, YEAR, PERSONID,
ELIGIBLE, EAID, CONSENTFQ, INTFQMON, INTFQYEAR, CONSENTHQ, SURVEYWILLING,
FQWEIGHT, HQWEIGHT, STRATA, URBAN, SEX, MARSTATHQ, AGE, BIRTHMO, DOBCMC,
PREGNANT, EDUCATTGEN, MARSTAT, PTR1STAGE, WIVESOTH, COHAB1STSTARTMO,
COHAB1STSTARTYR, COHABLASTSTARTMO, COHABLASTSTARTYR, COHAB1STSTARTCMC,
COHABLASTSTARTCMC, WEALTHQ, SCORE, ROOF, ELECTRC, BIRTHEVENT, AGE1STSEX,
TIMESINCESEX, TIMESINCESEXVAL, TIMESINCESEXDAY, KID1STBIRTHCMC, LASTDOBCMC,
FERTPREF, PGDESIRE, PREFTIMECH, PREFTIMECHVAL, PREFTIMECHMONTH, FERTPREFNPREG,
FERTPREFPREG, STARTKIDDEC, NEXTKIDDEC, STARTKIDDECWILL, PTRDISCKIDSTART,
FPPROVIDERGEN, FPPROVIDER, CP, MCP, TCP, FPCURRUSE, FPCURREFFMETH,
FPCURREFFMETHRC, FPNOWUSINJ, FPNOWUSEMRG, FPNOWUSCON, FPNOWUSFC, FPNOWUSCYCB,
FPNOWUSRHY, FPNOWUSWD, FPNOWUSTRAD, FPDECIDER, CONFPARTNERTALK, FPCANSWITCH,
FPUSPLAN, FPUSPLAN_NPREG, FIRSTCARRIEDAWAY, FIRSTCURIOUS, FIRSTFORCE,
FIRSTEXPECT, FIRSTTIMING, FIRSTWILLING, FIRSTSUB, HHDECMED, wantanotherchild,
pregnant, maritalstatus, agegroup, usingmoderncon, educationlevel,
decidemedical, decidefp, censored, birtheventvar, parity)`
#then marital status

pmaugwidemarital<- pmauglongfilter %>%
  pivot_wider(
    id_cols=FQINSTID,
    names_from = PHASE,
    values_from = maritalstatus
  )

maritalwave1<-pmaugwidemarital%>%
select(FQINSTID,`1`)

maritalwave2<-pmaugwidemarital%>%
select(FQINSTID,`2`)

survivalanalysisPHASE2<-survivalanalysis%>%
  filter(PHASE==2)


survivalanalysisPHASE3<-survivalanalysis%>%
  filter(PHASE==3)
  
marital1wave2<-merge(maritalwave1, survivalanalysisPHASE2, by= "FQINSTID")

marital2wave3<-merge(maritalwave2, survivalanalysisPHASE3, by= "FQINSTID")


marital1wave2<-marital1wave2%>%
  dplyr::rename('laggedmarital'= '1')

marital2wave3<-marital2wave3%>%
  dplyr::rename('laggedmarital'= '2')

maritallagged<-rbind(marital1wave2,marital2wave3)



#add the marital lagged column to the other

allsurvival1<-left_join(allsurvival, maritallagged)
Joining with `by = join_by(FQINSTID, PHASE, SAMPLE, COUNTRY, YEAR, PERSONID,
ELIGIBLE, EAID, CONSENTFQ, INTFQMON, INTFQYEAR, CONSENTHQ, SURVEYWILLING,
FQWEIGHT, HQWEIGHT, STRATA, URBAN, SEX, MARSTATHQ, AGE, BIRTHMO, DOBCMC,
PREGNANT, EDUCATTGEN, MARSTAT, PTR1STAGE, WIVESOTH, COHAB1STSTARTMO,
COHAB1STSTARTYR, COHABLASTSTARTMO, COHABLASTSTARTYR, COHAB1STSTARTCMC,
COHABLASTSTARTCMC, WEALTHQ, SCORE, ROOF, ELECTRC, BIRTHEVENT, AGE1STSEX,
TIMESINCESEX, TIMESINCESEXVAL, TIMESINCESEXDAY, KID1STBIRTHCMC, LASTDOBCMC,
FERTPREF, PGDESIRE, PREFTIMECH, PREFTIMECHVAL, PREFTIMECHMONTH, FERTPREFNPREG,
FERTPREFPREG, STARTKIDDEC, NEXTKIDDEC, STARTKIDDECWILL, PTRDISCKIDSTART,
FPPROVIDERGEN, FPPROVIDER, CP, MCP, TCP, FPCURRUSE, FPCURREFFMETH,
FPCURREFFMETHRC, FPNOWUSINJ, FPNOWUSEMRG, FPNOWUSCON, FPNOWUSFC, FPNOWUSCYCB,
FPNOWUSRHY, FPNOWUSWD, FPNOWUSTRAD, FPDECIDER, CONFPARTNERTALK, FPCANSWITCH,
FPUSPLAN, FPUSPLAN_NPREG, FIRSTCARRIEDAWAY, FIRSTCURIOUS, FIRSTFORCE,
FIRSTEXPECT, FIRSTTIMING, FIRSTWILLING, FIRSTSUB, HHDECMED, wantanotherchild,
pregnant, maritalstatus, agegroup, usingmoderncon, educationlevel,
decidemedical, decidefp, censored, birtheventvar, parity)`
#next for FP decision maker

pmaugwidedecidefp<- pmauglongfilter %>%
  pivot_wider(
    id_cols=FQINSTID,
    names_from = PHASE,
    values_from = decidefp
  )

decidefpwave1<-pmaugwidedecidefp%>%
select(FQINSTID,`1`)

decidefpwave2<-pmaugwidedecidefp%>%
select(FQINSTID,`2`)

survivalanalysisPHASE2<-survivalanalysis%>%
  filter(PHASE==2)


survivalanalysisPHASE3<-survivalanalysis%>%
  filter(PHASE==3)
  
decidefp1wave2<-merge(decidefpwave1, survivalanalysisPHASE2, by= "FQINSTID")

decidefp2wave3<-merge(decidefpwave2, survivalanalysisPHASE3, by= "FQINSTID")


decidefp1wave2<-decidefp1wave2%>%
  dplyr::rename('laggeddecidefp'= '1')

decidefp2wave3<-decidefp2wave3%>%
  dplyr::rename('laggeddecidefp'= '2')

decidefplagged<-rbind(decidefp1wave2,decidefp2wave3)



#add the decidefp lagged column to the other

allsurvival2<-left_join(allsurvival1, decidefplagged)
Joining with `by = join_by(FQINSTID, PHASE, SAMPLE, COUNTRY, YEAR, PERSONID,
ELIGIBLE, EAID, CONSENTFQ, INTFQMON, INTFQYEAR, CONSENTHQ, SURVEYWILLING,
FQWEIGHT, HQWEIGHT, STRATA, URBAN, SEX, MARSTATHQ, AGE, BIRTHMO, DOBCMC,
PREGNANT, EDUCATTGEN, MARSTAT, PTR1STAGE, WIVESOTH, COHAB1STSTARTMO,
COHAB1STSTARTYR, COHABLASTSTARTMO, COHABLASTSTARTYR, COHAB1STSTARTCMC,
COHABLASTSTARTCMC, WEALTHQ, SCORE, ROOF, ELECTRC, BIRTHEVENT, AGE1STSEX,
TIMESINCESEX, TIMESINCESEXVAL, TIMESINCESEXDAY, KID1STBIRTHCMC, LASTDOBCMC,
FERTPREF, PGDESIRE, PREFTIMECH, PREFTIMECHVAL, PREFTIMECHMONTH, FERTPREFNPREG,
FERTPREFPREG, STARTKIDDEC, NEXTKIDDEC, STARTKIDDECWILL, PTRDISCKIDSTART,
FPPROVIDERGEN, FPPROVIDER, CP, MCP, TCP, FPCURRUSE, FPCURREFFMETH,
FPCURREFFMETHRC, FPNOWUSINJ, FPNOWUSEMRG, FPNOWUSCON, FPNOWUSFC, FPNOWUSCYCB,
FPNOWUSRHY, FPNOWUSWD, FPNOWUSTRAD, FPDECIDER, CONFPARTNERTALK, FPCANSWITCH,
FPUSPLAN, FPUSPLAN_NPREG, FIRSTCARRIEDAWAY, FIRSTCURIOUS, FIRSTFORCE,
FIRSTEXPECT, FIRSTTIMING, FIRSTWILLING, FIRSTSUB, HHDECMED, wantanotherchild,
pregnant, maritalstatus, agegroup, usingmoderncon, educationlevel,
decidemedical, decidefp, censored, birtheventvar, parity)`
#lastly for medical decision maker

pmaugwidedecidemed<- pmauglongfilter %>%
  pivot_wider(
    id_cols=FQINSTID,
    names_from = PHASE,
    values_from = decidemedical
  )


decidemedwave1<-pmaugwidedecidemed%>%
select(FQINSTID,`1`)

decidemedwave2<-pmaugwidedecidemed%>%
select(FQINSTID,`2`)

survivalanalysisPHASE2<-survivalanalysis%>%
  filter(PHASE==2)


survivalanalysisPHASE3<-survivalanalysis%>%
  filter(PHASE==3)
  
decidemed1wave2<-merge(decidemedwave1, survivalanalysisPHASE2, by= "FQINSTID")

decidemed2wave3<-merge(decidemedwave2, survivalanalysisPHASE3, by= "FQINSTID")


decidemed1wave2<-decidemed1wave2%>%
  dplyr::rename('laggeddecidemed'= '1')

decidemed2wave3<-decidemed2wave3%>%
  dplyr::rename('laggeddecidemed'= '2')

decidemedlagged<-rbind(decidemed1wave2,decidemed2wave3)



#add the decidemed lagged column to the other

allsurvivalall<-left_join(allsurvival2, decidemedlagged)
Joining with `by = join_by(FQINSTID, PHASE, SAMPLE, COUNTRY, YEAR, PERSONID,
ELIGIBLE, EAID, CONSENTFQ, INTFQMON, INTFQYEAR, CONSENTHQ, SURVEYWILLING,
FQWEIGHT, HQWEIGHT, STRATA, URBAN, SEX, MARSTATHQ, AGE, BIRTHMO, DOBCMC,
PREGNANT, EDUCATTGEN, MARSTAT, PTR1STAGE, WIVESOTH, COHAB1STSTARTMO,
COHAB1STSTARTYR, COHABLASTSTARTMO, COHABLASTSTARTYR, COHAB1STSTARTCMC,
COHABLASTSTARTCMC, WEALTHQ, SCORE, ROOF, ELECTRC, BIRTHEVENT, AGE1STSEX,
TIMESINCESEX, TIMESINCESEXVAL, TIMESINCESEXDAY, KID1STBIRTHCMC, LASTDOBCMC,
FERTPREF, PGDESIRE, PREFTIMECH, PREFTIMECHVAL, PREFTIMECHMONTH, FERTPREFNPREG,
FERTPREFPREG, STARTKIDDEC, NEXTKIDDEC, STARTKIDDECWILL, PTRDISCKIDSTART,
FPPROVIDERGEN, FPPROVIDER, CP, MCP, TCP, FPCURRUSE, FPCURREFFMETH,
FPCURREFFMETHRC, FPNOWUSINJ, FPNOWUSEMRG, FPNOWUSCON, FPNOWUSFC, FPNOWUSCYCB,
FPNOWUSRHY, FPNOWUSWD, FPNOWUSTRAD, FPDECIDER, CONFPARTNERTALK, FPCANSWITCH,
FPUSPLAN, FPUSPLAN_NPREG, FIRSTCARRIEDAWAY, FIRSTCURIOUS, FIRSTFORCE,
FIRSTEXPECT, FIRSTTIMING, FIRSTWILLING, FIRSTSUB, HHDECMED, wantanotherchild,
pregnant, maritalstatus, agegroup, usingmoderncon, educationlevel,
decidemedical, decidefp, censored, birtheventvar, parity)`
allsurvivalclean<-allsurvivalall%>%
  distinct(PHASE, FQINSTID, .keep_all = TRUE)

Now we will show the survival curves considering these lagged factors

allsurvivalall1<-allsurvivalclean%>%
  filter(wantanotherchild!="NA")

allsurvivalall1%>%
group_by(YEAR, wantanotherchild)%>%
summarise(prop_bir=mean(birtheventvar, na.rm=T))%>%
ggplot(aes(x=YEAR, y=prop_bir))+
geom_line(aes(group=factor(wantanotherchild), color=factor(wantanotherchild)))+
ggtitle(label = "Hazard of having a birth by prior wave fertility preference")+scale_x_continuous(breaks=c(2021, 2022))
`summarise()` has grouped output by 'YEAR'. You can override using the
`.groups` argument.

allsurvivalall2<-allsurvivalclean%>%
  filter(maritalstatus!="NA")



allsurvivalall2%>%
group_by(YEAR, maritalstatus)%>%
summarise(prop_bir=mean(birtheventvar, na.rm=T))%>%
ggplot(aes(x=YEAR, y=prop_bir))+
geom_line(aes(group=factor(maritalstatus), color=factor(maritalstatus)))+
ggtitle(label = "Hazard of having a birth by prior wave marital status")+scale_x_continuous(breaks=c(2021, 2022))
`summarise()` has grouped output by 'YEAR'. You can override using the
`.groups` argument.

allsurvivalclean%>%
group_by(YEAR, decidefp)%>%
summarise(prop_bir=mean(birtheventvar, na.rm=T))%>%
ggplot(aes(x=YEAR, y=prop_bir))+
geom_line(aes(group=factor(decidefp), color=factor(decidefp)))+
ggtitle(label = "Hazard of having a birth by prior wave family planning user decision-maker")+scale_x_continuous(breaks=c(2021, 2022))
`summarise()` has grouped output by 'YEAR'. You can override using the
`.groups` argument.

allsurvivalclean%>%
group_by(YEAR, decidemedical)%>%
summarise(prop_bir=mean(birtheventvar, na.rm=T))%>%
ggplot(aes(x=YEAR, y=prop_bir))+
geom_line(aes(group=factor(decidemedical), color=factor(decidemedical)))+
ggtitle(label = "Hazard of having a birth by prior wave medical decision-maker")+scale_x_continuous(breaks=c(2021, 2022))
`summarise()` has grouped output by 'YEAR'. You can override using the
`.groups` argument.

Now I will break up the analysis by year.

#See how many births happened by each wave
tabyl(allsurvivalall, YEAR, birtheventvar)
 YEAR    0   1
 2021 2318 971
 2022 1956 422
wave2<-allsurvivalclean%>%
  filter(YEAR==2021)


#general model with sociodemographics and fertiltiy preferences from 2020-2021
fit.wave2<-glm(birtheventvar~+ usingmoderncon+ laggedmarital+ laggedfertpref +AGE+ educationlevel, data=wave2, family=binomial(link="cloglog"))

summary(fit.wave2)

Call:
glm(formula = birtheventvar ~ +usingmoderncon + laggedmarital + 
    laggedfertpref + AGE + educationlevel, family = binomial(link = "cloglog"), 
    data = wave2)

Coefficients:
                                            Estimate Std. Error z value
(Intercept)                                -1.263510   0.278207  -4.542
usingmodernconyes                          -0.641263   0.103334  -6.206
laggedmaritalcurrently married              1.282087   0.183030   7.005
laggedmaritalcurrently living with partner  1.187774   0.170309   6.974
laggedmaritalformerly in union              0.836551   0.219143   3.817
laggedmaritalwidow or widower               0.404715   0.453680   0.892
laggedfertprefno more children             -0.040145   0.128132  -0.313
laggedfertprefinfertile                    -0.845862   0.583668  -1.449
AGE                                        -0.026511   0.007476  -3.546
educationlevelprimary/middle school        -0.004226   0.179921  -0.023
educationlevelsecondary/post-primary       -0.242725   0.197776  -1.227
educationleveltertiary/ post-secondary     -0.386860   0.253543  -1.526
                                           Pr(>|z|)    
(Intercept)                                5.58e-06 ***
usingmodernconyes                          5.44e-10 ***
laggedmaritalcurrently married             2.47e-12 ***
laggedmaritalcurrently living with partner 3.08e-12 ***
laggedmaritalformerly in union             0.000135 ***
laggedmaritalwidow or widower              0.372355    
laggedfertprefno more children             0.754044    
laggedfertprefinfertile                    0.147277    
AGE                                        0.000391 ***
educationlevelprimary/middle school        0.981261    
educationlevelsecondary/post-primary       0.219721    
educationleveltertiary/ post-secondary     0.127056    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 2369.6  on 2231  degrees of freedom
Residual deviance: 2247.5  on 2220  degrees of freedom
  (146 observations deleted due to missingness)
AIC: 2271.5

Number of Fisher Scoring iterations: 5
#model from 2020-2021: add family planning decision making power
fit.wave2decisionfp<-glm(birtheventvar~+ usingmoderncon+ laggedmarital+ laggedfertpref +AGE+ educationlevel + laggeddecidefp, data=wave2, family=binomial(link="cloglog"))

summary(fit.wave2decisionfp)

Call:
glm(formula = birtheventvar ~ +usingmoderncon + laggedmarital + 
    laggedfertpref + AGE + educationlevel + laggeddecidefp, family = binomial(link = "cloglog"), 
    data = wave2)

Coefficients:
                                           Estimate Std. Error z value Pr(>|z|)
(Intercept)                                -0.10644    0.55581  -0.192   0.8481
usingmodernconyes                          -0.97297    0.16175  -6.015  1.8e-09
laggedmaritalcurrently married              0.37525    0.32058   1.171   0.2418
laggedmaritalcurrently living with partner  0.13248    0.30605   0.433   0.6651
laggedmaritalformerly in union             -0.06590    0.38420  -0.172   0.8638
laggedmaritalwidow or widower              -0.74492    1.06532  -0.699   0.4844
laggedfertprefno more children             -0.13585    0.22633  -0.600   0.5484
laggedfertprefinfertile                    -1.46865    1.54015  -0.954   0.3403
AGE                                        -0.01325    0.01375  -0.964   0.3350
educationlevelprimary/middle school        -0.27361    0.34324  -0.797   0.4254
educationlevelsecondary/post-primary       -0.67498    0.37476  -1.801   0.0717
educationleveltertiary/ post-secondary     -0.58491    0.43067  -1.358   0.1744
laggeddecidefphusband/partner              -0.26070    0.22895  -1.139   0.2548
laggeddecidefpjoint decision               -0.44838    0.17469  -2.567   0.0103
laggeddecidefpother                         0.89189    1.53925   0.579   0.5623
                                              
(Intercept)                                   
usingmodernconyes                          ***
laggedmaritalcurrently married                
laggedmaritalcurrently living with partner    
laggedmaritalformerly in union                
laggedmaritalwidow or widower                 
laggedfertprefno more children                
laggedfertprefinfertile                       
AGE                                           
educationlevelprimary/middle school           
educationlevelsecondary/post-primary       .  
educationleveltertiary/ post-secondary        
laggeddecidefphusband/partner                 
laggeddecidefpjoint decision               *  
laggeddecidefpother                           
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 854.79  on 852  degrees of freedom
Residual deviance: 797.79  on 838  degrees of freedom
  (1525 observations deleted due to missingness)
AIC: 827.79

Number of Fisher Scoring iterations: 5
#model from 2020-2021: add medical decision making power
fit.wave2decisionmed<-glm(birtheventvar~+ usingmoderncon+ laggedmarital+ laggedfertpref +AGE+ educationlevel + laggeddecidemed, data=wave2, family=binomial(link="cloglog"))

summary(fit.wave2decisionmed)

Call:
glm(formula = birtheventvar ~ +usingmoderncon + laggedmarital + 
    laggedfertpref + AGE + educationlevel + laggeddecidemed, 
    family = binomial(link = "cloglog"), data = wave2)

Coefficients:
                                            Estimate Std. Error z value
(Intercept)                                 0.145172   0.366997   0.396
usingmodernconyes                          -0.677061   0.113563  -5.962
laggedmaritalcurrently living with partner -0.086179   0.106786  -0.807
laggedfertprefno more children              0.032469   0.144927   0.224
laggedfertprefinfertile                    -0.440274   0.585583  -0.752
AGE                                        -0.032371   0.008715  -3.715
educationlevelprimary/middle school         0.007050   0.195059   0.036
educationlevelsecondary/post-primary       -0.213762   0.217436  -0.983
educationleveltertiary/ post-secondary     -0.380126   0.283842  -1.339
laggeddecidemedhusband/partner              0.051069   0.132707   0.385
laggeddecidemedsomeone else                -0.035371   0.144986  -0.244
laggeddecidemedno response                  0.709842   0.604451   1.174
                                           Pr(>|z|)    
(Intercept)                                0.692424    
usingmodernconyes                          2.49e-09 ***
laggedmaritalcurrently living with partner 0.419651    
laggedfertprefno more children             0.822727    
laggedfertprefinfertile                    0.452138    
AGE                                        0.000204 ***
educationlevelprimary/middle school        0.971168    
educationlevelsecondary/post-primary       0.325557    
educationleveltertiary/ post-secondary     0.180501    
laggeddecidemedhusband/partner             0.700367    
laggeddecidemedsomeone else                0.807259    
laggeddecidemedno response                 0.240252    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1718.9  on 1502  degrees of freedom
Residual deviance: 1648.2  on 1491  degrees of freedom
  (875 observations deleted due to missingness)
AIC: 1672.2

Number of Fisher Scoring iterations: 5
wave3<-allsurvivalclean%>%
  filter(YEAR==2022)


#general model with sociodemographics and fertiltiy preferences from 2021-2022
fit.wave3<-glm(birtheventvar~+ usingmoderncon+ laggedmarital+ laggedfertpref +AGE+ educationlevel, data=wave3, family=binomial(link="cloglog"))

summary(fit.wave3)

Call:
glm(formula = birtheventvar ~ +usingmoderncon + laggedmarital + 
    laggedfertpref + AGE + educationlevel, family = binomial(link = "cloglog"), 
    data = wave3)

Coefficients:
                                            Estimate Std. Error z value
(Intercept)                                -1.544672   0.352823  -4.378
usingmodernconyes                          -0.269023   0.107445  -2.504
laggedmaritalcurrently married              1.613630   0.231222   6.979
laggedmaritalcurrently living with partner  1.535875   0.217641   7.057
laggedmaritalformerly in union              0.844590   0.281803   2.997
laggedmaritalwidow or widower               1.453856   0.429257   3.387
laggedfertprefno more children             -0.124960   0.152308  -0.820
laggedfertprefinfertile                    -0.607384   0.520080  -1.168
AGE                                        -0.040638   0.008663  -4.691
educationlevelprimary/middle school        -0.036325   0.228646  -0.159
educationlevelsecondary/post-primary       -0.060095   0.239557  -0.251
educationleveltertiary/ post-secondary     -0.174298   0.290569  -0.600
                                           Pr(>|z|)    
(Intercept)                                1.20e-05 ***
usingmodernconyes                          0.012286 *  
laggedmaritalcurrently married             2.98e-12 ***
laggedmaritalcurrently living with partner 1.70e-12 ***
laggedmaritalformerly in union             0.002726 ** 
laggedmaritalwidow or widower              0.000707 ***
laggedfertprefno more children             0.411962    
laggedfertprefinfertile                    0.242861    
AGE                                        2.72e-06 ***
educationlevelprimary/middle school        0.873771    
educationlevelsecondary/post-primary       0.801924    
educationleveltertiary/ post-secondary     0.548605    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 2101.8  on 2262  degrees of freedom
Residual deviance: 1996.0  on 2251  degrees of freedom
  (115 observations deleted due to missingness)
AIC: 2020

Number of Fisher Scoring iterations: 5
#model from 2021-2022: add medical decision making power
fit.wave3decisionfp<-glm(birtheventvar~+ usingmoderncon+ laggedmarital+ laggedfertpref +AGE+ educationlevel + laggeddecidefp, data=wave3, family=binomial(link="cloglog"))

summary(fit.wave3decisionfp)

Call:
glm(formula = birtheventvar ~ +usingmoderncon + laggedmarital + 
    laggedfertpref + AGE + educationlevel + laggeddecidefp, family = binomial(link = "cloglog"), 
    data = wave3)

Coefficients:
                                            Estimate Std. Error z value
(Intercept)                                -1.592204   0.789865  -2.016
usingmodernconyes                          -0.572901   0.205699  -2.785
laggedmaritalcurrently married              0.452232   0.434686   1.040
laggedmaritalcurrently living with partner  0.266411   0.406086   0.656
laggedmaritalformerly in union              0.162203   0.490289   0.331
laggedmaritalwidow or widower               0.684193   0.712231   0.961
laggedfertprefno more children             -0.570702   0.299627  -1.905
laggedfertprefinfertile                    -0.948566   1.045091  -0.908
AGE                                         0.005542   0.017621   0.315
educationlevelprimary/middle school        -0.394970   0.527362  -0.749
educationlevelsecondary/post-primary       -0.003260   0.538380  -0.006
educationleveltertiary/ post-secondary     -0.655328   0.655919  -0.999
laggeddecidefphusband/partner              -0.555367   0.348332  -1.594
laggeddecidefpjoint decision               -0.761278   0.222270  -3.425
laggeddecidefpother                         0.526456   1.038275   0.507
                                           Pr(>|z|)    
(Intercept)                                0.043822 *  
usingmodernconyes                          0.005351 ** 
laggedmaritalcurrently married             0.298170    
laggedmaritalcurrently living with partner 0.511795    
laggedmaritalformerly in union             0.740773    
laggedmaritalwidow or widower              0.336736    
laggedfertprefno more children             0.056818 .  
laggedfertprefinfertile                    0.364069    
AGE                                        0.753135    
educationlevelprimary/middle school        0.453885    
educationlevelsecondary/post-primary       0.995168    
educationleveltertiary/ post-secondary     0.317747    
laggeddecidefphusband/partner              0.110856    
laggeddecidefpjoint decision               0.000615 ***
laggeddecidefpother                        0.612121    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 653.48  on 994  degrees of freedom
Residual deviance: 619.78  on 980  degrees of freedom
  (1383 observations deleted due to missingness)
AIC: 649.78

Number of Fisher Scoring iterations: 6
#model from 2021-2022: add medical decision making power
fit.wave3decisionmed<-glm(birtheventvar~+ usingmoderncon+ laggedmarital+ laggedfertpref +AGE+ educationlevel + laggeddecidemed, data=wave3, family=binomial(link="cloglog"))

summary(fit.wave3decisionmed)

Call:
glm(formula = birtheventvar ~ +usingmoderncon + laggedmarital + 
    laggedfertpref + AGE + educationlevel + laggeddecidemed, 
    family = binomial(link = "cloglog"), data = wave3)

Coefficients:
                                            Estimate Std. Error z value
(Intercept)                                -0.161428   0.425832  -0.379
usingmodernconyes                          -0.377907   0.117147  -3.226
laggedmaritalcurrently living with partner -0.076867   0.117108  -0.656
laggedfertprefno more children             -0.059347   0.164193  -0.361
laggedfertprefinfertile                    -0.613878   0.596866  -1.029
AGE                                        -0.039368   0.009608  -4.097
educationlevelprimary/middle school         0.014757   0.245695   0.060
educationlevelsecondary/post-primary        0.051900   0.257430   0.202
educationleveltertiary/ post-secondary     -0.120860   0.319948  -0.378
laggeddecidemedhusband/partner              0.240034   0.149723   1.603
laggeddecidemedsomeone else                 0.098325   0.170276   0.577
                                           Pr(>|z|)    
(Intercept)                                 0.70462    
usingmodernconyes                           0.00126 ** 
laggedmaritalcurrently living with partner  0.51158    
laggedfertprefno more children              0.71776    
laggedfertprefinfertile                     0.30371    
AGE                                        4.18e-05 ***
educationlevelprimary/middle school         0.95211    
educationlevelsecondary/post-primary        0.84022    
educationleveltertiary/ post-secondary      0.70562    
laggeddecidemedhusband/partner              0.10889    
laggeddecidemedsomeone else                 0.56364    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1620.6  on 1576  degrees of freedom
Residual deviance: 1564.1  on 1566  degrees of freedom
  (801 observations deleted due to missingness)
AIC: 1586.1

Number of Fisher Scoring iterations: 5
#general model with sociodemographics and fertiltiy preferences 
fit.all<-glm(birtheventvar~+ usingmoderncon+ laggedmarital+ laggedfertpref +AGE+ educationlevel, data=allsurvivalclean, family=binomial(link="cloglog"))

summary(fit.wave2)

Call:
glm(formula = birtheventvar ~ +usingmoderncon + laggedmarital + 
    laggedfertpref + AGE + educationlevel, family = binomial(link = "cloglog"), 
    data = wave2)

Coefficients:
                                            Estimate Std. Error z value
(Intercept)                                -1.263510   0.278207  -4.542
usingmodernconyes                          -0.641263   0.103334  -6.206
laggedmaritalcurrently married              1.282087   0.183030   7.005
laggedmaritalcurrently living with partner  1.187774   0.170309   6.974
laggedmaritalformerly in union              0.836551   0.219143   3.817
laggedmaritalwidow or widower               0.404715   0.453680   0.892
laggedfertprefno more children             -0.040145   0.128132  -0.313
laggedfertprefinfertile                    -0.845862   0.583668  -1.449
AGE                                        -0.026511   0.007476  -3.546
educationlevelprimary/middle school        -0.004226   0.179921  -0.023
educationlevelsecondary/post-primary       -0.242725   0.197776  -1.227
educationleveltertiary/ post-secondary     -0.386860   0.253543  -1.526
                                           Pr(>|z|)    
(Intercept)                                5.58e-06 ***
usingmodernconyes                          5.44e-10 ***
laggedmaritalcurrently married             2.47e-12 ***
laggedmaritalcurrently living with partner 3.08e-12 ***
laggedmaritalformerly in union             0.000135 ***
laggedmaritalwidow or widower              0.372355    
laggedfertprefno more children             0.754044    
laggedfertprefinfertile                    0.147277    
AGE                                        0.000391 ***
educationlevelprimary/middle school        0.981261    
educationlevelsecondary/post-primary       0.219721    
educationleveltertiary/ post-secondary     0.127056    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 2369.6  on 2231  degrees of freedom
Residual deviance: 2247.5  on 2220  degrees of freedom
  (146 observations deleted due to missingness)
AIC: 2271.5

Number of Fisher Scoring iterations: 5
#how many people have multiple births

multiplebirths<-allsurvivalclean%>%
  select(FQINSTID, PHASE, birtheventvar)

numberbirths<-multiplebirths%>%
  group_by(FQINSTID)%>%
  summarise(totalbirthsbetwaves=sum(birtheventvar))

tabyl(numberbirths, totalbirthsbetwaves)
 totalbirthsbetwaves    n    percent
                   0 1659 0.69764508
                   1  485 0.20395290
                   2  234 0.09840202