Luttinen Ch. 2 Event Study

Author

R. Luttinen

Event study on time-varying fertility preferences, marital status and male-influence

library(ipumsr)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#person-period dataset


pmauglong <- read_ipums_micro(
  ddi = "C:/Users/Rebecca/Downloads/pma_00017.xml",
  data = "C:/Users/Rebecca/Downloads/pma_00017.dat.gz"
)
Use of data from IPUMS PMA is subject to conditions including that users should cite the data appropriately. Use command `ipums_conditions()` for more details.
#start date is 2020?

#end date is 2022

#fertility preferences and marital status are time-varying covariates and per person there should be 3 observations


library(janitor)

Attaching package: 'janitor'
The following objects are masked from 'package:stats':

    chisq.test, fisher.test
tabyl(pmauglong, INTFQMON, INTFQYEAR)
 INTFQMON 2020 2021 9999
        9 3890  422    0
       10  100 4322    0
       11    0  228    0
       99    0    0 5216
#recoding

#fertility preferences: time-varying

pmauglongfilter<-pmauglong%>%
  mutate(FERTPREF=as.factor(FERTPREF))%>%
mutate(wantanotherchild= recode(FERTPREF, '1' = "have another child" ,'2' = "no more children", '3'="infertile", .default = NA_character_))

tabyl(pmauglongfilter, wantanotherchild)
   wantanotherchild     n    percent valid_percent
 have another child 10043 0.70835097     0.7378049
   no more children  3360 0.23698688     0.2468410
          infertile   209 0.01474115     0.0153541
               <NA>   566 0.03992100            NA
#pregant: time-varying

pmauglongfilter<-pmauglongfilter%>%
  mutate(PREGNANT=as.factor(PREGNANT))%>%
mutate(pregnant= recode(PREGNANT, '1' = "pregnant" ,'0' = "not pregnant",.default = NA_character_))

tabyl(pmauglongfilter, pregnant)
     pregnant     n    percent valid_percent
 not pregnant 12709 0.89638877    0.90785056
     pregnant  1290 0.09098603    0.09214944
         <NA>   179 0.01262519            NA
#marital status time-varying

pmauglongfilter<-pmauglongfilter%>%
    mutate(MARSTAT=as.factor(MARSTAT))%>%
mutate(maritalstatus= recode(MARSTAT, '10' = "never married" ,'20' = "married or living together", '21'= "currently married", '22'= "currently living with partner", '31'="formerly in union",'32'='widow or widower',.default = NA_character_))

tabyl(pmauglongfilter, maritalstatus)
                 maritalstatus    n      percent valid_percent
                 never married 3506 0.2472845253    0.24735431
             currently married 3416 0.2409366624    0.24100466
 currently living with partner 4980 0.3512484130    0.35134754
             formerly in union 1947 0.1373254338    0.13736419
              widow or widower  325 0.0229228382    0.02292931
                          <NA>    4 0.0002821272            NA
#age-group

pmauglongfilter<-pmauglongfilter %>%
  mutate(agegroup = case_when(
      AGE >= 15 & AGE <= 19 ~ "15-19",
      AGE >= 20 & AGE <= 25 ~ "20-25",
    AGE >= 26 & AGE <= 30 ~ "26-30",
    AGE >= 31 & AGE <= 35 ~ "31-35",
     AGE >= 36 & AGE <= 40 ~ "36-40",
     AGE >= 41 & AGE <= 44 ~ "41-44",
    AGE >= 45 & AGE <= 49 ~ "45-49"
    ))

#contraceptive use: time-varying

pmauglongfilter<-pmauglongfilter%>%
  mutate(MCP=as.factor(MCP))%>%
mutate(usingmoderncon= recode(MCP, '1' = "yes" ,'0' = "no",.default = NA_character_))

tabyl(pmauglongfilter, usingmoderncon)
 usingmoderncon    n    percent valid_percent
             no 9307 0.65643955      0.666261
            yes 4662 0.32881930      0.333739
           <NA>  209 0.01474115            NA
#education: time-varying

pmauglongfilter<-pmauglongfilter%>%
  mutate(EDUCATTGEN=as.factor(EDUCATTGEN))%>%
mutate(educationlevel= recode(EDUCATTGEN, '1' = "none" ,'2' = "primary/middle school", '3'= "secondary/post-primary", '4'= 'tertiary/ post-secondary',.default = NA_character_))

tabyl(pmauglongfilter, educationlevel)
           educationlevel    n      percent valid_percent
                     none  843 0.0594583157     0.0594667
    primary/middle school 7905 0.5575539568     0.5576326
   secondary/post-primary 4269 0.3011002962     0.3011428
 tertiary/ post-secondary 1159 0.0817463676     0.0817579
                     <NA>    2 0.0001410636            NA
#decision-making:medical- time-varying

pmauglongfilter<-pmauglongfilter%>%
  mutate(HHDECMED=as.factor(HHDECMED))%>%
   mutate(decidemedical= recode(HHDECMED, '1' = "respondent" ,'2' = "husband/partner", '3'="someone else", .default = NA_character_))

tabyl(pmauglongfilter, decidemedical)
   decidemedical    n   percent valid_percent
      respondent 2130 0.1502328     0.2574018
 husband/partner 3826 0.2698547     0.4623565
    someone else 2319 0.1635633     0.2802417
            <NA> 5903 0.4163493            NA
#decision-making:family planning use- time-varying

pmauglongfilter<-pmauglongfilter%>%
  mutate(FPDECIDER=as.factor(FPDECIDER))%>%
   mutate(decidefp= recode(FPDECIDER, '1' = "respondent" ,'2' = "husband/partner", '3'="joint decision", .default = NA_character_))

tabyl(pmauglongfilter, decidefp)
        decidefp    n    percent valid_percent
      respondent 1887 0.13309353     0.3812891
 husband/partner  563 0.03970941     0.1137604
  joint decision 2499 0.17625899     0.5049505
            <NA> 9229 0.65093807            NA
#what is censored: Women who are pregnant at wave 1 

pmauglongfilter<-pmauglongfilter %>%
  mutate(censored = case_when(
      pregnant == 'pregnant' & PHASE == 1~ "1",
      .default="0"))

#pivot wide

selecting<-select(pmauglongfilter, PHASE, BIRTHEVENT)

pmaugwide <- pmauglongfilter %>%
  pivot_wider(
    id_cols=FQINSTID,
    names_from = PHASE,
    values_from = BIRTHEVENT
  )

pmaugwide<-pmaugwide%>%
  dplyr::rename('PHASE1'= '1')

pmaugwide<-pmaugwide%>%
  dplyr::rename('PHASE2'= '2')

pmaugwide<-pmaugwide%>%
  dplyr::rename('PHASE3'= '3')

pmaugwide<-pmaugwide%>%
  filter(PHASE1<97)

pmaugwide<-pmaugwide%>%
  filter(PHASE2<97)

pmaugwide<-pmaugwide%>%
  filter(PHASE3<97)



pmaugbind1to2<-pmaugwide%>%
  select(FQINSTID, PHASE1, PHASE2)

pmaugbind2to3<-pmaugwide%>%
  select(FQINSTID, PHASE2, PHASE3)


pmaugbind1to2 <- pmaugbind1to2 %>%
  mutate(birtheventvar = case_when(PHASE1 < PHASE2 ~ "1",
                                   .default = "0" ))

pmaugbind2to3 <-pmaugbind2to3 %>%
  mutate(birtheventvar = case_when(PHASE2 < PHASE3 ~ "1",
                                   .default = "0" ))


pmaugbindlong1to2 <- pmaugbind1to2 %>%
  pivot_longer(
    cols = starts_with("PHASE"),
    values_to= "parity",
    names_to = "wave",
  )

pmaugbindlong2to3 <- pmaugbind2to3 %>%
  pivot_longer(
    cols = starts_with("PHASE"),
    values_to= "parity",
    names_to = "wave",
  )

bindedlong<-rbind(pmaugbindlong2to3, pmaugbindlong1to2)

bindedlong<-bindedlong%>%
  dplyr::rename(PHASE=wave)

bindedlong <- bindedlong %>%
  mutate(PHASE = case_when(
    PHASE == "PHASE1" ~ 1,
    PHASE == "PHASE2" ~ 2,
    PHASE == "PHASE3" ~ 3,
    TRUE ~ NA_real_  # Handles any unexpected values
  ))

joined<-merge(pmauglongfilter, bindedlong, by=c("FQINSTID", "PHASE"))

#drop any duplicates

cleaned_joined <- joined %>%
  distinct()

#drop censored cases

cleaned_joined_dropcensored<-cleaned_joined%>%
  filter(censored==0)


cleaned_joined_dropcensored$birtheventvar<-as.numeric(cleaned_joined_dropcensored$birtheventvar)
hazardanalysis<-cleaned_joined_dropcensored%>%
  filter(wantanotherchild!="NA")

hazardanalysis%>%
group_by(YEAR, wantanotherchild)%>%
summarise(prop_bir=mean(birtheventvar, na.rm=T))%>%
ggplot(aes(x=YEAR, y=prop_bir))+
geom_line(aes(group=factor(wantanotherchild), color=factor(wantanotherchild)))+
ggtitle(label = "Hazard of having a birth after baseline by fertility preference")
`summarise()` has grouped output by 'YEAR'. You can override using the
`.groups` argument.

hazardanalysis2<-cleaned_joined_dropcensored%>%
  filter(maritalstatus!="NA")



hazardanalysis2%>%
group_by(YEAR, maritalstatus)%>%
summarise(prop_bir=mean(birtheventvar, na.rm=T))%>%
ggplot(aes(x=YEAR, y=prop_bir))+
geom_line(aes(group=factor(maritalstatus), color=factor(maritalstatus)))+
ggtitle(label = "Hazard of having a birth after baseline by marital status")
`summarise()` has grouped output by 'YEAR'. You can override using the
`.groups` argument.

#general model with sociodemographics and fertiltiy preferences
fit.1<-glm(birtheventvar~as.factor(YEAR)+ usingmoderncon+ maritalstatus+ wantanotherchild +AGE, data=hazardanalysis, family=binomial(link="cloglog"))

summary(fit.1)

Call:
glm(formula = birtheventvar ~ as.factor(YEAR) + usingmoderncon + 
    maritalstatus + wantanotherchild + AGE, family = binomial(link = "cloglog"), 
    data = hazardanalysis)

Coefficients:
                                            Estimate Std. Error z value
(Intercept)                                -2.001319   0.134910 -14.835
as.factor(YEAR)2021                         0.459384   0.061864   7.426
as.factor(YEAR)2022                        -0.086481   0.072110  -1.199
usingmodernconyes                          -0.391094   0.053577  -7.300
maritalstatuscurrently married              1.617935   0.120108  13.471
maritalstatuscurrently living with partner  1.513600   0.114761  13.189
maritalstatusformerly in union              1.143245   0.135730   8.423
maritalstatuswidow or widower               1.131481   0.217932   5.192
wantanotherchildno more children            0.023659   0.068484   0.345
wantanotherchildinfertile                  -0.178285   0.220659  -0.808
AGE                                        -0.025464   0.004038  -6.307
                                           Pr(>|z|)    
(Intercept)                                 < 2e-16 ***
as.factor(YEAR)2021                        1.12e-13 ***
as.factor(YEAR)2022                           0.230    
usingmodernconyes                          2.88e-13 ***
maritalstatuscurrently married              < 2e-16 ***
maritalstatuscurrently living with partner  < 2e-16 ***
maritalstatusformerly in union              < 2e-16 ***
maritalstatuswidow or widower              2.08e-07 ***
wantanotherchildno more children              0.730    
wantanotherchildinfertile                     0.419    
AGE                                        2.85e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 7932.4  on 7389  degrees of freedom
Residual deviance: 7488.3  on 7379  degrees of freedom
  (122 observations deleted due to missingness)
AIC: 7510.3

Number of Fisher Scoring iterations: 5
#add decision-making factors


tabyl(pmauglongfilter,decidefp)
        decidefp    n    percent valid_percent
      respondent 1887 0.13309353     0.3812891
 husband/partner  563 0.03970941     0.1137604
  joint decision 2499 0.17625899     0.5049505
            <NA> 9229 0.65093807            NA
tabyl(pmauglongfilter, decidemedical)
   decidemedical    n   percent valid_percent
      respondent 2130 0.1502328     0.2574018
 husband/partner 3826 0.2698547     0.4623565
    someone else 2319 0.1635633     0.2802417
            <NA> 5903 0.4163493            NA
hazardanalysis3<-cleaned_joined_dropcensored%>%
  filter(decidefp!="NA")



hazardanalysis3%>%
group_by(YEAR, decidefp)%>%
summarise(prop_bir=mean(birtheventvar, na.rm=T))%>%
ggplot(aes(x=YEAR, y=prop_bir))+
geom_line(aes(group=factor(decidefp), color=factor(decidefp)))+
ggtitle(label = "Hazard of having a birth after baseline by family planning user decision-maker")
`summarise()` has grouped output by 'YEAR'. You can override using the
`.groups` argument.

hazardanalysis4<-cleaned_joined_dropcensored%>%
  filter(decidemedical!="NA")



hazardanalysis4%>%
group_by(YEAR, decidemedical)%>%
summarise(prop_bir=mean(birtheventvar, na.rm=T))%>%
ggplot(aes(x=YEAR, y=prop_bir))+
geom_line(aes(group=factor(decidemedical), color=factor(decidemedical)))+
ggtitle(label = "Hazard of having a birth after baseline by medical decision-maker")
`summarise()` has grouped output by 'YEAR'. You can override using the
`.groups` argument.

# general model plus male influence variable on family planning use
fit.2<-glm(birtheventvar~as.factor(YEAR)+ usingmoderncon+ maritalstatus+AGE+ wantanotherchild+ decidefp, data=hazardanalysis, family=binomial(link="cloglog"))

summary(fit.2)

Call:
glm(formula = birtheventvar ~ as.factor(YEAR) + usingmoderncon + 
    maritalstatus + AGE + wantanotherchild + decidefp, family = binomial(link = "cloglog"), 
    data = hazardanalysis)

Coefficients:
                                            Estimate Std. Error z value
(Intercept)                                -1.102660   0.274468  -4.017
as.factor(YEAR)2021                         0.342608   0.098485   3.479
as.factor(YEAR)2022                        -0.039740   0.114975  -0.346
usingmodernconyes                          -0.401104   0.098567  -4.069
maritalstatuscurrently married              0.707437   0.214470   3.299
maritalstatuscurrently living with partner  0.705469   0.206247   3.421
maritalstatusformerly in union              0.379899   0.254408   1.493
maritalstatuswidow or widower               0.510620   0.435380   1.173
AGE                                        -0.020928   0.007127  -2.936
wantanotherchildno more children           -0.021351   0.113552  -0.188
wantanotherchildinfertile                  -0.246512   0.460172  -0.536
decidefphusband/partner                    -0.297557   0.140697  -2.115
decidefpjoint decision                     -0.225164   0.086743  -2.596
                                           Pr(>|z|)    
(Intercept)                                5.88e-05 ***
as.factor(YEAR)2021                        0.000504 ***
as.factor(YEAR)2022                        0.729612    
usingmodernconyes                          4.71e-05 ***
maritalstatuscurrently married             0.000972 ***
maritalstatuscurrently living with partner 0.000625 ***
maritalstatusformerly in union             0.135367    
maritalstatuswidow or widower              0.240870    
AGE                                        0.003321 ** 
wantanotherchildno more children           0.850854    
wantanotherchildinfertile                  0.592169    
decidefphusband/partner                    0.034440 *  
decidefpjoint decision                     0.009438 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 3075.2  on 2981  degrees of freedom
Residual deviance: 3009.3  on 2969  degrees of freedom
  (4530 observations deleted due to missingness)
AIC: 3035.3

Number of Fisher Scoring iterations: 5
#general model plus male influence variables on medical decision making
fit.3<-glm(birtheventvar~as.factor(YEAR)+ usingmoderncon+ maritalstatus+AGE + wantanotherchild+  decidemedical, data=hazardanalysis, family=binomial(link="cloglog"))

summary(fit.3)

Call:
glm(formula = birtheventvar ~ as.factor(YEAR) + usingmoderncon + 
    maritalstatus + AGE + wantanotherchild + decidemedical, family = binomial(link = "cloglog"), 
    data = hazardanalysis)

Coefficients:
                                            Estimate Std. Error z value
(Intercept)                                -0.416483   0.164056  -2.539
as.factor(YEAR)2021                         0.480375   0.070164   6.846
as.factor(YEAR)2022                         0.003790   0.080403   0.047
usingmodernconyes                          -0.437043   0.058312  -7.495
maritalstatuscurrently living with partner -0.105899   0.056668  -1.869
AGE                                        -0.026485   0.004574  -5.791
wantanotherchildno more children            0.052727   0.075441   0.699
wantanotherchildinfertile                  -0.361347   0.277952  -1.300
decidemedicalhusband/partner                0.102957   0.069663   1.478
decidemedicalsomeone else                  -0.041043   0.079222  -0.518
                                           Pr(>|z|)    
(Intercept)                                  0.0111 *  
as.factor(YEAR)2021                        7.57e-12 ***
as.factor(YEAR)2022                          0.9624    
usingmodernconyes                          6.64e-14 ***
maritalstatuscurrently living with partner   0.0617 .  
AGE                                        7.00e-09 ***
wantanotherchildno more children             0.4846    
wantanotherchildinfertile                    0.1936    
decidemedicalhusband/partner                 0.1394    
decidemedicalsomeone else                    0.6044    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 6017.0  on 5185  degrees of freedom
Residual deviance: 5797.7  on 5176  degrees of freedom
  (2326 observations deleted due to missingness)
AIC: 5817.7

Number of Fisher Scoring iterations: 5