Now, the question is “does first birth affect women’s life satisfaction?” Prepare the dataset

library(tidyverse) # Add the tidyverse package to my current library.
library(haven) # Handle labelled data.
library(texreg) #output result
library(splitstackshape) #transform wide data (with stacked variables) to long data
library(plm) #linear models for panel data

##Import 6 waves of women data
women1 <- read_dta("wave1_women.dta")
women2 <- read_dta("wave2_women.dta")
women3 <- read_dta("wave3_women.dta")
women4 <- read_dta("wave4_women.dta")
women5 <- read_dta("wave5_women.dta")
women6 <- read_dta("wave6_women.dta")

##Clean 6 waves of women data
clean_fun <- function(df) {  df %>% 
  transmute(
    id, 
    age, 
    wave=as.numeric(wave),
    relstat=as_factor(relstat), #make relstat as a factor
    relstat=case_when(relstat== "-7 Incomplete data" ~ as.character(NA), #specify when is missing for relstat
                      TRUE ~ as.character(relstat))%>% as_factor(), #make relstat as a factor again
    health=case_when(hlt1<0 ~ as.numeric(NA),  #specify when hlt1 is missing 
                   TRUE ~ as.numeric(hlt1)),
    childno=case_when(nkidsbio==-7~ as.numeric(NA), #specify when is missing for relstat
                      TRUE ~ as.numeric(nkidsbio)), 
    sat=case_when(sat6<0 ~ as.numeric(NA), #specify when sat6 is missing
                   TRUE ~ as.numeric(sat6)),
  )%>% drop_na()  }
women1a <- clean_fun(women1)
women2a <- clean_fun(women2)
women3a <- clean_fun(women3)
women4a <- clean_fun(women4)
women5a <- clean_fun(women5)
women6a <- clean_fun(women6)

women1b <- women1a %>% filter(childno==0)%>% #keep individuals who are childless in the first wave
rename(wave.1=wave, age.1=age, relstat.1=relstat, health.1=health, childno.1=childno, sat.1=sat ) #rename variables

women2b <- women2a %>% 
rename(wave.2=wave, age.2=age, relstat.2=relstat, health.2=health, childno.2=childno, sat.2=sat )

women3b <- women3a %>% 
rename(wave.3=wave, age.3=age, relstat.3=relstat, health.3=health, childno.3=childno, sat.3=sat )

women4b <- women4a %>% 
rename(wave.4=wave, age.4=age, relstat.4=relstat, health.4=health, childno.4=childno, sat.4=sat )

women5b <- women5a %>% 
rename(wave.5=wave, age.5=age, relstat.5=relstat, health.5=health, childno.5=childno, sat.5=sat )

women6b <- women6a %>% 
rename(wave.6=wave, age.6=age, relstat.6=relstat, health.6=health, childno.6=childno, sat.6=sat )


women_wide <- left_join(women1b, women2b, by = "id") %>%  # left join women1b and women2b
  left_join(women3b, by = "id") %>% # left join with women3b
  left_join(women4b, by = "id") %>% # left join with women4b
  left_join(women5b, by = "id") %>% # left join with women5b
  left_join(women6b, by = "id") # left join with women6b
#by using left_join I keep those have no kids in the first wave and follow them

women_long<- merged.stack(women_wide, #dataset for transfrom
                            var.stubs = c("age", "wave", "relstat", "health","childno", "sat"), 
#var.stubs is to specify the prefixes of the variable groups
                            sep = ".") %>%  
#sep is to specify the character that separates the "variable name" from the "times" in the source
                            drop_na(wave)
#drop the observations which did not join the wave

No. 1

Question

1.1 Remove observations when people start to have a second or higher-order birth. Note: you need to remove people who have twins first.

1.2 Will it be difference, if we just remove observations when the childno variable turns to 2, without removing the twin cases?

Answer for Question 1.1

#first, identify having first child
women_long1 <- women_long %>% 
  group_by(id) %>% 
  mutate(
    firstkid=case_when( childno!=dplyr::lag(childno, 1) & dplyr::lag(childno, 1)==0 & childno>0 ~ 1,
                          TRUE ~ 0), 
    #when the person has 0 children at t-1 while has at least 1 child at t, define it first childbirth
    twin=case_when( childno!=dplyr::lag(childno, 1) & dplyr::lag(childno, 1)==0 & childno==1 ~ 1, #single birth
                    childno!=dplyr::lag(childno, 1) & dplyr::lag(childno, 1)==0 & childno==2 ~ 2, #twin birth
                    TRUE ~ 0)
    #when the person has 0 children at t-1 while has 1 child at t, define it a single birth, i.e. 1
    #when the person has 0 children at t-1 while has 2 children at t, define it a twin birth, i.e. 2
    ) 

#second, remove individuals who have twins
twinid <- women_long1$id[women_long1$twin==2] #the id of women who have twin for their first birth
women_long2 <- women_long1[!(women_long1$id %in% twinid),] #now the data does not have twin situations

#third, remove observations when people start to have a second or higher-order birth.
women_long3 <-  women_long2%>% 
  filter(childno<2)
table(women_long3$childno) #we have 808 observations of first child
## 
##     0     1 
## 14001   808
table(women_long3$firstkid, women_long3$twin) #we have 332 observations of first child
##    
##         0     1
##   0 14477     0
##   1     0   332
#sample size of women_long3 is 14809

Answer for Question 1.2

women_long4 <-  women_long%>% 
  filter(childno<2)
#sample size of women_long4 is 14933

#you can check the case where id=="314674000" in women_long3 and  women_long4 to understand the difference.
#women who have twin are still in the dataset of women_long4 but gone in women_long3.

No. 2

Question

Based on the dataset that you remove twins and observations of having 2 or more children, run a fixed effect, test the “step impact”.

Answer

step <- plm(sat ~ childno , data=women_long3, model="within") 
summary(step)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = sat ~ childno, data = women_long3, model = "within")
## 
## Unbalanced Panel: n = 3753, T = 1-6, N = 14809
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -8.00000 -0.50000  0.00000  0.59092  5.66667 
## 
## Coefficients:
##         Estimate Std. Error t-value Pr(>|t|)   
## childno 0.181837   0.068196  2.6664 0.007679 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    16966
## Residual Sum of Squares: 16956
## R-Squared:      0.00064269
## Adj. R-Squared: -0.33862
## F-statistic: 7.10949 on 1 and 11055 DF, p-value: 0.0076788
#interpretation:
#When a woman's number of children changes from 0 to 1, the life satisfaction increases by 0.18 on average. This effect remains constant over time.

No. 3

Question

Based on the dataset that you remove twins and observations of having 2 or more children, run a fixed effect, test the “linear impact”.

3.1 set up timing indicators

  • step 1: create the anchor wave to identify the time the first childbirth happens

  • step 2: create the timeindex

  • step 3: create the duration since the first childbirth

3.2 run the fixed effect with linear time indicators

Answer for 3.1

women_long3a <- women_long3 %>% 
  group_by(id) %>% 
  mutate(
     wave=as.numeric(wave),
     birthwave=case_when(firstkid==1 ~ wave,
                         TRUE ~ 99), #identify the timing of first birth
     anchorwave=min(birthwave), #generate the anchor wave
     timeindex=wave -  anchorwave, #create the timeindex
     duration=case_when(timeindex<0 ~ 0, 
                        timeindex>=0 ~ timeindex), #create duration
  ) 

#you can check a case to see if you code it right or not, e.g. id=="3651000"

Answer for 3.2

linear <- plm(sat ~ childno + duration, data=women_long3a, model="within") #fixed effect model (within-person demean)
summary(linear) 
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = sat ~ childno + duration, data = women_long3a, 
##     model = "within")
## 
## Unbalanced Panel: n = 3753, T = 1-6, N = 14809
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -8.00000 -0.50000  0.00000  0.58882  5.66667 
## 
## Coefficients:
##           Estimate Std. Error t-value  Pr(>|t|)    
## childno   0.373101   0.076486   4.878 1.086e-06 ***
## duration -0.241469   0.043944  -5.495 3.995e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    16966
## Residual Sum of Squares: 16909
## R-Squared:      0.003365
## Adj. R-Squared: -0.3351
## F-statistic: 18.6614 on 2 and 11054 DF, p-value: 8.1117e-09
#Interpretation:
#in the year of the first childbirth, the life satisfaction increases by 0.373; and then decline at a yearly rate of 0.24 after the childbirth

No. 4

Question

Based on the dataset that you remove twins and observations of having 2 or more children, run a fixed effect, test the “dummy impact”.

4.1 create dummy variables to indicate different years before and after first childbearing

4.2 run the fixed effect with dummy time indicators

Answer for 4.1

table(women_long3a$timeindex) 
## 
##  -98  -97  -96  -95  -94  -93   -5   -4   -3   -2   -1    0    1    2    3    4 
## 3421 2506 2179 1874 1683 1519   46   94  144  234  301  332  232  147   74   23
#to see the ranges

women_long3b <- women_long3a %>% 
  mutate(
  dummy=case_when(
                  timeindex %in% c(-93:-98) ~ "2+ year before",
                  timeindex %in% c(-2:-5) ~ "2+ year before",
                  timeindex==-1 ~ "1 year before",
                  timeindex==0 ~ "year of childbirth",
                  timeindex==1 ~ "1 year after",
                  timeindex>1 ~ "2+ year after"
                  ) %>% as_factor() 
  ) #setup for dummy impact

Answer for 4.2

dummy_impact <- plm(sat ~ dummy, data=women_long3b, model="within") #fixed effect model (within-person demean)
#note that, here we do not use sat ~ childno + dummy, 
summary(dummy_impact) 
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = sat ~ dummy, data = women_long3b, model = "within")
## 
## Unbalanced Panel: n = 3753, T = 1-6, N = 14809
## 
## Residuals:
##    Min. 1st Qu.  Median 3rd Qu.    Max. 
## -8.0522 -0.5000  0.0000  0.5998  5.6667 
## 
## Coefficients:
##                         Estimate Std. Error t-value  Pr(>|t|)    
## dummy1 year before      0.407299   0.096119  4.2374 2.279e-05 ***
## dummyyear of childbirth 0.679484   0.092315  7.3605 1.962e-13 ***
## dummy1 year after       0.060706   0.107714  0.5636     0.573    
## dummy2+ year after      0.049500   0.116916  0.4234     0.672    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    16966
## Residual Sum of Squares: 16855
## R-Squared:      0.0065595
## Adj. R-Squared: -0.33106
## F-statistic: 18.2435 on 4 and 11052 DF, p-value: 5.9849e-15
#Interpretation:
#compare to two years before the first childbirth, the life satisfaction increases by 0.407 one year before the first childbirth  ;
#compare to two years before the first childbirth, the life satisfaction increases by 0.68 in the year of first childbirth,;
#compared to two years before the first childbirth, the life satisfaction does not show significant difference one or more year after childbirth.

No. 5

Question

compare the result from fixed effect that test the step-impact, linear-impact and dummy-impact of first childbirth.

Answer for 5

texreg::screenreg(list(step,linear, dummy_impact), 
                custom.model.names=c("step impact", "linear impact", "dummy impact"),
        include.ci = FALSE)
## 
## =================================================================
##                          step impact  linear impact  dummy impact
## -----------------------------------------------------------------
## childno                      0.18 **      0.37 ***               
##                             (0.07)       (0.08)                  
## duration                                 -0.24 ***               
##                                          (0.04)                  
## dummy1 year before                                       0.41 ***
##                                                         (0.10)   
## dummyyear of childbirth                                  0.68 ***
##                                                         (0.09)   
## dummy1 year after                                        0.06    
##                                                         (0.11)   
## dummy2+ year after                                       0.05    
##                                                         (0.12)   
## -----------------------------------------------------------------
## R^2                          0.00         0.00           0.01    
## Adj. R^2                    -0.34        -0.34          -0.33    
## Num. obs.                14809        14809          14809       
## =================================================================
## *** p < 0.001; ** p < 0.01; * p < 0.05