Now, the question is “does first birth affect women’s life satisfaction?” Prepare the dataset

library(tidyverse) # Add the tidyverse package to my current library.
library(haven) # Handle labelled data.
library(splitstackshape) #transform wide data (with stacked variables) to long data
library(ggplot2)
library(plm)
library(lmtest)

##Import 6 waves of women data
for (i in 1:6) {
  assign(paste0("women", i), #assign is similar to <-; paste0 is to combine wave and i into a name, i ranges from 1 to 6. 
         read_dta(paste0("wave", i, "_women.dta"))
         )
} 

##Clean 6 waves of women data
clean_fun <- function(df) {  df %>% 
  transmute(
    id=zap_label(id), #remove label of id
    age=zap_label(age), #remove label of age
    wave=as.numeric(wave),
    relstat=as_factor(relstat), #make relstat as a factor
    relstat=case_when(relstat== "-7 Incomplete data" ~ as.character(NA), #specify when is missing for relstat
                      TRUE ~ as.character(relstat))%>% as_factor(), #make relstat as a factor again
    hlt=case_when(hlt1<0 ~ as.numeric(NA),  #specify when hlt1 is missing 
                   TRUE ~ as.numeric(hlt1)),
    nkidsbio=case_when(nkidsbio==-7~ as.numeric(NA), #specify when is missing for relstat
                      TRUE ~ as.numeric(nkidsbio)), 
    sat=case_when(sat6<0 ~ as.numeric(NA), #specify when sat6 is missing
                   TRUE ~ as.numeric(sat6)),
  )%>% drop_na()  }
women1a <- clean_fun(women1)
women2a <- clean_fun(women2)
women3a <- clean_fun(women3)
women4a <- clean_fun(women4)
women5a <- clean_fun(women5)
women6a <- clean_fun(women6)

women1b <- women1a %>% filter(nkidsbio==0)%>% #keep individuals who are childless in the first wave
rename(wave.1=wave, age.1=age, relstat.1=relstat, hlt.1=hlt, nkidsbio.1=nkidsbio, sat.1=sat ) #rename variables

women2b <- women2a %>% 
rename(wave.2=wave, age.2=age, relstat.2=relstat, hlt.2=hlt, nkidsbio.2=nkidsbio, sat.2=sat )

women3b <- women3a %>% 
rename(wave.3=wave, age.3=age, relstat.3=relstat, hlt.3=hlt, nkidsbio.3=nkidsbio, sat.3=sat )

women4b <- women4a %>% 
rename(wave.4=wave, age.4=age, relstat.4=relstat, hlt.4=hlt, nkidsbio.4=nkidsbio, sat.4=sat )

women5b <- women5a %>% 
rename(wave.5=wave, age.5=age, relstat.5=relstat, hlt.5=hlt, nkidsbio.5=nkidsbio, sat.5=sat )

women6b <- women6a %>% 
rename(wave.6=wave, age.6=age, relstat.6=relstat, hlt.6=hlt, nkidsbio.6=nkidsbio, sat.6=sat )


women_wide <- left_join(women1b, women2b, by = "id") %>%  # left join women1b and women2b
  left_join(women3b, by = "id") %>% # left join with women3b
  left_join(women4b, by = "id") %>% # left join with women4b
  left_join(women5b, by = "id") %>% # left join with women5b
  left_join(women6b, by = "id") # left join with women6b
#by using left_join I keep those have no kids in the first wave and follow them

women_long<- merged.stack(women_wide, #dataset for transfrom
                            var.stubs = c("age", "wave", "relstat", "hlt","nkidsbio", "sat"), 
#var.stubs is to specify the prefixes of the variable groups
                            sep = ".") %>%  
#sep is to specify the character that separates the "variable name" from the "times" in the source
                            drop_na(wave)
#drop the observations which did not join the wave


women_long <- women_long %>% 
  group_by(id) %>% 
  mutate(
    firstkid=case_when( nkidsbio!=dplyr::lag(nkidsbio, 1) & dplyr::lag(nkidsbio, 1)==0 & nkidsbio>0 ~ 1,
                          TRUE ~ 0),
    firstkid2=case_when( nkidsbio!=dplyr::lag(nkidsbio, 1) & dplyr::lag(nkidsbio, 1)==0 & nkidsbio==2 ~ 1,
                          TRUE ~ 0)
    ) #to identify individual whose first childbearing is twins

twinid <- women_long$id[women_long$firstkid2==1]

women_long <- women_long[!(women_long$id %in% twinid),] #remove respondents whose first childbearing is twins
women_long <-  filter(women_long, nkidsbio<2) # remove repeated event of childbearing, only focus on having first child

panel_data <- pdata.frame(women_long, index=c("id", "wave")) #define panel data

No. 1

Question

Run a fixed effect model with robust standard error to investigate the impact of first birth on women’s subjective wellbeing, and see what is the difference with a normal fixed effect

Answer

model1 <- plm(sat ~ nkidsbio, data=women_long, model="within") #fixed effect model (within-person demean)
summary(model1) #the normal results of fixed effect
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = sat ~ nkidsbio, data = women_long, model = "within")
## 
## Unbalanced Panel: n = 3753, T = 1-6, N = 14809
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -8.00000 -0.50000  0.00000  0.59092  5.66667 
## 
## Coefficients:
##          Estimate Std. Error t-value Pr(>|t|)   
## nkidsbio 0.181837   0.068196  2.6664 0.007679 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    16966
## Residual Sum of Squares: 16956
## R-Squared:      0.00064269
## Adj. R-Squared: -0.33862
## F-statistic: 7.10949 on 1 and 11055 DF, p-value: 0.0076788
coeftest(model1, vcov. = vcovHC, type = "HC1") #get results with robust standard error
## 
## t test of coefficients:
## 
##          Estimate Std. Error t value Pr(>|t|)  
## nkidsbio 0.181837   0.078189  2.3256  0.02006 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#The difference is that the significance level has changed.

No. 2

Question

Run a fixed effect model to see the linear impact of first birth on women’s subjective wellbeing

  • Step 1: create a duration variable to measure years since the first childbearing

  • Step 2: run a fix effect model to specify the linear impact of first childbearing on women’s life satisfaction

Answer

women_long <- women_long %>% 
  select(-firstkid2)%>%
  group_by(id) %>% 
  mutate(
     wave=as.numeric(wave),
     birthwave=case_when(firstkid==1 ~ wave), #identify the timing of first birth
     birthwave1=min(birthwave, na.rm = TRUE), #generate a variable for timing of first birth
     index=wave -  birthwave1,
     duration=case_when(index<0 ~ 0, index>=0 ~ index, TRUE ~ 0), #linear setup
  ) 

#There will be many warnings, whenever you attempt to find the minimum or maximum value of a vector that has a length of zero. It won’t actually prevent your code from running.

women_long$index[is.infinite(women_long$birthwave1)] <- NA 
model2 <- plm(sat ~ nkidsbio + duration, data=women_long, model="within") #fixed effect model (within-person demean)
summary(model2) 
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = sat ~ nkidsbio + duration, data = women_long, model = "within")
## 
## Unbalanced Panel: n = 3753, T = 1-6, N = 14809
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -8.00000 -0.50000  0.00000  0.58882  5.66667 
## 
## Coefficients:
##           Estimate Std. Error t-value  Pr(>|t|)    
## nkidsbio  0.373101   0.076486   4.878 1.086e-06 ***
## duration -0.241469   0.043944  -5.495 3.995e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    16966
## Residual Sum of Squares: 16909
## R-Squared:      0.003365
## Adj. R-Squared: -0.3351
## F-statistic: 18.6614 on 2 and 11054 DF, p-value: 8.1117e-09
coeftest(model2, vcov. = vcovHC, type = "HC1") #get results with robust standard error
## 
## t test of coefficients:
## 
##           Estimate Std. Error t value  Pr(>|t|)    
## nkidsbio  0.373101   0.085119  4.3833 1.180e-05 ***
## duration -0.241469   0.042712 -5.6534 1.612e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

No. 3

Question

Run a fixed effect model to see the dummy impact of first birth on women’s subjective wellbeing

  • Step 1: create dummy variables to indicate different years before and after first childbearing

  • Step 2: run a fix effect model to specify the different impact of first childbearing over time on women’s life satisfaction

Answer

women_long <- women_long %>% 
  mutate(
  index=as.numeric(index),
  dummy=case_when(
                  is.na(index)==TRUE ~ "2+ year before",
                  index %in% c(-2:-5) ~ "2+ year before",
                  index==-1 ~ "1 year before",
                  index==0 ~ "year of childbearing",
                  index==1 ~ "1 year after",
                  index>1 ~ "2+ year after"
                  ) %>% as_factor() 
  ) #setup for dummy impact
model3 <- plm(sat ~ dummy, data=women_long, model="within") #fixed effect model (within-person demean)
#note that, here we do not use sat ~ nkidsbio + dummy, 
summary(model3) 
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = sat ~ dummy, data = women_long, model = "within")
## 
## Unbalanced Panel: n = 3753, T = 1-6, N = 14809
## 
## Residuals:
##    Min. 1st Qu.  Median 3rd Qu.    Max. 
## -8.0522 -0.5000  0.0000  0.5998  5.6667 
## 
## Coefficients:
##                           Estimate Std. Error t-value  Pr(>|t|)    
## dummy1 year before        0.407299   0.096119  4.2374 2.279e-05 ***
## dummyyear of childbearing 0.679484   0.092315  7.3605 1.962e-13 ***
## dummy1 year after         0.060706   0.107714  0.5636     0.573    
## dummy2+ year after        0.049500   0.116916  0.4234     0.672    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    16966
## Residual Sum of Squares: 16855
## R-Squared:      0.0065595
## Adj. R-Squared: -0.33106
## F-statistic: 18.2435 on 4 and 11052 DF, p-value: 5.9849e-15
coeftest(model3, vcov. = vcovHC, type = "HC1") #get results with robust standard error
## 
## t test of coefficients:
## 
##                           Estimate Std. Error t value  Pr(>|t|)    
## dummy1 year before        0.407299   0.092703  4.3936 1.125e-05 ***
## dummyyear of childbearing 0.679484   0.103584  6.5597 5.632e-11 ***
## dummy1 year after         0.060706   0.117366  0.5172    0.6050    
## dummy2+ year after        0.049500   0.121827  0.4063    0.6845    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1