Now, the question is “does first birth affect women’s life satisfaction?” Prepare the dataset
library(tidyverse) # Add the tidyverse package to my current library.
library(haven) # Handle labelled data.
library(texreg) #output result
library(splitstackshape) #transform wide data (with stacked variables) to long data
library(plm) #linear models for panel data
##Import 6 waves of women data
women1 <- read_dta("wave1_women.dta")
women2 <- read_dta("wave2_women.dta")
women3 <- read_dta("wave3_women.dta")
women4 <- read_dta("wave4_women.dta")
women5 <- read_dta("wave5_women.dta")
women6 <- read_dta("wave6_women.dta")
##Clean 6 waves of women data
clean_fun <- function(df) { df %>%
transmute(
id,
age,
wave=as.numeric(wave),
relstat=as_factor(relstat), #make relstat as a factor
relstat=case_when(relstat== "-7 Incomplete data" ~ as.character(NA), #specify when is missing for relstat
TRUE ~ as.character(relstat))%>% as_factor(), #make relstat as a factor again
health=case_when(hlt1<0 ~ as.numeric(NA), #specify when hlt1 is missing
TRUE ~ as.numeric(hlt1)),
childno=case_when(nkidsbio==-7~ as.numeric(NA), #specify when is missing for relstat
TRUE ~ as.numeric(nkidsbio)),
sat=case_when(sat6<0 ~ as.numeric(NA), #specify when sat6 is missing
TRUE ~ as.numeric(sat6)),
)%>% drop_na() }
women1a <- clean_fun(women1)
women2a <- clean_fun(women2)
women3a <- clean_fun(women3)
women4a <- clean_fun(women4)
women5a <- clean_fun(women5)
women6a <- clean_fun(women6)
women1b <- women1a %>% filter(childno==0)%>% #keep individuals who are childless in the first wave
rename(wave.1=wave, age.1=age, relstat.1=relstat, health.1=health, childno.1=childno, sat.1=sat ) #rename variables
women2b <- women2a %>%
rename(wave.2=wave, age.2=age, relstat.2=relstat, health.2=health, childno.2=childno, sat.2=sat )
women3b <- women3a %>%
rename(wave.3=wave, age.3=age, relstat.3=relstat, health.3=health, childno.3=childno, sat.3=sat )
women4b <- women4a %>%
rename(wave.4=wave, age.4=age, relstat.4=relstat, health.4=health, childno.4=childno, sat.4=sat )
women5b <- women5a %>%
rename(wave.5=wave, age.5=age, relstat.5=relstat, health.5=health, childno.5=childno, sat.5=sat )
women6b <- women6a %>%
rename(wave.6=wave, age.6=age, relstat.6=relstat, health.6=health, childno.6=childno, sat.6=sat )
women_wide <- left_join(women1b, women2b, by = "id") %>% # left join women1b and women2b
left_join(women3b, by = "id") %>% # left join with women3b
left_join(women4b, by = "id") %>% # left join with women4b
left_join(women5b, by = "id") %>% # left join with women5b
left_join(women6b, by = "id") # left join with women6b
#by using left_join I keep those have no kids in the first wave and follow them
women_long<- merged.stack(women_wide, #dataset for transfrom
var.stubs = c("age", "wave", "relstat", "health","childno", "sat"),
#var.stubs is to specify the prefixes of the variable groups
sep = ".") %>%
#sep is to specify the character that separates the "variable name" from the "times" in the source
drop_na(wave)
#drop the observations which did not join the wave
1.1 Remove observations when people start to have a second or higher-order birth. Note: you need to remove people who have twins first.
1.2 Will it be difference, if we just remove observations when the childno variable turns to 2, without removing the twin cases?
#first, identify having first child
women_long1 <- women_long %>%
group_by(id) %>%
mutate(
firstkid=case_when( childno!=dplyr::lag(childno, 1) & dplyr::lag(childno, 1)==0 & childno>0 ~ 1,
TRUE ~ 0),
#when the person has 0 children at t-1 while has at least 1 child at t, define it first childbirth
twin=case_when( childno!=dplyr::lag(childno, 1) & dplyr::lag(childno, 1)==0 & childno==1 ~ 1, #single birth
childno!=dplyr::lag(childno, 1) & dplyr::lag(childno, 1)==0 & childno==2 ~ 2, #twin birth
TRUE ~ 0)
#when the person has 0 children at t-1 while has 1 child at t, define it a single birth, i.e. 1
#when the person has 0 children at t-1 while has 2 children at t, define it a twin birth, i.e. 2
)
#second, remove individuals who have twins
twinid <- women_long1$id[women_long1$twin==2] #the id of women who have twin for their first birth
women_long2 <- women_long1[!(women_long1$id %in% twinid),] #now the data does not have twin situations
#third, remove observations when people start to have a second or higher-order birth.
women_long3 <- women_long2%>%
filter(childno<2)
table(women_long3$childno) #we have 808 observations of first child
##
## 0 1
## 14001 808
table(women_long3$firstkid, women_long3$twin) #we have 332 observations of first child
##
## 0 1
## 0 14477 0
## 1 0 332
#sample size of women_long3 is 14809
women_long4 <- women_long%>%
filter(childno<2)
#sample size of women_long4 is 14933
#you can check the case where id=="314674000" in women_long3 and women_long4 to understand the difference.
#women who have twin are still in the dataset of women_long4 but gone in women_long3.
Based on the dataset that you remove twins and observations of having 2 or more children, run a fixed effect, test the “step impact”.
step <- plm(sat ~ childno , data=women_long3, model="within")
summary(step)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = sat ~ childno, data = women_long3, model = "within")
##
## Unbalanced Panel: n = 3753, T = 1-6, N = 14809
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -8.00000 -0.50000 0.00000 0.59092 5.66667
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## childno 0.181837 0.068196 2.6664 0.007679 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 16966
## Residual Sum of Squares: 16956
## R-Squared: 0.00064269
## Adj. R-Squared: -0.33862
## F-statistic: 7.10949 on 1 and 11055 DF, p-value: 0.0076788
#interpretation:
#When a woman's number of children changes from 0 to 1, the life satisfaction increases by 0.18 on average. This effect remains constant over time.
Based on the dataset that you remove twins and observations of having 2 or more children, run a fixed effect, test the “linear impact”.
3.1 set up timing indicators
step 1: create the anchor wave to identify the time the first childbirth happens
step 2: create the timeindex
step 3: create the duration since the first childbirth
3.2 run the fixed effect with linear time indicators
women_long3a <- women_long3 %>%
group_by(id) %>%
mutate(
wave=as.numeric(wave),
birthwave=case_when(firstkid==1 ~ wave,
TRUE ~ 99), #identify the timing of first birth
anchorwave=min(birthwave), #generate the anchor wave
timeindex=wave - anchorwave, #create the timeindex
duration=case_when(timeindex<0 ~ 0,
timeindex>=0 ~ timeindex), #create duration
)
#you can check a case to see if you code it right or not, e.g. id=="3651000"
linear <- plm(sat ~ childno + duration, data=women_long3a, model="within") #fixed effect model (within-person demean)
summary(linear)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = sat ~ childno + duration, data = women_long3a,
## model = "within")
##
## Unbalanced Panel: n = 3753, T = 1-6, N = 14809
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -8.00000 -0.50000 0.00000 0.58882 5.66667
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## childno 0.373101 0.076486 4.878 1.086e-06 ***
## duration -0.241469 0.043944 -5.495 3.995e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 16966
## Residual Sum of Squares: 16909
## R-Squared: 0.003365
## Adj. R-Squared: -0.3351
## F-statistic: 18.6614 on 2 and 11054 DF, p-value: 8.1117e-09
#Interpretation:
#in the year of the first childbirth, the life satisfaction increases by 0.373; and then decline at a yearly rate of 0.24 after the childbirth
Based on the dataset that you remove twins and observations of having 2 or more children, run a fixed effect, test the “dummy impact”.
4.1 create dummy variables to indicate different years before and after first childbearing
4.2 run the fixed effect with dummy time indicators
table(women_long3a$timeindex)
##
## -98 -97 -96 -95 -94 -93 -5 -4 -3 -2 -1 0 1 2 3 4
## 3421 2506 2179 1874 1683 1519 46 94 144 234 301 332 232 147 74 23
#to see the ranges
women_long3b <- women_long3a %>%
mutate(
dummy=case_when(
timeindex %in% c(-93:-98) ~ "2+ year before",
timeindex %in% c(-2:-5) ~ "2+ year before",
timeindex==-1 ~ "1 year before",
timeindex==0 ~ "year of childbirth",
timeindex==1 ~ "1 year after",
timeindex>1 ~ "2+ year after"
) %>% as_factor()
) #setup for dummy impact
dummy_impact <- plm(sat ~ dummy, data=women_long3b, model="within") #fixed effect model (within-person demean)
#note that, here we do not use sat ~ childno + dummy,
summary(dummy_impact)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = sat ~ dummy, data = women_long3b, model = "within")
##
## Unbalanced Panel: n = 3753, T = 1-6, N = 14809
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -8.0522 -0.5000 0.0000 0.5998 5.6667
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## dummy1 year before 0.407299 0.096119 4.2374 2.279e-05 ***
## dummyyear of childbirth 0.679484 0.092315 7.3605 1.962e-13 ***
## dummy1 year after 0.060706 0.107714 0.5636 0.573
## dummy2+ year after 0.049500 0.116916 0.4234 0.672
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 16966
## Residual Sum of Squares: 16855
## R-Squared: 0.0065595
## Adj. R-Squared: -0.33106
## F-statistic: 18.2435 on 4 and 11052 DF, p-value: 5.9849e-15
#Interpretation:
#compare to two years before the first childbirth, the life satisfaction increases by 0.407 one year before the first childbirth ;
#compare to two years before the first childbirth, the life satisfaction increases by 0.68 in the year of first childbirth,;
#compared to two years before the first childbirth, the life satisfaction does not show significant difference one or more year after childbirth.
compare the result from fixed effect that test the step-impact, linear-impact and dummy-impact of first childbirth.
texreg::screenreg(list(step,linear, dummy_impact),
custom.model.names=c("step impact", "linear impact", "dummy impact"),
include.ci = FALSE)
##
## =================================================================
## step impact linear impact dummy impact
## -----------------------------------------------------------------
## childno 0.18 ** 0.37 ***
## (0.07) (0.08)
## duration -0.24 ***
## (0.04)
## dummy1 year before 0.41 ***
## (0.10)
## dummyyear of childbirth 0.68 ***
## (0.09)
## dummy1 year after 0.06
## (0.11)
## dummy2+ year after 0.05
## (0.12)
## -----------------------------------------------------------------
## R^2 0.00 0.00 0.01
## Adj. R^2 -0.34 -0.34 -0.33
## Num. obs. 14809 14809 14809
## =================================================================
## *** p < 0.001; ** p < 0.01; * p < 0.05