Now, the question is “does first birth affect women’s life satisfaction?”
Prepare the dataset
library(tidyverse) # Add the tidyverse package to my current library.
library(haven) # Handle labelled data.
library(texreg)
library(splitstackshape) #transform wide data (with stacked variables) to long data
library(plm) #linear models for panel data
##Import 6 waves of women data
women1 <- read_dta("wave1_women.dta")
women2 <- read_dta("wave2_women.dta")
women3 <- read_dta("wave3_women.dta")
women4 <- read_dta("wave4_women.dta")
women5 <- read_dta("wave5_women.dta")
women6 <- read_dta("wave6_women.dta")
##Clean 6 waves of women data
clean_fun <- function(df) { df %>%
transmute(
id,
age,
wave=as.numeric(wave),
relstat=as_factor(relstat), #make relstat as a factor
relstat=case_when(relstat== "-7 Incomplete data" ~ as.character(NA), #specify when is missing for relstat
TRUE ~ as.character(relstat))%>% as_factor(), #make relstat as a factor again
health=case_when(hlt1<0 ~ as.numeric(NA), #specify when hlt1 is missing
TRUE ~ as.numeric(hlt1)),
childno=case_when(nkidsbio==-7~ as.numeric(NA), #specify when is missing for relstat
TRUE ~ as.numeric(nkidsbio)),
sat=case_when(sat6<0 ~ as.numeric(NA), #specify when sat6 is missing
TRUE ~ as.numeric(sat6)),
)%>% drop_na() }
women1a <- clean_fun(women1)
women2a <- clean_fun(women2)
women3a <- clean_fun(women3)
women4a <- clean_fun(women4)
women5a <- clean_fun(women5)
women6a <- clean_fun(women6)
women1b <- women1a %>% filter(childno==0)%>% #keep individuals who are childless in the first wave
rename(wave.1=wave, age.1=age, relstat.1=relstat, health.1=health, childno.1=childno, sat.1=sat ) #rename variables
women2b <- women2a %>%
rename(wave.2=wave, age.2=age, relstat.2=relstat, health.2=health, childno.2=childno, sat.2=sat )
women3b <- women3a %>%
rename(wave.3=wave, age.3=age, relstat.3=relstat, health.3=health, childno.3=childno, sat.3=sat )
women4b <- women4a %>%
rename(wave.4=wave, age.4=age, relstat.4=relstat, health.4=health, childno.4=childno, sat.4=sat )
women5b <- women5a %>%
rename(wave.5=wave, age.5=age, relstat.5=relstat, health.5=health, childno.5=childno, sat.5=sat )
women6b <- women6a %>%
rename(wave.6=wave, age.6=age, relstat.6=relstat, health.6=health, childno.6=childno, sat.6=sat )
women_wide <- left_join(women1b, women2b, by = "id") %>% # left join women1b and women2b
left_join(women3b, by = "id") %>% # left join with women3b
left_join(women4b, by = "id") %>% # left join with women4b
left_join(women5b, by = "id") %>% # left join with women5b
left_join(women6b, by = "id") # left join with women6b
#by using left_join I keep those have no kids in the first wave and follow them
Transfer the “women_wide” data to a long format, name it as “women_long”
women_long<- merged.stack(women_wide, #dataset for transfrom
var.stubs = c("age", "wave", "relstat", "health","childno", "sat"),
#var.stubs is to specify the prefixes of the variable groups
sep = ".") %>%
#sep is to specify the character that separates the "variable name" from the "times" in the source
drop_na(wave)
#drop the observations which did not join the wave
Use pooled regression to estimate the effect of first births on women’s life satisfaction?
panel_data <- pdata.frame(women_long, index=c("id", "wave"))
pols <- plm(sat ~ childno, data=panel_data, model="pooling")
summary(pols)
## Pooling Model
##
## Call:
## plm(formula = sat ~ childno, data = panel_data, model = "pooling")
##
## Unbalanced Panel: n = 3770, T = 1-6, N = 15024
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -7.73697 -0.63976 0.36024 1.36024 2.36024
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 7.639758 0.013807 553.3134 < 2e-16 ***
## childno 0.097211 0.043509 2.2343 0.02548 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 40514
## Residual Sum of Squares: 40501
## R-Squared: 0.00033219
## Adj. R-Squared: 0.00026565
## F-statistic: 4.99188 on 1 and 15022 DF, p-value: 0.025481
Use fixed effect to estimate the effect of childbearing on women’s life satisfaction?
fixed <- plm(sat ~ childno, data=panel_data, model="within")
summary(fixed)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = sat ~ childno, data = panel_data, model = "within")
##
## Unbalanced Panel: n = 3770, T = 1-6, N = 15024
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -8.0000 -0.5000 0.0000 0.6000 5.6667
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## childno 0.037299 0.052198 0.7146 0.4749
##
## Total Sum of Squares: 17346
## Residual Sum of Squares: 17345
## R-Squared: 4.5372e-05
## Adj. R-Squared: -0.33496
## F-statistic: 0.510598 on 1 and 11253 DF, p-value: 0.47489
Compare the results from the pooled OLS and fixed effect
texreg::screenreg(list(pols, fixed),
custom.model.names=c("Pooled regression",
"Fixed effect(within)"),
include.ci = FALSE,
omit.coef = "factor",
center=TRUE)
##
## ====================================================
## Pooled regression Fixed effect(within)
## ----------------------------------------------------
## (Intercept) 7.64 ***
## (0.01)
## childno 0.10 * 0.04
## (0.04) (0.05)
## ----------------------------------------------------
## R^2 0.00 0.00
## Adj. R^2 0.00 -0.33
## Num. obs. 15024 15024
## ====================================================
## *** p < 0.001; ** p < 0.01; * p < 0.05
table(panel_data$childno)
##
## 0 1 2 3
## 14044 809 167 4
#there are many cases that contains second birth.
#first, identify individuals who have twins
panel_data <- panel_data %>%
group_by(id) %>%
mutate(
firstkid=case_when( childno!=lag(childno, 1) & lag(childno, 1)==0 & childno>0 ~ 1,
TRUE ~ 0),
#when the person has 0 children at t-1 while has at least 1 child at t, define it first childbirth
twin=case_when( childno!=lag(childno, 1) & lag(childno, 1)==0 & childno==1 ~ 1, #single birth
childno!=lag(childno, 1) & lag(childno, 1)==0 & childno==2 ~ 2, #twin birth
TRUE ~ 0)
#when the person has 0 children at t-1 while has 1 child at t, define it a single birth, i.e. 1
#when the person has 0 children at t-1 while has 2 children at t, define it a twin birth, i.e. 2
)
#second, remove individuals who have twins
twinid <- panel_data$id[panel_data$twin==2] #the id of women who have twin for their first birth
panel_data <- panel_data[!(panel_data$id %in% twinid),] #remove twin cases
#when the id does not belong to the list of id that are individuals having twin at first childbearing
panel_data <- filter(panel_data, childno<2) #keep observations right before women having second child
table(panel_data$childno)
##
## 0 1
## 14044 809
#now we are sure people here are those who only have one child
Step1: Now, please run fixed effect to find out the effect of first birth on women’s life satisfaction. Interpret the result.
Step2: Compare the result from Question 3, and is there any difference?
fixed2 <- plm(sat ~ childno, data=panel_data, model="within")
summary(fixed2)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = sat ~ childno, data = panel_data, model = "within")
##
## Unbalanced Panel: n = 3770, T = 1-6, N = 14853
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -8.0000 -0.5000 0.0000 0.6000 5.6667
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## childno 0.183487 0.068156 2.6922 0.00711 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 17022
## Residual Sum of Squares: 17011
## R-Squared: 0.00065359
## Adj. R-Squared: -0.33932
## F-statistic: 7.24777 on 1 and 11082 DF, p-value: 0.0071096
##Interpretation:
#A within-person change from childless to having a first child is associated with 0.18 scale points increase in life satisfaction for women (always correct interpretation).
#When having a first child, life satisfaction is 0.18 points higher than when being childless (always correct interpretation).
texreg::screenreg(list(fixed, fixed2),
custom.model.names=c("Fixed effect(within)",
"Fixed effect(within)2"),
include.ci = FALSE,
center=TRUE)
##
## ======================================================
## Fixed effect(within) Fixed effect(within)2
## ------------------------------------------------------
## childno 0.04 0.18 **
## (0.05) (0.07)
## ------------------------------------------------------
## R^2 0.00 0.00
## Adj. R^2 -0.33 -0.34
## Num. obs. 15024 14853
## ======================================================
## *** p < 0.001; ** p < 0.01; * p < 0.05