Exercise9

Now the question is: “Does a first birth affect women’s life satisfaction?” First, prepare the dataset.

library(tidyverse) # Add the tidyverse package to my current library.
library(haven) # Handle labelled data.
library(splitstackshape) #transform wide data (with stacked variables) to long data
library(ggplot2)
library(plm)

## Import the 6 waves of women data
# For every wave number i, read "wave<i>_women.dta" and store the result in an
# object called "women<i>" (women1, women2, ..., women6).
for (i in seq_len(6)) {
  assign(
    x = sprintf("women%d", i),
    value = read_dta(sprintf("wave%d_women.dta", i))
  )
}

## Clean one wave of women data
# Takes the raw data of a single wave and returns a data frame with the
# columns id, age, wave, relstat, hlt, nkidsbio and sat. The missing-value
# codes handled below are recoded to NA, and every row that still contains an
# NA in any of these columns is dropped.
clean_fun <- function(df) {
  recoded <- transmute(
    df,
    id = zap_label(id), # strip the variable label from id
    age = zap_label(age), # strip the variable label from age
    wave = as.numeric(wave),
    # relstat: convert to a factor, blank out the "-7 Incomplete data"
    # category, then rebuild the factor so that category disappears
    relstat = as_factor(relstat),
    relstat = as_factor(
      case_when(
        relstat == "-7 Incomplete data" ~ NA_character_,
        TRUE ~ as.character(relstat)
      )
    ),
    # hlt: negative values of hlt1 are treated as missing
    hlt = case_when(
      hlt1 < 0 ~ NA_real_,
      TRUE ~ as.numeric(hlt1)
    ),
    # nkidsbio: the code -7 is treated as missing
    nkidsbio = case_when(
      nkidsbio == -7 ~ NA_real_,
      TRUE ~ as.numeric(nkidsbio)
    ),
    # sat: negative values of sat6 are treated as missing
    sat = case_when(
      sat6 < 0 ~ NA_real_,
      TRUE ~ as.numeric(sat6)
    )
  )
  drop_na(recoded)
}
# Apply the same cleaning step to each of the six waves.
women1a <- women1 %>% clean_fun()
women2a <- women2 %>% clean_fun()
women3a <- women3 %>% clean_fun()
women4a <- women4 %>% clean_fun()
women5a <- women5 %>% clean_fun()
women6a <- women6 %>% clean_fun()

# Wave 1: keep only women who are childless in the first wave, then append
# ".1" to every column except id (id stays unchanged because it is the join key).
women1b <- women1a %>%
  filter(nkidsbio == 0) %>%
  rename_with(~ paste0(.x, ".1"), .cols = -id)

# Waves 2-6: append the wave number to every column except id.
women2b <- women2a %>%
  rename_with(~ paste0(.x, ".2"), .cols = -id)

women3b <- women3a %>%
  rename_with(~ paste0(.x, ".3"), .cols = -id)

women4b <- women4a %>%
  rename_with(~ paste0(.x, ".4"), .cols = -id)

women5b <- women5a %>%
  rename_with(~ paste0(.x, ".5"), .cols = -id)

women6b <- women6a %>%
  rename_with(~ paste0(.x, ".6"), .cols = -id)


# Build the wide dataset: start from women1b and left-join waves 2 to 6 onto
# it one after another, always matching on id.
# Because every join is a left join, only the women kept in women1b (childless
# in the first wave) stay in the data, and they are followed across the waves.
women_wide <- list(women1b, women2b, women3b, women4b, women5b, women6b) %>%
  reduce(function(joined, next_wave) left_join(joined, next_wave, by = "id"))

No. 1

Question

Check how many women have a child over the six waves. What is your answer? ______

Answer

# Reshape the wide data to long format (one row per woman and wave).
#  - data:      the wide dataset to transform
#  - var.stubs: the prefixes of the variable groups to stack
#  - sep:       the character separating the variable name from the wave
#               number in the wide column names (e.g. "age.1")
# Rows with a missing wave (the woman has no data for that wave) are dropped.
women_long <- drop_na(
  merged.stack(
    data = women_wide,
    var.stubs = c("age", "wave", "relstat", "hlt", "nkidsbio", "sat"),
    sep = "."
  ),
  wave
)

# Flag every person-wave in which the number of biological children is higher
# than in the woman's previous observed wave (i.e. a birth was recorded).
women_long <- women_long %>%
  group_by(id) %>%
  mutate(
    # lag() is evaluated within each woman; order_by = wave makes the
    # comparison with the previous wave explicit instead of relying on the
    # current row order. For a woman's first row the lag is NA, so the
    # comparison is NA and the row falls through to 0.
    # ("nkidsbio > lag" already implies "nkidsbio != lag", so one test suffices.)
    havingkid = case_when(
      nkidsbio > dplyr::lag(nkidsbio, 1, order_by = wave) ~ 1,
      TRUE ~ 0
    )
  ) %>%
  ungroup() # drop the grouping so later steps do not inherit it by accident
# NOTE: this table counts person-wave rows with a recorded birth, not distinct
# women; a woman with births in two waves contributes two rows to the 1s.
table(women_long$havingkid)

## 
##     0     1 
## 14597   427

No. 2

Question

Check how many women experience childbearing twice over the six waves. What is your answer? ______

Answer

# For every woman, add up the havingkid flags, i.e. count the number of waves
# in which a birth was recorded for her.
women_sum <- summarise(
  group_by(women_long, id),
  times_birth = sum(havingkid)
)
# Distribution of the number of recorded births per woman
table(women_sum$times_birth)

## 
##    0    1    2    3 
## 3421  274   72    3

No. 3

Question

Use a pooled regression to estimate the effect of first births on women’s life satisfaction.
Use a fixed-effects regression to estimate the effect of first births on women’s life satisfaction.
How do you interpret the result of the fixed-effects model?

Answer

# Flag first births within each woman:
#  - firstkid:  the previous observed wave had 0 children and the current wave
#               has at least one
#  - firstkid2: the previous observed wave had 0 children and the current wave
#               has exactly 2, i.e. the first recorded birth looks like twins
women_long <- women_long %>%
  group_by(id) %>%
  mutate(
    # order_by = wave makes lag() compare with the previous wave explicitly
    # instead of relying on the current row order. On a woman's first row the
    # lag is NA, so both flags fall through to 0.
    # (The original extra test "nkidsbio != lag" is implied by the other two
    # conditions and has been removed.)
    firstkid = case_when(
      dplyr::lag(nkidsbio, 1, order_by = wave) == 0 & nkidsbio > 0 ~ 1,
      TRUE ~ 0
    ),
    firstkid2 = case_when(
      dplyr::lag(nkidsbio, 1, order_by = wave) == 0 & nkidsbio == 2 ~ 1,
      TRUE ~ 0
    )
  ) %>%
  ungroup() # drop the grouping so later steps do not inherit it by accident
table(women_long$firstkid)

## 
##     0     1 
## 14674   350

table(women_long$firstkid2)

## 
##     0     1 
## 15007    17

# Collect the ids of the women whose first birth was flagged as twins
# (firstkid2 == 1).
twinid <- women_long %>%
  filter(firstkid2 == 1) %>%
  pull(id)

# Remove all rows of those women, then keep only the person-waves with fewer
# than two children, so that nkidsbio is 0 (childless) or 1 (one child).
women_long <- women_long %>%
  filter(!(id %in% twinid)) %>%
  filter(nkidsbio < 2)


# Declare the panel structure explicitly: id identifies the woman, wave the
# time period.
panel_data <- pdata.frame(women_long, index = c("id", "wave"))

# Pooled OLS of life satisfaction on the number of children (0 or 1 after the
# filtering above). The model is estimated on panel_data so that plm() uses
# the id/wave index declared above; passing women_long instead makes plm()
# fall back to treating the first two columns of the data as the index.
# (The object keeps its original name "fixedmodel1" although it is the pooled
# model, not a fixed-effects model.)
fixedmodel1 <- plm(sat ~ nkidsbio, data = panel_data, model = "pooling")
summary(fixedmodel1)

## Pooling Model
## 
## Call:
## plm(formula = sat ~ nkidsbio, data = women_long, model = "pooling")
## 
## Unbalanced Panel: n = 3753, T = 1-6, N = 14809
## 
## Residuals:
##    Min. 1st Qu.  Median 3rd Qu.    Max. 
## -7.7834 -0.6381  0.3619  1.3619  2.3619 
## 
## Coefficients:
##             Estimate Std. Error  t-value Pr(>|t|)    
## (Intercept) 7.638097   0.013859 551.1472  < 2e-16 ***
## nkidsbio    0.145319   0.059330   2.4493  0.01432 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    39832
## Residual Sum of Squares: 39816
## R-Squared:      0.00040499
## Adj. R-Squared: 0.00033749
## F-statistic: 5.99919 on 1 and 14807 DF, p-value: 0.014324

# Fixed-effects (within) regression of life satisfaction on the number of
# children. Estimated on panel_data so that plm() uses the id/wave index
# declared with pdata.frame(); passing women_long instead makes plm() fall
# back to treating the first two columns of the data as the index.
fixedmodel2 <- plm(sat ~ nkidsbio, data = panel_data, model = "within")
summary(fixedmodel2)

## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = sat ~ nkidsbio, data = women_long, model = "within")
## 
## Unbalanced Panel: n = 3753, T = 1-6, N = 14809
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -8.00000 -0.50000  0.00000  0.59092  5.66667 
## 
## Coefficients:
##          Estimate Std. Error t-value Pr(>|t|)   
## nkidsbio 0.181837   0.068196  2.6664 0.007679 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    16966
## Residual Sum of Squares: 16956
## R-Squared:      0.00064269
## Adj. R-Squared: -0.33862
## F-statistic: 7.10949 on 1 and 11055 DF, p-value: 0.0076788

# Interpretation:
# A within-person change from being childless to having a first child is associated with a 0.18 scale-point increase in women's life satisfaction (this interpretation is always correct).
# When a woman has a first child, her life satisfaction is 0.18 points higher than when she was childless (this interpretation is always correct).

Exercise9

Mengni Chen

2022-10-27

No. 1

Question

Answer

No. 2

Question

Answer

No. 3

Question

Answer