No. 1

Question

Based on pairfam wave1 dataset, compile a dataset consists of variables as following:

age, sex(sex_gen), life satisfaction (sat6), relstat (relationship), no.of children(nkidsbio), weight (cdweight)

Drop observations with missing values

Answer

library(tidyverse) # Add the tidyverse package to my current library.
library(haven) # Import data.
library(Hmisc) # Weighting
library(ggplot2) # Allows us to create nice figures.
library(estimatr) # Allows us to estimate (cluster-)robust standard errors.
library(texreg) # Allows us to make nicely-formatted Html & Latex regression tables.
dataset1 <- read_dta("anchor1_50percent_Eng.dta")
#a dataset of age, sex, life satisfaction, no.of children ever born, weight
dataset2 <- dataset1 %>% 
  transmute(
           age=zap_labels(age), #remove labels of age
           sex_gen=as_factor(sex_gen) %>% fct_drop(), #treat sex_gen as a categorical, and drop unused levels
           nkidsbio=case_when(nkidsbio<0 ~ as.numeric(NA),TRUE ~ as.numeric(nkidsbio)) %>% 
             zap_label(), #specify when nkidsbio should be missing and then remove the label of nkidsbio
           sat6=case_when(sat6<0 ~ as.numeric(NA), TRUE ~ as.numeric(sat6)) %>% #specify when sat should be considered missing
             zap_label(), #remove labels of sat6
           relstat=as_factor(relstat), #treat relationship status as categorical
           relstat=case_when(relstat=="-7 Incomplete data" ~ as.character(NA), TRUE ~ as.character(relstat)) %>%  #specify when relstat should be considered missing
             as_factor() %>%  #make relstat as a factor again, as the ~ as.character(NA) has made it a character variable
             fct_drop(), #drop unused levels in relstat
           cdweight=zap_label(cdweight))  %>%  #remove labels of cdweight
  drop_na() #remove all observations that are missing

No. 2

Question

  • Generate a new variable “marital” for relationship status with categories of “Never married”, “Married”, “Separate/Divorced”, “Widowed”.
  • Generate a new variable “parenthood” based on nkidsbio with categories of “Have children”, “No kids”.

Answer

dataset3 <- dataset2 %>% 
  mutate(
    marital=case_when(
      relstat %in% c("1 Never married single","2 Never married LAT","3 Never married COHAB") ~ "Nevermarried",
      # when relstat has any of the three situations, I assign "Nevermarried" to new variable "marital1"
      
      relstat %in% c("4 Married COHAB","5 Married noncohabiting") ~ 'Married',
      # when relstat has any of the two situations, I assign "Married" to new variable "marital1
      
      relstat %in% c("6 Divorced/separated single","7 Divorced/separated LAT","8 Divorced/separated COHAB") ~ 'Divorced',
      # when relstat has any of the three situations, I assign "Divorced" to new variable "marital1"
      
      relstat %in% c("9 Widowed single","10 Widowed LAT") ~ 'Widow'
      # when relstat has any of the two situations, I assign "Widow" to new variable "marital1"
      
    ) %>% as_factor(), 
    #after case_when, marital is a character variable, so make it a facto nor
    parenthood=case_when(
      nkidsbio>0 ~ "Have kids",
      # when nkidsbio should be "Have kids"
      
      nkidsbio==0 ~ "No kids") %>% # when nkidsbio should be "No kids"
      as_factor() #make parenthood a categorical variable 
    )

No. 3

Question

  • Model regress life satisfaction on marital status and parenthood.
  • The coefficient for marital status is _____. The coefficient for parenthood is _____.

Answer

#check variables first
table(dataset3$marital) # there are only 4 observations in "Widow"
## 
## Nevermarried      Married     Divorced        Widow 
##         4114         1756          284            4
table(dataset3$parenthood)
## 
##   No kids Have kids 
##      4105      2053
#Therefore, I decided to drop the 4 widow observations

dataset4 <- filter(dataset3, marital!= "Widow") #exclude respondents who are widow

regression1 <- lm_robust(data = dataset4,
                         formula = sat6 ~ marital + parenthood,
                         weights = cdweight)
summary(regression1)
## 
## Call:
## lm_robust(formula = sat6 ~ marital + parenthood, data = dataset4, 
##     weights = cdweight)
## 
## Weighted, Standard error type:  HC2 
## 
## Coefficients:
##                     Estimate Std. Error t value  Pr(>|t|) CI Lower CI Upper
## (Intercept)           7.5285    0.03204 234.992 0.000e+00   7.4657  7.59131
## maritalMarried        0.4271    0.08673   4.925 8.663e-07   0.2571  0.59715
## maritalDivorced      -0.8738    0.17238  -5.069 4.116e-07  -1.2117 -0.53588
## parenthoodHave kids  -0.2109    0.08680  -2.430 1.513e-02  -0.3811 -0.04076
##                       DF
## (Intercept)         6150
## maritalMarried      6150
## maritalDivorced     6150
## parenthoodHave kids 6150
## 
## Multiple R-squared:  0.02103 ,   Adjusted R-squared:  0.02055 
## F-statistic: 27.01 on 3 and 6150 DF,  p-value: < 2.2e-16

No. 4

Question

  • Model 2: Add age and sex as controls. What happen to the estimate for marital status and parenthood.
  • Model 3: Based on Model2, add an interaction between parenthood and marital status.

Answer

regression2 <- lm_robust(data = dataset4,
                         formula = sat6 ~ marital + parenthood + age + sex_gen,
                         weights = cdweight)
summary(regression2)
## 
## Call:
## lm_robust(formula = sat6 ~ marital + parenthood + age + sex_gen, 
##     data = dataset4, weights = cdweight)
## 
## Weighted, Standard error type:  HC2 
## 
## Coefficients:
##                     Estimate Std. Error t value  Pr(>|t|) CI Lower CI Upper
## (Intercept)          8.53299   0.101602 83.9848 0.000e+00  8.33382  8.73217
## maritalMarried       0.70646   0.092586  7.6303 2.699e-14  0.52495  0.88796
## maritalDivorced     -0.55662   0.177203 -3.1411 1.691e-03 -0.90400 -0.20924
## parenthoodHave kids  0.04590   0.092878  0.4942 6.212e-01 -0.13617  0.22797
## age                 -0.04473   0.004602 -9.7191 3.606e-22 -0.05375 -0.03571
## sex_gen2 Female      0.03982   0.051325  0.7759 4.379e-01 -0.06079  0.14044
##                       DF
## (Intercept)         6148
## maritalMarried      6148
## maritalDivorced     6148
## parenthoodHave kids 6148
## age                 6148
## sex_gen2 Female     6148
## 
## Multiple R-squared:  0.04644 ,   Adjusted R-squared:  0.04567 
## F-statistic: 35.11 on 5 and 6148 DF,  p-value: < 2.2e-16
regression3 <- lm_robust(data = dataset4,
                         formula = sat6 ~ marital*parenthood + age + sex_gen,
                         weights = cdweight)
summary(regression3)
## 
## Call:
## lm_robust(formula = sat6 ~ marital * parenthood + age + sex_gen, 
##     data = dataset4, weights = cdweight)
## 
## Weighted, Standard error type:  HC2 
## 
## Coefficients:
##                                     Estimate Std. Error t value  Pr(>|t|)
## (Intercept)                          8.51169   0.102365 83.1502 0.000e+00
## maritalMarried                       0.52119   0.125159  4.1643 3.166e-05
## maritalDivorced                     -0.55506   0.386008 -1.4379 1.505e-01
## parenthoodHave kids                 -0.16787   0.135641 -1.2376 2.159e-01
## age                                 -0.04325   0.004679 -9.2430 3.238e-20
## sex_gen2 Female                      0.04928   0.051414  0.9585 3.379e-01
## maritalMarried:parenthoodHave kids   0.41169   0.177384  2.3209 2.033e-02
## maritalDivorced:parenthoodHave kids  0.16529   0.434183  0.3807 7.035e-01
##                                     CI Lower CI Upper   DF
## (Intercept)                          8.31101  8.71236 6146
## maritalMarried                       0.27584  0.76655 6146
## maritalDivorced                     -1.31177  0.20166 6146
## parenthoodHave kids                 -0.43378  0.09803 6146
## age                                 -0.05243 -0.03408 6146
## sex_gen2 Female                     -0.05151  0.15007 6146
## maritalMarried:parenthoodHave kids   0.06395  0.75942 6146
## maritalDivorced:parenthoodHave kids -0.68586  1.01644 6146
## 
## Multiple R-squared:  0.04774 ,   Adjusted R-squared:  0.04665 
## F-statistic: 26.53 on 7 and 6146 DF,  p-value: < 2.2e-16

No. 5

Question

Make a regression table of the results of Models 1, 2 and 3.

Answer

texreg::screenreg(list(regression1, regression2, regression3), 
                  include.ci = FALSE, digits = 3) # include.ci=FALSE means don't include CI, digit=3 to specify the digits of your result. 
## 
## =============================================================================
##                                      Model 1       Model 2       Model 3     
## -----------------------------------------------------------------------------
## (Intercept)                             7.529 ***     8.533 ***     8.512 ***
##                                        (0.032)       (0.102)       (0.102)   
## maritalMarried                          0.427 ***     0.706 ***     0.521 ***
##                                        (0.087)       (0.093)       (0.125)   
## maritalDivorced                        -0.874 ***    -0.557 **     -0.555    
##                                        (0.172)       (0.177)       (0.386)   
## parenthoodHave kids                    -0.211 *       0.046        -0.168    
##                                        (0.087)       (0.093)       (0.136)   
## age                                                  -0.045 ***    -0.043 ***
##                                                      (0.005)       (0.005)   
## sex_gen2 Female                                       0.040         0.049    
##                                                      (0.051)       (0.051)   
## maritalMarried:parenthoodHave kids                                  0.412 *  
##                                                                    (0.177)   
## maritalDivorced:parenthoodHave kids                                 0.165    
##                                                                    (0.434)   
## -----------------------------------------------------------------------------
## R^2                                     0.021         0.046         0.048    
## Adj. R^2                                0.021         0.046         0.047    
## Num. obs.                            6154          6154          6154        
## RMSE                                    1.781         1.758         1.757    
## =============================================================================
## *** p < 0.001; ** p < 0.01; * p < 0.05