Prepare the dataset for analysis

library(tidyverse) # Add the tidyverse package to my current library.
library(haven) # Import data.
library(Hmisc) # Weighting
library(estimatr) # Allows us to estimate (cluster-)robust standard errors.
library(texreg) # Allows us to make nicely-formatted Html & Latex regression tables.
dataset1 <- read_dta("anchor1_50percent_Eng.dta")
#a dataset of age, sex, life satisfaction, no.of children ever born, weight
dataset2 <- dataset1 %>% 
  transmute(
           age=zap_labels(age), #remove labels of age
           sex_gen=as_factor(sex_gen) %>% fct_drop(), #treat sex_gen as a categorical, and drop unused levels
           nkidsbio=case_when(nkidsbio<0 ~ as.numeric(NA),TRUE ~ as.numeric(nkidsbio)) %>% 
             zap_label(), #specify when nkidsbio should be missing and then remove the label of nkidsbio
           sat6=case_when(sat6<0 ~ as.numeric(NA), TRUE ~ as.numeric(sat6)) %>% #specify when sat should be considered missing
             zap_label(), #remove labels of sat6
           relstat=as_factor(relstat), #treat relationship status as categorical
           relstat=case_when(relstat=="-7 Incomplete data" ~ as.character(NA), TRUE ~ as.character(relstat)) %>%  #specify when relstat should be considered missing
             as_factor() %>%  #make relstat as a factor again, as the ~ as.character(NA) has made it a character variable
             fct_drop(), #drop unused levels in relstat
           cdweight=zap_label(cdweight))  %>%  #remove labels of cdweight
  drop_na() #remove all observations that are missing


dataset3 <- dataset2 %>% 
  mutate(
    marital=case_when(
      relstat %in% c("1 Never married single","2 Never married LAT","3 Never married COHAB") ~ "Nevermarried",
      # when relstat has any of the three situations, I assign "Nevermarried" to new variable "marital1"
      
      relstat %in% c("4 Married COHAB","5 Married noncohabiting") ~ 'Married',
      # when relstat has any of the two situations, I assign "Married" to new variable "marital1
      
      relstat %in% c("6 Divorced/separated single","7 Divorced/separated LAT","8 Divorced/separated COHAB") ~ 'Divorced',
      # when relstat has any of the three situations, I assign "Divorced" to new variable "marital1"
      
      relstat %in% c("9 Widowed single","10 Widowed LAT") ~ 'Widow'
      # when relstat has any of the two situations, I assign "Widow" to new variable "marital1"
      
    ) %>% as_factor(), 
    #after case_when, marital is a character variable, so make it a facto nor
    parenthood=case_when(
      nkidsbio>0 ~ "Have kids",
      # when nkidsbio should be "Have kids"
      
      nkidsbio==0 ~ "No kids") %>% # when nkidsbio should be "No kids"
      as_factor() #make parenthood a categorical variable 
    )
dataset4 <- filter(dataset3, marital!= "Widow") #exclude respondents who are widow

No. 1

Question

  • 1) Based on the dataset cleaned above, please create model1: Regress life satisfaction on marital status and parenthood while considering the weight and robust standard error
  • 2) Add age varialbe to Model1

Answer

#1):Create model1: Regress life satisfaction on marital status and parenthood while considering the weight and robust standard error

model1 <- lm_robust(data = dataset4,
                         formula = sat6 ~ marital + parenthood,
                         weights = cdweight)
#ols regression with considering weight and robust standard errors
summary(model1)
## 
## Call:
## lm_robust(formula = sat6 ~ marital + parenthood, data = dataset4, 
##     weights = cdweight)
## 
## Weighted, Standard error type:  HC2 
## 
## Coefficients:
##                     Estimate Std. Error t value  Pr(>|t|) CI Lower CI Upper
## (Intercept)           7.5285    0.03204 234.992 0.000e+00   7.4657  7.59131
## maritalMarried        0.4271    0.08673   4.925 8.663e-07   0.2571  0.59715
## maritalDivorced      -0.8738    0.17238  -5.069 4.116e-07  -1.2117 -0.53588
## parenthoodHave kids  -0.2109    0.08680  -2.430 1.513e-02  -0.3811 -0.04076
##                       DF
## (Intercept)         6150
## maritalMarried      6150
## maritalDivorced     6150
## parenthoodHave kids 6150
## 
## Multiple R-squared:  0.02103 ,   Adjusted R-squared:  0.02055 
## F-statistic: 27.01 on 3 and 6150 DF,  p-value: < 2.2e-16
#2) Add age varialbe to Model1
model2 <- lm_robust(data = dataset4,
                         formula = sat6 ~ marital + parenthood + age,
                         weights = cdweight)
#ols regression with considering weight and robust standard errors
summary(model2)
## 
## Call:
## lm_robust(formula = sat6 ~ marital + parenthood + age, data = dataset4, 
##     weights = cdweight)
## 
## Weighted, Standard error type:  HC2 
## 
## Coefficients:
##                     Estimate Std. Error t value  Pr(>|t|) CI Lower CI Upper
## (Intercept)          8.55612   0.097218 88.0098 0.000e+00  8.36554  8.74670
## maritalMarried       0.71006   0.092423  7.6827 1.802e-14  0.52887  0.89124
## maritalDivorced     -0.55182   0.176797 -3.1212 1.810e-03 -0.89840 -0.20523
## parenthoodHave kids  0.05082   0.092537  0.5492 5.829e-01 -0.13058  0.23223
## age                 -0.04497   0.004593 -9.7906 1.802e-22 -0.05397 -0.03596
##                       DF
## (Intercept)         6149
## maritalMarried      6149
## maritalDivorced     6149
## parenthoodHave kids 6149
## age                 6149
## 
## Multiple R-squared:  0.04632 ,   Adjusted R-squared:  0.0457 
## F-statistic: 43.84 on 4 and 6149 DF,  p-value: < 2.2e-16

No. 2

Question

  • Export the results of the two models to a html format and change the names of variables so that the output is easy to read

Answer

#Export to html
texreg::htmlreg(list(model1, model2), 
                custom.coef.names=c(
                  "Intercept",
                  "Married",
                  "Divorced",
                  "Have kids", 
                  "Age"),
                include.ci = FALSE, 
                file = "MyOLSModels.html") #html
## The table was written to the file 'MyOLSModels.html'.

No. 3

Question

  • Model3: Add an interaction between parenthood and gender to Model 2
  • Does the relationship between parenthood and life satisfaction vary across gender?
  • How to interpret the interaction”

Answer

model3 <- lm_robust(data = dataset4,
                         formula = sat6 ~ marital + parenthood*sex_gen + age,
                         weights = cdweight)
summary(model3)
## 
## Call:
## lm_robust(formula = sat6 ~ marital + parenthood * sex_gen + age, 
##     data = dataset4, weights = cdweight)
## 
## Weighted, Standard error type:  HC2 
## 
## Coefficients:
##                                      Estimate Std. Error  t value  Pr(>|t|)
## (Intercept)                          8.545482   0.102278 83.55112 0.000e+00
## maritalMarried                       0.710073   0.092718  7.65839 2.174e-14
## maritalDivorced                     -0.558290   0.177113 -3.15217 1.628e-03
## parenthoodHave kids                 -0.005611   0.115252 -0.04869 9.612e-01
## sex_gen2 Female                      0.012087   0.061776  0.19565 8.449e-01
## age                                 -0.044742   0.004603 -9.71935 3.599e-22
## parenthoodHave kids:sex_gen2 Female  0.091956   0.110740  0.83038 4.064e-01
##                                     CI Lower CI Upper   DF
## (Intercept)                          8.34498  8.74598 6147
## maritalMarried                       0.52831  0.89183 6147
## maritalDivorced                     -0.90549 -0.21109 6147
## parenthoodHave kids                 -0.23155  0.22032 6147
## sex_gen2 Female                     -0.10902  0.13319 6147
## age                                 -0.05377 -0.03572 6147
## parenthoodHave kids:sex_gen2 Female -0.12513  0.30905 6147
## 
## Multiple R-squared:  0.04658 ,   Adjusted R-squared:  0.04565 
## F-statistic: 29.35 on 6 and 6147 DF,  p-value: < 2.2e-16
texreg::screenreg(list(model3), 
                  custom.model.names="OLS", #change the model name
                  custom.coef.names=c( #change the variable name
                    "Intercept",
                    "Married",
                    "Divorced",
                    "Have kids",
                    "Female",
                    "Age",
                    "Have kids X Female"
                  ),
                  include.ci = FALSE, digits = 3)
## 
## ================================
##                     OLS         
## --------------------------------
## Intercept              8.545 ***
##                       (0.102)   
## Married                0.710 ***
##                       (0.093)   
## Divorced              -0.558 ** 
##                       (0.177)   
## Have kids             -0.006    
##                       (0.115)   
## Female                 0.012    
##                       (0.062)   
## Age                   -0.045 ***
##                       (0.005)   
## Have kids X Female     0.092    
##                       (0.111)   
## --------------------------------
## R^2                    0.047    
## Adj. R^2               0.046    
## Num. obs.           6154        
## RMSE                   1.758    
## ================================
## *** p < 0.001; ** p < 0.01; * p < 0.05
#Interpretation: A male respondent who has kids has lower life satisfaction than a male respondents who has no kids by about 0.006. This association is reversed for female. A female respondent who has kids has higher life satisfaction than a female respondents who has no kids by about (-0.006+0.092). Nevertheless, the interaction is not signicant at 5%.

No. 4

Question

  • Model4: Add an interaction between age and gender to Model 2
  • Does the relationship between age and life satisfaction vary across gender?
  • How to interpret the interaction

Answer

model4 <- lm_robust(data = dataset4,
                         formula = sat6 ~ marital + parenthood + age*sex_gen,
                         weights = cdweight)
summary(model4)
## 
## Call:
## lm_robust(formula = sat6 ~ marital + parenthood + age * sex_gen, 
##     data = dataset4, weights = cdweight)
## 
## Weighted, Standard error type:  HC2 
## 
## Coefficients:
##                     Estimate Std. Error t value  Pr(>|t|)  CI Lower CI Upper
## (Intercept)          8.71844   0.124333 70.1218 0.000e+00  8.474707  8.96218
## maritalMarried       0.70530   0.092665  7.6113 3.122e-14  0.523644  0.88696
## maritalDivorced     -0.56368   0.177008 -3.1845 1.457e-03 -0.910673 -0.21668
## parenthoodHave kids  0.03547   0.093082  0.3811 7.032e-01 -0.147000  0.21795
## age                 -0.05159   0.005477 -9.4208 6.195e-21 -0.062330 -0.04086
## sex_gen2 Female     -0.35978   0.151285 -2.3782 1.743e-02 -0.656356 -0.06321
## age:sex_gen2 Female  0.01500   0.005912  2.5377 1.118e-02  0.003414  0.02659
##                       DF
## (Intercept)         6147
## maritalMarried      6147
## maritalDivorced     6147
## parenthoodHave kids 6147
## age                 6147
## sex_gen2 Female     6147
## age:sex_gen2 Female 6147
## 
## Multiple R-squared:  0.04761 ,   Adjusted R-squared:  0.04668 
## F-statistic: 29.57 on 6 and 6147 DF,  p-value: < 2.2e-16
texreg::screenreg(list(model4), 
                  custom.model.names="OLS", #change the model name
                  custom.coef.names=c( #change the variable name
                    "Intercept",
                    "Married",
                    "Divorced",
                    "Have kids",
                    "Age",
                    "Female",
                    "Age X Female"
                  ),
                  include.ci = FALSE, digits = 3)
## 
## ==========================
##               OLS         
## --------------------------
## Intercept        8.718 ***
##                 (0.124)   
## Married          0.705 ***
##                 (0.093)   
## Divorced        -0.564 ** 
##                 (0.177)   
## Have kids        0.035    
##                 (0.093)   
## Age             -0.052 ***
##                 (0.005)   
## Female          -0.360 *  
##                 (0.151)   
## Age X Female     0.015 *  
##                 (0.006)   
## --------------------------
## R^2              0.048    
## Adj. R^2         0.047    
## Num. obs.     6154        
## RMSE             1.757    
## ==========================
## *** p < 0.001; ** p < 0.01; * p < 0.05
#Interpretation: The relationship between age and life satisfaction varies across gender.
#As the age of the male respondent increases by 1 year, the life satisfaction decreases by 0.052.
#For females, such negative association is weaker. As the age of the female respondent increases by 1 year, the life satisfaction decreases by (-0.052+0.015).