Exercise 5

No. 1

Question

Based on pairfam wave1 dataset, compile a dataset consists of variables as following:

age, sex(sex_gen), life satisfaction (sat6), relstat (relationship), no.of children(nkidsbio), weight (cdweight)

Drop observations with missing values

Answer

#Question1a: Based on pairfam wave1 dataset, compile a dataset consists of variables as following:age, sex(sex_gen), life satisfaction (sat6), relstat (relationship), no.of children(nkidsbio), weight (cdweight)
#Question1b: Drop observations with missing values

library(tidyverse) # Add the tidyverse package to my current library.
library(haven) # Import data.
library(Hmisc) # Weighting
library(estimatr) # Allows us to estimate (cluster-)robust standard errors.
library(texreg) # Allows us to make nicely-formatted Html & Latex regression tables.

dataset1 <- read_dta("anchor1_50percent_Eng.dta")
#a dataset of age, sex, life satisfaction, no.of children ever born, weight
dataset2 <- dataset1 %>% 
  transmute(
           age=zap_labels(age), #remove labels of age
           sex_gen=as_factor(sex_gen) %>% fct_drop(), #treat sex_gen as a categorical, and drop unused levels
           nkidsbio=case_when(nkidsbio<0 ~ as.numeric(NA),TRUE ~ as.numeric(nkidsbio)) %>% 
             zap_label(), #specify when nkidsbio should be missing and then remove the label of nkidsbio
           sat6=case_when(sat6<0 ~ as.numeric(NA), TRUE ~ as.numeric(sat6)) %>% #specify when sat should be considered missing
             zap_label(), #remove labels of sat6
           relstat=as_factor(relstat), #treat relationship status as categorical
           relstat=case_when(relstat=="-7 Incomplete data" ~ as.character(NA), TRUE ~ as.character(relstat)) %>%  #specify when relstat should be considered missing
             as_factor() %>%  #make relstat as a factor again, as the ~ as.character(NA) has made it a character variable
             fct_drop(), #drop unused levels in relstat
           cdweight=zap_label(cdweight))  %>%  #remove labels of cdweight
  drop_na() #remove all observations that are missing

No. 2

Question

Generate a new variable “marital” for relationship status with categories of “Never married”, “Married”, “Separate/Divorced”, “Widowed”.
Generate a new variable “parenthood” based on nkidsbio with categories of “Have kids”, “No kids”.

Answer

#Question2a:Generate a new variable "marital" for relationship status with categories of "Never married", "Married", "Separate/Divorced", "Widowed".
#Question2b:Generate a new variable "parenthood" based on nkidsbio with categories of "Have kids", "No kids".
dataset3 <- dataset2 %>% 
  mutate(
    marital=case_when(
      relstat %in% c("1 Never married single","2 Never married LAT","3 Never married COHAB") ~ "Nevermarried",
      # when relstat has any of the three situations, I assign "Nevermarried" to new variable "marital1"
      
      relstat %in% c("4 Married COHAB","5 Married noncohabiting") ~ 'Married',
      # when relstat has any of the two situations, I assign "Married" to new variable "marital1
      
      relstat %in% c("6 Divorced/separated single","7 Divorced/separated LAT","8 Divorced/separated COHAB") ~ 'Divorced',
      # when relstat has any of the three situations, I assign "Divorced" to new variable "marital1"
      
      relstat %in% c("9 Widowed single","10 Widowed LAT") ~ 'Widow'
      # when relstat has any of the two situations, I assign "Widow" to new variable "marital1"
      
    ) %>% as_factor(), 
    #after case_when, marital is a character variable, so make it a facto nor
    parenthood=case_when(
      nkidsbio>0 ~ "Have kids",
      # when nkidsbio should be "Have kids"
      
      nkidsbio==0 ~ "No kids") %>% # when nkidsbio should be "No kids"
      as_factor() #make parenthood a categorical variable 
    )

No. 3

Question

Model1: regress life satisfaction on marital status and parenthood while not considering the weight
Model2: regress life satisfaction on marital status and parenthood while considering the weight
Model3: regress life satisfaction on marital status and parenthood while considering the weight and robust standard error
The coefficient for marital status is _____. The coefficient for parenthood is _____.

Answer

#Question3a: Model1: regress life satisfaction on marital status and parenthood while not considering the weight
#Question3b: Model2: regress life satisfaction on marital status and parenthood while considering the weight
#Question3c: Model3: regress life satisfaction on marital status and parenthood while considering the weight and robust standard error
#Question3d: The coefficient for marital status is _____. The coefficient for parenthood is _____.


#check variables first
table(dataset3$marital) # there are only 4 observations in "Widow"

## 
## Nevermarried      Married     Divorced        Widow 
##         4114         1756          284            4

table(dataset3$parenthood)

## 
##   No kids Have kids 
##      4105      2053

#Therefore, I decided to drop the 4 widow observations

dataset4 <- filter(dataset3, marital!= "Widow") #exclude respondents who are widow
regression1 <- lm(data = dataset4,
                         formula = sat6 ~ marital + parenthood)
#ols regression without considering weight and robust standard errors
summary(regression1)

## 
## Call:
## lm(formula = sat6 ~ marital + parenthood, data = dataset4)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.0205 -0.6253  0.3747  1.3747  3.3752 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          7.62532    0.02768 275.519  < 2e-16 ***
## maritalMarried       0.39515    0.07162   5.518 3.58e-08 ***
## maritalDivorced     -0.74063    0.11734  -6.312 2.95e-10 ***
## parenthoodHave kids -0.25987    0.06964  -3.732 0.000192 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.732 on 6150 degrees of freedom
## Multiple R-squared:  0.01892,    Adjusted R-squared:  0.01844 
## F-statistic: 39.52 on 3 and 6150 DF,  p-value: < 2.2e-16

regression2 <- lm(data = dataset4,
                         formula = sat6 ~ marital + parenthood,
                         weights = cdweight)
#ols regression with considering weight 
summary(regression2)

## 
## Call:
## lm(formula = sat6 ~ marital + parenthood, data = dataset4, weights = cdweight)
## 
## Weighted Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.0649  -0.6243   0.3895   1.2512   4.3363 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          7.52851    0.02822 266.748  < 2e-16 ***
## maritalMarried       0.42713    0.06929   6.164 7.52e-10 ***
## maritalDivorced     -0.87381    0.12471  -7.007 2.70e-12 ***
## parenthoodHave kids -0.21093    0.06884  -3.064  0.00219 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.781 on 6150 degrees of freedom
## Multiple R-squared:  0.02103,    Adjusted R-squared:  0.02055 
## F-statistic: 44.04 on 3 and 6150 DF,  p-value: < 2.2e-16

regression3 <- lm_robust(data = dataset4,
                         formula = sat6 ~ marital + parenthood,
                         weights = cdweight)
#ols regression with considering weight and robust standard errors
summary(regression3)

## 
## Call:
## lm_robust(formula = sat6 ~ marital + parenthood, data = dataset4, 
##     weights = cdweight)
## 
## Weighted, Standard error type:  HC2 
## 
## Coefficients:
##                     Estimate Std. Error t value  Pr(>|t|) CI Lower CI Upper
## (Intercept)           7.5285    0.03204 234.992 0.000e+00   7.4657  7.59131
## maritalMarried        0.4271    0.08673   4.925 8.663e-07   0.2571  0.59715
## maritalDivorced      -0.8738    0.17238  -5.069 4.116e-07  -1.2117 -0.53588
## parenthoodHave kids  -0.2109    0.08680  -2.430 1.513e-02  -0.3811 -0.04076
##                       DF
## (Intercept)         6150
## maritalMarried      6150
## maritalDivorced     6150
## parenthoodHave kids 6150
## 
## Multiple R-squared:  0.02103 ,   Adjusted R-squared:  0.02055 
## F-statistic: 27.01 on 3 and 6150 DF,  p-value: < 2.2e-16

No. 4

Question

Model 4: Add age and sex to Model3 as controls. What happen to the estimate for marital status and parenthood.

Answer

#Question4: Model 4: Add age and sex to Model3 as controls. What happen to the estimate for marital status and parenthood.

regression4 <- lm_robust(data = dataset4,
                         formula = sat6 ~ marital + parenthood + age + sex_gen,
                         weights = cdweight)
summary(regression4)

## 
## Call:
## lm_robust(formula = sat6 ~ marital + parenthood + age + sex_gen, 
##     data = dataset4, weights = cdweight)
## 
## Weighted, Standard error type:  HC2 
## 
## Coefficients:
##                     Estimate Std. Error t value  Pr(>|t|) CI Lower CI Upper
## (Intercept)          8.53299   0.101602 83.9848 0.000e+00  8.33382  8.73217
## maritalMarried       0.70646   0.092586  7.6303 2.699e-14  0.52495  0.88796
## maritalDivorced     -0.55662   0.177203 -3.1411 1.691e-03 -0.90400 -0.20924
## parenthoodHave kids  0.04590   0.092878  0.4942 6.212e-01 -0.13617  0.22797
## age                 -0.04473   0.004602 -9.7191 3.606e-22 -0.05375 -0.03571
## sex_gen2 Female      0.03982   0.051325  0.7759 4.379e-01 -0.06079  0.14044
##                       DF
## (Intercept)         6148
## maritalMarried      6148
## maritalDivorced     6148
## parenthoodHave kids 6148
## age                 6148
## sex_gen2 Female     6148
## 
## Multiple R-squared:  0.04644 ,   Adjusted R-squared:  0.04567 
## F-statistic: 35.11 on 5 and 6148 DF,  p-value: < 2.2e-16

No. 5

Question

Make a regression table of the results of Models 1, 2, 3 and 4.

Answer

#Question5: Make a regression table of the results of Models 1, 2, 3 and 4.

texreg::screenreg(list(regression1, regression2, regression3, regression4), 
                  include.ci = FALSE, digits = 3)

## 
## ===========================================================================
##                      Model 1       Model 2       Model 3       Model 4     
## ---------------------------------------------------------------------------
## (Intercept)             7.625 ***     7.529 ***     7.529 ***     8.533 ***
##                        (0.028)       (0.028)       (0.032)       (0.102)   
## maritalMarried          0.395 ***     0.427 ***     0.427 ***     0.706 ***
##                        (0.072)       (0.069)       (0.087)       (0.093)   
## maritalDivorced        -0.741 ***    -0.874 ***    -0.874 ***    -0.557 ** 
##                        (0.117)       (0.125)       (0.172)       (0.177)   
## parenthoodHave kids    -0.260 ***    -0.211 **     -0.211 *       0.046    
##                        (0.070)       (0.069)       (0.087)       (0.093)   
## age                                                              -0.045 ***
##                                                                  (0.005)   
## sex_gen2 Female                                                   0.040    
##                                                                  (0.051)   
## ---------------------------------------------------------------------------
## R^2                     0.019         0.021         0.021         0.046    
## Adj. R^2                0.018         0.021         0.021         0.046    
## Num. obs.            6154          6154          6154          6154        
## RMSE                                                1.781         1.758    
## ===========================================================================
## *** p < 0.001; ** p < 0.01; * p < 0.05

# include.ci=FALSE means don't include CI, digit=3 to specify the digits of your result.

Exercise 5

Mengni Chen

2023-10-04

No. 1

Question

Answer

No. 2

Question

Answer

No. 3

Question

Answer

No. 4

Question

Answer

No. 5

Question

Answer