Based on pairfam wave1 dataset, compile a dataset consists of variables as following:
age, sex(sex_gen), life satisfaction (sat6), relstat (relationship), no.of children(nkidsbio), weight (cdweight)
Drop observations with missing values
library(tidyverse) # Add the tidyverse package to my current library.
library(haven) # Import data.
library(Hmisc) # Weighting
library(ggplot2) # Allows us to create nice figures.
library(estimatr) # Allows us to estimate (cluster-)robust standard errors.
library(texreg) # Allows us to make nicely-formatted Html & Latex regression tables.
dataset1 <- read_dta("anchor1_50percent_Eng.dta")
#a dataset of age, sex, life satisfaction, no.of children ever born, weight
dataset2 <- dataset1 %>%
transmute(
age=zap_labels(age), #remove labels of age
sex_gen=as_factor(sex_gen) %>% fct_drop(), #treat sex_gen as a categorical, and drop unused levels
nkidsbio=case_when(nkidsbio<0 ~ as.numeric(NA),TRUE ~ as.numeric(nkidsbio)) %>%
zap_label(), #specify when nkidsbio should be missing and then remove the label of nkidsbio
sat6=case_when(sat6<0 ~ as.numeric(NA), TRUE ~ as.numeric(sat6)) %>% #specify when sat should be considered missing
zap_label(), #remove labels of sat6
relstat=as_factor(relstat), #treat relationship status as categorical
relstat=case_when(relstat=="-7 Incomplete data" ~ as.character(NA), TRUE ~ as.character(relstat)) %>% #specify when relstat should be considered missing
as_factor() %>% #make relstat as a factor again, as the ~ as.character(NA) has made it a character variable
fct_drop(), #drop unused levels in relstat
cdweight=zap_label(cdweight)) %>% #remove labels of cdweight
drop_na() #remove all observations that are missing
dataset3 <- dataset2 %>%
mutate(
marital=case_when(
relstat %in% c("1 Never married single","2 Never married LAT","3 Never married COHAB") ~ "Nevermarried",
# when relstat has any of the three situations, I assign "Nevermarried" to new variable "marital1"
relstat %in% c("4 Married COHAB","5 Married noncohabiting") ~ 'Married',
# when relstat has any of the two situations, I assign "Married" to new variable "marital1
relstat %in% c("6 Divorced/separated single","7 Divorced/separated LAT","8 Divorced/separated COHAB") ~ 'Divorced',
# when relstat has any of the three situations, I assign "Divorced" to new variable "marital1"
relstat %in% c("9 Widowed single","10 Widowed LAT") ~ 'Widow'
# when relstat has any of the two situations, I assign "Widow" to new variable "marital1"
) %>% as_factor(),
#after case_when, marital is a character variable, so make it a facto nor
parenthood=case_when(
nkidsbio>0 ~ "Have kids",
# when nkidsbio should be "Have kids"
nkidsbio==0 ~ "No kids") %>% # when nkidsbio should be "No kids"
as_factor() #make parenthood a categorical variable
)
#check variables first
table(dataset3$marital) # there are only 4 observations in "Widow"
##
## Nevermarried Married Divorced Widow
## 4114 1756 284 4
table(dataset3$parenthood)
##
## No kids Have kids
## 4105 2053
#Therefore, I decided to drop the 4 widow observations
dataset4 <- filter(dataset3, marital!= "Widow") #exclude respondents who are widow
regression1 <- lm_robust(data = dataset4,
formula = sat6 ~ marital + parenthood,
weights = cdweight)
summary(regression1)
##
## Call:
## lm_robust(formula = sat6 ~ marital + parenthood, data = dataset4,
## weights = cdweight)
##
## Weighted, Standard error type: HC2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|) CI Lower CI Upper
## (Intercept) 7.5285 0.03204 234.992 0.000e+00 7.4657 7.59131
## maritalMarried 0.4271 0.08673 4.925 8.663e-07 0.2571 0.59715
## maritalDivorced -0.8738 0.17238 -5.069 4.116e-07 -1.2117 -0.53588
## parenthoodHave kids -0.2109 0.08680 -2.430 1.513e-02 -0.3811 -0.04076
## DF
## (Intercept) 6150
## maritalMarried 6150
## maritalDivorced 6150
## parenthoodHave kids 6150
##
## Multiple R-squared: 0.02103 , Adjusted R-squared: 0.02055
## F-statistic: 27.01 on 3 and 6150 DF, p-value: < 2.2e-16
regression2 <- lm_robust(data = dataset4,
formula = sat6 ~ marital + parenthood + age + sex_gen,
weights = cdweight)
summary(regression2)
##
## Call:
## lm_robust(formula = sat6 ~ marital + parenthood + age + sex_gen,
## data = dataset4, weights = cdweight)
##
## Weighted, Standard error type: HC2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|) CI Lower CI Upper
## (Intercept) 8.53299 0.101602 83.9848 0.000e+00 8.33382 8.73217
## maritalMarried 0.70646 0.092586 7.6303 2.699e-14 0.52495 0.88796
## maritalDivorced -0.55662 0.177203 -3.1411 1.691e-03 -0.90400 -0.20924
## parenthoodHave kids 0.04590 0.092878 0.4942 6.212e-01 -0.13617 0.22797
## age -0.04473 0.004602 -9.7191 3.606e-22 -0.05375 -0.03571
## sex_gen2 Female 0.03982 0.051325 0.7759 4.379e-01 -0.06079 0.14044
## DF
## (Intercept) 6148
## maritalMarried 6148
## maritalDivorced 6148
## parenthoodHave kids 6148
## age 6148
## sex_gen2 Female 6148
##
## Multiple R-squared: 0.04644 , Adjusted R-squared: 0.04567
## F-statistic: 35.11 on 5 and 6148 DF, p-value: < 2.2e-16
regression3 <- lm_robust(data = dataset4,
formula = sat6 ~ marital*parenthood + age + sex_gen,
weights = cdweight)
summary(regression3)
##
## Call:
## lm_robust(formula = sat6 ~ marital * parenthood + age + sex_gen,
## data = dataset4, weights = cdweight)
##
## Weighted, Standard error type: HC2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.51169 0.102365 83.1502 0.000e+00
## maritalMarried 0.52119 0.125159 4.1643 3.166e-05
## maritalDivorced -0.55506 0.386008 -1.4379 1.505e-01
## parenthoodHave kids -0.16787 0.135641 -1.2376 2.159e-01
## age -0.04325 0.004679 -9.2430 3.238e-20
## sex_gen2 Female 0.04928 0.051414 0.9585 3.379e-01
## maritalMarried:parenthoodHave kids 0.41169 0.177384 2.3209 2.033e-02
## maritalDivorced:parenthoodHave kids 0.16529 0.434183 0.3807 7.035e-01
## CI Lower CI Upper DF
## (Intercept) 8.31101 8.71236 6146
## maritalMarried 0.27584 0.76655 6146
## maritalDivorced -1.31177 0.20166 6146
## parenthoodHave kids -0.43378 0.09803 6146
## age -0.05243 -0.03408 6146
## sex_gen2 Female -0.05151 0.15007 6146
## maritalMarried:parenthoodHave kids 0.06395 0.75942 6146
## maritalDivorced:parenthoodHave kids -0.68586 1.01644 6146
##
## Multiple R-squared: 0.04774 , Adjusted R-squared: 0.04665
## F-statistic: 26.53 on 7 and 6146 DF, p-value: < 2.2e-16
Make a regression table of the results of Models 1, 2 and 3.
texreg::screenreg(list(regression1, regression2, regression3),
include.ci = FALSE, digits = 3) # include.ci=FALSE means don't include CI, digit=3 to specify the digits of your result.
##
## =============================================================================
## Model 1 Model 2 Model 3
## -----------------------------------------------------------------------------
## (Intercept) 7.529 *** 8.533 *** 8.512 ***
## (0.032) (0.102) (0.102)
## maritalMarried 0.427 *** 0.706 *** 0.521 ***
## (0.087) (0.093) (0.125)
## maritalDivorced -0.874 *** -0.557 ** -0.555
## (0.172) (0.177) (0.386)
## parenthoodHave kids -0.211 * 0.046 -0.168
## (0.087) (0.093) (0.136)
## age -0.045 *** -0.043 ***
## (0.005) (0.005)
## sex_gen2 Female 0.040 0.049
## (0.051) (0.051)
## maritalMarried:parenthoodHave kids 0.412 *
## (0.177)
## maritalDivorced:parenthoodHave kids 0.165
## (0.434)
## -----------------------------------------------------------------------------
## R^2 0.021 0.046 0.048
## Adj. R^2 0.021 0.046 0.047
## Num. obs. 6154 6154 6154
## RMSE 1.781 1.758 1.757
## =============================================================================
## *** p < 0.001; ** p < 0.01; * p < 0.05