With a dataset of age, sex, martial status, parenthood status, life satisfaction, and weight
#Run the following to prepare your dataset
library(tidyverse) # Add the tidyverse package to my current library.
library(haven) # Import data.
library(Hmisc) # Weighting
library(ggplot2) # Allows us to create nice figures.
library(estimatr) # Allows us to estimate (cluster-)robust standard errors.
library(texreg) # Allows us to make nicely-formatted Html & Latex regression tables.
dataset1 <- read_dta("anchor1_50percent_Eng.dta")
#a dataset of age, sex, life satisfaction, no.of children ever born, weight
dataset2 <- dataset1 %>%
transmute(
age=zap_labels(age),
sex_gen=as_factor(sex_gen) %>% fct_drop(),
nkidsbio=case_when(nkidsbio<0 ~ as.numeric(NA),TRUE ~ as.numeric(nkidsbio)) %>%
zap_label(),
sat6=case_when(sat6<0 ~ as.numeric(NA), TRUE ~ as.numeric(sat6)) %>% #specify when sat should be considered missing
zap_label(),
relstat=as_factor(relstat),
relstat=case_when(relstat=="-7 Incomplete data" ~ as.character(NA), TRUE ~ as.character(relstat)) %>%
as_factor() %>%
fct_drop(),
cdweight=zap_label(cdweight)) %>%
drop_na()
dataset3 <- dataset2 %>%
mutate(
marital=case_when(
relstat %in% c("1 Never married single","2 Never married LAT","3 Never married COHAB") ~ "Nevermarried",
relstat %in% c("4 Married COHAB","5 Married noncohabiting") ~ 'Married',
relstat %in% c("6 Divorced/separated single","7 Divorced/separated LAT","8 Divorced/separated COHAB") ~ 'Divorced',
relstat %in% c("9 Widowed single","10 Widowed LAT") ~ 'Widow'
) %>% as_factor(),
parenthood=case_when(
nkidsbio>0 ~ "Have kids",
nkidsbio==0 ~ "No kids") %>%
as_factor()
)%>%
filter(marital!= "Widow")
model1 <- lm_robust(formula = sat6 ~ age + sex_gen + marital + parenthood, data = dataset3, weights = cdweight)
summary(model1)
##
## Call:
## lm_robust(formula = sat6 ~ age + sex_gen + marital + parenthood,
## data = dataset3, weights = cdweight)
##
## Weighted, Standard error type: HC2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|) CI Lower CI Upper
## (Intercept) 8.53299 0.101602 83.9848 0.000e+00 8.33382 8.73217
## age -0.04473 0.004602 -9.7191 3.606e-22 -0.05375 -0.03571
## sex_gen2 Female 0.03982 0.051325 0.7759 4.379e-01 -0.06079 0.14044
## maritalMarried 0.70646 0.092586 7.6303 2.699e-14 0.52495 0.88796
## maritalDivorced -0.55662 0.177203 -3.1411 1.691e-03 -0.90400 -0.20924
## parenthoodHave kids 0.04590 0.092878 0.4942 6.212e-01 -0.13617 0.22797
## DF
## (Intercept) 6148
## age 6148
## sex_gen2 Female 6148
## maritalMarried 6148
## maritalDivorced 6148
## parenthoodHave kids 6148
##
## Multiple R-squared: 0.04644 , Adjusted R-squared: 0.04567
## F-statistic: 35.11 on 5 and 6148 DF, p-value: < 2.2e-16
model2 <- lm_robust(formula = sat6 ~ age + sex_gen*marital + parenthood, data = dataset3, weights = cdweight)
summary(model2)
##
## Call:
## lm_robust(formula = sat6 ~ age + sex_gen * marital + parenthood,
## data = dataset3, weights = cdweight)
##
## Weighted, Standard error type: HC2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|) CI Lower
## (Intercept) 8.52739 0.102039 83.5695 0.000e+00 8.32736
## age -0.04470 0.004605 -9.7072 4.048e-22 -0.05373
## sex_gen2 Female 0.05188 0.062022 0.8365 4.029e-01 -0.06970
## maritalMarried 0.75108 0.113696 6.6060 4.279e-11 0.52820
## maritalDivorced -0.69430 0.296572 -2.3411 1.926e-02 -1.27569
## parenthoodHave kids 0.04023 0.092849 0.4333 6.648e-01 -0.14179
## sex_gen2 Female:maritalMarried -0.07517 0.111234 -0.6758 4.992e-01 -0.29323
## sex_gen2 Female:maritalDivorced 0.23749 0.336909 0.7049 4.809e-01 -0.42297
## CI Upper DF
## (Intercept) 8.72742 6146
## age -0.03567 6146
## sex_gen2 Female 0.17347 6146
## maritalMarried 0.97397 6146
## maritalDivorced -0.11292 6146
## parenthoodHave kids 0.22224 6146
## sex_gen2 Female:maritalMarried 0.14288 6146
## sex_gen2 Female:maritalDivorced 0.89795 6146
##
## Multiple R-squared: 0.04672 , Adjusted R-squared: 0.04564
## F-statistic: 25.49 on 7 and 6146 DF, p-value: < 2.2e-16
model3 <- lm_robust(formula = sat6 ~ age + marital + sex_gen*parenthood, data = dataset3, weights = cdweight)
summary(model3)
##
## Call:
## lm_robust(formula = sat6 ~ age + marital + sex_gen * parenthood,
## data = dataset3, weights = cdweight)
##
## Weighted, Standard error type: HC2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.545482 0.102278 83.55112 0.000e+00
## age -0.044742 0.004603 -9.71935 3.599e-22
## maritalMarried 0.710073 0.092718 7.65839 2.174e-14
## maritalDivorced -0.558290 0.177113 -3.15217 1.628e-03
## sex_gen2 Female 0.012087 0.061776 0.19565 8.449e-01
## parenthoodHave kids -0.005611 0.115252 -0.04869 9.612e-01
## sex_gen2 Female:parenthoodHave kids 0.091956 0.110740 0.83038 4.064e-01
## CI Lower CI Upper DF
## (Intercept) 8.34498 8.74598 6147
## age -0.05377 -0.03572 6147
## maritalMarried 0.52831 0.89183 6147
## maritalDivorced -0.90549 -0.21109 6147
## sex_gen2 Female -0.10902 0.13319 6147
## parenthoodHave kids -0.23155 0.22032 6147
## sex_gen2 Female:parenthoodHave kids -0.12513 0.30905 6147
##
## Multiple R-squared: 0.04658 , Adjusted R-squared: 0.04565
## F-statistic: 29.35 on 6 and 6147 DF, p-value: < 2.2e-16
model4 <- lm_robust(formula = sat6 ~ age + sex_gen + marital*parenthood, data = dataset3, weights = cdweight)
summary(model4)
##
## Call:
## lm_robust(formula = sat6 ~ age + sex_gen + marital * parenthood,
## data = dataset3, weights = cdweight)
##
## Weighted, Standard error type: HC2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.51169 0.102365 83.1502 0.000e+00
## age -0.04325 0.004679 -9.2430 3.238e-20
## sex_gen2 Female 0.04928 0.051414 0.9585 3.379e-01
## maritalMarried 0.52119 0.125159 4.1643 3.166e-05
## maritalDivorced -0.55506 0.386008 -1.4379 1.505e-01
## parenthoodHave kids -0.16787 0.135641 -1.2376 2.159e-01
## maritalMarried:parenthoodHave kids 0.41169 0.177384 2.3209 2.033e-02
## maritalDivorced:parenthoodHave kids 0.16529 0.434183 0.3807 7.035e-01
## CI Lower CI Upper DF
## (Intercept) 8.31101 8.71236 6146
## age -0.05243 -0.03408 6146
## sex_gen2 Female -0.05151 0.15007 6146
## maritalMarried 0.27584 0.76655 6146
## maritalDivorced -1.31177 0.20166 6146
## parenthoodHave kids -0.43378 0.09803 6146
## maritalMarried:parenthoodHave kids 0.06395 0.75942 6146
## maritalDivorced:parenthoodHave kids -0.68586 1.01644 6146
##
## Multiple R-squared: 0.04774 , Adjusted R-squared: 0.04665
## F-statistic: 26.53 on 7 and 6146 DF, p-value: < 2.2e-16
model5 <- lm_robust(formula = sat6 ~ age + sex_gen*marital + sex_gen*parenthood + marital*parenthood, data = dataset3, weights = cdweight)
summary(model5)
##
## Call:
## lm_robust(formula = sat6 ~ age + sex_gen * marital + sex_gen *
## parenthood + marital * parenthood, data = dataset3, weights = cdweight)
##
## Weighted, Standard error type: HC2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.51607 0.102971 82.7034 0.000e+00
## age -0.04335 0.004681 -9.2610 2.743e-20
## sex_gen2 Female 0.04442 0.063007 0.7050 4.808e-01
## maritalMarried 0.66539 0.162694 4.0898 4.373e-05
## maritalDivorced -0.58942 0.436013 -1.3518 1.765e-01
## parenthoodHave kids -0.31184 0.175935 -1.7725 7.637e-02
## sex_gen2 Female:maritalMarried -0.24832 0.175795 -1.4125 1.578e-01
## sex_gen2 Female:maritalDivorced 0.08666 0.365845 0.2369 8.128e-01
## sex_gen2 Female:parenthoodHave kids 0.24013 0.176603 1.3597 1.740e-01
## maritalMarried:parenthoodHave kids 0.41784 0.177308 2.3566 1.848e-02
## maritalDivorced:parenthoodHave kids 0.13458 0.438904 0.3066 7.591e-01
## CI Lower CI Upper DF
## (Intercept) 8.31422 8.71793 6143
## age -0.05253 -0.03418 6143
## sex_gen2 Female -0.07910 0.16794 6143
## maritalMarried 0.34645 0.98433 6143
## maritalDivorced -1.44416 0.26532 6143
## parenthoodHave kids -0.65673 0.03306 6143
## sex_gen2 Female:maritalMarried -0.59294 0.09630 6143
## sex_gen2 Female:maritalDivorced -0.63052 0.80385 6143
## sex_gen2 Female:parenthoodHave kids -0.10608 0.58633 6143
## maritalMarried:parenthoodHave kids 0.07025 0.76543 6143
## maritalDivorced:parenthoodHave kids -0.72582 0.99499 6143
##
## Multiple R-squared: 0.04852 , Adjusted R-squared: 0.04697
## F-statistic: 18.9 on 10 and 6143 DF, p-value: < 2.2e-16
Combine model 1 to model 5, output the results to a word file and label the names of independent variables to make readers easy to understand.
htmlreg(list(model1, model2, model3, model4, model5), include.ci = FALSE,
custom.coef.names = c("Intercept",
"Age",
"Female",
"Married",
"Divorced",
"Have kids",
"Female x married",
"Female x divorced",
"Female x have kids",
"Married x have kids",
"Divorced x have kids"),
caption = "My regression table",
caption.above = TRUE,
single.row = TRUE,
digits = 3,
file = "Regression.doc")
## The table was written to the file 'Regression.doc'.
Save the result of Model 5 as a dataset
result_m5<- model5 %>% tidy()
result_m5
## term estimate std.error statistic
## 1 (Intercept) 8.51607485 0.102971268 82.7034085
## 2 age -0.04335266 0.004681201 -9.2610107
## 3 sex_gen2 Female 0.04441926 0.063007202 0.7049870
## 4 maritalMarried 0.66538729 0.162694340 4.0897999
## 5 maritalDivorced -0.58941660 0.436013369 -1.3518315
## 6 parenthoodHave kids -0.31183846 0.175935007 -1.7724640
## 7 sex_gen2 Female:maritalMarried -0.24831929 0.175795053 -1.4125499
## 8 sex_gen2 Female:maritalDivorced 0.08666231 0.365844524 0.2368829
## 9 sex_gen2 Female:parenthoodHave kids 0.24012529 0.176602963 1.3596901
## 10 maritalMarried:parenthoodHave kids 0.41783963 0.177308234 2.3565720
## 11 maritalDivorced:parenthoodHave kids 0.13458419 0.438904344 0.3066367
## p.value conf.low conf.high df outcome
## 1 0.000000e+00 8.31421510 8.71793460 6143 sat6
## 2 2.743123e-20 -0.05252945 -0.03417586 6143 sat6
## 3 4.808450e-01 -0.07909692 0.16793544 6143 sat6
## 4 4.372627e-05 0.34644940 0.98432518 6143 sat6
## 5 1.764790e-01 -1.44415551 0.26532231 6143 sat6
## 6 7.636712e-02 -0.65673270 0.03305577 6143 sat6
## 7 1.578388e-01 -0.59293916 0.09630059 6143 sat6
## 8 8.127555e-01 -0.63052108 0.80384571 6143 sat6
## 9 1.739779e-01 -0.10607837 0.58632895 6143 sat6
## 10 1.847571e-02 0.07025339 0.76542587 6143 sat6
## 11 7.591303e-01 -0.72582205 0.99499042 6143 sat6
#or you can have the following code
result_m5<- tidy(model5)
Plot the result in a point chart, and also display the confidence interval of each coefficient
ggplot(data = result_m5,
aes(x = term, y = estimate, ymin = conf.low, ymax = conf.high)) +
geom_pointrange() + #this is the code to ploty point with confidence interval, but you have to specify ymin and ymax for the CI
coord_flip() +
geom_hline(yintercept = 0, color = "red", lty = "dashed") # yintercept is to make y=0 as the ref. line, lty is to specify the line is a dashed line
to make your graph a nicer one
result_m5$predictor <- c("Intercept",
"Age",
"Female",
"Married",
"Divorced",
"Have kids",
"Female x married",
"Female x divorced",
"Female x have kids",
"Married x have kids",
"Divorced x have kids")
#generate a variable "predictor" in the dataset "result_m5" where the IVs are labelled in a way easy to understand
ggplot(data = result_m5,
aes(x = predictor, y = estimate, ymin = conf.low, ymax = conf.high)) + #now i change x=term to x= predictor
geom_pointrange() + #this is the code to ploty point with confidence interval, but you have to specify ymin and ymax for the CI
coord_flip() +
geom_hline(yintercept = 0, color = "red", lty = "dashed") # yintercept is to make y=0 as the ref. line, lty is to specify the line is a dahed line
or you may not want to show the intercept
result_m5$predictor <- c("Intercept",
"Age",
"Female",
"Married",
"Divorced",
"Have kids",
"Female x married",
"Female x divorced",
"Female x have kids",
"Married x have kids",
"Divorced x have kids")
result_m5 <- filter(result_m5, predictor!="Intercept")
#generate a variable "predictor" in the dataset "result_m5" where the IVs are labelled in a way easy to understand
ggplot(data = result_m5,
aes(x = predictor, y = estimate, ymin = conf.low, ymax = conf.high)) + #now i change x=term to x= predictor
geom_pointrange() + #this is the code to plot point with confidence interval, but you have to specify ymin and ymax for the CI
coord_flip() +
geom_hline(yintercept = 0, color = "red", lty = "dashed") # yintercept is to make y=0 as the ref. line, lty is to specify the line is a dahed line