Final Paper R code

Author

Jingyi Yang

#Prepare

library(haven)
library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ readr     2.1.5
✔ ggplot2   3.5.2     ✔ stringr   1.5.1
✔ lubridate 1.9.4     ✔ tibble    3.2.1
✔ purrr     1.0.4     ✔ tidyr     1.3.1

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(semPlot)
library(lavaan)

This is lavaan 0.6-19
lavaan is FREE software! Please report any bugs.

library(psych)


Attaching package: 'psych'

The following object is masked from 'package:lavaan':

    cor2cov

The following objects are masked from 'package:ggplot2':

    %+%, alpha

library(skimr) 
library(corrplot)

corrplot 0.95 loaded

library(patchwork) #Merge GGPlots together 
library(ggplot2) #Graphing
library(jtools) #Tabular Regression Results
library(descr) #Easy Frequency Tables  
library(stats) #Imports survey data 
library(ggeffects) #Predicted Probabilities from Regressions
library(nnet) #For multinomial models
library(MASS) #For ordered models


Attaching package: 'MASS'

The following object is masked from 'package:patchwork':

    area

The following object is masked from 'package:dplyr':

    select

library(brant) #Test parallel regression assumption 
library(boot) #Create CIs for Multinomial Modeling


Attaching package: 'boot'

The following object is masked from 'package:psych':

    logit

library(cem) #Coarsened Exact Matching

Loading required package: tcltk
Loading required package: lattice

Attaching package: 'lattice'

The following object is masked from 'package:boot':

    melanoma


How to use CEM? Type vignette("cem")

library(MatchIt)  #Coarsened Exact Matching 
library(stargazer)  #Regression Output


Please cite as: 

 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer

library(jtools)  #Regression Output
library(WeightIt) #Entropy Balancing
library(Hmisc) #General Modeling


Attaching package: 'Hmisc'

The following object is masked from 'package:jtools':

    %nin%

The following object is masked from 'package:psych':

    describe

The following objects are masked from 'package:dplyr':

    src, summarize

The following objects are masked from 'package:base':

    format.pval, units

library(ebal) #Entropy Balancing

##
## ebal Package: Implements Entropy Balancing.

## See http://www.stanford.edu/~jhain/ for additional information.

library(survey) #Applying EB weights

Loading required package: grid
Loading required package: Matrix

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack

Loading required package: survival

Attaching package: 'survival'

The following object is masked from 'package:boot':

    aml


Attaching package: 'survey'

The following object is masked from 'package:Hmisc':

    deff

The following object is masked from 'package:WeightIt':

    calibrate

The following object is masked from 'package:graphics':

    dotchart

Data Import

GSS2022 <- read_dta("研一下/DACSS 790Q/Final Project/2022/GSS2022.dta")
head(GSS2022)

# A tibble: 6 × 1,185
  year         id wrkstat hrs1        hrs2        evwork      wrkslf  occ10     
  <dbl+lbl> <dbl> <dbl+l> <dbl+lbl>   <dbl+lbl>   <dbl+lbl>   <dbl+l> <dbl+lbl> 
1 2022          1 1 [wor…    40       NA(i) [iap] NA(i) [iap] 2 [som…  430 [man…
2 2022          2 5 [ret… NA(i) [iap] NA(i) [iap]     1 [yes] 2 [som…   50 [mar…
3 2022          3 1 [wor…    52       NA(i) [iap] NA(i) [iap] 2 [som… 4610 [per…
4 2022          4 3 [wit… NA(i) [iap]    25       NA(i) [iap] 2 [som… 4120 [foo…
5 2022          5 8 [oth… NA(i) [iap] NA(i) [iap]     1 [yes] 2 [som… 7330 [ind…
6 2022          6 1 [wor…    50       NA(i) [iap] NA(i) [iap] 2 [som… 4610 [per…
# ℹ 1,177 more variables: prestg10 <dbl+lbl>, prestg105plus <dbl+lbl>,
#   indus10 <dbl+lbl>, marital <dbl+lbl>, martype <dbl+lbl>, divorce <dbl+lbl>,
#   widowed <dbl+lbl>, spwrksta <dbl+lbl>, sphrs1 <dbl+lbl>, sphrs2 <dbl+lbl>,
#   spevwork <dbl+lbl>, cowrksta <dbl+lbl>, coevwork <dbl+lbl>,
#   cohrs1 <dbl+lbl>, cohrs2 <dbl+lbl>, spwrkslf <dbl+lbl>, sppres80 <dbl+lbl>,
#   spocc10 <dbl+lbl>, sppres10 <dbl+lbl>, sppres105plus <dbl+lbl>,
#   spind10 <dbl+lbl>, coocc10 <dbl+lbl>, coind10 <dbl+lbl>, …

Setting data set

# Change age to age group! 
age_breaks <- c(18, 30, 50, 65, 100)

GSS2022$age_group <- cut(GSS2022$age, 
                      age_breaks, labels = c("18-29", "30-49", "50-64", "65+"), 
                      include.lowest = TRUE) 
                      
GSS2022<- GSS2022 %>% mutate(age_group= recode(age_group,"18-29"="1", "30-49"="2", "50-64"="3", "65+"="4"))

test_data <- GSS2022 %>% dplyr::select(c(wkvsfam, famvswk, physhlth, mntlhlth, marital,hompop_exp, age_group, degree, income, race, sex, wrkstat, wtssps))%>% mutate(wkvsfam= as.numeric(wkvsfam),famvswk= as.numeric(famvswk), physhlth= as.numeric(physhlth), mntlhlth= as.numeric(mntlhlth), marital= as.numeric(marital), hompop_exp= as.numeric(hompop_exp), age_group= as.numeric(age_group), degree= as.numeric(degree), income= as.numeric(income), race= as.numeric(race), sex= as.numeric(sex), wrkstat=as.numeric(wrkstat), wtssps=as.numeric(wtssps))

new_names <- c("job_interrupt_the_family", "family_interrupt_the_job", "physical_health_status", "mental_health_status", "marriage_status", "total_people_in_household", "age_group","education", "family_income", "race", "sex", "working_status", "weight" )

colnames(test_data) <- new_names #Apply new names to your data frame
skim(test_data)

Data summary
Name	test_data
Number of rows	4149
Number of columns	13
_______________________
Column type frequency:
numeric	13
________________________
Group variables	None

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
job_interrupt_the_family	1767	0.57	2.69	0.93	1.00	2.00	3.00	3.00	4.00	▂▇▁▇▅
family_interrupt_the_job	1779	0.57	2.94	0.86	1.00	2.00	3.00	4.00	4.00	▁▆▁▇▆
physical_health_status	1844	0.56	2.67	6.11	0.00	0.00	0.00	2.00	30.00	▇▁▁▁▁
mental_health_status	1870	0.55	4.53	7.93	0.00	0.00	0.00	5.00	30.00	▇▁▁▁▁
marriage_status	16	1.00	2.77	1.74	1.00	1.00	3.00	5.00	5.00	▇▁▃▁▆
total_people_in_household	8	1.00	1.75	1.67	0.00	1.00	1.00	2.00	49.00	▇▁▁▁▁
age_group	256	0.94	2.46	1.03	1.00	2.00	2.00	3.00	4.00	▅▇▁▅▅
education	2	1.00	1.82	1.27	0.00	1.00	1.00	3.00	4.00	▂▇▂▃▂
family_income	484	0.88	11.22	2.07	1.00	12.00	12.00	12.00	12.00	▁▁▁▁▇
race	64	0.98	1.51	0.76	1.00	1.00	1.00	2.00	3.00	▇▁▂▁▂
sex	23	0.99	1.54	0.50	1.00	1.00	2.00	2.00	2.00	▇▁▁▁▇
working_status	9	1.00	3.07	2.34	1.00	1.00	2.00	5.00	8.00	▇▁▃▁▂
weight	605	0.85	1.00	1.26	0.07	0.34	0.58	1.06	14.27	▇▁▁▁▁

# Reverse Coding
final_data <- test_data %>%
  mutate(
    job_interrupt_the_family = case_when(
      job_interrupt_the_family == 1 ~ 4,
      job_interrupt_the_family == 2 ~ 3,
      job_interrupt_the_family == 3 ~ 2,
      job_interrupt_the_family == 4 ~ 1,
      job_interrupt_the_family == "NA"~ NA_real_),
    job_interrupt_the_family = labelled(job_interrupt_the_family, 
      c(`Never` = 1, `Rarely` = 2, `Sometimes` = 3, `Often` = 4))) %>% 
  mutate(family_interrupt_the_job = as.numeric(family_interrupt_the_job),
    family_interrupt_the_job = case_when(
      family_interrupt_the_job == 1 ~ 4,
      family_interrupt_the_job == 2 ~ 3,
      family_interrupt_the_job == 3 ~ 2,
      family_interrupt_the_job == 4 ~ 1,
      family_interrupt_the_job == "NA"~ NA_real_),
    family_interrupt_the_job = labelled(family_interrupt_the_job, 
      c(`Never` = 1, `Rarely` = 2, `Sometimes` = 3, `Often` = 4))) %>%
  mutate(marriage_status= case_when(
    marriage_status== 1 ~ 5, 
    marriage_status== 2 ~ 4,
    marriage_status== 3 ~ 3,
    marriage_status== 4 ~ 2,
    marriage_status== 5 ~ 1,
    marriage_status== "NA"~ NA_real_),
    marriage_status=labelled(marriage_status, c(`Never married`=1, `Separated`=2, `Divorced`=3, `Widowed`=4, `Married`=5))) %>%
  mutate(age_group= case_when(
    age_group== 1 ~ 1,
    age_group== 2 ~ 2, 
    age_group== 3 ~ 3,
    age_group== 4 ~ 4,
    age_group== "NA"~ NA_real_),
    age_group=labelled(age_group, c(`18-29 YEARS OLD`=1, `30-49 YEARS OLD`=2, `50-64 YEARS OLD`=3, `64 YEARS OLD OR OVER`=4)))%>% 
  mutate(education= case_when(
    education== 0 ~ 1,
    education== 1 ~ 2,
    education== 2 ~ 3, 
    education== 3 ~ 4, 
    education== 4 ~ 5,
    education== "NA"~ NA_real_),
    education= labelled(education, c(`Less THAN HIGH SCHOOL`= 1, `HIGH SCHOOL`= 2, `ASSOCIATE/JUNIOR COLLEGE`=3, `BACHELORS`=4, `GRADUATE`= 5)))%>%
  mutate(family_income= case_when(
    family_income== 1 ~ 1, 
    family_income== 2 ~ 2, 
    family_income== 3 ~ 3, 
    family_income== 4 ~ 4, 
    family_income== 5 ~ 5, 
    family_income== 6 ~ 6, 
    family_income== 7 ~ 7, 
    family_income== 8 ~ 8, 
    family_income== 9 ~ 9,
    family_income== 10 ~ 10, 
    family_income== 11 ~ 11, 
    family_income== 12 ~ 12,
    family_income== "NA"~ NA_real_),
    family_income= labelled(family_income, c(`UNDER$1,000`= 1,`$1,000 TO $2,999`= 2, `$3,000 TO $3,999`=3, `$4,000 TO $4,999`=4, `$5,000 TOlibra $5,999`= 5, `$6,000 TO $6,999`=6, `$7,000 TO $7,999`=7, `$8,000 TO $9,999`=8, `$10,000 TO $14,999`= 9, `$15,000 TO $19,999`= 10,`$20,000 TO $24,999`= 11,  `$25,000 OR MORE`= 12))) %>%
  mutate(race= case_when(
    race== 1 ~ 1, 
    race== 2 ~ 2,
    race== 3 ~ 3,
    race== "NA"~ NA_real_),
  race= labelled(race, c(`WHITE`= 1, `BLACK`= 2, `OTHER`=3))) %>%
  mutate(sex= case_when(
    sex== 1 ~ 1, 
    sex== 2 ~ 2,
    sex== "NA"~ NA_real_),
    sex= labelled(sex, c(`MALE`=1, `FEMALE`=2)))%>%
  mutate(total_people_in_household = case_when(
    total_people_in_household == 0 ~ 0,
    total_people_in_household == 1 ~ 1,
    total_people_in_household == 2 ~ 2,
    total_people_in_household == 3 ~ 3,
    total_people_in_household == 4 ~ 4,
    total_people_in_household == 5 ~ 5,
    total_people_in_household == 6 ~ 6,
    total_people_in_household == 7 ~ 7,
    total_people_in_household >= 8 ~ 8,
    total_people_in_household == "NA"~ NA_real_),
    total_people_in_household = labelled(total_people_in_household, 
      c(`NONE`=0, `1`=1, `2`=2, `3`=3, `4`=4, `5`=5, `6`=6, `7`=7, `8+`=8)))%>%
  mutate(working_status = case_when(
    working_status == 1 ~ 3,
    working_status == 2 ~ 2,
    working_status == 3 ~ 1,
    working_status == 4 ~ 1,
    working_status == 5 ~ 1,
    working_status == 6 ~ 1,
    working_status == 7 ~ 1,
    working_status == 8 ~ 1,
    working_status == "NA"~ NA_real_),
    working_status = labelled(working_status,
      c(`WORKING FULL TIME`=3, `WORKING PART TIME`=2,`OTHER`=1)))
head(final_data)

# A tibble: 6 × 13
  job_interrupt_the_family family_interrupt_the_job physical_health_status
  <dbl+lbl>                <dbl+lbl>                                 <dbl>
1  3 [Sometimes]            2 [Rarely]                                  30
2 NA                       NA                                           NA
3  2 [Rarely]               2 [Rarely]                                   0
4  4 [Often]                1 [Never]                                    4
5 NA                       NA                                           NA
6  4 [Often]                3 [Sometimes]                                0
# ℹ 10 more variables: mental_health_status <dbl>, marriage_status <dbl+lbl>,
#   total_people_in_household <dbl+lbl>, age_group <dbl+lbl>,
#   education <dbl+lbl>, family_income <dbl+lbl>, race <dbl+lbl>,
#   sex <dbl+lbl>, working_status <dbl+lbl>, weight <dbl>

skim(final_data)

Data summary
Name	final_data
Number of rows	4149
Number of columns	13
_______________________
Column type frequency:
numeric	13
________________________
Group variables	None

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
job_interrupt_the_family	1767	0.57	2.31	0.93	1.00	2.00	2.00	3.00	4.00	▅▇▁▇▂
family_interrupt_the_job	1779	0.57	2.06	0.86	1.00	1.00	2.00	3.00	4.00	▆▇▁▆▁
physical_health_status	1844	0.56	2.67	6.11	0.00	0.00	0.00	2.00	30.00	▇▁▁▁▁
mental_health_status	1870	0.55	4.53	7.93	0.00	0.00	0.00	5.00	30.00	▇▁▁▁▁
marriage_status	16	1.00	3.23	1.74	1.00	1.00	3.00	5.00	5.00	▆▁▃▁▇
total_people_in_household	8	1.00	1.72	1.33	0.00	1.00	1.00	2.00	8.00	▇▂▁▁▁
age_group	256	0.94	2.46	1.03	1.00	2.00	2.00	3.00	4.00	▅▇▁▅▅
education	2	1.00	2.82	1.27	1.00	2.00	2.00	4.00	5.00	▂▇▂▃▂
family_income	484	0.88	11.22	2.07	1.00	12.00	12.00	12.00	12.00	▁▁▁▁▇
race	64	0.98	1.51	0.76	1.00	1.00	1.00	2.00	3.00	▇▁▂▁▂
sex	23	0.99	1.54	0.50	1.00	1.00	2.00	2.00	2.00	▇▁▁▁▇
working_status	9	1.00	2.02	0.95	1.00	1.00	2.00	3.00	3.00	▇▁▂▁▇
weight	605	0.85	1.00	1.26	0.07	0.34	0.58	1.06	14.27	▇▁▁▁▁

Separate two scales

data_physical_health<-final_data%>% dplyr::select(c(job_interrupt_the_family, family_interrupt_the_job, physical_health_status, marriage_status, total_people_in_household,  age_group,education, family_income, race, sex, working_status, weight)) %>% mutate(marriage_status= as.factor(marriage_status))%>% mutate(race= as.factor(race))%>% mutate(working_status= as.factor(working_status))


head(data_physical_health)

# A tibble: 6 × 12
  job_interrupt_the_family family_interrupt_the_job physical_health_status
  <dbl+lbl>                <dbl+lbl>                                 <dbl>
1  3 [Sometimes]            2 [Rarely]                                  30
2 NA                       NA                                           NA
3  2 [Rarely]               2 [Rarely]                                   0
4  4 [Often]                1 [Never]                                    4
5 NA                       NA                                           NA
6  4 [Often]                3 [Sometimes]                                0
# ℹ 9 more variables: marriage_status <fct>,
#   total_people_in_household <dbl+lbl>, age_group <dbl+lbl>,
#   education <dbl+lbl>, family_income <dbl+lbl>, race <fct>, sex <dbl+lbl>,
#   working_status <fct>, weight <dbl>

data_mental_health <- final_data%>% dplyr::select(c(job_interrupt_the_family, family_interrupt_the_job, mental_health_status,marriage_status, total_people_in_household,  age_group,education, family_income, race, sex, working_status, weight))%>% mutate(marriage_status= as.factor(marriage_status)) %>% mutate(race= as.factor(race)) %>% mutate(working_status= as.factor(working_status))

head(data_mental_health)

# A tibble: 6 × 12
  job_interrupt_the_family family_interrupt_the_job mental_health_status
  <dbl+lbl>                <dbl+lbl>                               <dbl>
1  3 [Sometimes]            2 [Rarely]                                15
2 NA                       NA                                         NA
3  2 [Rarely]               2 [Rarely]                                 0
4  4 [Often]                1 [Never]                                 10
5 NA                       NA                                         NA
6  4 [Often]                3 [Sometimes]                             NA
# ℹ 9 more variables: marriage_status <fct>,
#   total_people_in_household <dbl+lbl>, age_group <dbl+lbl>,
#   education <dbl+lbl>, family_income <dbl+lbl>, race <fct>, sex <dbl+lbl>,
#   working_status <fct>, weight <dbl>

Count Dependent Variable Tutorials

Negative-Binomial Model

nb_physical_health<- glm.nb(physical_health_status~job_interrupt_the_family+family_interrupt_the_job+ marriage_status+ total_people_in_household+ factor(age_group)+education+family_income+race+sex+working_status, data=data_physical_health, weights = data_physical_health$weight) 

summary(nb_physical_health)


Call:
glm.nb(formula = physical_health_status ~ job_interrupt_the_family + 
    family_interrupt_the_job + marriage_status + total_people_in_household + 
    factor(age_group) + education + family_income + race + sex + 
    working_status, data = data_physical_health, weights = data_physical_health$weight, 
    init.theta = 0.2066739091, link = log)

Coefficients:
                           Estimate Std. Error z value Pr(>|z|)    
(Intercept)                1.070116   0.611382   1.750 0.080063 .  
job_interrupt_the_family   0.046956   0.071397   0.658 0.510746    
family_interrupt_the_job   0.150010   0.076790   1.954 0.050758 .  
marriage_status2          -0.305971   0.466022  -0.657 0.511464    
marriage_status3          -0.114493   0.211822  -0.541 0.588841    
marriage_status4           0.419432   0.403927   1.038 0.299090    
marriage_status5          -0.532134   0.151580  -3.511 0.000447 ***
total_people_in_household  0.035484   0.037679   0.942 0.346327    
factor(age_group)2         0.420183   0.163890   2.564 0.010353 *  
factor(age_group)3         0.597448   0.188429   3.171 0.001521 ** 
factor(age_group)4         0.161875   0.293786   0.551 0.581636    
education                 -0.088090   0.048420  -1.819 0.068866 .  
family_income             -0.006117   0.041862  -0.146 0.883831    
race2                     -0.869784   0.182331  -4.770 1.84e-06 ***
race3                     -0.380102   0.184256  -2.063 0.039122 *  
sex                        0.480984   0.116805   4.118 3.82e-05 ***
working_status2           -0.704522   0.289847  -2.431 0.015071 *  
working_status3           -1.188900   0.263378  -4.514 6.36e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(0.2067) family taken to be 1)

    Null deviance: 1431.4  on 1663  degrees of freedom
Residual deviance: 1322.8  on 1646  degrees of freedom
  (2485 observations deleted due to missingness)
AIC: 5950.4

Number of Fisher Scoring iterations: 1

              Theta:  0.2067 
          Std. Err.:  0.0108 

 2 x log-likelihood:  -5912.4340

nb_mental_health<- glm.nb(mental_health_status~job_interrupt_the_family+family_interrupt_the_job+ marriage_status+ total_people_in_household+ factor(age_group)+education+family_income+race+sex+working_status, data=data_mental_health, weights = data_mental_health$weight) 

summary(nb_mental_health)


Call:
glm.nb(formula = mental_health_status ~ job_interrupt_the_family + 
    family_interrupt_the_job + marriage_status + total_people_in_household + 
    factor(age_group) + education + family_income + race + sex + 
    working_status, data = data_mental_health, weights = data_mental_health$weight, 
    init.theta = 0.2967852665, link = log)

Coefficients:
                          Estimate Std. Error z value Pr(>|z|)    
(Intercept)                1.86665    0.49971   3.735 0.000187 ***
job_interrupt_the_family   0.30549    0.06046   5.053 4.35e-07 ***
family_interrupt_the_job   0.14536    0.06508   2.234 0.025503 *  
marriage_status2          -0.67765    0.39241  -1.727 0.084186 .  
marriage_status3           0.02496    0.17657   0.141 0.887600    
marriage_status4          -0.01392    0.34390  -0.040 0.967709    
marriage_status5          -0.57128    0.12572  -4.544 5.52e-06 ***
total_people_in_household  0.04766    0.03118   1.528 0.126400    
factor(age_group)2        -0.25762    0.13466  -1.913 0.055720 .  
factor(age_group)3        -0.68757    0.15633  -4.398 1.09e-05 ***
factor(age_group)4        -1.42375    0.25834  -5.511 3.57e-08 ***
education                 -0.14462    0.04057  -3.565 0.000364 ***
family_income             -0.10208    0.03339  -3.057 0.002233 ** 
race2                     -0.78152    0.14905  -5.243 1.58e-07 ***
race3                     -0.60170    0.15473  -3.889 0.000101 ***
sex                        0.61570    0.09770   6.302 2.94e-10 ***
working_status2           -0.15969    0.24722  -0.646 0.518323    
working_status3           -0.12411    0.22416  -0.554 0.579828    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(0.2968) family taken to be 1)

    Null deviance: 1783.3  on 1655  degrees of freedom
Residual deviance: 1570.7  on 1638  degrees of freedom
  (2493 observations deleted due to missingness)
AIC: 7574.4

Number of Fisher Scoring iterations: 1

              Theta:  0.2968 
          Std. Err.:  0.0141 

 2 x log-likelihood:  -7536.4160

Basic Poisson Model

pois_physical_health <- glm(physical_health_status~job_interrupt_the_family+family_interrupt_the_job+ marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, data=data_physical_health, family = poisson, weights = data_physical_health$weight)
summary(pois_physical_health)


Call:
glm(formula = physical_health_status ~ job_interrupt_the_family + 
    family_interrupt_the_job + marriage_status + total_people_in_household + 
    age_group + education + family_income + race + sex + working_status, 
    family = poisson, data = data_physical_health, weights = data_physical_health$weight)

Coefficients:
                           Estimate Std. Error z value Pr(>|z|)    
(Intercept)                1.287148   0.153912   8.363  < 2e-16 ***
job_interrupt_the_family   0.034496   0.019110   1.805  0.07105 .  
family_interrupt_the_job   0.136809   0.020105   6.805 1.01e-11 ***
marriage_status2          -0.429247   0.147328  -2.914  0.00357 ** 
marriage_status3          -0.027718   0.051398  -0.539  0.58969    
marriage_status4           0.631936   0.077712   8.132 4.23e-16 ***
marriage_status5          -0.345389   0.040021  -8.630  < 2e-16 ***
total_people_in_household  0.053797   0.009884   5.443 5.24e-08 ***
age_group                  0.100197   0.020701   4.840 1.30e-06 ***
education                 -0.112181   0.013355  -8.400  < 2e-16 ***
family_income             -0.006640   0.010666  -0.623  0.53359    
race2                     -0.785702   0.061160 -12.847  < 2e-16 ***
race3                     -0.380280   0.055435  -6.860 6.89e-12 ***
sex                        0.368993   0.031925  11.558  < 2e-16 ***
working_status2           -0.720800   0.054434 -13.242  < 2e-16 ***
working_status3           -1.068241   0.047625 -22.430  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 13049  on 1663  degrees of freedom
Residual deviance: 11681  on 1648  degrees of freedom
  (2485 observations deleted due to missingness)
AIC: 13900

Number of Fisher Scoring iterations: 6

pois_mental_health <- glm(mental_health_status~job_interrupt_the_family+family_interrupt_the_job+ marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, data=data_mental_health, family = poisson, weights = data_mental_health$weight)
summary(pois_mental_health)


Call:
glm(formula = mental_health_status ~ job_interrupt_the_family + 
    family_interrupt_the_job + marriage_status + total_people_in_household + 
    age_group + education + family_income + race + sex + working_status, 
    family = poisson, data = data_mental_health, weights = data_mental_health$weight)

Coefficients:
                          Estimate Std. Error z value Pr(>|z|)    
(Intercept)                1.86773    0.10756  17.364  < 2e-16 ***
job_interrupt_the_family   0.29156    0.01455  20.034  < 2e-16 ***
family_interrupt_the_job   0.10048    0.01545   6.503 7.87e-11 ***
marriage_status2          -0.52643    0.11076  -4.753 2.01e-06 ***
marriage_status3           0.08600    0.04142   2.076   0.0379 *  
marriage_status4           0.55237    0.07107   7.772 7.70e-15 ***
marriage_status5          -0.26321    0.02902  -9.071  < 2e-16 ***
total_people_in_household  0.01614    0.00731   2.208   0.0272 *  
age_group                 -0.36941    0.01738 -21.260  < 2e-16 ***
education                 -0.12536    0.01030 -12.168  < 2e-16 ***
family_income             -0.07043    0.00620 -11.360  < 2e-16 ***
race2                     -0.52015    0.03946 -13.183  < 2e-16 ***
race3                     -0.42281    0.04238  -9.978  < 2e-16 ***
sex                        0.56890    0.02454  23.186  < 2e-16 ***
working_status2           -0.03299    0.05354  -0.616   0.5377    
working_status3           -0.09345    0.04815  -1.941   0.0523 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 16817  on 1655  degrees of freedom
Residual deviance: 13943  on 1640  degrees of freedom
  (2493 observations deleted due to missingness)
AIC: 17023

Number of Fisher Scoring iterations: 6

Quasi-Poisson Model

quasi_physical_health <-  glm(physical_health_status~job_interrupt_the_family+family_interrupt_the_job+ marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status,family = quasipoisson, data=data_physical_health, weights = data_physical_health$weight)
summary(quasi_physical_health)


Call:
glm(formula = physical_health_status ~ job_interrupt_the_family + 
    family_interrupt_the_job + marriage_status + total_people_in_household + 
    age_group + education + family_income + race + sex + working_status, 
    family = quasipoisson, data = data_physical_health, weights = data_physical_health$weight)

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)    
(Intercept)                1.28715    0.53099   2.424 0.015455 *  
job_interrupt_the_family   0.03450    0.06593   0.523 0.600875    
family_interrupt_the_job   0.13681    0.06936   1.972 0.048729 *  
marriage_status2          -0.42925    0.50828  -0.845 0.398505    
marriage_status3          -0.02772    0.17732  -0.156 0.875802    
marriage_status4           0.63194    0.26810   2.357 0.018537 *  
marriage_status5          -0.34539    0.13807  -2.502 0.012462 *  
total_people_in_household  0.05380    0.03410   1.578 0.114836    
age_group                  0.10020    0.07142   1.403 0.160808    
education                 -0.11218    0.04607  -2.435 0.015006 *  
family_income             -0.00664    0.03680  -0.180 0.856825    
race2                     -0.78570    0.21100  -3.724 0.000203 ***
race3                     -0.38028    0.19125  -1.988 0.046933 *  
sex                        0.36899    0.11014   3.350 0.000826 ***
working_status2           -0.72080    0.18780  -3.838 0.000129 ***
working_status3           -1.06824    0.16430  -6.502 1.05e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for quasipoisson family taken to be 11.90224)

    Null deviance: 13049  on 1663  degrees of freedom
Residual deviance: 11681  on 1648  degrees of freedom
  (2485 observations deleted due to missingness)
AIC: NA

Number of Fisher Scoring iterations: 6

quasi_mental_health <- glm(mental_health_status~job_interrupt_the_family+family_interrupt_the_job+ marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, family = quasipoisson, data=data_mental_health, weights = data_mental_health$weight)
summary(quasi_mental_health)


Call:
glm(formula = mental_health_status ~ job_interrupt_the_family + 
    family_interrupt_the_job + marriage_status + total_people_in_household + 
    age_group + education + family_income + race + sex + working_status, 
    family = quasipoisson, data = data_mental_health, weights = data_mental_health$weight)

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)    
(Intercept)                1.86773    0.36572   5.107 3.65e-07 ***
job_interrupt_the_family   0.29156    0.04948   5.892 4.61e-09 ***
family_interrupt_the_job   0.10048    0.05254   1.913 0.055968 .  
marriage_status2          -0.52643    0.37660  -1.398 0.162350    
marriage_status3           0.08600    0.14084   0.611 0.541569    
marriage_status4           0.55237    0.24163   2.286 0.022382 *  
marriage_status5          -0.26321    0.09866  -2.668 0.007709 ** 
total_people_in_household  0.01614    0.02485   0.649 0.516177    
age_group                 -0.36941    0.05908  -6.253 5.13e-10 ***
education                 -0.12536    0.03503  -3.579 0.000355 ***
family_income             -0.07043    0.02108  -3.341 0.000853 ***
race2                     -0.52015    0.13415  -3.877 0.000110 ***
race3                     -0.42281    0.14408  -2.935 0.003386 ** 
sex                        0.56889    0.08343   6.819 1.28e-11 ***
working_status2           -0.03299    0.18205  -0.181 0.856200    
working_status3           -0.09345    0.16372  -0.571 0.568228    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for quasipoisson family taken to be 11.56031)

    Null deviance: 16817  on 1655  degrees of freedom
Residual deviance: 13943  on 1640  degrees of freedom
  (2493 observations deleted due to missingness)
AIC: NA

Number of Fisher Scoring iterations: 6

Likelihood Ratio, AIC, BIC

#Create function to compare model fit stats
glm_fit <- function(model) {
  # Calculate Likelihood Ratio
  lr <- logLik(model)
  
  # Calculate AIC
  aic <- AIC(model)
  
  # Calculate BIC
  n <- nobs(model)
  p <- length(coef(model))
  bic <- -2 * logLik(model) + p * log(n)
  
  # Calculate Deviance
  deviance <- summary(model)$deviance
  
  # Return the metrics as a list
  metrics <- data.frame(Likelihood_Ratio = lr, AIC = aic, BIC = bic, Deviance = deviance, Coefficients = p)
  return(metrics)
}

glm_fit(pois_physical_health)

  Likelihood_Ratio      AIC      BIC Deviance Coefficients
1        -6934.083 13900.17 13986.84 11680.73           16

glm_fit(pois_mental_health)

  Likelihood_Ratio      AIC      BIC Deviance Coefficients
1         -8495.61 17023.22 17109.81 13943.12           16

glm_fit(quasi_physical_health)

  Likelihood_Ratio AIC BIC Deviance Coefficients
1               NA  NA  NA 11680.73           16

glm_fit(quasi_mental_health)

  Likelihood_Ratio AIC BIC Deviance Coefficients
1               NA  NA  NA 13943.12           16

glm_fit(nb_physical_health)

  Likelihood_Ratio      AIC      BIC Deviance Coefficients
1        -2956.217 5950.434 6045.939 1322.828           18

glm_fit(nb_mental_health)

  Likelihood_Ratio      AIC      BIC Deviance Coefficients
1        -3768.208 7574.416 7669.834  1570.67           18

Overall Table

stargazer(nb_physical_health, nb_mental_health, type="text",align=TRUE, dep.var.labels=c("Physical Health Status","Mental Health Status"), covariate.labels=c("Job interrupt the family", "Family interrupt the job", "Marriage status:Separated","Marriage status:Divorced", "Marriage status:Widowed","Marriage status:Married","Total people in household", "Age group：30-49 YEARS OLD","Age group：50-64 YEARS OLD", "Age group：64 YEARS OLD OR OVER", "Education", "Family income", "Race:BLACK", "Race:OTHER","Sex", "Working Status:WORKING PART TIME", "Working Status:WORKING FULL TIME" ))


============================================================================
                                             Dependent variable:            
                                 -------------------------------------------
                                 Physical Health Status Mental Health Status
                                          (1)                   (2)         
----------------------------------------------------------------------------
Job interrupt the family                 0.047                0.305***      
                                        (0.071)               (0.060)       
                                                                            
Family interrupt the job                 0.150*               0.145**       
                                        (0.077)               (0.065)       
                                                                            
Marriage status:Separated                -0.306               -0.678*       
                                        (0.466)               (0.392)       
                                                                            
Marriage status:Divorced                 -0.114                0.025        
                                        (0.212)               (0.177)       
                                                                            
Marriage status:Widowed                  0.419                 -0.014       
                                        (0.404)               (0.344)       
                                                                            
Marriage status:Married                -0.532***             -0.571***      
                                        (0.152)               (0.126)       
                                                                            
Total people in household                0.035                 0.048        
                                        (0.038)               (0.031)       
                                                                            
Age group：30-49 YEARS OLD               0.420**               -0.258*       
                                        (0.164)               (0.135)       
                                                                            
Age group：50-64 YEARS OLD               0.597***             -0.688***      
                                        (0.188)               (0.156)       
                                                                            
Age group：64 YEARS OLD OR OVER           0.162               -1.424***      
                                        (0.294)               (0.258)       
                                                                            
Education                               -0.088*              -0.145***      
                                        (0.048)               (0.041)       
                                                                            
Family income                            -0.006              -0.102***      
                                        (0.042)               (0.033)       
                                                                            
Race:BLACK                             -0.870***             -0.782***      
                                        (0.182)               (0.149)       
                                                                            
Race:OTHER                              -0.380**             -0.602***      
                                        (0.184)               (0.155)       
                                                                            
Sex                                     0.481***              0.616***      
                                        (0.117)               (0.098)       
                                                                            
Working Status:WORKING PART TIME        -0.705**               -0.160       
                                        (0.290)               (0.247)       
                                                                            
Working Status:WORKING FULL TIME       -1.189***               -0.124       
                                        (0.263)               (0.224)       
                                                                            
Constant                                 1.070*               1.867***      
                                        (0.611)               (0.500)       
                                                                            
----------------------------------------------------------------------------
Observations                             1,664                 1,656        
Log Likelihood                         -2,957.217            -3,769.208     
theta                               0.207*** (0.011)      0.297*** (0.014)  
Akaike Inf. Crit.                      5,950.434             7,574.416      
============================================================================
Note:                                            *p<0.1; **p<0.05; ***p<0.01

Prediction

physical_pred_j<-ggpredict(nb_physical_health, terms="job_interrupt_the_family")
print(physical_pred_j)

# Predicted counts of physical_health_status

job_interrupt_the_family | Predicted |      95% CI
--------------------------------------------------
                       1 |      6.63 | 3.66, 12.01
                       2 |      6.94 | 3.96, 12.16
                       3 |      7.28 | 4.16, 12.74
                       4 |      7.63 | 4.21, 13.81

Adjusted for:
*  family_interrupt_the_job =  2.11
*           marriage_status =     1
* total_people_in_household =  2.03
*                 age_group =     1
*                 education =  2.89
*             family_income = 11.65
*                      race =     1
*                       sex =  1.48
*            working_status =     1

physical_pred_f<-ggpredict(nb_physical_health, terms="family_interrupt_the_job")
print(physical_pred_f)

# Predicted counts of physical_health_status

family_interrupt_the_job | Predicted |      95% CI
--------------------------------------------------
                       1 |      5.98 | 3.37, 10.62
                       2 |      6.95 | 3.98, 12.11
                       3 |      8.07 | 4.53, 14.38
                       4 |      9.38 | 4.97, 17.69

Adjusted for:
*  job_interrupt_the_family =  2.36
*           marriage_status =     1
* total_people_in_household =  2.03
*                 age_group =     1
*                 education =  2.89
*             family_income = 11.65
*                      race =     1
*                       sex =  1.48
*            working_status =     1

mental_pred_j<-ggpredict(nb_mental_health, terms="job_interrupt_the_family")
print(mental_pred_j)

# Predicted counts of mental_health_status

job_interrupt_the_family | Predicted |      95% CI
--------------------------------------------------
                       1 |      6.53 | 3.95, 10.78
                       2 |      8.86 | 5.52, 14.22
                       3 |     12.02 | 7.48, 19.31
                       4 |     16.32 | 9.87, 26.98

Adjusted for:
*  family_interrupt_the_job =  2.10
*           marriage_status =     1
* total_people_in_household =  2.03
*                 age_group =     1
*                 education =  2.89
*             family_income = 11.66
*                      race =     1
*                       sex =  1.48
*            working_status =     1

mental_pred_f<-ggpredict(nb_mental_health, terms="family_interrupt_the_job")
print(mental_pred_f)

# Predicted counts of mental_health_status

family_interrupt_the_job | Predicted |      95% CI
--------------------------------------------------
                       1 |      8.39 | 5.16, 13.65
                       2 |      9.71 | 6.07, 15.53
                       3 |     11.23 | 6.89, 18.29
                       4 |     12.98 | 7.59, 22.21

Adjusted for:
*  job_interrupt_the_family =  2.35
*           marriage_status =     1
* total_people_in_household =  2.03
*                 age_group =     1
*                 education =  2.89
*             family_income = 11.66
*                      race =     1
*                       sex =  1.48
*            working_status =     1

custom_order <- c("Never", "Rarely"," Sometimes", "Often")

ggplot(physical_pred_j, aes(x = x, y = predicted)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7, fill="grey") +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.4, position = position_dodge(width = 0.7)) +
  theme_minimal(base_size = 13) +
  labs(x = "Job interrrupt family", y = "Predicted Count: Physical Health", 
       title = "Predicted Count for job interrupt family- Physical Health", 
       subtitle = "Results from Negative-Binomial Model")+
  scale_x_discrete(limits = custom_order)

ggplot(physical_pred_f, aes(x = x, y = predicted)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7, fill="grey") +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.4, position = position_dodge(width = 0.7)) +
  theme_minimal(base_size = 13) +
  labs(x = "Family interrrupt job", y = "Predicted Count: Physical Health", 
       title = "Predicted Count for family interrupt job- Physical Health", 
       subtitle = "Results from Negative-Binomial Model")+
  scale_x_discrete(limits = custom_order)

ggplot(mental_pred_j, aes(x = x, y = predicted)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7, fill="grey") +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.4, position = position_dodge(width = 0.7)) +
  theme_minimal(base_size = 13) +
  labs(x = "Job interrrupt family", y = "Predicted Count: Mental Health", 
       title = "Predicted Count for job interrupt family- Mental Health", 
       subtitle = "Results from Negative-Binomial Model")+
  scale_x_discrete(limits = custom_order)

ggplot(mental_pred_f, aes(x = x, y = predicted)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7, fill="grey") +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.4, position = position_dodge(width = 0.7)) +
  theme_minimal(base_size = 13) +
  labs(x = "Family interrrupt job", y = "Predicted Count: Mental Health", 
       title = "Predicted Count for family interrupt job- Mental Health", 
       subtitle = "Results from Negative-Binomial Model")+
  scale_x_discrete(limits = custom_order)

Ordered Model

final_data$general_health_status <- as.numeric (GSS2022$health) 


final_data_new <- final_data %>% mutate(general_health_status = case_when(
      general_health_status == 1 ~ 4,
      general_health_status == 2 ~ 3,
      general_health_status == 3 ~ 2,
      general_health_status == 4 ~ 1,
      general_health_status == "NA"~ NA_real_),
      general_health_status = labelled(general_health_status, 
      c(`POOR` = 1, `FAIR` = 2, `GOOD` = 3, `EXCELLET` = 4)))%>% drop_na()

data_health <- final_data_new%>% dplyr::select(c(general_health_status,job_interrupt_the_family, family_interrupt_the_job,marriage_status, total_people_in_household,  age_group,education, family_income, race, sex, working_status)) %>% mutate(marriage_status= as.factor(marriage_status)) %>% mutate(working_status= as.factor(working_status)) %>% mutate(race= as.factor(race))

Work-life conflicts and general health

ordered_health_l<- polr(factor(general_health_status)~job_interrupt_the_family+family_interrupt_the_job+ marriage_status + total_people_in_household+age_group+education+family_income+race+sex+ working_status, data=data_health, na.action = na.exclude,method = "logistic") 

brant(ordered_health_l)

------------------------------------------------------------ 
Test for            X2  df  probability 
------------------------------------------------------------ 
Omnibus             46.38   30  0.03
job_interrupt_the_family    1.53    2   0.46
family_interrupt_the_job    1.44    2   0.49
marriage_status2        1.85    2   0.4
marriage_status3        3.12    2   0.21
marriage_status4        0.3 2   0.86
marriage_status5        6.99    2   0.03
total_people_in_household   0.69    2   0.71
age_group           3.12    2   0.21
education           0.78    2   0.68
family_income           2.06    2   0.36
race2               4.39    2   0.11
race3               0.1 2   0.95
sex             0.11    2   0.94
working_status2     3.49    2   0.17
working_status3     9.03    2   0.01
------------------------------------------------------------ 

H0: Parallel Regression Assumption holds

Warning in brant(ordered_health_l): 73 combinations in table(dv,ivs) do not
occur. Because of that, the test results might be invalid.

summary(ordered_health_l)


Re-fitting to get Hessian

Call:
polr(formula = factor(general_health_status) ~ job_interrupt_the_family + 
    family_interrupt_the_job + marriage_status + total_people_in_household + 
    age_group + education + family_income + race + sex + working_status, 
    data = data_health, na.action = na.exclude, method = "logistic")

Coefficients:
                              Value Std. Error t value
job_interrupt_the_family  -0.122948    0.06128 -2.0064
family_interrupt_the_job  -0.144652    0.06570 -2.2017
marriage_status2          -0.161949    0.33224 -0.4874
marriage_status3           0.284050    0.16161  1.7576
marriage_status4           0.310883    0.34954  0.8894
marriage_status5           0.322837    0.12437  2.5959
total_people_in_household  0.006078    0.03583  0.1696
age_group                 -0.098659    0.06762 -1.4590
education                  0.298656    0.04085  7.3118
family_income              0.070874    0.03420  2.0724
race2                     -0.229835    0.14355 -1.6011
race3                     -0.306331    0.15274 -2.0056
sex                       -0.057793    0.09943 -0.5813
working_status2            0.599278    0.25707  2.3312
working_status3            0.474477    0.23386  2.0289

Intercepts:
    Value   Std. Error t value
1|2 -2.5022  0.5168    -4.8415
2|3  0.1830  0.4969     0.3683
3|4  2.8677  0.5022     5.7106

Residual Deviance: 3378.744 
AIC: 3414.744

ordered_health_p<- polr(factor(general_health_status)~job_interrupt_the_family+family_interrupt_the_job+ marriage_status+total_people_in_household+age_group+education+family_income+race+sex+working_status, data=data_health, 
           na.action = na.exclude, method = "probit") 

brant(ordered_health_p)

------------------------------------------------------------ 
Test for            X2  df  probability 
------------------------------------------------------------ 
Omnibus             46.38   30  0.03
job_interrupt_the_family    1.53    2   0.46
family_interrupt_the_job    1.44    2   0.49
marriage_status2        1.85    2   0.4
marriage_status3        3.12    2   0.21
marriage_status4        0.3 2   0.86
marriage_status5        6.99    2   0.03
total_people_in_household   0.69    2   0.71
age_group           3.12    2   0.21
education           0.78    2   0.68
family_income           2.06    2   0.36
race2               4.39    2   0.11
race3               0.1 2   0.95
sex             0.11    2   0.94
working_status2     3.49    2   0.17
working_status3     9.03    2   0.01
------------------------------------------------------------ 

H0: Parallel Regression Assumption holds

Warning in brant(ordered_health_p): 73 combinations in table(dv,ivs) do not
occur. Because of that, the test results might be invalid.

summary(ordered_health_p)


Re-fitting to get Hessian

Call:
polr(formula = factor(general_health_status) ~ job_interrupt_the_family + 
    family_interrupt_the_job + marriage_status + total_people_in_household + 
    age_group + education + family_income + race + sex + working_status, 
    data = data_health, na.action = na.exclude, method = "probit")

Coefficients:
                              Value Std. Error t value
job_interrupt_the_family  -0.071025    0.03492 -2.0340
family_interrupt_the_job  -0.087773    0.03739 -2.3475
marriage_status2          -0.079601    0.19586 -0.4064
marriage_status3           0.166165    0.09127  1.8207
marriage_status4           0.170738    0.18981  0.8995
marriage_status5           0.196997    0.07044  2.7967
total_people_in_household  0.005202    0.02046  0.2543
age_group                 -0.063929    0.03847 -1.6617
education                  0.172750    0.02323  7.4350
family_income              0.038517    0.01933  1.9926
race2                     -0.104945    0.08084 -1.2982
race3                     -0.171684    0.08652 -1.9844
sex                       -0.036818    0.05663 -0.6502
working_status2            0.363096    0.14339  2.5322
working_status3            0.299482    0.12978  2.3076

Intercepts:
    Value   Std. Error t value
1|2 -1.3057  0.2855    -4.5735
2|3  0.0471  0.2818     0.1671
3|4  1.6649  0.2836     5.8711

Residual Deviance: 3376.882 
AIC: 3412.882

ols_health_p<-glm(as.numeric(general_health_status)~job_interrupt_the_family+family_interrupt_the_job+ marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+ working_status, data=data_health) #PID treated as factor
summary(ols_health_p)


Call:
glm(formula = as.numeric(general_health_status) ~ job_interrupt_the_family + 
    family_interrupt_the_job + marriage_status + total_people_in_household + 
    age_group + education + family_income + race + sex + working_status, 
    data = data_health)

Coefficients:
                           Estimate Std. Error t value Pr(>|t|)    
(Intercept)                2.427030   0.175244  13.849  < 2e-16 ***
job_interrupt_the_family  -0.043814   0.021612  -2.027  0.04279 *  
family_interrupt_the_job  -0.054152   0.023150  -2.339  0.01944 *  
marriage_status2          -0.042986   0.121946  -0.353  0.72451    
marriage_status3           0.105862   0.056482   1.874  0.06108 .  
marriage_status4           0.104097   0.117846   0.883  0.37719    
marriage_status5           0.124820   0.043531   2.867  0.00419 ** 
total_people_in_household  0.003373   0.012665   0.266  0.79004    
age_group                 -0.040001   0.023762  -1.683  0.09249 .  
education                  0.106087   0.014181   7.481  1.2e-13 ***
family_income              0.025330   0.012064   2.100  0.03592 *  
race2                     -0.065996   0.050237  -1.314  0.18913    
race3                     -0.106880   0.053725  -1.989  0.04683 *  
sex                       -0.023005   0.035035  -0.657  0.51152    
working_status2            0.232353   0.089033   2.610  0.00914 ** 
working_status3            0.194280   0.080671   2.408  0.01614 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for gaussian family taken to be 0.4714587)

    Null deviance: 820.85  on 1645  degrees of freedom
Residual deviance: 768.48  on 1630  degrees of freedom
AIC: 3451.4

Number of Fisher Scoring iterations: 2

stargazer(ordered_health_l, ordered_health_p, ols_health_p,digits=3, type="text"
          ,title="Results", align=TRUE, dep.var.labels=c("General Health Status","General Health Status"), covariate.labels=c("Job interrupt the family", "Family interrupt the job", "Marriage status:WIDOWED","Marriage status:DIVORCED", "Marriage status:SEPARATED","Marriage status:NEVER MARRIED","Total people in household", "Age group", "Education", "Family income", "Race:BLACK", "Race:OTHER","Sex", "Working Status:WORKING FULL TIME", "Working Status:WORKING PART TIME" ))


Results
=============================================================================
                                             Dependent variable:             
                                 --------------------------------------------
                                 General Health Status  General Health Status
                                   ordered    ordered          normal        
                                  logistic     probit                        
                                     (1)        (2)              (3)         
-----------------------------------------------------------------------------
Job interrupt the family          -0.123**    -0.071**        -0.044**       
                                   (0.061)    (0.035)          (0.022)       
                                                                             
Family interrupt the job          -0.145**    -0.088**        -0.054**       
                                   (0.066)    (0.037)          (0.023)       
                                                                             
Marriage status:WIDOWED            -0.162      -0.080          -0.043        
                                   (0.332)    (0.196)          (0.122)       
                                                                             
Marriage status:DIVORCED           0.284*      0.166*          0.106*        
                                   (0.162)    (0.091)          (0.056)       
                                                                             
Marriage status:SEPARATED           0.311      0.171            0.104        
                                   (0.350)    (0.190)          (0.118)       
                                                                             
Marriage status:NEVER MARRIED     0.323***    0.197***        0.125***       
                                   (0.124)    (0.070)          (0.044)       
                                                                             
Total people in household           0.006      0.005            0.003        
                                   (0.036)    (0.020)          (0.013)       
                                                                             
Age group                          -0.099     -0.064*          -0.040*       
                                   (0.068)    (0.038)          (0.024)       
                                                                             
Education                         0.299***    0.173***        0.106***       
                                   (0.041)    (0.023)          (0.014)       
                                                                             
Family income                      0.071**    0.039**          0.025**       
                                   (0.034)    (0.019)          (0.012)       
                                                                             
Race:BLACK                         -0.230      -0.105          -0.066        
                                   (0.144)    (0.081)          (0.050)       
                                                                             
Race:OTHER                        -0.306**    -0.172**        -0.107**       
                                   (0.153)    (0.087)          (0.054)       
                                                                             
Sex                                -0.058      -0.037          -0.023        
                                   (0.099)    (0.057)          (0.035)       
                                                                             
Working Status:WORKING FULL TIME   0.599**    0.363**         0.232***       
                                   (0.257)    (0.143)          (0.089)       
                                                                             
Working Status:WORKING PART TIME   0.474**    0.299**          0.194**       
                                   (0.234)    (0.130)          (0.081)       
                                                                             
Constant                                                      2.427***       
                                                               (0.175)       
                                                                             
-----------------------------------------------------------------------------
Observations                        1,646      1,646            1,646        
Log Likelihood                                               -1,709.700      
Akaike Inf. Crit.                                             3,451.401      
=============================================================================
Note:                                             *p<0.1; **p<0.05; ***p<0.01

j_health <- ggpredict(ordered_health_l, terms="job_interrupt_the_family")
print(j_health)

# Predicted probabilities of general_health_status

general_health_status: 1

job_interrupt_the_family | Predicted |     95% CI
-------------------------------------------------
                       1 |      0.03 | 0.01, 0.07
                       2 |      0.03 | 0.01, 0.08
                       3 |      0.04 | 0.01, 0.09
                       4 |      0.04 | 0.02, 0.10

general_health_status: 2

job_interrupt_the_family | Predicted |     95% CI
-------------------------------------------------
                       1 |      0.27 | 0.13, 0.48
                       2 |      0.30 | 0.15, 0.51
                       3 |      0.32 | 0.16, 0.54
                       4 |      0.35 | 0.17, 0.58

general_health_status: 3

job_interrupt_the_family | Predicted |     95% CI
-------------------------------------------------
                       1 |      0.56 | 0.34, 0.76
                       2 |      0.55 | 0.33, 0.75
                       3 |      0.53 | 0.31, 0.74
                       4 |      0.52 | 0.29, 0.74

general_health_status: 4

job_interrupt_the_family | Predicted |     95% CI
-------------------------------------------------
                       1 |      0.14 | 0.06, 0.28
                       2 |      0.12 | 0.05, 0.26
                       3 |      0.11 | 0.05, 0.24
                       4 |      0.10 | 0.04, 0.22

Adjusted for:
*  family_interrupt_the_job =  2.06
*           marriage_status =     1
* total_people_in_household =  1.82
*                 age_group =  2.18
*                 education =  3.02
*             family_income = 11.61
*                      race =     1
*                       sex =  1.50
*            working_status =     1

f_health <- ggpredict(ordered_health_l, terms="family_interrupt_the_job")
print(f_health)

# Predicted probabilities of general_health_status

general_health_status: 1

family_interrupt_the_job | Predicted |     95% CI
-------------------------------------------------
                       1 |      0.03 | 0.01, 0.07
                       2 |      0.03 | 0.01, 0.08
                       3 |      0.04 | 0.02, 0.09
                       4 |      0.04 | 0.02, 0.11

general_health_status: 2

family_interrupt_the_job | Predicted |     95% CI
-------------------------------------------------
                       1 |      0.28 | 0.13, 0.49
                       2 |      0.30 | 0.15, 0.52
                       3 |      0.33 | 0.16, 0.56
                       4 |      0.36 | 0.17, 0.60

general_health_status: 3

family_interrupt_the_job | Predicted |     95% CI
-------------------------------------------------
                       1 |      0.56 | 0.34, 0.76
                       2 |      0.54 | 0.32, 0.75
                       3 |      0.53 | 0.30, 0.74
                       4 |      0.50 | 0.28, 0.73

general_health_status: 4

family_interrupt_the_job | Predicted |     95% CI
-------------------------------------------------
                       1 |      0.13 | 0.06, 0.28
                       2 |      0.12 | 0.05, 0.25
                       3 |      0.10 | 0.04, 0.23
                       4 |      0.09 | 0.04, 0.21

Adjusted for:
*  job_interrupt_the_family =  2.34
*           marriage_status =     1
* total_people_in_household =  1.82
*                 age_group =  2.18
*                 education =  3.02
*             family_income = 11.61
*                      race =     1
*                       sex =  1.50
*            working_status =     1

ggplot(j_health, aes(x = response.level, y = predicted, fill = factor(x))) +
  geom_bar(stat = "identity", position = "dodge") +  # Bar plot
  # Add error bars for confidence intervals
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.4, position = position_dodge(width = 0.7))+
  theme_minimal(base_size = 13)+
  labs(x = "Response Level: Genral Health Status", y = "Predicted Probability", 
       title = "Predicted Probability about General Health Status with Job interfere the family life")+scale_fill_discrete(labels= c("1"= "NEVER","2"="RARELY","3"="SOMETIMES","4"="OFTEN"))+
  scale_x_discrete(labels= c("1"="POOR" , "2" = "FAIR", "3"= "GOOD" , "4" = "EXCELLET"))+
  guides(fill = guide_legend(title = "Job interfere the family life", nrow=1), color = "none")+
   theme(legend.position = "bottom")

ggplot(f_health, aes(x = response.level, y = predicted, fill = factor(x))) +
  geom_bar(stat = "identity", position = "dodge") +  # Bar plot
  # Add error bars for confidence intervals
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.4, position = position_dodge(width = 0.7))+
  theme_minimal(base_size = 13)+
  labs(x = "Response Level: Genral Health Status", y = "Predicted Probability", 
       title = "Predicted Probability about General Health Status with Family life interfere the Job")+scale_fill_discrete(labels= c("1"= "NEVER","2"="RARELY","3"="SOMETIMES","4"="OFTEN"))+
  scale_x_discrete(labels= c("1"="POOR" , "2" = "FAIR", "3"= "GOOD" , "4" = "EXCELLET"))+
  guides(fill = guide_legend(title = "Family life interfere the job", nrow=1), color = "none")+
   theme(legend.position = "bottom")

Matching Model

 test_data_new <- final_data %>%
  mutate(binary_wif=case_when(
  job_interrupt_the_family == 1 ~ 0, 
  job_interrupt_the_family == 2 ~ 0,
  job_interrupt_the_family == 3 ~ 1,
  job_interrupt_the_family == 4 ~ 1),
  binary_wif = labelled(binary_wif, 
      c(`No` = 0, `Yes` = 1)))%>% mutate(binary_fiw=case_when(
  family_interrupt_the_job == 1 ~ 0, 
  family_interrupt_the_job == 2 ~ 0,
  family_interrupt_the_job == 3 ~ 1,
  family_interrupt_the_job == 4 ~ 1),
  binary_fiw = labelled(binary_fiw, 
      c(`No` = 0, `Yes` = 1)))

head(test_data_new)

# A tibble: 6 × 16
  job_interrupt_the_family family_interrupt_the_job physical_health_status
  <dbl+lbl>                <dbl+lbl>                                 <dbl>
1  3 [Sometimes]            2 [Rarely]                                  30
2 NA                       NA                                           NA
3  2 [Rarely]               2 [Rarely]                                   0
4  4 [Often]                1 [Never]                                    4
5 NA                       NA                                           NA
6  4 [Often]                3 [Sometimes]                                0
# ℹ 13 more variables: mental_health_status <dbl>, marriage_status <dbl+lbl>,
#   total_people_in_household <dbl+lbl>, age_group <dbl+lbl>,
#   education <dbl+lbl>, family_income <dbl+lbl>, race <dbl+lbl>,
#   sex <dbl+lbl>, working_status <dbl+lbl>, weight <dbl>,
#   general_health_status <dbl>, binary_wif <dbl+lbl>, binary_fiw <dbl+lbl>

test_data_new <- test_data_new %>% dplyr:: select(- c(weight, general_health_status))%>% mutate(job_interrupt_the_family= as.numeric(job_interrupt_the_family), family_interrupt_the_job= as.numeric(family_interrupt_the_job), physical_health_status= as.numeric(physical_health_status), mental_health_status= as.numeric(mental_health_status),  marriage_status= as.numeric(marriage_status), total_people_in_household= as.numeric(total_people_in_household), age_group=as.numeric(age_group), education= as.numeric(education),family_income= as.numeric(family_income), race= as.numeric(race), sex= as.numeric(sex), working_status = as.numeric(working_status), binary_wif= as.numeric(binary_wif),binary_fiw= as.numeric(binary_fiw)) %>% drop_na()
head(test_data_new)

# A tibble: 6 × 14
  job_interrupt_the_family family_interrupt_the_job physical_health_status
                     <dbl>                    <dbl>                  <dbl>
1                        3                        2                     30
2                        2                        2                      0
3                        4                        1                      4
4                        4                        2                      2
5                        2                        2                      0
6                        3                        4                      5
# ℹ 11 more variables: mental_health_status <dbl>, marriage_status <dbl>,
#   total_people_in_household <dbl>, age_group <dbl>, education <dbl>,
#   family_income <dbl>, race <dbl>, sex <dbl>, working_status <dbl>,
#   binary_wif <dbl>, binary_fiw <dbl>

The relationship between family-job conflict and physical health

test_data_new_wif_p<-test_data_new %>% dplyr::select(-c(job_interrupt_the_family,family_interrupt_the_job,binary_fiw,mental_health_status))

head(test_data_new_wif_p)

# A tibble: 6 × 10
  physical_health_status marriage_status total_people_in_household age_group
                   <dbl>           <dbl>                     <dbl>     <dbl>
1                     30               3                         1         4
2                      0               3                         3         3
3                      4               1                         1         1
4                      2               1                         3         1
5                      0               5                         2         2
6                      5               1                         2         2
# ℹ 6 more variables: education <dbl>, family_income <dbl>, race <dbl>,
#   sex <dbl>, working_status <dbl>, binary_wif <dbl>

##What is the impact of employment training on earnings?
#Testing for Imbalance Between Groups
check_wif_p<-test_data_new_wif_p %>%
  group_by(binary_wif) %>%
  summarise_at(vars(physical_health_status,marriage_status,total_people_in_household,age_group,education,family_income,race,sex, working_status), 
               list(mean = mean,
                    var = var))

round(t(check_wif_p), 3)

                                 [,1]   [,2]
binary_wif                      0.000  1.000
physical_health_status_mean     2.599  2.896
marriage_status_mean            3.140  3.294
total_people_in_household_mean  1.861  1.889
age_group_mean                  2.194  2.112
education_mean                  2.950  3.170
family_income_mean             11.556 11.708
race_mean                       1.534  1.493
sex_mean                        1.504  1.497
working_status_mean             2.692  2.782
physical_health_status_var     39.152 35.366
marriage_status_var             3.220  3.216
total_people_in_household_var   2.170  2.022
age_group_var                   0.736  0.584
education_var                   1.581  1.611
family_income_var               2.493  1.785
race_var                        0.595  0.572
sex_var                         0.250  0.250
working_status_var              0.312  0.272

match_exact_wif_p <- matchit(binary_wif~physical_health_status+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, 
                       method = "exact", data = test_data_new_wif_p)
data_exact_wif_p <- match.data(match_exact_wif_p) #Creates new dataframe that only includes the matched cases
summary(match_exact_wif_p)


Call:
matchit(formula = binary_wif ~ physical_health_status + marriage_status + 
    total_people_in_household + age_group + education + family_income + 
    race + sex + working_status, data = test_data_new_wif_p, 
    method = "exact")

Summary of Balance for All Data:
                          Means Treated Means Control Std. Mean Diff.
physical_health_status           2.8958        2.5986          0.0500
marriage_status                  3.2940        3.1398          0.0860
total_people_in_household        1.8889        1.8611          0.0195
age_group                        2.1123        2.1935         -0.1063
education                        3.1701        2.9498          0.1736
family_income                   11.7083       11.5565          0.1137
race                             1.4931        1.5341         -0.0542
sex                              1.4965        1.5036         -0.0141
working_status                   2.7824        2.6918          0.1737
                          Var. Ratio eCDF Mean eCDF Max
physical_health_status        0.9033    0.0202   0.1027
marriage_status               0.9988    0.0308   0.0528
total_people_in_household     0.9320    0.0095   0.0301
age_group                     0.7941    0.0288   0.0675
education                     1.0185    0.0441   0.0867
family_income                 0.7161    0.0127   0.0511
race                          0.9616    0.0137   0.0289
sex                           1.0003    0.0035   0.0071
working_status                0.8729    0.0313   0.0923

Summary of Balance for Matched Data:
                          Means Treated Means Control Std. Mean Diff.
physical_health_status           0.3833        0.3833               0
marriage_status                  3.4867        3.4867               0
total_people_in_household        1.5967        1.5967               0
age_group                        2.1167        2.1167               0
education                        3.2200        3.2200               0
family_income                   11.9833       11.9833              -0
race                             1.3700        1.3700               0
sex                              1.4100        1.4100               0
working_status                   2.9433        2.9433               0
                          Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
physical_health_status        0.9988         0        0               0
marriage_status               0.9988         0        0               0
total_people_in_household     0.9988         0        0               0
age_group                     0.9988         0        0               0
education                     0.9988         0        0               0
family_income                 0.9988         0        0               0
race                          0.9988         0        0               0
sex                           0.9988         0        0               0
working_status                0.9988         0        0               0

Sample Sizes:
              Control Treated
All           1116.       864
Matched (ESS)  221.64     300
Matched        338.       300
Unmatched      778.       564
Discarded        0.         0

imbalance_exact_wif_p <- imbalance(group = data_exact_wif_p$binary_wif, data = data_exact_wif_p, 
          drop = c("physical_health_status", "binary_wif", "weights",  "subclass")) #With matched data, always add weights and subclass here

Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect

imbalance_exact_wif_p


Multivariate Imbalance Measure: L1=0.241
Percentage of local common support: LCS=100.0%

Univariate Imbalance Measures:

                           statistic   type          L1 min 25% 50% 75% max
marriage_status           2.99165811 (Chi2) 0.052998028  NA  NA  NA  NA  NA
total_people_in_household 2.27090332 (Chi2) 0.039112426  NA  NA  NA  NA  NA
age_group                 3.16844032 (Chi2) 0.032682446  NA  NA  NA  NA  NA
education                 3.85049816 (Chi2) 0.057080868  NA  NA  NA  NA  NA
family_income             0.01433048 (Chi2) 0.000000000  NA  NA  NA  NA  NA
race                      0.59561223 (Chi2) 0.016824458  NA  NA  NA  NA  NA
sex                       0.03181756 (Chi2) 0.010118343  NA  NA  NA  NA  NA
working_status            1.00454425 (Chi2) 0.003708087  NA  NA  NA  NA  NA

###Match Coarsened Exact 
###Perform the matching here with code that resembles most regressions
match_cem_wif_p <- matchit(binary_wif ~physical_health_status+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, 
                       method = "cem", data = test_data_new_wif_p)
data_cem_wif_p <- match.data(match_cem_wif_p) #Creates new dataframe that only includes the matched cases
summary(match_cem_wif_p)


Call:
matchit(formula = binary_wif ~ physical_health_status + marriage_status + 
    total_people_in_household + age_group + education + family_income + 
    race + sex + working_status, data = test_data_new_wif_p, 
    method = "cem")

Summary of Balance for All Data:
                          Means Treated Means Control Std. Mean Diff.
physical_health_status           2.8958        2.5986          0.0500
marriage_status                  3.2940        3.1398          0.0860
total_people_in_household        1.8889        1.8611          0.0195
age_group                        2.1123        2.1935         -0.1063
education                        3.1701        2.9498          0.1736
family_income                   11.7083       11.5565          0.1137
race                             1.4931        1.5341         -0.0542
sex                              1.4965        1.5036         -0.0141
working_status                   2.7824        2.6918          0.1737
                          Var. Ratio eCDF Mean eCDF Max
physical_health_status        0.9033    0.0202   0.1027
marriage_status               0.9988    0.0308   0.0528
total_people_in_household     0.9320    0.0095   0.0301
age_group                     0.7941    0.0288   0.0675
education                     1.0185    0.0441   0.0867
family_income                 0.7161    0.0127   0.0511
race                          0.9616    0.0137   0.0289
sex                           1.0003    0.0035   0.0071
working_status                0.8729    0.0313   0.0923

Summary of Balance for Matched Data:
                          Means Treated Means Control Std. Mean Diff.
physical_health_status           0.6743        0.6372          0.0062
marriage_status                  3.4784        3.4784          0.0000
total_people_in_household        1.5573        1.5573          0.0000
age_group                        2.1527        2.1527          0.0000
education                        3.2952        3.2952          0.0000
family_income                   11.9822       11.9822          0.0000
race                             1.3740        1.3740          0.0000
sex                              1.4173        1.4173          0.0000
working_status                   2.9389        2.9389          0.0000
                          Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
physical_health_status        0.9385    0.0034   0.0319          0.0947
marriage_status               0.9990    0.0000   0.0000          0.0000
total_people_in_household     0.9990    0.0000   0.0000          0.0000
age_group                     0.9990    0.0000   0.0000          0.0000
education                     0.9990    0.0000   0.0000          0.0000
family_income                 0.9990    0.0000   0.0000          0.0000
race                          0.9990    0.0000   0.0000          0.0000
sex                           0.9990    0.0000   0.0000          0.0000
working_status                0.9990    0.0000   0.0000          0.0000

Sample Sizes:
              Control Treated
All           1116.       864
Matched (ESS)  280.57     393
Matched        423.       393
Unmatched      693.       471
Discarded        0.         0

imbalance_cem_wif_p <- imbalance(group = data_cem_wif_p$binary_wif, data = data_cem_wif_p, 
          drop = c("physical_health_status", "binary_wif", "weights",  "subclass"))

Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect

imbalance_cem_wif_p


Multivariate Imbalance Measure: L1=0.241
Percentage of local common support: LCS=100.0%

Univariate Imbalance Measures:

                           statistic   type          L1 min 25% 50% 75% max
marriage_status           4.28087075 (Chi2) 0.061700323  NA  NA  NA  NA  NA
total_people_in_household 1.66414135 (Chi2) 0.024488838  NA  NA  NA  NA  NA
age_group                 4.64491477 (Chi2) 0.043365275  NA  NA  NA  NA  NA
education                 5.13965175 (Chi2) 0.062620685  NA  NA  NA  NA  NA
family_income             0.08215837 (Chi2) 0.000000000  NA  NA  NA  NA  NA
race                      0.87142065 (Chi2) 0.020049447  NA  NA  NA  NA  NA
sex                       0.33132640 (Chi2) 0.022413513  NA  NA  NA  NA  NA
working_status            0.66705767 (Chi2) 0.002724992  NA  NA  NA  NA  NA

#Compare t-tests of DV on treated - no control variables 
t.test(test_data_new_wif_p$physical_health_status, test_data_new_wif_p$binary_wif)


    Welch Two Sample t-test

data:  test_data_new_wif_p$physical_health_status and test_data_new_wif_p$binary_wif
t = 16.599, df = 2005, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 2.021131 2.562707
sample estimates:
mean of x mean of y 
2.7282828 0.4363636

#Estimate Linear Regression on Raw Data
lm1_wif_p<-lm(physical_health_status~ binary_wif+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, 
        test_data_new_wif_p)
summary(lm1_wif_p)


Call:
lm(formula = physical_health_status ~ binary_wif + marriage_status + 
    total_people_in_household + age_group + education + family_income + 
    race + sex + working_status, data = test_data_new_wif_p)

Residuals:
    Min      1Q  Median      3Q     Max 
-6.1422 -2.7202 -1.8632 -0.0346 28.7303 

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)    
(Intercept)                7.38345    1.40277   5.263 1.57e-07 ***
binary_wif                 0.53314    0.27608   1.931   0.0536 .  
marriage_status           -0.18970    0.08314  -2.282   0.0226 *  
total_people_in_household -0.06786    0.09574  -0.709   0.4785    
age_group                  0.37601    0.17885   2.102   0.0356 *  
education                 -0.16131    0.11075  -1.457   0.1454    
family_income             -0.03139    0.09525  -0.330   0.7417    
race                      -0.36921    0.17847  -2.069   0.0387 *  
sex                        0.54958    0.27581   1.993   0.0464 *  
working_status            -1.60085    0.25550  -6.266 4.55e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 6.032 on 1970 degrees of freedom
Multiple R-squared:  0.03424,   Adjusted R-squared:  0.02983 
F-statistic:  7.76 on 9 and 1970 DF,  p-value: 2.672e-11

#Estimate Linear Regression on Coarsened Exact Matched Data
lm2_wif_p<-lm(physical_health_status~ binary_wif+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, 
        data_cem_wif_p, weights = weights)
summary(lm2_wif_p)


Call:
lm(formula = physical_health_status ~ binary_wif + marriage_status + 
    total_people_in_household + age_group + education + family_income + 
    race + sex + working_status, data = data_cem_wif_p, weights = weights)

Weighted Residuals:
    Min      1Q  Median      3Q     Max 
-2.3124 -0.7259 -0.4870 -0.0952 19.7971 

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)  
(Intercept)               -2.98140    3.50277  -0.851   0.3949  
binary_wif                 0.03713    0.12405   0.299   0.7648  
marriage_status           -0.08665    0.04031  -2.149   0.0319 *
total_people_in_household -0.03662    0.05846  -0.626   0.5312  
age_group                  0.11750    0.09675   1.214   0.2249  
education                  0.07494    0.05144   1.457   0.1456  
family_income              0.18698    0.28533   0.655   0.5124  
race                      -0.15027    0.09078  -1.655   0.0983 .
sex                        0.17660    0.13396   1.318   0.1878  
working_status             0.40588    0.24538   1.654   0.0985 .
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.771 on 806 degrees of freedom
Multiple R-squared:  0.02066,   Adjusted R-squared:  0.009721 
F-statistic: 1.889 on 9 and 806 DF,  p-value: 0.05037

 #Compare results 
plot_summs(lm1_wif_p, lm2_wif_p)

stargazer(lm1_wif_p, lm2_wif_p, type = "text", digits = 3, 
           dep.var.labels = c("Physical Health Status"),
          column.labels = c("Full Data", "Matched Data"),
          covariate.labels=c("Job interrupt the family(binary)", "Marriage status","Total people in household", "Age group", "Education","Family income","Race", "Sex", "Working Status","Constant"))


=============================================================================
                                             Dependent variable:             
                                 --------------------------------------------
                                            Physical Health Status           
                                        Full Data            Matched Data    
                                           (1)                   (2)         
-----------------------------------------------------------------------------
Job interrupt the family(binary)         0.533*                 0.037        
                                         (0.276)               (0.124)       
                                                                             
Marriage status                         -0.190**               -0.087**      
                                         (0.083)               (0.040)       
                                                                             
Total people in household                -0.068                 -0.037       
                                         (0.096)               (0.058)       
                                                                             
Age group                                0.376**                0.117        
                                         (0.179)               (0.097)       
                                                                             
Education                                -0.161                 0.075        
                                         (0.111)               (0.051)       
                                                                             
Family income                            -0.031                 0.187        
                                         (0.095)               (0.285)       
                                                                             
Race                                    -0.369**               -0.150*       
                                         (0.178)               (0.091)       
                                                                             
Sex                                      0.550**                0.177        
                                         (0.276)               (0.134)       
                                                                             
Working Status                          -1.601***               0.406*       
                                         (0.255)               (0.245)       
                                                                             
Constant                                7.383***                -2.981       
                                         (1.403)               (3.503)       
                                                                             
-----------------------------------------------------------------------------
Observations                              1,980                  816         
R2                                        0.034                 0.021        
Adjusted R2                               0.030                 0.010        
Residual Std. Error                 6.032 (df = 1970)      1.771 (df = 806)  
F Statistic                      7.760*** (df = 9; 1970) 1.889* (df = 9; 806)
=============================================================================
Note:                                             *p<0.1; **p<0.05; ***p<0.01

#######Entropy Balancing
# Create a subset of the dataset with the selected variables
treatment_var_wif_p <- "binary_wif"
covariates_vars_wif_p <- c("marriage_status", "total_people_in_household", "age_group","education", "family_income", "race", "sex", "working_status")
dependent_var_wif_p <- "physical_health_status"

# Prepare treatment and covariates
treatment_wif_p <- test_data_new_wif_p$binary_wif
covariates_wif_p <- test_data_new_wif_p[, covariates_vars_wif_p]

# Run entropy balancing
e_bal_wif_p <- ebalance(
  Treatment = treatment_wif_p,
  X = covariates_wif_p,
  max.iterations = 200,
  constraint.tolerance = 1)

Converged within tolerance

# Add weights back to LL
test_data_new_wif_p$eb_weight_wif_p <- NA
test_data_new_wif_p$eb_weight_wif_p[test_data_new_wif_p[[treatment_var_wif_p]] == 1] <- 1  # Treated units get weight = 1
test_data_new_wif_p$eb_weight_wif_p[test_data_new_wif_p[[treatment_var_wif_p]] == 0] <- e_bal_wif_p$w  # Control units get EB weights

# Final data for regression
eb_data_wif_p <- test_data_new_wif_p %>% filter(!is.na(eb_weight_wif_p))  # Exclude unmatched if any

#data for analysis 
#Now have a weight called 'eb_weight' that can be used in analysis 

##Let's check that the two groups are equal now 
eb_data_wif_p %>%
  group_by(binary_wif) %>%
  summarise(
    age_weighted_mean = wtd.mean(age_group, weights = eb_weight_wif_p), 
    age_weighted_variance = wtd.var(age_group, weights = eb_weight_wif_p)
  )

# A tibble: 2 × 3
  binary_wif age_weighted_mean age_weighted_variance
       <dbl>             <dbl>                 <dbl>
1          0              2.11                 0.676
2          1              2.11                 0.584

check_wif_p_2 <- eb_data_wif_p %>%
  group_by(binary_wif) %>%
  summarise(across(.cols = c(marriage_status, total_people_in_household, age_group,education, family_income, race, sex, working_status),
                   .fns = list(
                     weighted_mean = ~wtd.mean(., weights = eb_weight_wif_p),
                     weighted_variance = ~wtd.var(., weights = eb_weight_wif_p)),
                   .names = "{.col}_{.fn}"),
            .groups = "drop")

check_t<- round(t(check_wif_p_2), 2)

print(check_wif_p_2)

# A tibble: 2 × 17
  binary_wif marriage_status_weighted_mean marriage_status_weighted_variance
       <dbl>                         <dbl>                             <dbl>
1          0                          3.29                              3.24
2          1                          3.29                              3.22
# ℹ 14 more variables: total_people_in_household_weighted_mean <dbl>,
#   total_people_in_household_weighted_variance <dbl>,
#   age_group_weighted_mean <dbl>, age_group_weighted_variance <dbl>,
#   education_weighted_mean <dbl>, education_weighted_variance <dbl>,
#   family_income_weighted_mean <dbl>, family_income_weighted_variance <dbl>,
#   race_weighted_mean <dbl>, race_weighted_variance <dbl>,
#   sex_weighted_mean <dbl>, sex_weighted_variance <dbl>, …

lm3_wif_p_2<-lm(physical_health_status~ binary_wif+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, data=eb_data_wif_p, weights = eb_weight_wif_p)
lm4_wif_p_2<-lm(physical_health_status~ binary_wif+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, data=eb_data_wif_p)

stargazer(lm3_wif_p_2, lm4_wif_p_2, type = "text", digits = 3, 
          dep.var.labels = c("Earnings Post Training"),
          column.labels = c("Entropy Balanced", "Raw Data"),
          covariate.labels=c("Job interrupt the family (binary)", "Marriage status","Total people in household", "Age group", "Education","Family income","Race", "Sex", "Working Status" ))


==============================================================
                                      Dependent variable:     
                                  ----------------------------
                                     Earnings Post Training   
                                  Entropy Balanced   Raw Data 
                                         (1)           (2)    
--------------------------------------------------------------
Job interrupt the family (binary)      0.530**        0.533*  
                                       (0.262)       (0.276)  
                                                              
Marriage status                       -0.169**       -0.190** 
                                       (0.080)       (0.083)  
                                                              
Total people in household              -0.098         -0.068  
                                       (0.093)       (0.096)  
                                                              
Age group                              0.346*        0.376**  
                                       (0.178)       (0.179)  
                                                              
Education                              -0.175         -0.161  
                                       (0.106)       (0.111)  
                                                              
Family income                          -0.055         -0.031  
                                       (0.105)       (0.095)  
                                                              
Race                                   -0.312*       -0.369** 
                                       (0.174)       (0.178)  
                                                              
Sex                                    0.666**       0.550**  
                                       (0.268)       (0.276)  
                                                              
Working Status                        -1.386***     -1.601*** 
                                       (0.266)       (0.255)  
                                                              
Constant                              6.899***       7.383*** 
                                       (1.527)       (1.403)  
                                                              
--------------------------------------------------------------
Observations                            1,980         1,980   
R2                                      0.030         0.034   
Adjusted R2                             0.026         0.030   
Residual Std. Error (df = 1970)         5.449         6.032   
F Statistic (df = 9; 1970)            6.803***       7.760*** 
==============================================================
Note:                              *p<0.1; **p<0.05; ***p<0.01

The relationship between family-job conflict and physical health

test_data_new_fiw_p<-test_data_new %>% dplyr::select(-c(job_interrupt_the_family,family_interrupt_the_job,binary_wif,mental_health_status))

head(test_data_new_fiw_p)

# A tibble: 6 × 10
  physical_health_status marriage_status total_people_in_household age_group
                   <dbl>           <dbl>                     <dbl>     <dbl>
1                     30               3                         1         4
2                      0               3                         3         3
3                      4               1                         1         1
4                      2               1                         3         1
5                      0               5                         2         2
6                      5               1                         2         2
# ℹ 6 more variables: education <dbl>, family_income <dbl>, race <dbl>,
#   sex <dbl>, working_status <dbl>, binary_fiw <dbl>

##What is the impact of employment training on earnings?
#Testing for Imbalance Between Groups
check_fiw_p<-test_data_new_fiw_p %>%
  group_by(binary_fiw) %>%
  summarise_at(vars(physical_health_status,marriage_status,total_people_in_household,age_group,education,family_income,race,sex, working_status), 
               list(mean = mean,
                    var = var))

round(t(check_fiw_p), 3)

                                 [,1]   [,2]
binary_fiw                      0.000  1.000
physical_health_status_mean     2.561  3.089
marriage_status_mean            3.094  3.451
total_people_in_household_mean  1.818  1.992
age_group_mean                  2.172  2.129
education_mean                  3.000  3.145
family_income_mean             11.624 11.619
race_mean                       1.508  1.533
sex_mean                        1.496  1.511
working_status_mean             2.728  2.739
physical_health_status_var     37.924 36.464
marriage_status_var             3.224  3.138
total_people_in_household_var   2.050  2.206
age_group_var                   0.723  0.559
education_var                   1.574  1.662
family_income_var               2.167  2.239
race_var                        0.571  0.616
sex_var                         0.250  0.250
working_status_var              0.290  0.311

match_exact_fiw_p <- matchit(binary_fiw ~physical_health_status+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, 
                       method = "exact", data = test_data_new_fiw_p)
data_exact_fiw_p <- match.data(match_exact_fiw_p) #Creates new dataframe that only includes the matched cases
summary(match_exact_fiw_p)


Call:
matchit(formula = binary_fiw ~ physical_health_status + marriage_status + 
    total_people_in_household + age_group + education + family_income + 
    race + sex + working_status, data = test_data_new_fiw_p, 
    method = "exact")

Summary of Balance for All Data:
                          Means Treated Means Control Std. Mean Diff.
physical_health_status           3.0892        2.5607          0.0875
marriage_status                  3.4506        3.0939          0.2014
total_people_in_household        1.9920        1.8180          0.1172
age_group                        2.1290        2.1716         -0.0570
education                        3.1449        3.0000          0.1124
family_income                   11.6194       11.6243         -0.0032
race                             1.5334        1.5081          0.0322
sex                              1.5111        1.4956          0.0312
working_status                   2.7389        2.7278          0.0198
                          Var. Ratio eCDF Mean eCDF Max
physical_health_status        0.9615    0.0272   0.1114
marriage_status               0.9734    0.0713   0.1093
total_people_in_household     1.0761    0.0226   0.0844
age_group                     0.7738    0.0328   0.0642
education                     1.0559    0.0290   0.0637
family_income                 1.0335    0.0039   0.0126
race                          1.0783    0.0084   0.0226
sex                           1.0004    0.0078   0.0156
working_status                1.0733    0.0124   0.0241

Summary of Balance for Matched Data:
                          Means Treated Means Control Std. Mean Diff.
physical_health_status           0.5808        0.5808               0
marriage_status                  3.5633        3.5633               0
total_people_in_household        1.6507        1.6507               0
age_group                        2.1266        2.1266               0
education                        3.1790        3.1790              -0
family_income                   11.9956       11.9956               0
race                             1.4105        1.4105               0
sex                              1.4367        1.4367              -0
working_status                   2.9345        2.9345               0
                          Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
physical_health_status        1.0001         0        0               0
marriage_status               1.0001         0        0               0
total_people_in_household     1.0001         0        0               0
age_group                     1.0001         0        0               0
education                     1.0001         0        0               0
family_income                 1.0001         0        0               0
race                          1.0001         0        0               0
sex                           1.0001         0        0               0
working_status                1.0001         0        0               0

Sample Sizes:
              Control Treated
All           1352.       628
Matched (ESS)  231.73     229
Matched        366.       229
Unmatched      986.       399
Discarded        0.         0

imbalance_exact_fiw_p <- imbalance(group = data_exact_fiw_p$binary_fiw, data = data_exact_fiw_p, 
          drop = c("physical_health_status", "binary_fiw", "weights",  "subclass")) #With matched data, always add weights and subclass here

Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect

imbalance_exact_fiw_p


Multivariate Imbalance Measure: L1=0.282
Percentage of local common support: LCS=100.0%

Univariate Imbalance Measures:

                             statistic   type          L1 min 25% 50% 75% max
marriage_status           2.415448e+00 (Chi2) 0.056732765  NA  NA  NA  NA  NA
total_people_in_household 4.415250e+00 (Chi2) 0.067458897  NA  NA  NA  NA  NA
age_group                 4.658060e-01 (Chi2) 0.024530508  NA  NA  NA  NA  NA
education                 2.729352e+00 (Chi2) 0.026212805  NA  NA  NA  NA  NA
family_income             7.054646e-31 (Chi2) 0.000000000  NA  NA  NA  NA  NA
race                      3.336199e+00 (Chi2) 0.064189754  NA  NA  NA  NA  NA
sex                       5.339079e-02 (Chi2) 0.013183955  NA  NA  NA  NA  NA
working_status            4.527508e-02 (Chi2) 0.001097669  NA  NA  NA  NA  NA

###Match Coarsened Exact 
###Perform the matching here with code that resembles most regressions
match_cem_fiw_p <- matchit(binary_fiw ~physical_health_status+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, 
                       method = "cem", data = test_data_new_fiw_p)
data_cem_fiw_p <- match.data(match_cem_fiw_p) #Creates new dataframe that only includes the matched cases
summary(match_cem_fiw_p)


Call:
matchit(formula = binary_fiw ~ physical_health_status + marriage_status + 
    total_people_in_household + age_group + education + family_income + 
    race + sex + working_status, data = test_data_new_fiw_p, 
    method = "cem")

Summary of Balance for All Data:
                          Means Treated Means Control Std. Mean Diff.
physical_health_status           3.0892        2.5607          0.0875
marriage_status                  3.4506        3.0939          0.2014
total_people_in_household        1.9920        1.8180          0.1172
age_group                        2.1290        2.1716         -0.0570
education                        3.1449        3.0000          0.1124
family_income                   11.6194       11.6243         -0.0032
race                             1.5334        1.5081          0.0322
sex                              1.5111        1.4956          0.0312
working_status                   2.7389        2.7278          0.0198
                          Var. Ratio eCDF Mean eCDF Max
physical_health_status        0.9615    0.0272   0.1114
marriage_status               0.9734    0.0713   0.1093
total_people_in_household     1.0761    0.0226   0.0844
age_group                     0.7738    0.0328   0.0642
education                     1.0559    0.0290   0.0637
family_income                 1.0335    0.0039   0.0126
race                          1.0783    0.0084   0.0226
sex                           1.0004    0.0078   0.0156
working_status                1.0733    0.0124   0.0241

Summary of Balance for Matched Data:
                          Means Treated Means Control Std. Mean Diff.
physical_health_status           0.7649        0.7145          0.0084
marriage_status                  3.5860        3.5860          0.0000
total_people_in_household        1.6772        1.6772          0.0000
age_group                        2.1509        2.1509          0.0000
education                        3.2842        3.2842          0.0000
family_income                   11.9860       11.9860          0.0000
race                             1.4140        1.4140          0.0000
sex                              1.4421        1.4421          0.0000
working_status                   2.9228        2.9228          0.0000
                          Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
physical_health_status        0.9728     0.004   0.0386          0.0989
marriage_status               1.0000     0.000   0.0000          0.0000
total_people_in_household     1.0000     0.000   0.0000          0.0000
age_group                     1.0000     0.000   0.0000          0.0000
education                     1.0000     0.000   0.0000          0.0000
family_income                 1.0000     0.000   0.0000          0.0000
race                          1.0000     0.000   0.0000          0.0000
sex                           1.0000     0.000   0.0000          0.0000
working_status                1.0000     0.000   0.0000          0.0000

Sample Sizes:
              Control Treated
All           1352.       628
Matched (ESS)  283.37     285
Matched        447.       285
Unmatched      905.       343
Discarded        0.         0

imbalance_cem_fiw_p <- imbalance(group = data_cem_fiw_p$binary_fiw, data = data_cem_fiw_p, 
          drop = c("physical_health_status", "binary_fiw", "weights",  "subclass"))

Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect

imbalance_cem_fiw_p


Multivariate Imbalance Measure: L1=0.279
Percentage of local common support: LCS=100.0%

Univariate Imbalance Measures:

                           statistic   type          L1 min 25% 50% 75% max
marriage_status           3.25633286 (Chi2) 0.059578476  NA  NA  NA  NA  NA
total_people_in_household 5.36925857 (Chi2) 0.065418580  NA  NA  NA  NA  NA
age_group                 1.12679662 (Chi2) 0.024278818  NA  NA  NA  NA  NA
education                 4.47009965 (Chi2) 0.044012716  NA  NA  NA  NA  NA
family_income             0.31156120 (Chi2) 0.000000000  NA  NA  NA  NA  NA
race                      2.96910251 (Chi2) 0.049664430  NA  NA  NA  NA  NA
sex                       0.06667579 (Chi2) 0.012575062  NA  NA  NA  NA  NA
working_status            0.46179671 (Chi2) 0.000965501  NA  NA  NA  NA  NA

t.test(test_data_new_fiw_p$physical_health_status, test_data_new_fiw_p$binary_fiw)


    Welch Two Sample t-test

data:  test_data_new_fiw_p$physical_health_status and test_data_new_fiw_p$binary_fiw
t = 17.469, df = 2001.9, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 2.140428 2.681794
sample estimates:
mean of x mean of y 
2.7282828 0.3171717

#Estimate Linear Regression on Raw Data
lm1_fiw_p<-lm(physical_health_status~ binary_fiw+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, 
        test_data_new_fiw_p)
summary(lm1_fiw_p)


Call:
lm(formula = physical_health_status ~ binary_fiw + marriage_status + 
    total_people_in_household + age_group + education + family_income + 
    race + sex + working_status, data = test_data_new_fiw_p)

Residuals:
   Min     1Q Median     3Q    Max 
-6.294 -2.698 -1.866 -0.119 28.675 

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)    
(Intercept)                7.30610    1.40334   5.206 2.13e-07 ***
binary_fiw                 0.67058    0.29353   2.285   0.0224 *  
marriage_status           -0.20071    0.08341  -2.406   0.0162 *  
total_people_in_household -0.07627    0.09577  -0.796   0.4259    
age_group                  0.37297    0.17859   2.088   0.0369 *  
education                 -0.15660    0.11052  -1.417   0.1566    
family_income             -0.02262    0.09520  -0.238   0.8122    
race                      -0.38502    0.17837  -2.159   0.0310 *  
sex                        0.53774    0.27576   1.950   0.0513 .  
working_status            -1.57141    0.25480  -6.167 8.42e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 6.03 on 1970 degrees of freedom
Multiple R-squared:  0.03497,   Adjusted R-squared:  0.03056 
F-statistic: 7.931 on 9 and 1970 DF,  p-value: 1.365e-11

#Estimate Linear Regression on Coarsened Exact Matched Data
lm2_fiw_p<-lm(physical_health_status~ binary_fiw+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, 
        data_cem_fiw_p, weights = weights)
summary(lm2_fiw_p)


Call:
lm(formula = physical_health_status ~ binary_fiw + marriage_status + 
    total_people_in_household + age_group + education + family_income + 
    race + sex + working_status, data = data_cem_fiw_p, weights = weights)

Weighted Residuals:
   Min     1Q Median     3Q    Max 
-1.751 -0.840 -0.563 -0.233 36.349 

Coefficients:
                           Estimate Std. Error t value Pr(>|t|)  
(Intercept)                1.163985   7.103808   0.164   0.8699  
binary_fiw                 0.050448   0.171168   0.295   0.7683  
marriage_status           -0.005979   0.056705  -0.105   0.9161  
total_people_in_household -0.024675   0.073854  -0.334   0.7384  
age_group                  0.173666   0.131595   1.320   0.1874  
education                 -0.018419   0.069222  -0.266   0.7903  
family_income             -0.157908   0.603973  -0.261   0.7938  
race                      -0.252034   0.119342  -2.112   0.0350 *
sex                        0.025212   0.180941   0.139   0.8892  
working_status             0.517641   0.313302   1.652   0.0989 .
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.258 on 722 degrees of freedom
Multiple R-squared:  0.01268,   Adjusted R-squared:  0.0003738 
F-statistic:  1.03 on 9 and 722 DF,  p-value: 0.4137

 #Compare results 
plot_summs(lm1_fiw_p, lm2_fiw_p)

stargazer(lm1_fiw_p, lm2_fiw_p, type = "text", digits = 3, 
          dep.var.labels = c("Earnings Post Training"),
          column.labels = c("Full Data", "Matched Data"),
          covariate.labels=c("Family interrupt the job(binary)", "Marriage status","Total people in household", "Age group", "Education","Family Income","Race", "Sex", "Working Status" ))


============================================================================
                                             Dependent variable:            
                                 -------------------------------------------
                                           Earnings Post Training           
                                        Full Data           Matched Data    
                                           (1)                   (2)        
----------------------------------------------------------------------------
Family interrupt the job(binary)         0.671**                0.050       
                                         (0.294)               (0.171)      
                                                                            
Marriage status                         -0.201**               -0.006       
                                         (0.083)               (0.057)      
                                                                            
Total people in household                -0.076                -0.025       
                                         (0.096)               (0.074)      
                                                                            
Age group                                0.373**                0.174       
                                         (0.179)               (0.132)      
                                                                            
Education                                -0.157                -0.018       
                                         (0.111)               (0.069)      
                                                                            
Family Income                            -0.023                -0.158       
                                         (0.095)               (0.604)      
                                                                            
Race                                    -0.385**              -0.252**      
                                         (0.178)               (0.119)      
                                                                            
Sex                                      0.538*                 0.025       
                                         (0.276)               (0.181)      
                                                                            
Working Status                          -1.571***              0.518*       
                                         (0.255)               (0.313)      
                                                                            
Constant                                7.306***                1.164       
                                         (1.403)               (7.104)      
                                                                            
----------------------------------------------------------------------------
Observations                              1,980                  732        
R2                                        0.035                 0.013       
Adjusted R2                               0.031                0.0004       
Residual Std. Error                 6.030 (df = 1970)     2.258 (df = 722)  
F Statistic                      7.931*** (df = 9; 1970) 1.030 (df = 9; 722)
============================================================================
Note:                                            *p<0.1; **p<0.05; ***p<0.01

#######Entropy Balancing
# Create a subset of the dataset with the selected variables
treatment_var_fiw_p <- "binary_fiw"
covariates_vars_fiw_p <- c("marriage_status", "total_people_in_household", "age_group","education", "family_income", "race", "sex", "working_status")
dependent_var_fiw_p <- "physical_health_status"

# Prepare treatment and covariates
treatment_fiw_p <- test_data_new_fiw_p$binary_fiw
covariates_fiw_p <- test_data_new_fiw_p[, covariates_vars_fiw_p]

# Run entropy balancing
e_bal_fiw_p  <- ebalance(
  Treatment = treatment_fiw_p,
  X = covariates_fiw_p,
  max.iterations = 200,
  constraint.tolerance = 1)

Converged within tolerance

test_data_new_fiw_p$eb_weight_fiw_p <- NA
test_data_new_fiw_p$eb_weight_fiw_p[test_data_new_fiw_p[[treatment_var_fiw_p]] == 1] <- 1  # Treated units get weight = 1
test_data_new_fiw_p$eb_weight_fiw_p[test_data_new_fiw_p[[treatment_var_fiw_p]] == 0] <- e_bal_fiw_p$w  # Control units get EB weights
# Final data for regression
eb_data_fiw_p <- test_data_new_fiw_p %>% filter(!is.na(eb_weight_fiw_p))  # Exclude unmatched if any

#data for analysis 
#Now have a weight called 'eb_weight' that can be used in analysis 

##Let's check that the two groups are equal now 
eb_data_fiw_p %>%
  group_by(binary_fiw) %>%
  summarise(
    age_weighted_mean_fiw_p = wtd.mean(age_group, weights = eb_weight_fiw_p), 
    age_weighted_variance_fiw_p = wtd.var(age_group, weights = eb_weight_fiw_p)
  )

# A tibble: 2 × 3
  binary_fiw age_weighted_mean_fiw_p age_weighted_variance_fiw_p
       <dbl>                   <dbl>                       <dbl>
1          0                    2.13                       0.676
2          1                    2.13                       0.559

check_fiw_p_2 <- eb_data_fiw_p %>%
  group_by(binary_fiw) %>%
  summarise(across(.cols = c(marriage_status, total_people_in_household, age_group,education, family_income, race, sex, working_status),
                   .fns = list(
                     weighted_mean_fiw_p = ~wtd.mean(., weights = eb_weight_fiw_p),
                     weighted_variance_fiw_p = ~wtd.var(., weights = eb_weight_fiw_p)),
                   .names = "{.col}_{.fn}"),
            .groups = "drop")

check_t_fiw_p_2<- round(t(check_fiw_p_2), 2)

print(check_t_fiw_p_2)

                                                   [,1]  [,2]
binary_fiw                                         0.00  1.00
marriage_status_weighted_mean_fiw_p                3.45  3.45
marriage_status_weighted_variance_fiw_p            3.12  3.14
total_people_in_household_weighted_mean_fiw_p      1.99  1.99
total_people_in_household_weighted_variance_fiw_p  2.43  2.21
age_group_weighted_mean_fiw_p                      2.13  2.13
age_group_weighted_variance_fiw_p                  0.68  0.56
education_weighted_mean_fiw_p                      3.14  3.14
education_weighted_variance_fiw_p                  1.64  1.66
family_income_weighted_mean_fiw_p                 11.62 11.62
family_income_weighted_variance_fiw_p              2.41  2.24
race_weighted_mean_fiw_p                           1.53  1.53
race_weighted_variance_fiw_p                       0.61  0.62
sex_weighted_mean_fiw_p                            1.51  1.51
sex_weighted_variance_fiw_p                        0.25  0.25
working_status_weighted_mean_fiw_p                 2.74  2.74
working_status_weighted_variance_fiw_p             0.29  0.31

lm3_fiw_p<-lm(physical_health_status~ binary_fiw+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, data=eb_data_fiw_p, weights = eb_weight_fiw_p)
lm4_fiw_p<-lm(physical_health_status~ binary_fiw+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, data=eb_data_fiw_p)

stargazer(lm3_fiw_p, lm4_fiw_p, type = "text", digits = 3, 
          dep.var.labels = c("Earnings Post Training"),
          column.labels = c("Entropy Balanced", "Raw Data"),
          covariate.labels=c("Family interrupt the job(binary)", "Marriage status","Total people in household", "Age group", "Education", "Family income", "Race", "Sex", "Working Status" ))


=============================================================
                                     Dependent variable:     
                                 ----------------------------
                                    Earnings Post Training   
                                 Entropy Balanced   Raw Data 
                                        (1)           (2)    
-------------------------------------------------------------
Family interrupt the job(binary)      0.672**       0.671**  
                                      (0.264)       (0.294)  
                                                             
Marriage status                      -0.178**       -0.201** 
                                      (0.082)       (0.083)  
                                                             
Total people in household             -0.091         -0.076  
                                      (0.089)       (0.096)  
                                                             
Age group                              0.175        0.373**  
                                      (0.180)       (0.179)  
                                                             
Education                             -0.154         -0.157  
                                      (0.107)       (0.111)  
                                                             
Family income                         -0.025         -0.023  
                                      (0.090)       (0.095)  
                                                             
Race                                  -0.315*       -0.385** 
                                      (0.170)       (0.178)  
                                                             
Sex                                   0.569**        0.538*  
                                      (0.269)       (0.276)  
                                                             
Working Status                       -1.574***     -1.571*** 
                                      (0.247)       (0.255)  
                                                             
Constant                             7.544***       7.306*** 
                                      (1.362)       (1.403)  
                                                             
-------------------------------------------------------------
Observations                           1,980         1,980   
R2                                     0.037         0.035   
Adjusted R2                            0.032         0.031   
Residual Std. Error (df = 1970)        4.679         6.030   
F Statistic (df = 9; 1970)           8.295***       7.931*** 
=============================================================
Note:                             *p<0.1; **p<0.05; ***p<0.01

The relationship between job-family conflict and mental health

test_data_new_wif_m<-test_data_new %>% dplyr::select(-c(job_interrupt_the_family,family_interrupt_the_job,binary_fiw, physical_health_status))

head(test_data_new_wif_m)

# A tibble: 6 × 10
  mental_health_status marriage_status total_people_in_household age_group
                 <dbl>           <dbl>                     <dbl>     <dbl>
1                   15               3                         1         4
2                    0               3                         3         3
3                   10               1                         1         1
4                   14               1                         3         1
5                    0               5                         2         2
6                    5               1                         2         2
# ℹ 6 more variables: education <dbl>, family_income <dbl>, race <dbl>,
#   sex <dbl>, working_status <dbl>, binary_wif <dbl>

##What is the impact of employment training on earnings?
#Testing for Imbalance Between Groups
check_wif_m<-test_data_new_wif_m %>%
  group_by(binary_wif) %>%
  summarise_at(vars(mental_health_status,marriage_status,total_people_in_household,age_group,education,family_income,race,sex, working_status), 
               list(mean = mean,
                    var = var))

round(t(check_wif_m), 3)

                                 [,1]   [,2]
binary_wif                      0.000  1.000
mental_health_status_mean       3.796  5.648
marriage_status_mean            3.140  3.294
total_people_in_household_mean  1.861  1.889
age_group_mean                  2.194  2.112
education_mean                  2.950  3.170
family_income_mean             11.556 11.708
race_mean                       1.534  1.493
sex_mean                        1.504  1.497
working_status_mean             2.692  2.782
mental_health_status_var       55.808 70.333
marriage_status_var             3.220  3.216
total_people_in_household_var   2.170  2.022
age_group_var                   0.736  0.584
education_var                   1.581  1.611
family_income_var               2.493  1.785
race_var                        0.595  0.572
sex_var                         0.250  0.250
working_status_var              0.312  0.272

match_exact_wif_m <- matchit(binary_wif~mental_health_status+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, 
                       method = "exact", data =test_data_new_wif_m)
data_exact_wif_m <- match.data(match_exact_wif_m) #Creates new dataframe that only includes the matched cases
summary(match_exact_wif_m)


Call:
matchit(formula = binary_wif ~ mental_health_status + marriage_status + 
    total_people_in_household + age_group + education + family_income + 
    race + sex + working_status, data = test_data_new_wif_m, 
    method = "exact")

Summary of Balance for All Data:
                          Means Treated Means Control Std. Mean Diff.
mental_health_status             5.6481        3.7957          0.2209
marriage_status                  3.2940        3.1398          0.0860
total_people_in_household        1.8889        1.8611          0.0195
age_group                        2.1123        2.1935         -0.1063
education                        3.1701        2.9498          0.1736
family_income                   11.7083       11.5565          0.1137
race                             1.4931        1.5341         -0.0542
sex                              1.4965        1.5036         -0.0141
working_status                   2.7824        2.6918          0.1737
                          Var. Ratio eCDF Mean eCDF Max
mental_health_status          1.2603    0.0623   0.1606
marriage_status               0.9988    0.0308   0.0528
total_people_in_household     0.9320    0.0095   0.0301
age_group                     0.7941    0.0288   0.0675
education                     1.0185    0.0441   0.0867
family_income                 0.7161    0.0127   0.0511
race                          0.9616    0.0137   0.0289
sex                           1.0003    0.0035   0.0071
working_status                0.8729    0.0313   0.0923

Summary of Balance for Matched Data:
                          Means Treated Means Control Std. Mean Diff.
mental_health_status             1.8244        1.8244              -0
marriage_status                  3.6298        3.6298               0
total_people_in_household        1.4885        1.4885               0
age_group                        2.2366        2.2366               0
education                        3.3244        3.3244               0
family_income                   12.0000       12.0000               0
race                             1.3588        1.3588               0
sex                              1.3740        1.3740               0
working_status                   2.9580        2.9580              -0
                          Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
mental_health_status          0.9984         0        0               0
marriage_status               0.9984         0        0               0
total_people_in_household     0.9984         0        0               0
age_group                     0.9984         0        0               0
education                     0.9984         0        0               0
family_income                 0.0000         0        0               0
race                          0.9984         0        0               0
sex                           0.9984         0        0               0
working_status                0.9984         0        0               0

Sample Sizes:
              Control Treated
All           1116.       864
Matched (ESS)  186.04     262
Matched        288.       262
Unmatched      828.       602
Discarded        0.         0

imbalance_exact_wif_m <- imbalance(group = data_exact_wif_m$binary_wif, data = data_exact_wif_m, 
          drop = c("mental_health_status", "binary_wif", "weights",  "subclass")) #With matched data, always add weights and subclass here

Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect

imbalance_exact_wif_m


Multivariate Imbalance Measure: L1=0.225
Percentage of local common support: LCS=100.0%

Univariate Imbalance Measures:

                           statistic   type           L1 min 25% 50% 75% max
marriage_status           1.24285997 (Chi2) 0.0402088634  NA  NA  NA  NA  NA
total_people_in_household 2.28837788 (Chi2) 0.0342716285  NA  NA  NA  NA  NA
age_group                 1.53750809 (Chi2) 0.0485581001  NA  NA  NA  NA  NA
education                 8.34406798 (Chi2) 0.1176579729  NA  NA  NA  NA  NA
family_income             1.22909091 (Chi2) 0.0000000000  NA  NA  NA  NA  NA
race                      0.18225258 (Chi2) 0.0140744275  NA  NA  NA  NA  NA
sex                       0.46998088 (Chi2) 0.0322041985  NA  NA  NA  NA  NA
working_status            0.06195664 (Chi2) 0.0003445717  NA  NA  NA  NA  NA

###Match Coarsened Exact 
###Perform the matching here with code that resembles most regressions
match_cem_fiw_m <- matchit(binary_wif ~mental_health_status+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, 
                       method = "cem", data = test_data_new_wif_m)
data_cem_fiw_m <- match.data(match_cem_fiw_m) #Creates new dataframe that only includes the matched cases
summary(match_cem_fiw_m)


Call:
matchit(formula = binary_wif ~ mental_health_status + marriage_status + 
    total_people_in_household + age_group + education + family_income + 
    race + sex + working_status, data = test_data_new_wif_m, 
    method = "cem")

Summary of Balance for All Data:
                          Means Treated Means Control Std. Mean Diff.
mental_health_status             5.6481        3.7957          0.2209
marriage_status                  3.2940        3.1398          0.0860
total_people_in_household        1.8889        1.8611          0.0195
age_group                        2.1123        2.1935         -0.1063
education                        3.1701        2.9498          0.1736
family_income                   11.7083       11.5565          0.1137
race                             1.4931        1.5341         -0.0542
sex                              1.4965        1.5036         -0.0141
working_status                   2.7824        2.6918          0.1737
                          Var. Ratio eCDF Mean eCDF Max
mental_health_status          1.2603    0.0623   0.1606
marriage_status               0.9988    0.0308   0.0528
total_people_in_household     0.9320    0.0095   0.0301
age_group                     0.7941    0.0288   0.0675
education                     1.0185    0.0441   0.0867
family_income                 0.7161    0.0127   0.0511
race                          0.9616    0.0137   0.0289
sex                           1.0003    0.0035   0.0071
working_status                0.8729    0.0313   0.0923

Summary of Balance for Matched Data:
                          Means Treated Means Control Std. Mean Diff.
mental_health_status             1.8585        1.8056          0.0063
marriage_status                  3.6092        3.6092          0.0000
total_people_in_household        1.4985        1.4985         -0.0000
age_group                        2.2492        2.2492          0.0000
education                        3.3385        3.3385          0.0000
family_income                   11.9938       11.9938         -0.0000
race                             1.3692        1.3692          0.0000
sex                              1.3969        1.3969          0.0000
working_status                   2.9385        2.9385          0.0000
                          Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
mental_health_status          0.9977     0.002   0.0291          0.0577
marriage_status               0.9988     0.000   0.0000          0.0000
total_people_in_household     0.9988     0.000   0.0000          0.0000
age_group                     0.9988     0.000   0.0000          0.0000
education                     0.9988     0.000   0.0000          0.0000
family_income                 0.9988     0.000   0.0000          0.0000
race                          0.9988     0.000   0.0000          0.0000
sex                           0.9988     0.000   0.0000          0.0000
working_status                0.9988     0.000   0.0000          0.0000

Sample Sizes:
              Control Treated
All           1116.       864
Matched (ESS)  233.91     325
Matched        344.       325
Unmatched      772.       539
Discarded        0.         0

imbalance_cem_fiw_m <- imbalance(group = data_cem_fiw_m$binary_wif, data = data_cem_fiw_m, 
          drop = c("mental_health_status", "binary_wif", "weights",  "subclass"))

Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect

imbalance_cem_fiw_m


Multivariate Imbalance Measure: L1=0.221
Percentage of local common support: LCS=100.0%

Univariate Imbalance Measures:

                             statistic   type          L1 min 25% 50% 75% max
marriage_status           8.570204e-01 (Chi2) 0.035366726  NA  NA  NA  NA  NA
total_people_in_household 1.630368e+00 (Chi2) 0.025769231  NA  NA  NA  NA  NA
age_group                 2.143303e+00 (Chi2) 0.022611807  NA  NA  NA  NA  NA
education                 9.081761e+00 (Chi2) 0.104436494  NA  NA  NA  NA  NA
family_income             3.113318e-30 (Chi2) 0.000000000  NA  NA  NA  NA  NA
race                      3.612972e-01 (Chi2) 0.018899821  NA  NA  NA  NA  NA
sex                       8.973683e-01 (Chi2) 0.039123435  NA  NA  NA  NA  NA
working_status            7.885286e-01 (Chi2) 0.000509839  NA  NA  NA  NA  NA

#Compare t-tests of DV on treated - no control variables 
t.test(test_data_new_wif_m$mental_health_status, test_data_new_wif_m$binary_wif)


    Welch Two Sample t-test

data:  test_data_new_wif_m$mental_health_status and test_data_new_wif_m$binary_wif
t = 23.327, df = 1994.5, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 3.817287 4.518066
sample estimates:
mean of x mean of y 
4.6040404 0.4363636

#Estimate Linear Regression on Raw Data
lm1_wif_m<-lm(mental_health_status~ binary_wif+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, 
        test_data_new_wif_m)
summary(lm1_wif_m)


Call:
lm(formula = mental_health_status ~ binary_wif + marriage_status + 
    total_people_in_household + age_group + education + family_income + 
    race + sex + working_status, data = test_data_new_wif_m)

Residuals:
     Min       1Q   Median       3Q      Max 
-11.0442  -4.2887  -2.4698   0.7352  28.7621 

Coefficients:
                           Estimate Std. Error t value Pr(>|t|)    
(Intercept)               11.854765   1.772030   6.690 2.90e-11 ***
binary_wif                 1.951673   0.348750   5.596 2.50e-08 ***
marriage_status           -0.304929   0.105031  -2.903 0.003735 ** 
total_people_in_household  0.008186   0.120944   0.068 0.946043    
age_group                 -1.449340   0.225933  -6.415 1.76e-10 ***
education                 -0.527716   0.139907  -3.772 0.000167 ***
family_income             -0.186525   0.120319  -1.550 0.121239    
race                      -0.913479   0.225446  -4.052 5.28e-05 ***
sex                        1.779887   0.348417   5.109 3.56e-07 ***
working_status            -0.557385   0.322752  -1.727 0.084329 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 7.62 on 1970 degrees of freedom
Multiple R-squared:  0.08199,   Adjusted R-squared:  0.07779 
F-statistic: 19.55 on 9 and 1970 DF,  p-value: < 2.2e-16

lm2_wif_m<-lm(mental_health_status~ binary_wif+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, 
        data_cem_fiw_m, weights = weights)
summary(lm2_wif_m)


Call:
lm(formula = mental_health_status ~ binary_wif + marriage_status + 
    total_people_in_household + age_group + education + family_income + 
    race + sex + working_status, data = data_cem_fiw_m, weights = weights)

Weighted Residuals:
   Min     1Q Median     3Q    Max 
-6.851 -2.065 -1.127  0.214 47.395 

Coefficients:
                           Estimate Std. Error t value Pr(>|t|)    
(Intercept)               -22.25821   30.43402  -0.731 0.464820    
binary_wif                  0.05285    0.39241   0.135 0.892905    
marriage_status            -0.06485    0.12761  -0.508 0.611477    
total_people_in_household  -0.41865    0.19613  -2.135 0.033167 *  
age_group                  -1.17613    0.30066  -3.912 0.000101 ***
education                  -0.59794    0.16223  -3.686 0.000247 ***
family_income               2.36965    2.53964   0.933 0.351129    
race                       -1.12192    0.28086  -3.995 7.21e-05 ***
sex                         0.23441    0.43222   0.542 0.587759    
working_status              0.80120    0.73518   1.090 0.276198    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 5.073 on 659 degrees of freedom
Multiple R-squared:  0.08085,   Adjusted R-squared:  0.06829 
F-statistic:  6.44 on 9 and 659 DF,  p-value: 8.395e-09

 #Compare results 
plot_summs(lm1_wif_m, lm2_wif_m)

stargazer(lm1_wif_m,lm2_wif_m, type = "text", digits = 3, 
          dep.var.labels = c("Mental Healtth Status"),
          column.labels = c("Full Data", "Matched Data"),
          covariate.labels=c("Job interrupt the family (binary)", "Marriage status","Total people in household", "Age group", "Education","Family income","Race", "Sex", "Working Status" ))


=================================================================================
                                                Dependent variable:              
                                  -----------------------------------------------
                                               Mental Healtth Status             
                                         Full Data              Matched Data     
                                            (1)                     (2)          
---------------------------------------------------------------------------------
Job interrupt the family (binary)         1.952***                 0.053         
                                          (0.349)                 (0.392)        
                                                                                 
Marriage status                          -0.305***                 -0.065        
                                          (0.105)                 (0.128)        
                                                                                 
Total people in household                  0.008                  -0.419**       
                                          (0.121)                 (0.196)        
                                                                                 
Age group                                -1.449***               -1.176***       
                                          (0.226)                 (0.301)        
                                                                                 
Education                                -0.528***               -0.598***       
                                          (0.140)                 (0.162)        
                                                                                 
Family income                              -0.187                  2.370         
                                          (0.120)                 (2.540)        
                                                                                 
Race                                     -0.913***               -1.122***       
                                          (0.225)                 (0.281)        
                                                                                 
Sex                                       1.780***                 0.234         
                                          (0.348)                 (0.432)        
                                                                                 
Working Status                            -0.557*                  0.801         
                                          (0.323)                 (0.735)        
                                                                                 
Constant                                 11.855***                -22.258        
                                          (1.772)                 (30.434)       
                                                                                 
---------------------------------------------------------------------------------
Observations                               1,980                    669          
R2                                         0.082                   0.081         
Adjusted R2                                0.078                   0.068         
Residual Std. Error                  7.620 (df = 1970)        5.073 (df = 659)   
F Statistic                       19.549*** (df = 9; 1970) 6.440*** (df = 9; 659)
=================================================================================
Note:                                                 *p<0.1; **p<0.05; ***p<0.01

#######Entropy Balancing
# Create a subset of the dataset with the selected variables
treatment_var_wif_m <- "binary_wif"
covariates_vars_wif_m <- c("marriage_status", "total_people_in_household", "age_group","education", "family_income", "race", "sex", "working_status")
dependent_var_wif_m <- "mental_health_status"

# Prepare treatment and covariates
treatment_wif_m <- test_data_new_wif_m$binary_wif
covariates_wif_m <- test_data_new_wif_m[, covariates_vars_wif_m]

# Run entropy balancing
e_bal_wif_m <- ebalance(
  Treatment = treatment_wif_m,
  X = covariates_wif_m,
  max.iterations = 200,
  constraint.tolerance = 1)

Converged within tolerance

# Add weights back to LL
test_data_new_wif_m$eb_weight_wif_m <- NA
test_data_new_wif_m$eb_weight_wif_m[test_data_new_wif_m[[treatment_var_wif_m]] == 1] <- 1  # Treated units get weight = 1
test_data_new_wif_m$eb_weight_wif_m[test_data_new_wif_m[[treatment_var_wif_m]] == 0] <- e_bal_wif_m$w  # Control units get EB weights

# Final data for regression
eb_data_wif_m <- test_data_new_wif_m %>% filter(!is.na(eb_weight_wif_m))  # Exclude unmatched if any

#data for analysis 
#Now have a weight called 'eb_weight' that can be used in analysis 

##Let's check that the two groups are equal now 
eb_data_wif_m %>%
  group_by(binary_wif) %>%
  summarise(
    age_weighted_mean = wtd.mean(age_group, weights = eb_weight_wif_m), 
    age_weighted_variance = wtd.var(age_group, weights = eb_weight_wif_m)
  )

# A tibble: 2 × 3
  binary_wif age_weighted_mean age_weighted_variance
       <dbl>             <dbl>                 <dbl>
1          0              2.11                 0.676
2          1              2.11                 0.584

check_wif_m_2 <- eb_data_wif_m %>%
  group_by(binary_wif) %>%
  summarise(across(.cols = c(marriage_status, total_people_in_household, age_group,education, family_income, race, sex, working_status),
                   .fns = list(
                     weighted_mean = ~wtd.mean(., weights = eb_weight_wif_m),
                     weighted_variance = ~wtd.var(., weights = eb_weight_wif_m)),
                   .names = "{.col}_{.fn}"),
            .groups = "drop")

check_t_wif_m_2<- round(t(check_wif_m_2), 2)

print(check_t_wif_m_2)

                                             [,1]  [,2]
binary_wif                                   0.00  1.00
marriage_status_weighted_mean                3.29  3.29
marriage_status_weighted_variance            3.24  3.22
total_people_in_household_weighted_mean      1.89  1.89
total_people_in_household_weighted_variance  2.13  2.02
age_group_weighted_mean                      2.11  2.11
age_group_weighted_variance                  0.68  0.58
education_weighted_mean                      3.17  3.17
education_weighted_variance                  1.62  1.61
family_income_weighted_mean                 11.71 11.71
family_income_weighted_variance              1.53  1.79
race_weighted_mean                           1.49  1.49
race_weighted_variance                       0.57  0.57
sex_weighted_mean                            1.50  1.50
sex_weighted_variance                        0.25  0.25
working_status_weighted_mean                 2.78  2.78
working_status_weighted_variance             0.23  0.27

lm3_wif_m<-lm(mental_health_status~ binary_wif+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, data=eb_data_wif_m, weights = eb_weight_wif_m)
lm4_wif_m<-lm(mental_health_status~ binary_wif+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, data=eb_data_wif_m)

stargazer(lm3_wif_m, lm4_wif_m, type = "text", digits = 3, 
          dep.var.labels = c("Earnings Post Training"),
          column.labels = c("Entropy Balanced", "Raw Data"),
          covariate.labels=c("Job interrupt the family (binary)", "Marriage status","Total people in household", "Age group", "Education","Family income", "Race", "Sex", "Working Status" ))


==============================================================
                                      Dependent variable:     
                                  ----------------------------
                                     Earnings Post Training   
                                  Entropy Balanced   Raw Data 
                                         (1)           (2)    
--------------------------------------------------------------
Job interrupt the family (binary)     1.980***       1.952*** 
                                       (0.340)       (0.349)  
                                                              
Marriage status                       -0.292***     -0.305*** 
                                       (0.104)       (0.105)  
                                                              
Total people in household               0.026         0.008   
                                       (0.121)       (0.121)  
                                                              
Age group                             -1.444***     -1.449*** 
                                       (0.230)       (0.226)  
                                                              
Education                             -0.543***     -0.528*** 
                                       (0.138)       (0.140)  
                                                              
Family income                          -0.205         -0.187  
                                       (0.137)       (0.120)  
                                                              
Race                                  -0.912***     -0.913*** 
                                       (0.226)       (0.225)  
                                                              
Sex                                   1.903***       1.780*** 
                                       (0.347)       (0.348)  
                                                              
Working Status                         -0.463        -0.557*  
                                       (0.345)       (0.323)  
                                                              
Constant                              11.556***     11.855*** 
                                       (1.981)       (1.772)  
                                                              
--------------------------------------------------------------
Observations                            1,980         1,980   
R2                                      0.084         0.082   
Adjusted R2                             0.080         0.078   
Residual Std. Error (df = 1970)         7.069         7.620   
F Statistic (df = 9; 1970)            20.152***     19.549*** 
==============================================================
Note:                              *p<0.1; **p<0.05; ***p<0.01

The relationship between family-work conflict and mental health

test_data_new_fiw_m<-test_data_new %>% dplyr::select(-c(job_interrupt_the_family,family_interrupt_the_job,binary_wif, physical_health_status))

head(test_data_new_fiw_m)

# A tibble: 6 × 10
  mental_health_status marriage_status total_people_in_household age_group
                 <dbl>           <dbl>                     <dbl>     <dbl>
1                   15               3                         1         4
2                    0               3                         3         3
3                   10               1                         1         1
4                   14               1                         3         1
5                    0               5                         2         2
6                    5               1                         2         2
# ℹ 6 more variables: education <dbl>, family_income <dbl>, race <dbl>,
#   sex <dbl>, working_status <dbl>, binary_fiw <dbl>

##What is the impact of employment training on earnings?
#Testing for Imbalance Between Groups
check_fiw_m<-test_data_new_fiw_m %>%
  group_by(binary_fiw) %>%
  summarise_at(vars(mental_health_status,marriage_status,total_people_in_household,age_group,education,family_income,race,sex, working_status), 
               list(mean = mean,
                    var = var))

round(t(check_fiw_m), 3)

                                 [,1]   [,2]
binary_fiw                      0.000  1.000
mental_health_status_mean       4.111  5.666
marriage_status_mean            3.094  3.451
total_people_in_household_mean  1.818  1.992
age_group_mean                  2.172  2.129
education_mean                  3.000  3.145
family_income_mean             11.624 11.619
race_mean                       1.508  1.533
sex_mean                        1.496  1.511
working_status_mean             2.728  2.739
mental_health_status_var       59.626 68.583
marriage_status_var             3.224  3.138
total_people_in_household_var   2.050  2.206
age_group_var                   0.723  0.559
education_var                   1.574  1.662
family_income_var               2.167  2.239
race_var                        0.571  0.616
sex_var                         0.250  0.250
working_status_var              0.290  0.311

match_exact_fiw_m <- matchit(binary_fiw~mental_health_status+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, 
                       method = "exact", data = test_data_new_fiw_m)
data_exact_fiw_m <- match.data(match_exact_fiw_m) #Creates new dataframe that only includes the matched cases
summary(match_exact_fiw_m)


Call:
matchit(formula = binary_fiw ~ mental_health_status + marriage_status + 
    total_people_in_household + age_group + education + family_income + 
    race + sex + working_status, data = test_data_new_fiw_m, 
    method = "exact")

Summary of Balance for All Data:
                          Means Treated Means Control Std. Mean Diff.
mental_health_status             5.6656        4.1109          0.1877
marriage_status                  3.4506        3.0939          0.2014
total_people_in_household        1.9920        1.8180          0.1172
age_group                        2.1290        2.1716         -0.0570
education                        3.1449        3.0000          0.1124
family_income                   11.6194       11.6243         -0.0032
race                             1.5334        1.5081          0.0322
sex                              1.5111        1.4956          0.0312
working_status                   2.7389        2.7278          0.0198
                          Var. Ratio eCDF Mean eCDF Max
mental_health_status          1.1502    0.0528   0.1517
marriage_status               0.9734    0.0713   0.1093
total_people_in_household     1.0761    0.0226   0.0844
age_group                     0.7738    0.0328   0.0642
education                     1.0559    0.0290   0.0637
family_income                 1.0335    0.0039   0.0126
race                          1.0783    0.0084   0.0226
sex                           1.0004    0.0078   0.0156
working_status                1.0733    0.0124   0.0241

Summary of Balance for Matched Data:
                          Means Treated Means Control Std. Mean Diff.
mental_health_status             1.5351        1.5351               0
marriage_status                  3.7892        3.7892               0
total_people_in_household        1.6919        1.6919               0
age_group                        2.2000        2.2000               0
education                        3.3676        3.3676               0
family_income                   11.9946       11.9946               0
race                             1.4595        1.4595              -0
sex                              1.3838        1.3838               0
working_status                   2.9568        2.9568               0
                          Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
mental_health_status          0.9995         0        0               0
marriage_status               0.9995         0        0               0
total_people_in_household     0.9995         0        0               0
age_group                     0.9995         0        0               0
education                     0.9995         0        0               0
family_income                 0.9995         0        0               0
race                          0.9995         0        0               0
sex                           0.9995         0        0               0
working_status                0.9995         0        0               0

Sample Sizes:
              Control Treated
All           1352.       628
Matched (ESS)  168.63     185
Matched        292.       185
Unmatched     1060.       443
Discarded        0.         0

imbalance_exact_fiw_m <- imbalance(group = data_exact_fiw_m$binary_fiw, data = data_exact_fiw_m, 
          drop = c("mental_health_status", "binary_fiw", "weights",  "subclass")) #With matched data, always add weights and subclass here

Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect

imbalance_exact_fiw_m


Multivariate Imbalance Measure: L1=0.251
Percentage of local common support: LCS=100.0%

Univariate Imbalance Measures:

                             statistic   type         L1 min 25% 50% 75% max
marriage_status           1.351709e+00 (Chi2) 0.03337653  NA  NA  NA  NA  NA
total_people_in_household 6.005671e+00 (Chi2) 0.08463532  NA  NA  NA  NA  NA
age_group                 2.180951e+00 (Chi2) 0.05994076  NA  NA  NA  NA  NA
education                 4.061170e+00 (Chi2) 0.09207701  NA  NA  NA  NA  NA
family_income             1.573880e-30 (Chi2) 0.00000000  NA  NA  NA  NA  NA
race                      3.856778e+00 (Chi2) 0.07613847  NA  NA  NA  NA  NA
sex                       4.364911e-02 (Chi2) 0.01392077  NA  NA  NA  NA  NA
working_status            6.546799e-02 (Chi2) 0.00000000  NA  NA  NA  NA  NA

###Match Coarsened Exact 
###Perform the matching here with code that resembles most regressions
match_cem_fiw_m <- matchit(binary_fiw ~mental_health_status+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, 
                       method = "cem", data = test_data_new_fiw_m)
data_cem_fiw_m <- match.data(match_cem_fiw_m) #Creates new dataframe that only includes the matched cases
summary(match_cem_fiw_m)


Call:
matchit(formula = binary_fiw ~ mental_health_status + marriage_status + 
    total_people_in_household + age_group + education + family_income + 
    race + sex + working_status, data = test_data_new_fiw_m, 
    method = "cem")

Summary of Balance for All Data:
                          Means Treated Means Control Std. Mean Diff.
mental_health_status             5.6656        4.1109          0.1877
marriage_status                  3.4506        3.0939          0.2014
total_people_in_household        1.9920        1.8180          0.1172
age_group                        2.1290        2.1716         -0.0570
education                        3.1449        3.0000          0.1124
family_income                   11.6194       11.6243         -0.0032
race                             1.5334        1.5081          0.0322
sex                              1.5111        1.4956          0.0312
working_status                   2.7389        2.7278          0.0198
                          Var. Ratio eCDF Mean eCDF Max
mental_health_status          1.1502    0.0528   0.1517
marriage_status               0.9734    0.0713   0.1093
total_people_in_household     1.0761    0.0226   0.0844
age_group                     0.7738    0.0328   0.0642
education                     1.0559    0.0290   0.0637
family_income                 1.0335    0.0039   0.0126
race                          1.0783    0.0084   0.0226
sex                           1.0004    0.0078   0.0156
working_status                1.0733    0.0124   0.0241

Summary of Balance for Matched Data:
                          Means Treated Means Control Std. Mean Diff.
mental_health_status             1.5983        1.4992           0.012
marriage_status                  3.8419        3.8419           0.000
total_people_in_household        1.6880        1.6880           0.000
age_group                        2.2137        2.2137           0.000
education                        3.3504        3.3504           0.000
family_income                   11.9957       11.9957           0.000
race                             1.4274        1.4274           0.000
sex                              1.3889        1.3889           0.000
working_status                   2.9274        2.9274          -0.000
                          Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
mental_health_status          0.9929    0.0037   0.0562          0.0634
marriage_status               0.9999    0.0000   0.0000          0.0000
total_people_in_household     0.9999    0.0000   0.0000          0.0000
age_group                     0.9999    0.0000   0.0000          0.0000
education                     0.9999    0.0000   0.0000          0.0000
family_income                 0.9999    0.0000   0.0000          0.0000
race                          0.9999    0.0000   0.0000          0.0000
sex                           0.9999    0.0000   0.0000          0.0000
working_status                0.9999    0.0000   0.0000          0.0000

Sample Sizes:
              Control Treated
All              1352     628
Matched (ESS)     230     234
Matched           378     234
Unmatched         974     394
Discarded           0       0

imbalance_cem_fiw_m <- imbalance(group = data_cem_fiw_m$binary_fiw, data = data_cem_fiw_m, 
          drop = c("mental_health_status", "binary_fiw", "weights",  "subclass"))

Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect

imbalance_cem_fiw_m


Multivariate Imbalance Measure: L1=0.264
Percentage of local common support: LCS=100.0%

Univariate Imbalance Measures:

                             statistic   type           L1 min 25% 50% 75% max
marriage_status           1.313909e+00 (Chi2) 4.273504e-02  NA  NA  NA  NA  NA
total_people_in_household 4.498646e+00 (Chi2) 6.715507e-02  NA  NA  NA  NA  NA
age_group                 1.439695e+00 (Chi2) 1.526252e-02  NA  NA  NA  NA  NA
education                 2.665052e+00 (Chi2) 5.840456e-02  NA  NA  NA  NA  NA
family_income             1.232107e-30 (Chi2) 0.000000e+00  NA  NA  NA  NA  NA
race                      3.706557e+00 (Chi2) 6.389906e-02  NA  NA  NA  NA  NA
sex                       0.000000e+00 (Chi2) 1.110223e-16  NA  NA  NA  NA  NA
working_status            5.938950e-01 (Chi2) 3.256003e-03  NA  NA  NA  NA  NA

t.test(test_data_new_fiw_m$mental_health_status, test_data_new_fiw_m$binary_fiw)


    Welch Two Sample t-test

data:  test_data_new_fiw_m$mental_health_status and test_data_new_fiw_m$binary_fiw
t = 23.999, df = 1992.6, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 3.936560 4.637177
sample estimates:
mean of x mean of y 
4.6040404 0.3171717

#Estimate Linear Regression on Raw Data
lm1_fiw_m<-lm(mental_health_status~ binary_fiw+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, 
        test_data_new_fiw_m)
summary(lm1_fiw_m)


Call:
lm(formula = mental_health_status ~ binary_fiw + marriage_status + 
    total_people_in_household + age_group + education + family_income + 
    race + sex + working_status, data = test_data_new_fiw_m)

Residuals:
     Min       1Q   Median       3Q      Max 
-10.4107  -4.3076  -2.4834   0.6008  29.4565 

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)    
(Intercept)               11.75582    1.77824   6.611 4.90e-11 ***
binary_fiw                 1.68288    0.37194   4.525 6.41e-06 ***
marriage_status           -0.32441    0.10569  -3.069 0.002174 ** 
total_people_in_household -0.01361    0.12135  -0.112 0.910684    
age_group                 -1.48423    0.22630  -6.559 6.92e-11 ***
education                 -0.49801    0.14005  -3.556 0.000385 ***
family_income             -0.15934    0.12063  -1.321 0.186694    
race                      -0.96310    0.22602  -4.261 2.13e-05 ***
sex                        1.75048    0.34943   5.010 5.94e-07 ***
working_status            -0.44449    0.32288  -1.377 0.168778    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 7.64 on 1970 degrees of freedom
Multiple R-squared:  0.07698,   Adjusted R-squared:  0.07277 
F-statistic: 18.26 on 9 and 1970 DF,  p-value: < 2.2e-16

#Estimate Linear Regression on Coarsened Exact Matched Data
lm2_fiw_m<-lm(mental_health_status~ binary_fiw+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, 
        data_cem_fiw_m, weights = weights)
summary(lm2_fiw_m)


Call:
lm(formula = mental_health_status ~ binary_fiw + marriage_status + 
    total_people_in_household + age_group + education + family_income + 
    race + sex + working_status, data = data_cem_fiw_m, weights = weights)

Weighted Residuals:
   Min     1Q Median     3Q    Max 
-4.403 -1.551 -0.843  0.093 34.521 

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)    
(Intercept)               39.02669   29.43135   1.326  0.18533    
binary_fiw                 0.09908    0.32232   0.307  0.75865    
marriage_status            0.10052    0.10527   0.955  0.34004    
total_people_in_household -0.38302    0.13565  -2.824  0.00491 ** 
age_group                 -1.11395    0.24742  -4.502 8.07e-06 ***
education                 -0.10074    0.13024  -0.773  0.43953    
family_income             -2.79496    2.44650  -1.142  0.25373    
race                      -0.89485    0.21498  -4.162 3.61e-05 ***
sex                       -0.48910    0.34290  -1.426  0.15427    
working_status             0.34861    0.55219   0.631  0.52807    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 3.875 on 602 degrees of freedom
Multiple R-squared:  0.0772,    Adjusted R-squared:  0.0634 
F-statistic: 5.596 on 9 and 602 DF,  p-value: 1.935e-07

 #Compare results 
plot_summs(lm1_fiw_m,lm2_fiw_m)

stargazer(lm1_fiw_m, lm2_fiw_m, type = "text", digits = 3, 
          dep.var.labels = c("Earnings Post Training"),
          column.labels = c("Full Data", "Matched Data"),
         covariate.labels=c("Family interrupt the job (binary)", "Marriage status","Total people in household", "Age group", "Education","Family income","Race", "Sex", "Working Status" ))


=================================================================================
                                                Dependent variable:              
                                  -----------------------------------------------
                                              Earnings Post Training             
                                         Full Data              Matched Data     
                                            (1)                     (2)          
---------------------------------------------------------------------------------
Family interrupt the job (binary)         1.683***                 0.099         
                                          (0.372)                 (0.322)        
                                                                                 
Marriage status                          -0.324***                 0.101         
                                          (0.106)                 (0.105)        
                                                                                 
Total people in household                  -0.014                -0.383***       
                                          (0.121)                 (0.136)        
                                                                                 
Age group                                -1.484***               -1.114***       
                                          (0.226)                 (0.247)        
                                                                                 
Education                                -0.498***                 -0.101        
                                          (0.140)                 (0.130)        
                                                                                 
Family income                              -0.159                  -2.795        
                                          (0.121)                 (2.447)        
                                                                                 
Race                                     -0.963***               -0.895***       
                                          (0.226)                 (0.215)        
                                                                                 
Sex                                       1.750***                 -0.489        
                                          (0.349)                 (0.343)        
                                                                                 
Working Status                             -0.444                  0.349         
                                          (0.323)                 (0.552)        
                                                                                 
Constant                                 11.756***                 39.027        
                                          (1.778)                 (29.431)       
                                                                                 
---------------------------------------------------------------------------------
Observations                               1,980                    612          
R2                                         0.077                   0.077         
Adjusted R2                                0.073                   0.063         
Residual Std. Error                  7.640 (df = 1970)        3.875 (df = 602)   
F Statistic                       18.256*** (df = 9; 1970) 5.596*** (df = 9; 602)
=================================================================================
Note:                                                 *p<0.1; **p<0.05; ***p<0.01

#######Entropy Balancing
# Create a subset of the dataset with the selected variables
treatment_var_fiw_m <- "binary_fiw"
covariates_vars_fiw_m <- c("marriage_status", "total_people_in_household", "age_group","education", "family_income", "race", "sex", "working_status")
dependent_var_fiw_m <- "mental_health_status"

# Prepare treatment and covariates
treatment_fiw_m  <- test_data_new_fiw_m$binary_fiw
covariates_fiw_m <- test_data_new_fiw_m[, covariates_vars_fiw_m]

# Run entropy balancing
e_bal_fiw_m  <- ebalance(
  Treatment = treatment_fiw_m,
  X = covariates_fiw_m,
  max.iterations = 200,
  constraint.tolerance = 1)

Converged within tolerance

test_data_new_fiw_m$eb_weight_fiw_m <- NA
test_data_new_fiw_m$eb_weight_fiw_m[test_data_new_fiw_m[[treatment_var_fiw_m]] == 1] <- 1  # Treated units get weight = 1
test_data_new_fiw_m$eb_weight_fiw_m[test_data_new_fiw_m[[treatment_var_fiw_m]] == 0] <- e_bal_fiw_m$w  # Control units get EB weights
# Final data for regression
eb_data_fiw_m <- test_data_new_fiw_m %>% filter(!is.na(eb_weight_fiw_m))  # Exclude unmatched if any

#data for analysis 
#Now have a weight called 'eb_weight' that can be used in analysis 

##Let's check that the two groups are equal now 
eb_data_fiw_m %>%
  group_by(binary_fiw) %>%
  summarise(
    age_weighted_mean_fiw_m = wtd.mean(age_group, weights = eb_weight_fiw_m), 
    age_weighted_variance_fiw_m = wtd.var(age_group, weights = eb_weight_fiw_m)
  )

# A tibble: 2 × 3
  binary_fiw age_weighted_mean_fiw_m age_weighted_variance_fiw_m
       <dbl>                   <dbl>                       <dbl>
1          0                    2.13                       0.676
2          1                    2.13                       0.559

check_fiw_m <- eb_data_fiw_m %>%
  group_by(binary_fiw) %>%
  summarise(across(.cols = c(marriage_status, total_people_in_household, age_group,education, family_income, race, sex, working_status),
                   .fns = list(
                     weighted_mean_fiw_m = ~wtd.mean(., weights = eb_weight_fiw_m),
                     weighted_variance_fiw_m = ~wtd.var(., weights = eb_weight_fiw_m)),
                   .names = "{.col}_{.fn}"),
            .groups = "drop")

check_t_fiw_m<- round(t(check_fiw_m), 2)

print(check_t_fiw_m)

                                                   [,1]  [,2]
binary_fiw                                         0.00  1.00
marriage_status_weighted_mean_fiw_m                3.45  3.45
marriage_status_weighted_variance_fiw_m            3.12  3.14
total_people_in_household_weighted_mean_fiw_m      1.99  1.99
total_people_in_household_weighted_variance_fiw_m  2.43  2.21
age_group_weighted_mean_fiw_m                      2.13  2.13
age_group_weighted_variance_fiw_m                  0.68  0.56
education_weighted_mean_fiw_m                      3.14  3.14
education_weighted_variance_fiw_m                  1.64  1.66
family_income_weighted_mean_fiw_m                 11.62 11.62
family_income_weighted_variance_fiw_m              2.41  2.24
race_weighted_mean_fiw_m                           1.53  1.53
race_weighted_variance_fiw_m                       0.61  0.62
sex_weighted_mean_fiw_m                            1.51  1.51
sex_weighted_variance_fiw_m                        0.25  0.25
working_status_weighted_mean_fiw_m                 2.74  2.74
working_status_weighted_variance_fiw_m             0.29  0.31

lm3_fiw_m<-lm(mental_health_status~ binary_fiw+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, data=eb_data_fiw_m, weights = eb_weight_fiw_m)
lm4_fiw_m<-lm(mental_health_status~ binary_fiw+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, data=eb_data_fiw_m)

stargazer(lm3_fiw_m, lm4_fiw_m, type = "text", digits = 3, 
          dep.var.labels = c("Earnings Post Training"),
          column.labels = c("Entropy Balanced", "Raw Data"),
          covariate.labels=c("Family interrupt the job (binary)", "Marriage status","Total people in household", "Age group", "Education","Family income","Race", "Sex", "Working Status" ))


==============================================================
                                      Dependent variable:     
                                  ----------------------------
                                     Earnings Post Training   
                                  Entropy Balanced   Raw Data 
                                         (1)           (2)    
--------------------------------------------------------------
Family interrupt the job (binary)     1.740***       1.683*** 
                                       (0.342)       (0.372)  
                                                              
Marriage status                       -0.267**      -0.324*** 
                                       (0.106)       (0.106)  
                                                              
Total people in household              -0.023         -0.014  
                                       (0.115)       (0.121)  
                                                              
Age group                             -1.624***     -1.484*** 
                                       (0.233)       (0.226)  
                                                              
Education                             -0.422***     -0.498*** 
                                       (0.138)       (0.140)  
                                                              
Family income                          -0.076         -0.159  
                                       (0.117)       (0.121)  
                                                              
Race                                  -0.953***     -0.963*** 
                                       (0.220)       (0.226)  
                                                              
Sex                                   1.845***       1.750*** 
                                       (0.349)       (0.349)  
                                                              
Working Status                        -0.725**        -0.444  
                                       (0.321)       (0.323)  
                                                              
Constant                              11.217***     11.756*** 
                                       (1.765)       (1.778)  
                                                              
--------------------------------------------------------------
Observations                            1,980         1,980   
R2                                      0.082         0.077   
Adjusted R2                             0.078         0.073   
Residual Std. Error (df = 1970)         6.066         7.640   
F Statistic (df = 9; 1970)            19.506***     18.256*** 
==============================================================
Note:                              *p<0.1; **p<0.05; ***p<0.01