Final Project

Author

Jingyi Yang

Prepare

library(haven)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(tidyverse)
Warning: package 'ggplot2' was built under R version 4.5.2
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ readr     2.1.5
✔ ggplot2   4.0.1     ✔ stringr   1.5.1
✔ lubridate 1.9.4     ✔ tibble    3.2.1
✔ purrr     1.0.4     ✔ tidyr     1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(semPlot)
library(lavaan) 
This is lavaan 0.6-19
lavaan is FREE software! Please report any bugs.
library(psych)

Attaching package: 'psych'

The following object is masked from 'package:lavaan':

    cor2cov

The following objects are masked from 'package:ggplot2':

    %+%, alpha
library(skimr) 
library(corrplot)
corrplot 0.95 loaded
library(patchwork) #Merge GGPlots together 
Warning: package 'patchwork' was built under R version 4.5.2
library(ggplot2) #Graphing
library(jtools) #Tabular Regression Results
library(descr) #Easy Frequency Tables  
library(stats) #Imports survey data 
library(ggeffects) #Predicted Probabilities from Regressions
library(nnet) #For multinomial models
library(MASS) #For ordered models 

Attaching package: 'MASS'

The following object is masked from 'package:patchwork':

    area

The following object is masked from 'package:dplyr':

    select
library(brant) #Test parallel regression assumption 
library(boot) #Create CIs for Multinomial Modeling

Attaching package: 'boot'

The following object is masked from 'package:psych':

    logit
library(cem) #Coarsened Exact Matching 
Loading required package: tcltk
Loading required package: lattice

Attaching package: 'lattice'

The following object is masked from 'package:boot':

    melanoma


How to use CEM? Type vignette("cem")
library(MatchIt)  #Coarsened Exact Matching 
library(stargazer)  #Regression Output

Please cite as: 

 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer 
library(jtools)  #Regression Output
library(WeightIt) #Entropy Balancing
library(Hmisc) #General Modeling 

Attaching package: 'Hmisc'

The following object is masked from 'package:jtools':

    %nin%

The following object is masked from 'package:psych':

    describe

The following objects are masked from 'package:dplyr':

    src, summarize

The following objects are masked from 'package:base':

    format.pval, units
library(ebal) #Entropy Balancing
##
## ebal Package: Implements Entropy Balancing.

## See http://www.stanford.edu/~jhain/ for additional information.
library(survey) #Applying EB weights 
Loading required package: grid
Loading required package: Matrix

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack

Loading required package: survival

Attaching package: 'survival'

The following object is masked from 'package:boot':

    aml


Attaching package: 'survey'

The following object is masked from 'package:Hmisc':

    deff

The following object is masked from 'package:WeightIt':

    calibrate

The following object is masked from 'package:graphics':

    dotchart
library(gganimate)
Warning: package 'gganimate' was built under R version 4.5.2

Data Import

GSS2022 <- read_dta("~/研一下/DACSS 790Q/Final Project/2022_stata/2022/GSS2022.dta")
head(GSS2022)
# A tibble: 6 × 1,185
  year         id wrkstat hrs1        hrs2        evwork      wrkslf  occ10     
  <dbl+lbl> <dbl> <dbl+l> <dbl+lbl>   <dbl+lbl>   <dbl+lbl>   <dbl+l> <dbl+lbl> 
1 2022          1 1 [wor…    40       NA(i) [iap] NA(i) [iap] 2 [som…  430 [man…
2 2022          2 5 [ret… NA(i) [iap] NA(i) [iap]     1 [yes] 2 [som…   50 [mar…
3 2022          3 1 [wor…    52       NA(i) [iap] NA(i) [iap] 2 [som… 4610 [per…
4 2022          4 3 [wit… NA(i) [iap]    25       NA(i) [iap] 2 [som… 4120 [foo…
5 2022          5 8 [oth… NA(i) [iap] NA(i) [iap]     1 [yes] 2 [som… 7330 [ind…
6 2022          6 1 [wor…    50       NA(i) [iap] NA(i) [iap] 2 [som… 4610 [per…
# ℹ 1,177 more variables: prestg10 <dbl+lbl>, prestg105plus <dbl+lbl>,
#   indus10 <dbl+lbl>, marital <dbl+lbl>, martype <dbl+lbl>, divorce <dbl+lbl>,
#   widowed <dbl+lbl>, spwrksta <dbl+lbl>, sphrs1 <dbl+lbl>, sphrs2 <dbl+lbl>,
#   spevwork <dbl+lbl>, cowrksta <dbl+lbl>, coevwork <dbl+lbl>,
#   cohrs1 <dbl+lbl>, cohrs2 <dbl+lbl>, spwrkslf <dbl+lbl>, sppres80 <dbl+lbl>,
#   spocc10 <dbl+lbl>, sppres10 <dbl+lbl>, sppres105plus <dbl+lbl>,
#   spind10 <dbl+lbl>, coocc10 <dbl+lbl>, coind10 <dbl+lbl>, …

Setting data set

# Change age to age group! 
age_breaks <- c(18, 30, 50, 65, 100)

GSS2022$age_group <- cut(GSS2022$age, 
                      age_breaks, labels = c("18-29", "30-49", "50-64", "65+"), 
                      include.lowest = TRUE) 
                      
GSS2022<- GSS2022 %>% mutate(age_group= recode(age_group,"18-29"="1", "30-49"="2", "50-64"="3", "65+"="4"))

test_data <- GSS2022 %>% dplyr::select(c(wkvsfam, famvswk, physhlth, mntlhlth, marital,hompop_exp, age_group, degree, income, race, sex, wrkstat, wtssps))%>% mutate(wkvsfam= as.numeric(wkvsfam),famvswk= as.numeric(famvswk), physhlth= as.numeric(physhlth), mntlhlth= as.numeric(mntlhlth), marital= as.numeric(marital), hompop_exp= as.numeric(hompop_exp), age_group= as.numeric(age_group), degree= as.numeric(degree), income= as.numeric(income), race= as.numeric(race), sex= as.numeric(sex), wrkstat=as.numeric(wrkstat), wtssps=as.numeric(wtssps))

new_names <- c("work_impacts_the_family", "family_impacts_the_work", "physical_health_status", "mental_health_status", "marriage_status", "total_people_in_household", "age_group","education", "family_income", "race", "sex", "working_status", "weight" )

colnames(test_data) <- new_names #Apply new names to your data frame
skim(test_data)
Data summary
Name test_data
Number of rows 4149
Number of columns 13
_______________________
Column type frequency:
numeric 13
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
work_impacts_the_family 1767 0.57 2.69 0.93 1.00 2.00 3.00 3.00 4.00 ▂▇▁▇▅
family_impacts_the_work 1779 0.57 2.94 0.86 1.00 2.00 3.00 4.00 4.00 ▁▆▁▇▆
physical_health_status 1844 0.56 2.67 6.11 0.00 0.00 0.00 2.00 30.00 ▇▁▁▁▁
mental_health_status 1870 0.55 4.53 7.93 0.00 0.00 0.00 5.00 30.00 ▇▁▁▁▁
marriage_status 16 1.00 2.77 1.74 1.00 1.00 3.00 5.00 5.00 ▇▁▃▁▆
total_people_in_household 8 1.00 1.75 1.67 0.00 1.00 1.00 2.00 49.00 ▇▁▁▁▁
age_group 256 0.94 2.46 1.03 1.00 2.00 2.00 3.00 4.00 ▅▇▁▅▅
education 2 1.00 1.82 1.27 0.00 1.00 1.00 3.00 4.00 ▂▇▂▃▂
family_income 484 0.88 11.22 2.07 1.00 12.00 12.00 12.00 12.00 ▁▁▁▁▇
race 64 0.98 1.51 0.76 1.00 1.00 1.00 2.00 3.00 ▇▁▂▁▂
sex 23 0.99 1.54 0.50 1.00 1.00 2.00 2.00 2.00 ▇▁▁▁▇
working_status 9 1.00 3.07 2.34 1.00 1.00 2.00 5.00 8.00 ▇▁▃▁▂
weight 605 0.85 1.00 1.26 0.07 0.34 0.58 1.06 14.27 ▇▁▁▁▁
# Reverse Coding
final_data <- test_data %>%
  mutate(
    work_impacts_the_family = case_when(
      work_impacts_the_family == 1 ~ 4,
      work_impacts_the_family == 2 ~ 3,
      work_impacts_the_family == 3 ~ 2,
      work_impacts_the_family == 4 ~ 1,
      work_impacts_the_family == "NA"~ NA_real_),
    work_impacts_the_family = labelled(work_impacts_the_family, 
      c(`Never` = 1, `Rarely` = 2, `Sometimes` = 3, `Often` = 4))) %>% 
  mutate(family_impacts_the_work = as.numeric(family_impacts_the_work),
    family_impacts_the_work = case_when(
      family_impacts_the_work == 1 ~ 4,
      family_impacts_the_work == 2 ~ 3,
      family_impacts_the_work == 3 ~ 2,
      family_impacts_the_work == 4 ~ 1,
      family_impacts_the_work == "NA"~ NA_real_),
    family_impacts_the_work = labelled(family_impacts_the_work, 
      c(`Never` = 1, `Rarely` = 2, `Sometimes` = 3, `Often` = 4))) %>%
  mutate(marriage_status= case_when(
    marriage_status== 1 ~ 5, 
    marriage_status== 2 ~ 4,
    marriage_status== 3 ~ 3,
    marriage_status== 4 ~ 2,
    marriage_status== 5 ~ 1,
    marriage_status== "NA"~ NA_real_),
    marriage_status=labelled(marriage_status, c(`Never married`=1, `Separated`=2, `Divorced`=3, `Widowed`=4, `Married`=5))) %>%
  mutate(age_group= case_when(
    age_group== 1 ~ 1,
    age_group== 2 ~ 2, 
    age_group== 3 ~ 3,
    age_group== 4 ~ 4,
    age_group== "NA"~ NA_real_),
    age_group=labelled(age_group, c(`18-29 YEARS OLD`=1, `30-49 YEARS OLD`=2, `50-64 YEARS OLD`=3, `64 YEARS OLD OR OVER`=4)))%>% 
  mutate(education= case_when(
    education== 0 ~ 1,
    education== 1 ~ 2,
    education== 2 ~ 3, 
    education== 3 ~ 4, 
    education== 4 ~ 5,
    education== "NA"~ NA_real_),
    education= labelled(education, c(`Less THAN HIGH SCHOOL`= 1, `HIGH SCHOOL`= 2, `ASSOCIATE/JUNIOR COLLEGE`=3, `BACHELORS`=4, `GRADUATE`= 5)))%>%
  mutate(family_income= case_when(
    family_income== 1 ~ 1, 
    family_income== 2 ~ 2, 
    family_income== 3 ~ 3, 
    family_income== 4 ~ 4, 
    family_income== 5 ~ 5, 
    family_income== 6 ~ 6, 
    family_income== 7 ~ 7, 
    family_income== 8 ~ 8, 
    family_income== 9 ~ 9,
    family_income== 10 ~ 10, 
    family_income== 11 ~ 11, 
    family_income== 12 ~ 12,
    family_income== "NA"~ NA_real_),
    family_income= labelled(family_income, c(`UNDER$1,000`= 1,`$1,000 TO $2,999`= 2, `$3,000 TO $3,999`=3, `$4,000 TO $4,999`=4, `$5,000 TOlibra $5,999`= 5, `$6,000 TO $6,999`=6, `$7,000 TO $7,999`=7, `$8,000 TO $9,999`=8, `$10,000 TO $14,999`= 9, `$15,000 TO $19,999`= 10,`$20,000 TO $24,999`= 11,  `$25,000 OR MORE`= 12))) %>%
  mutate(race= case_when(
    race== 1 ~ 1, 
    race== 2 ~ 2,
    race== 3 ~ 3,
    race== "NA"~ NA_real_),
  race= labelled(race, c(`WHITE`= 1, `BLACK`= 2, `OTHER`=3))) %>%
  mutate(sex= case_when(
    sex== 1 ~ 1, 
    sex== 2 ~ 2,
    sex== "NA"~ NA_real_),
    sex= labelled(sex, c(`MALE`=1, `FEMALE`=2)))%>%
  mutate(total_people_in_household = case_when(
    total_people_in_household == 0 ~ 0,
    total_people_in_household == 1 ~ 1,
    total_people_in_household == 2 ~ 2,
    total_people_in_household == 3 ~ 3,
    total_people_in_household == 4 ~ 4,
    total_people_in_household == 5 ~ 5,
    total_people_in_household == 6 ~ 6,
    total_people_in_household == 7 ~ 7,
    total_people_in_household >= 8 ~ 8,
    total_people_in_household == "NA"~ NA_real_),
    total_people_in_household = labelled(total_people_in_household, 
      c(`NONE`=0, `1`=1, `2`=2, `3`=3, `4`=4, `5`=5, `6`=6, `7`=7, `8+`=8)))%>%
  mutate(working_status = case_when(
    working_status == 1 ~ 3,
    working_status == 2 ~ 2,
    working_status == 3 ~ 1,
    working_status == 4 ~ 1,
    working_status == 5 ~ 1,
    working_status == 6 ~ 1,
    working_status == 7 ~ 1,
    working_status == 8 ~ 1,
    working_status == "NA"~ NA_real_),
    working_status = labelled(working_status,
      c(`WORKING FULL TIME`=3, `WORKING PART TIME`=2,`OTHER`=1)))
head(final_data)
# A tibble: 6 × 13
  work_impacts_the_family family_impacts_the_work physical_health_status
  <dbl+lbl>               <dbl+lbl>                                <dbl>
1  3 [Sometimes]           2 [Rarely]                                 30
2 NA                      NA                                          NA
3  2 [Rarely]              2 [Rarely]                                  0
4  4 [Often]               1 [Never]                                   4
5 NA                      NA                                          NA
6  4 [Often]               3 [Sometimes]                               0
# ℹ 10 more variables: mental_health_status <dbl>, marriage_status <dbl+lbl>,
#   total_people_in_household <dbl+lbl>, age_group <dbl+lbl>,
#   education <dbl+lbl>, family_income <dbl+lbl>, race <dbl+lbl>,
#   sex <dbl+lbl>, working_status <dbl+lbl>, weight <dbl>
str(final_data)
tibble [4,149 × 13] (S3: tbl_df/tbl/data.frame)
 $ work_impacts_the_family  : dbl+lbl [1:4149]  3, NA,  2,  4, NA,  4,  4,  2,  3, NA, NA,  1,  2,  ...
   ..@ labels: Named num [1:4] 1 2 3 4
   .. ..- attr(*, "names")= chr [1:4] "Never" "Rarely" "Sometimes" "Often"
 $ family_impacts_the_work  : dbl+lbl [1:4149]  2, NA,  2,  1, NA,  3,  2,  2,  4, NA, NA,  2,  2,  ...
   ..@ labels: Named num [1:4] 1 2 3 4
   .. ..- attr(*, "names")= chr [1:4] "Never" "Rarely" "Sometimes" "Often"
 $ physical_health_status   : num [1:4149] 30 NA 0 4 NA 0 2 0 5 NA ...
 $ mental_health_status     : num [1:4149] 15 NA 0 10 NA NA 14 0 5 NA ...
 $ marriage_status          : dbl+lbl [1:4149] 3, 5, 3, 1, 1, 1, 1, 5, 1, 1, 4, 1, 5, 1, 1, 3, 1, 5,...
   ..@ labels: Named num [1:5] 1 2 3 4 5
   .. ..- attr(*, "names")= chr [1:5] "Never married" "Separated" "Divorced" "Widowed" ...
 $ total_people_in_household: dbl+lbl [1:4149] 1, 1, 3, 1, 3, 1, 3, 2, 2, 1, 2, 1, 1, 1, 3, 1, 1, 3,...
   ..@ labels: Named num [1:9] 0 1 2 3 4 5 6 7 8
   .. ..- attr(*, "names")= chr [1:9] "NONE" "1" "2" "3" ...
 $ age_group                : dbl+lbl [1:4149]  4,  4,  3,  1,  3,  1,  1,  2,  2,  4,  3,  1,  2,  ...
   ..@ labels: Named num [1:4] 1 2 3 4
   .. ..- attr(*, "names")= chr [1:4] "18-29 YEARS OLD" "30-49 YEARS OLD" "50-64 YEARS OLD" "64 YEARS OLD OR OVER"
 $ education                : dbl+lbl [1:4149] 4, 5, 2, 4, 2, 2, 2, 5, 2, 2, 2, 4, 2, 2, 2, 2, 2, 4,...
   ..@ labels: Named num [1:5] 1 2 3 4 5
   .. ..- attr(*, "names")= chr [1:5] "Less THAN HIGH SCHOOL" "HIGH SCHOOL" "ASSOCIATE/JUNIOR COLLEGE" "BACHELORS" ...
 $ family_income            : dbl+lbl [1:4149] 12, NA, 12, 12, 12, 12, 12, 12, 11,  9, 12, 12, 12,  ...
   ..@ labels: Named num [1:12] 1 2 3 4 5 6 7 8 9 10 ...
   .. ..- attr(*, "names")= chr [1:12] "UNDER$1,000" "$1,000 TO $2,999" "$3,000 TO $3,999" "$4,000 TO $4,999" ...
 $ race                     : dbl+lbl [1:4149]  1,  1,  1,  1,  1,  1,  3,  1,  1, NA,  1,  1,  1,  ...
   ..@ labels: Named num [1:3] 1 2 3
   .. ..- attr(*, "names")= chr [1:3] "WHITE" "BLACK" "OTHER"
 $ sex                      : dbl+lbl [1:4149]  2,  1,  2,  2,  1,  1,  2,  1,  2,  2,  2,  2,  2,  ...
   ..@ labels: Named num [1:2] 1 2
   .. ..- attr(*, "names")= chr [1:2] "MALE" "FEMALE"
 $ working_status           : dbl+lbl [1:4149] 3, 1, 3, 1, 1, 3, 2, 3, 3, 1, 1, 2, 3, 2, 1, 1, 1, 2,...
   ..@ labels: Named num [1:3] 3 2 1
   .. ..- attr(*, "names")= chr [1:3] "WORKING FULL TIME" "WORKING PART TIME" "OTHER"
 $ weight                   : num [1:4149] 0.231 0.577 1.014 0.902 1.084 ...
descr_table <-summarytools::descr(final_data)
descr_table <- descr_table%>% as.data.frame()
descr_table 
                age_group     education family_impacts_the_work family_income
Mean           2.46134087    2.82372800              2.06075949   11.22155525
Std.Dev        1.02505005    1.26515370              0.86175007    2.06922350
Min            1.00000000    1.00000000              1.00000000    1.00000000
Q1             2.00000000    2.00000000              1.00000000   12.00000000
Median         2.00000000    2.00000000              2.00000000   12.00000000
Q3             3.00000000    4.00000000              3.00000000   12.00000000
Max            4.00000000    5.00000000              4.00000000   12.00000000
MAD            1.48260000    1.48260000              1.48260000    0.00000000
IQR            1.00000000    2.00000000              2.00000000    0.00000000
CV             0.41646001    0.44804376              0.41817110    0.18439721
Skewness       0.14783903    0.44857313              0.31041263   -3.46521709
SE.Skewness    0.03924336    0.03802346              0.05028365    0.04044464
Kurtosis      -1.11365244   -1.10626852             -0.77307445   12.32022739
N.Valid     3893.00000000 4147.00000000           2370.00000000 3665.00000000
N           4149.00000000 4149.00000000           4149.00000000 4149.00000000
Pct.Valid     93.82983852   99.95179561             57.12219812   88.33453844
            marriage_status mental_health_status physical_health_status
Mean             3.23227680         4.527424e+00           2.665510e+00
Std.Dev          1.73589003         7.932331e+00           6.106232e+00
Min              1.00000000         0.000000e+00           0.000000e+00
Q1               1.00000000         0.000000e+00           0.000000e+00
Median           3.00000000         0.000000e+00           0.000000e+00
Q3               5.00000000         5.000000e+00           2.000000e+00
Max              5.00000000         3.000000e+01           3.000000e+01
MAD              2.96520000         0.000000e+00           0.000000e+00
IQR              4.00000000         5.000000e+00           2.000000e+00
CV               0.53704869         1.752063e+00           2.290831e+00
Skewness        -0.24719383         2.088814e+00           3.243560e+00
SE.Skewness      0.03808776         5.127644e-02           5.098681e-02
Kurtosis        -1.65602454         3.465350e+00           1.058110e+01
N.Valid       4133.00000000         2.279000e+03           2.305000e+03
N             4149.00000000         4.149000e+03           4.149000e+03
Pct.Valid       99.61436491         5.492890e+01           5.555556e+01
                     race           sex total_people_in_household       weight
Mean           1.51236230    1.53708192              1.723497e+00 1.000000e+00
Std.Dev        0.75672397    0.49868347              1.328288e+00 1.261260e+00
Min            1.00000000    1.00000000              0.000000e+00 7.397235e-02
Q1             1.00000000    1.00000000              1.000000e+00 3.376568e-01
Median         1.00000000    2.00000000              1.000000e+00 5.789633e-01
Q3             2.00000000    2.00000000              2.000000e+00 1.058485e+00
Max            3.00000000    2.00000000              8.000000e+00 1.427246e+01
MAD            0.00000000    0.00000000              0.000000e+00 4.393498e-01
IQR            1.00000000    1.00000000              1.000000e+00 7.208285e-01
CV             0.50035892    0.32443519              7.706936e-01 1.261260e+00
Skewness       1.07499880   -0.14868322              2.105539e+00 3.751667e+00
SE.Skewness    0.03831071    0.03812003              3.805097e-02 4.112871e-02
Kurtosis      -0.42136820   -1.97837261              4.601020e+00 1.974101e+01
N.Valid     4085.00000000 4126.00000000              4.141000e+03 3.544000e+03
N           4149.00000000 4149.00000000              4.149000e+03 4.149000e+03
Pct.Valid     98.45745963   99.44564955              9.980718e+01 8.541817e+01
            work_impacts_the_family working_status
Mean                      2.3140218     2.02198068
Std.Dev                   0.9272377     0.94739629
Min                       1.0000000     1.00000000
Q1                        2.0000000     1.00000000
Median                    2.0000000     2.00000000
Q3                        3.0000000     3.00000000
Max                       4.0000000     3.00000000
MAD                       1.4826000     1.48260000
IQR                       1.0000000     2.00000000
CV                        0.4007039     0.46854864
Skewness                  0.1041239    -0.04374997
SE.Skewness               0.0501570     0.03805557
Kurtosis                 -0.8967973    -1.88470649
N.Valid                2382.0000000  4140.00000000
N                      4149.0000000  4149.00000000
Pct.Valid                57.4114244    99.78308026
kableExtra::kable(descr_table, caption = "Table 1. Summary of the data",digits = 2,align = "lcc",format="simple",table.envir = "table*") 
Table 1. Summary of the data
age_group education family_impacts_the_work family_income marriage_status mental_health_status physical_health_status race sex total_people_in_household weight work_impacts_the_family working_status
Mean 2.46 2.82 2.06 11.22 3.23 4.53 2.67 1.51 1.54 1.72 1.00 2.31 2.02
Std.Dev 1.03 1.27 0.86 2.07 1.74 7.93 6.11 0.76 0.50 1.33 1.26 0.93 0.95
Min 1.00 1.00 1.00 1.00 1.00 0.00 0.00 1.00 1.00 0.00 0.07 1.00 1.00
Q1 2.00 2.00 1.00 12.00 1.00 0.00 0.00 1.00 1.00 1.00 0.34 2.00 1.00
Median 2.00 2.00 2.00 12.00 3.00 0.00 0.00 1.00 2.00 1.00 0.58 2.00 2.00
Q3 3.00 4.00 3.00 12.00 5.00 5.00 2.00 2.00 2.00 2.00 1.06 3.00 3.00
Max 4.00 5.00 4.00 12.00 5.00 30.00 30.00 3.00 2.00 8.00 14.27 4.00 3.00
MAD 1.48 1.48 1.48 0.00 2.97 0.00 0.00 0.00 0.00 0.00 0.44 1.48 1.48
IQR 1.00 2.00 2.00 0.00 4.00 5.00 2.00 1.00 1.00 1.00 0.72 1.00 2.00
CV 0.42 0.45 0.42 0.18 0.54 1.75 2.29 0.50 0.32 0.77 1.26 0.40 0.47
Skewness 0.15 0.45 0.31 -3.47 -0.25 2.09 3.24 1.07 -0.15 2.11 3.75 0.10 -0.04
SE.Skewness 0.04 0.04 0.05 0.04 0.04 0.05 0.05 0.04 0.04 0.04 0.04 0.05 0.04
Kurtosis -1.11 -1.11 -0.77 12.32 -1.66 3.47 10.58 -0.42 -1.98 4.60 19.74 -0.90 -1.88
N.Valid 3893.00 4147.00 2370.00 3665.00 4133.00 2279.00 2305.00 4085.00 4126.00 4141.00 3544.00 2382.00 4140.00
N 4149.00 4149.00 4149.00 4149.00 4149.00 4149.00 4149.00 4149.00 4149.00 4149.00 4149.00 4149.00 4149.00
Pct.Valid 93.83 99.95 57.12 88.33 99.61 54.93 55.56 98.46 99.45 99.81 85.42 57.41 99.78
skim(final_data)
Data summary
Name final_data
Number of rows 4149
Number of columns 13
_______________________
Column type frequency:
numeric 13
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
work_impacts_the_family 1767 0.57 2.31 0.93 1.00 2.00 2.00 3.00 4.00 ▅▇▁▇▂
family_impacts_the_work 1779 0.57 2.06 0.86 1.00 1.00 2.00 3.00 4.00 ▆▇▁▆▁
physical_health_status 1844 0.56 2.67 6.11 0.00 0.00 0.00 2.00 30.00 ▇▁▁▁▁
mental_health_status 1870 0.55 4.53 7.93 0.00 0.00 0.00 5.00 30.00 ▇▁▁▁▁
marriage_status 16 1.00 3.23 1.74 1.00 1.00 3.00 5.00 5.00 ▆▁▃▁▇
total_people_in_household 8 1.00 1.72 1.33 0.00 1.00 1.00 2.00 8.00 ▇▂▁▁▁
age_group 256 0.94 2.46 1.03 1.00 2.00 2.00 3.00 4.00 ▅▇▁▅▅
education 2 1.00 2.82 1.27 1.00 2.00 2.00 4.00 5.00 ▂▇▂▃▂
family_income 484 0.88 11.22 2.07 1.00 12.00 12.00 12.00 12.00 ▁▁▁▁▇
race 64 0.98 1.51 0.76 1.00 1.00 1.00 2.00 3.00 ▇▁▂▁▂
sex 23 0.99 1.54 0.50 1.00 1.00 2.00 2.00 2.00 ▇▁▁▁▇
working_status 9 1.00 2.02 0.95 1.00 1.00 2.00 3.00 3.00 ▇▁▂▁▇
weight 605 0.85 1.00 1.26 0.07 0.34 0.58 1.06 14.27 ▇▁▁▁▁
summary_df <- summarytools::dfSummary(final_data,
                                     varnumbers=FALSE,
                                     plain.ascii=FALSE,
                                     style="grid",
                                     graph.col = TRUE,
                                     valid.col=FALSE)

# Print the summary table and suppress warnings
print(summary_df,
      method="render",
      table.classes="table-condensed")

Data Frame Summary

final_data

Dimensions: 4149 x 13
Duplicates: 132
Variable Stats / Values Freqs (% of Valid) Graph Missing
work_impacts_the_family [haven_labelled, vctrs_vctr, double]
1. [1] Never
2. [2] Rarely
3. [3] Sometimes
4. [4] Often
525 ( 22.0% )
826 ( 34.7% )
789 ( 33.1% )
242 ( 10.2% )
1767 (42.6%)
family_impacts_the_work [haven_labelled, vctrs_vctr, double]
1. [1] Never
2. [2] Rarely
3. [3] Sometimes
4. [4] Often
704 ( 29.7% )
926 ( 39.1% )
632 ( 26.7% )
108 ( 4.6% )
1779 (42.9%)
physical_health_status [numeric]
Mean (sd) : 2.7 (6.1)
min ≤ med ≤ max:
0 ≤ 0 ≤ 30
IQR (CV) : 2 (2.3)
24 distinct values 1844 (44.4%)
mental_health_status [numeric]
Mean (sd) : 4.5 (7.9)
min ≤ med ≤ max:
0 ≤ 0 ≤ 30
IQR (CV) : 5 (1.8)
29 distinct values 1870 (45.1%)
marriage_status [haven_labelled, vctrs_vctr, double]
1. [1] Never married
2. [2] Separated
3. [3] Divorced
4. [4] Widowed
5. [5] Married
1333 ( 32.3% )
121 ( 2.9% )
669 ( 16.2% )
273 ( 6.6% )
1737 ( 42.0% )
16 (0.4%)
total_people_in_household [haven_labelled, vctrs_vctr, double]
1. [0] NONE
2. [1] 1
3. [2] 2
4. [3] 3
5. [4] 4
6. [5] 5
7. [6] 6
8. [7] 7
9. [8] 8+
40 ( 1.0% )
2727 ( 65.9% )
580 ( 14.0% )
314 ( 7.6% )
270 ( 6.5% )
108 ( 2.6% )
52 ( 1.3% )
24 ( 0.6% )
26 ( 0.6% )
8 (0.2%)
age_group [haven_labelled, vctrs_vctr, double]
1. [1] 18-29 YEARS OLD
2. [2] 30-49 YEARS OLD
3. [3] 50-64 YEARS OLD
4. [4] 64 YEARS OLD OR OVER
750 ( 19.3% )
1408 ( 36.2% )
924 ( 23.7% )
811 ( 20.8% )
256 (6.2%)
education [haven_labelled, vctrs_vctr, double]
1. [1] Less THAN HIGH SCHOOL
2. [2] HIGH SCHOOL
3. [3] ASSOCIATE/JUNIOR COLL
4. [4] BACHELORS
5. [5] GRADUATE
417 ( 10.1% )
1919 ( 46.3% )
367 ( 8.8% )
866 ( 20.9% )
578 ( 13.9% )
2 (0.0%)
family_income [haven_labelled, vctrs_vctr, double]
1. [1] UNDER$1,000
2. [2] $1,000 TO $2,999
3. [3] $3,000 TO $3,999
4. [4] $4,000 TO $4,999
5. [5] $5,000 TOlibra $5,999
6. [6] $6,000 TO $6,999
7. [7] $7,000 TO $7,999
8. [8] $8,000 TO $9,999
9. [9] $10,000 TO $14,999
10. [10] $15,000 TO $19,999
[ 2 others ]
53 ( 1.4% )
37 ( 1.0% )
19 ( 0.5% )
14 ( 0.4% )
20 ( 0.5% )
15 ( 0.4% )
18 ( 0.5% )
46 ( 1.3% )
215 ( 5.9% )
126 ( 3.4% )
3102 ( 84.6% )
484 (11.7%)
race [haven_labelled, vctrs_vctr, double]
1. [1] WHITE
2. [2] BLACK
3. [3] OTHER
2651 ( 64.9% )
775 ( 19.0% )
659 ( 16.1% )
64 (1.5%)
sex [haven_labelled, vctrs_vctr, double]
1. [1] MALE
2. [2] FEMALE
1910 ( 46.3% )
2216 ( 53.7% )
23 (0.6%)
working_status [haven_labelled, vctrs_vctr, double]
1. [3] WORKING FULL TIME
2. [2] WORKING PART TIME
3. [1] OTHER
1904 ( 46.0% )
423 ( 10.2% )
1813 ( 43.8% )
9 (0.2%)
weight [numeric]
Mean (sd) : 1 (1.3)
min ≤ med ≤ max:
0.1 ≤ 0.6 ≤ 14.3
IQR (CV) : 0.7 (1.3)
2269 distinct values 605 (14.6%)

Generated by summarytools 1.1.4 (R version 4.5.1)
2025-12-16

Separate two scales

data_physical_health<-final_data%>% dplyr::select(c(work_impacts_the_family, family_impacts_the_work, physical_health_status, marriage_status, total_people_in_household,  age_group,education, family_income, race, sex, working_status, weight)) %>% mutate(marriage_status= as.factor(marriage_status))%>% mutate(race= as.factor(race))%>% mutate(working_status= as.factor(working_status))


head(data_physical_health)
# A tibble: 6 × 12
  work_impacts_the_family family_impacts_the_work physical_health_status
  <dbl+lbl>               <dbl+lbl>                                <dbl>
1  3 [Sometimes]           2 [Rarely]                                 30
2 NA                      NA                                          NA
3  2 [Rarely]              2 [Rarely]                                  0
4  4 [Often]               1 [Never]                                   4
5 NA                      NA                                          NA
6  4 [Often]               3 [Sometimes]                               0
# ℹ 9 more variables: marriage_status <fct>,
#   total_people_in_household <dbl+lbl>, age_group <dbl+lbl>,
#   education <dbl+lbl>, family_income <dbl+lbl>, race <fct>, sex <dbl+lbl>,
#   working_status <fct>, weight <dbl>
data_mental_health <- final_data%>% dplyr::select(c(work_impacts_the_family, family_impacts_the_work, mental_health_status,marriage_status, total_people_in_household,  age_group,education, family_income, race, sex, working_status, weight))%>% mutate(marriage_status= as.factor(marriage_status)) %>% mutate(race= as.factor(race)) %>% mutate(working_status= as.factor(working_status))

head(data_mental_health)
# A tibble: 6 × 12
  work_impacts_the_family family_impacts_the_work mental_health_status
  <dbl+lbl>               <dbl+lbl>                              <dbl>
1  3 [Sometimes]           2 [Rarely]                               15
2 NA                      NA                                        NA
3  2 [Rarely]              2 [Rarely]                                0
4  4 [Often]               1 [Never]                                10
5 NA                      NA                                        NA
6  4 [Often]               3 [Sometimes]                            NA
# ℹ 9 more variables: marriage_status <fct>,
#   total_people_in_household <dbl+lbl>, age_group <dbl+lbl>,
#   education <dbl+lbl>, family_income <dbl+lbl>, race <fct>, sex <dbl+lbl>,
#   working_status <fct>, weight <dbl>

Count Dependent Variable Tutorials

Negative-Binomial Model

nb_physical_health<- glm.nb(physical_health_status~factor(work_impacts_the_family)+ factor(family_impacts_the_work)+ marriage_status+ total_people_in_household+ factor(age_group)+education+family_income+race+sex+working_status, data=data_physical_health, weights = data_physical_health$weight) 

summary(nb_physical_health)

Call:
glm.nb(formula = physical_health_status ~ factor(work_impacts_the_family) + 
    factor(family_impacts_the_work) + marriage_status + total_people_in_household + 
    factor(age_group) + education + family_income + race + sex + 
    working_status, data = data_physical_health, weights = data_physical_health$weight, 
    init.theta = 0.2094728664, link = log)

Coefficients:
                                  Estimate Std. Error z value Pr(>|z|)    
(Intercept)                       1.511360   0.600850   2.515 0.011891 *  
factor(work_impacts_the_family)2 -0.343587   0.185930  -1.848 0.064612 .  
factor(work_impacts_the_family)3 -0.093034   0.190429  -0.489 0.625162    
factor(work_impacts_the_family)4  0.057812   0.236210   0.245 0.806651    
factor(family_impacts_the_work)2  0.012271   0.165973   0.074 0.941062    
factor(family_impacts_the_work)3  0.173197   0.181481   0.954 0.339907    
factor(family_impacts_the_work)4  0.748579   0.296470   2.525 0.011571 *  
marriage_status2                 -0.432965   0.466801  -0.928 0.353660    
marriage_status3                 -0.203963   0.211080  -0.966 0.333901    
marriage_status4                  0.590118   0.401073   1.471 0.141197    
marriage_status5                 -0.563402   0.151358  -3.722 0.000197 ***
total_people_in_household         0.039358   0.037544   1.048 0.294493    
factor(age_group)2                0.390258   0.163958   2.380 0.017302 *  
factor(age_group)3                0.526365   0.188567   2.791 0.005248 ** 
factor(age_group)4                0.050911   0.296104   0.172 0.863487    
education                        -0.072313   0.048874  -1.480 0.138987    
family_income                    -0.002453   0.041675  -0.059 0.953055    
race2                            -0.929795   0.182298  -5.100 3.39e-07 ***
race3                            -0.428670   0.184055  -2.329 0.019857 *  
sex                               0.528623   0.116438   4.540 5.63e-06 ***
working_status2                  -0.804438   0.288735  -2.786 0.005335 ** 
working_status3                  -1.247546   0.262586  -4.751 2.02e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(0.2095) family taken to be 1)

    Null deviance: 1445.1  on 1663  degrees of freedom
Residual deviance: 1323.4  on 1642  degrees of freedom
  (2485 observations deleted due to missingness)
AIC: 5946.7

Number of Fisher Scoring iterations: 1

              Theta:  0.2095 
          Std. Err.:  0.0110 

 2 x log-likelihood:  -5900.7380 
nb_mental_health<- glm.nb(mental_health_status~factor(work_impacts_the_family)+factor(family_impacts_the_work)+ marriage_status+ total_people_in_household+ factor(age_group)+education+family_income+race+sex+working_status, data=data_mental_health, weights = data_mental_health$weight) 

summary(nb_mental_health)

Call:
glm.nb(formula = mental_health_status ~ factor(work_impacts_the_family) + 
    factor(family_impacts_the_work) + marriage_status + total_people_in_household + 
    factor(age_group) + education + family_income + race + sex + 
    working_status, data = data_mental_health, weights = data_mental_health$weight, 
    init.theta = 0.3019571903, link = log)

Coefficients:
                                 Estimate Std. Error z value Pr(>|z|)    
(Intercept)                       2.33523    0.48969   4.769 1.85e-06 ***
factor(work_impacts_the_family)2  0.10590    0.15668   0.676  0.49910    
factor(work_impacts_the_family)3  0.31542    0.16066   1.963  0.04961 *  
factor(work_impacts_the_family)4  1.06976    0.20018   5.344 9.10e-08 ***
factor(family_impacts_the_work)2 -0.05445    0.13874  -0.392  0.69470    
factor(family_impacts_the_work)3  0.23840    0.15183   1.570  0.11636    
factor(family_impacts_the_work)4  0.65586    0.25509   2.571  0.01014 *  
marriage_status2                 -0.75306    0.39191  -1.922  0.05466 .  
marriage_status3                 -0.00363    0.17564  -0.021  0.98351    
marriage_status4                  0.09807    0.34077   0.288  0.77350    
marriage_status5                 -0.56577    0.12530  -4.515 6.32e-06 ***
total_people_in_household         0.05490    0.03100   1.771  0.07656 .  
factor(age_group)2               -0.28534    0.13444  -2.122  0.03380 *  
factor(age_group)3               -0.73709    0.15626  -4.717 2.39e-06 ***
factor(age_group)4               -1.55231    0.26130  -5.941 2.84e-09 ***
education                        -0.11652    0.04079  -2.857  0.00428 ** 
family_income                    -0.09528    0.03316  -2.873  0.00406 ** 
race2                            -0.84015    0.14877  -5.647 1.63e-08 ***
race3                            -0.62518    0.15405  -4.058 4.94e-05 ***
sex                               0.64876    0.09718   6.676 2.46e-11 ***
working_status2                  -0.08706    0.24613  -0.354  0.72357    
working_status3                  -0.11122    0.22351  -0.498  0.61875    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(0.302) family taken to be 1)

    Null deviance: 1805.7  on 1655  degrees of freedom
Residual deviance: 1572.1  on 1634  degrees of freedom
  (2493 observations deleted due to missingness)
AIC: 7564.9

Number of Fisher Scoring iterations: 1

              Theta:  0.3020 
          Std. Err.:  0.0144 

 2 x log-likelihood:  -7518.9340 

Basic Poisson Model

pois_physical_health <- glm(physical_health_status~factor(work_impacts_the_family)+factor(family_impacts_the_work)+ marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, data=data_physical_health, family = poisson, weights = data_physical_health$weight)
summary(pois_physical_health)

Call:
glm(formula = physical_health_status ~ factor(work_impacts_the_family) + 
    factor(family_impacts_the_work) + marriage_status + total_people_in_household + 
    age_group + education + family_income + race + sex + working_status, 
    family = poisson, data = data_physical_health, weights = data_physical_health$weight)

Coefficients:
                                  Estimate Std. Error z value Pr(>|z|)    
(Intercept)                       1.609684   0.151005  10.660  < 2e-16 ***
factor(work_impacts_the_family)2 -0.198042   0.050110  -3.952 7.75e-05 ***
factor(work_impacts_the_family)3 -0.126264   0.050317  -2.509 0.012095 *  
factor(work_impacts_the_family)4  0.094246   0.059904   1.573 0.115654    
factor(family_impacts_the_work)2  0.006691   0.046561   0.144 0.885736    
factor(family_impacts_the_work)3  0.308296   0.048031   6.419 1.37e-10 ***
factor(family_impacts_the_work)4  0.414250   0.073017   5.673 1.40e-08 ***
marriage_status2                 -0.496496   0.147905  -3.357 0.000788 ***
marriage_status3                 -0.014599   0.051450  -0.284 0.776602    
marriage_status4                  0.647988   0.078114   8.295  < 2e-16 ***
marriage_status5                 -0.330846   0.040291  -8.211  < 2e-16 ***
total_people_in_household         0.056837   0.009906   5.737 9.61e-09 ***
age_group                         0.087960   0.020990   4.191 2.78e-05 ***
education                        -0.095169   0.013513  -7.043 1.88e-12 ***
family_income                    -0.007990   0.010750  -0.743 0.457325    
race2                            -0.796804   0.061292 -13.000  < 2e-16 ***
race3                            -0.386999   0.055522  -6.970 3.16e-12 ***
sex                               0.376324   0.031882  11.804  < 2e-16 ***
working_status2                  -0.728866   0.054884 -13.280  < 2e-16 ***
working_status3                  -1.066988   0.048152 -22.159  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 13049  on 1663  degrees of freedom
Residual deviance: 11598  on 1644  degrees of freedom
  (2485 observations deleted due to missingness)
AIC: 13825

Number of Fisher Scoring iterations: 6
pois_mental_health <- glm(mental_health_status~factor(work_impacts_the_family)+factor(family_impacts_the_work)+ marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, data=data_mental_health, family = poisson, weights = data_mental_health$weight)
summary(pois_mental_health)

Call:
glm(formula = mental_health_status ~ factor(work_impacts_the_family) + 
    factor(family_impacts_the_work) + marriage_status + total_people_in_household + 
    age_group + education + family_income + race + sex + working_status, 
    family = poisson, data = data_mental_health, weights = data_mental_health$weight)

Coefficients:
                                  Estimate Std. Error z value Pr(>|z|)    
(Intercept)                       2.433581   0.104620  23.261  < 2e-16 ***
factor(work_impacts_the_family)2  0.144862   0.041876   3.459 0.000542 ***
factor(work_impacts_the_family)3  0.293700   0.041728   7.038 1.94e-12 ***
factor(work_impacts_the_family)4  0.903941   0.046361  19.498  < 2e-16 ***
factor(family_impacts_the_work)2  0.049796   0.035719   1.394 0.163282    
factor(family_impacts_the_work)3  0.273578   0.037302   7.334 2.23e-13 ***
factor(family_impacts_the_work)4  0.232450   0.056760   4.095 4.22e-05 ***
marriage_status2                 -0.553176   0.111211  -4.974 6.55e-07 ***
marriage_status3                  0.112812   0.041455   2.721 0.006502 ** 
marriage_status4                  0.540910   0.071381   7.578 3.52e-14 ***
marriage_status5                 -0.254795   0.029143  -8.743  < 2e-16 ***
total_people_in_household         0.017912   0.007326   2.445 0.014493 *  
age_group                        -0.376252   0.017589 -21.391  < 2e-16 ***
education                        -0.116615   0.010390 -11.223  < 2e-16 ***
family_income                    -0.074216   0.006181 -12.007  < 2e-16 ***
race2                            -0.538645   0.039629 -13.592  < 2e-16 ***
race3                            -0.418799   0.042440  -9.868  < 2e-16 ***
sex                               0.574155   0.024507  23.428  < 2e-16 ***
working_status2                  -0.040346   0.053720  -0.751 0.452621    
working_status3                  -0.108636   0.048405  -2.244 0.024813 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 16817  on 1655  degrees of freedom
Residual deviance: 13810  on 1636  degrees of freedom
  (2493 observations deleted due to missingness)
AIC: 16899

Number of Fisher Scoring iterations: 6

Quasi-Poisson Model

quasi_physical_health <-  glm(physical_health_status~factor(work_impacts_the_family)+factor(family_impacts_the_work)+ marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status,family = quasipoisson, data=data_physical_health, weights = data_physical_health$weight)
summary(quasi_physical_health)

Call:
glm(formula = physical_health_status ~ factor(work_impacts_the_family) + 
    factor(family_impacts_the_work) + marriage_status + total_people_in_household + 
    age_group + education + family_income + race + sex + working_status, 
    family = quasipoisson, data = data_physical_health, weights = data_physical_health$weight)

Coefficients:
                                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)                       1.609684   0.517519   3.110 0.001901 ** 
factor(work_impacts_the_family)2 -0.198042   0.171735  -1.153 0.249003    
factor(work_impacts_the_family)3 -0.126264   0.172444  -0.732 0.464151    
factor(work_impacts_the_family)4  0.094246   0.205300   0.459 0.646250    
factor(family_impacts_the_work)2  0.006691   0.159572   0.042 0.966560    
factor(family_impacts_the_work)3  0.308296   0.164609   1.873 0.061260 .  
factor(family_impacts_the_work)4  0.414250   0.250243   1.655 0.098036 .  
marriage_status2                 -0.496496   0.506896  -0.979 0.327486    
marriage_status3                 -0.014599   0.176328  -0.083 0.934025    
marriage_status4                  0.647988   0.267709   2.420 0.015607 *  
marriage_status5                 -0.330846   0.138085  -2.396 0.016688 *  
total_people_in_household         0.056837   0.033950   1.674 0.094299 .  
age_group                         0.087960   0.071935   1.223 0.221596    
education                        -0.095169   0.046310  -2.055 0.040034 *  
family_income                    -0.007990   0.036844  -0.217 0.828334    
race2                            -0.796804   0.210058  -3.793 0.000154 ***
race3                            -0.386999   0.190282  -2.034 0.042130 *  
sex                               0.376324   0.109264   3.444 0.000587 ***
working_status2                  -0.728866   0.188098  -3.875 0.000111 ***
working_status3                  -1.066988   0.165026  -6.466 1.33e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for quasipoisson family taken to be 11.74545)

    Null deviance: 13049  on 1663  degrees of freedom
Residual deviance: 11598  on 1644  degrees of freedom
  (2485 observations deleted due to missingness)
AIC: NA

Number of Fisher Scoring iterations: 6
quasi_mental_health <- glm(mental_health_status~factor(work_impacts_the_family)+factor(family_impacts_the_work)+ marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, family = quasipoisson, data=data_mental_health, weights = data_mental_health$weight)
summary(quasi_mental_health)

Call:
glm(formula = mental_health_status ~ factor(work_impacts_the_family) + 
    factor(family_impacts_the_work) + marriage_status + total_people_in_household + 
    age_group + education + family_income + race + sex + working_status, 
    family = quasipoisson, data = data_mental_health, weights = data_mental_health$weight)

Coefficients:
                                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)                       2.43358    0.35176   6.918 6.54e-12 ***
factor(work_impacts_the_family)2  0.14486    0.14080   1.029 0.303694    
factor(work_impacts_the_family)3  0.29370    0.14030   2.093 0.036470 *  
factor(work_impacts_the_family)4  0.90394    0.15588   5.799 7.99e-09 ***
factor(family_impacts_the_work)2  0.04980    0.12010   0.415 0.678461    
factor(family_impacts_the_work)3  0.27358    0.12542   2.181 0.029302 *  
factor(family_impacts_the_work)4  0.23245    0.19084   1.218 0.223386    
marriage_status2                 -0.55318    0.37392  -1.479 0.139226    
marriage_status3                  0.11281    0.13938   0.809 0.418414    
marriage_status4                  0.54091    0.24000   2.254 0.024342 *  
marriage_status5                 -0.25479    0.09798  -2.600 0.009397 ** 
total_people_in_household         0.01791    0.02463   0.727 0.467249    
age_group                        -0.37625    0.05914  -6.362 2.58e-10 ***
education                        -0.11662    0.03494  -3.338 0.000863 ***
family_income                    -0.07422    0.02078  -3.571 0.000366 ***
race2                            -0.53865    0.13324  -4.043 5.53e-05 ***
race3                            -0.41880    0.14269  -2.935 0.003382 ** 
sex                               0.57415    0.08240   6.968 4.64e-12 ***
working_status2                  -0.04035    0.18062  -0.223 0.823269    
working_status3                  -0.10864    0.16275  -0.667 0.504548    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for quasipoisson family taken to be 11.30473)

    Null deviance: 16817  on 1655  degrees of freedom
Residual deviance: 13810  on 1636  degrees of freedom
  (2493 observations deleted due to missingness)
AIC: NA

Number of Fisher Scoring iterations: 6

Likelihood Ratio, AIC, BIC

#Create function to compare model fit stats
glm_fit <- function(model) {
  # Calculate Likelihood Ratio
  lr <- logLik(model)
  
  # Calculate AIC
  aic <- AIC(model)
  
  # Calculate BIC
  n <- nobs(model)
  p <- length(coef(model))
  bic <- -2 * logLik(model) + p * log(n)
  
  # Calculate Deviance
  deviance <- summary(model)$deviance
  
  # Return the metrics as a list
  metrics <- data.frame(Likelihood_Ratio = lr, AIC = aic, BIC = bic, Deviance = deviance, Coefficients = p)
  return(metrics)
}
glm_fit(pois_physical_health)
  Likelihood_Ratio      AIC      BIC Deviance Coefficients
1         -6892.74 13825.48 13933.82 11598.05           20
glm_fit(pois_mental_health)
  Likelihood_Ratio      AIC     BIC Deviance Coefficients
1        -8429.281 16898.56 17006.8 13810.46           20
glm_fit(quasi_physical_health)
  Likelihood_Ratio AIC BIC Deviance Coefficients
1               NA  NA  NA 11598.05           20
glm_fit(quasi_mental_health)
  Likelihood_Ratio AIC BIC Deviance Coefficients
1               NA  NA  NA 13810.46           20
glm_fit(nb_physical_health)
  Likelihood_Ratio      AIC      BIC Deviance Coefficients
1        -2950.369 5946.738 6063.912 1323.441           22
glm_fit(nb_mental_health)
  Likelihood_Ratio      AIC      BIC Deviance Coefficients
1        -3759.467 7564.934 7682.002 1572.057           22

Overall Table

stargazer(nb_physical_health, nb_mental_health, type="text",align=TRUE,  dep.var.labels=c("Physical Health Status","Mental Health Status"), covariate.labels=c("work impacts the family: Rarely","work impacts the family: Sometimes", "work impacts the family: Often", "Family impacts the work:Rarely","Family impacts the work:Sometimes","Family impacts the work:Often", "Marriage status:Separated","Marriage status:Divorced", "Marriage status:Widowed","Marriage status:Married","Total people in household", "Age group:30-49 YEARS OLD","Age group:50-64 YEARS OLD", "Age group:64 YEARS OLD OR OVER", "Education", "Family income", "Race:BLACK", "Race:OTHER","Sex", "Working Status:WORKING PART TIME", "Working Status:WORKING FULL TIME" ))

==============================================================================
                                               Dependent variable:            
                                   -------------------------------------------
                                   Physical Health Status Mental Health Status
                                            (1)                   (2)         
------------------------------------------------------------------------------
work impacts the family: Rarely           -0.344*                0.106        
                                          (0.186)               (0.157)       
                                                                              
work impacts the family: Sometimes         -0.093               0.315**       
                                          (0.190)               (0.161)       
                                                                              
work impacts the family: Often             0.058                1.070***      
                                          (0.236)               (0.200)       
                                                                              
Family impacts the work:Rarely             0.012                 -0.054       
                                          (0.166)               (0.139)       
                                                                              
Family impacts the work:Sometimes          0.173                 0.238        
                                          (0.181)               (0.152)       
                                                                              
Family impacts the work:Often             0.749**               0.656**       
                                          (0.296)               (0.255)       
                                                                              
Marriage status:Separated                  -0.433               -0.753*       
                                          (0.467)               (0.392)       
                                                                              
Marriage status:Divorced                   -0.204                -0.004       
                                          (0.211)               (0.176)       
                                                                              
Marriage status:Widowed                    0.590                 0.098        
                                          (0.401)               (0.341)       
                                                                              
Marriage status:Married                  -0.563***             -0.566***      
                                          (0.151)               (0.125)       
                                                                              
Total people in household                  0.039                 0.055*       
                                          (0.038)               (0.031)       
                                                                              
Age group:30-49 YEARS OLD                 0.390**               -0.285**      
                                          (0.164)               (0.134)       
                                                                              
Age group:50-64 YEARS OLD                 0.526***             -0.737***      
                                          (0.189)               (0.156)       
                                                                              
Age group:64 YEARS OLD OR OVER             0.051               -1.552***      
                                          (0.296)               (0.261)       
                                                                              
Education                                  -0.072              -0.117***      
                                          (0.049)               (0.041)       
                                                                              
Family income                              -0.002              -0.095***      
                                          (0.042)               (0.033)       
                                                                              
Race:BLACK                               -0.930***             -0.840***      
                                          (0.182)               (0.149)       
                                                                              
Race:OTHER                                -0.429**             -0.625***      
                                          (0.184)               (0.154)       
                                                                              
Sex                                       0.529***              0.649***      
                                          (0.116)               (0.097)       
                                                                              
Working Status:WORKING PART TIME         -0.804***               -0.087       
                                          (0.289)               (0.246)       
                                                                              
Working Status:WORKING FULL TIME         -1.248***               -0.111       
                                          (0.263)               (0.224)       
                                                                              
Constant                                  1.511**               2.335***      
                                          (0.601)               (0.490)       
                                                                              
------------------------------------------------------------------------------
Observations                               1,664                 1,656        
Log Likelihood                           -2,951.369            -3,760.467     
theta                                 0.209*** (0.011)      0.302*** (0.014)  
Akaike Inf. Crit.                        5,946.738             7,564.934      
==============================================================================
Note:                                              *p<0.1; **p<0.05; ***p<0.01

Prediction

physical_pred_j<-ggpredict(nb_physical_health, terms="work_impacts_the_family")
print(physical_pred_j)
# Predicted counts of physical_health_status

work_impacts_the_family | Predicted |      95% CI
-------------------------------------------------
                      1 |      8.46 | 4.59, 15.59
                      2 |      6.00 | 3.23, 11.14
                      3 |      7.71 | 4.13, 14.40
                      4 |      8.96 | 4.56, 17.63

Adjusted for:
*   family_impacts_the_work =     1
*           marriage_status =     1
* total_people_in_household =  2.03
*                 age_group =     1
*                 education =  2.89
*             family_income = 11.65
*                      race =     1
*                       sex =  1.48
*            working_status =     1
physical_pred_f<-ggpredict(nb_physical_health, terms="family_impacts_the_work")
print(physical_pred_f)
# Predicted counts of physical_health_status

family_impacts_the_work | Predicted |      95% CI
-------------------------------------------------
                      1 |      8.46 | 4.59, 15.59
                      2 |      8.57 | 4.41, 16.62
                      3 |     10.06 | 5.09, 19.87
                      4 |     17.89 | 7.71, 41.49

Adjusted for:
*   work_impacts_the_family =     1
*           marriage_status =     1
* total_people_in_household =  2.03
*                 age_group =     1
*                 education =  2.89
*             family_income = 11.65
*                      race =     1
*                       sex =  1.48
*            working_status =     1
mental_pred_j<-ggpredict(nb_mental_health, terms="work_impacts_the_family")
print(mental_pred_j)
# Predicted counts of mental_health_status

work_impacts_the_family | Predicted |       95% CI
--------------------------------------------------
                      1 |      7.08 |  4.22, 11.87
                      2 |      7.87 |  4.67, 13.27
                      3 |      9.70 |  5.73, 16.44
                      4 |     20.63 | 11.61, 36.65

Adjusted for:
*   family_impacts_the_work =     1
*           marriage_status =     1
* total_people_in_household =  2.03
*                 age_group =     1
*                 education =  2.89
*             family_income = 11.66
*                      race =     1
*                       sex =  1.48
*            working_status =     1
mental_pred_f<-ggpredict(nb_mental_health, terms="family_impacts_the_work")
print(mental_pred_f)
# Predicted counts of mental_health_status

family_impacts_the_work | Predicted |      95% CI
-------------------------------------------------
                      1 |      7.08 | 4.22, 11.87
                      2 |      6.70 | 3.83, 11.73
                      3 |      8.98 | 5.06, 15.95
                      4 |     13.64 | 6.65, 27.98

Adjusted for:
*   work_impacts_the_family =     1
*           marriage_status =     1
* total_people_in_household =  2.03
*                 age_group =     1
*                 education =  2.89
*             family_income = 11.66
*                      race =     1
*                       sex =  1.48
*            working_status =     1
custom_order <- c("Never", "Rarely"," Sometimes", "Often")

ggplot(physical_pred_j, aes(x = x, y = predicted, fill = as_factor(x))) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.4, position = position_dodge(width = 0.7)) +
  theme_minimal(base_size = 13) +
  labs(x = "Work-to-family conflict", y = "Predicted Count: Days of Poor Physical Health", 
       title = "Predicted Days Of Poor Physical Health By Work-to-family conflict", 
       subtitle = "Results From Negative-Binomial Model",
       fill = "Work-to-family conflict")+
  scale_x_discrete(limits = custom_order)+
  theme(legend.position = "bottom" )

ggplot(physical_pred_f, aes(x = x, y = predicted, fill = as_factor(x))) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.4, position = position_dodge(width = 0.7)) +
  theme_minimal(base_size = 13) +
  labs(x = "Family-to-work conflict", y = "Predicted Count: Days of Poor Physical Health", 
       title = "Predicted Days Of Poor Physical Health By Family-to-work conflict", 
       subtitle = "Results From Negative-Binomial Model",
       fill = "Family-to-work conflict")+
  scale_x_discrete(limits = custom_order)+
  theme(legend.position = "bottom" )

ggplot(mental_pred_j, aes(x = x, y = predicted, fill = as_factor(x))) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.4, position = position_dodge(width = 0.7)) +
  theme_minimal(base_size = 13) +
  labs(x = "Work-to-family conflict", y = "Predicted Count: Days Of Poor Mental Health", 
       title = "Predicted Days Of Poor Mental Health By Work-to-family conflict", 
       subtitle = "Results From Negative-Binomial Model",
       fill = "Work-to-family conflict")+
  scale_x_discrete(limits = custom_order)+
  theme(legend.position = "bottom" )

ggplot(mental_pred_f, aes(x = x, y = predicted, fill = as_factor(x))) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.4, position = position_dodge(width = 0.7)) +
  theme_minimal(base_size = 13) +
  labs(x = "Family-to-work conflict", y = "Predicted Count: Days Of Poor Mental Health", 
       title = "Predicted Days Of Poor Mental Health By Family-to-work conflict", 
       subtitle = "Results From Negative-Binomial Model",
       fill= "Family-to-work conflict")+
  scale_x_discrete(limits = custom_order)+
  theme(legend.position = "bottom" )

Hypothetical Outcome Plot (HOP)

Reference:

https://stats.stackexchange.com/questions/225544/simulating-an-overdispersed-negative-binomial-distribution#:~:text=Since%20you%20have%20the%20mean,/size’%20in%20this%20parametrization.&text=Note%20especially%20the%20right%20tail,strong%20position%20to%20argue%20from.

https://library.virginia.edu/data/articles/getting-started-with-negative-binomial-regression-modeling#:~:text=One%20approach%20that%20addresses%20this,k%2C%20called%20the%20dispersion%20parameter.

Var=μ+μ2/k

k=μ2/(Var−μ)

k≈μ2/sd2 [avoid negative number]

https://www.statology.org/r-pmax-pmin/ https://deepr.gagolewski.com/chapter/120-numeric.html Keep the simulation data in the range of 0 to 30

data_physical_health <- data_physical_health %>% 
  mutate(work_impacts_the_family = as.numeric(work_impacts_the_family),
         family_impacts_the_work = as.numeric(family_impacts_the_work)) %>%
  filter(!is.na(work_impacts_the_family),
         !is.na(physical_health_status))
  

data_physical_health <- data_physical_health %>%
  mutate(work_impacts_the_family = factor(work_impacts_the_family,
                                           levels = c(1,2,3,4),
                                           labels = c("Never", "Rarely", "Sometimes", "Often"))) %>%
  mutate(family_impacts_the_work = factor(family_impacts_the_work,
                                           levels = c(1,2,3,4),
                                           labels = c("Never", "Rarely", "Sometimes", "Often"))) 

# Summarize the data (assuming CO2_sum is already created)
data_physical_health_sum <- data_physical_health %>% group_by(work_impacts_the_family) %>% dplyr::summarise(mean = mean (physical_health_status, na.rm = TRUE),
            sd = sd(physical_health_status, na.rm = TRUE), .groups = "drop"
  ) %>%
  mutate(
    var = sd^2,
    size = ifelse(var > mean,
                  mean^2 / (var - mean),
                  NA_real_)
  )
# Simulate hypothetical outcomes
set.seed(01002)  # For reproducibility
n_simulations <- 100 # Number of hypothetical outcomes

# Create a data frame with simulated outcomes for each Type
simulated_data <- data_physical_health_sum %>%
  rowwise() %>%
do(data.frame(
    work_impacts_the_family = .$work_impacts_the_family,
    outcome = pmin(
      rnbinom(n_simulations, mu = .$mean, size = .$size),
      30),
    sim_id = 1:n_simulations
  ))

# Create an animated plot using ggplot and gganimmated
g <- ggplot (simulated_data, aes(x = work_impacts_the_family, y = outcome, fill = work_impacts_the_family))+
  geom_col(data = data_physical_health_sum, aes(y = mean), alpha = 0.4, color = "black") +
  geom_point(aes(color = work_impacts_the_family), size = 2, position = position_jitter(width = 0.1, height = 0)) +
  labs (title = "Hypothetical Outcome Plot: Days of Poor Physical Health",
        subtitle = "Simulation {frame} of {nframes}",
        x = "Work-to-family conflict",
        y = "Simulated Days of Poor Physical Health") + 
  theme_minimal()+
  theme(legend.position = "none") +
  transition_manual(sim_id)

animate(g, nframes = n_simulations, fps =1)

# Summarize the data (assuming CO2_sum is already created)
data_physical_health_sum_1 <- data_physical_health %>% filter(!is.na(family_impacts_the_work)) %>% group_by(family_impacts_the_work) %>%
   dplyr::summarise(mean = mean (physical_health_status, na.rm = TRUE),
            sd = sd(physical_health_status, na.rm = TRUE),.groups = "drop"
  ) %>%
  mutate(
    var = sd^2,
    size = ifelse(var > mean,
                  mean^2 / (var - mean),
                  NA_real_))
# Simulate hypothetical outcomes
set.seed(01002)  # For reproducibility
n_simulations <- 100 # Number of hypothetical outcomes

# Create a data frame with simulated outcomes for each Type
simulated_data_1 <- data_physical_health_sum_1 %>%
  rowwise() %>%
  do(data.frame(
    family_impacts_the_work = .$family_impacts_the_work,
    outcome = pmin(
      rnbinom(n_simulations, mu = .$mean, size = .$size),
      30),
    sim_id = 1:n_simulations
  ))

# Create an animated plot using ggplot and gganimmated
g <- ggplot (simulated_data_1, aes(x = family_impacts_the_work, y = outcome, fill = family_impacts_the_work))+
  geom_col(data = data_physical_health_sum_1, aes(y = mean), alpha = 0.4, color = "black") +
  geom_point(aes(color = family_impacts_the_work), size = 2, position = position_jitter(width = 0.1, height = 0)) +
  labs (title = "Hypothetical Outcome Plot: Days of Poor Physical Health",
        subtitle = "Simulation {frame} of {nframes}",
        x = "Family-to-work conflict",
        y = "Simulated Days of Poor Physical Health") + 
  theme_minimal()+
  theme(legend.position = "none") +
  transition_manual(sim_id)

animate(g, nframes = n_simulations, fps =1)

data_mental_health <- data_mental_health %>% 
  mutate(work_impacts_the_family = as.numeric(work_impacts_the_family),
         family_impacts_the_work = as.numeric(family_impacts_the_work))%>%
  filter(!is.na(work_impacts_the_family),
         !is.na(mental_health_status))

data_mental_health <- data_mental_health %>%
  mutate(work_impacts_the_family = factor(work_impacts_the_family,
                                           levels = c(1,2,3,4),
                                           labels = c("Never", "Rarely", "Sometimes", "Often"))) %>%
  mutate(family_impacts_the_work = factor(family_impacts_the_work,
                                           levels = c(1,2,3,4),
                                           labels = c("Never", "Rarely", "Sometimes", "Often"))) 

# Summarize the data (assuming CO2_sum is already created)
data_mental_health_sum <- data_mental_health |> group_by(work_impacts_the_family) |>
   dplyr::summarise(mean = mean (mental_health_status, na.rm = TRUE),
            sd = sd(mental_health_status, na.rm = TRUE),.groups = "drop"
  ) %>%
  mutate(
    var = sd^2,
    size = ifelse(var > mean,
                  mean^2 / (var - mean),
                  NA_real_))
# Simulate hypothetical outcomes
set.seed(01002)  # For reproducibility
n_simulations <- 100 # Number of hypothetical outcomes

# Create a data frame with simulated outcomes for each Type
simulated_data_3 <- data_mental_health_sum %>%
  rowwise() %>%
  do(data.frame(
    work_impacts_the_family = .$work_impacts_the_family,
    outcome = pmin(
      rnbinom(n_simulations, mu = .$mean, size = .$size),
      30),
    sim_id = 1:n_simulations
  ))

# Create an animated plot using ggplot and gganimmated
g <- ggplot (simulated_data_3, aes(x = work_impacts_the_family, y = outcome, fill = work_impacts_the_family))+
  geom_col(data = data_mental_health_sum, aes(y = mean), alpha = 0.4, color = "black") +
  geom_point(aes(color = work_impacts_the_family), size = 2, position = position_jitter(width = 0.1, height = 0)) +
  labs (title = "Hypothetical Outcome Plot: Days of Poor Mental Health",
        subtitle = "Simulation {frame} of {nframes}",
        x = "Work-to-family conflict",
        y = "Simulated Days of Poor Mental Health") + 
  theme_minimal()+
  theme(legend.position = "none") +
  transition_manual(sim_id)

animate(g, nframes = n_simulations, fps =1)

# Summarize the data (assuming CO2_sum is already created)
data_mental_health_sum_1 <- data_mental_health  %>% filter(!is.na(family_impacts_the_work)) %>% group_by(family_impacts_the_work) %>%
   dplyr::summarise(mean = mean (mental_health_status, na.rm = TRUE),
            sd = sd(mental_health_status, na.rm = TRUE),.groups = "drop"
  ) %>%
  mutate(
    var = sd^2,
    size = ifelse(var > mean,
                  mean^2 / (var - mean),
                  NA_real_))
# Simulate hypothetical outcomes
set.seed(01002)  # For reproducibility
n_simulations <- 100 # Number of hypothetical outcomes

# Create a data frame with simulated outcomes for each Type
simulated_data_4 <- data_mental_health_sum_1 |>
  rowwise() %>%
  do(data.frame(
    family_impacts_the_work = .$family_impacts_the_work,
    outcome = pmin(
      rnbinom(n_simulations, mu = .$mean, size = .$size),
      30),
    sim_id = 1:n_simulations
  ))

# Create an animated plot using ggplot and gganimmated
g <- ggplot (simulated_data_4, aes(x = family_impacts_the_work, y = outcome, fill = family_impacts_the_work))+
  geom_col(data = data_mental_health_sum_1, aes(y = mean), alpha = 0.4, color = "black") +
  geom_point(aes(color = family_impacts_the_work), size = 2, position = position_jitter(width = 0.1, height = 0)) +
  labs (title = "Hypothetical Outcome Plot: Days of Poor Mental Health",
        subtitle = "Simulation {frame} of {nframes}",
        x = "Family-to-work conflict",
        y = "Simulated Days of Poor Mental Health") +
  theme_minimal()+
  theme(legend.position = "none") +
  transition_manual(sim_id)

animate(g, nframes = n_simulations, fps =1)

Justify the combine of two variables

items_to_check <- final_data[, c("work_impacts_the_family", "family_impacts_the_work")]
items_to_check <- items_to_check %>% mutate %>% mutate(work_impacts_the_family = as.numeric(work_impacts_the_family)) %>% mutate(family_impacts_the_work = as.numeric(family_impacts_the_work))

Cronbach’s Alpha

Cronbach’s Alpha shows how closely related a set of items are as a group.

raw_alpha <- psych::alpha(items_to_check)
raw_alpha

Reliability analysis   
Call: psych::alpha(x = items_to_check)

  raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
      0.68      0.68    0.52      0.52 2.2 0.0098  2.2 0.78     0.52

    95% confidence boundaries 
         lower alpha upper
Feldt     0.66  0.68   0.7
Duhachek  0.66  0.68   0.7

 Reliability if an item is dropped:
                        raw_alpha std.alpha G6(smc) average_r S/N alpha se
work_impacts_the_family      0.56      0.52    0.27      0.52 1.1       NA
family_impacts_the_work      0.48      0.52    0.27      0.52 1.1       NA
                        var.r med.r
work_impacts_the_family     0  0.52
family_impacts_the_work     0  0.52

 Item statistics 
                           n raw.r std.r r.cor r.drop mean   sd
work_impacts_the_family 2382  0.88  0.87  0.63   0.52  2.3 0.93
family_impacts_the_work 2370  0.86  0.87  0.63   0.52  2.1 0.86

Non missing response frequency for each item
                           1    2    3    4 miss
work_impacts_the_family 0.22 0.35 0.33 0.10 0.43
family_impacts_the_work 0.30 0.39 0.27 0.05 0.43

Correlation

cor(items_to_check, use = "pairwise.complete.obs") # https://bwlewis.github.io/covar/missing.html
                        work_impacts_the_family family_impacts_the_work
work_impacts_the_family               1.0000000               0.5190463
family_impacts_the_work               0.5190463               1.0000000
r_matrix <- corr.test(items_to_check)$r

cor.plot(r_matrix)

Eigenvalues

sum(eigen(r_matrix)$values >1)
[1] 1

Factor Analysis

fa_1 <- fa(r_matrix, nfactors = 1, rotate = "oblimin", fm = "pa")

fa_1
Factor Analysis using method =  pa
Call: fa(r = r_matrix, nfactors = 1, rotate = "oblimin", fm = "pa")
Standardized loadings (pattern matrix) based upon correlation matrix
                         PA1   h2   u2 com
work_impacts_the_family 0.72 0.52 0.48   1
family_impacts_the_work 0.72 0.52 0.48   1

                PA1
SS loadings    1.04
Proportion Var 0.52

Mean item complexity =  1
Test of the hypothesis that 1 factor is sufficient.

df null model =  1  with the objective function =  0.31
df of  the model are -1  and the objective function was  0 

The root mean square of the residuals (RMSR) is  0 
The df corrected root mean square of the residuals is  NA 

Fit based upon off diagonal values = 1
Measures of factor score adequacy             
                                                   PA1
Correlation of (regression) scores with factors   0.83
Multiple R square of scores with factors          0.68
Minimum correlation of possible factor scores     0.37
fa.diagram(fa_1)

Overall Summary table

fa_table <- data.frame(
  "Variable" = rownames(fa_1$loadings[]),
  "Raw alpha Total" = round(raw_alpha$total[1],2),
  "Raw alpha Drop" = round(raw_alpha$alpha.drop[1],2),
  "Correlation" = round(fa_1$r[2],2),
  "Principal Axis Factor 1" = round(fa_1$loadings[,1],2),
  "SS Loadings" = round(fa_1$Vaccounted["SS loadings", 1],2),
  "Proportion Variance" = round(fa_1$Vaccounted["Proportion Var", 1],2)
)

fa_table <- fa_table %>% pivot_longer(cols = c(raw_alpha,raw_alpha.1,Correlation,Principal.Axis.Factor.1,SS.Loadings ,Proportion.Variance), names_to = "Measure",values_to = "Value") %>% pivot_wider(names_from = Variable, values_from = Value)

kableExtra::kable(fa_table, caption = "Table 2. Summary for analysis Work–Family Conflict Variables Combination",digits = 2,align = "lcc",format="simple",table.envir = "table*", col.names = c("Measure ","Work Impacts The Family", "Family Impacts The Work")) 
Table 2. Summary for analysis Work–Family Conflict Variables Combination
Measure Work Impacts The Family Family Impacts The Work
raw_alpha 0.68 0.68
raw_alpha.1 0.56 0.48
Correlation 0.52 0.52
Principal.Axis.Factor.1 0.72 0.72
SS.Loadings 1.04 1.04
Proportion.Variance 0.52 0.52