Final Project

Author

Jingyi Yang

Prepare

library(haven)
library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ readr     2.1.5
✔ ggplot2   3.5.2     ✔ stringr   1.5.1
✔ lubridate 1.9.4     ✔ tibble    3.2.1
✔ purrr     1.0.4     ✔ tidyr     1.3.1

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(semPlot)
library(lavaan)

This is lavaan 0.6-19
lavaan is FREE software! Please report any bugs.

library(psych)


Attaching package: 'psych'

The following object is masked from 'package:lavaan':

    cor2cov

The following objects are masked from 'package:ggplot2':

    %+%, alpha

library(skimr) 
library(corrplot)

corrplot 0.95 loaded

library(patchwork) #Merge GGPlots together 
library(ggplot2) #Graphing
library(jtools) #Tabular Regression Results
library(descr) #Easy Frequency Tables  
library(stats) #Imports survey data 
library(ggeffects) #Predicted Probabilities from Regressions
library(nnet) #For multinomial models
library(MASS) #For ordered models


Attaching package: 'MASS'

The following object is masked from 'package:patchwork':

    area

The following object is masked from 'package:dplyr':

    select

library(brant) #Test parallel regression assumption 
library(boot) #Create CIs for Multinomial Modeling


Attaching package: 'boot'

The following object is masked from 'package:psych':

    logit

library(cem) #Coarsened Exact Matching

Loading required package: tcltk
Loading required package: lattice

Attaching package: 'lattice'

The following object is masked from 'package:boot':

    melanoma


How to use CEM? Type vignette("cem")

library(MatchIt)  #Coarsened Exact Matching 
library(stargazer)  #Regression Output


Please cite as: 

 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer

library(jtools)  #Regression Output
library(WeightIt) #Entropy Balancing
library(Hmisc) #General Modeling


Attaching package: 'Hmisc'

The following object is masked from 'package:jtools':

    %nin%

The following object is masked from 'package:psych':

    describe

The following objects are masked from 'package:dplyr':

    src, summarize

The following objects are masked from 'package:base':

    format.pval, units

library(ebal) #Entropy Balancing

##
## ebal Package: Implements Entropy Balancing.

## See http://www.stanford.edu/~jhain/ for additional information.

library(survey) #Applying EB weights

Loading required package: grid
Loading required package: Matrix

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack

Loading required package: survival

Attaching package: 'survival'

The following object is masked from 'package:boot':

    aml


Attaching package: 'survey'

The following object is masked from 'package:Hmisc':

    deff

The following object is masked from 'package:WeightIt':

    calibrate

The following object is masked from 'package:graphics':

    dotchart

library(gganimate)

Warning: package 'gganimate' was built under R version 4.5.2

Data Import

GSS2022 <- read_dta("~/研一下/DACSS 790Q/Final Project/2022_stata/2022/GSS2022.dta")
head(GSS2022)

# A tibble: 6 × 1,185
  year         id wrkstat hrs1        hrs2        evwork      wrkslf  occ10     
  <dbl+lbl> <dbl> <dbl+l> <dbl+lbl>   <dbl+lbl>   <dbl+lbl>   <dbl+l> <dbl+lbl> 
1 2022          1 1 [wor…    40       NA(i) [iap] NA(i) [iap] 2 [som…  430 [man…
2 2022          2 5 [ret… NA(i) [iap] NA(i) [iap]     1 [yes] 2 [som…   50 [mar…
3 2022          3 1 [wor…    52       NA(i) [iap] NA(i) [iap] 2 [som… 4610 [per…
4 2022          4 3 [wit… NA(i) [iap]    25       NA(i) [iap] 2 [som… 4120 [foo…
5 2022          5 8 [oth… NA(i) [iap] NA(i) [iap]     1 [yes] 2 [som… 7330 [ind…
6 2022          6 1 [wor…    50       NA(i) [iap] NA(i) [iap] 2 [som… 4610 [per…
# ℹ 1,177 more variables: prestg10 <dbl+lbl>, prestg105plus <dbl+lbl>,
#   indus10 <dbl+lbl>, marital <dbl+lbl>, martype <dbl+lbl>, divorce <dbl+lbl>,
#   widowed <dbl+lbl>, spwrksta <dbl+lbl>, sphrs1 <dbl+lbl>, sphrs2 <dbl+lbl>,
#   spevwork <dbl+lbl>, cowrksta <dbl+lbl>, coevwork <dbl+lbl>,
#   cohrs1 <dbl+lbl>, cohrs2 <dbl+lbl>, spwrkslf <dbl+lbl>, sppres80 <dbl+lbl>,
#   spocc10 <dbl+lbl>, sppres10 <dbl+lbl>, sppres105plus <dbl+lbl>,
#   spind10 <dbl+lbl>, coocc10 <dbl+lbl>, coind10 <dbl+lbl>, …

Setting data set

# Change age to age group! 
age_breaks <- c(18, 30, 50, 65, 100)

GSS2022$age_group <- cut(GSS2022$age, 
                      age_breaks, labels = c("18-29", "30-49", "50-64", "65+"), 
                      include.lowest = TRUE) 
                      
GSS2022<- GSS2022 %>% mutate(age_group= recode(age_group,"18-29"="1", "30-49"="2", "50-64"="3", "65+"="4"))

test_data <- GSS2022 %>% dplyr::select(c(wkvsfam, famvswk, physhlth, mntlhlth, marital,hompop_exp, age_group, degree, income, race, sex, wrkstat, wtssps))%>% mutate(wkvsfam= as.numeric(wkvsfam),famvswk= as.numeric(famvswk), physhlth= as.numeric(physhlth), mntlhlth= as.numeric(mntlhlth), marital= as.numeric(marital), hompop_exp= as.numeric(hompop_exp), age_group= as.numeric(age_group), degree= as.numeric(degree), income= as.numeric(income), race= as.numeric(race), sex= as.numeric(sex), wrkstat=as.numeric(wrkstat), wtssps=as.numeric(wtssps))

new_names <- c("work_impacts_the_family", "family_impacts_the_work", "physical_health_status", "mental_health_status", "marriage_status", "total_people_in_household", "age_group","education", "family_income", "race", "sex", "working_status", "weight" )

colnames(test_data) <- new_names #Apply new names to your data frame
skim(test_data)

Data summary
Name	test_data
Number of rows	4149
Number of columns	13
_______________________
Column type frequency:
numeric	13
________________________
Group variables	None

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
work_impacts_the_family	1767	0.57	2.69	0.93	1.00	2.00	3.00	3.00	4.00	▂▇▁▇▅
family_impacts_the_work	1779	0.57	2.94	0.86	1.00	2.00	3.00	4.00	4.00	▁▆▁▇▆
physical_health_status	1844	0.56	2.67	6.11	0.00	0.00	0.00	2.00	30.00	▇▁▁▁▁
mental_health_status	1870	0.55	4.53	7.93	0.00	0.00	0.00	5.00	30.00	▇▁▁▁▁
marriage_status	16	1.00	2.77	1.74	1.00	1.00	3.00	5.00	5.00	▇▁▃▁▆
total_people_in_household	8	1.00	1.75	1.67	0.00	1.00	1.00	2.00	49.00	▇▁▁▁▁
age_group	256	0.94	2.46	1.03	1.00	2.00	2.00	3.00	4.00	▅▇▁▅▅
education	2	1.00	1.82	1.27	0.00	1.00	1.00	3.00	4.00	▂▇▂▃▂
family_income	484	0.88	11.22	2.07	1.00	12.00	12.00	12.00	12.00	▁▁▁▁▇
race	64	0.98	1.51	0.76	1.00	1.00	1.00	2.00	3.00	▇▁▂▁▂
sex	23	0.99	1.54	0.50	1.00	1.00	2.00	2.00	2.00	▇▁▁▁▇
working_status	9	1.00	3.07	2.34	1.00	1.00	2.00	5.00	8.00	▇▁▃▁▂
weight	605	0.85	1.00	1.26	0.07	0.34	0.58	1.06	14.27	▇▁▁▁▁

# Reverse Coding
final_data <- test_data %>%
  mutate(
    work_impacts_the_family = case_when(
      work_impacts_the_family == 1 ~ 4,
      work_impacts_the_family == 2 ~ 3,
      work_impacts_the_family == 3 ~ 2,
      work_impacts_the_family == 4 ~ 1,
      work_impacts_the_family == "NA"~ NA_real_),
    work_impacts_the_family = labelled(work_impacts_the_family, 
      c(`Never` = 1, `Rarely` = 2, `Sometimes` = 3, `Often` = 4))) %>% 
  mutate(family_impacts_the_work = as.numeric(family_impacts_the_work),
    family_impacts_the_work = case_when(
      family_impacts_the_work == 1 ~ 4,
      family_impacts_the_work == 2 ~ 3,
      family_impacts_the_work == 3 ~ 2,
      family_impacts_the_work == 4 ~ 1,
      family_impacts_the_work == "NA"~ NA_real_),
    family_impacts_the_work = labelled(family_impacts_the_work, 
      c(`Never` = 1, `Rarely` = 2, `Sometimes` = 3, `Often` = 4))) %>%
  mutate(marriage_status= case_when(
    marriage_status== 1 ~ 5, 
    marriage_status== 2 ~ 4,
    marriage_status== 3 ~ 3,
    marriage_status== 4 ~ 2,
    marriage_status== 5 ~ 1,
    marriage_status== "NA"~ NA_real_),
    marriage_status=labelled(marriage_status, c(`Never married`=1, `Separated`=2, `Divorced`=3, `Widowed`=4, `Married`=5))) %>%
  mutate(age_group= case_when(
    age_group== 1 ~ 1,
    age_group== 2 ~ 2, 
    age_group== 3 ~ 3,
    age_group== 4 ~ 4,
    age_group== "NA"~ NA_real_),
    age_group=labelled(age_group, c(`18-29 YEARS OLD`=1, `30-49 YEARS OLD`=2, `50-64 YEARS OLD`=3, `64 YEARS OLD OR OVER`=4)))%>% 
  mutate(education= case_when(
    education== 0 ~ 1,
    education== 1 ~ 2,
    education== 2 ~ 3, 
    education== 3 ~ 4, 
    education== 4 ~ 5,
    education== "NA"~ NA_real_),
    education= labelled(education, c(`Less THAN HIGH SCHOOL`= 1, `HIGH SCHOOL`= 2, `ASSOCIATE/JUNIOR COLLEGE`=3, `BACHELORS`=4, `GRADUATE`= 5)))%>%
  mutate(family_income= case_when(
    family_income== 1 ~ 1, 
    family_income== 2 ~ 2, 
    family_income== 3 ~ 3, 
    family_income== 4 ~ 4, 
    family_income== 5 ~ 5, 
    family_income== 6 ~ 6, 
    family_income== 7 ~ 7, 
    family_income== 8 ~ 8, 
    family_income== 9 ~ 9,
    family_income== 10 ~ 10, 
    family_income== 11 ~ 11, 
    family_income== 12 ~ 12,
    family_income== "NA"~ NA_real_),
    family_income= labelled(family_income, c(`UNDER$1,000`= 1,`$1,000 TO $2,999`= 2, `$3,000 TO $3,999`=3, `$4,000 TO $4,999`=4, `$5,000 TOlibra $5,999`= 5, `$6,000 TO $6,999`=6, `$7,000 TO $7,999`=7, `$8,000 TO $9,999`=8, `$10,000 TO $14,999`= 9, `$15,000 TO $19,999`= 10,`$20,000 TO $24,999`= 11,  `$25,000 OR MORE`= 12))) %>%
  mutate(race= case_when(
    race== 1 ~ 1, 
    race== 2 ~ 2,
    race== 3 ~ 3,
    race== "NA"~ NA_real_),
  race= labelled(race, c(`WHITE`= 1, `BLACK`= 2, `OTHER`=3))) %>%
  mutate(sex= case_when(
    sex== 1 ~ 1, 
    sex== 2 ~ 2,
    sex== "NA"~ NA_real_),
    sex= labelled(sex, c(`MALE`=1, `FEMALE`=2)))%>%
  mutate(total_people_in_household = case_when(
    total_people_in_household == 0 ~ 0,
    total_people_in_household == 1 ~ 1,
    total_people_in_household == 2 ~ 2,
    total_people_in_household == 3 ~ 3,
    total_people_in_household == 4 ~ 4,
    total_people_in_household == 5 ~ 5,
    total_people_in_household == 6 ~ 6,
    total_people_in_household == 7 ~ 7,
    total_people_in_household >= 8 ~ 8,
    total_people_in_household == "NA"~ NA_real_),
    total_people_in_household = labelled(total_people_in_household, 
      c(`NONE`=0, `1`=1, `2`=2, `3`=3, `4`=4, `5`=5, `6`=6, `7`=7, `8+`=8)))%>%
  mutate(working_status = case_when(
    working_status == 1 ~ 3,
    working_status == 2 ~ 2,
    working_status == 3 ~ 1,
    working_status == 4 ~ 1,
    working_status == 5 ~ 1,
    working_status == 6 ~ 1,
    working_status == 7 ~ 1,
    working_status == 8 ~ 1,
    working_status == "NA"~ NA_real_),
    working_status = labelled(working_status,
      c(`WORKING FULL TIME`=3, `WORKING PART TIME`=2,`OTHER`=1)))
head(final_data)

# A tibble: 6 × 13
  work_impacts_the_family family_impacts_the_work physical_health_status
  <dbl+lbl>               <dbl+lbl>                                <dbl>
1  3 [Sometimes]           2 [Rarely]                                 30
2 NA                      NA                                          NA
3  2 [Rarely]              2 [Rarely]                                  0
4  4 [Often]               1 [Never]                                   4
5 NA                      NA                                          NA
6  4 [Often]               3 [Sometimes]                               0
# ℹ 10 more variables: mental_health_status <dbl>, marriage_status <dbl+lbl>,
#   total_people_in_household <dbl+lbl>, age_group <dbl+lbl>,
#   education <dbl+lbl>, family_income <dbl+lbl>, race <dbl+lbl>,
#   sex <dbl+lbl>, working_status <dbl+lbl>, weight <dbl>

str(final_data)

tibble [4,149 × 13] (S3: tbl_df/tbl/data.frame)
 $ work_impacts_the_family  : dbl+lbl [1:4149]  3, NA,  2,  4, NA,  4,  4,  2,  3, NA, NA,  1,  2,  ...
   ..@ labels: Named num [1:4] 1 2 3 4
   .. ..- attr(*, "names")= chr [1:4] "Never" "Rarely" "Sometimes" "Often"
 $ family_impacts_the_work  : dbl+lbl [1:4149]  2, NA,  2,  1, NA,  3,  2,  2,  4, NA, NA,  2,  2,  ...
   ..@ labels: Named num [1:4] 1 2 3 4
   .. ..- attr(*, "names")= chr [1:4] "Never" "Rarely" "Sometimes" "Often"
 $ physical_health_status   : num [1:4149] 30 NA 0 4 NA 0 2 0 5 NA ...
 $ mental_health_status     : num [1:4149] 15 NA 0 10 NA NA 14 0 5 NA ...
 $ marriage_status          : dbl+lbl [1:4149] 3, 5, 3, 1, 1, 1, 1, 5, 1, 1, 4, 1, 5, 1, 1, 3, 1, 5,...
   ..@ labels: Named num [1:5] 1 2 3 4 5
   .. ..- attr(*, "names")= chr [1:5] "Never married" "Separated" "Divorced" "Widowed" ...
 $ total_people_in_household: dbl+lbl [1:4149] 1, 1, 3, 1, 3, 1, 3, 2, 2, 1, 2, 1, 1, 1, 3, 1, 1, 3,...
   ..@ labels: Named num [1:9] 0 1 2 3 4 5 6 7 8
   .. ..- attr(*, "names")= chr [1:9] "NONE" "1" "2" "3" ...
 $ age_group                : dbl+lbl [1:4149]  4,  4,  3,  1,  3,  1,  1,  2,  2,  4,  3,  1,  2,  ...
   ..@ labels: Named num [1:4] 1 2 3 4
   .. ..- attr(*, "names")= chr [1:4] "18-29 YEARS OLD" "30-49 YEARS OLD" "50-64 YEARS OLD" "64 YEARS OLD OR OVER"
 $ education                : dbl+lbl [1:4149] 4, 5, 2, 4, 2, 2, 2, 5, 2, 2, 2, 4, 2, 2, 2, 2, 2, 4,...
   ..@ labels: Named num [1:5] 1 2 3 4 5
   .. ..- attr(*, "names")= chr [1:5] "Less THAN HIGH SCHOOL" "HIGH SCHOOL" "ASSOCIATE/JUNIOR COLLEGE" "BACHELORS" ...
 $ family_income            : dbl+lbl [1:4149] 12, NA, 12, 12, 12, 12, 12, 12, 11,  9, 12, 12, 12,  ...
   ..@ labels: Named num [1:12] 1 2 3 4 5 6 7 8 9 10 ...
   .. ..- attr(*, "names")= chr [1:12] "UNDER$1,000" "$1,000 TO $2,999" "$3,000 TO $3,999" "$4,000 TO $4,999" ...
 $ race                     : dbl+lbl [1:4149]  1,  1,  1,  1,  1,  1,  3,  1,  1, NA,  1,  1,  1,  ...
   ..@ labels: Named num [1:3] 1 2 3
   .. ..- attr(*, "names")= chr [1:3] "WHITE" "BLACK" "OTHER"
 $ sex                      : dbl+lbl [1:4149]  2,  1,  2,  2,  1,  1,  2,  1,  2,  2,  2,  2,  2,  ...
   ..@ labels: Named num [1:2] 1 2
   .. ..- attr(*, "names")= chr [1:2] "MALE" "FEMALE"
 $ working_status           : dbl+lbl [1:4149] 3, 1, 3, 1, 1, 3, 2, 3, 3, 1, 1, 2, 3, 2, 1, 1, 1, 2,...
   ..@ labels: Named num [1:3] 3 2 1
   .. ..- attr(*, "names")= chr [1:3] "WORKING FULL TIME" "WORKING PART TIME" "OTHER"
 $ weight                   : num [1:4149] 0.231 0.577 1.014 0.902 1.084 ...

skim(final_data)

Data summary
Name	final_data
Number of rows	4149
Number of columns	13
_______________________
Column type frequency:
numeric	13
________________________
Group variables	None

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
work_impacts_the_family	1767	0.57	2.31	0.93	1.00	2.00	2.00	3.00	4.00	▅▇▁▇▂
family_impacts_the_work	1779	0.57	2.06	0.86	1.00	1.00	2.00	3.00	4.00	▆▇▁▆▁
physical_health_status	1844	0.56	2.67	6.11	0.00	0.00	0.00	2.00	30.00	▇▁▁▁▁
mental_health_status	1870	0.55	4.53	7.93	0.00	0.00	0.00	5.00	30.00	▇▁▁▁▁
marriage_status	16	1.00	3.23	1.74	1.00	1.00	3.00	5.00	5.00	▆▁▃▁▇
total_people_in_household	8	1.00	1.72	1.33	0.00	1.00	1.00	2.00	8.00	▇▂▁▁▁
age_group	256	0.94	2.46	1.03	1.00	2.00	2.00	3.00	4.00	▅▇▁▅▅
education	2	1.00	2.82	1.27	1.00	2.00	2.00	4.00	5.00	▂▇▂▃▂
family_income	484	0.88	11.22	2.07	1.00	12.00	12.00	12.00	12.00	▁▁▁▁▇
race	64	0.98	1.51	0.76	1.00	1.00	1.00	2.00	3.00	▇▁▂▁▂
sex	23	0.99	1.54	0.50	1.00	1.00	2.00	2.00	2.00	▇▁▁▁▇
working_status	9	1.00	2.02	0.95	1.00	1.00	2.00	3.00	3.00	▇▁▂▁▇
weight	605	0.85	1.00	1.26	0.07	0.34	0.58	1.06	14.27	▇▁▁▁▁

summary_df <- summarytools::dfSummary(final_data,
                                     varnumbers=FALSE,
                                     plain.ascii=FALSE,
                                     style="grid",
                                     graph.col = TRUE,
                                     valid.col=FALSE)

# Print the summary table and suppress warnings
print(summary_df,
      method="render",
      table.classes="table-condensed")

Data Frame Summary

final_data

Dimensions: 4149 x 13
Duplicates: 132

work_impacts_the_family [haven_labelled, vctrs_vctr, double]

1. [1] Never
2. [2] Rarely
3. [3] Sometimes
4. [4] Often

525	(	22.0%	)
826	(	34.7%	)
789	(	33.1%	)
242	(	10.2%	)

1767 (42.6%)

family_impacts_the_work [haven_labelled, vctrs_vctr, double]

1. [1] Never
2. [2] Rarely
3. [3] Sometimes
4. [4] Often

704	(	29.7%	)
926	(	39.1%	)
632	(	26.7%	)
108	(	4.6%	)

1779 (42.9%)

physical_health_status [numeric]

Mean (sd) : 2.7 (6.1)
min ≤ med ≤ max:
0 ≤ 0 ≤ 30
IQR (CV) : 2 (2.3)

24 distinct values

1844 (44.4%)

mental_health_status [numeric]

Mean (sd) : 4.5 (7.9)
min ≤ med ≤ max:
0 ≤ 0 ≤ 30
IQR (CV) : 5 (1.8)

29 distinct values

1870 (45.1%)

marriage_status [haven_labelled, vctrs_vctr, double]

1. [1] Never married
2. [2] Separated
3. [3] Divorced
4. [4] Widowed
5. [5] Married

1333	(	32.3%	)
121	(	2.9%	)
669	(	16.2%	)
273	(	6.6%	)
1737	(	42.0%	)

16 (0.4%)

total_people_in_household [haven_labelled, vctrs_vctr, double]

1. [0] NONE
2. [1] 1
3. [2] 2
4. [3] 3
5. [4] 4
6. [5] 5
7. [6] 6
8. [7] 7
9. [8] 8+

40	(	1.0%	)
2727	(	65.9%	)
580	(	14.0%	)
314	(	7.6%	)
270	(	6.5%	)
108	(	2.6%	)
52	(	1.3%	)
24	(	0.6%	)
26	(	0.6%	)

8 (0.2%)

age_group [haven_labelled, vctrs_vctr, double]

1. [1] 18-29 YEARS OLD
2. [2] 30-49 YEARS OLD
3. [3] 50-64 YEARS OLD
4. [4] 64 YEARS OLD OR OVER

750	(	19.3%	)
1408	(	36.2%	)
924	(	23.7%	)
811	(	20.8%	)

256 (6.2%)

education [haven_labelled, vctrs_vctr, double]

1. [1] Less THAN HIGH SCHOOL
2. [2] HIGH SCHOOL
3. [3] ASSOCIATE/JUNIOR COLL
4. [4] BACHELORS
5. [5] GRADUATE

417	(	10.1%	)
1919	(	46.3%	)
367	(	8.8%	)
866	(	20.9%	)
578	(	13.9%	)

2 (0.0%)

family_income [haven_labelled, vctrs_vctr, double]

1. [1] UNDER$1,000
2. [2] $1,000 TO $2,999
3. [3] $3,000 TO $3,999
4. [4] $4,000 TO $4,999
5. [5] $5,000 TOlibra $5,999
6. [6] $6,000 TO $6,999
7. [7] $7,000 TO $7,999
8. [8] $8,000 TO $9,999
9. [9] $10,000 TO $14,999
10. [10] $15,000 TO $19,999
[ 2 others ]

53	(	1.4%	)
37	(	1.0%	)
19	(	0.5%	)
14	(	0.4%	)
20	(	0.5%	)
15	(	0.4%	)
18	(	0.5%	)
46	(	1.3%	)
215	(	5.9%	)
126	(	3.4%	)
3102	(	84.6%	)

484 (11.7%)

race [haven_labelled, vctrs_vctr, double]

1. [1] WHITE
2. [2] BLACK
3. [3] OTHER

2651	(	64.9%	)
775	(	19.0%	)
659	(	16.1%	)

64 (1.5%)

sex [haven_labelled, vctrs_vctr, double]

1. [1] MALE
2. [2] FEMALE

1910	(	46.3%	)
2216	(	53.7%	)

23 (0.6%)

working_status [haven_labelled, vctrs_vctr, double]

1. [3] WORKING FULL TIME
2. [2] WORKING PART TIME
3. [1] OTHER

1904	(	46.0%	)
423	(	10.2%	)
1813	(	43.8%	)

9 (0.2%)

weight [numeric]

Mean (sd) : 1 (1.3)
min ≤ med ≤ max:
0.1 ≤ 0.6 ≤ 14.3
IQR (CV) : 0.7 (1.3)

2269 distinct values

605 (14.6%)

Generated by summarytools 1.1.4 (R version 4.5.1)
2025-11-21

Separate two scales

data_physical_health<-final_data%>% dplyr::select(c(work_impacts_the_family, family_impacts_the_work, physical_health_status, marriage_status, total_people_in_household,  age_group,education, family_income, race, sex, working_status, weight)) %>% mutate(marriage_status= as.factor(marriage_status))%>% mutate(race= as.factor(race))%>% mutate(working_status= as.factor(working_status))


head(data_physical_health)

# A tibble: 6 × 12
  work_impacts_the_family family_impacts_the_work physical_health_status
  <dbl+lbl>               <dbl+lbl>                                <dbl>
1  3 [Sometimes]           2 [Rarely]                                 30
2 NA                      NA                                          NA
3  2 [Rarely]              2 [Rarely]                                  0
4  4 [Often]               1 [Never]                                   4
5 NA                      NA                                          NA
6  4 [Often]               3 [Sometimes]                               0
# ℹ 9 more variables: marriage_status <fct>,
#   total_people_in_household <dbl+lbl>, age_group <dbl+lbl>,
#   education <dbl+lbl>, family_income <dbl+lbl>, race <fct>, sex <dbl+lbl>,
#   working_status <fct>, weight <dbl>

data_mental_health <- final_data%>% dplyr::select(c(work_impacts_the_family, family_impacts_the_work, mental_health_status,marriage_status, total_people_in_household,  age_group,education, family_income, race, sex, working_status, weight))%>% mutate(marriage_status= as.factor(marriage_status)) %>% mutate(race= as.factor(race)) %>% mutate(working_status= as.factor(working_status))

head(data_mental_health)

# A tibble: 6 × 12
  work_impacts_the_family family_impacts_the_work mental_health_status
  <dbl+lbl>               <dbl+lbl>                              <dbl>
1  3 [Sometimes]           2 [Rarely]                               15
2 NA                      NA                                        NA
3  2 [Rarely]              2 [Rarely]                                0
4  4 [Often]               1 [Never]                                10
5 NA                      NA                                        NA
6  4 [Often]               3 [Sometimes]                            NA
# ℹ 9 more variables: marriage_status <fct>,
#   total_people_in_household <dbl+lbl>, age_group <dbl+lbl>,
#   education <dbl+lbl>, family_income <dbl+lbl>, race <fct>, sex <dbl+lbl>,
#   working_status <fct>, weight <dbl>

Count Dependent Variable Tutorials

Negative-Binomial Model

nb_physical_health<- glm.nb(physical_health_status~factor(work_impacts_the_family)+ factor(family_impacts_the_work)+ marriage_status+ total_people_in_household+ factor(age_group)+education+family_income+race+sex+working_status, data=data_physical_health, weights = data_physical_health$weight) 

summary(nb_physical_health)


Call:
glm.nb(formula = physical_health_status ~ factor(work_impacts_the_family) + 
    factor(family_impacts_the_work) + marriage_status + total_people_in_household + 
    factor(age_group) + education + family_income + race + sex + 
    working_status, data = data_physical_health, weights = data_physical_health$weight, 
    init.theta = 0.2094728664, link = log)

Coefficients:
                                  Estimate Std. Error z value Pr(>|z|)    
(Intercept)                       1.511360   0.600850   2.515 0.011891 *  
factor(work_impacts_the_family)2 -0.343587   0.185930  -1.848 0.064612 .  
factor(work_impacts_the_family)3 -0.093034   0.190429  -0.489 0.625162    
factor(work_impacts_the_family)4  0.057812   0.236210   0.245 0.806651    
factor(family_impacts_the_work)2  0.012271   0.165973   0.074 0.941062    
factor(family_impacts_the_work)3  0.173197   0.181481   0.954 0.339907    
factor(family_impacts_the_work)4  0.748579   0.296470   2.525 0.011571 *  
marriage_status2                 -0.432965   0.466801  -0.928 0.353660    
marriage_status3                 -0.203963   0.211080  -0.966 0.333901    
marriage_status4                  0.590118   0.401073   1.471 0.141197    
marriage_status5                 -0.563402   0.151358  -3.722 0.000197 ***
total_people_in_household         0.039358   0.037544   1.048 0.294493    
factor(age_group)2                0.390258   0.163958   2.380 0.017302 *  
factor(age_group)3                0.526365   0.188567   2.791 0.005248 ** 
factor(age_group)4                0.050911   0.296104   0.172 0.863487    
education                        -0.072313   0.048874  -1.480 0.138987    
family_income                    -0.002453   0.041675  -0.059 0.953055    
race2                            -0.929795   0.182298  -5.100 3.39e-07 ***
race3                            -0.428670   0.184055  -2.329 0.019857 *  
sex                               0.528623   0.116438   4.540 5.63e-06 ***
working_status2                  -0.804438   0.288735  -2.786 0.005335 ** 
working_status3                  -1.247546   0.262586  -4.751 2.02e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(0.2095) family taken to be 1)

    Null deviance: 1445.1  on 1663  degrees of freedom
Residual deviance: 1323.4  on 1642  degrees of freedom
  (2485 observations deleted due to missingness)
AIC: 5946.7

Number of Fisher Scoring iterations: 1

              Theta:  0.2095 
          Std. Err.:  0.0110 

 2 x log-likelihood:  -5900.7380

nb_mental_health<- glm.nb(mental_health_status~factor(work_impacts_the_family)+factor(family_impacts_the_work)+ marriage_status+ total_people_in_household+ factor(age_group)+education+family_income+race+sex+working_status, data=data_mental_health, weights = data_mental_health$weight) 

summary(nb_mental_health)


Call:
glm.nb(formula = mental_health_status ~ factor(work_impacts_the_family) + 
    factor(family_impacts_the_work) + marriage_status + total_people_in_household + 
    factor(age_group) + education + family_income + race + sex + 
    working_status, data = data_mental_health, weights = data_mental_health$weight, 
    init.theta = 0.3019571903, link = log)

Coefficients:
                                 Estimate Std. Error z value Pr(>|z|)    
(Intercept)                       2.33523    0.48969   4.769 1.85e-06 ***
factor(work_impacts_the_family)2  0.10590    0.15668   0.676  0.49910    
factor(work_impacts_the_family)3  0.31542    0.16066   1.963  0.04961 *  
factor(work_impacts_the_family)4  1.06976    0.20018   5.344 9.10e-08 ***
factor(family_impacts_the_work)2 -0.05445    0.13874  -0.392  0.69470    
factor(family_impacts_the_work)3  0.23840    0.15183   1.570  0.11636    
factor(family_impacts_the_work)4  0.65586    0.25509   2.571  0.01014 *  
marriage_status2                 -0.75306    0.39191  -1.922  0.05466 .  
marriage_status3                 -0.00363    0.17564  -0.021  0.98351    
marriage_status4                  0.09807    0.34077   0.288  0.77350    
marriage_status5                 -0.56577    0.12530  -4.515 6.32e-06 ***
total_people_in_household         0.05490    0.03100   1.771  0.07656 .  
factor(age_group)2               -0.28534    0.13444  -2.122  0.03380 *  
factor(age_group)3               -0.73709    0.15626  -4.717 2.39e-06 ***
factor(age_group)4               -1.55231    0.26130  -5.941 2.84e-09 ***
education                        -0.11652    0.04079  -2.857  0.00428 ** 
family_income                    -0.09528    0.03316  -2.873  0.00406 ** 
race2                            -0.84015    0.14877  -5.647 1.63e-08 ***
race3                            -0.62518    0.15405  -4.058 4.94e-05 ***
sex                               0.64876    0.09718   6.676 2.46e-11 ***
working_status2                  -0.08706    0.24613  -0.354  0.72357    
working_status3                  -0.11122    0.22351  -0.498  0.61875    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(0.302) family taken to be 1)

    Null deviance: 1805.7  on 1655  degrees of freedom
Residual deviance: 1572.1  on 1634  degrees of freedom
  (2493 observations deleted due to missingness)
AIC: 7564.9

Number of Fisher Scoring iterations: 1

              Theta:  0.3020 
          Std. Err.:  0.0144 

 2 x log-likelihood:  -7518.9340

Basic Poisson Model

pois_physical_health <- glm(physical_health_status~factor(work_impacts_the_family)+factor(family_impacts_the_work)+ marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, data=data_physical_health, family = poisson, weights = data_physical_health$weight)
summary(pois_physical_health)


Call:
glm(formula = physical_health_status ~ factor(work_impacts_the_family) + 
    factor(family_impacts_the_work) + marriage_status + total_people_in_household + 
    age_group + education + family_income + race + sex + working_status, 
    family = poisson, data = data_physical_health, weights = data_physical_health$weight)

Coefficients:
                                  Estimate Std. Error z value Pr(>|z|)    
(Intercept)                       1.609684   0.151005  10.660  < 2e-16 ***
factor(work_impacts_the_family)2 -0.198042   0.050110  -3.952 7.75e-05 ***
factor(work_impacts_the_family)3 -0.126264   0.050317  -2.509 0.012095 *  
factor(work_impacts_the_family)4  0.094246   0.059904   1.573 0.115654    
factor(family_impacts_the_work)2  0.006691   0.046561   0.144 0.885736    
factor(family_impacts_the_work)3  0.308296   0.048031   6.419 1.37e-10 ***
factor(family_impacts_the_work)4  0.414250   0.073017   5.673 1.40e-08 ***
marriage_status2                 -0.496496   0.147905  -3.357 0.000788 ***
marriage_status3                 -0.014599   0.051450  -0.284 0.776602    
marriage_status4                  0.647988   0.078114   8.295  < 2e-16 ***
marriage_status5                 -0.330846   0.040291  -8.211  < 2e-16 ***
total_people_in_household         0.056837   0.009906   5.737 9.61e-09 ***
age_group                         0.087960   0.020990   4.191 2.78e-05 ***
education                        -0.095169   0.013513  -7.043 1.88e-12 ***
family_income                    -0.007990   0.010750  -0.743 0.457325    
race2                            -0.796804   0.061292 -13.000  < 2e-16 ***
race3                            -0.386999   0.055522  -6.970 3.16e-12 ***
sex                               0.376324   0.031882  11.804  < 2e-16 ***
working_status2                  -0.728866   0.054884 -13.280  < 2e-16 ***
working_status3                  -1.066988   0.048152 -22.159  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 13049  on 1663  degrees of freedom
Residual deviance: 11598  on 1644  degrees of freedom
  (2485 observations deleted due to missingness)
AIC: 13825

Number of Fisher Scoring iterations: 6

pois_mental_health <- glm(mental_health_status~factor(work_impacts_the_family)+factor(family_impacts_the_work)+ marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, data=data_mental_health, family = poisson, weights = data_mental_health$weight)
summary(pois_mental_health)


Call:
glm(formula = mental_health_status ~ factor(work_impacts_the_family) + 
    factor(family_impacts_the_work) + marriage_status + total_people_in_household + 
    age_group + education + family_income + race + sex + working_status, 
    family = poisson, data = data_mental_health, weights = data_mental_health$weight)

Coefficients:
                                  Estimate Std. Error z value Pr(>|z|)    
(Intercept)                       2.433581   0.104620  23.261  < 2e-16 ***
factor(work_impacts_the_family)2  0.144862   0.041876   3.459 0.000542 ***
factor(work_impacts_the_family)3  0.293700   0.041728   7.038 1.94e-12 ***
factor(work_impacts_the_family)4  0.903941   0.046361  19.498  < 2e-16 ***
factor(family_impacts_the_work)2  0.049796   0.035719   1.394 0.163282    
factor(family_impacts_the_work)3  0.273578   0.037302   7.334 2.23e-13 ***
factor(family_impacts_the_work)4  0.232450   0.056760   4.095 4.22e-05 ***
marriage_status2                 -0.553176   0.111211  -4.974 6.55e-07 ***
marriage_status3                  0.112812   0.041455   2.721 0.006502 ** 
marriage_status4                  0.540910   0.071381   7.578 3.52e-14 ***
marriage_status5                 -0.254795   0.029143  -8.743  < 2e-16 ***
total_people_in_household         0.017912   0.007326   2.445 0.014493 *  
age_group                        -0.376252   0.017589 -21.391  < 2e-16 ***
education                        -0.116615   0.010390 -11.223  < 2e-16 ***
family_income                    -0.074216   0.006181 -12.007  < 2e-16 ***
race2                            -0.538645   0.039629 -13.592  < 2e-16 ***
race3                            -0.418799   0.042440  -9.868  < 2e-16 ***
sex                               0.574155   0.024507  23.428  < 2e-16 ***
working_status2                  -0.040346   0.053720  -0.751 0.452621    
working_status3                  -0.108636   0.048405  -2.244 0.024813 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 16817  on 1655  degrees of freedom
Residual deviance: 13810  on 1636  degrees of freedom
  (2493 observations deleted due to missingness)
AIC: 16899

Number of Fisher Scoring iterations: 6

Quasi-Poisson Model

quasi_physical_health <-  glm(physical_health_status~factor(work_impacts_the_family)+factor(family_impacts_the_work)+ marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status,family = quasipoisson, data=data_physical_health, weights = data_physical_health$weight)
summary(quasi_physical_health)


Call:
glm(formula = physical_health_status ~ factor(work_impacts_the_family) + 
    factor(family_impacts_the_work) + marriage_status + total_people_in_household + 
    age_group + education + family_income + race + sex + working_status, 
    family = quasipoisson, data = data_physical_health, weights = data_physical_health$weight)

Coefficients:
                                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)                       1.609684   0.517519   3.110 0.001901 ** 
factor(work_impacts_the_family)2 -0.198042   0.171735  -1.153 0.249003    
factor(work_impacts_the_family)3 -0.126264   0.172444  -0.732 0.464151    
factor(work_impacts_the_family)4  0.094246   0.205300   0.459 0.646250    
factor(family_impacts_the_work)2  0.006691   0.159572   0.042 0.966560    
factor(family_impacts_the_work)3  0.308296   0.164609   1.873 0.061260 .  
factor(family_impacts_the_work)4  0.414250   0.250243   1.655 0.098036 .  
marriage_status2                 -0.496496   0.506896  -0.979 0.327486    
marriage_status3                 -0.014599   0.176328  -0.083 0.934025    
marriage_status4                  0.647988   0.267709   2.420 0.015607 *  
marriage_status5                 -0.330846   0.138085  -2.396 0.016688 *  
total_people_in_household         0.056837   0.033950   1.674 0.094299 .  
age_group                         0.087960   0.071935   1.223 0.221596    
education                        -0.095169   0.046310  -2.055 0.040034 *  
family_income                    -0.007990   0.036844  -0.217 0.828334    
race2                            -0.796804   0.210058  -3.793 0.000154 ***
race3                            -0.386999   0.190282  -2.034 0.042130 *  
sex                               0.376324   0.109264   3.444 0.000587 ***
working_status2                  -0.728866   0.188098  -3.875 0.000111 ***
working_status3                  -1.066988   0.165026  -6.466 1.33e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for quasipoisson family taken to be 11.74545)

    Null deviance: 13049  on 1663  degrees of freedom
Residual deviance: 11598  on 1644  degrees of freedom
  (2485 observations deleted due to missingness)
AIC: NA

Number of Fisher Scoring iterations: 6

quasi_mental_health <- glm(mental_health_status~factor(work_impacts_the_family)+factor(family_impacts_the_work)+ marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, family = quasipoisson, data=data_mental_health, weights = data_mental_health$weight)
summary(quasi_mental_health)


Call:
glm(formula = mental_health_status ~ factor(work_impacts_the_family) + 
    factor(family_impacts_the_work) + marriage_status + total_people_in_household + 
    age_group + education + family_income + race + sex + working_status, 
    family = quasipoisson, data = data_mental_health, weights = data_mental_health$weight)

Coefficients:
                                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)                       2.43358    0.35176   6.918 6.54e-12 ***
factor(work_impacts_the_family)2  0.14486    0.14080   1.029 0.303694    
factor(work_impacts_the_family)3  0.29370    0.14030   2.093 0.036470 *  
factor(work_impacts_the_family)4  0.90394    0.15588   5.799 7.99e-09 ***
factor(family_impacts_the_work)2  0.04980    0.12010   0.415 0.678461    
factor(family_impacts_the_work)3  0.27358    0.12542   2.181 0.029302 *  
factor(family_impacts_the_work)4  0.23245    0.19084   1.218 0.223386    
marriage_status2                 -0.55318    0.37392  -1.479 0.139226    
marriage_status3                  0.11281    0.13938   0.809 0.418414    
marriage_status4                  0.54091    0.24000   2.254 0.024342 *  
marriage_status5                 -0.25479    0.09798  -2.600 0.009397 ** 
total_people_in_household         0.01791    0.02463   0.727 0.467249    
age_group                        -0.37625    0.05914  -6.362 2.58e-10 ***
education                        -0.11662    0.03494  -3.338 0.000863 ***
family_income                    -0.07422    0.02078  -3.571 0.000366 ***
race2                            -0.53865    0.13324  -4.043 5.53e-05 ***
race3                            -0.41880    0.14269  -2.935 0.003382 ** 
sex                               0.57415    0.08240   6.968 4.64e-12 ***
working_status2                  -0.04035    0.18062  -0.223 0.823269    
working_status3                  -0.10864    0.16275  -0.667 0.504548    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for quasipoisson family taken to be 11.30473)

    Null deviance: 16817  on 1655  degrees of freedom
Residual deviance: 13810  on 1636  degrees of freedom
  (2493 observations deleted due to missingness)
AIC: NA

Number of Fisher Scoring iterations: 6

Likelihood Ratio, AIC, BIC

#Create function to compare model fit stats
glm_fit <- function(model) {
  # Calculate Likelihood Ratio
  lr <- logLik(model)
  
  # Calculate AIC
  aic <- AIC(model)
  
  # Calculate BIC
  n <- nobs(model)
  p <- length(coef(model))
  bic <- -2 * logLik(model) + p * log(n)
  
  # Calculate Deviance
  deviance <- summary(model)$deviance
  
  # Return the metrics as a list
  metrics <- data.frame(Likelihood_Ratio = lr, AIC = aic, BIC = bic, Deviance = deviance, Coefficients = p)
  return(metrics)
}

glm_fit(pois_physical_health)

  Likelihood_Ratio      AIC      BIC Deviance Coefficients
1         -6892.74 13825.48 13933.82 11598.05           20

glm_fit(pois_mental_health)

  Likelihood_Ratio      AIC     BIC Deviance Coefficients
1        -8429.281 16898.56 17006.8 13810.46           20

glm_fit(quasi_physical_health)

  Likelihood_Ratio AIC BIC Deviance Coefficients
1               NA  NA  NA 11598.05           20

glm_fit(quasi_mental_health)

  Likelihood_Ratio AIC BIC Deviance Coefficients
1               NA  NA  NA 13810.46           20

glm_fit(nb_physical_health)

  Likelihood_Ratio      AIC      BIC Deviance Coefficients
1        -2950.369 5946.738 6063.912 1323.441           22

glm_fit(nb_mental_health)

  Likelihood_Ratio      AIC      BIC Deviance Coefficients
1        -3759.467 7564.934 7682.002 1572.057           22

Overall Table

stargazer(nb_physical_health, nb_mental_health, type="text",align=TRUE,  dep.var.labels=c("Physical Health Status","Mental Health Status"), covariate.labels=c("work impacts the family: Rarely","work impacts the family: Sometimes", "work impacts the family: Often", "Family impacts the work:Rarely","Family impacts the work:Sometimes","Family impacts the work:Often", "Marriage status:Separated","Marriage status:Divorced", "Marriage status:Widowed","Marriage status:Married","Total people in household", "Age group：30-49 YEARS OLD","Age group：50-64 YEARS OLD", "Age group：64 YEARS OLD OR OVER", "Education", "Family income", "Race:BLACK", "Race:OTHER","Sex", "Working Status:WORKING PART TIME", "Working Status:WORKING FULL TIME" ))


==============================================================================
                                               Dependent variable:            
                                   -------------------------------------------
                                   Physical Health Status Mental Health Status
                                            (1)                   (2)         
------------------------------------------------------------------------------
work impacts the family: Rarely           -0.344*                0.106        
                                          (0.186)               (0.157)       
                                                                              
work impacts the family: Sometimes         -0.093               0.315**       
                                          (0.190)               (0.161)       
                                                                              
work impacts the family: Often             0.058                1.070***      
                                          (0.236)               (0.200)       
                                                                              
Family impacts the work:Rarely             0.012                 -0.054       
                                          (0.166)               (0.139)       
                                                                              
Family impacts the work:Sometimes          0.173                 0.238        
                                          (0.181)               (0.152)       
                                                                              
Family impacts the work:Often             0.749**               0.656**       
                                          (0.296)               (0.255)       
                                                                              
Marriage status:Separated                  -0.433               -0.753*       
                                          (0.467)               (0.392)       
                                                                              
Marriage status:Divorced                   -0.204                -0.004       
                                          (0.211)               (0.176)       
                                                                              
Marriage status:Widowed                    0.590                 0.098        
                                          (0.401)               (0.341)       
                                                                              
Marriage status:Married                  -0.563***             -0.566***      
                                          (0.151)               (0.125)       
                                                                              
Total people in household                  0.039                 0.055*       
                                          (0.038)               (0.031)       
                                                                              
Age group：30-49 YEARS OLD                 0.390**               -0.285**      
                                          (0.164)               (0.134)       
                                                                              
Age group：50-64 YEARS OLD                 0.526***             -0.737***      
                                          (0.189)               (0.156)       
                                                                              
Age group：64 YEARS OLD OR OVER             0.051               -1.552***      
                                          (0.296)               (0.261)       
                                                                              
Education                                  -0.072              -0.117***      
                                          (0.049)               (0.041)       
                                                                              
Family income                              -0.002              -0.095***      
                                          (0.042)               (0.033)       
                                                                              
Race:BLACK                               -0.930***             -0.840***      
                                          (0.182)               (0.149)       
                                                                              
Race:OTHER                                -0.429**             -0.625***      
                                          (0.184)               (0.154)       
                                                                              
Sex                                       0.529***              0.649***      
                                          (0.116)               (0.097)       
                                                                              
Working Status:WORKING PART TIME         -0.804***               -0.087       
                                          (0.289)               (0.246)       
                                                                              
Working Status:WORKING FULL TIME         -1.248***               -0.111       
                                          (0.263)               (0.224)       
                                                                              
Constant                                  1.511**               2.335***      
                                          (0.601)               (0.490)       
                                                                              
------------------------------------------------------------------------------
Observations                               1,664                 1,656        
Log Likelihood                           -2,951.369            -3,760.467     
theta                                 0.209*** (0.011)      0.302*** (0.014)  
Akaike Inf. Crit.                        5,946.738             7,564.934      
==============================================================================
Note:                                              *p<0.1; **p<0.05; ***p<0.01

Prediction

physical_pred_j<-ggpredict(nb_physical_health, terms="work_impacts_the_family")
print(physical_pred_j)

# Predicted counts of physical_health_status

work_impacts_the_family | Predicted |      95% CI
-------------------------------------------------
                      1 |      8.46 | 4.59, 15.59
                      2 |      6.00 | 3.23, 11.14
                      3 |      7.71 | 4.13, 14.40
                      4 |      8.96 | 4.56, 17.63

Adjusted for:
*   family_impacts_the_work =     1
*           marriage_status =     1
* total_people_in_household =  2.03
*                 age_group =     1
*                 education =  2.89
*             family_income = 11.65
*                      race =     1
*                       sex =  1.48
*            working_status =     1

physical_pred_f<-ggpredict(nb_physical_health, terms="family_impacts_the_work")
print(physical_pred_f)

# Predicted counts of physical_health_status

family_impacts_the_work | Predicted |      95% CI
-------------------------------------------------
                      1 |      8.46 | 4.59, 15.59
                      2 |      8.57 | 4.41, 16.62
                      3 |     10.06 | 5.09, 19.87
                      4 |     17.89 | 7.71, 41.49

Adjusted for:
*   work_impacts_the_family =     1
*           marriage_status =     1
* total_people_in_household =  2.03
*                 age_group =     1
*                 education =  2.89
*             family_income = 11.65
*                      race =     1
*                       sex =  1.48
*            working_status =     1

mental_pred_j<-ggpredict(nb_mental_health, terms="work_impacts_the_family")
print(mental_pred_j)

# Predicted counts of mental_health_status

work_impacts_the_family | Predicted |       95% CI
--------------------------------------------------
                      1 |      7.08 |  4.22, 11.87
                      2 |      7.87 |  4.67, 13.27
                      3 |      9.70 |  5.73, 16.44
                      4 |     20.63 | 11.61, 36.65

Adjusted for:
*   family_impacts_the_work =     1
*           marriage_status =     1
* total_people_in_household =  2.03
*                 age_group =     1
*                 education =  2.89
*             family_income = 11.66
*                      race =     1
*                       sex =  1.48
*            working_status =     1

mental_pred_f<-ggpredict(nb_mental_health, terms="family_impacts_the_work")
print(mental_pred_f)

# Predicted counts of mental_health_status

family_impacts_the_work | Predicted |      95% CI
-------------------------------------------------
                      1 |      7.08 | 4.22, 11.87
                      2 |      6.70 | 3.83, 11.73
                      3 |      8.98 | 5.06, 15.95
                      4 |     13.64 | 6.65, 27.98

Adjusted for:
*   work_impacts_the_family =     1
*           marriage_status =     1
* total_people_in_household =  2.03
*                 age_group =     1
*                 education =  2.89
*             family_income = 11.66
*                      race =     1
*                       sex =  1.48
*            working_status =     1

custom_order <- c("Never", "Rarely"," Sometimes", "Often")

ggplot(physical_pred_j, aes(x = x, y = predicted, fill = as_factor(x))) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.4, position = position_dodge(width = 0.7)) +
  theme_minimal(base_size = 13) +
  labs(x = "Work Impacts Family", y = "Predicted Count: Days of Poor Physical Health", 
       title = "Predicted Days Of Poor Physical Health By Work Impacts Family", 
       subtitle = "Results From Negative-Binomial Model",
       fill = "Work Impacts Family")+
  scale_x_discrete(limits = custom_order)+
  theme(legend.position = "bottom" )

ggplot(physical_pred_f, aes(x = x, y = predicted, fill = as_factor(x))) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.4, position = position_dodge(width = 0.7)) +
  theme_minimal(base_size = 13) +
  labs(x = "Family Impacts Work", y = "Predicted Count: Days of Poor Physical Health", 
       title = "Predicted Days Of Poor Physical Health By Family Impacts Work", 
       subtitle = "Results From Negative-Binomial Model",
       fill = "Family Impacts work")+
  scale_x_discrete(limits = custom_order)+
  theme(legend.position = "bottom" )

ggplot(mental_pred_j, aes(x = x, y = predicted, fill = as_factor(x))) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.4, position = position_dodge(width = 0.7)) +
  theme_minimal(base_size = 13) +
  labs(x = "Work Impacts Family", y = "Predicted Count: Days Of Poor Mental Health", 
       title = "Predicted Days Of Poor Mental Health By Work Impacts Family", 
       subtitle = "Results From Negative-Binomial Model",
       fill = "Work Impacts Family")+
  scale_x_discrete(limits = custom_order)+
  theme(legend.position = "bottom" )

ggplot(mental_pred_f, aes(x = x, y = predicted, fill = as_factor(x))) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.4, position = position_dodge(width = 0.7)) +
  theme_minimal(base_size = 13) +
  labs(x = "Family Impacts Work", y = "Predicted Count: Days Of Poor Mental Health", 
       title = "Predicted Days Of Poor Mental Health By Family Impacts Work", 
       subtitle = "Results From Negative-Binomial Model",
       fill= "Family Impacts Work")+
  scale_x_discrete(limits = custom_order)+
  theme(legend.position = "bottom" )

Hypothetical Outcome Plot (HOP)

Reference：

https://stats.stackexchange.com/questions/225544/simulating-an-overdispersed-negative-binomial-distribution#:~:text=Since%20you%20have%20the%20mean,/size’%20in%20this%20parametrization.&text=Note%20especially%20the%20right%20tail,strong%20position%20to%20argue%20from.

https://library.virginia.edu/data/articles/getting-started-with-negative-binomial-regression-modeling#:~:text=One%20approach%20that%20addresses%20this,k%2C%20called%20the%20dispersion%20parameter.

Var=μ+μ2/k

k=μ2/(Var−μ)

k≈μ2/sd2 [avoid negative number]

data_physical_health <- data_physical_health %>% 
  mutate(work_impacts_the_family = as.numeric(work_impacts_the_family),
         family_impacts_the_work = as.numeric(family_impacts_the_work))

data_physical_health <- data_physical_health %>%
  mutate(work_impacts_the_family = factor(work_impacts_the_family,
                                           levels = c(1,2,3,4),
                                           labels = c("Never", "Rarely", "Sometimes", "Often"))) %>%
  mutate(family_impacts_the_work = factor(family_impacts_the_work,
                                           levels = c(1,2,3,4),
                                           labels = c("Never", "Rarely", "Sometimes", "Often"))) 

# Summarize the data (assuming CO2_sum is already created)
data_physical_health_sum <- data_physical_health |> group_by(work_impacts_the_family) |>
   dplyr::summarise(mean = mean (physical_health_status, na.rm = TRUE),
            sd = sd(physical_health_status, na.rm = TRUE))
# Simulate hypothetical outcomes
set.seed(01002)  # For reproducibility
n_simulations <- 100 # Number of hypothetical outcomes

# Create a data frame with simulated outcomes for each Type
simulated_data <- data_physical_health_sum |>
  rowwise() |>
  do(data.frame(work_impacts_the_family = .$work_impacts_the_family,
                outcome = rnbinom(n_simulations, size = (.$mean^2 / .$sd^2), mu = .$mean),
                sim_id = 1:n_simulations))

# Create an animated plot using ggplot and gganimmated
g <- ggplot (simulated_data, aes(x = work_impacts_the_family, y = outcome, fill = work_impacts_the_family))+
  geom_col(data = data_physical_health_sum, aes(y = mean), alpha = 0.4, color = "black") +
  geom_point(aes(color = work_impacts_the_family), size = 2, position = position_jitter(width = 0.1)) +
  labs (title = "Hypothetical Outcome Plot: Days of Poor Physical Health",
        subtitle = "Simulation {frame} of {nframes}",
        x = "Work Impacts Family",
        y = "Simulated Days of Poor Physical Health") + 
  theme_minimal()+
  theme(legend.position = "none") +
  transition_manual(sim_id)

animate(g, nframes = n_simulations, fps =1)

# Summarize the data (assuming CO2_sum is already created)
data_physical_health_sum_1 <- data_physical_health |> group_by(family_impacts_the_work) |>
   dplyr::summarise(mean = mean (physical_health_status, na.rm = TRUE),
            sd = sd(physical_health_status, na.rm = TRUE))
# Simulate hypothetical outcomes
set.seed(01002)  # For reproducibility
n_simulations <- 100 # Number of hypothetical outcomes

# Create a data frame with simulated outcomes for each Type
simulated_data_1 <- data_physical_health_sum_1 |>
  rowwise() |>
  do(data.frame(family_impacts_the_work = .$family_impacts_the_work,
                outcome = rnbinom(n_simulations, size = (.$mean^2 / .$sd^2), mu = .$mean),
                sim_id = 1:n_simulations))

# Create an animated plot using ggplot and gganimmated
g <- ggplot (simulated_data_1, aes(x = family_impacts_the_work, y = outcome, fill = family_impacts_the_work))+
  geom_col(data = data_physical_health_sum_1, aes(y = mean), alpha = 0.4, color = "black") +
  geom_point(aes(color = family_impacts_the_work), size = 2, position = position_jitter(width = 0.1)) +
  labs (title = "Hypothetical Outcome Plot: Days of Poor Physical Health",
        subtitle = "Simulation {frame} of {nframes}",
        x = "Family Impacts Work",
        y = "Simulated Days of Poor Physical Health") + 
  theme_minimal()+
  theme(legend.position = "none") +
  transition_manual(sim_id)

animate(g, nframes = n_simulations, fps =1)

data_mental_health <- data_mental_health %>% 
  mutate(work_impacts_the_family = as.numeric(work_impacts_the_family),
         family_impacts_the_work = as.numeric(family_impacts_the_work))

data_mental_health <- data_mental_health %>%
  mutate(work_impacts_the_family = factor(work_impacts_the_family,
                                           levels = c(1,2,3,4),
                                           labels = c("Never", "Rarely", "Sometimes", "Often"))) %>%
  mutate(family_impacts_the_work = factor(family_impacts_the_work,
                                           levels = c(1,2,3,4),
                                           labels = c("Never", "Rarely", "Sometimes", "Often"))) 

# Summarize the data (assuming CO2_sum is already created)
data_mental_health_sum <- data_mental_health |> group_by(work_impacts_the_family) |>
   dplyr::summarise(mean = mean (mental_health_status, na.rm = TRUE),
            sd = sd(mental_health_status, na.rm = TRUE))
# Simulate hypothetical outcomes
set.seed(01002)  # For reproducibility
n_simulations <- 100 # Number of hypothetical outcomes

# Create a data frame with simulated outcomes for each Type
simulated_data_3 <- data_mental_health_sum |>
  rowwise() |>
  do(data.frame(work_impacts_the_family = .$work_impacts_the_family,
                outcome = rnbinom(n_simulations, size = (.$mean^2 / .$sd^2), mu = .$mean),
                sim_id = 1:n_simulations))

# Create an animated plot using ggplot and gganimmated
g <- ggplot (simulated_data_3, aes(x = work_impacts_the_family, y = outcome, fill = work_impacts_the_family))+
  geom_col(data = data_mental_health_sum, aes(y = mean), alpha = 0.4, color = "black") +
  geom_point(aes(color = work_impacts_the_family), size = 2, position = position_jitter(width = 0.1)) +
  labs (title = "Hypothetical Outcome Plot: Days of Poor Mental Health",
        subtitle = "Simulation {frame} of {nframes}",
        x = "Work Impacts Family",
        y = "Simulated Days of Poor Mental Health") + 
  theme_minimal()+
  theme(legend.position = "none") +
  transition_manual(sim_id)

animate(g, nframes = n_simulations, fps =1)

# Summarize the data (assuming CO2_sum is already created)
data_mental_health_sum_1 <- data_mental_health |> group_by(family_impacts_the_work) |>
   dplyr::summarise(mean = mean (mental_health_status, na.rm = TRUE),
            sd = sd(mental_health_status, na.rm = TRUE))
# Simulate hypothetical outcomes
set.seed(01002)  # For reproducibility
n_simulations <- 100 # Number of hypothetical outcomes

# Create a data frame with simulated outcomes for each Type
simulated_data_4 <- data_mental_health_sum_1 |>
  rowwise() |>
  do(data.frame(family_impacts_the_work = .$family_impacts_the_work,
                outcome = rnbinom(n_simulations, size = (.$mean^2 / .$sd^2), mu = .$mean),
                sim_id = 1:n_simulations))

# Create an animated plot using ggplot and gganimmated
g <- ggplot (simulated_data_4, aes(x = family_impacts_the_work, y = outcome, fill = family_impacts_the_work))+
  geom_col(data = data_mental_health_sum_1, aes(y = mean), alpha = 0.4, color = "black") +
  geom_point(aes(color = family_impacts_the_work), size = 2, position = position_jitter(width = 0.1)) +
  labs (title = "Hypothetical Outcome Plot: Days of Poor Mental Health",
        subtitle = "Simulation {frame} of {nframes}",
        x = "Family Impacts Work",
        y = "Simulated Days of Poor Mental Health") + 
  theme_minimal()+
  theme(legend.position = "none") +
  transition_manual(sim_id)

animate(g, nframes = n_simulations, fps =1)