── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(semPlot)library(lavaan)
This is lavaan 0.6-19
lavaan is FREE software! Please report any bugs.
library(psych)
Attaching package: 'psych'
The following object is masked from 'package:lavaan':
cor2cov
The following objects are masked from 'package:ggplot2':
%+%, alpha
library(skimr) library(corrplot)
corrplot 0.95 loaded
library(patchwork) #Merge GGPlots together library(ggplot2) #Graphinglibrary(jtools) #Tabular Regression Resultslibrary(descr) #Easy Frequency Tables library(stats) #Imports survey data library(ggeffects) #Predicted Probabilities from Regressionslibrary(nnet) #For multinomial modelslibrary(MASS) #For ordered models
Attaching package: 'MASS'
The following object is masked from 'package:patchwork':
area
The following object is masked from 'package:dplyr':
select
Attaching package: 'boot'
The following object is masked from 'package:psych':
logit
library(cem) #Coarsened Exact Matching
Loading required package: tcltk
Loading required package: lattice
Attaching package: 'lattice'
The following object is masked from 'package:boot':
melanoma
How to use CEM? Type vignette("cem")
Please cite as:
Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
Attaching package: 'Hmisc'
The following object is masked from 'package:jtools':
%nin%
The following object is masked from 'package:psych':
describe
The following objects are masked from 'package:dplyr':
src, summarize
The following objects are masked from 'package:base':
format.pval, units
library(ebal) #Entropy Balancing
##
## ebal Package: Implements Entropy Balancing.
## See http://www.stanford.edu/~jhain/ for additional information.
library(survey) #Applying EB weights
Loading required package: grid
Loading required package: Matrix
Attaching package: 'Matrix'
The following objects are masked from 'package:tidyr':
expand, pack, unpack
Loading required package: survival
Attaching package: 'survival'
The following object is masked from 'package:boot':
aml
Attaching package: 'survey'
The following object is masked from 'package:Hmisc':
deff
The following object is masked from 'package:WeightIt':
calibrate
The following object is masked from 'package:graphics':
dotchart
Call:
glm(formula = mental_health_status ~ job_interrupt_the_family +
family_interrupt_the_job + marriage_status + total_people_in_household +
age_group + education + family_income + race + sex + working_status,
family = quasipoisson, data = data_mental_health, weights = data_mental_health$weight)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.86773 0.36572 5.107 3.65e-07 ***
job_interrupt_the_family 0.29156 0.04948 5.892 4.61e-09 ***
family_interrupt_the_job 0.10048 0.05254 1.913 0.055968 .
marriage_status2 -0.52643 0.37660 -1.398 0.162350
marriage_status3 0.08600 0.14084 0.611 0.541569
marriage_status4 0.55237 0.24163 2.286 0.022382 *
marriage_status5 -0.26321 0.09866 -2.668 0.007709 **
total_people_in_household 0.01614 0.02485 0.649 0.516177
age_group -0.36941 0.05908 -6.253 5.13e-10 ***
education -0.12536 0.03503 -3.579 0.000355 ***
family_income -0.07043 0.02108 -3.341 0.000853 ***
race2 -0.52015 0.13415 -3.877 0.000110 ***
race3 -0.42281 0.14408 -2.935 0.003386 **
sex 0.56889 0.08343 6.819 1.28e-11 ***
working_status2 -0.03299 0.18205 -0.181 0.856200
working_status3 -0.09345 0.16372 -0.571 0.568228
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for quasipoisson family taken to be 11.56031)
Null deviance: 16817 on 1655 degrees of freedom
Residual deviance: 13943 on 1640 degrees of freedom
(2493 observations deleted due to missingness)
AIC: NA
Number of Fisher Scoring iterations: 6
Likelihood Ratio, AIC, BIC
#Create function to compare model fit statsglm_fit <-function(model) {# Calculate Likelihood Ratio lr <-logLik(model)# Calculate AIC aic <-AIC(model)# Calculate BIC n <-nobs(model) p <-length(coef(model)) bic <--2*logLik(model) + p *log(n)# Calculate Deviance deviance <-summary(model)$deviance# Return the metrics as a list metrics <-data.frame(Likelihood_Ratio = lr, AIC = aic, BIC = bic, Deviance = deviance, Coefficients = p)return(metrics)}
stargazer(nb_physical_health, nb_mental_health, type="text",align=TRUE, dep.var.labels=c("Physical Health Status","Mental Health Status"), covariate.labels=c("Job interrupt the family", "Family interrupt the job", "Marriage status:Separated","Marriage status:Divorced", "Marriage status:Widowed","Marriage status:Married","Total people in household", "Age group:30-49 YEARS OLD","Age group:50-64 YEARS OLD", "Age group:64 YEARS OLD OR OVER", "Education", "Family income", "Race:BLACK", "Race:OTHER","Sex", "Working Status:WORKING PART TIME", "Working Status:WORKING FULL TIME" ))
============================================================================
Dependent variable:
-------------------------------------------
Physical Health Status Mental Health Status
(1) (2)
----------------------------------------------------------------------------
Job interrupt the family 0.047 0.305***
(0.071) (0.060)
Family interrupt the job 0.150* 0.145**
(0.077) (0.065)
Marriage status:Separated -0.306 -0.678*
(0.466) (0.392)
Marriage status:Divorced -0.114 0.025
(0.212) (0.177)
Marriage status:Widowed 0.419 -0.014
(0.404) (0.344)
Marriage status:Married -0.532*** -0.571***
(0.152) (0.126)
Total people in household 0.035 0.048
(0.038) (0.031)
Age group:30-49 YEARS OLD 0.420** -0.258*
(0.164) (0.135)
Age group:50-64 YEARS OLD 0.597*** -0.688***
(0.188) (0.156)
Age group:64 YEARS OLD OR OVER 0.162 -1.424***
(0.294) (0.258)
Education -0.088* -0.145***
(0.048) (0.041)
Family income -0.006 -0.102***
(0.042) (0.033)
Race:BLACK -0.870*** -0.782***
(0.182) (0.149)
Race:OTHER -0.380** -0.602***
(0.184) (0.155)
Sex 0.481*** 0.616***
(0.117) (0.098)
Working Status:WORKING PART TIME -0.705** -0.160
(0.290) (0.247)
Working Status:WORKING FULL TIME -1.189*** -0.124
(0.263) (0.224)
Constant 1.070* 1.867***
(0.611) (0.500)
----------------------------------------------------------------------------
Observations 1,664 1,656
Log Likelihood -2,957.217 -3,769.208
theta 0.207*** (0.011) 0.297*** (0.014)
Akaike Inf. Crit. 5,950.434 7,574.416
============================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
ggplot(j_health, aes(x = response.level, y = predicted, fill =factor(x))) +geom_bar(stat ="identity", position ="dodge") +# Bar plot# Add error bars for confidence intervalsgeom_errorbar(aes(ymin = conf.low, ymax = conf.high), width =0.4, position =position_dodge(width =0.7))+theme_minimal(base_size =13)+labs(x ="Response Level: Genral Health Status", y ="Predicted Probability", title ="Predicted Probability about General Health Status with Job interfere the family life")+scale_fill_discrete(labels=c("1"="NEVER","2"="RARELY","3"="SOMETIMES","4"="OFTEN"))+scale_x_discrete(labels=c("1"="POOR" , "2"="FAIR", "3"="GOOD" , "4"="EXCELLET"))+guides(fill =guide_legend(title ="Job interfere the family life", nrow=1), color ="none")+theme(legend.position ="bottom")
ggplot(f_health, aes(x = response.level, y = predicted, fill =factor(x))) +geom_bar(stat ="identity", position ="dodge") +# Bar plot# Add error bars for confidence intervalsgeom_errorbar(aes(ymin = conf.low, ymax = conf.high), width =0.4, position =position_dodge(width =0.7))+theme_minimal(base_size =13)+labs(x ="Response Level: Genral Health Status", y ="Predicted Probability", title ="Predicted Probability about General Health Status with Family life interfere the Job")+scale_fill_discrete(labels=c("1"="NEVER","2"="RARELY","3"="SOMETIMES","4"="OFTEN"))+scale_x_discrete(labels=c("1"="POOR" , "2"="FAIR", "3"="GOOD" , "4"="EXCELLET"))+guides(fill =guide_legend(title ="Family life interfere the job", nrow=1), color ="none")+theme(legend.position ="bottom")
##What is the impact of employment training on earnings?#Testing for Imbalance Between Groupscheck_wif_p<-test_data_new_wif_p %>%group_by(binary_wif) %>%summarise_at(vars(physical_health_status,marriage_status,total_people_in_household,age_group,education,family_income,race,sex, working_status), list(mean = mean,var = var))round(t(check_wif_p), 3)
match_exact_wif_p <-matchit(binary_wif~physical_health_status+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, method ="exact", data = test_data_new_wif_p)data_exact_wif_p <-match.data(match_exact_wif_p) #Creates new dataframe that only includes the matched casessummary(match_exact_wif_p)
Call:
matchit(formula = binary_wif ~ physical_health_status + marriage_status +
total_people_in_household + age_group + education + family_income +
race + sex + working_status, data = test_data_new_wif_p,
method = "exact")
Summary of Balance for All Data:
Means Treated Means Control Std. Mean Diff.
physical_health_status 2.8958 2.5986 0.0500
marriage_status 3.2940 3.1398 0.0860
total_people_in_household 1.8889 1.8611 0.0195
age_group 2.1123 2.1935 -0.1063
education 3.1701 2.9498 0.1736
family_income 11.7083 11.5565 0.1137
race 1.4931 1.5341 -0.0542
sex 1.4965 1.5036 -0.0141
working_status 2.7824 2.6918 0.1737
Var. Ratio eCDF Mean eCDF Max
physical_health_status 0.9033 0.0202 0.1027
marriage_status 0.9988 0.0308 0.0528
total_people_in_household 0.9320 0.0095 0.0301
age_group 0.7941 0.0288 0.0675
education 1.0185 0.0441 0.0867
family_income 0.7161 0.0127 0.0511
race 0.9616 0.0137 0.0289
sex 1.0003 0.0035 0.0071
working_status 0.8729 0.0313 0.0923
Summary of Balance for Matched Data:
Means Treated Means Control Std. Mean Diff.
physical_health_status 0.3833 0.3833 0
marriage_status 3.4867 3.4867 0
total_people_in_household 1.5967 1.5967 0
age_group 2.1167 2.1167 0
education 3.2200 3.2200 0
family_income 11.9833 11.9833 -0
race 1.3700 1.3700 0
sex 1.4100 1.4100 0
working_status 2.9433 2.9433 0
Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
physical_health_status 0.9988 0 0 0
marriage_status 0.9988 0 0 0
total_people_in_household 0.9988 0 0 0
age_group 0.9988 0 0 0
education 0.9988 0 0 0
family_income 0.9988 0 0 0
race 0.9988 0 0 0
sex 0.9988 0 0 0
working_status 0.9988 0 0 0
Sample Sizes:
Control Treated
All 1116. 864
Matched (ESS) 221.64 300
Matched 338. 300
Unmatched 778. 564
Discarded 0. 0
imbalance_exact_wif_p <-imbalance(group = data_exact_wif_p$binary_wif, data = data_exact_wif_p, drop =c("physical_health_status", "binary_wif", "weights", "subclass")) #With matched data, always add weights and subclass here
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
imbalance_exact_wif_p
Multivariate Imbalance Measure: L1=0.241
Percentage of local common support: LCS=100.0%
Univariate Imbalance Measures:
statistic type L1 min 25% 50% 75% max
marriage_status 2.99165811 (Chi2) 0.052998028 NA NA NA NA NA
total_people_in_household 2.27090332 (Chi2) 0.039112426 NA NA NA NA NA
age_group 3.16844032 (Chi2) 0.032682446 NA NA NA NA NA
education 3.85049816 (Chi2) 0.057080868 NA NA NA NA NA
family_income 0.01433048 (Chi2) 0.000000000 NA NA NA NA NA
race 0.59561223 (Chi2) 0.016824458 NA NA NA NA NA
sex 0.03181756 (Chi2) 0.010118343 NA NA NA NA NA
working_status 1.00454425 (Chi2) 0.003708087 NA NA NA NA NA
###Match Coarsened Exact ###Perform the matching here with code that resembles most regressionsmatch_cem_wif_p <-matchit(binary_wif ~physical_health_status+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, method ="cem", data = test_data_new_wif_p)data_cem_wif_p <-match.data(match_cem_wif_p) #Creates new dataframe that only includes the matched casessummary(match_cem_wif_p)
Call:
matchit(formula = binary_wif ~ physical_health_status + marriage_status +
total_people_in_household + age_group + education + family_income +
race + sex + working_status, data = test_data_new_wif_p,
method = "cem")
Summary of Balance for All Data:
Means Treated Means Control Std. Mean Diff.
physical_health_status 2.8958 2.5986 0.0500
marriage_status 3.2940 3.1398 0.0860
total_people_in_household 1.8889 1.8611 0.0195
age_group 2.1123 2.1935 -0.1063
education 3.1701 2.9498 0.1736
family_income 11.7083 11.5565 0.1137
race 1.4931 1.5341 -0.0542
sex 1.4965 1.5036 -0.0141
working_status 2.7824 2.6918 0.1737
Var. Ratio eCDF Mean eCDF Max
physical_health_status 0.9033 0.0202 0.1027
marriage_status 0.9988 0.0308 0.0528
total_people_in_household 0.9320 0.0095 0.0301
age_group 0.7941 0.0288 0.0675
education 1.0185 0.0441 0.0867
family_income 0.7161 0.0127 0.0511
race 0.9616 0.0137 0.0289
sex 1.0003 0.0035 0.0071
working_status 0.8729 0.0313 0.0923
Summary of Balance for Matched Data:
Means Treated Means Control Std. Mean Diff.
physical_health_status 0.6743 0.6372 0.0062
marriage_status 3.4784 3.4784 0.0000
total_people_in_household 1.5573 1.5573 0.0000
age_group 2.1527 2.1527 0.0000
education 3.2952 3.2952 0.0000
family_income 11.9822 11.9822 0.0000
race 1.3740 1.3740 0.0000
sex 1.4173 1.4173 0.0000
working_status 2.9389 2.9389 0.0000
Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
physical_health_status 0.9385 0.0034 0.0319 0.0947
marriage_status 0.9990 0.0000 0.0000 0.0000
total_people_in_household 0.9990 0.0000 0.0000 0.0000
age_group 0.9990 0.0000 0.0000 0.0000
education 0.9990 0.0000 0.0000 0.0000
family_income 0.9990 0.0000 0.0000 0.0000
race 0.9990 0.0000 0.0000 0.0000
sex 0.9990 0.0000 0.0000 0.0000
working_status 0.9990 0.0000 0.0000 0.0000
Sample Sizes:
Control Treated
All 1116. 864
Matched (ESS) 280.57 393
Matched 423. 393
Unmatched 693. 471
Discarded 0. 0
imbalance_cem_wif_p <-imbalance(group = data_cem_wif_p$binary_wif, data = data_cem_wif_p, drop =c("physical_health_status", "binary_wif", "weights", "subclass"))
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
imbalance_cem_wif_p
Multivariate Imbalance Measure: L1=0.241
Percentage of local common support: LCS=100.0%
Univariate Imbalance Measures:
statistic type L1 min 25% 50% 75% max
marriage_status 4.28087075 (Chi2) 0.061700323 NA NA NA NA NA
total_people_in_household 1.66414135 (Chi2) 0.024488838 NA NA NA NA NA
age_group 4.64491477 (Chi2) 0.043365275 NA NA NA NA NA
education 5.13965175 (Chi2) 0.062620685 NA NA NA NA NA
family_income 0.08215837 (Chi2) 0.000000000 NA NA NA NA NA
race 0.87142065 (Chi2) 0.020049447 NA NA NA NA NA
sex 0.33132640 (Chi2) 0.022413513 NA NA NA NA NA
working_status 0.66705767 (Chi2) 0.002724992 NA NA NA NA NA
#Compare t-tests of DV on treated - no control variables t.test(test_data_new_wif_p$physical_health_status, test_data_new_wif_p$binary_wif)
Welch Two Sample t-test
data: test_data_new_wif_p$physical_health_status and test_data_new_wif_p$binary_wif
t = 16.599, df = 2005, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
2.021131 2.562707
sample estimates:
mean of x mean of y
2.7282828 0.4363636
#Estimate Linear Regression on Raw Datalm1_wif_p<-lm(physical_health_status~ binary_wif+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, test_data_new_wif_p)summary(lm1_wif_p)
stargazer(lm1_wif_p, lm2_wif_p, type ="text", digits =3, dep.var.labels =c("Physical Health Status"),column.labels =c("Full Data", "Matched Data"),covariate.labels=c("Job interrupt the family(binary)", "Marriage status","Total people in household", "Age group", "Education","Family income","Race", "Sex", "Working Status","Constant"))
=============================================================================
Dependent variable:
--------------------------------------------
Physical Health Status
Full Data Matched Data
(1) (2)
-----------------------------------------------------------------------------
Job interrupt the family(binary) 0.533* 0.037
(0.276) (0.124)
Marriage status -0.190** -0.087**
(0.083) (0.040)
Total people in household -0.068 -0.037
(0.096) (0.058)
Age group 0.376** 0.117
(0.179) (0.097)
Education -0.161 0.075
(0.111) (0.051)
Family income -0.031 0.187
(0.095) (0.285)
Race -0.369** -0.150*
(0.178) (0.091)
Sex 0.550** 0.177
(0.276) (0.134)
Working Status -1.601*** 0.406*
(0.255) (0.245)
Constant 7.383*** -2.981
(1.403) (3.503)
-----------------------------------------------------------------------------
Observations 1,980 816
R2 0.034 0.021
Adjusted R2 0.030 0.010
Residual Std. Error 6.032 (df = 1970) 1.771 (df = 806)
F Statistic 7.760*** (df = 9; 1970) 1.889* (df = 9; 806)
=============================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
#######Entropy Balancing# Create a subset of the dataset with the selected variablestreatment_var_wif_p <-"binary_wif"covariates_vars_wif_p <-c("marriage_status", "total_people_in_household", "age_group","education", "family_income", "race", "sex", "working_status")dependent_var_wif_p <-"physical_health_status"# Prepare treatment and covariatestreatment_wif_p <- test_data_new_wif_p$binary_wifcovariates_wif_p <- test_data_new_wif_p[, covariates_vars_wif_p]# Run entropy balancinge_bal_wif_p <-ebalance(Treatment = treatment_wif_p,X = covariates_wif_p,max.iterations =200,constraint.tolerance =1)
Converged within tolerance
# Add weights back to LLtest_data_new_wif_p$eb_weight_wif_p <-NAtest_data_new_wif_p$eb_weight_wif_p[test_data_new_wif_p[[treatment_var_wif_p]] ==1] <-1# Treated units get weight = 1test_data_new_wif_p$eb_weight_wif_p[test_data_new_wif_p[[treatment_var_wif_p]] ==0] <- e_bal_wif_p$w # Control units get EB weights# Final data for regressioneb_data_wif_p <- test_data_new_wif_p %>%filter(!is.na(eb_weight_wif_p)) # Exclude unmatched if any#data for analysis #Now have a weight called 'eb_weight' that can be used in analysis ##Let's check that the two groups are equal now eb_data_wif_p %>%group_by(binary_wif) %>%summarise(age_weighted_mean =wtd.mean(age_group, weights = eb_weight_wif_p), age_weighted_variance =wtd.var(age_group, weights = eb_weight_wif_p) )
##What is the impact of employment training on earnings?#Testing for Imbalance Between Groupscheck_fiw_p<-test_data_new_fiw_p %>%group_by(binary_fiw) %>%summarise_at(vars(physical_health_status,marriage_status,total_people_in_household,age_group,education,family_income,race,sex, working_status), list(mean = mean,var = var))round(t(check_fiw_p), 3)
match_exact_fiw_p <-matchit(binary_fiw ~physical_health_status+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, method ="exact", data = test_data_new_fiw_p)data_exact_fiw_p <-match.data(match_exact_fiw_p) #Creates new dataframe that only includes the matched casessummary(match_exact_fiw_p)
Call:
matchit(formula = binary_fiw ~ physical_health_status + marriage_status +
total_people_in_household + age_group + education + family_income +
race + sex + working_status, data = test_data_new_fiw_p,
method = "exact")
Summary of Balance for All Data:
Means Treated Means Control Std. Mean Diff.
physical_health_status 3.0892 2.5607 0.0875
marriage_status 3.4506 3.0939 0.2014
total_people_in_household 1.9920 1.8180 0.1172
age_group 2.1290 2.1716 -0.0570
education 3.1449 3.0000 0.1124
family_income 11.6194 11.6243 -0.0032
race 1.5334 1.5081 0.0322
sex 1.5111 1.4956 0.0312
working_status 2.7389 2.7278 0.0198
Var. Ratio eCDF Mean eCDF Max
physical_health_status 0.9615 0.0272 0.1114
marriage_status 0.9734 0.0713 0.1093
total_people_in_household 1.0761 0.0226 0.0844
age_group 0.7738 0.0328 0.0642
education 1.0559 0.0290 0.0637
family_income 1.0335 0.0039 0.0126
race 1.0783 0.0084 0.0226
sex 1.0004 0.0078 0.0156
working_status 1.0733 0.0124 0.0241
Summary of Balance for Matched Data:
Means Treated Means Control Std. Mean Diff.
physical_health_status 0.5808 0.5808 0
marriage_status 3.5633 3.5633 0
total_people_in_household 1.6507 1.6507 0
age_group 2.1266 2.1266 0
education 3.1790 3.1790 -0
family_income 11.9956 11.9956 0
race 1.4105 1.4105 0
sex 1.4367 1.4367 -0
working_status 2.9345 2.9345 0
Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
physical_health_status 1.0001 0 0 0
marriage_status 1.0001 0 0 0
total_people_in_household 1.0001 0 0 0
age_group 1.0001 0 0 0
education 1.0001 0 0 0
family_income 1.0001 0 0 0
race 1.0001 0 0 0
sex 1.0001 0 0 0
working_status 1.0001 0 0 0
Sample Sizes:
Control Treated
All 1352. 628
Matched (ESS) 231.73 229
Matched 366. 229
Unmatched 986. 399
Discarded 0. 0
imbalance_exact_fiw_p <-imbalance(group = data_exact_fiw_p$binary_fiw, data = data_exact_fiw_p, drop =c("physical_health_status", "binary_fiw", "weights", "subclass")) #With matched data, always add weights and subclass here
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
imbalance_exact_fiw_p
Multivariate Imbalance Measure: L1=0.282
Percentage of local common support: LCS=100.0%
Univariate Imbalance Measures:
statistic type L1 min 25% 50% 75% max
marriage_status 2.415448e+00 (Chi2) 0.056732765 NA NA NA NA NA
total_people_in_household 4.415250e+00 (Chi2) 0.067458897 NA NA NA NA NA
age_group 4.658060e-01 (Chi2) 0.024530508 NA NA NA NA NA
education 2.729352e+00 (Chi2) 0.026212805 NA NA NA NA NA
family_income 7.054646e-31 (Chi2) 0.000000000 NA NA NA NA NA
race 3.336199e+00 (Chi2) 0.064189754 NA NA NA NA NA
sex 5.339079e-02 (Chi2) 0.013183955 NA NA NA NA NA
working_status 4.527508e-02 (Chi2) 0.001097669 NA NA NA NA NA
###Match Coarsened Exact ###Perform the matching here with code that resembles most regressionsmatch_cem_fiw_p <-matchit(binary_fiw ~physical_health_status+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, method ="cem", data = test_data_new_fiw_p)data_cem_fiw_p <-match.data(match_cem_fiw_p) #Creates new dataframe that only includes the matched casessummary(match_cem_fiw_p)
Call:
matchit(formula = binary_fiw ~ physical_health_status + marriage_status +
total_people_in_household + age_group + education + family_income +
race + sex + working_status, data = test_data_new_fiw_p,
method = "cem")
Summary of Balance for All Data:
Means Treated Means Control Std. Mean Diff.
physical_health_status 3.0892 2.5607 0.0875
marriage_status 3.4506 3.0939 0.2014
total_people_in_household 1.9920 1.8180 0.1172
age_group 2.1290 2.1716 -0.0570
education 3.1449 3.0000 0.1124
family_income 11.6194 11.6243 -0.0032
race 1.5334 1.5081 0.0322
sex 1.5111 1.4956 0.0312
working_status 2.7389 2.7278 0.0198
Var. Ratio eCDF Mean eCDF Max
physical_health_status 0.9615 0.0272 0.1114
marriage_status 0.9734 0.0713 0.1093
total_people_in_household 1.0761 0.0226 0.0844
age_group 0.7738 0.0328 0.0642
education 1.0559 0.0290 0.0637
family_income 1.0335 0.0039 0.0126
race 1.0783 0.0084 0.0226
sex 1.0004 0.0078 0.0156
working_status 1.0733 0.0124 0.0241
Summary of Balance for Matched Data:
Means Treated Means Control Std. Mean Diff.
physical_health_status 0.7649 0.7145 0.0084
marriage_status 3.5860 3.5860 0.0000
total_people_in_household 1.6772 1.6772 0.0000
age_group 2.1509 2.1509 0.0000
education 3.2842 3.2842 0.0000
family_income 11.9860 11.9860 0.0000
race 1.4140 1.4140 0.0000
sex 1.4421 1.4421 0.0000
working_status 2.9228 2.9228 0.0000
Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
physical_health_status 0.9728 0.004 0.0386 0.0989
marriage_status 1.0000 0.000 0.0000 0.0000
total_people_in_household 1.0000 0.000 0.0000 0.0000
age_group 1.0000 0.000 0.0000 0.0000
education 1.0000 0.000 0.0000 0.0000
family_income 1.0000 0.000 0.0000 0.0000
race 1.0000 0.000 0.0000 0.0000
sex 1.0000 0.000 0.0000 0.0000
working_status 1.0000 0.000 0.0000 0.0000
Sample Sizes:
Control Treated
All 1352. 628
Matched (ESS) 283.37 285
Matched 447. 285
Unmatched 905. 343
Discarded 0. 0
imbalance_cem_fiw_p <-imbalance(group = data_cem_fiw_p$binary_fiw, data = data_cem_fiw_p, drop =c("physical_health_status", "binary_fiw", "weights", "subclass"))
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
imbalance_cem_fiw_p
Multivariate Imbalance Measure: L1=0.279
Percentage of local common support: LCS=100.0%
Univariate Imbalance Measures:
statistic type L1 min 25% 50% 75% max
marriage_status 3.25633286 (Chi2) 0.059578476 NA NA NA NA NA
total_people_in_household 5.36925857 (Chi2) 0.065418580 NA NA NA NA NA
age_group 1.12679662 (Chi2) 0.024278818 NA NA NA NA NA
education 4.47009965 (Chi2) 0.044012716 NA NA NA NA NA
family_income 0.31156120 (Chi2) 0.000000000 NA NA NA NA NA
race 2.96910251 (Chi2) 0.049664430 NA NA NA NA NA
sex 0.06667579 (Chi2) 0.012575062 NA NA NA NA NA
working_status 0.46179671 (Chi2) 0.000965501 NA NA NA NA NA
Welch Two Sample t-test
data: test_data_new_fiw_p$physical_health_status and test_data_new_fiw_p$binary_fiw
t = 17.469, df = 2001.9, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
2.140428 2.681794
sample estimates:
mean of x mean of y
2.7282828 0.3171717
#Estimate Linear Regression on Raw Datalm1_fiw_p<-lm(physical_health_status~ binary_fiw+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, test_data_new_fiw_p)summary(lm1_fiw_p)
stargazer(lm1_fiw_p, lm2_fiw_p, type ="text", digits =3, dep.var.labels =c("Earnings Post Training"),column.labels =c("Full Data", "Matched Data"),covariate.labels=c("Family interrupt the job(binary)", "Marriage status","Total people in household", "Age group", "Education","Family Income","Race", "Sex", "Working Status" ))
============================================================================
Dependent variable:
-------------------------------------------
Earnings Post Training
Full Data Matched Data
(1) (2)
----------------------------------------------------------------------------
Family interrupt the job(binary) 0.671** 0.050
(0.294) (0.171)
Marriage status -0.201** -0.006
(0.083) (0.057)
Total people in household -0.076 -0.025
(0.096) (0.074)
Age group 0.373** 0.174
(0.179) (0.132)
Education -0.157 -0.018
(0.111) (0.069)
Family Income -0.023 -0.158
(0.095) (0.604)
Race -0.385** -0.252**
(0.178) (0.119)
Sex 0.538* 0.025
(0.276) (0.181)
Working Status -1.571*** 0.518*
(0.255) (0.313)
Constant 7.306*** 1.164
(1.403) (7.104)
----------------------------------------------------------------------------
Observations 1,980 732
R2 0.035 0.013
Adjusted R2 0.031 0.0004
Residual Std. Error 6.030 (df = 1970) 2.258 (df = 722)
F Statistic 7.931*** (df = 9; 1970) 1.030 (df = 9; 722)
============================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
#######Entropy Balancing# Create a subset of the dataset with the selected variablestreatment_var_fiw_p <-"binary_fiw"covariates_vars_fiw_p <-c("marriage_status", "total_people_in_household", "age_group","education", "family_income", "race", "sex", "working_status")dependent_var_fiw_p <-"physical_health_status"# Prepare treatment and covariatestreatment_fiw_p <- test_data_new_fiw_p$binary_fiwcovariates_fiw_p <- test_data_new_fiw_p[, covariates_vars_fiw_p]# Run entropy balancinge_bal_fiw_p <-ebalance(Treatment = treatment_fiw_p,X = covariates_fiw_p,max.iterations =200,constraint.tolerance =1)
Converged within tolerance
test_data_new_fiw_p$eb_weight_fiw_p <-NAtest_data_new_fiw_p$eb_weight_fiw_p[test_data_new_fiw_p[[treatment_var_fiw_p]] ==1] <-1# Treated units get weight = 1test_data_new_fiw_p$eb_weight_fiw_p[test_data_new_fiw_p[[treatment_var_fiw_p]] ==0] <- e_bal_fiw_p$w # Control units get EB weights# Final data for regressioneb_data_fiw_p <- test_data_new_fiw_p %>%filter(!is.na(eb_weight_fiw_p)) # Exclude unmatched if any#data for analysis #Now have a weight called 'eb_weight' that can be used in analysis ##Let's check that the two groups are equal now eb_data_fiw_p %>%group_by(binary_fiw) %>%summarise(age_weighted_mean_fiw_p =wtd.mean(age_group, weights = eb_weight_fiw_p), age_weighted_variance_fiw_p =wtd.var(age_group, weights = eb_weight_fiw_p) )
##What is the impact of employment training on earnings?#Testing for Imbalance Between Groupscheck_wif_m<-test_data_new_wif_m %>%group_by(binary_wif) %>%summarise_at(vars(mental_health_status,marriage_status,total_people_in_household,age_group,education,family_income,race,sex, working_status), list(mean = mean,var = var))round(t(check_wif_m), 3)
match_exact_wif_m <-matchit(binary_wif~mental_health_status+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, method ="exact", data =test_data_new_wif_m)data_exact_wif_m <-match.data(match_exact_wif_m) #Creates new dataframe that only includes the matched casessummary(match_exact_wif_m)
Call:
matchit(formula = binary_wif ~ mental_health_status + marriage_status +
total_people_in_household + age_group + education + family_income +
race + sex + working_status, data = test_data_new_wif_m,
method = "exact")
Summary of Balance for All Data:
Means Treated Means Control Std. Mean Diff.
mental_health_status 5.6481 3.7957 0.2209
marriage_status 3.2940 3.1398 0.0860
total_people_in_household 1.8889 1.8611 0.0195
age_group 2.1123 2.1935 -0.1063
education 3.1701 2.9498 0.1736
family_income 11.7083 11.5565 0.1137
race 1.4931 1.5341 -0.0542
sex 1.4965 1.5036 -0.0141
working_status 2.7824 2.6918 0.1737
Var. Ratio eCDF Mean eCDF Max
mental_health_status 1.2603 0.0623 0.1606
marriage_status 0.9988 0.0308 0.0528
total_people_in_household 0.9320 0.0095 0.0301
age_group 0.7941 0.0288 0.0675
education 1.0185 0.0441 0.0867
family_income 0.7161 0.0127 0.0511
race 0.9616 0.0137 0.0289
sex 1.0003 0.0035 0.0071
working_status 0.8729 0.0313 0.0923
Summary of Balance for Matched Data:
Means Treated Means Control Std. Mean Diff.
mental_health_status 1.8244 1.8244 -0
marriage_status 3.6298 3.6298 0
total_people_in_household 1.4885 1.4885 0
age_group 2.2366 2.2366 0
education 3.3244 3.3244 0
family_income 12.0000 12.0000 0
race 1.3588 1.3588 0
sex 1.3740 1.3740 0
working_status 2.9580 2.9580 -0
Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
mental_health_status 0.9984 0 0 0
marriage_status 0.9984 0 0 0
total_people_in_household 0.9984 0 0 0
age_group 0.9984 0 0 0
education 0.9984 0 0 0
family_income 0.0000 0 0 0
race 0.9984 0 0 0
sex 0.9984 0 0 0
working_status 0.9984 0 0 0
Sample Sizes:
Control Treated
All 1116. 864
Matched (ESS) 186.04 262
Matched 288. 262
Unmatched 828. 602
Discarded 0. 0
imbalance_exact_wif_m <-imbalance(group = data_exact_wif_m$binary_wif, data = data_exact_wif_m, drop =c("mental_health_status", "binary_wif", "weights", "subclass")) #With matched data, always add weights and subclass here
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
imbalance_exact_wif_m
Multivariate Imbalance Measure: L1=0.225
Percentage of local common support: LCS=100.0%
Univariate Imbalance Measures:
statistic type L1 min 25% 50% 75% max
marriage_status 1.24285997 (Chi2) 0.0402088634 NA NA NA NA NA
total_people_in_household 2.28837788 (Chi2) 0.0342716285 NA NA NA NA NA
age_group 1.53750809 (Chi2) 0.0485581001 NA NA NA NA NA
education 8.34406798 (Chi2) 0.1176579729 NA NA NA NA NA
family_income 1.22909091 (Chi2) 0.0000000000 NA NA NA NA NA
race 0.18225258 (Chi2) 0.0140744275 NA NA NA NA NA
sex 0.46998088 (Chi2) 0.0322041985 NA NA NA NA NA
working_status 0.06195664 (Chi2) 0.0003445717 NA NA NA NA NA
###Match Coarsened Exact ###Perform the matching here with code that resembles most regressionsmatch_cem_fiw_m <-matchit(binary_wif ~mental_health_status+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, method ="cem", data = test_data_new_wif_m)data_cem_fiw_m <-match.data(match_cem_fiw_m) #Creates new dataframe that only includes the matched casessummary(match_cem_fiw_m)
Call:
matchit(formula = binary_wif ~ mental_health_status + marriage_status +
total_people_in_household + age_group + education + family_income +
race + sex + working_status, data = test_data_new_wif_m,
method = "cem")
Summary of Balance for All Data:
Means Treated Means Control Std. Mean Diff.
mental_health_status 5.6481 3.7957 0.2209
marriage_status 3.2940 3.1398 0.0860
total_people_in_household 1.8889 1.8611 0.0195
age_group 2.1123 2.1935 -0.1063
education 3.1701 2.9498 0.1736
family_income 11.7083 11.5565 0.1137
race 1.4931 1.5341 -0.0542
sex 1.4965 1.5036 -0.0141
working_status 2.7824 2.6918 0.1737
Var. Ratio eCDF Mean eCDF Max
mental_health_status 1.2603 0.0623 0.1606
marriage_status 0.9988 0.0308 0.0528
total_people_in_household 0.9320 0.0095 0.0301
age_group 0.7941 0.0288 0.0675
education 1.0185 0.0441 0.0867
family_income 0.7161 0.0127 0.0511
race 0.9616 0.0137 0.0289
sex 1.0003 0.0035 0.0071
working_status 0.8729 0.0313 0.0923
Summary of Balance for Matched Data:
Means Treated Means Control Std. Mean Diff.
mental_health_status 1.8585 1.8056 0.0063
marriage_status 3.6092 3.6092 0.0000
total_people_in_household 1.4985 1.4985 -0.0000
age_group 2.2492 2.2492 0.0000
education 3.3385 3.3385 0.0000
family_income 11.9938 11.9938 -0.0000
race 1.3692 1.3692 0.0000
sex 1.3969 1.3969 0.0000
working_status 2.9385 2.9385 0.0000
Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
mental_health_status 0.9977 0.002 0.0291 0.0577
marriage_status 0.9988 0.000 0.0000 0.0000
total_people_in_household 0.9988 0.000 0.0000 0.0000
age_group 0.9988 0.000 0.0000 0.0000
education 0.9988 0.000 0.0000 0.0000
family_income 0.9988 0.000 0.0000 0.0000
race 0.9988 0.000 0.0000 0.0000
sex 0.9988 0.000 0.0000 0.0000
working_status 0.9988 0.000 0.0000 0.0000
Sample Sizes:
Control Treated
All 1116. 864
Matched (ESS) 233.91 325
Matched 344. 325
Unmatched 772. 539
Discarded 0. 0
imbalance_cem_fiw_m <-imbalance(group = data_cem_fiw_m$binary_wif, data = data_cem_fiw_m, drop =c("mental_health_status", "binary_wif", "weights", "subclass"))
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
imbalance_cem_fiw_m
Multivariate Imbalance Measure: L1=0.221
Percentage of local common support: LCS=100.0%
Univariate Imbalance Measures:
statistic type L1 min 25% 50% 75% max
marriage_status 8.570204e-01 (Chi2) 0.035366726 NA NA NA NA NA
total_people_in_household 1.630368e+00 (Chi2) 0.025769231 NA NA NA NA NA
age_group 2.143303e+00 (Chi2) 0.022611807 NA NA NA NA NA
education 9.081761e+00 (Chi2) 0.104436494 NA NA NA NA NA
family_income 3.113318e-30 (Chi2) 0.000000000 NA NA NA NA NA
race 3.612972e-01 (Chi2) 0.018899821 NA NA NA NA NA
sex 8.973683e-01 (Chi2) 0.039123435 NA NA NA NA NA
working_status 7.885286e-01 (Chi2) 0.000509839 NA NA NA NA NA
#Compare t-tests of DV on treated - no control variables t.test(test_data_new_wif_m$mental_health_status, test_data_new_wif_m$binary_wif)
Welch Two Sample t-test
data: test_data_new_wif_m$mental_health_status and test_data_new_wif_m$binary_wif
t = 23.327, df = 1994.5, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
3.817287 4.518066
sample estimates:
mean of x mean of y
4.6040404 0.4363636
#Estimate Linear Regression on Raw Datalm1_wif_m<-lm(mental_health_status~ binary_wif+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, test_data_new_wif_m)summary(lm1_wif_m)
stargazer(lm1_wif_m,lm2_wif_m, type ="text", digits =3, dep.var.labels =c("Mental Healtth Status"),column.labels =c("Full Data", "Matched Data"),covariate.labels=c("Job interrupt the family (binary)", "Marriage status","Total people in household", "Age group", "Education","Family income","Race", "Sex", "Working Status" ))
=================================================================================
Dependent variable:
-----------------------------------------------
Mental Healtth Status
Full Data Matched Data
(1) (2)
---------------------------------------------------------------------------------
Job interrupt the family (binary) 1.952*** 0.053
(0.349) (0.392)
Marriage status -0.305*** -0.065
(0.105) (0.128)
Total people in household 0.008 -0.419**
(0.121) (0.196)
Age group -1.449*** -1.176***
(0.226) (0.301)
Education -0.528*** -0.598***
(0.140) (0.162)
Family income -0.187 2.370
(0.120) (2.540)
Race -0.913*** -1.122***
(0.225) (0.281)
Sex 1.780*** 0.234
(0.348) (0.432)
Working Status -0.557* 0.801
(0.323) (0.735)
Constant 11.855*** -22.258
(1.772) (30.434)
---------------------------------------------------------------------------------
Observations 1,980 669
R2 0.082 0.081
Adjusted R2 0.078 0.068
Residual Std. Error 7.620 (df = 1970) 5.073 (df = 659)
F Statistic 19.549*** (df = 9; 1970) 6.440*** (df = 9; 659)
=================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
#######Entropy Balancing# Create a subset of the dataset with the selected variablestreatment_var_wif_m <-"binary_wif"covariates_vars_wif_m <-c("marriage_status", "total_people_in_household", "age_group","education", "family_income", "race", "sex", "working_status")dependent_var_wif_m <-"mental_health_status"# Prepare treatment and covariatestreatment_wif_m <- test_data_new_wif_m$binary_wifcovariates_wif_m <- test_data_new_wif_m[, covariates_vars_wif_m]# Run entropy balancinge_bal_wif_m <-ebalance(Treatment = treatment_wif_m,X = covariates_wif_m,max.iterations =200,constraint.tolerance =1)
Converged within tolerance
# Add weights back to LLtest_data_new_wif_m$eb_weight_wif_m <-NAtest_data_new_wif_m$eb_weight_wif_m[test_data_new_wif_m[[treatment_var_wif_m]] ==1] <-1# Treated units get weight = 1test_data_new_wif_m$eb_weight_wif_m[test_data_new_wif_m[[treatment_var_wif_m]] ==0] <- e_bal_wif_m$w # Control units get EB weights# Final data for regressioneb_data_wif_m <- test_data_new_wif_m %>%filter(!is.na(eb_weight_wif_m)) # Exclude unmatched if any#data for analysis #Now have a weight called 'eb_weight' that can be used in analysis ##Let's check that the two groups are equal now eb_data_wif_m %>%group_by(binary_wif) %>%summarise(age_weighted_mean =wtd.mean(age_group, weights = eb_weight_wif_m), age_weighted_variance =wtd.var(age_group, weights = eb_weight_wif_m) )
##What is the impact of employment training on earnings?#Testing for Imbalance Between Groupscheck_fiw_m<-test_data_new_fiw_m %>%group_by(binary_fiw) %>%summarise_at(vars(mental_health_status,marriage_status,total_people_in_household,age_group,education,family_income,race,sex, working_status), list(mean = mean,var = var))round(t(check_fiw_m), 3)
match_exact_fiw_m <-matchit(binary_fiw~mental_health_status+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, method ="exact", data = test_data_new_fiw_m)data_exact_fiw_m <-match.data(match_exact_fiw_m) #Creates new dataframe that only includes the matched casessummary(match_exact_fiw_m)
Call:
matchit(formula = binary_fiw ~ mental_health_status + marriage_status +
total_people_in_household + age_group + education + family_income +
race + sex + working_status, data = test_data_new_fiw_m,
method = "exact")
Summary of Balance for All Data:
Means Treated Means Control Std. Mean Diff.
mental_health_status 5.6656 4.1109 0.1877
marriage_status 3.4506 3.0939 0.2014
total_people_in_household 1.9920 1.8180 0.1172
age_group 2.1290 2.1716 -0.0570
education 3.1449 3.0000 0.1124
family_income 11.6194 11.6243 -0.0032
race 1.5334 1.5081 0.0322
sex 1.5111 1.4956 0.0312
working_status 2.7389 2.7278 0.0198
Var. Ratio eCDF Mean eCDF Max
mental_health_status 1.1502 0.0528 0.1517
marriage_status 0.9734 0.0713 0.1093
total_people_in_household 1.0761 0.0226 0.0844
age_group 0.7738 0.0328 0.0642
education 1.0559 0.0290 0.0637
family_income 1.0335 0.0039 0.0126
race 1.0783 0.0084 0.0226
sex 1.0004 0.0078 0.0156
working_status 1.0733 0.0124 0.0241
Summary of Balance for Matched Data:
Means Treated Means Control Std. Mean Diff.
mental_health_status 1.5351 1.5351 0
marriage_status 3.7892 3.7892 0
total_people_in_household 1.6919 1.6919 0
age_group 2.2000 2.2000 0
education 3.3676 3.3676 0
family_income 11.9946 11.9946 0
race 1.4595 1.4595 -0
sex 1.3838 1.3838 0
working_status 2.9568 2.9568 0
Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
mental_health_status 0.9995 0 0 0
marriage_status 0.9995 0 0 0
total_people_in_household 0.9995 0 0 0
age_group 0.9995 0 0 0
education 0.9995 0 0 0
family_income 0.9995 0 0 0
race 0.9995 0 0 0
sex 0.9995 0 0 0
working_status 0.9995 0 0 0
Sample Sizes:
Control Treated
All 1352. 628
Matched (ESS) 168.63 185
Matched 292. 185
Unmatched 1060. 443
Discarded 0. 0
imbalance_exact_fiw_m <-imbalance(group = data_exact_fiw_m$binary_fiw, data = data_exact_fiw_m, drop =c("mental_health_status", "binary_fiw", "weights", "subclass")) #With matched data, always add weights and subclass here
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
imbalance_exact_fiw_m
Multivariate Imbalance Measure: L1=0.251
Percentage of local common support: LCS=100.0%
Univariate Imbalance Measures:
statistic type L1 min 25% 50% 75% max
marriage_status 1.351709e+00 (Chi2) 0.03337653 NA NA NA NA NA
total_people_in_household 6.005671e+00 (Chi2) 0.08463532 NA NA NA NA NA
age_group 2.180951e+00 (Chi2) 0.05994076 NA NA NA NA NA
education 4.061170e+00 (Chi2) 0.09207701 NA NA NA NA NA
family_income 1.573880e-30 (Chi2) 0.00000000 NA NA NA NA NA
race 3.856778e+00 (Chi2) 0.07613847 NA NA NA NA NA
sex 4.364911e-02 (Chi2) 0.01392077 NA NA NA NA NA
working_status 6.546799e-02 (Chi2) 0.00000000 NA NA NA NA NA
###Match Coarsened Exact ###Perform the matching here with code that resembles most regressionsmatch_cem_fiw_m <-matchit(binary_fiw ~mental_health_status+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, method ="cem", data = test_data_new_fiw_m)data_cem_fiw_m <-match.data(match_cem_fiw_m) #Creates new dataframe that only includes the matched casessummary(match_cem_fiw_m)
Call:
matchit(formula = binary_fiw ~ mental_health_status + marriage_status +
total_people_in_household + age_group + education + family_income +
race + sex + working_status, data = test_data_new_fiw_m,
method = "cem")
Summary of Balance for All Data:
Means Treated Means Control Std. Mean Diff.
mental_health_status 5.6656 4.1109 0.1877
marriage_status 3.4506 3.0939 0.2014
total_people_in_household 1.9920 1.8180 0.1172
age_group 2.1290 2.1716 -0.0570
education 3.1449 3.0000 0.1124
family_income 11.6194 11.6243 -0.0032
race 1.5334 1.5081 0.0322
sex 1.5111 1.4956 0.0312
working_status 2.7389 2.7278 0.0198
Var. Ratio eCDF Mean eCDF Max
mental_health_status 1.1502 0.0528 0.1517
marriage_status 0.9734 0.0713 0.1093
total_people_in_household 1.0761 0.0226 0.0844
age_group 0.7738 0.0328 0.0642
education 1.0559 0.0290 0.0637
family_income 1.0335 0.0039 0.0126
race 1.0783 0.0084 0.0226
sex 1.0004 0.0078 0.0156
working_status 1.0733 0.0124 0.0241
Summary of Balance for Matched Data:
Means Treated Means Control Std. Mean Diff.
mental_health_status 1.5983 1.4992 0.012
marriage_status 3.8419 3.8419 0.000
total_people_in_household 1.6880 1.6880 0.000
age_group 2.2137 2.2137 0.000
education 3.3504 3.3504 0.000
family_income 11.9957 11.9957 0.000
race 1.4274 1.4274 0.000
sex 1.3889 1.3889 0.000
working_status 2.9274 2.9274 -0.000
Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
mental_health_status 0.9929 0.0037 0.0562 0.0634
marriage_status 0.9999 0.0000 0.0000 0.0000
total_people_in_household 0.9999 0.0000 0.0000 0.0000
age_group 0.9999 0.0000 0.0000 0.0000
education 0.9999 0.0000 0.0000 0.0000
family_income 0.9999 0.0000 0.0000 0.0000
race 0.9999 0.0000 0.0000 0.0000
sex 0.9999 0.0000 0.0000 0.0000
working_status 0.9999 0.0000 0.0000 0.0000
Sample Sizes:
Control Treated
All 1352 628
Matched (ESS) 230 234
Matched 378 234
Unmatched 974 394
Discarded 0 0
imbalance_cem_fiw_m <-imbalance(group = data_cem_fiw_m$binary_fiw, data = data_cem_fiw_m, drop =c("mental_health_status", "binary_fiw", "weights", "subclass"))
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
Warning in chisq.test(cbind(t1[keep], t2[keep])): Chi-squared approximation may
be incorrect
imbalance_cem_fiw_m
Multivariate Imbalance Measure: L1=0.264
Percentage of local common support: LCS=100.0%
Univariate Imbalance Measures:
statistic type L1 min 25% 50% 75% max
marriage_status 1.313909e+00 (Chi2) 4.273504e-02 NA NA NA NA NA
total_people_in_household 4.498646e+00 (Chi2) 6.715507e-02 NA NA NA NA NA
age_group 1.439695e+00 (Chi2) 1.526252e-02 NA NA NA NA NA
education 2.665052e+00 (Chi2) 5.840456e-02 NA NA NA NA NA
family_income 1.232107e-30 (Chi2) 0.000000e+00 NA NA NA NA NA
race 3.706557e+00 (Chi2) 6.389906e-02 NA NA NA NA NA
sex 0.000000e+00 (Chi2) 1.110223e-16 NA NA NA NA NA
working_status 5.938950e-01 (Chi2) 3.256003e-03 NA NA NA NA NA
Welch Two Sample t-test
data: test_data_new_fiw_m$mental_health_status and test_data_new_fiw_m$binary_fiw
t = 23.999, df = 1992.6, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
3.936560 4.637177
sample estimates:
mean of x mean of y
4.6040404 0.3171717
#Estimate Linear Regression on Raw Datalm1_fiw_m<-lm(mental_health_status~ binary_fiw+marriage_status+ total_people_in_household+age_group+education+family_income+race+sex+working_status, test_data_new_fiw_m)summary(lm1_fiw_m)
stargazer(lm1_fiw_m, lm2_fiw_m, type ="text", digits =3, dep.var.labels =c("Earnings Post Training"),column.labels =c("Full Data", "Matched Data"),covariate.labels=c("Family interrupt the job (binary)", "Marriage status","Total people in household", "Age group", "Education","Family income","Race", "Sex", "Working Status" ))
=================================================================================
Dependent variable:
-----------------------------------------------
Earnings Post Training
Full Data Matched Data
(1) (2)
---------------------------------------------------------------------------------
Family interrupt the job (binary) 1.683*** 0.099
(0.372) (0.322)
Marriage status -0.324*** 0.101
(0.106) (0.105)
Total people in household -0.014 -0.383***
(0.121) (0.136)
Age group -1.484*** -1.114***
(0.226) (0.247)
Education -0.498*** -0.101
(0.140) (0.130)
Family income -0.159 -2.795
(0.121) (2.447)
Race -0.963*** -0.895***
(0.226) (0.215)
Sex 1.750*** -0.489
(0.349) (0.343)
Working Status -0.444 0.349
(0.323) (0.552)
Constant 11.756*** 39.027
(1.778) (29.431)
---------------------------------------------------------------------------------
Observations 1,980 612
R2 0.077 0.077
Adjusted R2 0.073 0.063
Residual Std. Error 7.640 (df = 1970) 3.875 (df = 602)
F Statistic 18.256*** (df = 9; 1970) 5.596*** (df = 9; 602)
=================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
#######Entropy Balancing# Create a subset of the dataset with the selected variablestreatment_var_fiw_m <-"binary_fiw"covariates_vars_fiw_m <-c("marriage_status", "total_people_in_household", "age_group","education", "family_income", "race", "sex", "working_status")dependent_var_fiw_m <-"mental_health_status"# Prepare treatment and covariatestreatment_fiw_m <- test_data_new_fiw_m$binary_fiwcovariates_fiw_m <- test_data_new_fiw_m[, covariates_vars_fiw_m]# Run entropy balancinge_bal_fiw_m <-ebalance(Treatment = treatment_fiw_m,X = covariates_fiw_m,max.iterations =200,constraint.tolerance =1)
Converged within tolerance
test_data_new_fiw_m$eb_weight_fiw_m <-NAtest_data_new_fiw_m$eb_weight_fiw_m[test_data_new_fiw_m[[treatment_var_fiw_m]] ==1] <-1# Treated units get weight = 1test_data_new_fiw_m$eb_weight_fiw_m[test_data_new_fiw_m[[treatment_var_fiw_m]] ==0] <- e_bal_fiw_m$w # Control units get EB weights# Final data for regressioneb_data_fiw_m <- test_data_new_fiw_m %>%filter(!is.na(eb_weight_fiw_m)) # Exclude unmatched if any#data for analysis #Now have a weight called 'eb_weight' that can be used in analysis ##Let's check that the two groups are equal now eb_data_fiw_m %>%group_by(binary_fiw) %>%summarise(age_weighted_mean_fiw_m =wtd.mean(age_group, weights = eb_weight_fiw_m), age_weighted_variance_fiw_m =wtd.var(age_group, weights = eb_weight_fiw_m) )