This code is designed to explore the factors associated with insurance acceptance among mental health providers (data from psychology today).
The following steps were taken: - Exploratory univariate analysis - Building the full logistic regression model - Comparing reduced models to full model - Running full model through a training and testing sample, then exploring the accuracy of the model - Running the model through a random forest to better understand the importance of the variables on insurance acceptance among mental health providers
data_acs <- read.csv("K:/WorkingData2/User_Folders/Aine/Project Work/Psychology Today/Original files/working data acs 4-5-24.csv")
data_acs <- data_acs[which(!is.na(data_acs$id)), ] #just in case it reuploads weirdly from csv
#Desc(data_acs$id)
#length of df should be 184'957
# unique IDs should be 167'854Exhibit 4. Regression of provider- and county/zip level covariates on insurance acceptance 1. County or zip code composition (population, % white, HRSA supply of MH professionals, income) 2. Provider-level covariates – years in practice, psychologist vs. therapist, telehealth, gender, # of locations (can use addresses to identify multi-provider groups e.g. count number of providers at same location), whether providers themselves practice in >1 location
data_acs_inc <- data_acs[which(data_acs$excluded==0),]
data_acs_inc <- data_acs_inc %>%
group_by(GEOID) %>%
mutate(provider_1k = (n() / county_population)*1000) %>%
ungroup()
#making tertiles
data_acs_inc <- data_acs_inc %>%
mutate(supply_tert = ntile(provider_1k, 3))
data_acs_inc <- data_acs_inc %>%
mutate(supply_tert = case_when(supply_tert==1 ~ "low supply",
supply_tert==2 ~ "mid supply",
supply_tert==3 ~ "high supply")
)
data_acs_inc$supply_tert <- as.factor(data_acs_inc$supply_tert)
data_acs_inc$provider_type <- as.factor(data_acs_inc$provider_type)
data_acs_inc$insurance_flag <- as.factor(data_acs_inc$insurance_flag)
data_acs_inc$any_child <- as.factor(data_acs_inc$any_child)
data_acs_inc$region <- as.factor(data_acs_inc$region)
data_acs_inc$group_pr <- as.factor(data_acs_inc$group_pr)
data_acs_inc$telehealth <- as.factor(data_acs_inc$telehealth)psych_forest <- as.data.frame(select(data_acs_inc, c("insurance_flag", "supply_tert", "provider_type","any_child","region","median_income","group_pr","fraction_white", "fraction_no_health_ins")))
Desc(psych_forest)## ------------------------------------------------------------------------------
## Describe psych_forest (data.frame):
##
## data frame: 175083 obs. of 9 variables
## 175083 complete cases (100.0%)
##
## Nr ColName Class NAs Levels
## 1 insurance_flag factor . (2): 1-0, 2-1
## 2 supply_tert factor . (3): 1-high supply, 2-low
## supply, 3-mid supply
## 3 provider_type factor . (4): 1-Advanced practice
## providers, 2-All other
## therapists and counselors,
## 3-Masters level therapists and
## counselors, 4-Psychologists/PhD
## 4 any_child factor . (2): 1-0, 2-1
## 5 region factor . (4): 1-Midwest, 2-Northeast,
## 3-South, 4-West
## 6 median_income integer .
## 7 group_pr factor . (2): 1-0, 2-1
## 8 fraction_white numeric .
## 9 fraction_no_health_ins numeric .
##
##
## ------------------------------------------------------------------------------
## 1 - insurance_flag (factor - dichotomous)
##
## length n NAs unique
## 175'083 175'083 0 2
## 100.0% 0.0%
##
## freq perc lci.95 uci.95'
## 0 61'422 35.1% 34.9% 35.3%
## 1 113'661 64.9% 64.7% 65.1%
##
## ' 95%-CI (Wilson)
## ------------------------------------------------------------------------------
## 2 - supply_tert (factor)
##
## length n NAs unique levels dupes
## 175'083 175'083 0 3 3 y
## 100.0% 0.0%
##
## level freq perc cumfreq cumperc
## 1 high supply 58'361 33.3% 58'361 33.3%
## 2 low supply 58'361 33.3% 116'722 66.7%
## 3 mid supply 58'361 33.3% 175'083 100.0%
## ------------------------------------------------------------------------------
## 3 - provider_type (factor)
##
## length n NAs unique levels dupes
## 175'083 175'083 0 4 4 y
## 100.0% 0.0%
##
## level freq perc cumfreq cumperc
## 1 Masters level therapists and counselors 80'573 46.0% 80'573 46.0%
## 2 All other therapists and counselors 65'478 37.4% 146'051 83.4%
## 3 Psychologists/PhD 26'489 15.1% 172'540 98.5%
## 4 Advanced practice providers 2'543 1.5% 175'083 100.0%
## ------------------------------------------------------------------------------
## 4 - any_child (factor - dichotomous)
##
## length n NAs unique
## 175'083 175'083 0 2
## 100.0% 0.0%
##
## freq perc lci.95 uci.95'
## 0 61'038 34.9% 34.6% 35.1%
## 1 114'045 65.1% 64.9% 65.4%
##
## ' 95%-CI (Wilson)
## ------------------------------------------------------------------------------
## 5 - region (factor)
##
## length n NAs unique levels dupes
## 175'083 175'083 0 4 4 y
## 100.0% 0.0%
##
## level freq perc cumfreq cumperc
## 1 South 58'361 33.3% 58'361 33.3%
## 2 West 48'847 27.9% 107'208 61.2%
## 3 Midwest 35'057 20.0% 142'265 81.3%
## 4 Northeast 32'818 18.7% 175'083 100.0%
## ------------------------------------------------------------------------------
## 6 - median_income (integer)
##
## length n NAs unique 0s mean'
## 175'083 175'083 0 1'930 0 79'975.52
## 100.0% 0.0% 0.0%
##
## .05 .10 .25 median .75 .90
## 54'096.00 57'625.00 65'554.50 76'367.00 92'600.00 109'497.00
##
## range sd vcoef mad IQR skew
## 132'472.00 19'788.68 0.25 18'023.97 27'045.50 0.80
##
## meanCI
## 79'882.83
## 80'068.21
##
## .95
## 117'345.00
##
## kurt
## 0.49
##
## lowest : 24'349, 24'958, 25'000, 26'395, 29'206 (2)
## highest: 133'974 (1'130), 136'837 (400), 140'258 (819), 155'071 (152), 156'821 (308)
##
## ' 95%-CI (classic)
## ------------------------------------------------------------------------------
## 7 - group_pr (factor - dichotomous)
##
## length n NAs unique
## 175'083 175'083 0 2
## 100.0% 0.0%
##
## freq perc lci.95 uci.95'
## 0 104'650 59.8% 59.5% 60.0%
## 1 70'433 40.2% 40.0% 40.5%
##
## ' 95%-CI (Wilson)
## ------------------------------------------------------------------------------
## 8 - fraction_white (numeric)
##
## length n NAs unique 0s mean'
## 175'083 175'083 0 1'968 0 0.65388363
## 100.0% 0.0% 0.0%
##
## .05 .10 .25 median .75 .90
## 0.40455446 0.43734916 0.53077050 0.66204236 0.76608561 0.86258328
##
## range sd vcoef mad IQR skew
## 0.88685797 0.15491622 0.23691711 0.18615061 0.23531511 -0.22528595
##
## meanCI
## 0.65315799
## 0.65460928
##
## .95
## 0.89229776
##
## kurt
## -0.44534041
##
## lowest : 0.09554931, 0.11500288 (2), 0.1408701 (40), 0.14954472 (456), 0.15072783
## highest: 0.97435702, 0.97494897, 0.97499129, 0.98115465, 0.98240728
##
## ' 95%-CI (classic)
## ------------------------------------------------------------------------------
## 9 - fraction_no_health_ins (numeric)
##
## length n NAs unique 0s mean'
## 175'083 175'083 0 1'969 0 0.07892316
## 100.0% 0.0% 0.0%
##
## .05 .10 .25 median .75 .90
## 0.03374088 0.03922616 0.05158083 0.07274425 0.09945425 0.12392894
##
## range sd vcoef mad IQR skew
## 0.43848897 0.03681770 0.46650060 0.03424311 0.04787342 1.14426349
##
## meanCI
## 0.07875070
## 0.07909562
##
## .95
## 0.14244232
##
## kurt
## 2.15543525
##
## lowest : 0.01056877 (152), 0.01166718, 0.01610489 (33), 0.01964938 (977), 0.02076926 (6)
## highest: 0.29298357, 0.30837921 (37), 0.36968859, 0.41878254, 0.44905774
##
## ' 95%-CI (classic)
uni_prov <- glm(insurance_flag ~ provider_type, data = data_acs_inc,
family = "binomial")
summary(uni_prov) #this is currently comparing against advanced practice providers##
## Call:
## glm(formula = insurance_flag ~ provider_type, family = "binomial",
## data = data_acs_inc)
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 0.11416 0.03973
## provider_typeAll other therapists and counselors 0.83044 0.04067
## provider_typeMasters level therapists and counselors 0.38909 0.04038
## provider_typePsychologists/PhD 0.14574 0.04161
## z value Pr(>|z|)
## (Intercept) 2.874 0.004055 **
## provider_typeAll other therapists and counselors 20.420 < 2e-16 ***
## provider_typeMasters level therapists and counselors 9.635 < 2e-16 ***
## provider_typePsychologists/PhD 3.502 0.000461 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 226890 on 175082 degrees of freedom
## Residual deviance: 224196 on 175079 degrees of freedom
## AIC: 224204
##
## Number of Fisher Scoring iterations: 4
#advanced practice providers are the smallest group, which may not lead to a great
#reference category if cells are small. I will change this to be masters level
#providers for now since that is the largest group
data_acs_inc$provider_type <- contr_code_treatment(data_acs_inc$provider_type,
base = "Masters level therapists and counselors")
#some other primary independent var models
uni_child <- glm(insurance_flag ~ any_child, data = data_acs_inc,
family = "binomial")
summary(uni_child)##
## Call:
## glm(formula = insurance_flag ~ any_child, family = "binomial",
## data = data_acs_inc)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.509375 0.008359 60.94 <2e-16 ***
## any_child1 0.164224 0.010444 15.72 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 226890 on 175082 degrees of freedom
## Residual deviance: 226644 on 175081 degrees of freedom
## AIC: 226648
##
## Number of Fisher Scoring iterations: 4
uni_region <- glm(insurance_flag ~ region, data = data_acs_inc,
family = "binomial")
summary(uni_region)##
## Call:
## glm(formula = insurance_flag ~ region, family = "binomial", data = data_acs_inc)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.26177 0.01288 97.97 <2e-16 ***
## regionNortheast -0.59669 0.01737 -34.35 <2e-16 ***
## regionSouth -0.73377 0.01547 -47.44 <2e-16 ***
## regionWest -0.97037 0.01580 -61.43 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 226890 on 175082 degrees of freedom
## Residual deviance: 222726 on 175079 degrees of freedom
## AIC: 222734
##
## Number of Fisher Scoring iterations: 4
uni_income <- glm(insurance_flag ~ median_income, data = data_acs_inc,
family = "binomial")
summary(uni_income)##
## Call:
## glm(formula = insurance_flag ~ median_income, family = "binomial",
## data = data_acs_inc)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.483e+00 2.100e-02 70.61 <2e-16 ***
## median_income -1.077e-05 2.514e-07 -42.82 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 226890 on 175082 degrees of freedom
## Residual deviance: 225053 on 175081 degrees of freedom
## AIC: 225057
##
## Number of Fisher Scoring iterations: 4
uni_group <- glm(insurance_flag ~ group_pr, data = data_acs_inc,
family = "binomial")
summary(uni_group)##
## Call:
## glm(formula = insurance_flag ~ group_pr, family = "binomial",
## data = data_acs_inc)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.524293 0.006396 81.97 <2e-16 ***
## group_pr1 0.231474 0.010306 22.46 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 226890 on 175082 degrees of freedom
## Residual deviance: 226382 on 175081 degrees of freedom
## AIC: 226386
##
## Number of Fisher Scoring iterations: 4
uni_prov1k <- glm(insurance_flag ~ supply_tert, data = data_acs_inc,
family = "binomial")
summary(uni_prov1k)##
## Call:
## glm(formula = insurance_flag ~ supply_tert, family = "binomial",
## data = data_acs_inc)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.623965 0.008685 71.844 < 2e-16 ***
## supply_tertlow supply 0.071394 0.012353 5.780 7.49e-09 ***
## supply_tertmid supply -0.094866 0.012202 -7.775 7.55e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 226890 on 175082 degrees of freedom
## Residual deviance: 226705 on 175080 degrees of freedom
## AIC: 226711
##
## Number of Fisher Scoring iterations: 4
uni_frac_wh <- glm(insurance_flag ~ fraction_white, data = data_acs_inc,
family = "binomial")
summary(uni_frac_wh)##
## Call:
## glm(formula = insurance_flag ~ fraction_white, family = "binomial",
## data = data_acs_inc)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.09857 0.02155 -4.573 4.8e-06 ***
## fraction_white 1.09844 0.03243 33.870 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 226890 on 175082 degrees of freedom
## Residual deviance: 225737 on 175081 degrees of freedom
## AIC: 225741
##
## Number of Fisher Scoring iterations: 4
uni_no_hi <- glm(insurance_flag ~ fraction_no_health_ins, data = data_acs_inc,
family = "binomial")
summary(uni_no_hi)##
## Call:
## glm(formula = insurance_flag ~ fraction_no_health_ins, family = "binomial",
## data = data_acs_inc)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.72061 0.01184 60.850 <2e-16 ***
## fraction_no_health_ins -1.32794 0.13508 -9.831 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 226890 on 175082 degrees of freedom
## Residual deviance: 226794 on 175081 degrees of freedom
## AIC: 226798
##
## Number of Fisher Scoring iterations: 4
model1 <- glm(insurance_flag ~ supply_tert + provider_type + any_child
+ region + median_income + group_pr + fraction_white
+ fraction_no_health_ins, data = data_acs_inc, family = "binomial")
summary(model1)##
## Call:
## glm(formula = insurance_flag ~ supply_tert + provider_type +
## any_child + region + median_income + group_pr + fraction_white +
## fraction_no_health_ins, family = "binomial", data = data_acs_inc)
##
## Coefficients:
## Estimate
## (Intercept) 1.579e+00
## supply_tertlow supply 4.015e-02
## supply_tertmid supply -1.578e-01
## provider_type.Advanced practice providers-Masters level therapists and counselors -3.788e-01
## provider_type.All other therapists and counselors-Masters level therapists and counselors 4.641e-01
## provider_type.Psychologists/PhD-Masters level therapists and counselors -1.941e-01
## any_child1 1.394e-01
## regionNortheast -4.712e-01
## regionSouth -6.021e-01
## regionWest -7.629e-01
## median_income -1.032e-05
## group_pr1 1.719e-01
## fraction_white 4.839e-01
## fraction_no_health_ins -2.215e+00
## Std. Error
## (Intercept) 4.750e-02
## supply_tertlow supply 1.359e-02
## supply_tertmid supply 1.287e-02
## provider_type.Advanced practice providers-Masters level therapists and counselors 4.108e-02
## provider_type.All other therapists and counselors-Masters level therapists and counselors 1.171e-02
## provider_type.Psychologists/PhD-Masters level therapists and counselors 1.480e-02
## any_child1 1.083e-02
## regionNortheast 1.846e-02
## regionSouth 1.789e-02
## regionWest 1.680e-02
## median_income 3.159e-07
## group_pr1 1.075e-02
## fraction_white 3.534e-02
## fraction_no_health_ins 1.955e-01
## z value
## (Intercept) 33.240
## supply_tertlow supply 2.955
## supply_tertmid supply -12.268
## provider_type.Advanced practice providers-Masters level therapists and counselors -9.220
## provider_type.All other therapists and counselors-Masters level therapists and counselors 39.634
## provider_type.Psychologists/PhD-Masters level therapists and counselors -13.118
## any_child1 12.876
## regionNortheast -25.525
## regionSouth -33.649
## regionWest -45.409
## median_income -32.666
## group_pr1 15.998
## fraction_white 13.692
## fraction_no_health_ins -11.327
## Pr(>|z|)
## (Intercept) < 2e-16
## supply_tertlow supply 0.00313
## supply_tertmid supply < 2e-16
## provider_type.Advanced practice providers-Masters level therapists and counselors < 2e-16
## provider_type.All other therapists and counselors-Masters level therapists and counselors < 2e-16
## provider_type.Psychologists/PhD-Masters level therapists and counselors < 2e-16
## any_child1 < 2e-16
## regionNortheast < 2e-16
## regionSouth < 2e-16
## regionWest < 2e-16
## median_income < 2e-16
## group_pr1 < 2e-16
## fraction_white < 2e-16
## fraction_no_health_ins < 2e-16
##
## (Intercept) ***
## supply_tertlow supply **
## supply_tertmid supply ***
## provider_type.Advanced practice providers-Masters level therapists and counselors ***
## provider_type.All other therapists and counselors-Masters level therapists and counselors ***
## provider_type.Psychologists/PhD-Masters level therapists and counselors ***
## any_child1 ***
## regionNortheast ***
## regionSouth ***
## regionWest ***
## median_income ***
## group_pr1 ***
## fraction_white ***
## fraction_no_health_ins ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 226890 on 175082 degrees of freedom
## Residual deviance: 217820 on 175069 degrees of freedom
## AIC: 217848
##
## Number of Fisher Scoring iterations: 4
## GVIF Df GVIF^(1/(2*Df))
## supply_tert 1.168423 2 1.039681
## provider_type 1.050051 3 1.008173
## any_child 1.021663 1 1.010774
## region 1.700132 3 1.092481
## median_income 1.517536 1 1.231883
## group_pr 1.035179 1 1.017437
## fraction_white 1.129257 1 1.062665
## fraction_no_health_ins 2.018609 1 1.420778
| Df | Deviance | Resid. Df | Resid. Dev | Pr(>Chi) | |
|---|---|---|---|---|---|
| NULL | NA | NA | 175082 | 226890.3 | NA |
| supply_tert | 2 | 185.1744 | 175080 | 226705.1 | 0 |
| provider_type | 3 | 2712.3846 | 175077 | 223992.7 | 0 |
| any_child | 1 | 285.8982 | 175076 | 223706.8 | 0 |
| region | 3 | 4169.6224 | 175073 | 219537.2 | 0 |
| median_income | 1 | 1053.9256 | 175072 | 218483.3 | 0 |
| group_pr | 1 | 257.4005 | 175071 | 218225.9 | 0 |
| fraction_white | 1 | 278.0680 | 175070 | 217947.8 | 0 |
| fraction_no_health_ins | 1 | 128.0225 | 175069 | 217819.8 | 0 |
options(width = 60)
model2 <- glm(insurance_flag ~ provider_type + any_child + region
+ median_income + group_pr + fraction_white + fraction_no_health_ins,
data = data_acs_inc, family = "binomial")
#summary(model2)
anova(model2, model1, test='LR')| Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
|---|---|---|---|---|
| 175071 | 218093.1 | NA | NA | NA |
| 175069 | 217819.8 | 2 | 273.3505 | 0 |
model3 <- glm(insurance_flag ~ supply_tert + any_child + region
+ median_income + group_pr + fraction_white + fraction_no_health_ins,
data = data_acs_inc, family = "binomial")
#summary(model3)
anova(model3, model1, test='LR')| Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
|---|---|---|---|---|
| 175072 | 220377.5 | NA | NA | NA |
| 175069 | 217819.8 | 3 | 2557.744 | 0 |
model4 <- glm(insurance_flag ~ supply_tert + provider_type + region
+ median_income + group_pr + fraction_white + fraction_no_health_ins,
data = data_acs_inc, family = "binomial")
#summary(model4)
anova(model4, model1, test='LR')| Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
|---|---|---|---|---|
| 175070 | 217985.1 | NA | NA | NA |
| 175069 | 217819.8 | 1 | 165.3135 | 0 |
model5 <- glm(insurance_flag ~ supply_tert + provider_type + any_child
+ median_income + group_pr + fraction_white + fraction_no_health_ins,
data = data_acs_inc, family = "binomial")
#summary(model5)
anova(model5, model1, test='LR')| Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
|---|---|---|---|---|
| 175072 | 219987.4 | NA | NA | NA |
| 175069 | 217819.8 | 3 | 2167.596 | 0 |
model6 <- glm(insurance_flag ~ supply_tert + provider_type + any_child + region
+ group_pr + fraction_white + fraction_no_health_ins,
data = data_acs_inc, family = "binomial")
#summary(model6)
anova(model6, model1, test='LR')| Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
|---|---|---|---|---|
| 175070 | 218892.0 | NA | NA | NA |
| 175069 | 217819.8 | 1 | 1072.195 | 0 |
model7 <- glm(insurance_flag ~ supply_tert + provider_type + any_child + region
+ median_income + fraction_white + fraction_no_health_ins,
data = data_acs_inc, family = "binomial")
#summary(model7)
anova(model7, model1, test='LR')| Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
|---|---|---|---|---|
| 175070 | 218076.6 | NA | NA | NA |
| 175069 | 217819.8 | 1 | 256.842 | 0 |
model8 <- glm(insurance_flag ~ supply_tert + provider_type + any_child + region
+ median_income + group_pr + fraction_no_health_ins,
data = data_acs_inc, family = "binomial")
#summary(model8)
anova(model8, model1, test='LR')| Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
|---|---|---|---|---|
| 175070 | 218007.1 | NA | NA | NA |
| 175069 | 217819.8 | 1 | 187.3096 | 0 |
model9 <- glm(insurance_flag ~ supply_tert + provider_type + any_child + region
+ median_income + group_pr + fraction_white,
data = data_acs_inc, family = "binomial")
#summary(model9)
anova(model9, model1, test='LR')| Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
|---|---|---|---|---|
| 175070 | 217947.8 | NA | NA | NA |
| 175069 | 217819.8 | 1 | 128.0225 | 0 |
## Waiting for profiling to be done...
## OR
## (Intercept) 4.8504912
## supply_tertlow supply 1.0409650
## supply_tertmid supply 0.8539787
## provider_type.Advanced practice providers-Masters level therapists and counselors 0.6847082
## provider_type.All other therapists and counselors-Masters level therapists and counselors 1.5905917
## provider_type.Psychologists/PhD-Masters level therapists and counselors 0.8235810
## any_child1 1.1495943
## regionNortheast 0.6242585
## regionSouth 0.5476330
## regionWest 0.4663065
## median_income 0.9999897
## group_pr1 1.1875774
## fraction_white 1.6223401
## fraction_no_health_ins 0.1091874
## 2.5 %
## (Intercept) 4.41942179
## supply_tertlow supply 1.01361152
## supply_tertmid supply 0.83271046
## provider_type.Advanced practice providers-Masters level therapists and counselors 0.63176617
## provider_type.All other therapists and counselors-Masters level therapists and counselors 1.55451686
## provider_type.Psychologists/PhD-Masters level therapists and counselors 0.80004626
## any_child1 1.12545105
## regionNortheast 0.60206547
## regionSouth 0.52874464
## regionWest 0.45118757
## median_income 0.99998906
## group_pr1 1.16283179
## fraction_white 1.51376793
## fraction_no_health_ins 0.07443379
## 97.5 %
## (Intercept) 5.3240009
## supply_tertlow supply 1.0690577
## supply_tertmid supply 0.8757852
## provider_type.Advanced practice providers-Masters level therapists and counselors 0.7421629
## provider_type.All other therapists and counselors-Masters level therapists and counselors 1.6275344
## provider_type.Psychologists/PhD-Masters level therapists and counselors 0.8478208
## any_child1 1.1742458
## regionNortheast 0.6472477
## regionSouth 0.5671664
## regionWest 0.4819026
## median_income 0.9999903
## group_pr1 1.2128622
## fraction_white 1.7386952
## fraction_no_health_ins 0.1601839
data_acs_inc$supply_tert <- contr_code_treatment(data_acs_inc$supply_tert,
base = "low supply")
data_acs_inc$provider_type <- contr_code_treatment(data_acs_inc$provider_type,
base = "Psychologists/PhD")
data_acs_inc$region <- contr_code_treatment(data_acs_inc$region, base = "West")
model11 <- glm(insurance_flag ~ supply_tert + provider_type + any_child + region
+ median_income + group_pr + fraction_white + fraction_no_health_ins,
data = data_acs_inc, family = "binomial")
summary(model11)##
## Call:
## glm(formula = insurance_flag ~ supply_tert + provider_type +
## any_child + region + median_income + group_pr + fraction_white +
## fraction_no_health_ins, family = "binomial", data = data_acs_inc)
##
## Coefficients:
## Estimate
## (Intercept) 6.622e-01
## supply_tert.high supply-low supply -4.015e-02
## supply_tert.mid supply-low supply -1.980e-01
## provider_type.Advanced practice providers-Psychologists/PhD -1.847e-01
## provider_type.All other therapists and counselors-Psychologists/PhD 6.582e-01
## provider_type.Masters level therapists and counselors-Psychologists/PhD 1.941e-01
## any_child1 1.394e-01
## region.Midwest-West 7.629e-01
## region.Northeast-West 2.917e-01
## region.South-West 1.608e-01
## median_income -1.032e-05
## group_pr1 1.719e-01
## fraction_white 4.839e-01
## fraction_no_health_ins -2.215e+00
## Std. Error
## (Intercept) 4.941e-02
## supply_tert.high supply-low supply 1.359e-02
## supply_tert.mid supply-low supply 1.286e-02
## provider_type.Advanced practice providers-Psychologists/PhD 4.237e-02
## provider_type.All other therapists and counselors-Psychologists/PhD 1.552e-02
## provider_type.Masters level therapists and counselors-Psychologists/PhD 1.480e-02
## any_child1 1.083e-02
## region.Midwest-West 1.680e-02
## region.Northeast-West 1.572e-02
## region.South-West 1.390e-02
## median_income 3.159e-07
## group_pr1 1.075e-02
## fraction_white 3.534e-02
## fraction_no_health_ins 1.955e-01
## z value
## (Intercept) 13.402
## supply_tert.high supply-low supply -2.955
## supply_tert.mid supply-low supply -15.396
## provider_type.Advanced practice providers-Psychologists/PhD -4.358
## provider_type.All other therapists and counselors-Psychologists/PhD 42.398
## provider_type.Masters level therapists and counselors-Psychologists/PhD 13.118
## any_child1 12.876
## region.Midwest-West 45.409
## region.Northeast-West 18.554
## region.South-West 11.564
## median_income -32.666
## group_pr1 15.998
## fraction_white 13.692
## fraction_no_health_ins -11.327
## Pr(>|z|)
## (Intercept) < 2e-16
## supply_tert.high supply-low supply 0.00313
## supply_tert.mid supply-low supply < 2e-16
## provider_type.Advanced practice providers-Psychologists/PhD 1.31e-05
## provider_type.All other therapists and counselors-Psychologists/PhD < 2e-16
## provider_type.Masters level therapists and counselors-Psychologists/PhD < 2e-16
## any_child1 < 2e-16
## region.Midwest-West < 2e-16
## region.Northeast-West < 2e-16
## region.South-West < 2e-16
## median_income < 2e-16
## group_pr1 < 2e-16
## fraction_white < 2e-16
## fraction_no_health_ins < 2e-16
##
## (Intercept) ***
## supply_tert.high supply-low supply **
## supply_tert.mid supply-low supply ***
## provider_type.Advanced practice providers-Psychologists/PhD ***
## provider_type.All other therapists and counselors-Psychologists/PhD ***
## provider_type.Masters level therapists and counselors-Psychologists/PhD ***
## any_child1 ***
## region.Midwest-West ***
## region.Northeast-West ***
## region.South-West ***
## median_income ***
## group_pr1 ***
## fraction_white ***
## fraction_no_health_ins ***
## ---
## Signif. codes:
## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 226890 on 175082 degrees of freedom
## Residual deviance: 217820 on 175069 degrees of freedom
## AIC: 217848
##
## Number of Fisher Scoring iterations: 4
## Waiting for profiling to be done...
## OR
## (Intercept) 1.9390976
## supply_tert.high supply-low supply 0.9606471
## supply_tert.mid supply-low supply 0.8203722
## provider_type.Advanced practice providers-Psychologists/PhD 0.8313793
## provider_type.All other therapists and counselors-Psychologists/PhD 1.9313117
## provider_type.Masters level therapists and counselors-Psychologists/PhD 1.2142096
## any_child1 1.1495943
## region.Midwest-West 2.1445121
## region.Northeast-West 1.3387298
## region.South-West 1.1744055
## median_income 0.9999897
## group_pr1 1.1875774
## fraction_white 1.6223401
## fraction_no_health_ins 0.1091874
## 2.5 %
## (Intercept) 1.76014418
## supply_tert.high supply-low supply 0.93540323
## supply_tert.mid supply-low supply 0.79994891
## provider_type.Advanced practice providers-Psychologists/PhD 0.76515578
## provider_type.All other therapists and counselors-Psychologists/PhD 1.87342995
## provider_type.Masters level therapists and counselors-Psychologists/PhD 1.17949448
## any_child1 1.12545105
## region.Midwest-West 2.07510798
## region.Northeast-West 1.29811958
## region.South-West 1.14284048
## median_income 0.99998906
## group_pr1 1.16283179
## fraction_white 1.51376793
## fraction_no_health_ins 0.07443379
## 97.5 %
## (Intercept) 2.1363443
## supply_tert.high supply-low supply 0.9865713
## supply_tert.mid supply-low supply 0.8413106
## provider_type.Advanced practice providers-Psychologists/PhD 0.9034182
## provider_type.All other therapists and counselors-Psychologists/PhD 1.9909768
## provider_type.Masters level therapists and counselors-Psychologists/PhD 1.2499277
## any_child1 1.1742458
## region.Midwest-West 2.2163731
## region.Northeast-West 1.3806422
## region.South-West 1.2068450
## median_income 0.9999903
## group_pr1 1.2128622
## fraction_white 1.7386952
## fraction_no_health_ins 0.1601839
# Split data into train and test
set.seed(421)
split <- initial_split(data_acs_inc, prop = 0.8, strata = insurance_flag)
train <- split %>%
training()
test <- split %>%
testing()mod_train <- glm(insurance_flag ~ supply_tert + provider_type +
any_child + region + median_income + group_pr +
fraction_white + fraction_no_health_ins,
data = train, family = "binomial")
glm_probs = data.frame(probs = predict(mod_train,
newdata = test,
type="response"))## Warning: contrasts dropped from factor supply_tert
## Warning: contrasts dropped from factor provider_type
## Warning: contrasts dropped from factor region
glm_pred = glm_probs %>%
mutate(pred = ifelse(probs>.5, "accept", "does not accept"))
glm_pred = cbind(test, glm_pred)
glm_pred %>%
count(pred, insurance_flag) %>%
spread(insurance_flag, n, fill = 0)| pred | 0 | 1 |
|---|---|---|
| accept | 10731 | 21410 |
| does not accept | 1554 | 1323 |
## [1] 0.655777
pr <- prediction(glm_probs,test$insurance_flag)
perf <- performance(pr,measure = "tpr",x.measure = "fpr")
plot(perf) ## Warning in auc(test$insurance_flag, glm_probs): longer
## object length is not a multiple of shorter object length
## [1] 0.6350857
set.seed(421)
split <- initial_split(psych_forest, prop = 0.7, strata = insurance_flag)
train <- split %>%
training()
test <- split %>%
testing()model <- randomForest(y=train[,1], x=train[,2:9], data = train, ntree = 500, mtry = 5, importance = TRUE , proximity = FALSE)
model##
## Call:
## randomForest(x = train[, 2:9], y = train[, 1], ntree = 500, mtry = 5, importance = TRUE, proximity = FALSE, data = train)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 5
##
## OOB estimate of error rate: 32.26%
## Confusion matrix:
## 0 1 class.error
## 0 16829 26166 0.6085824
## 1 13372 66190 0.1680702
## 0 1 class.error
## 0 16829 26166 0.6085824
## 1 13372 66190 0.1680702
set.seed(421)
# The next step is to validate our model using the test data
prediction <- predict(model, newdata = test)
table(prediction, test$insurance_flag) #(28324+7656)/(28324+7656+10771+5775)##
## prediction 0 1
## 0 7409 5569
## 1 11018 28530
results<-cbind(prediction,test$insurance_flag)
colnames(results)<-c('pred','real')
results<-as.data.frame(results)
# Finally, let’s calculate the accuracy of the model
sum(prediction==test$insurance_flag) / nrow(test)## [1] 0.6842135
#another way to view this
imp=as.data.frame(importance(model)) #Analyze the importance of each variable in the model
imp=cbind(vars=rownames(imp),imp)
print(imp)## vars 0
## supply_tert supply_tert 29.55256
## provider_type provider_type 193.57484
## any_child any_child 50.29224
## region region 59.88007
## median_income median_income 82.57672
## group_pr group_pr 71.35168
## fraction_white fraction_white 61.36744
## fraction_no_health_ins fraction_no_health_ins 53.19779
## 1 MeanDecreaseAccuracy
## supply_tert 63.89101 105.3157
## provider_type 171.59124 317.2663
## any_child 120.23778 139.9174
## region 88.36147 160.9842
## median_income 84.91863 227.3938
## group_pr 153.82289 189.3485
## fraction_white 87.74077 155.8341
## fraction_no_health_ins 106.55105 190.7691
## MeanDecreaseGini
## supply_tert 572.5611
## provider_type 1747.6035
## any_child 924.7983
## region 1322.0404
## median_income 2726.5782
## group_pr 853.3012
## fraction_white 2690.2180
## fraction_no_health_ins 2439.5981
par(mfrow = c(2, 2))
partialPlot(model, test, median_income, "1", xlab="Median Income",
ylab="Log of fraction of votes", lwd=4, col="green")
partialPlot(model, test, fraction_white, "1", xlab="Fraction White",
ylab="Log of fraction of votes", lwd=4, col="green")
partialPlot(model, test, fraction_no_health_ins, "1",
xlab="Fraction no health insurance",
ylab="Log of fraction of votes", lwd=4, col="green")
partialPlot(x=model, pred.data=test, x.var=provider_type, which.class="1")par(mfrow = c(2, 2))
partialPlot(x=model, pred.data=test, x.var=supply_tert, which.class="1")
partialPlot(x=model, pred.data=test, x.var=any_child, which.class="1")
partialPlot(x=model, pred.data=test, x.var=region, which.class="1")
partialPlot(x=model, pred.data=test, x.var=group_pr, which.class="1")Importance is for predicting that a mental health provider will accept insurance (insurance_flag==1)
R version 4.3.1 (2023-06-16 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows Server 2019 x64 (build 17763)
Matrix products: default
attached base packages:
[1] stats graphics grDevices utils datasets
[6] methods base
other attached packages:
[1] summarytools_1.0.1 Metrics_0.1.4
[3] car_3.1-2 carData_3.0-5
[5] ROCR_1.0-11 randomForest_4.7-1.1
[7] aod_1.3.3 faux_1.2.1
[9] ggpubr_0.6.0 unhcrthemes_0.6.2
[11] gtsummary_1.7.2 tigris_2.1
[13] DescTools_0.99.52 lubridate_1.9.3
[15] forcats_1.0.0 stringr_1.5.1
[17] readr_2.1.5 tidyverse_2.0.0
[19] magrittr_2.0.3 yardstick_1.3.1
[21] workflowsets_1.1.0 workflows_1.1.4
[23] tune_1.2.0 tidyr_1.3.1
[25] tibble_3.2.1 rsample_1.2.1
[27] recipes_1.0.10 purrr_1.0.2
[29] parsnip_1.2.1 modeldata_1.3.0
[31] infer_1.0.6 ggplot2_3.5.0
[33] dplyr_1.1.4 dials_1.2.1
[35] scales_1.3.0 broom_1.0.5
[37] tidymodels_1.2.0 data.table_1.14.10
[39] chse_1.3.2
loaded via a namespace (and not attached):
[1] rstudioapi_0.16.0 jsonlite_1.8.8
[3] magick_2.8.2 rmarkdown_2.26
[5] vctrs_0.6.5 base64enc_0.1-3
[7] rstatix_0.7.2 htmltools_0.5.7
[9] cellranger_1.1.0 sass_0.4.8
[11] parallelly_1.36.0 KernSmooth_2.23-22
[13] bslib_0.6.1 plyr_1.8.9
[15] rootSolve_1.8.2.4 cachem_1.0.8
[17] gt_0.10.0 uuid_1.2-0
[19] lifecycle_1.0.4 iterators_1.0.14
[21] pkgconfig_2.0.3 Matrix_1.6-4
[23] R6_2.5.1 fastmap_1.1.1
[25] future_1.33.1 digest_0.6.34
[27] Exact_3.2 colorspace_2.1-0
[29] furrr_0.3.1 fansi_1.0.6
[31] timechange_0.2.0 httr_1.4.7
[33] abind_1.4-5 compiler_4.3.1
[35] proxy_0.4-27 pander_0.6.5
[37] withr_3.0.0 backports_1.4.1
[39] DBI_1.2.2 highr_0.10
[41] Rttf2pt1_1.3.12 ggsignif_0.6.4
[43] MASS_7.3-60 lava_1.7.3
[45] rappdirs_0.3.3 classInt_0.4-10
[47] gld_2.6.6 tools_4.3.1
[49] units_0.8-5 extrafontdb_1.0
[51] future.apply_1.11.1 nnet_7.3-19
[53] glue_1.7.0 grid_4.3.1
[55] sf_1.0-15 checkmate_2.3.1
[57] reshape2_1.4.4 generics_0.1.3
[59] gtable_0.3.4 tzdb_0.4.0
[61] class_7.3-22 lmom_3.0
[63] hms_1.1.3 xml2_1.3.6
[65] utf8_1.2.4 ggrepel_0.9.4
[67] foreach_1.5.2 pillar_1.9.0
[69] splines_4.3.1 lhs_1.1.6
[71] pryr_0.1.6 lattice_0.22-5
[73] survival_3.5-7 tidyselect_1.2.0
[75] knitr_1.45 xfun_0.41
[77] expm_0.999-8 hardhat_1.3.1
[79] rapportools_1.1 matrixStats_1.2.0
[81] timeDate_4032.109 stringi_1.8.3
[83] DiceDesign_1.10 yaml_2.3.8
[85] boot_1.3-28.1 evaluate_0.23
[87] codetools_0.2-19 tcltk_4.3.1
[89] extrafont_0.19 cli_3.6.2
[91] rpart_4.1.23 systemfonts_1.0.5
[93] munsell_0.5.0 jquerylib_0.1.4
[95] Rcpp_1.0.12 readxl_1.4.3
[97] globals_0.16.2 parallel_4.3.1
[99] gower_1.0.1 GPfit_1.0-8
[101] listenv_0.9.0 broom.helpers_1.14.0
[103] mvtnorm_1.2-4 ipred_0.9-14
[105] prodlim_2023.08.28 e1071_1.7-14
[107] rlang_1.1.3