This code is designed to explore the factors associated with insurance acceptance among mental health providers (data from psychology today).
data_acs <- read.csv("K:/WorkingData2/User_Folders/Aine/Project Work/Psychology Today/Original files/working data acs 4-5-24.csv")
data_acs <- data_acs[which(!is.na(data_acs$id)), ] #just in case it reuploads weirdly from csv
#Desc(data_acs$id)
#length of df should be 184'957
# unique IDs should be 167'854metros <- read.csv("K:/WorkingData2/User_Folders/Aine/Project Work/Psychology Today/Original files/NCHSURCodes2013.csv")
data_acs <- left_join(data_acs, metros, by = "fips")
data_acs <- data_acs %>%
mutate(metro_code = case_when(metro_codes==1 | metro_codes==2 ~ "large",
metro_codes==3 | metro_codes==4 ~ "small/mid",
metro_codes==5 | metro_codes==6 ~ "non")
)Exhibit 4. Regression of provider- and county/zip level covariates on insurance acceptance 1. County or zip code composition (population, % white, HRSA supply of MH professionals, income) 2. Provider-level covariates – years in practice, psychologist vs. therapist, telehealth, gender, # of locations (can use addresses to identify multi-provider groups e.g. count number of providers at same location), whether providers themselves practice in >1 location
data_acs_inc <- data_acs[which(data_acs$excluded==0),]
data_acs_inc <- data_acs_inc %>%
group_by(GEOID) %>%
mutate(provider_1k = (n() / county_population)*1000) %>%
ungroup()
#making tertiles
data_acs_inc <- data_acs_inc %>%
mutate(supply_tert = ntile(provider_1k, 3))
data_acs_inc <- data_acs_inc %>%
mutate(supply_tert = case_when(supply_tert==1 ~ "low",
supply_tert==2 ~ "mid",
supply_tert==3 ~ "high")
)
data_acs_inc <- data_acs_inc %>%
mutate(provider_type = case_when(provider_type=="Masters level therapists and counselors" ~ "masters",
provider_type=="All other therapists and counselors" ~ "counselors",
provider_type=="Psychologists/PhD" ~ "psych",
provider_type=="Advanced practice providers" ~ "advanced")
)
data_acs_inc$supply_tert <- as.factor(data_acs_inc$supply_tert)
data_acs_inc$provs <- as.factor(data_acs_inc$provider_type)
data_acs_inc$insurance_flag <- as.factor(data_acs_inc$insurance_flag)
data_acs_inc$any_child <- as.factor(data_acs_inc$any_child)
data_acs_inc$region <- as.factor(data_acs_inc$region)
data_acs_inc$group_pr <- as.factor(data_acs_inc$group_pr)
data_acs_inc$telehealth <- as.factor(data_acs_inc$telehealth)
data_acs_inc$metro_code <- as.factor(data_acs_inc$metro_code)
#i need to adjust the names in the provider_type variable so it doesn't mess with
# our outputpsych_forest <- as.data.frame(select(data_acs_inc, c("insurance_flag", "supply_tert", "provs","any_child","region","median_income","group_pr","fraction_white", "fraction_no_health_ins", "metro_code")))
Desc(psych_forest)## ------------------------------------------------------------------------------
## Describe psych_forest (data.frame):
##
## data frame: 175083 obs. of 10 variables
## 175083 complete cases (100.0%)
##
## Nr ColName Class NAs Levels
## 1 insurance_flag factor . (2): 1-0, 2-1
## 2 supply_tert factor . (3): 1-high, 2-low, 3-mid
## 3 provs factor . (4): 1-advanced, 2-counselors,
## 3-masters, 4-psych
## 4 any_child factor . (2): 1-0, 2-1
## 5 region factor . (4): 1-Midwest, 2-Northeast,
## 3-South, 4-West
## 6 median_income integer .
## 7 group_pr factor . (2): 1-0, 2-1
## 8 fraction_white numeric .
## 9 fraction_no_health_ins numeric .
## 10 metro_code factor . (3): 1-large, 2-non,
## 3-small/mid
##
##
## ------------------------------------------------------------------------------
## 1 - insurance_flag (factor - dichotomous)
##
## length n NAs unique
## 175'083 175'083 0 2
## 100.0% 0.0%
##
## freq perc lci.95 uci.95'
## 0 61'422 35.1% 34.9% 35.3%
## 1 113'661 64.9% 64.7% 65.1%
##
## ' 95%-CI (Wilson)
## ------------------------------------------------------------------------------
## 2 - supply_tert (factor)
##
## length n NAs unique levels dupes
## 175'083 175'083 0 3 3 y
## 100.0% 0.0%
##
## level freq perc cumfreq cumperc
## 1 high 58'361 33.3% 58'361 33.3%
## 2 low 58'361 33.3% 116'722 66.7%
## 3 mid 58'361 33.3% 175'083 100.0%
## ------------------------------------------------------------------------------
## 3 - provs (factor)
##
## length n NAs unique levels dupes
## 175'083 175'083 0 4 4 y
## 100.0% 0.0%
##
## level freq perc cumfreq cumperc
## 1 masters 80'573 46.0% 80'573 46.0%
## 2 counselors 65'478 37.4% 146'051 83.4%
## 3 psych 26'489 15.1% 172'540 98.5%
## 4 advanced 2'543 1.5% 175'083 100.0%
## ------------------------------------------------------------------------------
## 4 - any_child (factor - dichotomous)
##
## length n NAs unique
## 175'083 175'083 0 2
## 100.0% 0.0%
##
## freq perc lci.95 uci.95'
## 0 61'038 34.9% 34.6% 35.1%
## 1 114'045 65.1% 64.9% 65.4%
##
## ' 95%-CI (Wilson)
## ------------------------------------------------------------------------------
## 5 - region (factor)
##
## length n NAs unique levels dupes
## 175'083 175'083 0 4 4 y
## 100.0% 0.0%
##
## level freq perc cumfreq cumperc
## 1 South 58'361 33.3% 58'361 33.3%
## 2 West 48'847 27.9% 107'208 61.2%
## 3 Midwest 35'057 20.0% 142'265 81.3%
## 4 Northeast 32'818 18.7% 175'083 100.0%
## ------------------------------------------------------------------------------
## 6 - median_income (integer)
##
## length n NAs unique 0s mean'
## 175'083 175'083 0 1'930 0 79'975.52
## 100.0% 0.0% 0.0%
##
## .05 .10 .25 median .75 .90
## 54'096.00 57'625.00 65'554.50 76'367.00 92'600.00 109'497.00
##
## range sd vcoef mad IQR skew
## 132'472.00 19'788.68 0.25 18'023.97 27'045.50 0.80
##
## meanCI
## 79'882.83
## 80'068.21
##
## .95
## 117'345.00
##
## kurt
## 0.49
##
## lowest : 24'349, 24'958, 25'000, 26'395, 29'206 (2)
## highest: 133'974 (1'130), 136'837 (400), 140'258 (819), 155'071 (152), 156'821 (308)
##
## ' 95%-CI (classic)
## ------------------------------------------------------------------------------
## 7 - group_pr (factor - dichotomous)
##
## length n NAs unique
## 175'083 175'083 0 2
## 100.0% 0.0%
##
## freq perc lci.95 uci.95'
## 0 104'650 59.8% 59.5% 60.0%
## 1 70'433 40.2% 40.0% 40.5%
##
## ' 95%-CI (Wilson)
## ------------------------------------------------------------------------------
## 8 - fraction_white (numeric)
##
## length n NAs unique 0s mean'
## 175'083 175'083 0 1'968 0 0.65388363
## 100.0% 0.0% 0.0%
##
## .05 .10 .25 median .75 .90
## 0.40455446 0.43734916 0.53077050 0.66204236 0.76608561 0.86258328
##
## range sd vcoef mad IQR skew
## 0.88685797 0.15491622 0.23691711 0.18615061 0.23531511 -0.22528595
##
## meanCI
## 0.65315799
## 0.65460928
##
## .95
## 0.89229776
##
## kurt
## -0.44534041
##
## lowest : 0.09554931, 0.11500288 (2), 0.1408701 (40), 0.14954472 (456), 0.15072783
## highest: 0.97435702, 0.97494897, 0.97499129, 0.98115465, 0.98240728
##
## ' 95%-CI (classic)
## ------------------------------------------------------------------------------
## 9 - fraction_no_health_ins (numeric)
##
## length n NAs unique 0s mean'
## 175'083 175'083 0 1'969 0 0.07892316
## 100.0% 0.0% 0.0%
##
## .05 .10 .25 median .75 .90
## 0.03374088 0.03922616 0.05158083 0.07274425 0.09945425 0.12392894
##
## range sd vcoef mad IQR skew
## 0.43848897 0.03681770 0.46650060 0.03424311 0.04787342 1.14426349
##
## meanCI
## 0.07875070
## 0.07909562
##
## .95
## 0.14244232
##
## kurt
## 2.15543525
##
## lowest : 0.01056877 (152), 0.01166718, 0.01610489 (33), 0.01964938 (977), 0.02076926 (6)
## highest: 0.29298357, 0.30837921 (37), 0.36968859, 0.41878254, 0.44905774
##
## ' 95%-CI (classic)
## ------------------------------------------------------------------------------
## 10 - metro_code (factor)
##
## length n NAs unique levels dupes
## 175'083 175'083 0 3 3 y
## 100.0% 0.0%
##
## level freq perc cumfreq cumperc
## 1 large 122'282 69.8% 122'282 69.8%
## 2 small/mid 45'791 26.2% 168'073 96.0%
## 3 non 7'010 4.0% 175'083 100.0%
#this is currently comparing against advanced practice providers
#advanced practice providers are the smallest group, which may not lead to a great
#reference category if cells are small. I will change this to be masters level
#providers for now since that is the largest group
data_acs_inc$provs <- contr_code_treatment(data_acs_inc$provs,
base = "masters")
uni_prov <- glm(insurance_flag ~ provs, data = data_acs_inc,
family = "binomial")
summary(uni_prov) ##
## Call:
## glm(formula = insurance_flag ~ provs, family = "binomial", data = data_acs_inc)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.50325 0.00727 69.222 <2e-16 ***
## provs.advanced-masters -0.38909 0.04038 -9.635 <2e-16 ***
## provs.counselors-masters 0.44135 0.01134 38.917 <2e-16 ***
## provs.psych-masters -0.24335 0.01437 -16.938 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 226890 on 175082 degrees of freedom
## Residual deviance: 224196 on 175079 degrees of freedom
## AIC: 224204
##
## Number of Fisher Scoring iterations: 4
#some other primary independent var models
uni_child <- glm(insurance_flag ~ any_child, data = data_acs_inc,
family = "binomial")
summary(uni_child)##
## Call:
## glm(formula = insurance_flag ~ any_child, family = "binomial",
## data = data_acs_inc)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.509375 0.008359 60.94 <2e-16 ***
## any_child1 0.164224 0.010444 15.72 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 226890 on 175082 degrees of freedom
## Residual deviance: 226644 on 175081 degrees of freedom
## AIC: 226648
##
## Number of Fisher Scoring iterations: 4
uni_region <- glm(insurance_flag ~ region, data = data_acs_inc,
family = "binomial")
summary(uni_region)##
## Call:
## glm(formula = insurance_flag ~ region, family = "binomial", data = data_acs_inc)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.26177 0.01288 97.97 <2e-16 ***
## regionNortheast -0.59669 0.01737 -34.35 <2e-16 ***
## regionSouth -0.73377 0.01547 -47.44 <2e-16 ***
## regionWest -0.97037 0.01580 -61.43 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 226890 on 175082 degrees of freedom
## Residual deviance: 222726 on 175079 degrees of freedom
## AIC: 222734
##
## Number of Fisher Scoring iterations: 4
uni_income <- glm(insurance_flag ~ median_income, data = data_acs_inc,
family = "binomial")
summary(uni_income)##
## Call:
## glm(formula = insurance_flag ~ median_income, family = "binomial",
## data = data_acs_inc)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.483e+00 2.100e-02 70.61 <2e-16 ***
## median_income -1.077e-05 2.514e-07 -42.82 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 226890 on 175082 degrees of freedom
## Residual deviance: 225053 on 175081 degrees of freedom
## AIC: 225057
##
## Number of Fisher Scoring iterations: 4
uni_group <- glm(insurance_flag ~ group_pr, data = data_acs_inc,
family = "binomial")
summary(uni_group)##
## Call:
## glm(formula = insurance_flag ~ group_pr, family = "binomial",
## data = data_acs_inc)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.524293 0.006396 81.97 <2e-16 ***
## group_pr1 0.231474 0.010306 22.46 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 226890 on 175082 degrees of freedom
## Residual deviance: 226382 on 175081 degrees of freedom
## AIC: 226386
##
## Number of Fisher Scoring iterations: 4
uni_prov1k <- glm(insurance_flag ~ supply_tert, data = data_acs_inc,
family = "binomial")
summary(uni_prov1k)##
## Call:
## glm(formula = insurance_flag ~ supply_tert, family = "binomial",
## data = data_acs_inc)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.623965 0.008685 71.844 < 2e-16 ***
## supply_tertlow 0.071394 0.012353 5.780 7.49e-09 ***
## supply_tertmid -0.094866 0.012202 -7.775 7.55e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 226890 on 175082 degrees of freedom
## Residual deviance: 226705 on 175080 degrees of freedom
## AIC: 226711
##
## Number of Fisher Scoring iterations: 4
uni_frac_wh <- glm(insurance_flag ~ fraction_white, data = data_acs_inc,
family = "binomial")
summary(uni_frac_wh)##
## Call:
## glm(formula = insurance_flag ~ fraction_white, family = "binomial",
## data = data_acs_inc)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.09857 0.02155 -4.573 4.8e-06 ***
## fraction_white 1.09844 0.03243 33.870 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 226890 on 175082 degrees of freedom
## Residual deviance: 225737 on 175081 degrees of freedom
## AIC: 225741
##
## Number of Fisher Scoring iterations: 4
uni_no_hi <- glm(insurance_flag ~ fraction_no_health_ins, data = data_acs_inc,
family = "binomial")
summary(uni_no_hi)##
## Call:
## glm(formula = insurance_flag ~ fraction_no_health_ins, family = "binomial",
## data = data_acs_inc)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.72061 0.01184 60.850 <2e-16 ***
## fraction_no_health_ins -1.32794 0.13508 -9.831 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 226890 on 175082 degrees of freedom
## Residual deviance: 226794 on 175081 degrees of freedom
## AIC: 226798
##
## Number of Fisher Scoring iterations: 4
uni_metro<- glm(insurance_flag ~ metro_code, data = data_acs_inc,
family = "binomial")
summary(uni_metro)##
## Call:
## glm(formula = insurance_flag ~ metro_code, family = "binomial",
## data = data_acs_inc)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.51497 0.00591 87.14 <2e-16 ***
## metro_codenon 0.49541 0.02764 17.92 <2e-16 ***
## metro_codesmall/mid 0.32436 0.01177 27.55 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 226890 on 175082 degrees of freedom
## Residual deviance: 225880 on 175080 degrees of freedom
## AIC: 225886
##
## Number of Fisher Scoring iterations: 4
model1 <- glm(insurance_flag ~ supply_tert + provs + any_child
+ region + median_income + group_pr + fraction_white
+ fraction_no_health_ins, data = data_acs_inc, family = "binomial")
summary(model1)##
## Call:
## glm(formula = insurance_flag ~ supply_tert + provs + any_child +
## region + median_income + group_pr + fraction_white + fraction_no_health_ins,
## family = "binomial", data = data_acs_inc)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.579e+00 4.750e-02 33.240 < 2e-16 ***
## supply_tertlow 4.015e-02 1.359e-02 2.955 0.00313 **
## supply_tertmid -1.578e-01 1.287e-02 -12.268 < 2e-16 ***
## provs.advanced-masters -3.788e-01 4.108e-02 -9.220 < 2e-16 ***
## provs.counselors-masters 4.641e-01 1.171e-02 39.634 < 2e-16 ***
## provs.psych-masters -1.941e-01 1.480e-02 -13.118 < 2e-16 ***
## any_child1 1.394e-01 1.083e-02 12.876 < 2e-16 ***
## regionNortheast -4.712e-01 1.846e-02 -25.525 < 2e-16 ***
## regionSouth -6.021e-01 1.789e-02 -33.649 < 2e-16 ***
## regionWest -7.629e-01 1.680e-02 -45.409 < 2e-16 ***
## median_income -1.032e-05 3.159e-07 -32.666 < 2e-16 ***
## group_pr1 1.719e-01 1.075e-02 15.998 < 2e-16 ***
## fraction_white 4.839e-01 3.534e-02 13.692 < 2e-16 ***
## fraction_no_health_ins -2.215e+00 1.955e-01 -11.327 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 226890 on 175082 degrees of freedom
## Residual deviance: 217820 on 175069 degrees of freedom
## AIC: 217848
##
## Number of Fisher Scoring iterations: 4
## GVIF Df GVIF^(1/(2*Df))
## supply_tert 1.168423 2 1.039681
## provs 1.050051 3 1.008173
## any_child 1.021663 1 1.010774
## region 1.700132 3 1.092481
## median_income 1.517536 1 1.231883
## group_pr 1.035179 1 1.017437
## fraction_white 1.129257 1 1.062665
## fraction_no_health_ins 2.018609 1 1.420778
| Df | Deviance | Resid. Df | Resid. Dev | Pr(>Chi) | |
|---|---|---|---|---|---|
| NULL | NA | NA | 175082 | 226890.3 | NA |
| supply_tert | 2 | 185.1744 | 175080 | 226705.1 | 0 |
| provs | 3 | 2712.3846 | 175077 | 223992.7 | 0 |
| any_child | 1 | 285.8982 | 175076 | 223706.8 | 0 |
| region | 3 | 4169.6224 | 175073 | 219537.2 | 0 |
| median_income | 1 | 1053.9256 | 175072 | 218483.3 | 0 |
| group_pr | 1 | 257.4005 | 175071 | 218225.9 | 0 |
| fraction_white | 1 | 278.0680 | 175070 | 217947.8 | 0 |
| fraction_no_health_ins | 1 | 128.0225 | 175069 | 217819.8 | 0 |
options(width = 60)
model2 <- glm(insurance_flag ~ provs + any_child + region
+ median_income + group_pr + fraction_white + fraction_no_health_ins,
data = data_acs_inc, family = "binomial")
#summary(model2)
anova(model2, model1, test='LR')| Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
|---|---|---|---|---|
| 175071 | 218093.1 | NA | NA | NA |
| 175069 | 217819.8 | 2 | 273.3505 | 0 |
model3 <- glm(insurance_flag ~ supply_tert + any_child + region
+ median_income + group_pr + fraction_white + fraction_no_health_ins,
data = data_acs_inc, family = "binomial")
#summary(model3)
anova(model3, model1, test='LR')| Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
|---|---|---|---|---|
| 175072 | 220377.5 | NA | NA | NA |
| 175069 | 217819.8 | 3 | 2557.744 | 0 |
model4 <- glm(insurance_flag ~ supply_tert + provs + region
+ median_income + group_pr + fraction_white + fraction_no_health_ins,
data = data_acs_inc, family = "binomial")
#summary(model4)
anova(model4, model1, test='LR')| Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
|---|---|---|---|---|
| 175070 | 217985.1 | NA | NA | NA |
| 175069 | 217819.8 | 1 | 165.3135 | 0 |
model5 <- glm(insurance_flag ~ supply_tert + provs + any_child
+ median_income + group_pr + fraction_white + fraction_no_health_ins,
data = data_acs_inc, family = "binomial")
#summary(model5)
anova(model5, model1, test='LR')| Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
|---|---|---|---|---|
| 175072 | 219987.4 | NA | NA | NA |
| 175069 | 217819.8 | 3 | 2167.596 | 0 |
model6 <- glm(insurance_flag ~ supply_tert + provs + any_child + region
+ group_pr + fraction_white + fraction_no_health_ins,
data = data_acs_inc, family = "binomial")
#summary(model6)
anova(model6, model1, test='LR')| Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
|---|---|---|---|---|
| 175070 | 218892.0 | NA | NA | NA |
| 175069 | 217819.8 | 1 | 1072.195 | 0 |
model7 <- glm(insurance_flag ~ supply_tert + provs + any_child + region
+ median_income + fraction_white + fraction_no_health_ins,
data = data_acs_inc, family = "binomial")
#summary(model7)
anova(model7, model1, test='LR')| Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
|---|---|---|---|---|
| 175070 | 218076.6 | NA | NA | NA |
| 175069 | 217819.8 | 1 | 256.842 | 0 |
model8 <- glm(insurance_flag ~ supply_tert + provs + any_child + region
+ median_income + group_pr + fraction_no_health_ins,
data = data_acs_inc, family = "binomial")
#summary(model8)
anova(model8, model1, test='LR')| Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
|---|---|---|---|---|
| 175070 | 218007.1 | NA | NA | NA |
| 175069 | 217819.8 | 1 | 187.3096 | 0 |
model9 <- glm(insurance_flag ~ supply_tert + provs + any_child + region
+ median_income + group_pr + fraction_white,
data = data_acs_inc, family = "binomial")
#summary(model9)
anova(model9, model1, test='LR')| Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
|---|---|---|---|---|
| 175070 | 217947.8 | NA | NA | NA |
| 175069 | 217819.8 | 1 | 128.0225 | 0 |
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 4.85 4.42 5.32
## supply_tertlow 1.04 1.01 1.07
## supply_tertmid 0.85 0.83 0.88
## provs.advanced-masters 0.68 0.63 0.74
## provs.counselors-masters 1.59 1.55 1.63
## provs.psych-masters 0.82 0.80 0.85
## any_child1 1.15 1.13 1.17
## regionNortheast 0.62 0.60 0.65
## regionSouth 0.55 0.53 0.57
## regionWest 0.47 0.45 0.48
## median_income 1.00 1.00 1.00
## group_pr1 1.19 1.16 1.21
## fraction_white 1.62 1.51 1.74
## fraction_no_health_ins 0.11 0.07 0.16
data_acs_inc$supply_tert <- contr_code_treatment(data_acs_inc$supply_tert,
base = "low")
data_acs_inc$provs <- contr_code_treatment(data_acs_inc$provs,
base = "psych")
data_acs_inc$region <- contr_code_treatment(data_acs_inc$region, base = "West")
model11 <- glm(insurance_flag ~ supply_tert + provs + any_child + region
+ median_income + group_pr + fraction_white + fraction_no_health_ins,
data = data_acs_inc, family = "binomial")
summary(model11)##
## Call:
## glm(formula = insurance_flag ~ supply_tert + provs + any_child +
## region + median_income + group_pr + fraction_white + fraction_no_health_ins,
## family = "binomial", data = data_acs_inc)
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) 6.622e-01 4.941e-02 13.402
## supply_tert.high-low -4.015e-02 1.359e-02 -2.955
## supply_tert.mid-low -1.980e-01 1.286e-02 -15.396
## provs.advanced-psych -1.847e-01 4.237e-02 -4.358
## provs.counselors-psych 6.582e-01 1.552e-02 42.398
## provs.masters-psych 1.941e-01 1.480e-02 13.118
## any_child1 1.394e-01 1.083e-02 12.876
## region.Midwest-West 7.629e-01 1.680e-02 45.409
## region.Northeast-West 2.917e-01 1.572e-02 18.554
## region.South-West 1.608e-01 1.390e-02 11.564
## median_income -1.032e-05 3.159e-07 -32.666
## group_pr1 1.719e-01 1.075e-02 15.998
## fraction_white 4.839e-01 3.534e-02 13.692
## fraction_no_health_ins -2.215e+00 1.955e-01 -11.327
## Pr(>|z|)
## (Intercept) < 2e-16 ***
## supply_tert.high-low 0.00313 **
## supply_tert.mid-low < 2e-16 ***
## provs.advanced-psych 1.31e-05 ***
## provs.counselors-psych < 2e-16 ***
## provs.masters-psych < 2e-16 ***
## any_child1 < 2e-16 ***
## region.Midwest-West < 2e-16 ***
## region.Northeast-West < 2e-16 ***
## region.South-West < 2e-16 ***
## median_income < 2e-16 ***
## group_pr1 < 2e-16 ***
## fraction_white < 2e-16 ***
## fraction_no_health_ins < 2e-16 ***
## ---
## Signif. codes:
## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 226890 on 175082 degrees of freedom
## Residual deviance: 217820 on 175069 degrees of freedom
## AIC: 217848
##
## Number of Fisher Scoring iterations: 4
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 1.94 1.76 2.14
## supply_tert.high-low 0.96 0.94 0.99
## supply_tert.mid-low 0.82 0.80 0.84
## provs.advanced-psych 0.83 0.77 0.90
## provs.counselors-psych 1.93 1.87 1.99
## provs.masters-psych 1.21 1.18 1.25
## any_child1 1.15 1.13 1.17
## region.Midwest-West 2.14 2.08 2.22
## region.Northeast-West 1.34 1.30 1.38
## region.South-West 1.17 1.14 1.21
## median_income 1.00 1.00 1.00
## group_pr1 1.19 1.16 1.21
## fraction_white 1.62 1.51 1.74
## fraction_no_health_ins 0.11 0.07 0.16
# Split data into train and test
set.seed(421)
split <- initial_split(data_acs_inc, prop = 0.8, strata = insurance_flag)
train <- split %>%
training()
test <- split %>%
testing()mod_train <- glm(insurance_flag ~ supply_tert + provs +
any_child + region + median_income + group_pr +
fraction_white + fraction_no_health_ins,
data = train, family = "binomial")
glm_probs = data.frame(probs = predict(mod_train,
newdata = test,
type="response"))## Warning: contrasts dropped from factor supply_tert
## Warning: contrasts dropped from factor provs
## Warning: contrasts dropped from factor region
glm_pred = glm_probs %>%
mutate(pred = ifelse(probs>.5, "accept", "does not accept"))
glm_pred = cbind(test, glm_pred)
glm_pred %>%
count(pred, insurance_flag) %>%
spread(insurance_flag, n, fill = 0)| pred | 0 | 1 |
|---|---|---|
| accept | 10731 | 21410 |
| does not accept | 1554 | 1323 |
## [1] 0.660289
pr <- prediction(glm_probs,test$insurance_flag)
perf <- performance(pr,measure = "tpr",x.measure = "fpr")
plot(perf) ## Warning in auc(test$insurance_flag, glm_probs): longer
## object length is not a multiple of shorter object length
## [1] 0.6350857
set.seed(421)
split <- initial_split(psych_forest, prop = 0.7, strata = insurance_flag)
train <- split %>%
training()
test <- split %>%
testing()model <- randomForest(y=train[,1], x=train[,2:9], data = train, ntree = 500,
mtry = 5, importance = TRUE , proximity = FALSE)
model##
## Call:
## randomForest(x = train[, 2:9], y = train[, 1], ntree = 500, mtry = 5, importance = TRUE, proximity = FALSE, data = train)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 5
##
## OOB estimate of error rate: 32.26%
## Confusion matrix:
## 0 1 class.error
## 0 16829 26166 0.6085824
## 1 13372 66190 0.1680702
set.seed(421)
# The next step is to validate our model using the test data
prediction <- predict(model, newdata = test)
table(prediction, test$insurance_flag) ##
## prediction 0 1
## 0 7409 5569
## 1 11018 28530
results<-cbind(prediction,test$insurance_flag)
colnames(results)<-c('pred','real')
results<-as.data.frame(results)
# Finally, let’s calculate the accuracy of the model
sum(prediction==test$insurance_flag) / nrow(test)## [1] 0.6842135
#another way to view this
imp=as.data.frame(importance(model)) #Analyze the importance of each variable in the model
imp=cbind(vars=rownames(imp),imp)
print(imp)## vars 0
## supply_tert supply_tert 29.55256
## provs provs 193.57484
## any_child any_child 50.29224
## region region 59.88007
## median_income median_income 82.57672
## group_pr group_pr 71.35168
## fraction_white fraction_white 61.36744
## fraction_no_health_ins fraction_no_health_ins 53.19779
## 1 MeanDecreaseAccuracy
## supply_tert 63.89101 105.3157
## provs 171.59124 317.2663
## any_child 120.23778 139.9174
## region 88.36147 160.9842
## median_income 84.91863 227.3938
## group_pr 153.82289 189.3485
## fraction_white 87.74077 155.8341
## fraction_no_health_ins 106.55105 190.7691
## MeanDecreaseGini
## supply_tert 572.5611
## provs 1747.6035
## any_child 924.7983
## region 1322.0404
## median_income 2726.5782
## group_pr 853.3012
## fraction_white 2690.2180
## fraction_no_health_ins 2439.5981
par(mfrow = c(2, 2))
partialPlot(model, test, median_income, "1", xlab="Median Income",
ylab="Log of fraction of votes", lwd=4, col="green")
partialPlot(model, test, fraction_white, "1", xlab="Fraction White",
ylab="Log of fraction of votes", lwd=4, col="green")
partialPlot(model, test, fraction_no_health_ins, "1",
xlab="Fraction no health insurance",
ylab="Log of fraction of votes", lwd=4, col="green")
partialPlot(x=model, pred.data=test, x.var=provs, which.class="1")par(mfrow = c(2, 2))
partialPlot(x=model, pred.data=test, x.var=supply_tert, which.class="1")
partialPlot(x=model, pred.data=test, x.var=any_child, which.class="1")
partialPlot(x=model, pred.data=test, x.var=region, which.class="1")
partialPlot(x=model, pred.data=test, x.var=group_pr, which.class="1")Importance is for predicting that a mental health provider will accept insurance (insurance_flag==1)
R version 4.3.1 (2023-06-16 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows Server 2019 x64 (build 17763)
Matrix products: default
attached base packages:
[1] stats graphics grDevices utils datasets
[6] methods base
other attached packages:
[1] summarytools_1.0.1 Metrics_0.1.4
[3] car_3.1-2 carData_3.0-5
[5] ROCR_1.0-11 randomForest_4.7-1.1
[7] aod_1.3.3 faux_1.2.1
[9] ggpubr_0.6.0 unhcrthemes_0.6.2
[11] gtsummary_1.7.2 tigris_2.1
[13] DescTools_0.99.52 lubridate_1.9.3
[15] forcats_1.0.0 stringr_1.5.1
[17] readr_2.1.5 tidyverse_2.0.0
[19] magrittr_2.0.3 yardstick_1.3.1
[21] workflowsets_1.1.0 workflows_1.1.4
[23] tune_1.2.0 tidyr_1.3.1
[25] tibble_3.2.1 rsample_1.2.1
[27] recipes_1.0.10 purrr_1.0.2
[29] parsnip_1.2.1 modeldata_1.3.0
[31] infer_1.0.6 ggplot2_3.5.0
[33] dplyr_1.1.4 dials_1.2.1
[35] scales_1.3.0 broom_1.0.5
[37] tidymodels_1.2.0 data.table_1.14.10
[39] chse_1.3.2
loaded via a namespace (and not attached):
[1] rstudioapi_0.16.0 jsonlite_1.8.8
[3] magick_2.8.2 rmarkdown_2.26
[5] vctrs_0.6.5 base64enc_0.1-3
[7] rstatix_0.7.2 htmltools_0.5.7
[9] cellranger_1.1.0 sass_0.4.8
[11] parallelly_1.36.0 KernSmooth_2.23-22
[13] bslib_0.6.1 plyr_1.8.9
[15] rootSolve_1.8.2.4 cachem_1.0.8
[17] gt_0.10.0 uuid_1.2-0
[19] lifecycle_1.0.4 iterators_1.0.14
[21] pkgconfig_2.0.3 Matrix_1.6-4
[23] R6_2.5.1 fastmap_1.1.1
[25] future_1.33.1 digest_0.6.34
[27] Exact_3.2 colorspace_2.1-0
[29] furrr_0.3.1 fansi_1.0.6
[31] timechange_0.2.0 httr_1.4.7
[33] abind_1.4-5 compiler_4.3.1
[35] proxy_0.4-27 pander_0.6.5
[37] withr_3.0.0 backports_1.4.1
[39] DBI_1.2.2 highr_0.10
[41] Rttf2pt1_1.3.12 ggsignif_0.6.4
[43] MASS_7.3-60 lava_1.7.3
[45] rappdirs_0.3.3 classInt_0.4-10
[47] gld_2.6.6 tools_4.3.1
[49] units_0.8-5 extrafontdb_1.0
[51] future.apply_1.11.1 nnet_7.3-19
[53] glue_1.7.0 grid_4.3.1
[55] sf_1.0-15 checkmate_2.3.1
[57] reshape2_1.4.4 generics_0.1.3
[59] gtable_0.3.4 tzdb_0.4.0
[61] class_7.3-22 lmom_3.0
[63] hms_1.1.3 xml2_1.3.6
[65] utf8_1.2.4 ggrepel_0.9.4
[67] foreach_1.5.2 pillar_1.9.0
[69] splines_4.3.1 lhs_1.1.6
[71] pryr_0.1.6 lattice_0.22-5
[73] survival_3.5-7 tidyselect_1.2.0
[75] knitr_1.45 xfun_0.41
[77] expm_0.999-8 hardhat_1.3.1
[79] rapportools_1.1 matrixStats_1.2.0
[81] timeDate_4032.109 stringi_1.8.3
[83] DiceDesign_1.10 yaml_2.3.8
[85] boot_1.3-28.1 evaluate_0.23
[87] codetools_0.2-19 tcltk_4.3.1
[89] extrafont_0.19 cli_3.6.2
[91] rpart_4.1.23 systemfonts_1.0.5
[93] munsell_0.5.0 jquerylib_0.1.4
[95] Rcpp_1.0.12 readxl_1.4.3
[97] globals_0.16.2 parallel_4.3.1
[99] gower_1.0.1 GPfit_1.0-8
[101] listenv_0.9.0 broom.helpers_1.14.0
[103] mvtnorm_1.2-4 ipred_0.9-14
[105] prodlim_2023.08.28 e1071_1.7-14
[107] rlang_1.1.3