Context

This code is designed to explore the factors associated with insurance acceptance among mental health providers (data from psychology today).

The following steps were taken: - Exploratory univariate analysis - Building the full logistic regression model - Comparing reduced models to full model - Running full model through a training and testing sample, then exploring the accuracy of the model - Running the model through a random forest to better understand the importance of the variables on insurance acceptance among mental health providers


data_acs <- read.csv("K:/WorkingData2/User_Folders/Aine/Project Work/Psychology Today/Original files/working data acs 4-5-24.csv")

data_acs <- data_acs[which(!is.na(data_acs$id)), ] #just in case it reuploads weirdly from csv
#Desc(data_acs$id)
#length of df should be 184'957
# unique IDs should be 167'854

Load Required Packages

Model building

Exhibit 4. Regression of provider- and county/zip level covariates on insurance acceptance 1. County or zip code composition (population, % white, HRSA supply of MH professionals, income) 2. Provider-level covariates – years in practice, psychologist vs. therapist, telehealth, gender, # of locations (can use addresses to identify multi-provider groups e.g. count number of providers at same location), whether providers themselves practice in >1 location


Preparing the data

data_acs_inc <- data_acs[which(data_acs$excluded==0),]

data_acs_inc <- data_acs_inc %>%
  group_by(GEOID) %>%
  mutate(provider_1k = (n() / county_population)*1000) %>%
  ungroup()

#making tertiles
data_acs_inc <- data_acs_inc %>%
  mutate(supply_tert = ntile(provider_1k, 3))

data_acs_inc <- data_acs_inc %>%
  mutate(supply_tert = case_when(supply_tert==1 ~ "low supply",
                            supply_tert==2 ~ "mid supply",
                            supply_tert==3 ~ "high supply")
         )

data_acs_inc$supply_tert <- as.factor(data_acs_inc$supply_tert)
data_acs_inc$provider_type <- as.factor(data_acs_inc$provider_type)
data_acs_inc$insurance_flag <- as.factor(data_acs_inc$insurance_flag)
data_acs_inc$any_child <- as.factor(data_acs_inc$any_child)
data_acs_inc$region <- as.factor(data_acs_inc$region)
data_acs_inc$group_pr <- as.factor(data_acs_inc$group_pr)
data_acs_inc$telehealth <- as.factor(data_acs_inc$telehealth)

Exploring the distrobutions

  • The psych_forest dataset has all of our variables of interest, it will be used for the random forest in later code but I will use it now as well to easily explore the distrobutions of the variables of interest
psych_forest <- as.data.frame(select(data_acs_inc, c("insurance_flag", "supply_tert", "provider_type","any_child","region","median_income","group_pr","fraction_white", "fraction_no_health_ins")))

Desc(psych_forest)
## ------------------------------------------------------------------------------ 
## Describe psych_forest (data.frame):
## 
## data frame:  175083 obs. of  9 variables
##      175083 complete cases (100.0%)
## 
##   Nr  ColName                 Class    NAs  Levels                         
##   1   insurance_flag          factor   .    (2): 1-0, 2-1                  
##   2   supply_tert             factor   .    (3): 1-high supply, 2-low      
##                                             supply, 3-mid supply           
##   3   provider_type           factor   .    (4): 1-Advanced practice       
##                                             providers, 2-All other         
##                                             therapists and counselors,     
##                                             3-Masters level therapists and 
##                                             counselors, 4-Psychologists/PhD
##   4   any_child               factor   .    (2): 1-0, 2-1                  
##   5   region                  factor   .    (4): 1-Midwest, 2-Northeast,   
##                                             3-South, 4-West                
##   6   median_income           integer  .                                   
##   7   group_pr                factor   .    (2): 1-0, 2-1                  
##   8   fraction_white          numeric  .                                   
##   9   fraction_no_health_ins  numeric  .                                   
## 
## 
## ------------------------------------------------------------------------------ 
## 1 - insurance_flag (factor - dichotomous)
## 
##    length       n     NAs  unique
##   175'083 175'083       0       2
##            100.0%    0.0%        
## 
##       freq   perc  lci.95  uci.95'
## 0   61'422  35.1%   34.9%   35.3%
## 1  113'661  64.9%   64.7%   65.1%
## 
## ' 95%-CI (Wilson)

## ------------------------------------------------------------------------------ 
## 2 - supply_tert (factor)
## 
##    length       n     NAs  unique  levels   dupes
##   175'083 175'083       0       3       3       y
##            100.0%    0.0%                        
## 
##          level    freq   perc  cumfreq  cumperc
## 1  high supply  58'361  33.3%   58'361    33.3%
## 2   low supply  58'361  33.3%  116'722    66.7%
## 3   mid supply  58'361  33.3%  175'083   100.0%

## ------------------------------------------------------------------------------ 
## 3 - provider_type (factor)
## 
##    length       n     NAs  unique  levels   dupes
##   175'083 175'083       0       4       4       y
##            100.0%    0.0%                        
## 
##                                      level    freq   perc  cumfreq  cumperc
## 1  Masters level therapists and counselors  80'573  46.0%   80'573    46.0%
## 2      All other therapists and counselors  65'478  37.4%  146'051    83.4%
## 3                        Psychologists/PhD  26'489  15.1%  172'540    98.5%
## 4              Advanced practice providers   2'543   1.5%  175'083   100.0%

## ------------------------------------------------------------------------------ 
## 4 - any_child (factor - dichotomous)
## 
##    length       n     NAs  unique
##   175'083 175'083       0       2
##            100.0%    0.0%        
## 
##       freq   perc  lci.95  uci.95'
## 0   61'038  34.9%   34.6%   35.1%
## 1  114'045  65.1%   64.9%   65.4%
## 
## ' 95%-CI (Wilson)

## ------------------------------------------------------------------------------ 
## 5 - region (factor)
## 
##    length       n     NAs  unique  levels   dupes
##   175'083 175'083       0       4       4       y
##            100.0%    0.0%                        
## 
##        level    freq   perc  cumfreq  cumperc
## 1      South  58'361  33.3%   58'361    33.3%
## 2       West  48'847  27.9%  107'208    61.2%
## 3    Midwest  35'057  20.0%  142'265    81.3%
## 4  Northeast  32'818  18.7%  175'083   100.0%

## ------------------------------------------------------------------------------ 
## 6 - median_income (integer)
## 
##       length          n        NAs     unique         0s        mean'
##      175'083    175'083          0      1'930          0   79'975.52
##                  100.0%       0.0%                  0.0%            
##                                                                     
##          .05        .10        .25     median        .75         .90
##    54'096.00  57'625.00  65'554.50  76'367.00  92'600.00  109'497.00
##                                                                     
##        range         sd      vcoef        mad        IQR        skew
##   132'472.00  19'788.68       0.25  18'023.97  27'045.50        0.80
##                                                                     
##       meanCI
##    79'882.83
##    80'068.21
##             
##          .95
##   117'345.00
##             
##         kurt
##         0.49
##             
## lowest : 24'349, 24'958, 25'000, 26'395, 29'206 (2)
## highest: 133'974 (1'130), 136'837 (400), 140'258 (819), 155'071 (152), 156'821 (308)
## 
## ' 95%-CI (classic)

## ------------------------------------------------------------------------------ 
## 7 - group_pr (factor - dichotomous)
## 
##    length       n     NAs  unique
##   175'083 175'083       0       2
##            100.0%    0.0%        
## 
##       freq   perc  lci.95  uci.95'
## 0  104'650  59.8%   59.5%   60.0%
## 1   70'433  40.2%   40.0%   40.5%
## 
## ' 95%-CI (Wilson)

## ------------------------------------------------------------------------------ 
## 8 - fraction_white (numeric)
## 
##       length           n         NAs      unique          0s         mean'
##      175'083     175'083           0       1'968           0   0.65388363
##                   100.0%        0.0%                    0.0%             
##                                                                          
##          .05         .10         .25      median         .75          .90
##   0.40455446  0.43734916  0.53077050  0.66204236  0.76608561   0.86258328
##                                                                          
##        range          sd       vcoef         mad         IQR         skew
##   0.88685797  0.15491622  0.23691711  0.18615061  0.23531511  -0.22528595
##                                                                          
##        meanCI
##    0.65315799
##    0.65460928
##              
##           .95
##    0.89229776
##              
##          kurt
##   -0.44534041
##              
## lowest : 0.09554931, 0.11500288 (2), 0.1408701 (40), 0.14954472 (456), 0.15072783
## highest: 0.97435702, 0.97494897, 0.97499129, 0.98115465, 0.98240728
## 
## ' 95%-CI (classic)

## ------------------------------------------------------------------------------ 
## 9 - fraction_no_health_ins (numeric)
## 
##       length           n         NAs      unique          0s        mean'
##      175'083     175'083           0       1'969           0  0.07892316
##                   100.0%        0.0%                    0.0%            
##                                                                         
##          .05         .10         .25      median         .75         .90
##   0.03374088  0.03922616  0.05158083  0.07274425  0.09945425  0.12392894
##                                                                         
##        range          sd       vcoef         mad         IQR        skew
##   0.43848897  0.03681770  0.46650060  0.03424311  0.04787342  1.14426349
##                                                                         
##       meanCI
##   0.07875070
##   0.07909562
##             
##          .95
##   0.14244232
##             
##         kurt
##   2.15543525
##             
## lowest : 0.01056877 (152), 0.01166718, 0.01610489 (33), 0.01964938 (977), 0.02076926 (6)
## highest: 0.29298357, 0.30837921 (37), 0.36968859, 0.41878254, 0.44905774
## 
## ' 95%-CI (classic)


Univariate models

uni_prov <- glm(insurance_flag ~ provider_type, data = data_acs_inc, 
                family = "binomial")
summary(uni_prov) #this is currently comparing against advanced practice providers
## 
## Call:
## glm(formula = insurance_flag ~ provider_type, family = "binomial", 
##     data = data_acs_inc)
## 
## Coefficients:
##                                                      Estimate Std. Error
## (Intercept)                                           0.11416    0.03973
## provider_typeAll other therapists and counselors      0.83044    0.04067
## provider_typeMasters level therapists and counselors  0.38909    0.04038
## provider_typePsychologists/PhD                        0.14574    0.04161
##                                                      z value Pr(>|z|)    
## (Intercept)                                            2.874 0.004055 ** 
## provider_typeAll other therapists and counselors      20.420  < 2e-16 ***
## provider_typeMasters level therapists and counselors   9.635  < 2e-16 ***
## provider_typePsychologists/PhD                         3.502 0.000461 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 226890  on 175082  degrees of freedom
## Residual deviance: 224196  on 175079  degrees of freedom
## AIC: 224204
## 
## Number of Fisher Scoring iterations: 4
#advanced practice providers are the smallest group, which may not lead to a great 
#reference category if cells are small. I will change this to be masters level
#providers for now since that is the largest group
data_acs_inc$provider_type <- contr_code_treatment(data_acs_inc$provider_type, 
                                                   base = "Masters level therapists and counselors")

#some other primary independent var models
uni_child <- glm(insurance_flag ~ any_child, data = data_acs_inc, 
                 family = "binomial")
summary(uni_child)
## 
## Call:
## glm(formula = insurance_flag ~ any_child, family = "binomial", 
##     data = data_acs_inc)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) 0.509375   0.008359   60.94   <2e-16 ***
## any_child1  0.164224   0.010444   15.72   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 226890  on 175082  degrees of freedom
## Residual deviance: 226644  on 175081  degrees of freedom
## AIC: 226648
## 
## Number of Fisher Scoring iterations: 4
uni_region <- glm(insurance_flag ~ region, data = data_acs_inc, 
                  family = "binomial")
summary(uni_region)
## 
## Call:
## glm(formula = insurance_flag ~ region, family = "binomial", data = data_acs_inc)
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      1.26177    0.01288   97.97   <2e-16 ***
## regionNortheast -0.59669    0.01737  -34.35   <2e-16 ***
## regionSouth     -0.73377    0.01547  -47.44   <2e-16 ***
## regionWest      -0.97037    0.01580  -61.43   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 226890  on 175082  degrees of freedom
## Residual deviance: 222726  on 175079  degrees of freedom
## AIC: 222734
## 
## Number of Fisher Scoring iterations: 4
uni_income <- glm(insurance_flag ~ median_income, data = data_acs_inc, 
                  family = "binomial")
summary(uni_income)
## 
## Call:
## glm(formula = insurance_flag ~ median_income, family = "binomial", 
##     data = data_acs_inc)
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    1.483e+00  2.100e-02   70.61   <2e-16 ***
## median_income -1.077e-05  2.514e-07  -42.82   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 226890  on 175082  degrees of freedom
## Residual deviance: 225053  on 175081  degrees of freedom
## AIC: 225057
## 
## Number of Fisher Scoring iterations: 4
uni_group <- glm(insurance_flag ~ group_pr, data = data_acs_inc, 
                 family = "binomial")
summary(uni_group)
## 
## Call:
## glm(formula = insurance_flag ~ group_pr, family = "binomial", 
##     data = data_acs_inc)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) 0.524293   0.006396   81.97   <2e-16 ***
## group_pr1   0.231474   0.010306   22.46   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 226890  on 175082  degrees of freedom
## Residual deviance: 226382  on 175081  degrees of freedom
## AIC: 226386
## 
## Number of Fisher Scoring iterations: 4
uni_prov1k <- glm(insurance_flag ~ supply_tert, data = data_acs_inc, 
                  family = "binomial")
summary(uni_prov1k)
## 
## Call:
## glm(formula = insurance_flag ~ supply_tert, family = "binomial", 
##     data = data_acs_inc)
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            0.623965   0.008685  71.844  < 2e-16 ***
## supply_tertlow supply  0.071394   0.012353   5.780 7.49e-09 ***
## supply_tertmid supply -0.094866   0.012202  -7.775 7.55e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 226890  on 175082  degrees of freedom
## Residual deviance: 226705  on 175080  degrees of freedom
## AIC: 226711
## 
## Number of Fisher Scoring iterations: 4
uni_frac_wh <- glm(insurance_flag ~ fraction_white, data = data_acs_inc, 
                   family = "binomial")
summary(uni_frac_wh)
## 
## Call:
## glm(formula = insurance_flag ~ fraction_white, family = "binomial", 
##     data = data_acs_inc)
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.09857    0.02155  -4.573  4.8e-06 ***
## fraction_white  1.09844    0.03243  33.870  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 226890  on 175082  degrees of freedom
## Residual deviance: 225737  on 175081  degrees of freedom
## AIC: 225741
## 
## Number of Fisher Scoring iterations: 4
uni_no_hi <- glm(insurance_flag ~ fraction_no_health_ins, data = data_acs_inc, 
                 family = "binomial")
summary(uni_no_hi)
## 
## Call:
## glm(formula = insurance_flag ~ fraction_no_health_ins, family = "binomial", 
##     data = data_acs_inc)
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             0.72061    0.01184  60.850   <2e-16 ***
## fraction_no_health_ins -1.32794    0.13508  -9.831   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 226890  on 175082  degrees of freedom
## Residual deviance: 226794  on 175081  degrees of freedom
## AIC: 226798
## 
## Number of Fisher Scoring iterations: 4

Full model with all vars

model1 <- glm(insurance_flag ~ supply_tert + provider_type + any_child 
              + region + median_income + group_pr + fraction_white 
              + fraction_no_health_ins, data = data_acs_inc, family = "binomial")
summary(model1)
## 
## Call:
## glm(formula = insurance_flag ~ supply_tert + provider_type + 
##     any_child + region + median_income + group_pr + fraction_white + 
##     fraction_no_health_ins, family = "binomial", data = data_acs_inc)
## 
## Coefficients:
##                                                                                             Estimate
## (Intercept)                                                                                1.579e+00
## supply_tertlow supply                                                                      4.015e-02
## supply_tertmid supply                                                                     -1.578e-01
## provider_type.Advanced practice providers-Masters level therapists and counselors         -3.788e-01
## provider_type.All other therapists and counselors-Masters level therapists and counselors  4.641e-01
## provider_type.Psychologists/PhD-Masters level therapists and counselors                   -1.941e-01
## any_child1                                                                                 1.394e-01
## regionNortheast                                                                           -4.712e-01
## regionSouth                                                                               -6.021e-01
## regionWest                                                                                -7.629e-01
## median_income                                                                             -1.032e-05
## group_pr1                                                                                  1.719e-01
## fraction_white                                                                             4.839e-01
## fraction_no_health_ins                                                                    -2.215e+00
##                                                                                           Std. Error
## (Intercept)                                                                                4.750e-02
## supply_tertlow supply                                                                      1.359e-02
## supply_tertmid supply                                                                      1.287e-02
## provider_type.Advanced practice providers-Masters level therapists and counselors          4.108e-02
## provider_type.All other therapists and counselors-Masters level therapists and counselors  1.171e-02
## provider_type.Psychologists/PhD-Masters level therapists and counselors                    1.480e-02
## any_child1                                                                                 1.083e-02
## regionNortheast                                                                            1.846e-02
## regionSouth                                                                                1.789e-02
## regionWest                                                                                 1.680e-02
## median_income                                                                              3.159e-07
## group_pr1                                                                                  1.075e-02
## fraction_white                                                                             3.534e-02
## fraction_no_health_ins                                                                     1.955e-01
##                                                                                           z value
## (Intercept)                                                                                33.240
## supply_tertlow supply                                                                       2.955
## supply_tertmid supply                                                                     -12.268
## provider_type.Advanced practice providers-Masters level therapists and counselors          -9.220
## provider_type.All other therapists and counselors-Masters level therapists and counselors  39.634
## provider_type.Psychologists/PhD-Masters level therapists and counselors                   -13.118
## any_child1                                                                                 12.876
## regionNortheast                                                                           -25.525
## regionSouth                                                                               -33.649
## regionWest                                                                                -45.409
## median_income                                                                             -32.666
## group_pr1                                                                                  15.998
## fraction_white                                                                             13.692
## fraction_no_health_ins                                                                    -11.327
##                                                                                           Pr(>|z|)
## (Intercept)                                                                                < 2e-16
## supply_tertlow supply                                                                      0.00313
## supply_tertmid supply                                                                      < 2e-16
## provider_type.Advanced practice providers-Masters level therapists and counselors          < 2e-16
## provider_type.All other therapists and counselors-Masters level therapists and counselors  < 2e-16
## provider_type.Psychologists/PhD-Masters level therapists and counselors                    < 2e-16
## any_child1                                                                                 < 2e-16
## regionNortheast                                                                            < 2e-16
## regionSouth                                                                                < 2e-16
## regionWest                                                                                 < 2e-16
## median_income                                                                              < 2e-16
## group_pr1                                                                                  < 2e-16
## fraction_white                                                                             < 2e-16
## fraction_no_health_ins                                                                     < 2e-16
##                                                                                              
## (Intercept)                                                                               ***
## supply_tertlow supply                                                                     ** 
## supply_tertmid supply                                                                     ***
## provider_type.Advanced practice providers-Masters level therapists and counselors         ***
## provider_type.All other therapists and counselors-Masters level therapists and counselors ***
## provider_type.Psychologists/PhD-Masters level therapists and counselors                   ***
## any_child1                                                                                ***
## regionNortheast                                                                           ***
## regionSouth                                                                               ***
## regionWest                                                                                ***
## median_income                                                                             ***
## group_pr1                                                                                 ***
## fraction_white                                                                            ***
## fraction_no_health_ins                                                                    ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 226890  on 175082  degrees of freedom
## Residual deviance: 217820  on 175069  degrees of freedom
## AIC: 217848
## 
## Number of Fisher Scoring iterations: 4
vif(model1)
##                            GVIF Df GVIF^(1/(2*Df))
## supply_tert            1.168423  2        1.039681
## provider_type          1.050051  3        1.008173
## any_child              1.021663  1        1.010774
## region                 1.700132  3        1.092481
## median_income          1.517536  1        1.231883
## group_pr               1.035179  1        1.017437
## fraction_white         1.129257  1        1.062665
## fraction_no_health_ins 2.018609  1        1.420778
anova(model1, test = 'Chisq')
Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL NA NA 175082 226890.3 NA
supply_tert 2 185.1744 175080 226705.1 0
provider_type 3 2712.3846 175077 223992.7 0
any_child 1 285.8982 175076 223706.8 0
region 3 4169.6224 175073 219537.2 0
median_income 1 1053.9256 175072 218483.3 0
group_pr 1 257.4005 175071 218225.9 0
fraction_white 1 278.0680 175070 217947.8 0
fraction_no_health_ins 1 128.0225 175069 217819.8 0

Step-wise deletion and comparison with full model

options(width = 60)
model2 <- glm(insurance_flag ~ provider_type + any_child + region 
              + median_income + group_pr + fraction_white + fraction_no_health_ins,
              data = data_acs_inc, family = "binomial")
#summary(model2)
anova(model2, model1, test='LR')
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
175071 218093.1 NA NA NA
175069 217819.8 2 273.3505 0
model3 <- glm(insurance_flag ~ supply_tert + any_child + region 
              + median_income + group_pr + fraction_white + fraction_no_health_ins, 
              data = data_acs_inc, family = "binomial")
#summary(model3)
anova(model3, model1, test='LR')
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
175072 220377.5 NA NA NA
175069 217819.8 3 2557.744 0
model4 <- glm(insurance_flag ~ supply_tert + provider_type + region 
              + median_income + group_pr + fraction_white + fraction_no_health_ins, 
              data = data_acs_inc, family = "binomial")
#summary(model4)
anova(model4, model1, test='LR')
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
175070 217985.1 NA NA NA
175069 217819.8 1 165.3135 0
model5 <- glm(insurance_flag ~ supply_tert + provider_type + any_child 
              + median_income + group_pr + fraction_white + fraction_no_health_ins, 
              data = data_acs_inc, family = "binomial")
#summary(model5)
anova(model5, model1, test='LR')
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
175072 219987.4 NA NA NA
175069 217819.8 3 2167.596 0
model6 <- glm(insurance_flag ~ supply_tert + provider_type + any_child + region 
              + group_pr + fraction_white + fraction_no_health_ins, 
              data = data_acs_inc, family = "binomial")
#summary(model6)
anova(model6, model1, test='LR')
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
175070 218892.0 NA NA NA
175069 217819.8 1 1072.195 0
model7 <- glm(insurance_flag ~ supply_tert + provider_type + any_child + region 
              + median_income + fraction_white + fraction_no_health_ins, 
              data = data_acs_inc, family = "binomial")
#summary(model7)
anova(model7, model1, test='LR')
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
175070 218076.6 NA NA NA
175069 217819.8 1 256.842 0
model8 <- glm(insurance_flag ~ supply_tert + provider_type + any_child + region 
              + median_income + group_pr + fraction_no_health_ins, 
              data = data_acs_inc, family = "binomial")
#summary(model8)
anova(model8, model1, test='LR')
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
175070 218007.1 NA NA NA
175069 217819.8 1 187.3096 0
model9 <- glm(insurance_flag ~ supply_tert + provider_type + any_child + region
              + median_income + group_pr + fraction_white, 
              data = data_acs_inc, family = "binomial")
#summary(model9)
anova(model9, model1, test='LR')
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
175070 217947.8 NA NA NA
175069 217819.8 1 128.0225 0

Full model odds ratios

exp(cbind(OR = coef(model1), confint(model1)))
## Waiting for profiling to be done...
##                                                                                                  OR
## (Intercept)                                                                               4.8504912
## supply_tertlow supply                                                                     1.0409650
## supply_tertmid supply                                                                     0.8539787
## provider_type.Advanced practice providers-Masters level therapists and counselors         0.6847082
## provider_type.All other therapists and counselors-Masters level therapists and counselors 1.5905917
## provider_type.Psychologists/PhD-Masters level therapists and counselors                   0.8235810
## any_child1                                                                                1.1495943
## regionNortheast                                                                           0.6242585
## regionSouth                                                                               0.5476330
## regionWest                                                                                0.4663065
## median_income                                                                             0.9999897
## group_pr1                                                                                 1.1875774
## fraction_white                                                                            1.6223401
## fraction_no_health_ins                                                                    0.1091874
##                                                                                                2.5 %
## (Intercept)                                                                               4.41942179
## supply_tertlow supply                                                                     1.01361152
## supply_tertmid supply                                                                     0.83271046
## provider_type.Advanced practice providers-Masters level therapists and counselors         0.63176617
## provider_type.All other therapists and counselors-Masters level therapists and counselors 1.55451686
## provider_type.Psychologists/PhD-Masters level therapists and counselors                   0.80004626
## any_child1                                                                                1.12545105
## regionNortheast                                                                           0.60206547
## regionSouth                                                                               0.52874464
## regionWest                                                                                0.45118757
## median_income                                                                             0.99998906
## group_pr1                                                                                 1.16283179
## fraction_white                                                                            1.51376793
## fraction_no_health_ins                                                                    0.07443379
##                                                                                              97.5 %
## (Intercept)                                                                               5.3240009
## supply_tertlow supply                                                                     1.0690577
## supply_tertmid supply                                                                     0.8757852
## provider_type.Advanced practice providers-Masters level therapists and counselors         0.7421629
## provider_type.All other therapists and counselors-Masters level therapists and counselors 1.6275344
## provider_type.Psychologists/PhD-Masters level therapists and counselors                   0.8478208
## any_child1                                                                                1.1742458
## regionNortheast                                                                           0.6472477
## regionSouth                                                                               0.5671664
## regionWest                                                                                0.4819026
## median_income                                                                             0.9999903
## group_pr1                                                                                 1.2128622
## fraction_white                                                                            1.7386952
## fraction_no_health_ins                                                                    0.1601839

Changing reference categories

data_acs_inc$supply_tert <- contr_code_treatment(data_acs_inc$supply_tert, 
                                                 base = "low supply")
data_acs_inc$provider_type <- contr_code_treatment(data_acs_inc$provider_type, 
                                                   base = "Psychologists/PhD")
data_acs_inc$region <- contr_code_treatment(data_acs_inc$region, base = "West")

model11 <- glm(insurance_flag ~ supply_tert + provider_type + any_child + region 
               + median_income + group_pr + fraction_white + fraction_no_health_ins, 
              data = data_acs_inc, family = "binomial")
summary(model11)
## 
## Call:
## glm(formula = insurance_flag ~ supply_tert + provider_type + 
##     any_child + region + median_income + group_pr + fraction_white + 
##     fraction_no_health_ins, family = "binomial", data = data_acs_inc)
## 
## Coefficients:
##                                                                           Estimate
## (Intercept)                                                              6.622e-01
## supply_tert.high supply-low supply                                      -4.015e-02
## supply_tert.mid supply-low supply                                       -1.980e-01
## provider_type.Advanced practice providers-Psychologists/PhD             -1.847e-01
## provider_type.All other therapists and counselors-Psychologists/PhD      6.582e-01
## provider_type.Masters level therapists and counselors-Psychologists/PhD  1.941e-01
## any_child1                                                               1.394e-01
## region.Midwest-West                                                      7.629e-01
## region.Northeast-West                                                    2.917e-01
## region.South-West                                                        1.608e-01
## median_income                                                           -1.032e-05
## group_pr1                                                                1.719e-01
## fraction_white                                                           4.839e-01
## fraction_no_health_ins                                                  -2.215e+00
##                                                                         Std. Error
## (Intercept)                                                              4.941e-02
## supply_tert.high supply-low supply                                       1.359e-02
## supply_tert.mid supply-low supply                                        1.286e-02
## provider_type.Advanced practice providers-Psychologists/PhD              4.237e-02
## provider_type.All other therapists and counselors-Psychologists/PhD      1.552e-02
## provider_type.Masters level therapists and counselors-Psychologists/PhD  1.480e-02
## any_child1                                                               1.083e-02
## region.Midwest-West                                                      1.680e-02
## region.Northeast-West                                                    1.572e-02
## region.South-West                                                        1.390e-02
## median_income                                                            3.159e-07
## group_pr1                                                                1.075e-02
## fraction_white                                                           3.534e-02
## fraction_no_health_ins                                                   1.955e-01
##                                                                         z value
## (Intercept)                                                              13.402
## supply_tert.high supply-low supply                                       -2.955
## supply_tert.mid supply-low supply                                       -15.396
## provider_type.Advanced practice providers-Psychologists/PhD              -4.358
## provider_type.All other therapists and counselors-Psychologists/PhD      42.398
## provider_type.Masters level therapists and counselors-Psychologists/PhD  13.118
## any_child1                                                               12.876
## region.Midwest-West                                                      45.409
## region.Northeast-West                                                    18.554
## region.South-West                                                        11.564
## median_income                                                           -32.666
## group_pr1                                                                15.998
## fraction_white                                                           13.692
## fraction_no_health_ins                                                  -11.327
##                                                                         Pr(>|z|)
## (Intercept)                                                              < 2e-16
## supply_tert.high supply-low supply                                       0.00313
## supply_tert.mid supply-low supply                                        < 2e-16
## provider_type.Advanced practice providers-Psychologists/PhD             1.31e-05
## provider_type.All other therapists and counselors-Psychologists/PhD      < 2e-16
## provider_type.Masters level therapists and counselors-Psychologists/PhD  < 2e-16
## any_child1                                                               < 2e-16
## region.Midwest-West                                                      < 2e-16
## region.Northeast-West                                                    < 2e-16
## region.South-West                                                        < 2e-16
## median_income                                                            < 2e-16
## group_pr1                                                                < 2e-16
## fraction_white                                                           < 2e-16
## fraction_no_health_ins                                                   < 2e-16
##                                                                            
## (Intercept)                                                             ***
## supply_tert.high supply-low supply                                      ** 
## supply_tert.mid supply-low supply                                       ***
## provider_type.Advanced practice providers-Psychologists/PhD             ***
## provider_type.All other therapists and counselors-Psychologists/PhD     ***
## provider_type.Masters level therapists and counselors-Psychologists/PhD ***
## any_child1                                                              ***
## region.Midwest-West                                                     ***
## region.Northeast-West                                                   ***
## region.South-West                                                       ***
## median_income                                                           ***
## group_pr1                                                               ***
## fraction_white                                                          ***
## fraction_no_health_ins                                                  ***
## ---
## Signif. codes:  
## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 226890  on 175082  degrees of freedom
## Residual deviance: 217820  on 175069  degrees of freedom
## AIC: 217848
## 
## Number of Fisher Scoring iterations: 4

OR for changed ref cats

exp(cbind(OR = coef(model11), confint(model11)))
## Waiting for profiling to be done...
##                                                                                OR
## (Intercept)                                                             1.9390976
## supply_tert.high supply-low supply                                      0.9606471
## supply_tert.mid supply-low supply                                       0.8203722
## provider_type.Advanced practice providers-Psychologists/PhD             0.8313793
## provider_type.All other therapists and counselors-Psychologists/PhD     1.9313117
## provider_type.Masters level therapists and counselors-Psychologists/PhD 1.2142096
## any_child1                                                              1.1495943
## region.Midwest-West                                                     2.1445121
## region.Northeast-West                                                   1.3387298
## region.South-West                                                       1.1744055
## median_income                                                           0.9999897
## group_pr1                                                               1.1875774
## fraction_white                                                          1.6223401
## fraction_no_health_ins                                                  0.1091874
##                                                                              2.5 %
## (Intercept)                                                             1.76014418
## supply_tert.high supply-low supply                                      0.93540323
## supply_tert.mid supply-low supply                                       0.79994891
## provider_type.Advanced practice providers-Psychologists/PhD             0.76515578
## provider_type.All other therapists and counselors-Psychologists/PhD     1.87342995
## provider_type.Masters level therapists and counselors-Psychologists/PhD 1.17949448
## any_child1                                                              1.12545105
## region.Midwest-West                                                     2.07510798
## region.Northeast-West                                                   1.29811958
## region.South-West                                                       1.14284048
## median_income                                                           0.99998906
## group_pr1                                                               1.16283179
## fraction_white                                                          1.51376793
## fraction_no_health_ins                                                  0.07443379
##                                                                            97.5 %
## (Intercept)                                                             2.1363443
## supply_tert.high supply-low supply                                      0.9865713
## supply_tert.mid supply-low supply                                       0.8413106
## provider_type.Advanced practice providers-Psychologists/PhD             0.9034182
## provider_type.All other therapists and counselors-Psychologists/PhD     1.9909768
## provider_type.Masters level therapists and counselors-Psychologists/PhD 1.2499277
## any_child1                                                              1.1742458
## region.Midwest-West                                                     2.2163731
## region.Northeast-West                                                   1.3806422
## region.South-West                                                       1.2068450
## median_income                                                           0.9999903
## group_pr1                                                               1.2128622
## fraction_white                                                          1.7386952
## fraction_no_health_ins                                                  0.1601839

Testing the accuracy of this model by training then testing

# Split data into train and test
set.seed(421)

split <- initial_split(data_acs_inc, prop = 0.8, strata = insurance_flag)
train <- split %>% 
         training()
test <- split %>% 
        testing()
mod_train <- glm(insurance_flag ~ supply_tert + provider_type + 
                   any_child + region + median_income + group_pr  + 
                   fraction_white + fraction_no_health_ins, 
                 data = train, family = "binomial")

glm_probs = data.frame(probs = predict(mod_train, 
                                       newdata = test, 
                                       type="response"))
## Warning: contrasts dropped from factor supply_tert
## Warning: contrasts dropped from factor provider_type
## Warning: contrasts dropped from factor region
glm_pred = glm_probs %>%
  mutate(pred = ifelse(probs>.5, "accept", "does not accept"))

glm_pred = cbind(test, glm_pred)

glm_pred %>% 
  count(pred, insurance_flag) %>%
  spread(insurance_flag, n, fill = 0)
pred 0 1
accept 10731 21410
does not accept 1554 1323
(21410+1554)/(21410+1554+10731+1323) 
## [1] 0.655777
pr <- prediction(glm_probs,test$insurance_flag)
perf <- performance(pr,measure = "tpr",x.measure = "fpr") 
plot(perf)  

auc(test$insurance_flag,glm_probs) 
## Warning in auc(test$insurance_flag, glm_probs): longer
## object length is not a multiple of shorter object length
## [1] 0.6350857

Random forest for variable importance

  • This is where we are using the psych_forest dataset
set.seed(421)

split <- initial_split(psych_forest, prop = 0.7, strata = insurance_flag)
train <- split %>% 
         training()
test <- split %>% 
        testing()

Training

model <- randomForest(y=train[,1], x=train[,2:9], data = train, ntree = 500, mtry = 5, importance = TRUE , proximity = FALSE)

model
## 
## Call:
##  randomForest(x = train[, 2:9], y = train[, 1], ntree = 500, mtry = 5,      importance = TRUE, proximity = FALSE, data = train) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 5
## 
##         OOB estimate of  error rate: 32.26%
## Confusion matrix:
##       0     1 class.error
## 0 16829 26166   0.6085824
## 1 13372 66190   0.1680702
model$confusion #this is not great
##       0     1 class.error
## 0 16829 26166   0.6085824
## 1 13372 66190   0.1680702

The next step is to validate our model using the test data

set.seed(421)
# The next step is to validate our model using the test data

prediction <- predict(model, newdata = test)

table(prediction, test$insurance_flag) #(28324+7656)/(28324+7656+10771+5775)
##           
## prediction     0     1
##          0  7409  5569
##          1 11018 28530
results<-cbind(prediction,test$insurance_flag)

colnames(results)<-c('pred','real')

results<-as.data.frame(results)

# Finally, let’s calculate the accuracy of the model

sum(prediction==test$insurance_flag) / nrow(test)
## [1] 0.6842135

Variable importance

  • The first column shows the predictor variables used in the RF model, the following columns shows how much each of those variables contribute to predicting absences (0), presences (1) or both (MeanDecreaseAccuracy).
  • The last column shows the node impurity of the model.
varImpPlot(model, sort=T, n.var= 8, 
           main= "Health insurance acceptance: yes or no", pch=16)

#another way to view this
imp=as.data.frame(importance(model)) #Analyze the importance of each variable in the model
imp=cbind(vars=rownames(imp),imp)
print(imp)
##                                          vars         0
## supply_tert                       supply_tert  29.55256
## provider_type                   provider_type 193.57484
## any_child                           any_child  50.29224
## region                                 region  59.88007
## median_income                   median_income  82.57672
## group_pr                             group_pr  71.35168
## fraction_white                 fraction_white  61.36744
## fraction_no_health_ins fraction_no_health_ins  53.19779
##                                1 MeanDecreaseAccuracy
## supply_tert             63.89101             105.3157
## provider_type          171.59124             317.2663
## any_child              120.23778             139.9174
## region                  88.36147             160.9842
## median_income           84.91863             227.3938
## group_pr               153.82289             189.3485
## fraction_white          87.74077             155.8341
## fraction_no_health_ins 106.55105             190.7691
##                        MeanDecreaseGini
## supply_tert                    572.5611
## provider_type                 1747.6035
## any_child                      924.7983
## region                        1322.0404
## median_income                 2726.5782
## group_pr                       853.3012
## fraction_white                2690.2180
## fraction_no_health_ins        2439.5981
barplot(imp$MeanDecreaseAccuracy, names.arg=imp$vars)

Examples of variable relationship with outcome and importance

  • Partial dependence plots
par(mfrow = c(2, 2))

partialPlot(model, test, median_income, "1", xlab="Median Income", 
            ylab="Log of fraction of votes", lwd=4, col="green")

partialPlot(model, test, fraction_white, "1", xlab="Fraction White", 
            ylab="Log of fraction of votes", lwd=4, col="green")

partialPlot(model, test, fraction_no_health_ins, "1", 
            xlab="Fraction no health insurance", 
            ylab="Log of fraction of votes", lwd=4, col="green")

partialPlot(x=model, pred.data=test, x.var=provider_type, which.class="1")

par(mfrow = c(2, 2))
partialPlot(x=model, pred.data=test, x.var=supply_tert, which.class="1")
partialPlot(x=model, pred.data=test, x.var=any_child, which.class="1")
partialPlot(x=model, pred.data=test, x.var=region, which.class="1")
partialPlot(x=model, pred.data=test, x.var=group_pr, which.class="1")


Conclusion

Importance is for predicting that a mental health provider will accept insurance (insurance_flag==1)

  • Provider type appears to have the hgihest importance on insurance acceptance followed by median county income.
  • Tertiles of provider supply (county level) has the lowest importance

Session Info


R version 4.3.1 (2023-06-16 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows Server 2019 x64 (build 17763)

Matrix products: default


attached base packages:
[1] stats     graphics  grDevices utils     datasets 
[6] methods   base     

other attached packages:
 [1] summarytools_1.0.1   Metrics_0.1.4       
 [3] car_3.1-2            carData_3.0-5       
 [5] ROCR_1.0-11          randomForest_4.7-1.1
 [7] aod_1.3.3            faux_1.2.1          
 [9] ggpubr_0.6.0         unhcrthemes_0.6.2   
[11] gtsummary_1.7.2      tigris_2.1          
[13] DescTools_0.99.52    lubridate_1.9.3     
[15] forcats_1.0.0        stringr_1.5.1       
[17] readr_2.1.5          tidyverse_2.0.0     
[19] magrittr_2.0.3       yardstick_1.3.1     
[21] workflowsets_1.1.0   workflows_1.1.4     
[23] tune_1.2.0           tidyr_1.3.1         
[25] tibble_3.2.1         rsample_1.2.1       
[27] recipes_1.0.10       purrr_1.0.2         
[29] parsnip_1.2.1        modeldata_1.3.0     
[31] infer_1.0.6          ggplot2_3.5.0       
[33] dplyr_1.1.4          dials_1.2.1         
[35] scales_1.3.0         broom_1.0.5         
[37] tidymodels_1.2.0     data.table_1.14.10  
[39] chse_1.3.2          

loaded via a namespace (and not attached):
  [1] rstudioapi_0.16.0    jsonlite_1.8.8      
  [3] magick_2.8.2         rmarkdown_2.26      
  [5] vctrs_0.6.5          base64enc_0.1-3     
  [7] rstatix_0.7.2        htmltools_0.5.7     
  [9] cellranger_1.1.0     sass_0.4.8          
 [11] parallelly_1.36.0    KernSmooth_2.23-22  
 [13] bslib_0.6.1          plyr_1.8.9          
 [15] rootSolve_1.8.2.4    cachem_1.0.8        
 [17] gt_0.10.0            uuid_1.2-0          
 [19] lifecycle_1.0.4      iterators_1.0.14    
 [21] pkgconfig_2.0.3      Matrix_1.6-4        
 [23] R6_2.5.1             fastmap_1.1.1       
 [25] future_1.33.1        digest_0.6.34       
 [27] Exact_3.2            colorspace_2.1-0    
 [29] furrr_0.3.1          fansi_1.0.6         
 [31] timechange_0.2.0     httr_1.4.7          
 [33] abind_1.4-5          compiler_4.3.1      
 [35] proxy_0.4-27         pander_0.6.5        
 [37] withr_3.0.0          backports_1.4.1     
 [39] DBI_1.2.2            highr_0.10          
 [41] Rttf2pt1_1.3.12      ggsignif_0.6.4      
 [43] MASS_7.3-60          lava_1.7.3          
 [45] rappdirs_0.3.3       classInt_0.4-10     
 [47] gld_2.6.6            tools_4.3.1         
 [49] units_0.8-5          extrafontdb_1.0     
 [51] future.apply_1.11.1  nnet_7.3-19         
 [53] glue_1.7.0           grid_4.3.1          
 [55] sf_1.0-15            checkmate_2.3.1     
 [57] reshape2_1.4.4       generics_0.1.3      
 [59] gtable_0.3.4         tzdb_0.4.0          
 [61] class_7.3-22         lmom_3.0            
 [63] hms_1.1.3            xml2_1.3.6          
 [65] utf8_1.2.4           ggrepel_0.9.4       
 [67] foreach_1.5.2        pillar_1.9.0        
 [69] splines_4.3.1        lhs_1.1.6           
 [71] pryr_0.1.6           lattice_0.22-5      
 [73] survival_3.5-7       tidyselect_1.2.0    
 [75] knitr_1.45           xfun_0.41           
 [77] expm_0.999-8         hardhat_1.3.1       
 [79] rapportools_1.1      matrixStats_1.2.0   
 [81] timeDate_4032.109    stringi_1.8.3       
 [83] DiceDesign_1.10      yaml_2.3.8          
 [85] boot_1.3-28.1        evaluate_0.23       
 [87] codetools_0.2-19     tcltk_4.3.1         
 [89] extrafont_0.19       cli_3.6.2           
 [91] rpart_4.1.23         systemfonts_1.0.5   
 [93] munsell_0.5.0        jquerylib_0.1.4     
 [95] Rcpp_1.0.12          readxl_1.4.3        
 [97] globals_0.16.2       parallel_4.3.1      
 [99] gower_1.0.1          GPfit_1.0-8         
[101] listenv_0.9.0        broom.helpers_1.14.0
[103] mvtnorm_1.2-4        ipred_0.9-14        
[105] prodlim_2023.08.28   e1071_1.7-14        
[107] rlang_1.1.3