Appendix to Lab 2 - Alex Crawford

Loading Data

# Read the tab-delimited Gardasil data file into a data frame.
# NOTE(review): the absolute path is specific to one machine; adjust before rerunning.
gard <- read.delim("/Users/telekineticturtle/Desktop/Colorado 13/Quant Methods/Data/jh_gardasil.dat", 
    header = TRUE)
# List the column names; the recorded output shows two extra columns, X and X.1.
names(gard)
##  [1] "Age"           "AgeGroup"      "Race"          "Shots"        
##  [5] "Completed"     "InsuranceType" "MedAssist"     "Location"     
##  [9] "LocationType"  "PracticeType"  "X"             "X.1"
# Preview the first six rows.
head(gard)
##   Age AgeGroup Race Shots Completed InsuranceType MedAssist Location
## 1  21        1    0     3         1             3         0        1
## 2  21        1    0     3         1             3         0        1
## 3  20        1    0     1         0             1         0        1
## 4  14        0    0     3         1             3         0        1
## 5  17        0    3     2         0             3         0        1
## 6  11        0    1     1         0             0         1        1
##   LocationType PracticeType  X X.1
## 1            0            1 NA  NA
## 2            0            1 NA  NA
## 3            0            1 NA  NA
## 4            0            0 NA  NA
## 5            0            1 NA  NA
## 6            0            0 NA  NA
# Keep only the first ten columns, dropping the trailing X and X.1 columns.
gard <- gard[, seq_len(10)]
# Cross-tabulate all remaining columns into a single contingency table.
gardtable <- table(gard)
# Completed ranges from 0 to 1, so its mean (0.3319) is the overall completion rate.
summary(gard[["Completed"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   0.332   1.000   1.000

Creating Dummy Variables for each Category:

## Dummy (indicator) variables: comparing a coded column with one level gives
## TRUE/FALSE, and as.numeric() converts that to 1/0 (NA stays NA).
## Race Dummy Variables:
gard$white <- as.numeric(gard$Race == 0)
gard$black <- as.numeric(gard$Race == 1)
gard$hispanic <- as.numeric(gard$Race == 2)
gard$otherrace <- as.numeric(gard$Race == 3)
# Insurance Type Variables:
gard$assisted <- as.numeric(gard$InsuranceType == 0)
gard$private <- as.numeric(gard$InsuranceType == 1)
gard$hospital <- as.numeric(gard$InsuranceType == 2)
gard$military <- as.numeric(gard$InsuranceType == 3)
# Location Dummy Variables:
gard$Odenton <- as.numeric(gard$Location == 1)
gard$WhiteMarsh <- as.numeric(gard$Location == 2)
gard$JohnsHopkins <- as.numeric(gard$Location == 3)
gard$Bayview <- as.numeric(gard$Location == 4)
# Practice Type Dummy Variables:
gard$pediatric <- as.numeric(gard$PracticeType == 0)
gard$family <- as.numeric(gard$PracticeType == 1)
gard$obgyn <- as.numeric(gard$PracticeType == 2)
# Location Type Variable:
# NOTE(review): this column is 1 when LocationType == 0. The location-type
# section of this file labels LocationType 0 as suburban and 1 as urban, so
# despite its name `urban` appears to flag suburban clinics. The name is kept
# because later models use it; confirm the coding against the codebook.
gard$urban <- as.numeric(gard$LocationType == 0)
# Age Group Variable:
gard$young <- as.numeric(gard$AgeGroup == 0)  # AgeGroup 0 = ages 11-17, 1 = ages 18-26

Answering Question 1: For each characteristic, which groups of patients appear to have a higher rate of completion?

Univariate Versions on Characteristics w/ More than Two Groups

Age Group

## Age Group - Old
# Logistic regression (logit link) of Completed on the raw AgeGroup code;
# summary() prints the coefficient table recorded below.
m1_agegrp <- glm(gard$Completed ~ gard$AgeGroup, family = binomial("logit"))
summary(m1_agegrp)
## 
## Call:
## glm(formula = gard$Completed ~ gard$AgeGroup, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.932  -0.932  -0.865   1.444   1.527  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.6087     0.0791   -7.70  1.4e-14 ***
## gard$AgeGroup  -0.1830     0.1131   -1.62     0.11    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1793.4  on 1411  degrees of freedom
## AIC: 1797
## 
## Number of Fisher Scoring iterations: 4
# Plot each patient's fitted completion probability against the AgeGroup code,
# using filled squares (pch = 15).
plot(
  x = gard$AgeGroup,
  y = fitted(m1_agegrp),
  pch = 15,
  xlab = "Age Group",
  ylab = "P(Completion)",
  main = "Completion Probability by Age Group"
)

plot of chunk unnamed-chunk-3

# Predicted completion probabilities via the inverse logit (plogis) of the
# fitted coefficients, instead of re-typing rounded estimates by hand.
# The "## [1]" lines are output recorded from the original run.
plogis(coef(m1_agegrp)[["(Intercept)"]])  # For ages 11-17 (AgeGroup = 0)
## [1] 0.3524
plogis(sum(coef(m1_agegrp)))  # For ages 18-26 (AgeGroup = 1)
## [1] 0.3118
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m1_agegrp), confint(m1_agegrp)))
## Waiting for profiling to be done...
##                   OR  2.5 % 97.5 %
## (Intercept)   0.5441 0.4654 0.6345
## gard$AgeGroup 0.8328 0.6669 1.0393

## Age Group - Young
# Same model with the coding flipped: the predictor is the `young` dummy
# (1 when AgeGroup == 0), so the slope changes sign relative to m1_agegrp.
m0_agegrp <- glm(gard$Completed ~ gard$young, family = binomial("logit"))
summary(m0_agegrp)
## 
## Call:
## glm(formula = gard$Completed ~ gard$young, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.932  -0.932  -0.865   1.444   1.527  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.7917     0.0809   -9.79   <2e-16 ***
## gard$young    0.1830     0.1131    1.62     0.11    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1793.4  on 1411  degrees of freedom
## AIC: 1797
## 
## Number of Fisher Scoring iterations: 4
# Predicted completion probabilities via the inverse logit (plogis) of the
# fitted coefficients, instead of re-typing rounded estimates by hand.
# The "## [1]" lines are output recorded from the original run.
plogis(coef(m0_agegrp)[["(Intercept)"]])  # For ages 18-26 (young = 0)
## [1] 0.3118
plogis(sum(coef(m0_agegrp)))  # For ages 11-17 (young = 1)
## [1] 0.3524
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m0_agegrp), confint(m0_agegrp)))
## Waiting for profiling to be done...
##                 OR  2.5 % 97.5 %
## (Intercept) 0.4531 0.3860 0.5301
## gard$young  1.2008 0.9622 1.4994

Race

## Race -- ALL
# Logistic regression of Completed on three race dummies; otherrace is left
# out of the formula, so it is the reference level carried by the intercept.
m1_race <- glm(gard$Completed ~ gard$white + gard$black + gard$hispanic, family = binomial("logit"))
summary(m1_race)
## 
## Call:
## glm(formula = gard$Completed ~ gard$white + gard$black + gard$hispanic, 
##     family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.982  -0.982  -0.736   1.386   1.697  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.5744     0.1527   -3.76  0.00017 ***
## gard$white      0.0955     0.1706    0.56  0.57552    
## gard$black     -0.5947     0.1892   -3.14  0.00168 ** 
## gard$hispanic  -0.1477     0.3328   -0.44  0.65712    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796  on 1412  degrees of freedom
## Residual deviance: 1768  on 1409  degrees of freedom
## AIC: 1776
## 
## Number of Fisher Scoring iterations: 4
# Plot each patient's fitted completion probability against the Race code,
# using filled squares (pch = 15).
plot(
  x = gard$Race,
  y = fitted(m1_race),
  pch = 15,
  xlab = "Race",
  ylab = "P(Completion)",
  main = "Completion Probability by Race"
)

plot of chunk unnamed-chunk-4

# Predicted completion probability for each race: inverse logit (plogis) of
# the intercept plus that group's dummy coefficient, taken from the fitted
# model instead of re-typed rounded estimates.
# The "## [1]" lines are output recorded from the original run.
plogis(sum(coef(m1_race)[c("(Intercept)", "gard$white")]))  # For White
## [1] 0.3825
plogis(sum(coef(m1_race)[c("(Intercept)", "gard$black")]))  # For Black
## [1] 0.237
plogis(sum(coef(m1_race)[c("(Intercept)", "gard$hispanic")]))  # For Hispanic
## [1] 0.3269
plogis(coef(m1_race)[["(Intercept)"]])  # For OtherRace (reference level)
## [1] 0.3602
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m1_race), confint(m1_race)))
## Waiting for profiling to be done...
##                   OR  2.5 % 97.5 %
## (Intercept)   0.5630 0.4154 0.7568
## gard$white    1.1003 0.7897 1.5428
## gard$black    0.5518 0.3811 0.8009
## gard$hispanic 0.8627 0.4414 1.6375

# White
# Logistic regression of Completed on the white dummy alone (white vs. all others).
m0_racew <- glm(gard$Completed ~ gard$white, family = binomial("logit"))
summary(m0_racew)
## 
## Call:
## glm(formula = gard$Completed ~ gard$white, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.982  -0.982  -0.806   1.386   1.601  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.9567     0.0856  -11.18   <2e-16 ***
## gard$white    0.4778     0.1145    4.17    3e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1778.4  on 1411  degrees of freedom
## AIC: 1782
## 
## Number of Fisher Scoring iterations: 4
# Predicted completion probabilities via the inverse logit (plogis) of the
# fitted coefficients, instead of re-typing rounded estimates by hand.
# The "## [1]" lines are output recorded from the original run.
plogis(coef(m0_racew)[["(Intercept)"]])  # Not White
## [1] 0.2775
plogis(sum(coef(m0_racew)))  # White
## [1] 0.3825
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m0_racew), confint(m0_racew)))
## Waiting for profiling to be done...
##                 OR  2.5 % 97.5 %
## (Intercept) 0.3841 0.3241 0.4534
## gard$white  1.6126 1.2894 2.0200

# Black
# Logistic regression of Completed on the black dummy alone (black vs. all others).
m0_raceb <- glm(gard$Completed ~ gard$black, family = binomial("logit"))
summary(m0_raceb)
## 
## Call:
## glm(formula = gard$Completed ~ gard$black, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.970  -0.970  -0.736   1.400   1.697  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.5097     0.0663   -7.69  1.5e-14 ***
## gard$black   -0.6594     0.1299   -5.08  3.9e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1768.9  on 1411  degrees of freedom
## AIC: 1773
## 
## Number of Fisher Scoring iterations: 4
# Predicted completion probabilities via the inverse logit (plogis) of the
# fitted coefficients, instead of re-typing rounded estimates by hand.
# The "## [1]" lines are output recorded from the original run.
plogis(coef(m0_raceb)[["(Intercept)"]])  # Not Black
## [1] 0.3753
plogis(sum(coef(m0_raceb)))  # Black
## [1] 0.237
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m0_raceb), confint(m0_raceb)))
## Waiting for profiling to be done...
##                 OR  2.5 % 97.5 %
## (Intercept) 0.6007 0.5271 0.6836
## gard$black  0.5172 0.3997 0.6653

# Hispanic
# Logistic regression of Completed on the hispanic dummy alone (Hispanic vs. all others).
m0_raceh <- glm(gard$Completed ~ gard$hispanic, family = binomial("logit"))
summary(m0_raceh)
## 
## Call:
## glm(formula = gard$Completed ~ gard$hispanic, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.898  -0.898  -0.898   1.485   1.495  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.6987     0.0576  -12.14   <2e-16 ***
## gard$hispanic  -0.0235     0.3012   -0.08     0.94    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796  on 1412  degrees of freedom
## Residual deviance: 1796  on 1411  degrees of freedom
## AIC: 1800
## 
## Number of Fisher Scoring iterations: 4
# Predicted completion probabilities via the inverse logit (plogis) of the
# fitted coefficients, instead of re-typing rounded estimates by hand.
# The "## [1]" lines are output recorded from the original run.
plogis(coef(m0_raceh)[["(Intercept)"]])  # Not Hispanic
## [1] 0.3321
plogis(sum(coef(m0_raceh)))  # Hispanic
## [1] 0.3269
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m0_raceh), confint(m0_raceh)))
## Waiting for profiling to be done...
##                   OR  2.5 % 97.5 %
## (Intercept)   0.4972 0.4439 0.5562
## gard$hispanic 0.9768 0.5291 1.7368

# Other Race
# Logistic regression of Completed on the otherrace dummy alone (other vs. White, Black, or Hispanic).
m0_raceo <- glm(gard$Completed ~ gard$otherrace, family = binomial("logit"))
summary(m0_raceo)
## 
## Call:
## glm(formula = gard$Completed ~ gard$otherrace, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.945  -0.891  -0.891   1.494   1.494  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -0.7189     0.0608  -11.82   <2e-16 ***
## gard$otherrace   0.1445     0.1644    0.88     0.38    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1795.2  on 1411  degrees of freedom
## AIC: 1799
## 
## Number of Fisher Scoring iterations: 4
# Predicted completion probabilities via the inverse logit (plogis) of the
# fitted coefficients, instead of re-typing rounded estimates by hand.
# The "## [1]" lines are output recorded from the original run.
plogis(coef(m0_raceo)[["(Intercept)"]])  # Not Other (White, Black, or Hispanic)
## [1] 0.3276
plogis(sum(coef(m0_raceo)))  # Other Race
## [1] 0.3602
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m0_raceo), confint(m0_raceo)))
## Waiting for profiling to be done...
##                    OR  2.5 % 97.5 %
## (Intercept)    0.4873 0.4321 0.5485
## gard$otherrace 1.1555 0.8338 1.5899

Location

## Location -- ALL
# Logistic regression of Completed on three clinic dummies; Bayview is left
# out of the formula, so it is the reference level carried by the intercept.
m1_location <- glm(gard$Completed ~ gard$Odenton + gard$WhiteMarsh + gard$JohnsHopkins, 
    family = binomial("logit"))
summary(m1_location)
## 
## Call:
## glm(formula = gard$Completed ~ gard$Odenton + gard$WhiteMarsh + 
##     gard$JohnsHopkins, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.152  -0.919  -0.781   1.460   1.757  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.030      0.120   -8.61   <2e-16 ***
## gard$Odenton         0.387      0.141    2.75    0.006 ** 
## gard$WhiteMarsh      0.969      0.196    4.94    8e-07 ***
## gard$JohnsHopkins   -0.274      0.285   -0.96    0.336    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1764.9  on 1409  degrees of freedom
## AIC: 1773
## 
## Number of Fisher Scoring iterations: 4
# Plot each patient's fitted completion probability against the Location code,
# using filled squares (pch = 15).
plot(
  x = gard$Location,
  y = fitted(m1_location),
  pch = 15,
  xlab = "Location",
  ylab = "P(Completion)",
  main = "Completion Probability by Location"
)

plot of chunk unnamed-chunk-5

# Predicted completion probability for each clinic: inverse logit (plogis) of
# the intercept plus that clinic's dummy coefficient, taken from the fitted
# model instead of re-typed rounded estimates.
# The "## [1]" lines are output recorded from the original run.
plogis(sum(coef(m1_location)[c("(Intercept)", "gard$Odenton")]))  # For Odenton
## [1] 0.3446
plogis(sum(coef(m1_location)[c("(Intercept)", "gard$WhiteMarsh")]))  # For White Marsh
## [1] 0.4849
plogis(sum(coef(m1_location)[c("(Intercept)", "gard$JohnsHopkins")]))  # For Johns Hopkins
## [1] 0.2135
plogis(coef(m1_location)[["(Intercept)"]])  # For Bayview (reference level)
## [1] 0.2632
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m1_location), confint(m1_location)))
## Waiting for profiling to be done...
##                       OR  2.5 % 97.5 %
## (Intercept)       0.3571 0.2812 0.4496
## gard$Odenton      1.4723 1.1200 1.9462
## gard$WhiteMarsh   2.6353 1.7951 3.8788
## gard$JohnsHopkins 0.7600 0.4251 1.3062

# Odenton
# Logistic regression of Completed on the Odenton dummy alone (Odenton vs. all other clinics).
m0_locO <- glm(gard$Completed ~ gard$Odenton, family = binomial("logit"))
summary(m0_locO)
## 
## Call:
## glm(formula = gard$Completed ~ gard$Odenton, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.919  -0.919  -0.871   1.460   1.519  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -0.7748     0.0868   -8.93   <2e-16 ***
## gard$Odenton   0.1320     0.1144    1.15     0.25    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1794.7  on 1411  degrees of freedom
## AIC: 1799
## 
## Number of Fisher Scoring iterations: 4
# Predicted completion probabilities via the inverse logit (plogis) of the
# fitted coefficients, instead of re-typing rounded estimates by hand.
# The "## [1]" lines are output recorded from the original run.
plogis(coef(m0_locO)[["(Intercept)"]])  # Not Odenton
## [1] 0.3154
plogis(sum(coef(m0_locO)))  # Odenton
## [1] 0.3446
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m0_locO), confint(m0_locO)))
## Waiting for profiling to be done...
##                  OR  2.5 % 97.5 %
## (Intercept)  0.4608 0.3880 0.5453
## gard$Odenton 1.1411 0.9124 1.4287

# White Marsh
# Logistic regression of Completed on the WhiteMarsh dummy alone (White Marsh vs. all other clinics).
m0_locW <- glm(gard$Completed ~ gard$WhiteMarsh, family = binomial("logit"))
summary(m0_locW)
## 
## Call:
## glm(formula = gard$Completed ~ gard$WhiteMarsh, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.152  -0.864  -0.864   1.527   1.527  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -0.7922     0.0611  -12.96  < 2e-16 ***
## gard$WhiteMarsh   0.7316     0.1673    4.37  1.2e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1777.2  on 1411  degrees of freedom
## AIC: 1781
## 
## Number of Fisher Scoring iterations: 4
# Predicted completion probabilities via the inverse logit (plogis) of the
# fitted coefficients, instead of re-typing rounded estimates by hand.
# The "## [1]" lines are output recorded from the original run.
plogis(coef(m0_locW)[["(Intercept)"]])  # Not White Marsh
## [1] 0.3117
plogis(sum(coef(m0_locW)))  # White Marsh
## [1] 0.4848
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m0_locW), confint(m0_locW)))
## Waiting for profiling to be done...
##                     OR  2.5 % 97.5 %
## (Intercept)     0.4529 0.4014  0.510
## gard$WhiteMarsh 2.0783 1.4960  2.886

# Johns Hopkins
# Logistic regression of Completed on the JohnsHopkins dummy alone (Johns Hopkins vs. all other clinics).
m0_locJ <- glm(gard$Completed ~ gard$JohnsHopkins, family = binomial("logit"))
summary(m0_locJ)
## 
## Call:
## glm(formula = gard$Completed ~ gard$JohnsHopkins, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.911  -0.911  -0.911   1.469   1.757  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -0.664      0.058  -11.44   <2e-16 ***
## gard$JohnsHopkins   -0.640      0.265   -2.41    0.016 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1789.5  on 1411  degrees of freedom
## AIC: 1794
## 
## Number of Fisher Scoring iterations: 4
# Predicted completion probabilities via the inverse logit (plogis) of the
# fitted coefficients, instead of re-typing rounded estimates by hand.
# The "## [1]" lines are output recorded from the original run.
plogis(coef(m0_locJ)[["(Intercept)"]])  # Not Johns Hopkins
## [1] 0.3399
plogis(sum(coef(m0_locJ)))  # Johns Hopkins
## [1] 0.2135
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m0_locJ), confint(m0_locJ)))
## Waiting for profiling to be done...
##                       OR  2.5 % 97.5 %
## (Intercept)       0.5149 0.4592 0.5765
## gard$JohnsHopkins 0.5272 0.3054 0.8683

# Bayview
# Logistic regression of Completed on the Bayview dummy alone (Bayview vs. all other clinics).
m0_locB <- glm(gard$Completed ~ gard$Bayview, family = binomial("logit"))
summary(m0_locB)
## 
## Call:
## glm(formula = gard$Completed ~ gard$Bayview, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.937  -0.937  -0.781   1.438   1.634  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -0.5949     0.0644   -9.24   <2e-16 ***
## gard$Bayview  -0.4347     0.1358   -3.20   0.0014 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1785.4  on 1411  degrees of freedom
## AIC: 1789
## 
## Number of Fisher Scoring iterations: 4
# Predicted completion probabilities via the inverse logit (plogis) of the
# fitted coefficients, instead of re-typing rounded estimates by hand.
# The "## [1]" lines are output recorded from the original run.
plogis(coef(m0_locB)[["(Intercept)"]])  # Not Bayview
## [1] 0.3555
plogis(sum(coef(m0_locB)))  # Bayview
## [1] 0.2632
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m0_locB), confint(m0_locB)))
## Waiting for profiling to be done...
##                  OR  2.5 % 97.5 %
## (Intercept)  0.5516 0.4858 0.6254
## gard$Bayview 0.6474 0.4945 0.8423

Location Type

## LocationType - Urban
# Logistic regression of Completed on the raw LocationType code; the plot and
# probability lines for this model label 0 as suburban and 1 as urban.
m1_locationtype <- glm(gard$Completed ~ gard$LocationType, family = binomial("logit"))
summary(m1_locationtype)
## 
## Call:
## glm(formula = gard$Completed ~ gard$LocationType, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.959  -0.959  -0.764   1.413   1.657  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -0.5381     0.0668   -8.06  7.9e-16 ***
## gard$LocationType  -0.5429     0.1273   -4.26  2.0e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1777.1  on 1411  degrees of freedom
## AIC: 1781
## 
## Number of Fisher Scoring iterations: 4
# Plot each patient's fitted completion probability against the LocationType
# code, using filled squares (pch = 15).
plot(
  x = gard$LocationType,
  y = fitted(m1_locationtype),
  pch = 15,
  xlab = "Urban (1) or Suburban (0)",
  ylab = "P(Completion)",
  main = "Completion Probability by Location Type"
)

plot of chunk unnamed-chunk-6

# Predicted completion probabilities via the inverse logit (plogis) of the
# fitted coefficients, instead of re-typing rounded estimates by hand.
# The "## [1]" lines are output recorded from the original run.
plogis(coef(m1_locationtype)[["(Intercept)"]])  # Suburban (LocationType = 0)
## [1] 0.3686
plogis(sum(coef(m1_locationtype)))  # Urban (LocationType = 1)
## [1] 0.2533
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m1_locationtype), confint(m1_locationtype)))
## Waiting for profiling to be done...
##                       OR  2.5 % 97.5 %
## (Intercept)       0.5839 0.5118 0.6651
## gard$LocationType 0.5811 0.4516 0.7441

## LocationType - Suburban
# Same model with the coding flipped: the predictor is gard$urban, which is 1
# when LocationType == 0, so the slope changes sign relative to m1_locationtype.
m0_locationtype <- glm(gard$Completed ~ gard$urban, family = binomial("logit"))
summary(m0_locationtype)
## 
## Call:
## glm(formula = gard$Completed ~ gard$urban, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.959  -0.959  -0.764   1.413   1.657  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -1.081      0.108   -9.97   <2e-16 ***
## gard$urban     0.543      0.127    4.26    2e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1777.1  on 1411  degrees of freedom
## AIC: 1781
## 
## Number of Fisher Scoring iterations: 4
# Predicted completion probabilities via the inverse logit (plogis) of the
# fitted coefficients, instead of re-typing rounded estimates by hand.
# gard$urban is 1 when LocationType == 0, hence the labels below.
# The "## [1]" lines are output recorded from the original run.
plogis(coef(m0_locationtype)[["(Intercept)"]])  # Urban (gard$urban = 0)
## [1] 0.2533
plogis(sum(coef(m0_locationtype)))  # Suburban (gard$urban = 1)
## [1] 0.3687
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m0_locationtype), confint(m0_locationtype)))
## Waiting for profiling to be done...
##                 OR  2.5 % 97.5 %
## (Intercept) 0.3393 0.2733 0.4181
## gard$urban  1.7209 1.3440 2.2146

Practice Type

## Practice Type -- ALL
# Logistic regression of Completed on two practice dummies; pediatric is left
# out of the formula, so it is the reference level carried by the intercept.
m1_practice <- glm(gard$Completed ~ gard$obgyn + gard$family, family = binomial("logit"))
summary(m1_practice)
## 
## Call:
## glm(formula = gard$Completed ~ gard$obgyn + gard$family, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.973  -0.869  -0.828   1.397   1.573  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.7789     0.0949   -8.21  2.3e-16 ***
## gard$obgyn    0.2770     0.1304    2.13    0.034 *  
## gard$family  -0.1145     0.1493   -0.77    0.443    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1787.6  on 1410  degrees of freedom
## AIC: 1794
## 
## Number of Fisher Scoring iterations: 4
# Plot each patient's fitted completion probability against the PracticeType
# code, using filled squares (pch = 15).
plot(
  x = gard$PracticeType,
  y = fitted(m1_practice),
  pch = 15,
  xlab = "Practice Type",
  ylab = "P(Completion)",
  main = " Completion Probability by Practice Type"
)

plot of chunk unnamed-chunk-7

# Predicted completion probability for each practice type: inverse logit
# (plogis) of the intercept plus that type's dummy coefficient, taken from the
# fitted model instead of re-typed rounded estimates.
# The "## [1]" lines are output recorded from the original run.
plogis(sum(coef(m1_practice)[c("(Intercept)", "gard$obgyn")]))  # For OB-GYN
## [1] 0.3771
plogis(sum(coef(m1_practice)[c("(Intercept)", "gard$family")]))  # For Family
## [1] 0.2904
plogis(coef(m1_practice)[["(Intercept)"]])  # For Pediatric (reference level)
## [1] 0.3146
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m1_practice), confint(m1_practice)))
## Waiting for profiling to be done...
##                 OR  2.5 % 97.5 %
## (Intercept) 0.4589 0.3802 0.5516
## gard$obgyn  1.3192 1.0222 1.7044
## gard$family 0.8918 0.6645 1.1938

# OB-GYN
# Logistic regression of Completed on the obgyn dummy alone (OB-GYN vs. all other practices).
m0_pracO <- glm(gard$Completed ~ gard$obgyn, family = binomial("logit"))
summary(m0_pracO)
## 
## Call:
## glm(formula = gard$Completed ~ gard$obgyn, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.973  -0.852  -0.852   1.397   1.542  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.8257     0.0732   -11.3   <2e-16 ***
## gard$obgyn    0.3239     0.1156     2.8   0.0051 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1788.2  on 1411  degrees of freedom
## AIC: 1792
## 
## Number of Fisher Scoring iterations: 4
# Predicted completion probabilities via the inverse logit (plogis) of the
# fitted coefficients, instead of re-typing rounded estimates by hand.
# The "## [1]" lines are output recorded from the original run.
plogis(coef(m0_pracO)[["(Intercept)"]])  # Not OB-GYN
## [1] 0.3045
plogis(sum(coef(m0_pracO)))  # OB-GYN
## [1] 0.3771
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m0_pracO), confint(m0_pracO)))
## Waiting for profiling to be done...
##                 OR  2.5 % 97.5 %
## (Intercept) 0.4379 0.3788 0.5049
## gard$obgyn  1.3825 1.1020 1.7337

# Family
# Logistic regression of Completed on the family dummy alone (family practice vs. all others).
m0_pracF <- glm(gard$Completed ~ gard$family, family = binomial("logit"))
summary(m0_pracF)
## 
## Call:
## glm(formula = gard$Completed ~ gard$family, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.922  -0.922  -0.828   1.456   1.573  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.6350     0.0649   -9.78   <2e-16 ***
## gard$family  -0.2584     0.1323   -1.95    0.051 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1792.1  on 1411  degrees of freedom
## AIC: 1796
## 
## Number of Fisher Scoring iterations: 4
# Predicted completion probabilities via the inverse logit (plogis) of the
# fitted coefficients, instead of re-typing rounded estimates by hand.
# The "## [1]" lines are output recorded from the original run.
plogis(coef(m0_pracF)[["(Intercept)"]])  # Not Family
## [1] 0.3464
plogis(sum(coef(m0_pracF)))  # Family
## [1] 0.2904
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m0_pracF), confint(m0_pracF)))
## Waiting for profiling to be done...
##                 OR  2.5 % 97.5 %
## (Intercept) 0.5299 0.4662 0.6014
## gard$family 0.7723 0.5943 0.9987

# Pediatric
# Logistic regression of Completed on the pediatric dummy alone (pediatric vs. all other practices).
m0_pracP <- glm(gard$Completed ~ gard$pediatric, family = binomial("logit"))
summary(m0_pracP)
## 
## Call:
## glm(formula = gard$Completed ~ gard$pediatric, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.915  -0.915  -0.869   1.465   1.521  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -0.6550     0.0704   -9.31   <2e-16 ***
## gard$pediatric  -0.1239     0.1181   -1.05     0.29    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1794.9  on 1411  degrees of freedom
## AIC: 1799
## 
## Number of Fisher Scoring iterations: 4
# Predicted completion probabilities via the inverse logit (plogis) of the
# fitted coefficients, instead of re-typing rounded estimates by hand.
# The "## [1]" lines are output recorded from the original run.
plogis(coef(m0_pracP)[["(Intercept)"]])  # Not Pediatric
## [1] 0.3419
plogis(sum(coef(m0_pracP)))  # Pediatric
## [1] 0.3146
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m0_pracP), confint(m0_pracP)))
## Waiting for profiling to be done...
##                    OR  2.5 % 97.5 %
## (Intercept)    0.5195 0.4521 0.5957
## gard$pediatric 0.8835 0.7001 1.1126

Insurance Type

## Insurance Type - ALL
# Logistic regression of Completed on three insurance dummies; military is left
# out of the formula, so it is the reference level carried by the intercept.
m1_insurance <- glm(gard$Completed ~ gard$assisted + gard$private + gard$hospital, 
    family = binomial("logit"))
summary(m1_insurance)
## 
## Call:
## glm(formula = gard$Completed ~ gard$assisted + gard$private + 
##     gard$hospital, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.117  -0.928  -0.928   1.413   1.794  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -0.538      0.114   -4.72  2.3e-06 ***
## gard$assisted   -0.848      0.189   -4.49  7.2e-06 ***
## gard$private    -0.081      0.138   -0.59     0.56    
## gard$hospital    0.395      0.247    1.60     0.11    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1763.1  on 1409  degrees of freedom
## AIC: 1771
## 
## Number of Fisher Scoring iterations: 4
# Plot each patient's fitted completion probability against the InsuranceType
# code, using filled squares (pch = 15).
plot(
  x = gard$InsuranceType,
  y = fitted(m1_insurance),
  pch = 15,
  xlab = "Insurance Type",
  ylab = "P(Completion)",
  main = "Completion Probability by Insurance Type"
)

plot of chunk unnamed-chunk-8

# Predicted completion probability for each insurance type: inverse logit
# (plogis) of the intercept plus that type's dummy coefficient, taken from the
# fitted model instead of re-typed rounded estimates.
# The "## [1]" lines are output recorded from the original run.
plogis(sum(coef(m1_insurance)[c("(Intercept)", "gard$assisted")]))  # For Assisted
## [1] 0.2
plogis(sum(coef(m1_insurance)[c("(Intercept)", "gard$private")]))  # For Private
## [1] 0.3499
plogis(sum(coef(m1_insurance)[c("(Intercept)", "gard$hospital")]))  # For Hospital
## [1] 0.4643
plogis(coef(m1_insurance)[["(Intercept)"]])  # For Military (reference level)
## [1] 0.3686
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m1_insurance), confint(m1_insurance)))
## Waiting for profiling to be done...
##                   OR  2.5 % 97.5 %
## (Intercept)   0.5837 0.4658 0.7284
## gard$assisted 0.4283 0.2942 0.6176
## gard$private  0.9222 0.7042 1.2103
## gard$hospital 1.4847 0.9133 2.4075

# Military
# Logistic regression of Completed on the military dummy alone (military vs. all other insurance types).
m0_insm <- glm(gard$Completed ~ gard$military, family = binomial("logit"))
summary(m0_insm)
## 
## Call:
## glm(formula = gard$Completed ~ gard$military, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.959  -0.879  -0.879   1.413   1.508  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.7505     0.0651  -11.52   <2e-16 ***
## gard$military   0.2122     0.1312    1.62     0.11    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1793.4  on 1411  degrees of freedom
## AIC: 1797
## 
## Number of Fisher Scoring iterations: 4
# Predicted completion probabilities via the inverse logit (plogis) of the
# fitted coefficients, instead of re-typing rounded estimates by hand.
# The "## [1]" lines are output recorded from the original run.
plogis(coef(m0_insm)[["(Intercept)"]])  # Not Military
## [1] 0.3207
plogis(sum(coef(m0_insm)))  # Military
## [1] 0.3686
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m0_insm), confint(m0_insm)))
## Waiting for profiling to be done...
##                   OR  2.5 % 97.5 %
## (Intercept)   0.4721 0.4151 0.5359
## gard$military 1.2364 0.9545 1.5971

# Private
# Logistic regression of Completed on the private dummy alone (private vs. all other insurance types).
m0_insp <- glm(gard$Completed ~ gard$private, family = binomial("logit"))
summary(m0_insp)
## 
## Call:
## glm(formula = gard$Completed ~ gard$private, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.928  -0.928  -0.867   1.449   1.524  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -0.7859     0.0821   -9.57   <2e-16 ***
## gard$private   0.1666     0.1132    1.47     0.14    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1793.8  on 1411  degrees of freedom
## AIC: 1798
## 
## Number of Fisher Scoring iterations: 4
# Predicted completion probabilities via the inverse logit (plogis) of the
# fitted coefficients, instead of re-typing rounded estimates by hand.
# The "## [1]" lines are output recorded from the original run.
plogis(coef(m0_insp)[["(Intercept)"]])  # Not Private
## [1] 0.313
plogis(sum(coef(m0_insp)))  # Private
## [1] 0.3499
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m0_insp), confint(m0_insp)))
## Waiting for profiling to be done...
##                  OR  2.5 % 97.5 %
## (Intercept)  0.4557 0.3873 0.5344
## gard$private 1.1813 0.9464 1.4753

# Hospital
# Logistic regression of Completed on the hospital dummy alone (hospital vs. all other insurance types).
m0_insh <- glm(gard$Completed ~ gard$hospital, family = binomial("logit"))
summary(m0_insh)
## 
## Call:
## glm(formula = gard$Completed ~ gard$hospital, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.117  -0.884  -0.884   1.502   1.502  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.7375     0.0586  -12.58   <2e-16 ***
## gard$hospital   0.5944     0.2265    2.62   0.0087 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1789.3  on 1411  degrees of freedom
## AIC: 1793
## 
## Number of Fisher Scoring iterations: 4
# Predicted completion probabilities from m0_insh (inverse logit of the
# linear predictor with the dummy set to 0 and to 1).
plogis(-0.7375 + 0.5944 * 0)  # Not Hospital
## [1] 0.3236
plogis(-0.7375 + 0.5944 * 1)  # Hospital
## [1] 0.4643
# Odds ratios with profile-likelihood confidence limits.
exp(cbind(OR = coef(m0_insh), confint(m0_insh)))
## Waiting for profiling to be done...
##                   OR 2.5 % 97.5 %
## (Intercept)   0.4783 0.426 0.5362
## gard$hospital 1.8119 1.158 2.8230

# Assisted
# Logistic regression of completion on the assisted-insurance dummy
# (gard$assisted is 1 when InsuranceType == 0, 0 otherwise).
m0_insa <- glm(gard$Completed ~ gard$assisted, family = binomial("logit"))
summary(m0_insa)
## 
## Call:
## glm(formula = gard$Completed ~ gard$assisted, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.951  -0.951  -0.951   1.422   1.794  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.5589     0.0616   -9.07  < 2e-16 ***
## gard$assisted  -0.8274     0.1629   -5.08  3.8e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1767.3  on 1411  degrees of freedom
## AIC: 1771
## 
## Number of Fisher Scoring iterations: 4
# Predicted completion probabilities from m0_insa. The coefficients must be
# the ones this model reports (intercept -0.55893, assisted -0.82737); the
# earlier values (-0.65497, -0.1239) came from a different fit and gave
# probabilities that disagreed with the odds ratios printed below.
exp(-0.55893 - 0.82737 * 0)/(1 + exp(-0.55893 - 0.82737 * 0))  # Not Assisted
## [1] 0.3638
exp(-0.55893 - 0.82737 * 1)/(1 + exp(-0.55893 - 0.82737 * 1))  # Assisted
## [1] 0.2
exp(cbind(OR = coef(m0_insa), confint(m0_insa)))
## Waiting for profiling to be done...
##                   OR  2.5 % 97.5 %
## (Intercept)   0.5718 0.5064 0.6448
## gard$assisted 0.4372 0.3152 0.5975

Answering Question 2: Which patient characteristics best predict Gardasil vaccination completion?

Labeling Technique:
Use m#_data – m for model, # for the number of variables, and data for the independent characteristic being used.
Hypothesis Tests: Applicable for all models in this section.
Null Hypothesis (Deviance Test): The model does not explain variation in completion probability.
Alt Hypothesis (Deviance Test): The model explains some of the variation in completion probability.
For each coefficient, use Wald's Test:
Null Hypothesis (Wald's Test): The coefficient does not explain variation in completion probability.
Alt Hypothesis (Wald's Test): The coefficient explains some of the variation in completion probability.
Logistic Regression Models By Category

## Age Group
# Logistic regression of completion on the binary AgeGroup indicator
# (0 = ages 11-17, 1 = ages 18-26, as labelled in the probability
# calculations that follow the plot).
m1_agegrp <- glm(gard$Completed ~ gard$AgeGroup, family = binomial("logit"))
summary(m1_agegrp)  # AIC = 1797.4, Residual deviance = 1793.4
## 
## Call:
## glm(formula = gard$Completed ~ gard$AgeGroup, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.932  -0.932  -0.865   1.444   1.527  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.6087     0.0791   -7.70  1.4e-14 ***
## gard$AgeGroup  -0.1830     0.1131   -1.62     0.11    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1793.4  on 1411  degrees of freedom
## AIC: 1797
## 
## Number of Fisher Scoring iterations: 4
# Fitted completion probability for each observation against its age group.
plot(x = gard$AgeGroup, y = fitted(m1_agegrp), pch = 15,
    main = "Completion Probability by Age Group",
    xlab = "Age Group", ylab = "P(Completion)")

plot of chunk unnamed-chunk-9

# Predicted completion probabilities from m1_agegrp (inverse logit of the
# linear predictor for each age group).
plogis(-0.60871 - 0.18302 * 0)  # For ages 11-17
## [1] 0.3524
plogis(-0.60871 - 0.18302 * 1)  # For ages 18-26
## [1] 0.3118
# Odds ratios with profile-likelihood confidence limits.
exp(cbind(OR = coef(m1_agegrp), confint(m1_agegrp)))
## Waiting for profiling to be done...
##                   OR  2.5 % 97.5 %
## (Intercept)   0.5441 0.4654 0.6345
## gard$AgeGroup 0.8328 0.6669 1.0393

## Age
# Logistic regression of completion on age in years (continuous).
m1_age <- glm(gard$Completed ~ gard$Age, family = binomial("logit"))
summary(m1_age)  # AIC = 1797.8, Residual deviance = 1793.8
## 
## Call:
## glm(formula = gard$Completed ~ gard$Age, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.954  -0.909  -0.873   1.454   1.552  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  -0.3341     0.2533   -1.32     0.19
## gard$Age     -0.0198     0.0134   -1.47     0.14
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1793.8  on 1411  degrees of freedom
## AIC: 1798
## 
## Number of Fisher Scoring iterations: 4
# Fitted completion probability against age in years.
# (Title typo "Probaiblity" corrected.)
plot(gard$Age, fitted(m1_age), main = "Completion Probability by Age", xlab = "Age (yrs)", 
    ylab = "P(Completion)", pch = 15)

plot of chunk unnamed-chunk-9

# Disabled: these two lines are the AgeGroup calculation (they use
# m1_agegrp's coefficients) and do not apply to the continuous-age model.
# exp(-0.60871 - 0.18302*0) / (1 + exp(-0.60871 - 0.18302*0)) # For ages 11-17
# exp(-0.60871 - 0.18302*1) / (1 + exp(-0.60871 - 0.18302*1)) # For ages 18-26
# Odds ratio per additional year of age, with profile-likelihood limits.
exp(cbind(OR = coef(m1_age), confint(m1_age)))
## Waiting for profiling to be done...
##                 OR  2.5 % 97.5 %
## (Intercept) 0.7160 0.4353  1.176
## gard$Age    0.9804 0.9549  1.006

## Race
# Logistic regression of completion on the race dummies. otherrace
# (Race == 3) is omitted, so it is the baseline each coefficient is
# compared against.
m1_race <- glm(gard$Completed ~ gard$white + gard$black + gard$hispanic, family = binomial("logit"))
summary(m1_race)  # AIC = 1776, Residual deviance = 1768
## 
## Call:
## glm(formula = gard$Completed ~ gard$white + gard$black + gard$hispanic, 
##     family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.982  -0.982  -0.736   1.386   1.697  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.5744     0.1527   -3.76  0.00017 ***
## gard$white      0.0955     0.1706    0.56  0.57552    
## gard$black     -0.5947     0.1892   -3.14  0.00168 ** 
## gard$hispanic  -0.1477     0.3328   -0.44  0.65712    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796  on 1412  degrees of freedom
## Residual deviance: 1768  on 1409  degrees of freedom
## AIC: 1776
## 
## Number of Fisher Scoring iterations: 4
# Fitted completion probability for each observation against its race code.
plot(x = gard$Race, y = fitted(m1_race), pch = 15,
    main = "Completion Probability by Race",
    xlab = "Race", ylab = "P(Completion)")

plot of chunk unnamed-chunk-9

# Predicted completion probability for each race category from m1_race.
# OtherRace is the baseline: all three dummies are 0.
plogis(-0.57443 + 0.09554 * 1 - 0.59465 * 0 - 0.1477 * 0)  # For White
## [1] 0.3825
plogis(-0.57443 + 0.09554 * 0 - 0.59465 * 1 - 0.1477 * 0)  # For Black
## [1] 0.237
plogis(-0.57443 + 0.09554 * 0 - 0.59465 * 0 - 0.1477 * 1)  # For Hispanic
## [1] 0.3269
plogis(-0.57443 + 0.09554 * 0 - 0.59465 * 0 - 0.1477 * 0)  # For OtherRace
## [1] 0.3602
# Odds ratios (relative to OtherRace) with profile-likelihood limits.
exp(cbind(OR = coef(m1_race), confint(m1_race)))
## Waiting for profiling to be done...
##                   OR  2.5 % 97.5 %
## (Intercept)   0.5630 0.4154 0.7568
## gard$white    1.1003 0.7897 1.5428
## gard$black    0.5518 0.3811 0.8009
## gard$hispanic 0.8627 0.4414 1.6375

## Insurance Type
# Logistic regression of completion on three insurance dummies. The
# assisted dummy is omitted, so assisted insurance is the baseline level.
m1_insurance <- glm(gard$Completed ~ gard$military + gard$private + gard$hospital, 
    family = binomial("logit"))
summary(m1_insurance)  # AIC: 1771.1, Residual deviance: 1763.1
## 
## Call:
## glm(formula = gard$Completed ~ gard$military + gard$private + 
##     gard$hospital, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.117  -0.928  -0.928   1.413   1.794  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -1.386      0.151   -9.20  < 2e-16 ***
## gard$military    0.848      0.189    4.49  7.2e-06 ***
## gard$private     0.767      0.170    4.52  6.2e-06 ***
## gard$hospital    1.243      0.266    4.68  2.9e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1763.1  on 1409  degrees of freedom
## AIC: 1771
## 
## Number of Fisher Scoring iterations: 4
# Fitted completion probability for each observation against its insurance
# type code.
plot(x = gard$InsuranceType, y = fitted(m1_insurance), pch = 15,
    main = "Completion Probability by Insurance Type",
    xlab = "Insurance Type", ylab = "P(Completion)")

plot of chunk unnamed-chunk-9

# Predicted completion probability for each insurance type, taken from the
# fitted m1_insurance coefficients. The previously hard-coded numbers
# (-0.53831, -0.84798, -0.08103, 0.39521) belong to a military-baseline
# parameterisation and do not appear in the m1_insurance summary above.
# In m1_insurance, assisted is the baseline, so its probability is the
# inverse logit of the intercept alone. sum() drops the coefficient names so
# the results print as plain numbers.
ins_coef <- coef(m1_insurance)
plogis(sum(ins_coef["(Intercept)"]))  # For Assisted
## [1] 0.2
plogis(sum(ins_coef[c("(Intercept)", "gard$private")]))  # For Private
## [1] 0.3499
plogis(sum(ins_coef[c("(Intercept)", "gard$hospital")]))  # For Hospital
## [1] 0.4643
plogis(sum(ins_coef[c("(Intercept)", "gard$military")]))  # For Military
## [1] 0.3686
# Odds ratios (relative to assisted) with profile-likelihood limits.
exp(cbind(OR = coef(m1_insurance), confint(m1_insurance)))
## Waiting for profiling to be done...
##                  OR  2.5 % 97.5 %
## (Intercept)   0.250 0.1843 0.3332
## gard$military 2.335 1.6191 3.3996
## gard$private  2.153 1.5534 3.0247
## gard$hospital 3.467 2.0598 5.8496

## Med Assisted
# Logistic regression of completion on the MedAssist indicator.
m1_medassist <- glm(gard$Completed ~ gard$MedAssist, family = binomial("logit"))
summary(m1_medassist)  # AIC: 1771.3, Residual deviance: 1767.3
## 
## Call:
## glm(formula = gard$Completed ~ gard$MedAssist, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.951  -0.951  -0.951   1.422   1.794  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -0.5589     0.0616   -9.07  < 2e-16 ***
## gard$MedAssist  -0.8274     0.1629   -5.08  3.8e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1767.3  on 1411  degrees of freedom
## AIC: 1771
## 
## Number of Fisher Scoring iterations: 4
# Fitted completion probability by medical-assistance status. Uses the
# already-fitted m1_medassist (same formula, binomial family with its default
# logit link) instead of refitting the model inside the plot call.
plot(gard$MedAssist, fitted(m1_medassist), main = "Completion Probability by Assistance", 
    xlab = "Medical Assistance", ylab = "P(Completion)", pch = 15)

plot of chunk unnamed-chunk-9

# Predicted completion probabilities from m1_medassist (inverse logit of the
# linear predictor with MedAssist set to 0 and to 1).
plogis(-0.55893 - 0.82737 * 0)  # No Med Assist
## [1] 0.3638
plogis(-0.55893 - 0.82737 * 1)  # Yes Med Assist
## [1] 0.2
# Odds ratios with profile-likelihood confidence limits.
exp(cbind(OR = coef(m1_medassist), confint(m1_medassist)))
## Waiting for profiling to be done...
##                    OR  2.5 % 97.5 %
## (Intercept)    0.5718 0.5064 0.6448
## gard$MedAssist 0.4372 0.3152 0.5975

## LocationType
# Logistic regression of completion on LocationType (0 = suburban,
# 1 = urban, as labelled on the plot axis below).
m1_locationtype <- glm(gard$Completed ~ gard$LocationType, family = binomial("logit"))
summary(m1_locationtype)  # AIC: 1781.1, Residual deviance: 1777.1
## 
## Call:
## glm(formula = gard$Completed ~ gard$LocationType, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.959  -0.959  -0.764   1.413   1.657  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -0.5381     0.0668   -8.06  7.9e-16 ***
## gard$LocationType  -0.5429     0.1273   -4.26  2.0e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1777.1  on 1411  degrees of freedom
## AIC: 1781
## 
## Number of Fisher Scoring iterations: 4
# Fitted completion probability by location type. Uses the already-fitted
# m1_locationtype (same formula, binomial family with its default logit link)
# instead of refitting the model inside the plot call.
plot(gard$LocationType, fitted(m1_locationtype), main = "Completion Probability by Location Type", 
    xlab = "Urban (1) or Suburban (0)", ylab = "P(Completion)", pch = 15)

plot of chunk unnamed-chunk-9

# Predicted completion probabilities from m1_locationtype (inverse logit of
# the linear predictor for each location type).
plogis(-0.5381 - 0.5429 * 0)  # Suburban
## [1] 0.3686
plogis(-0.5381 - 0.5429 * 1)  # Urban
## [1] 0.2533
# Odds ratios with profile-likelihood confidence limits.
exp(cbind(OR = coef(m1_locationtype), confint(m1_locationtype)))
## Waiting for profiling to be done...
##                       OR  2.5 % 97.5 %
## (Intercept)       0.5839 0.5118 0.6651
## gard$LocationType 0.5811 0.4516 0.7441

## Location
# Logistic regression of completion on three location dummies. Bayview has
# no dummy in the model, so it is the baseline level.
m1_location <- glm(gard$Completed ~ gard$Odenton + gard$WhiteMarsh + gard$JohnsHopkins, 
    family = binomial("logit"))
summary(m1_location)  # AIC: 1772.9, Residual deviance: 1764.9
## 
## Call:
## glm(formula = gard$Completed ~ gard$Odenton + gard$WhiteMarsh + 
##     gard$JohnsHopkins, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.152  -0.919  -0.781   1.460   1.757  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.030      0.120   -8.61   <2e-16 ***
## gard$Odenton         0.387      0.141    2.75    0.006 ** 
## gard$WhiteMarsh      0.969      0.196    4.94    8e-07 ***
## gard$JohnsHopkins   -0.274      0.285   -0.96    0.336    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1764.9  on 1409  degrees of freedom
## AIC: 1773
## 
## Number of Fisher Scoring iterations: 4
# Plot the fitted values of m1_location, the dummy-variable model summarized
# above. The previous call refit completion on the numeric Location code,
# which forces a single linear trend across the site codes and is a
# different model from the one being reported.
plot(gard$Location, fitted(m1_location), main = "Completion Probability by Location", 
    xlab = "Location", ylab = "P(Completion)", pch = 15)

plot of chunk unnamed-chunk-9

# Predicted completion probability for each location from m1_location.
# Bayview is the baseline: all three dummies are 0.
plogis(-1.0296 + 0.3868 * 1 + 0.969 * 0 - 0.2744 * 0)  # For Odenton
## [1] 0.3446
plogis(-1.0296 + 0.3868 * 0 + 0.969 * 1 - 0.2744 * 0)  # For White Marsh
## [1] 0.4849
plogis(-1.0296 + 0.3868 * 0 + 0.969 * 0 - 0.2744 * 1)  # For Johns Hopkins
## [1] 0.2135
plogis(-1.0296 + 0.3868 * 0 + 0.969 * 0 - 0.2744 * 0)  # For Bayview
## [1] 0.2632
# Odds ratios (relative to Bayview) with profile-likelihood limits.
exp(cbind(OR = coef(m1_location), confint(m1_location)))
## Waiting for profiling to be done...
##                       OR  2.5 % 97.5 %
## (Intercept)       0.3571 0.2812 0.4496
## gard$Odenton      1.4723 1.1200 1.9462
## gard$WhiteMarsh   2.6353 1.7951 3.8788
## gard$JohnsHopkins 0.7600 0.4251 1.3062

## Practice Type
# Logistic regression of completion on two practice-type dummies. Pediatric
# has no dummy in the model, so it is the baseline level.
m1_practice <- glm(gard$Completed ~ gard$obgyn + gard$family, family = binomial("logit"))
summary(m1_practice)  # AIC: 1793.6, Residual deviance: 1787.6
## 
## Call:
## glm(formula = gard$Completed ~ gard$obgyn + gard$family, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.973  -0.869  -0.828   1.397   1.573  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.7789     0.0949   -8.21  2.3e-16 ***
## gard$obgyn    0.2770     0.1304    2.13    0.034 *  
## gard$family  -0.1145     0.1493   -0.77    0.443    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1787.6  on 1410  degrees of freedom
## AIC: 1794
## 
## Number of Fisher Scoring iterations: 4
# Plot the fitted values of m1_practice, the dummy-variable model summarized
# above. The previous call refit completion on the numeric PracticeType code,
# which forces a single linear trend across the practice codes and is a
# different model from the one being reported.
plot(gard$PracticeType, fitted(m1_practice), main = "Completion Probability by Practice Type", 
    xlab = "Practice Type", ylab = "P(Completion)", pch = 15)

plot of chunk unnamed-chunk-9

# Predicted completion probability for each practice type from m1_practice.
# Pediatric is the baseline: both dummies are 0.
plogis(-0.7789 + 0.277 * 1 - 0.1145 * 0)  # For OB-GYN
## [1] 0.3771
plogis(-0.7789 + 0.277 * 0 - 0.1145 * 1)  # For Family
## [1] 0.2904
plogis(-0.7789 + 0.277 * 0 - 0.1145 * 0)  # For Pediatric
## [1] 0.3146
# Odds ratios (relative to Pediatric) with profile-likelihood limits.
exp(cbind(OR = coef(m1_practice), confint(m1_practice)))
## Waiting for profiling to be done...
##                 OR  2.5 % 97.5 %
## (Intercept) 0.4589 0.3802 0.5516
## gard$obgyn  1.3192 1.0222 1.7044
## gard$family 0.8918 0.6645 1.1938

The best model from the above options uses insurance type. Next, check to see if the full model is really better than all possible reduced models.

Residuals of Groups Logistic Regression Models

## Age Group
# Residuals against fitted probabilities for m1_agegrp. The x-axis holds the
# model's fitted values, not the age group, so it is labelled as such.
# `$residuals` on a glm object holds the working residuals.
plot(m1_agegrp$residuals ~ m1_agegrp$fitted.values, main = "Residuals for Age Group", 
    xlab = "Fitted P(Completion)", ylab = "Residuals")

plot of chunk unnamed-chunk-10

# Summary of the working residuals stored on m1_agegrp.
summary(m1_agegrp$residuals)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -1.54   -1.54   -1.45    0.00    2.84    3.21

## Location
# Residuals against fitted probabilities for m1_location. The x-axis holds
# the model's fitted values, not the location code, so it is labelled as
# such. `$residuals` on a glm object holds the working residuals.
plot(m1_location$residuals ~ m1_location$fitted.values, main = "Residuals for Location", 
    xlab = "Fitted P(Completion)", ylab = "Residuals")

plot of chunk unnamed-chunk-10

# Summary of the working residuals stored on m1_location.
summary(m1_location$residuals)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -1.94   -1.53   -1.36    0.00    2.90    4.68

# Mean = 0.000 for all categorical x variables except age, which is the
# only continuous x variable.
# NOTE: `$residuals` on a glm object holds the working residuals; use
# residuals(model, type = "deviance") or type = "pearson" for the residuals
# normally used in diagnostics.
summary(m1_agegrp$residuals)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -1.54   -1.54   -1.45    0.00    2.84    3.21
summary(m1_age$residuals)  # Mean = 0.000179
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -1.58   -1.51   -1.46    0.00    2.88    3.33
summary(m1_insurance$residuals)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -1.87   -1.54   -1.54    0.00    2.71    5.00
summary(m1_location$residuals)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -1.94   -1.53   -1.36    0.00    2.90    4.68
summary(m1_locationtype$residuals)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -1.58   -1.58   -1.34    0.00    2.71    3.95
summary(m1_medassist$residuals)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -1.57   -1.57   -1.57    0.00    2.75    5.00
summary(m1_practice$residuals)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -1.61   -1.46   -1.41    0.00    2.65    3.44
summary(m1_race$residuals)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -1.62   -1.62   -1.31    0.00    2.61    4.22

Based on the above findings, residuals are not a useful tool for evaluating these models.

Alternative Multiple Logistic Regression Models Use Any Significant Binary

# Anything Significant - The idea here is to include every variable that
# had a significant coefficient.

# Subsetting Locations: one data frame per location, selected on its dummy,
# then check which LocationType value(s) occur at each location.
Odenton <- gard[gard[["Odenton"]] == 1, ]
WhiteMarsh <- gard[gard[["WhiteMarsh"]] == 1, ]
JohnsHopkins <- gard[gard[["JohnsHopkins"]] == 1, ]
Bayview <- gard[gard[["Bayview"]] == 1, ]
summary(Odenton[["LocationType"]])  # All Suburban
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0       0       0       0
summary(WhiteMarsh[["LocationType"]])  # All Suburban
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0       0       0       0
summary(JohnsHopkins[["LocationType"]])  # All Urban
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       1       1       1       1       1       1
summary(Bayview[["LocationType"]])  # All Urban
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       1       1       1       1       1       1

# Don't include both urban and suburban because location type is a binary
# characteristic.  Since Odenton and WhiteMarsh are both suburban and
# Johns Hopkins and Bayview are both urban, including LocationType makes
# including both JohnsHopkins and Bayview redundant.  These have also been
# omitted.  WhiteMarsh is included because it is significant, whereas
# Odenton is not.
# NOTE(review): in the m1_location summary, Odenton's coefficient is also
# significant (p = 0.006) -- confirm the rationale for omitting it.

# Deviance Test Null Hypothesis: This model predicts the probability that
# an individual completed the Gardasil regimen no better than the overall
# (average) probability.  Wald's Test: For each independent variable,
# there is also the null hypothesis that the coefficient for that variable
# = 0.
m8_allsigs <- glm(Completed ~ white + black + LocationType + WhiteMarsh + obgyn + 
    family + assisted + hospital, data = gard, family = binomial("logit"))
summary(m8_allsigs)  # AIC = 1743, Residual deviance = 1725
## 
## Call:
## glm(formula = Completed ~ white + black + LocationType + WhiteMarsh + 
##     obgyn + family + assisted + hospital, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.384  -0.890  -0.738   1.269   1.983  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   -0.2301     0.1747   -1.32   0.1878   
## white          0.0173     0.1616    0.11   0.9146   
## black         -0.4919     0.1810   -2.72   0.0066 **
## LocationType  -0.4205     0.1770   -2.38   0.0175 * 
## WhiteMarsh     0.3301     0.2055    1.61   0.1083   
## obgyn         -0.1623     0.1551   -1.05   0.2955   
## family        -0.5449     0.1718   -3.17   0.0015 **
## assisted      -0.5096     0.2057   -2.48   0.0132 * 
## hospital       0.5188     0.2466    2.10   0.0354 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796  on 1412  degrees of freedom
## Residual deviance: 1725  on 1404  degrees of freedom
## AIC: 1743
## 
## Number of Fisher Scoring iterations: 4

# Does adding the next independent variable improve the model based on
# the Deviance Test? (Null = No difference in deviance between reduced and
# full models.)  Terms are tested sequentially, in the order they appear in
# the formula, so each p-value depends on the terms entered before it.
anova(m8_allsigs, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##              Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                          1412       1796             
## white         1    17.62      1411       1778  2.7e-05 ***
## black         1    10.17      1410       1768  0.00142 ** 
## LocationType  1    12.34      1409       1756  0.00044 ***
## WhiteMarsh    1     8.20      1408       1748  0.00420 ** 
## obgyn         1     0.91      1407       1747  0.33915    
## family        1     7.89      1406       1739  0.00496 ** 
## assisted      1     9.49      1405       1729  0.00207 ** 
## hospital      1     4.38      1404       1725  0.03630 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# This shows me that some variables with non-significant coefficients are
# still useful.  obgyn is still insignificant, so investigate it for
# redundancies:
# Rows where the obgyn dummy is 1, then their distribution over the other
# characteristics.
obgyn <- gard[gard$obgyn == 1, ]
hist(obgyn$LocationType, main = "OBGYN Location Type Count", xlab = "Location Type", 
    ylab = "Frequency")  # Both urban and suburban represented, but mostly suburban.

plot of chunk unnamed-chunk-11

# Distribution of location codes within the obgyn subset.
hist(x = obgyn$Location, xlab = "Location", ylab = "Frequency",
    main = "OBGYN Location Count")  # All locations present.

plot of chunk unnamed-chunk-11

# Distribution of insurance type codes within the obgyn subset.
hist(x = obgyn$InsuranceType, xlab = "Insurance Type", ylab = "Frequency",
    main = "OBGYN Insurance Type Count")  # All insurance types, but mostly #1 (private).

plot of chunk unnamed-chunk-11

hist(obgyn$Race, main = "OBGYN Race Count", xlab = "Race", ylab = "Frequency")  # All races represented, well mixed, but more white than other races.

plot of chunk unnamed-chunk-11


# No definite redundancies, but obgyn and suburban may be drawing from
# similar populations.  Will try eliminating it.
# m7 is m8 without obgyn.
m7_allsigs <- glm(Completed ~ white + black + LocationType + WhiteMarsh + family + 
    assisted + hospital, data = gard, family = binomial("logit"))
summary(m7_allsigs)  # AIC = 1742.2; Residual deviance = 1726.1
## 
## Call:
## glm(formula = Completed ~ white + black + LocationType + WhiteMarsh + 
##     family + assisted + hospital, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.382  -0.892  -0.721   1.307   1.949  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  -0.30563    0.15921   -1.92   0.0549 . 
## white         0.00692    0.16130    0.04   0.9658   
## black        -0.49974    0.18089   -2.76   0.0057 **
## LocationType -0.41001    0.17665   -2.32   0.0203 * 
## WhiteMarsh    0.25247    0.19141    1.32   0.1872   
## family       -0.46318    0.15328   -3.02   0.0025 **
## assisted     -0.46878    0.20192   -2.32   0.0203 * 
## hospital      0.51436    0.24625    2.09   0.0367 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1726.1  on 1405  degrees of freedom
## AIC: 1742
## 
## Number of Fisher Scoring iterations: 4
# When reduced model has a lower AIC, eliminate the full model.
# (Here m7's AIC of 1742 is below m8's 1743.)  The table below tests the
# terms sequentially, in formula order.
anova(m7_allsigs, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##              Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                          1412       1796             
## white         1    17.62      1411       1778  2.7e-05 ***
## black         1    10.17      1410       1768  0.00142 ** 
## LocationType  1    12.34      1409       1756  0.00044 ***
## WhiteMarsh    1     8.20      1408       1748  0.00420 ** 
## family        1     8.64      1407       1739  0.00329 ** 
## assisted      1     8.62      1406       1730  0.00332 ** 
## hospital      1     4.32      1405       1726  0.03770 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All variables are significant in the anova, but white's coefficient is
# essentially 0 under Wald's test (p = 0.97), so it is dropped next.

# m6 is m7 without white.
m6_allsigs <- glm(Completed ~ black + LocationType + WhiteMarsh + family + assisted + 
    hospital, data = gard, family = binomial("logit"))
summary(m6_allsigs)  # AIC = 1740.1; Residual deviance = 1726.1
## 
## Call:
## glm(formula = Completed ~ black + LocationType + WhiteMarsh + 
##     family + assisted + hospital, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.381  -0.894  -0.721   1.307   1.950  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.300      0.105   -2.87  0.00404 ** 
## black          -0.505      0.135   -3.74  0.00018 ***
## LocationType   -0.410      0.176   -2.32  0.02019 *  
## WhiteMarsh      0.254      0.189    1.34  0.18021    
## family         -0.464      0.153   -3.03  0.00241 ** 
## assisted       -0.469      0.202   -2.33  0.01993 *  
## hospital        0.514      0.246    2.09  0.03676 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1726.1  on 1406  degrees of freedom
## AIC: 1740
## 
## Number of Fisher Scoring iterations: 4
# When reduced model has a lower AIC, eliminate the full model.
# (Here m6's AIC of 1740.1 is below m7's 1742.2.)  The table below tests
# the terms sequentially, in formula order.
anova(m6_allsigs, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##              Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                          1412       1796             
## black         1    27.12      1411       1769  1.9e-07 ***
## LocationType  1    12.12      1410       1757   0.0005 ***
## WhiteMarsh    1     8.93      1409       1748   0.0028 ** 
## family        1     8.77      1408       1739   0.0031 ** 
## assisted      1     8.65      1407       1730   0.0033 ** 
## hospital      1     4.32      1406       1726   0.0377 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# WhiteMarsh has the only non-significant coefficient left.

# m5 is m6 without WhiteMarsh.
m5_allsigs <- glm(Completed ~ black + LocationType + family + assisted + hospital, 
    data = gard, family = binomial("logit"))
summary(m5_allsigs)  # AIC = 1739.9; Residual deviance = 1727.9
## 
## Call:
## glm(formula = Completed ~ black + LocationType + family + assisted + 
##     hospital, family = binomial("logit"), data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.326  -0.892  -0.724   1.276   1.950  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -0.2289     0.0894   -2.56  0.01048 *  
## black         -0.5191     0.1345   -3.86  0.00011 ***
## LocationType  -0.4886     0.1660   -2.94  0.00324 ** 
## family        -0.5339     0.1433   -3.73  0.00019 ***
## assisted      -0.4585     0.2014   -2.28  0.02283 *  
## hospital       0.5714     0.2415    2.37  0.01799 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1727.9  on 1407  degrees of freedom
## AIC: 1740
## 
## Number of Fisher Scoring iterations: 4
# When reduced model has a lower AIC, eliminate the full model.
# (Here m5's AIC of 1739.9 is just below m6's 1740.1.)  The table below
# tests the terms sequentially, in formula order.
anova(m5_allsigs, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##              Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                          1412       1796             
## black         1    27.12      1411       1769  1.9e-07 ***
## LocationType  1    12.12      1410       1757  0.00050 ***
## family        1    14.66      1409       1742  0.00013 ***
## assisted      1     8.69      1408       1733  0.00321 ** 
## hospital      1     5.53      1407       1728  0.01864 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# I cannot tell which would be better to remove: hospital or assisted.
# Try each.

# m4A drops hospital from m5 and keeps assisted.
m4A_allsigs <- glm(Completed ~ black + LocationType + family + assisted, data = gard, 
    family = binomial("logit"))
summary(m4A_allsigs)  # AIC = 1743.4; Residual deviance = 1733.4
## 
## Call:
## glm(formula = Completed ~ black + LocationType + family + assisted, 
##     family = binomial("logit"), data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.094  -0.893  -0.737   1.263   1.989  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -0.1986     0.0884   -2.25  0.02460 *  
## black         -0.5139     0.1342   -3.83  0.00013 ***
## LocationType  -0.3970     0.1605   -2.47  0.01336 *  
## family        -0.5480     0.1430   -3.83  0.00013 ***
## assisted      -0.5685     0.1956   -2.91  0.00366 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1733.4  on 1408  degrees of freedom
## AIC: 1743
## 
## Number of Fisher Scoring iterations: 4
# Sequential deviance table for m4A (the model without hospital).
anova(m4A_allsigs, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##              Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                          1412       1796             
## black         1    27.12      1411       1769  1.9e-07 ***
## LocationType  1    12.12      1410       1757  0.00050 ***
## family        1    14.66      1409       1742  0.00013 ***
## assisted      1     8.69      1408       1733  0.00321 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Candidate B: m5_allsigs without assisted (hospital is retained).
m4B_allsigs <- glm(Completed ~ black + LocationType + family + hospital, data = gard, 
    family = binomial("logit"))
summary(m4B_allsigs)  # AIC = 1743.2; Residual deviance = 1733.2
## 
## Call:
## glm(formula = Completed ~ black + LocationType + family + hospital, 
##     family = binomial("logit"), data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.382  -0.873  -0.813   1.282   1.846  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -0.2430     0.0892   -2.72  0.00647 ** 
## black         -0.5644     0.1330   -4.24  2.2e-05 ***
## LocationType  -0.6950     0.1412   -4.92  8.5e-07 ***
## family        -0.5253     0.1432   -3.67  0.00024 ***
## hospital       0.7127     0.2358    3.02  0.00250 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1733.2  on 1408  degrees of freedom
## AIC: 1743
## 
## Number of Fisher Scoring iterations: 4
anova(m4B_allsigs, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##              Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                          1412       1796             
## black         1    27.12      1411       1769  1.9e-07 ***
## LocationType  1    12.12      1410       1757  0.00050 ***
## family        1    14.66      1409       1742  0.00013 ***
## hospital      1     8.94      1408       1733  0.00279 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Chi-squared (likelihood-ratio) test of m5 against m4B, the m4 candidate
# with the lower AIC (1743.2 vs. 1743.4 for m4A).  m4B omits assisted, so
# this tests whether adding assisted improves the fit.
anova(m5_allsigs, m4B_allsigs, test = "Chisq")  # Null rejected, keep the m5 model.
## Analysis of Deviance Table
## 
## Model 1: Completed ~ black + LocationType + family + assisted + hospital
## Model 2: Completed ~ black + LocationType + family + hospital
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1      1407       1728                       
## 2      1408       1733 -1    -5.28    0.022 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Use Only Positive Significant Binaries

# I'm using urban instead of LocationType because urban has suburban = 1,
# and suburban is the group for location type characteristics that has an
# odds ratio over 1.

# Deviance Test Null Hypothesis: This model predicts the probability that
# an individual completed the Gardasil regimen no better than the overall
# (average) probability, i.e. the intercept-only model.  Wald's Test: For
# each independent variable, there is also the null hypothesis that the
# coefficient for that variable = 0.
# Starting model for this section: logistic regression of Completed on the
# five binaries white, urban, WhiteMarsh, obgyn and hospital.
m5_possigs <- glm(Completed ~ white + urban + WhiteMarsh + obgyn + hospital, 
    data = gard, family = binomial("logit"))
summary(m5_possigs)  # AIC = 1759.9, Residual deviance = 1747.9
## 
## Call:
## glm(formula = Completed ~ white + urban + WhiteMarsh + obgyn + 
##     hospital, family = binomial("logit"), data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.422  -0.973  -0.795   1.355   1.783  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.3608     0.1341  -10.15  < 2e-16 ***
## white         0.3697     0.1185    3.12  0.00181 ** 
## urban         0.4893     0.1357    3.61  0.00031 ***
## WhiteMarsh    0.3284     0.2022    1.62  0.10438    
## obgyn         0.0943     0.1353    0.70  0.48573    
## hospital      0.6375     0.2386    2.67  0.00755 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1747.9  on 1407  degrees of freedom
## AIC: 1760
## 
## Number of Fisher Scoring iterations: 4
anova(m5_possigs, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##            Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                        1412       1796             
## white       1    17.62      1411       1778  2.7e-05 ***
## urban       1    15.56      1410       1763  8.0e-05 ***
## WhiteMarsh  1     7.23      1409       1756   0.0072 ** 
## obgyn       1     0.65      1408       1755   0.4187    
## hospital    1     6.99      1407       1748   0.0082 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Again, obgyn is the only non-significant improvement.

# Reduced model: m5_possigs without obgyn.
m4_possigs <- glm(Completed ~ white + urban + WhiteMarsh + hospital, data = gard, 
    family = binomial("logit"))
summary(m4_possigs)  # AIC = 1758.4, Residual deviance = 1748.4
## 
## Call:
## glm(formula = Completed ~ white + urban + WhiteMarsh + hospital, 
##     family = binomial("logit"), data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.425  -0.984  -0.807   1.383   1.770  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -1.333      0.128  -10.44  < 2e-16 ***
## white          0.376      0.118    3.19  0.00143 ** 
## urban          0.483      0.135    3.57  0.00035 ***
## WhiteMarsh     0.394      0.179    2.20  0.02814 *  
## hospital       0.645      0.239    2.70  0.00684 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1748.4  on 1408  degrees of freedom
## AIC: 1758
## 
## Number of Fisher Scoring iterations: 4
# The AIC is lower for the reduced model, so eliminating m5 model.
anova(m4_possigs, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##            Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                        1412       1796             
## white       1    17.62      1411       1778  2.7e-05 ***
## urban       1    15.56      1410       1763  8.0e-05 ***
## WhiteMarsh  1     7.23      1409       1756   0.0072 ** 
## hospital    1     7.16      1408       1748   0.0075 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# WhiteMarsh is the weakest link in the anova despite not being the last
# variable added and has the only coefficient not significant at the 0.01
# level.

# Reduced model: m4_possigs without WhiteMarsh.
m3_possigs <- glm(Completed ~ white + urban + hospital, data = gard, family = binomial("logit"))
summary(m3_possigs)  # AIC = 1761.2, Residual deviance = 1753.2
## 
## Call:
## glm(formula = Completed ~ white + urban + hospital, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.328  -0.922  -0.811   1.345   1.786  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -1.367      0.127  -10.75  < 2e-16 ***
## white          0.423      0.116    3.65  0.00026 ***
## urban          0.559      0.131    4.27    2e-05 ***
## hospital       0.732      0.233    3.14  0.00171 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1753.2  on 1409  degrees of freedom
## AIC: 1761
## 
## Number of Fisher Scoring iterations: 4
anova(m3_possigs, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##          Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                      1412       1796             
## white     1     17.6      1411       1778  2.7e-05 ***
## urban     1     15.6      1410       1763  8.0e-05 ***
## hospital  1      9.6      1409       1753   0.0019 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The AIC is higher than m4 (1761.2 vs. 1758.4), so checking the difference
# with a Chi-squared test.  m3_possigs is m4_possigs without WhiteMarsh, so
# this tests whether WhiteMarsh improves the fit.
anova(m4_possigs, m3_possigs, test = "Chisq")  # Null rejected; keep m4 model.
## Analysis of Deviance Table
## 
## Model 1: Completed ~ white + urban + WhiteMarsh + hospital
## Model 2: Completed ~ white + urban + hospital
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1      1408       1748                       
## 2      1409       1753 -1    -4.78    0.029 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Use Only Negative Significant Binaries

# Deviance Test Null Hypothesis: This model predicts the probability that
# an individual completed the Gardasil regimen no better than the overall
# (average) probability, i.e. the intercept-only model.  Wald's Test: For
# each independent variable, there is also the null hypothesis that the
# coefficient for that variable = 0.

# Bayview and Johns Hopkins are not included, because they would be
# redundant with urban.
# Same formula as m4A_allsigs above (black + LocationType + family +
# assisted), refit under a section-specific name.
m4_negsigs <- glm(Completed ~ black + LocationType + family + assisted, data = gard, 
    family = binomial("logit"))
summary(m4_negsigs)  # AIC = 1743.4, Residual deviance = 1733.4
## 
## Call:
## glm(formula = Completed ~ black + LocationType + family + assisted, 
##     family = binomial("logit"), data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.094  -0.893  -0.737   1.263   1.989  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -0.1986     0.0884   -2.25  0.02460 *  
## black         -0.5139     0.1342   -3.83  0.00013 ***
## LocationType  -0.3970     0.1605   -2.47  0.01336 *  
## family        -0.5480     0.1430   -3.83  0.00013 ***
## assisted      -0.5685     0.1956   -2.91  0.00366 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1733.4  on 1408  degrees of freedom
## AIC: 1743
## 
## Number of Fisher Scoring iterations: 4
anova(m4_negsigs, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##              Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                          1412       1796             
## black         1    27.12      1411       1769  1.9e-07 ***
## LocationType  1    12.12      1410       1757  0.00050 ***
## family        1    14.66      1409       1742  0.00013 ***
## assisted      1     8.69      1408       1733  0.00321 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All values contribute to the explanatory power of the model, but
# LocationType is the only one not significant at the 0.01 level.

# Reduced model: m4_negsigs without LocationType.
m3_negsigs <- glm(Completed ~ black + family + assisted, data = gard, family = binomial("logit"))
summary(m3_negsigs)  # AIC = 1747.6, Residual deviance = 1739.6
## 
## Call:
## glm(formula = Completed ~ black + family + assisted, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.054  -0.885  -0.757   1.306   2.097  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.2970     0.0794   -3.74  0.00018 ***
## black        -0.5394     0.1335   -4.04  5.4e-05 ***
## family       -0.4390     0.1366   -3.21  0.00131 ** 
## assisted     -0.8066     0.1702   -4.74  2.1e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1739.6  on 1409  degrees of freedom
## AIC: 1748
## 
## Number of Fisher Scoring iterations: 4
anova(m3_negsigs, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##          Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                      1412       1796             
## black     1    27.12      1411       1769  1.9e-07 ***
## family    1     4.89      1410       1764    0.027 *  
## assisted  1    24.38      1409       1740  7.9e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The AIC is higher than m4 (1747.6 vs. 1743.4), so checking the difference
# with a Chi-squared test.  m3_negsigs is m4_negsigs without LocationType.
anova(m4_negsigs, m3_negsigs, test = "Chisq")  # Null rejected; keep m4 model.
## NOTE: re-knit to regenerate this output.  The transcript previously shown
## here came from comparing m4_possigs with m3_possigs (the wrong pair).
## From the summaries above, the residual deviance rises from 1733.4
## (m4_negsigs, 1408 df) to 1739.6 (m3_negsigs, 1409 df): a change of about
## 6.2 on 1 df, which gives Pr(>Chi) of roughly 0.013.

Use One Binary per Category

# Deviance Test Null Hypothesis: This model predicts the probability that
# an individual completed the Gardasil regimen no better than the overall
# (average) probability, i.e. the intercept-only model.  Wald's Test: For
# each independent variable, there is also the null hypothesis that the
# coefficient for that variable = 0.

# For Race, black has the greatest difference from the mean probability.
# For LocationType and AgeGroup, there is already only 1 binary.  For
# Location, Bayview and Johns Hopkins together are redundant with urban,
# so use White Marsh.  For PracticeType, obgyn has not been significant in
# the past, so family is preferred.  For InsuranceType, assisted and
# hospital are both good candidates.  Will try both.
# Variant A: one binary per category, with assisted as the InsuranceType
# binary.
m6_percatA <- glm(Completed ~ black + LocationType + AgeGroup + WhiteMarsh + 
    family + assisted, data = gard, family = binomial("logit"))
summary(m6_percatA)  # AIC = 1733.8, Residual deviance = 1719.8
## 
## Call:
## glm(formula = Completed ~ black + LocationType + AgeGroup + WhiteMarsh + 
##     family + assisted, family = binomial("logit"), data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.323  -0.954  -0.758   1.227   2.088  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.117      0.118   -0.99  0.32181    
## black          -0.485      0.135   -3.59  0.00033 ***
## LocationType   -0.304      0.169   -1.80  0.07250 .  
## AgeGroup       -0.403      0.124   -3.24  0.00118 ** 
## WhiteMarsh      0.452      0.191    2.36  0.01805 *  
## family         -0.374      0.155   -2.41  0.01604 *  
## assisted       -0.681      0.199   -3.42  0.00063 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1719.8  on 1406  degrees of freedom
## AIC: 1734
## 
## Number of Fisher Scoring iterations: 4
anova(m6_percatA, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##              Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                          1412       1796             
## black         1    27.12      1411       1769  1.9e-07 ***
## LocationType  1    12.12      1410       1757  0.00050 ***
## AgeGroup      1     7.14      1409       1750  0.00755 ** 
## WhiteMarsh    1    11.70      1408       1738  0.00062 ***
## family        1     6.06      1407       1732  0.01383 *  
## assisted      1    12.06      1406       1720  0.00052 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All variables contributed.

# Variant B: same as m6_percatA but with hospital in place of assisted.
m6_percatB <- glm(Completed ~ black + LocationType + AgeGroup + WhiteMarsh + 
    family + hospital, data = gard, family = binomial("logit"))
summary(m6_percatB)  # AIC = 1738.1, Residual deviance = 1724.1
## 
## Call:
## glm(formula = Completed ~ black + LocationType + AgeGroup + WhiteMarsh + 
##     family + hospital, family = binomial("logit"), data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.558  -0.892  -0.737   1.248   1.940  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.164      0.117   -1.40   0.1604    
## black          -0.551      0.134   -4.12  3.7e-05 ***
## LocationType   -0.666      0.153   -4.36  1.3e-05 ***
## AgeGroup       -0.335      0.122   -2.74   0.0062 ** 
## WhiteMarsh      0.341      0.194    1.76   0.0785 .  
## family         -0.390      0.155   -2.51   0.0121 *  
## hospital        0.683      0.242    2.82   0.0048 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1724.1  on 1406  degrees of freedom
## AIC: 1738
## 
## Number of Fisher Scoring iterations: 4
anova(m6_percatB, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##              Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                          1412       1796             
## black         1    27.12      1411       1769  1.9e-07 ***
## LocationType  1    12.12      1410       1757  0.00050 ***
## AgeGroup      1     7.14      1409       1750  0.00755 ** 
## WhiteMarsh    1    11.70      1408       1738  0.00062 ***
## family        1     6.06      1407       1732  0.01383 *  
## hospital      1     7.79      1406       1724  0.00524 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All variables contributed, but m6_percatA has a lower AIC.  So try
# using both.

# Seven-term model with both insurance binaries: m6_percatB plus assisted.
m7_percat <- glm(Completed ~ black + LocationType + AgeGroup + WhiteMarsh + 
    family + hospital + assisted, data = gard, family = binomial("logit"))
summary(m7_percat)  # AIC = 1731.8, Residual deviance = 1715.8
## 
## Call:
## glm(formula = Completed ~ black + LocationType + AgeGroup + WhiteMarsh + 
##     family + hospital + assisted, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.512  -0.963  -0.754   1.231   2.063  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.125      0.118   -1.06  0.28828    
## black          -0.493      0.135   -3.64  0.00027 ***
## LocationType   -0.402      0.177   -2.27  0.02305 *  
## AgeGroup       -0.398      0.124   -3.20  0.00136 ** 
## WhiteMarsh      0.383      0.195    1.97  0.04882 *  
## family         -0.382      0.155   -2.46  0.01406 *  
## hospital        0.500      0.248    2.02  0.04325 *  
## assisted       -0.584      0.205   -2.85  0.00437 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1715.8  on 1405  degrees of freedom
## AIC: 1732
## 
## Number of Fisher Scoring iterations: 4
anova(m7_percat, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##              Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                          1412       1796             
## black         1    27.12      1411       1769  1.9e-07 ***
## LocationType  1    12.12      1410       1757  0.00050 ***
## AgeGroup      1     7.14      1409       1750  0.00755 ** 
## WhiteMarsh    1    11.70      1408       1738  0.00062 ***
## family        1     6.06      1407       1732  0.01383 *  
## hospital      1     7.79      1406       1724  0.00524 ** 
## assisted      1     8.31      1405       1716  0.00394 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All variables contributed.  And it isn't clear which is the weak link.
# I'm also not sure that 7 variables are necessary, so I'll just try all m6
# possibilities.

# Variant C: m7_percat without family.
m6_percatC <- glm(Completed ~ black + LocationType + AgeGroup + WhiteMarsh + 
    hospital + assisted, data = gard, family = binomial("logit"))
summary(m6_percatC)  # AIC = 1735.8, Residual deviance = 1721.8
## 
## Call:
## glm(formula = Completed ~ black + LocationType + AgeGroup + WhiteMarsh + 
##     hospital + assisted, family = binomial("logit"), data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.523  -0.892  -0.724   1.292   2.079  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.266      0.104   -2.56  0.01054 *  
## black          -0.491      0.135   -3.64  0.00027 ***
## LocationType   -0.239      0.165   -1.45  0.14647    
## AgeGroup       -0.449      0.123   -3.67  0.00025 ***
## WhiteMarsh      0.565      0.181    3.12  0.00180 ** 
## hospital        0.485      0.247    1.96  0.04985 *  
## assisted       -0.594      0.205   -2.89  0.00379 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1721.8  on 1406  degrees of freedom
## AIC: 1736
## 
## Number of Fisher Scoring iterations: 4
anova(m6_percatC, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##              Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                          1412       1796             
## black         1    27.12      1411       1769  1.9e-07 ***
## LocationType  1    12.12      1410       1757  0.00050 ***
## AgeGroup      1     7.14      1409       1750  0.00755 ** 
## WhiteMarsh    1    11.70      1408       1738  0.00062 ***
## hospital      1     7.50      1407       1730  0.00616 ** 
## assisted      1     8.58      1406       1722  0.00340 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All variables contributed.

# Variant D: m7_percat without WhiteMarsh.
m6_percatD <- glm(Completed ~ black + LocationType + AgeGroup + family + hospital + 
    assisted, data = gard, family = binomial("logit"))
summary(m6_percatD)  # AIC = 1733.6, Residual deviance = 1719.6
## 
## Call:
## glm(formula = Completed ~ black + LocationType + AgeGroup + family + 
##     hospital + assisted, family = binomial("logit"), data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.414  -0.951  -0.753   1.196   2.053  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -0.0442     0.1101   -0.40  0.68802    
## black         -0.5153     0.1348   -3.82  0.00013 ***
## LocationType  -0.5155     0.1666   -3.09  0.00197 ** 
## AgeGroup      -0.3473     0.1212   -2.87  0.00416 ** 
## family        -0.4925     0.1443   -3.41  0.00064 ***
## hospital       0.5848     0.2423    2.41  0.01579 *  
## assisted      -0.5547     0.2044   -2.71  0.00666 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1719.6  on 1406  degrees of freedom
## AIC: 1734
## 
## Number of Fisher Scoring iterations: 4
anova(m6_percatD, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##              Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                          1412       1796             
## black         1    27.12      1411       1769  1.9e-07 ***
## LocationType  1    12.12      1410       1757  0.00050 ***
## AgeGroup      1     7.14      1409       1750  0.00755 ** 
## family        1    12.69      1408       1737  0.00037 ***
## hospital      1     9.78      1407       1727  0.00177 ** 
## assisted      1     7.52      1406       1720  0.00609 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All variables contributed.

# Variant E: m7_percat without AgeGroup.
m6_percatE <- glm(Completed ~ black + LocationType + WhiteMarsh + family + hospital + 
    assisted, data = gard, family = binomial("logit"))
summary(m6_percatE)  # AIC = 1740.1, Residual deviance = 1726.1
## 
## Call:
## glm(formula = Completed ~ black + LocationType + WhiteMarsh + 
##     family + hospital + assisted, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.381  -0.894  -0.721   1.307   1.950  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.300      0.105   -2.87  0.00404 ** 
## black          -0.505      0.135   -3.74  0.00018 ***
## LocationType   -0.410      0.176   -2.32  0.02019 *  
## WhiteMarsh      0.254      0.189    1.34  0.18021    
## family         -0.464      0.153   -3.03  0.00241 ** 
## hospital        0.514      0.246    2.09  0.03676 *  
## assisted       -0.469      0.202   -2.33  0.01993 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1726.1  on 1406  degrees of freedom
## AIC: 1740
## 
## Number of Fisher Scoring iterations: 4
anova(m6_percatE, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##              Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                          1412       1796             
## black         1    27.12      1411       1769  1.9e-07 ***
## LocationType  1    12.12      1410       1757   0.0005 ***
## WhiteMarsh    1     8.93      1409       1748   0.0028 ** 
## family        1     8.77      1408       1739   0.0031 ** 
## hospital      1     7.45      1407       1732   0.0064 ** 
## assisted      1     5.52      1406       1726   0.0188 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All variables contributed.

# Variant F: m7_percat without LocationType.
m6_percatF <- glm(Completed ~ black + AgeGroup + WhiteMarsh + family + hospital + 
    assisted, data = gard, family = binomial("logit"))
summary(m6_percatF)  # AIC = 1735.0, Residual deviance = 1721.0
## 
## Call:
## glm(formula = Completed ~ black + AgeGroup + WhiteMarsh + family + 
##     hospital + assisted, family = binomial("logit"), data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.455  -0.919  -0.742   1.282   2.153  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -0.243      0.107   -2.28  0.02257 *  
## black         -0.504      0.135   -3.74  0.00019 ***
## AgeGroup      -0.402      0.124   -3.24  0.00120 ** 
## WhiteMarsh     0.534      0.184    2.91  0.00364 ** 
## family        -0.250      0.145   -1.72  0.08505 .  
## hospital       0.342      0.236    1.45  0.14742    
## assisted      -0.814      0.178   -4.57  4.8e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796  on 1412  degrees of freedom
## Residual deviance: 1721  on 1406  degrees of freedom
## AIC: 1735
## 
## Number of Fisher Scoring iterations: 4
anova(m6_percatF, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##            Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                        1412       1796             
## black       1    27.12      1411       1769  1.9e-07 ***
## AgeGroup    1     3.34      1410       1766    0.067 .  
## WhiteMarsh  1    17.18      1409       1748  3.4e-05 ***
## family      1     0.67      1408       1748    0.414    
## hospital    1     4.28      1407       1743    0.038 *  
## assisted    1    22.44      1406       1721  2.2e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
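# Not all variables contributed here: AgeGroup (p = 0.067) and family
# (p = 0.414) were not significant when added sequentially.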

# Variant G: m7_percat without black.
m6_percatG <- glm(Completed ~ LocationType + AgeGroup + WhiteMarsh + family + 
    hospital + assisted, data = gard, family = binomial("logit"))
summary(m6_percatG)  # AIC = 1743.4, Residual deviance = 1729.4
## 
## Call:
## glm(formula = Completed ~ LocationType + AgeGroup + WhiteMarsh + 
##     family + hospital + assisted, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.474  -0.916  -0.766   1.282   1.962  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.243      0.113   -2.15  0.03191 *  
## LocationType   -0.424      0.175   -2.43  0.01518 *  
## AgeGroup       -0.410      0.124   -3.32  0.00090 ***
## WhiteMarsh      0.445      0.193    2.30  0.02131 *  
## family         -0.379      0.155   -2.45  0.01424 *  
## hospital        0.471      0.245    1.92  0.05431 .  
## assisted       -0.692      0.201   -3.45  0.00056 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1729.4  on 1406  degrees of freedom
## AIC: 1743
## 
## Number of Fisher Scoring iterations: 4
anova(m6_percatG, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##              Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                          1412       1796             
## LocationType  1    18.89      1411       1777  1.4e-05 ***
## AgeGroup      1     7.01      1410       1770  0.00811 ** 
## WhiteMarsh    1    14.38      1409       1756  0.00015 ***
## family        1     6.01      1408       1750  0.01420 *  
## hospital      1     8.03      1407       1742  0.00460 ** 
## assisted      1    12.27      1406       1729  0.00046 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All variables contributed.

# m6_percatA is m7_percat without hospital, so this tests hospital.
anova(m7_percat, m6_percatA, test = "Chisq")  # Null rejected
## Analysis of Deviance Table
## 
## Model 1: Completed ~ black + LocationType + AgeGroup + WhiteMarsh + family + 
##     hospital + assisted
## Model 2: Completed ~ black + LocationType + AgeGroup + WhiteMarsh + family + 
##     assisted
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1      1405       1716                       
## 2      1406       1720 -1    -4.05    0.044 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# m6_percatB is m7_percat without assisted, so this tests assisted.
anova(m7_percat, m6_percatB, test = "Chisq")  # Null rejected
## Analysis of Deviance Table
## 
## Model 1: Completed ~ black + LocationType + AgeGroup + WhiteMarsh + family + 
##     hospital + assisted
## Model 2: Completed ~ black + LocationType + AgeGroup + WhiteMarsh + family + 
##     hospital
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)   
## 1      1405       1716                        
## 2      1406       1724 -1    -8.31   0.0039 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# m6_percatC is m7_percat without family (p = 0.014).
anova(m7_percat, m6_percatC, test = "Chisq")  # Null rejected
## Analysis of Deviance Table
## 
## Model 1: Completed ~ black + LocationType + AgeGroup + WhiteMarsh + family + 
##     hospital + assisted
## Model 2: Completed ~ black + LocationType + AgeGroup + WhiteMarsh + hospital + 
##     assisted
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1      1405       1716                       
## 2      1406       1722 -1    -6.08    0.014 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# m6_percatD is m7_percat without WhiteMarsh (p = 0.049).
anova(m7_percat, m6_percatD, test = "Chisq")  # Null rejected (barely)
## Analysis of Deviance Table
## 
## Model 1: Completed ~ black + LocationType + AgeGroup + WhiteMarsh + family + 
##     hospital + assisted
## Model 2: Completed ~ black + LocationType + AgeGroup + family + hospital + 
##     assisted
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1      1405       1716                       
## 2      1406       1720 -1    -3.88    0.049 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# m6_percatE is m7_percat without AgeGroup (p = 0.0013).
anova(m7_percat, m6_percatE, test = "Chisq")  # Null rejected
## Analysis of Deviance Table
## 
## Model 1: Completed ~ black + LocationType + AgeGroup + WhiteMarsh + family + 
##     hospital + assisted
## Model 2: Completed ~ black + LocationType + WhiteMarsh + family + hospital + 
##     assisted
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)   
## 1      1405       1716                        
## 2      1406       1726 -1    -10.3   0.0013 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# m6_percatF is m7_percat without LocationType (p = 0.022).
anova(m7_percat, m6_percatF, test = "Chisq")  # Null rejected
## Analysis of Deviance Table
## 
## Model 1: Completed ~ black + LocationType + AgeGroup + WhiteMarsh + family + 
##     hospital + assisted
## Model 2: Completed ~ black + AgeGroup + WhiteMarsh + family + hospital + 
##     assisted
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1      1405       1716                       
## 2      1406       1721 -1    -5.21    0.022 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# m6_percatG is m7_percat without black (p = 0.00022).
anova(m7_percat, m6_percatG, test = "Chisq")  # Null rejected
## Analysis of Deviance Table
## 
## Model 1: Completed ~ black + LocationType + AgeGroup + WhiteMarsh + family + 
##     hospital + assisted
## Model 2: Completed ~ LocationType + AgeGroup + WhiteMarsh + family + hospital + 
##     assisted
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)    
## 1      1405       1716                         
## 2      1406       1729 -1    -13.7  0.00022 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# m7 is significantly better than all m6 models, so it's worth keeping.

Everything (but with References)

# Twelve-predictor logistic model.  Race enters as white/otherrace/hispanic,
# which leaves black as the reference level.
m12_everything <- glm(
    Completed ~ AgeGroup + LocationType + white + otherrace + hispanic +
        WhiteMarsh + Bayview + obgyn + pediatric + private + hospital +
        military,
    family = binomial("logit"),
    data = gard
)
# AIC = 1739.9, Residual deviance = 1713.9
summary(m12_everything)
## 
## Call:
## glm(formula = Completed ~ AgeGroup + LocationType + white + otherrace + 
##     hispanic + WhiteMarsh + Bayview + obgyn + pediatric + private + 
##     hospital + military, family = binomial("logit"), data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.548  -0.945  -0.748   1.258   2.114  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -1.490      0.261   -5.70  1.2e-08 ***
## AgeGroup       -0.513      0.159   -3.24  0.00121 ** 
## LocationType   -0.498      0.312   -1.60  0.10983    
## white           0.477      0.144    3.31  0.00094 ***
## otherrace       0.449      0.200    2.25  0.02430 *  
## hispanic        0.522      0.325    1.61  0.10755    
## WhiteMarsh      0.294      0.215    1.37  0.17036    
## Bayview         0.158      0.326    0.49  0.62687    
## obgyn           0.502      0.181    2.78  0.00542 ** 
## pediatric       0.221      0.198    1.12  0.26431    
## private         0.557      0.209    2.67  0.00761 ** 
## hospital        1.056      0.283    3.73  0.00019 ***
## military        0.606      0.246    2.47  0.01365 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1713.9  on 1400  degrees of freedom
## AIC: 1740
## 
## Number of Fisher Scoring iterations: 4
# Sequential analysis of deviance: terms are added in formula order, so each
# p-value depends on where the term sits in the model.
anova(m12_everything, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##              Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                          1412       1796             
## AgeGroup      1     2.62      1411       1793   0.1055    
## LocationType  1    23.27      1410       1770  1.4e-06 ***
## white         1    15.22      1409       1755  9.6e-05 ***
## otherrace     1     4.01      1408       1751   0.0451 *  
## hispanic      1     2.52      1407       1748   0.1126    
## WhiteMarsh    1    10.71      1406       1738   0.0011 ** 
## Bayview       1     0.20      1405       1737   0.6586    
## obgyn         1     7.77      1404       1730   0.0053 ** 
## pediatric     1     0.78      1403       1729   0.3774    
## private       1     0.12      1402       1729   0.7330    
## hospital      1     8.73      1401       1720   0.0031 ** 
## military      1     6.17      1400       1714   0.0130 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Hispanic isn't significant in either the summary or the anova, and its
# placement is fairly early in the anova.

# m12 with the hispanic dummy removed.
m11_everything <- glm(
    Completed ~ AgeGroup + LocationType + white + otherrace + WhiteMarsh +
        Bayview + obgyn + pediatric + private + hospital + military,
    family = binomial("logit"),
    data = gard
)
# AIC = 1740.4, Residual deviance = 1716.4
summary(m11_everything)
## 
## Call:
## glm(formula = Completed ~ AgeGroup + LocationType + white + otherrace + 
##     WhiteMarsh + Bayview + obgyn + pediatric + private + hospital + 
##     military, family = binomial("logit"), data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.545  -0.950  -0.745   1.260   2.080  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -1.439      0.258   -5.57  2.5e-08 ***
## AgeGroup       -0.508      0.158   -3.21  0.00132 ** 
## LocationType   -0.497      0.311   -1.60  0.10977    
## white           0.416      0.138    3.01  0.00257 ** 
## otherrace       0.394      0.196    2.01  0.04421 *  
## WhiteMarsh      0.318      0.214    1.49  0.13730    
## Bayview         0.186      0.324    0.57  0.56664    
## obgyn           0.495      0.180    2.74  0.00607 ** 
## pediatric       0.217      0.197    1.10  0.27201    
## private         0.556      0.208    2.68  0.00747 ** 
## hospital        1.043      0.282    3.69  0.00022 ***
## military        0.614      0.245    2.51  0.01216 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1716.4  on 1401  degrees of freedom
## AIC: 1740
## 
## Number of Fisher Scoring iterations: 4
# The larger m12 model has the lower AIC (1739.9 vs. 1740.4), so check whether
# the difference is significant with a Chi-squared test.
anova(m12_everything, m11_everything, test = "Chisq")  # Null not rejected; eliminate m12 model.
## Analysis of Deviance Table
## 
## Model 1: Completed ~ AgeGroup + LocationType + white + otherrace + hispanic + 
##     WhiteMarsh + Bayview + obgyn + pediatric + private + hospital + 
##     military
## Model 2: Completed ~ AgeGroup + LocationType + white + otherrace + WhiteMarsh + 
##     Bayview + obgyn + pediatric + private + hospital + military
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1      1400       1714                     
## 2      1401       1716 -1    -2.47     0.12
# Sequential analysis of deviance for m11, used to pick the next term to drop.
anova(m11_everything, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##              Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                          1412       1796             
## AgeGroup      1     2.62      1411       1793  0.10547    
## LocationType  1    23.27      1410       1770  1.4e-06 ***
## white         1    15.22      1409       1755  9.6e-05 ***
## otherrace     1     4.01      1408       1751  0.04513 *  
## WhiteMarsh    1    11.23      1407       1740  0.00081 ***
## Bayview       1     0.11      1406       1740  0.73713    
## obgyn         1     7.55      1405       1732  0.00600 ** 
## pediatric     1     0.75      1404       1731  0.38579    
## private       1     0.10      1403       1731  0.74727    
## hospital      1     8.38      1402       1723  0.00379 ** 
## military      1     6.38      1401       1716  0.01157 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Bayview is the first variable that is significant in neither the summary
# nor the anova.  Removing...

# m11 with the Bayview dummy removed.
m10_everything <- glm(
    Completed ~ AgeGroup + LocationType + white + otherrace + WhiteMarsh +
        obgyn + pediatric + private + hospital + military,
    family = binomial("logit"),
    data = gard
)
# AIC = 1738.7, Residual deviance = 1716.7
summary(m10_everything)
## 
## Call:
## glm(formula = Completed ~ AgeGroup + LocationType + white + otherrace + 
##     WhiteMarsh + obgyn + pediatric + private + hospital + military, 
##     family = binomial("logit"), data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.546  -0.944  -0.740   1.251   2.091  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -1.436      0.258   -5.57  2.5e-08 ***
## AgeGroup       -0.511      0.158   -3.23  0.00125 ** 
## LocationType   -0.355      0.185   -1.92  0.05438 .  
## white           0.427      0.137    3.12  0.00180 ** 
## otherrace       0.401      0.195    2.05  0.04038 *  
## WhiteMarsh      0.335      0.212    1.58  0.11341    
## obgyn           0.473      0.177    2.68  0.00738 ** 
## pediatric       0.236      0.194    1.21  0.22473    
## private         0.552      0.207    2.66  0.00784 ** 
## hospital        1.036      0.282    3.68  0.00024 ***
## military        0.602      0.244    2.47  0.01352 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1716.7  on 1402  degrees of freedom
## AIC: 1739
## 
## Number of Fisher Scoring iterations: 4
# m10 has the lower AIC (1738.7 vs. 1740.4), so eliminate the m11 model.
anova(m10_everything, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##              Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                          1412       1796             
## AgeGroup      1     2.62      1411       1793  0.10547    
## LocationType  1    23.27      1410       1770  1.4e-06 ***
## white         1    15.22      1409       1755  9.6e-05 ***
## otherrace     1     4.01      1408       1751  0.04513 *  
## WhiteMarsh    1    11.23      1407       1740  0.00081 ***
## obgyn         1     7.36      1406       1732  0.00665 ** 
## pediatric     1     0.91      1405       1731  0.34135    
## private       1     0.12      1404       1731  0.73302    
## hospital      1     8.38      1403       1723  0.00380 ** 
## military      1     6.18      1402       1717  0.01291 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# pediatric is the first variable that is significant in neither the summary
# nor the anova.  Removing...

# m10 with pediatric removed.  LocationType is moved to the end of the
# formula, so the sequential anova adds it last.
m9_everything <- glm(
    Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn + private +
        hospital + military + LocationType,
    family = binomial("logit"),
    data = gard
)
# AIC = 1738.2, Residual deviance = 1718.2
summary(m9_everything)
## 
## Call:
## glm(formula = Completed ~ AgeGroup + white + otherrace + WhiteMarsh + 
##     obgyn + private + hospital + military + LocationType, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.572  -0.935  -0.730   1.292   2.133  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -1.282      0.224   -5.73  1.0e-08 ***
## AgeGroup       -0.600      0.140   -4.29  1.8e-05 ***
## white           0.426      0.137    3.11  0.00184 ** 
## otherrace       0.391      0.195    2.00  0.04518 *  
## WhiteMarsh      0.356      0.212    1.68  0.09297 .  
## obgyn           0.381      0.159    2.40  0.01649 *  
## private         0.540      0.207    2.60  0.00923 ** 
## hospital        1.013      0.281    3.60  0.00031 ***
## military        0.591      0.244    2.42  0.01536 *  
## LocationType   -0.284      0.175   -1.62  0.10536    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1718.2  on 1403  degrees of freedom
## AIC: 1738
## 
## Number of Fisher Scoring iterations: 4
# m9 has the lower AIC (1738.2 vs. 1738.7), so eliminate the m10 model.
anova(m9_everything, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##              Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                          1412       1796             
## AgeGroup      1     2.62      1411       1793   0.1055    
## white         1    18.51      1410       1775  1.7e-05 ***
## otherrace     1     8.61      1409       1766   0.0033 ** 
## WhiteMarsh    1    17.12      1408       1749  3.5e-05 ***
## obgyn         1     5.02      1407       1744   0.0250 *  
## private       1     0.66      1406       1743   0.4161    
## hospital      1     5.44      1405       1738   0.0196 *  
## military      1    17.19      1404       1721  3.4e-05 ***
## LocationType  1     2.64      1403       1718   0.1043    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# LocationType does not have a significant coefficient, and it only
# contributes if placed at the beginning of the model. Removing...

# m9 with LocationType removed.
m8_everything <- glm(
    Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn + private +
        hospital + military,
    family = binomial("logit"),
    data = gard
)
# AIC = 1738.8, Residual deviance = 1720.8
summary(m8_everything)
## 
## Call:
## glm(formula = Completed ~ AgeGroup + white + otherrace + WhiteMarsh + 
##     obgyn + private + hospital + military, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.523  -0.925  -0.757   1.301   2.104  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -1.530      0.165   -9.26  < 2e-16 ***
## AgeGroup      -0.568      0.138   -4.10  4.1e-05 ***
## white          0.427      0.137    3.13  0.00176 ** 
## otherrace      0.428      0.194    2.21  0.02694 *  
## WhiteMarsh     0.473      0.199    2.37  0.01767 *  
## obgyn          0.349      0.158    2.21  0.02708 *  
## private        0.695      0.183    3.79  0.00015 ***
## hospital       1.065      0.278    3.83  0.00013 ***
## military       0.817      0.201    4.07  4.7e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1720.8  on 1404  degrees of freedom
## AIC: 1739
## 
## Number of Fisher Scoring iterations: 4
# The larger m9 model has the lower AIC (1738.2 vs. 1738.8), so check whether
# the difference is significant with a Chi-squared test.
anova(m9_everything, m8_everything, test = "Chisq")  # Null not rejected; eliminate m9 model.
## Analysis of Deviance Table
## 
## Model 1: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn + 
##     private + hospital + military + LocationType
## Model 2: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn + 
##     private + hospital + military
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1      1403       1718                     
## 2      1404       1721 -1    -2.64      0.1
# Sequential analysis of deviance for m8, used to look for a further term to drop.
anova(m8_everything, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##            Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                        1412       1796             
## AgeGroup    1     2.62      1411       1793   0.1055    
## white       1    18.51      1410       1775  1.7e-05 ***
## otherrace   1     8.61      1409       1766   0.0033 ** 
## WhiteMarsh  1    17.12      1408       1749  3.5e-05 ***
## obgyn       1     5.02      1407       1744   0.0250 *  
## private     1     0.66      1406       1743   0.4161    
## hospital    1     5.44      1405       1738   0.0196 *  
## military    1    17.19      1404       1721  3.4e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# There is no clear weakest link.  Trying the removal of each variable in turn.

# m8 with private removed (single-term deletion).
m7A_everything <- glm(Completed ~ AgeGroup + white + otherrace + WhiteMarsh +
    obgyn + military + hospital, data = gard, family = binomial("logit"))
summary(m7A_everything)  # AIC = 1752, Residual deviance = 1736
## 
## Call:
## glm(formula = Completed ~ AgeGroup + white + otherrace + WhiteMarsh + 
##     obgyn + military + hospital, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.538  -0.923  -0.742   1.307   1.891  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -1.148      0.123   -9.35  < 2e-16 ***
## AgeGroup      -0.456      0.136   -3.36  0.00077 ***
## white          0.514      0.134    3.83  0.00013 ***
## otherrace      0.549      0.191    2.87  0.00409 ** 
## WhiteMarsh     0.553      0.198    2.79  0.00521 ** 
## obgyn          0.380      0.157    2.41  0.01581 *  
## military       0.301      0.143    2.11  0.03525 *  
## hospital       0.517      0.236    2.19  0.02861 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796  on 1412  degrees of freedom
## Residual deviance: 1736  on 1405  degrees of freedom
## AIC: 1752
## 
## Number of Fisher Scoring iterations: 4
# m8 with hospital removed (single-term deletion).
m7B_everything <- glm(Completed ~ AgeGroup + white + otherrace + WhiteMarsh +
    obgyn + military + private, data = gard, family = binomial("logit"))
summary(m7B_everything)  # AIC = 1751.3, Residual deviance = 1735.3
## 
## Call:
## glm(formula = Completed ~ AgeGroup + white + otherrace + WhiteMarsh + 
##     obgyn + military + private, family = binomial("logit"), data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.389  -0.935  -0.767   1.311   1.981  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -1.284      0.144   -8.91  < 2e-16 ***
## AgeGroup      -0.528      0.138   -3.82  0.00013 ***
## white          0.472      0.136    3.48  0.00050 ***
## otherrace      0.493      0.192    2.56  0.01039 *  
## WhiteMarsh     0.559      0.198    2.83  0.00465 ** 
## obgyn          0.385      0.157    2.45  0.01414 *  
## military       0.503      0.177    2.85  0.00443 ** 
## private        0.352      0.153    2.30  0.02153 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1735.3  on 1405  degrees of freedom
## AIC: 1751
## 
## Number of Fisher Scoring iterations: 4
# m8 with military removed (single-term deletion).
m7C_everything <- glm(Completed ~ AgeGroup + white + otherrace + WhiteMarsh +
    obgyn + private + hospital, data = gard, family = binomial("logit"))
summary(m7C_everything)  # AIC = 1754.0, Residual deviance = 1738.0
## 
## Call:
## glm(formula = Completed ~ AgeGroup + white + otherrace + WhiteMarsh + 
##     obgyn + private + hospital, family = binomial("logit"), data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.504  -0.934  -0.743   1.353   1.907  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -1.164      0.129   -9.01  < 2e-16 ***
## AgeGroup      -0.476      0.137   -3.48  0.00050 ***
## white          0.561      0.132    4.25  2.2e-05 ***
## otherrace      0.624      0.187    3.33  0.00086 ***
## WhiteMarsh     0.465      0.199    2.34  0.01943 *  
## obgyn          0.295      0.157    1.88  0.05992 .  
## private        0.199      0.130    1.53  0.12607    
## hospital       0.585      0.249    2.35  0.01864 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796  on 1412  degrees of freedom
## Residual deviance: 1738  on 1405  degrees of freedom
## AIC: 1754
## 
## Number of Fisher Scoring iterations: 4
# m8 with obgyn removed (single-term deletion).
m7D_everything <- glm(Completed ~ AgeGroup + white + otherrace + WhiteMarsh +
    private + military + hospital, data = gard, family = binomial("logit"))
summary(m7D_everything)  # AIC = 1741.7, Residual deviance = 1725.7
## 
## Call:
## glm(formula = Completed ~ AgeGroup + white + otherrace + WhiteMarsh + 
##     private + military + hospital, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.487  -0.891  -0.720   1.294   2.033  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -1.499      0.165   -9.10  < 2e-16 ***
## AgeGroup      -0.433      0.123   -3.52  0.00043 ***
## white          0.448      0.136    3.29  0.00099 ***
## otherrace      0.414      0.193    2.15  0.03175 *  
## WhiteMarsh     0.649      0.183    3.54  0.00040 ***
## private        0.715      0.183    3.90  9.5e-05 ***
## military       0.781      0.200    3.91  9.2e-05 ***
## hospital       1.105      0.278    3.97  7.1e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1725.7  on 1405  degrees of freedom
## AIC: 1742
## 
## Number of Fisher Scoring iterations: 4
# m8 with WhiteMarsh removed (single-term deletion).
m7E_everything <- glm(Completed ~ AgeGroup + white + otherrace + private + obgyn +
    military + hospital, data = gard, family = binomial("logit"))
summary(m7E_everything)  # AIC = 1742.4, Residual deviance = 1726.4
## 
## Call:
## glm(formula = Completed ~ AgeGroup + white + otherrace + private + 
##     obgyn + military + hospital, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.410  -0.991  -0.759   1.320   2.126  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -1.560      0.165   -9.45  < 2e-16 ***
## AgeGroup      -0.589      0.138   -4.27  2.0e-05 ***
## white          0.464      0.135    3.43  0.00061 ***
## otherrace      0.419      0.194    2.16  0.03045 *  
## private        0.736      0.182    4.04  5.3e-05 ***
## obgyn          0.492      0.145    3.40  0.00067 ***
## military       0.813      0.201    4.05  5.1e-05 ***
## hospital       1.136      0.275    4.12  3.7e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1726.4  on 1405  degrees of freedom
## AIC: 1742
## 
## Number of Fisher Scoring iterations: 4
# m8 with otherrace removed (single-term deletion).
m7F_everything <- glm(Completed ~ AgeGroup + white + private + WhiteMarsh +
    obgyn + military + hospital, data = gard, family = binomial("logit"))
summary(m7F_everything)  # AIC = 1741.7, Residual deviance = 1725.7
## 
## Call:
## glm(formula = Completed ~ AgeGroup + white + private + WhiteMarsh + 
##     obgyn + military + hospital, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.511  -0.921  -0.738   1.282   2.071  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -1.453      0.160   -9.06  < 2e-16 ***
## AgeGroup      -0.568      0.138   -4.11  4.0e-05 ***
## white          0.292      0.121    2.42    0.016 *  
## private        0.754      0.181    4.16  3.2e-05 ***
## WhiteMarsh     0.463      0.199    2.33    0.020 *  
## obgyn          0.338      0.157    2.15    0.032 *  
## military       0.919      0.195    4.71  2.5e-06 ***
## hospital       1.117      0.277    4.04  5.4e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1725.7  on 1405  degrees of freedom
## AIC: 1742
## 
## Number of Fisher Scoring iterations: 4
# m8 with white removed (single-term deletion).
m7G_everything <- glm(Completed ~ AgeGroup + private + otherrace + WhiteMarsh +
    obgyn + military + hospital, data = gard, family = binomial("logit"))
summary(m7G_everything)  # AIC = 1746.7, Residual deviance = 1730.7
## 
## Call:
## glm(formula = Completed ~ AgeGroup + private + otherrace + WhiteMarsh + 
##     obgyn + military + hospital, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.488  -0.943  -0.737   1.284   2.041  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -1.365      0.155   -8.82  < 2e-16 ***
## AgeGroup      -0.585      0.138   -4.24  2.2e-05 ***
## private        0.784      0.180    4.35  1.4e-05 ***
## otherrace      0.153      0.172    0.89   0.3729    
## WhiteMarsh     0.546      0.197    2.77   0.0056 ** 
## obgyn          0.384      0.157    2.45   0.0144 *  
## military       0.966      0.195    4.95  7.3e-07 ***
## hospital       1.141      0.276    4.13  3.6e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1730.7  on 1405  degrees of freedom
## AIC: 1747
## 
## Number of Fisher Scoring iterations: 4
# m8 with AgeGroup removed (single-term deletion).
m7H_everything <- glm(Completed ~ private + white + otherrace + WhiteMarsh +
    obgyn + military + hospital, data = gard, family = binomial("logit"))
summary(m7H_everything)  # AIC = 1754.0, Residual deviance = 1738.0
## 
## Call:
## glm(formula = Completed ~ private + white + otherrace + WhiteMarsh + 
##     obgyn + military + hospital, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.353  -0.947  -0.771   1.382   1.888  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -1.598      0.165   -9.71  < 2e-16 ***
## private        0.537      0.178    3.01  0.00263 ** 
## white          0.449      0.136    3.30  0.00095 ***
## otherrace      0.426      0.192    2.22  0.02653 *  
## WhiteMarsh     0.525      0.198    2.66  0.00793 ** 
## obgyn          0.060      0.139    0.43  0.66622    
## military       0.681      0.197    3.46  0.00055 ***
## hospital       0.968      0.276    3.51  0.00045 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796  on 1412  degrees of freedom
## Residual deviance: 1738  on 1405  degrees of freedom
## AIC: 1754
## 
## Number of Fisher Scoring iterations: 4

# The m8 model has a lower AIC than every m7 model.  Checking each difference
# with a Chi-squared test.
# m7A is m8 without private (p = 1e-04).
anova(m8_everything, m7A_everything, test = "Chisq")  # Null rejected
## Analysis of Deviance Table
## 
## Model 1: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn + 
##     private + hospital + military
## Model 2: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn + 
##     military + hospital
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)    
## 1      1404       1721                         
## 2      1405       1736 -1    -15.1    1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# m7B is m8 without hospital (p = 0.00014).
anova(m8_everything, m7B_everything, test = "Chisq")  # Null rejected
## Analysis of Deviance Table
## 
## Model 1: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn + 
##     private + hospital + military
## Model 2: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn + 
##     military + private
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)    
## 1      1404       1721                         
## 2      1405       1735 -1    -14.5  0.00014 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# m7C is m8 without military (p = 3.4e-05).
anova(m8_everything, m7C_everything, test = "Chisq")  # Null rejected
## Analysis of Deviance Table
## 
## Model 1: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn + 
##     private + hospital + military
## Model 2: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn + 
##     private + hospital
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)    
## 1      1404       1721                         
## 2      1405       1738 -1    -17.2  3.4e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# m7D is m8 without obgyn (p = 0.027).
anova(m8_everything, m7D_everything, test = "Chisq")  # Null rejected
## Analysis of Deviance Table
## 
## Model 1: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn + 
##     private + hospital + military
## Model 2: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + private + 
##     military + hospital
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1      1404       1721                       
## 2      1405       1726 -1    -4.88    0.027 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# m7E is m8 without WhiteMarsh (p = 0.018).
anova(m8_everything, m7E_everything, test = "Chisq")  # Null rejected
## Analysis of Deviance Table
## 
## Model 1: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn + 
##     private + hospital + military
## Model 2: Completed ~ AgeGroup + white + otherrace + private + obgyn + 
##     military + hospital
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1      1404       1721                       
## 2      1405       1726 -1    -5.62    0.018 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# m7F is m8 without otherrace (p = 0.028).
anova(m8_everything, m7F_everything, test = "Chisq")  # Null rejected
## Analysis of Deviance Table
## 
## Model 1: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn + 
##     private + hospital + military
## Model 2: Completed ~ AgeGroup + white + private + WhiteMarsh + obgyn + 
##     military + hospital
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1      1404       1721                       
## 2      1405       1726 -1    -4.83    0.028 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# LRT of m8_everything against the same model without white.
anova(m8_everything, m7G_everything, test = "Chisq")  # Null rejected (p = 0.0016)
## Analysis of Deviance Table
## 
## Model 1: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn + 
##     private + hospital + military
## Model 2: Completed ~ AgeGroup + private + otherrace + WhiteMarsh + obgyn + 
##     military + hospital
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)   
## 1      1404       1721                        
## 2      1405       1731 -1    -9.91   0.0016 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# LRT of m8_everything against the same model without AgeGroup.
anova(m8_everything, m7H_everything, test = "Chisq")  # Null rejected (p = 3.3e-05)
## Analysis of Deviance Table
## 
## Model 1: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn + 
##     private + hospital + military
## Model 2: Completed ~ private + white + otherrace + WhiteMarsh + obgyn + 
##     military + hospital
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)    
## 1      1404       1721                         
## 2      1405       1738 -1    -17.2  3.3e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# The null hypothesis was rejected in all cases; keep m8 model.

Regsubsets?

# Best-subset search over the candidate predictors: for every model size up
# to nvmax = 10, keep the single best subset (nbest = 1).
# regsubsets() is provided by the 'leaps' package, which must be attached
# first; without it the call fails with 'could not find function
# "regsubsets"' and m_reg is never created.
# NOTE(review): regsubsets() ranks subsets by linear least-squares fit, so for
# the binary response Completed its ranking is only a screening heuristic for
# the logistic models fitted below.
library(leaps)
m_reg <- regsubsets(Completed ~ ., data = gard[, c(2, 5, 9, 11:25)], nbest = 1, 
    nvmax = 10)
summary(m_reg)
# (The previously recorded run errored because 'leaps' was not loaded; re-run
# this chunk to regenerate the subset table.)
# Regsubsets prescribes which combination of variables is best for each
# number of variables allowed in the model.  Starting at m11 and working
# down until the AIC minimum.

# Largest regsubsets-based candidate: logistic regression of Completed on 11
# predictors.
m11_regsub <- glm(Completed ~ AgeGroup + LocationType + black + otherrace + 
    assisted + private + hospital + WhiteMarsh + Bayview + obgyn + pediatric, 
    data = gard, family = binomial("logit"))
summary(m11_regsub)  # AIC = 1737.9, Residual deviance = 1713.9
## 
## Call:
## glm(formula = Completed ~ AgeGroup + LocationType + black + otherrace + 
##     assisted + private + hospital + WhiteMarsh + Bayview + obgyn + 
##     pediatric, family = binomial("logit"), data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.549  -0.946  -0.750   1.257   2.113  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -0.4049     0.1804   -2.24  0.02482 *  
## AgeGroup      -0.5125     0.1585   -3.23  0.00122 ** 
## LocationType  -0.4961     0.3112   -1.59  0.11094    
## black         -0.4801     0.1422   -3.38  0.00073 ***
## otherrace     -0.0298     0.1786   -0.17  0.86765    
## assisted      -0.6035     0.2451   -2.46  0.01380 *  
## private       -0.0486     0.1561   -0.31  0.75538    
## hospital       0.4499     0.2832    1.59  0.11220    
## WhiteMarsh     0.2951     0.2145    1.38  0.16881    
## Bayview        0.1581     0.3258    0.49  0.62740    
## obgyn          0.5008     0.1803    2.78  0.00548 ** 
## pediatric      0.2206     0.1976    1.12  0.26421    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1713.9  on 1401  degrees of freedom
## AIC: 1738
## 
## Number of Fisher Scoring iterations: 4

# Relative to m11_regsub: drops otherrace.
m10_regsub <- glm(Completed ~ AgeGroup + LocationType + black + assisted + private + 
    hospital + WhiteMarsh + Bayview + obgyn + pediatric, data = gard, family = binomial("logit"))
summary(m10_regsub)  # AIC = 1735.9, Residual deviance = 1713.9
## 
## Call:
## glm(formula = Completed ~ AgeGroup + LocationType + black + assisted + 
##     private + hospital + WhiteMarsh + Bayview + obgyn + pediatric, 
##     family = binomial("logit"), data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.548  -0.945  -0.748   1.260   2.112  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -0.4139     0.1722   -2.40  0.01622 *  
## AgeGroup      -0.5122     0.1585   -3.23  0.00123 ** 
## LocationType  -0.4942     0.3110   -1.59  0.11209    
## black         -0.4744     0.1379   -3.44  0.00058 ***
## assisted      -0.6037     0.2451   -2.46  0.01377 *  
## private       -0.0483     0.1561   -0.31  0.75695    
## hospital       0.4488     0.2832    1.59  0.11296    
## WhiteMarsh     0.3001     0.2124    1.41  0.15767    
## Bayview        0.1602     0.3255    0.49  0.62264    
## obgyn          0.5033     0.1797    2.80  0.00509 ** 
## pediatric      0.2217     0.1974    1.12  0.26158    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1713.9  on 1402  degrees of freedom
## AIC: 1736
## 
## Number of Fisher Scoring iterations: 4

# Relative to m10_regsub: drops Bayview.
m9_regsub <- glm(Completed ~ AgeGroup + LocationType + black + assisted + private + 
    hospital + WhiteMarsh + obgyn + pediatric, data = gard, family = binomial("logit"))
summary(m9_regsub)  # AIC = 1734.2, Residual deviance = 1714.2
## 
## Call:
## glm(formula = Completed ~ AgeGroup + LocationType + black + assisted + 
##     private + hospital + WhiteMarsh + obgyn + pediatric, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.548  -0.940  -0.745   1.253   2.121  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -0.4138     0.1722   -2.40   0.0163 *  
## AgeGroup      -0.5145     0.1584   -3.25   0.0012 ** 
## LocationType  -0.3716     0.1839   -2.02   0.0434 *  
## black         -0.4839     0.1366   -3.54   0.0004 ***
## assisted      -0.5931     0.2440   -2.43   0.0151 *  
## private       -0.0419     0.1556   -0.27   0.7875    
## hospital       0.4532     0.2829    1.60   0.1092    
## WhiteMarsh     0.3153     0.2103    1.50   0.1338    
## obgyn          0.4850     0.1760    2.76   0.0059 ** 
## pediatric      0.2384     0.1945    1.23   0.2203    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1714.2  on 1403  degrees of freedom
## AIC: 1734
## 
## Number of Fisher Scoring iterations: 4

# Relative to m9_regsub: drops private and uses Odenton in place of
# WhiteMarsh.
m8_regsub <- glm(Completed ~ AgeGroup + LocationType + black + assisted + hospital + 
    Odenton + obgyn + pediatric, data = gard, family = binomial("logit"))
summary(m8_regsub)  # AIC = 1732.3, Residual deviance = 1714.3
## 
## Call:
## glm(formula = Completed ~ AgeGroup + LocationType + black + assisted + 
##     hospital + Odenton + obgyn + pediatric, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.548  -0.943  -0.746   1.261   2.124  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.131      0.254   -0.51  0.60710    
## AgeGroup       -0.517      0.158   -3.27  0.00107 ** 
## LocationType   -0.688      0.229   -3.00  0.00268 ** 
## black          -0.489      0.135   -3.61  0.00031 ***
## assisted       -0.558      0.206   -2.71  0.00679 ** 
## hospital        0.490      0.248    1.98  0.04777 *  
## Odenton        -0.303      0.205   -1.48  0.13998    
## obgyn           0.480      0.175    2.74  0.00608 ** 
## pediatric       0.239      0.195    1.23  0.22009    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1714.3  on 1404  degrees of freedom
## AIC: 1732
## 
## Number of Fisher Scoring iterations: 4

# Relative to m8_regsub: drops pediatric.
m7_regsub <- glm(Completed ~ AgeGroup + LocationType + black + assisted + hospital + 
    Odenton + obgyn, data = gard, family = binomial("logit"))
summary(m7_regsub)  # AIC = 1731.8, Residual deviance = 1715.8
## 
## Call:
## glm(formula = Completed ~ AgeGroup + LocationType + black + assisted + 
##     hospital + Odenton + obgyn, family = binomial("logit"), data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.574  -0.933  -0.741   1.304   2.166  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    0.0314     0.2172    0.14  0.88520    
## AgeGroup      -0.6075     0.1395   -4.35  1.3e-05 ***
## LocationType  -0.6370     0.2253   -2.83  0.00470 ** 
## black         -0.4858     0.1354   -3.59  0.00033 ***
## assisted      -0.5457     0.2060   -2.65  0.00807 ** 
## hospital       0.4782     0.2473    1.93  0.05320 .  
## Odenton       -0.3251     0.2052   -1.58  0.11310    
## obgyn          0.3874     0.1573    2.46  0.01379 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1715.8  on 1405  degrees of freedom
## AIC: 1732
## 
## Number of Fisher Scoring iterations: 4
# m7 is the AIC minimum.  But is it significantly better than m6?

# Relative to m7_regsub: drops Odenton.
m6_regsub <- glm(Completed ~ AgeGroup + LocationType + black + assisted + hospital + 
    obgyn, data = gard, family = binomial("logit"))
summary(m6_regsub)  # AIC = 1732.3, Residual deviance = 1718.3
## 
## Call:
## glm(formula = Completed ~ AgeGroup + LocationType + black + assisted + 
##     hospital + obgyn, family = binomial("logit"), data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.516  -0.914  -0.736   1.294   2.187  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.271      0.103   -2.62  0.00869 ** 
## AgeGroup       -0.628      0.139   -4.52    6e-06 ***
## LocationType   -0.387      0.161   -2.41  0.01609 *  
## black          -0.501      0.135   -3.71  0.00021 ***
## assisted       -0.510      0.205   -2.49  0.01269 *  
## hospital        0.538      0.243    2.21  0.02719 *  
## obgyn           0.502      0.139    3.61  0.00030 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1718.3  on 1406  degrees of freedom
## AIC: 1732
## 
## Number of Fisher Scoring iterations: 4
# LRT for the Odenton term: m7_regsub against m6_regsub (m7 without Odenton).
anova(m7_regsub, m6_regsub, test = "Chisq")  # Null not rejected (p = 0.11); eliminate m7.
## Analysis of Deviance Table
## 
## Model 1: Completed ~ AgeGroup + LocationType + black + assisted + hospital + 
##     Odenton + obgyn
## Model 2: Completed ~ AgeGroup + LocationType + black + assisted + hospital + 
##     obgyn
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1      1405       1716                     
## 2      1406       1718 -1    -2.51     0.11

# Relative to m6_regsub: drops LocationType and hospital, adds WhiteMarsh.
m5_regsub <- glm(Completed ~ AgeGroup + black + assisted + WhiteMarsh + obgyn, 
    data = gard, family = binomial("logit"))
summary(m5_regsub)  # AIC = 1733.4, Residual deviance = 1721.4
## 
## Call:
## glm(formula = Completed ~ AgeGroup + black + assisted + WhiteMarsh + 
##     obgyn, family = binomial("logit"), data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.386  -0.949  -0.756   1.320   2.139  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.3291     0.0989   -3.33  0.00087 ***
## AgeGroup     -0.5858     0.1381   -4.24  2.2e-05 ***
## black        -0.4885     0.1348   -3.62  0.00029 ***
## assisted     -0.7763     0.1733   -4.48  7.5e-06 ***
## WhiteMarsh    0.4574     0.1957    2.34  0.01944 *  
## obgyn         0.3495     0.1543    2.26  0.02352 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1721.4  on 1407  degrees of freedom
## AIC: 1733
## 
## Number of Fisher Scoring iterations: 4
# NOTE(review): m6_regsub and m5_regsub are not nested as written (m5 has
# WhiteMarsh; m6 has LocationType and hospital instead), so this chi-squared
# test is only a rough guide; the AIC values are the safer comparison.
anova(m6_regsub, m5_regsub, test = "Chisq")  # Null not rejected (p = 0.077)
## Analysis of Deviance Table
## 
## Model 1: Completed ~ AgeGroup + LocationType + black + assisted + hospital + 
##     obgyn
## Model 2: Completed ~ AgeGroup + black + assisted + WhiteMarsh + obgyn
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1      1406       1718                       
## 2      1407       1721 -1    -3.13    0.077 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# NOTE(review): m5_regsub uses WhiteMarsh, which m7_regsub does not list; this
# is a valid nested comparison only if WhiteMarsh is a linear combination of
# m7_regsub's terms (e.g. of LocationType and Odenton) -- confirm.
anova(m7_regsub, m5_regsub, test = "Chisq")  # Null not rejected (p = 0.06)
## Analysis of Deviance Table
## 
## Model 1: Completed ~ AgeGroup + LocationType + black + assisted + hospital + 
##     Odenton + obgyn
## Model 2: Completed ~ AgeGroup + black + assisted + WhiteMarsh + obgyn
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1      1405       1716                       
## 2      1407       1721 -2    -5.64     0.06 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Eliminate m6.

# Relative to m5_regsub: drops obgyn.
m4_regsub <- glm(Completed ~ AgeGroup + black + assisted + WhiteMarsh, data = gard, 
    family = binomial("logit"))
summary(m4_regsub)  # AIC = 1736.5, Residual deviance = 1726.5
## 
## Call:
## glm(formula = Completed ~ AgeGroup + black + assisted + WhiteMarsh, 
##     family = binomial("logit"), data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.338  -0.886  -0.718   1.302   2.064  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.2888     0.0972   -2.97  0.00296 ** 
## AgeGroup     -0.4450     0.1221   -3.65  0.00027 ***
## black        -0.4901     0.1345   -3.64  0.00027 ***
## assisted     -0.7794     0.1733   -4.50  6.9e-06 ***
## WhiteMarsh    0.6596     0.1746    3.78  0.00016 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1726.5  on 1408  degrees of freedom
## AIC: 1737
## 
## Number of Fisher Scoring iterations: 4
# LRT for the obgyn term: m5_regsub against m4_regsub (m5 without obgyn).
anova(m5_regsub, m4_regsub, test = "Chisq")  # Null rejected (p = 0.024); keep m5.
## Analysis of Deviance Table
## 
## Model 1: Completed ~ AgeGroup + black + assisted + WhiteMarsh + obgyn
## Model 2: Completed ~ AgeGroup + black + assisted + WhiteMarsh
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1      1407       1721                       
## 2      1408       1727 -1    -5.12    0.024 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# The chi-squared likelihood-ratio test shows that m5_regsub fits
# significantly better than m4_regsub (p = 0.024).  Although m6_regsub and
# m7_regsub have lower AIC values, their residual deviances are not
# significantly different from that of m5_regsub (p = 0.077 and p = 0.06), so
# m5_regsub is kept.

Including Interactions

# First, I'll use the best model without interactions as a basis.  Logical
# interactions include age group with location type or race, and race with
# location type or insurance type.
# Main-effects model plus the LocationType x AgeGroup interaction.
# `LocationType * AgeGroup` expands to both main effects and their product
# term.  `family` inside the formula is a predictor variable (it gets its own
# coefficient), not to be confused with glm's `family =` argument.
m7_inter1A <- glm(Completed ~ black + LocationType * AgeGroup + WhiteMarsh + 
    family + assisted + hospital, data = gard, family = binomial("logit"))
summary(m7_inter1A)  # AIC = 1718.2, Residual deviance = 1700.2
## 
## Call:
## glm(formula = Completed ~ black + LocationType * AgeGroup + WhiteMarsh + 
##     family + assisted + hospital, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.598  -0.903  -0.694   1.178   2.057  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -0.00137    0.12190   -0.01    0.991    
## black                 -0.53386    0.13689   -3.90  9.6e-05 ***
## LocationType          -0.85218    0.21299   -4.00  6.3e-05 ***
## AgeGroup              -0.68361    0.14420   -4.74  2.1e-06 ***
## WhiteMarsh             0.48413    0.19838    2.44    0.015 *  
## family                -0.32234    0.15718   -2.05    0.040 *  
## assisted              -0.44691    0.20625   -2.17    0.030 *  
## hospital               0.46642    0.24817    1.88    0.060 .  
## LocationType:AgeGroup  1.08611    0.27346    3.97  7.1e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1700.2  on 1404  degrees of freedom
## AIC: 1718
## 
## Number of Fisher Scoring iterations: 4
# Sequential analysis of deviance: each term is tested as it is added, in
# formula order, with the LocationType:AgeGroup interaction entering last.
anova(m7_inter1A, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##                       Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                                   1412       1796             
## black                  1    27.12      1411       1769  1.9e-07 ***
## LocationType           1    12.12      1410       1757  0.00050 ***
## AgeGroup               1     7.14      1409       1750  0.00755 ** 
## WhiteMarsh             1    11.70      1408       1738  0.00062 ***
## family                 1     6.06      1407       1732  0.01383 *  
## assisted               1    12.06      1406       1720  0.00052 ***
## hospital               1     4.05      1405       1716  0.04423 *  
## LocationType:AgeGroup  1    15.54      1404       1700  8.1e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All variables contributed.
# Exponentiate the coefficients and their confidence limits to report odds
# ratios with 2.5 % / 97.5 % bounds.
exp(cbind(OR = coef(m7_inter1A), confint(m7_inter1A)))
## Waiting for profiling to be done...
##                           OR  2.5 % 97.5 %
## (Intercept)           0.9986 0.7861 1.2682
## black                 0.5863 0.4471 0.7650
## LocationType          0.4265 0.2798 0.6454
## AgeGroup              0.5048 0.3800 0.6690
## WhiteMarsh            1.6228 1.1004 2.3968
## family                0.7245 0.5318 0.9852
## assisted              0.6396 0.4252 0.9556
## hospital              1.5943 0.9779 2.5936
## LocationType:AgeGroup 2.9627 1.7309 5.0609

# Build the LocationType x AgeGroup product as its own column and fit it as
# the only predictor, to see how the interaction behaves without the main
# effects.
gard$interaction <- gard$LocationType * gard$AgeGroup
m0_interaction <- glm(Completed ~ interaction, data = gard, family = binomial("logit"))
summary(m0_interaction)  # On its own the product term is not significant (p = 0.7)
## 
## Call:
## glm(formula = Completed ~ interaction, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.901  -0.901  -0.901   1.482   1.513  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.6920     0.0598  -11.58   <2e-16 ***
## interaction  -0.0702     0.1830   -0.38      0.7    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1795.9  on 1411  degrees of freedom
## AIC: 1800
## 
## Number of Fisher Scoring iterations: 4
# Odds ratios with 2.5 % / 97.5 % confidence bounds for the interaction-only
# model.
exp(cbind(OR = coef(m0_interaction), confint(m0_interaction)))
## Waiting for profiling to be done...
##                 OR  2.5 % 97.5 %
## (Intercept) 0.5006 0.4449 0.5624
## interaction 0.9322 0.6467 1.3273

# Main-effects model plus the black x assisted interaction
# (`black * assisted` expands to both main effects and black:assisted).
m7_inter1B <- glm(Completed ~ black * assisted + LocationType + AgeGroup + WhiteMarsh + 
    family + hospital, data = gard, family = binomial("logit"))
summary(m7_inter1B)  # AIC = 1733.7, Residual deviance = 1715.7
## 
## Call:
## glm(formula = Completed ~ black * assisted + LocationType + AgeGroup + 
##     WhiteMarsh + family + hospital, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.510  -0.960  -0.767   1.233   2.083  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)   
## (Intercept)      -0.130      0.119   -1.09   0.2748   
## black            -0.472      0.151   -3.13   0.0018 **
## assisted         -0.537      0.255   -2.10   0.0357 * 
## LocationType     -0.407      0.178   -2.29   0.0220 * 
## AgeGroup         -0.396      0.124   -3.19   0.0014 **
## WhiteMarsh        0.384      0.195    1.97   0.0485 * 
## family           -0.383      0.155   -2.46   0.0138 * 
## hospital          0.501      0.247    2.02   0.0429 * 
## black:assisted   -0.107      0.344   -0.31   0.7558   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1715.7  on 1404  degrees of freedom
## AIC: 1734
## 
## Number of Fisher Scoring iterations: 4
# Sequential analysis of deviance; the black:assisted term is tested last.
anova(m7_inter1B, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##                Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                            1412       1796             
## black           1    27.12      1411       1769  1.9e-07 ***
## assisted        1    18.67      1410       1750  1.6e-05 ***
## LocationType    1     1.76      1409       1748  0.18468    
## AgeGroup        1    10.39      1408       1738  0.00127 ** 
## WhiteMarsh      1    12.41      1407       1726  0.00043 ***
## family          1     5.84      1406       1720  0.01562 *  
## hospital        1     4.05      1405       1716  0.04423 *  
## black:assisted  1     0.10      1404       1716  0.75554    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Not all variables contributed.

# Main-effects model plus the black x AgeGroup interaction
# (`black * AgeGroup` expands to both main effects and black:AgeGroup).
m7_inter1C <- glm(Completed ~ black * AgeGroup + assisted + LocationType + WhiteMarsh + 
    family + hospital, data = gard, family = binomial("logit"))
summary(m7_inter1C)  # AIC = 1731.0, Residual deviance = 1713.0
## 
## Call:
## glm(formula = Completed ~ black * AgeGroup + assisted + LocationType + 
##     WhiteMarsh + family + hospital, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.537  -0.944  -0.731   1.204   1.986  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -0.0636     0.1235   -0.51  0.60683    
## black           -0.7052     0.1877   -3.76  0.00017 ***
## AgeGroup        -0.5134     0.1424   -3.61  0.00031 ***
## assisted        -0.5718     0.2055   -2.78  0.00539 ** 
## LocationType    -0.4132     0.1774   -2.33  0.01987 *  
## WhiteMarsh       0.3876     0.1947    1.99  0.04655 *  
## family          -0.3907     0.1556   -2.51  0.01206 *  
## hospital         0.4915     0.2475    1.99  0.04701 *  
## black:AgeGroup   0.4439     0.2651    1.67  0.09412 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796  on 1412  degrees of freedom
## Residual deviance: 1713  on 1404  degrees of freedom
## AIC: 1731
## 
## Number of Fisher Scoring iterations: 4
# Sequential analysis of deviance; the black:AgeGroup term is tested last.
anova(m7_inter1C, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##                Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                            1412       1796             
## black           1    27.12      1411       1769  1.9e-07 ***
## AgeGroup        1     3.34      1410       1766  0.06749 .  
## assisted        1    24.87      1409       1741  6.1e-07 ***
## LocationType    1     2.61      1408       1738  0.10613    
## WhiteMarsh      1    12.41      1407       1726  0.00043 ***
## family          1     5.84      1406       1720  0.01562 *  
## hospital        1     4.05      1405       1716  0.04423 *  
## black:AgeGroup  1     2.80      1404       1713  0.09429 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Not all variables contributed.

# Main-effects model plus the black x LocationType interaction
# (`black * LocationType` expands to both main effects and
# black:LocationType).
m7_inter1D <- glm(Completed ~ black * LocationType + AgeGroup + assisted + WhiteMarsh + 
    family + hospital, data = gard, family = binomial("logit"))
summary(m7_inter1D)  # AIC = 1731.6, Residual deviance = 1713.6
## 
## Call:
## glm(formula = Completed ~ black * LocationType + AgeGroup + assisted + 
##     WhiteMarsh + family + hospital, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.500  -0.953  -0.758   1.247   2.137  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)   
## (Intercept)          -0.162      0.121   -1.34   0.1792   
## black                -0.356      0.164   -2.17   0.0298 * 
## LocationType         -0.270      0.198   -1.36   0.1726   
## AgeGroup             -0.392      0.125   -3.15   0.0017 **
## assisted             -0.582      0.205   -2.84   0.0045 **
## WhiteMarsh            0.396      0.195    2.04   0.0416 * 
## family               -0.382      0.155   -2.46   0.0138 * 
## hospital              0.498      0.248    2.00   0.0450 * 
## black:LocationType   -0.415      0.287   -1.44   0.1489   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1713.6  on 1404  degrees of freedom
## AIC: 1732
## 
## Number of Fisher Scoring iterations: 4
# Sequential analysis of deviance; the black:LocationType term is tested last.
anova(m7_inter1D, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##                    Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                                1412       1796             
## black               1    27.12      1411       1769  1.9e-07 ***
## LocationType        1    12.12      1410       1757  0.00050 ***
## AgeGroup            1     7.14      1409       1750  0.00755 ** 
## assisted            1    11.57      1408       1738  0.00067 ***
## WhiteMarsh          1    12.41      1407       1726  0.00043 ***
## family              1     5.84      1406       1720  0.01562 *  
## hospital            1     4.05      1405       1716  0.04423 *  
## black:LocationType  1     2.11      1404       1714  0.14665    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Not all variables contributed.

# Main-effects model plus race-by-insurance interactions:
# `black * (assisted + hospital)` expands to the three main effects plus
# black:assisted and black:hospital.
m7_inter1E <- glm(Completed ~ black * (assisted + hospital) + LocationType + 
    AgeGroup + WhiteMarsh + family, data = gard, family = binomial("logit"))
summary(m7_inter1E)  # AIC = 1735.4, Residual deviance = 1715.4
## 
## Call:
## glm(formula = Completed ~ black * (assisted + hospital) + LocationType + 
##     AgeGroup + WhiteMarsh + family, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.539  -0.958  -0.767   1.236   2.082  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)   
## (Intercept)      -0.137      0.120   -1.14   0.2537   
## black            -0.449      0.158   -2.85   0.0044 **
## assisted         -0.532      0.256   -2.08   0.0373 * 
## hospital          0.575      0.289    1.99   0.0465 * 
## LocationType     -0.405      0.178   -2.28   0.0228 * 
## AgeGroup         -0.395      0.125   -3.17   0.0015 **
## WhiteMarsh        0.381      0.195    1.95   0.0507 . 
## family           -0.381      0.155   -2.45   0.0143 * 
## black:assisted   -0.130      0.346   -0.37   0.7077   
## black:hospital   -0.264      0.531   -0.50   0.6188   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1715.4  on 1403  degrees of freedom
## AIC: 1735
## 
## Number of Fisher Scoring iterations: 4
# Sequential analysis of deviance; black:assisted and black:hospital are
# tested last.
anova(m7_inter1E, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##                Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                            1412       1796             
## black           1    27.12      1411       1769  1.9e-07 ***
## assisted        1    18.67      1410       1750  1.6e-05 ***
## hospital        1     4.39      1409       1746   0.0361 *  
## LocationType    1     3.73      1408       1742   0.0536 .  
## AgeGroup        1    10.59      1407       1732   0.0011 ** 
## WhiteMarsh      1     9.67      1406       1722   0.0019 ** 
## family          1     6.08      1405       1716   0.0137 *  
## black:assisted  1     0.10      1404       1716   0.7555    
## black:hospital  1     0.25      1403       1715   0.6165    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Not all variables contributed.

# Full three-way structure among black, LocationType and AgeGroup:
# `black * LocationType * AgeGroup` expands to the three main effects, all
# three two-way interactions and the three-way interaction.
m7_inter2A <- glm(Completed ~ black * LocationType * AgeGroup + assisted + hospital + 
    WhiteMarsh + family, data = gard, family = binomial("logit"))
summary(m7_inter2A)  # AIC = 1720.5, Residual deviance = 1696.5
## 
## Call:
## glm(formula = Completed ~ black * LocationType * AgeGroup + assisted + 
##     hospital + WhiteMarsh + family, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.601  -0.880  -0.734   1.177   2.101  
## 
## Coefficients:
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  0.000214   0.132261    0.00  0.99871    
## black                       -0.538195   0.237439   -2.27  0.02341 *  
## LocationType                -0.743866   0.243793   -3.05  0.00228 ** 
## AgeGroup                    -0.748667   0.162287   -4.61    4e-06 ***
## assisted                    -0.430680   0.207913   -2.07  0.03832 *  
## hospital                     0.461043   0.248793    1.85  0.06387 .  
## WhiteMarsh                   0.496215   0.198313    2.50  0.01234 *  
## family                      -0.330555   0.157201   -2.10  0.03549 *  
## black:LocationType          -0.377147   0.396865   -0.95  0.34195    
## black:AgeGroup               0.309645   0.326496    0.95  0.34293    
## LocationType:AgeGroup        1.134612   0.337040    3.37  0.00076 ***
## black:LocationType:AgeGroup -0.158033   0.584988   -0.27  0.78705    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1696.5  on 1401  degrees of freedom
## AIC: 1721
## 
## Number of Fisher Scoring iterations: 4
# Sequential analysis of deviance; the two-way interactions and then the
# three-way interaction are tested after all main effects.
anova(m7_inter2A, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##                             Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                                         1412       1796             
## black                        1    27.12      1411       1769  1.9e-07 ***
## LocationType                 1    12.12      1410       1757  0.00050 ***
## AgeGroup                     1     7.14      1409       1750  0.00755 ** 
## assisted                     1    11.57      1408       1738  0.00067 ***
## hospital                     1     6.55      1407       1732  0.01048 *  
## WhiteMarsh                   1     9.67      1406       1722  0.00187 ** 
## family                       1     6.08      1405       1716  0.01367 *  
## black:LocationType           1     2.11      1404       1714  0.14665    
## black:AgeGroup               1     2.07      1403       1712  0.15000    
## LocationType:AgeGroup        1    14.96      1402       1697  0.00011 ***
## black:LocationType:AgeGroup  1     0.07      1401       1697  0.78711    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Not all variables contributed.

# Race interacting with both location type and age group:
# `black * (LocationType + AgeGroup)` expands to the three main effects plus
# black:LocationType and black:AgeGroup.
m7_inter2B <- glm(Completed ~ black * (LocationType + AgeGroup) + assisted + 
    hospital + WhiteMarsh + family, data = gard, family = binomial("logit"))
summary(m7_inter2B)  # AIC = 1731.6, Residual deviance = 1711.6
## 
## Call:
## glm(formula = Completed ~ black * (LocationType + AgeGroup) + 
##     assisted + hospital + WhiteMarsh + family, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.524  -0.938  -0.738   1.221   2.058  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -0.102      0.128   -0.80  0.42636    
## black                -0.567      0.221   -2.56  0.01035 *  
## LocationType         -0.303      0.200   -1.52  0.12876    
## AgeGroup             -0.493      0.143   -3.44  0.00058 ***
## assisted             -0.573      0.206   -2.78  0.00536 ** 
## hospital              0.491      0.248    1.98  0.04775 *  
## WhiteMarsh            0.396      0.195    2.04  0.04165 *  
## family               -0.391      0.155   -2.51  0.01192 *  
## black:LocationType   -0.341      0.291   -1.17  0.24225    
## black:AgeGroup        0.389      0.270    1.44  0.14952    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1711.6  on 1403  degrees of freedom
## AIC: 1732
## 
## Number of Fisher Scoring iterations: 4
# Sequential likelihood-ratio tests for m7_inter2B (terms added first to last).
anova(m7_inter2B, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##                    Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                                1412       1796             
## black               1    27.12      1411       1769  1.9e-07 ***
## LocationType        1    12.12      1410       1757  0.00050 ***
## AgeGroup            1     7.14      1409       1750  0.00755 ** 
## assisted            1    11.57      1408       1738  0.00067 ***
## hospital            1     6.55      1407       1732  0.01048 *  
## WhiteMarsh          1     9.67      1406       1722  0.00187 ** 
## family              1     6.08      1405       1716  0.01367 *  
## black:LocationType  1     2.11      1404       1714  0.14665    
## black:AgeGroup      1     2.07      1403       1712  0.15000    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Not all variables contributed.

# Model 2C: (black + LocationType) * AgeGroup expands to the three main
# effects plus the black:AgeGroup and LocationType:AgeGroup interactions; the
# remaining predictors enter additively.
m7_inter2C <- glm(Completed ~ (black + LocationType) * AgeGroup + assisted + 
    hospital + WhiteMarsh + family, data = gard, family = binomial("logit"))
summary(m7_inter2C)  # AIC = 1719.0, Residual deviance = 1699.0
## 
## Call:
## glm(formula = Completed ~ (black + LocationType) * AgeGroup + 
##     assisted + hospital + WhiteMarsh + family, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.612  -0.894  -0.709   1.162   2.032  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             0.0361     0.1269    0.28  0.77596    
## black                  -0.6743     0.1899   -3.55  0.00038 ***
## LocationType           -0.8444     0.2139   -3.95  7.9e-05 ***
## AgeGroup               -0.7478     0.1562   -4.79  1.7e-06 ***
## assisted               -0.4452     0.2065   -2.16  0.03106 *  
## hospital                0.4625     0.2480    1.86  0.06221 .  
## WhiteMarsh              0.4819     0.1984    2.43  0.01513 *  
## family                 -0.3307     0.1575   -2.10  0.03572 *  
## black:AgeGroup          0.2924     0.2697    1.08  0.27833    
## LocationType:AgeGroup   1.0386     0.2767    3.75  0.00017 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796  on 1412  degrees of freedom
## Residual deviance: 1699  on 1403  degrees of freedom
## AIC: 1719
## 
## Number of Fisher Scoring iterations: 4
# Sequential likelihood-ratio tests for m7_inter2C (terms added first to last).
anova(m7_inter2C, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##                       Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                                   1412       1796             
## black                  1    27.12      1411       1769  1.9e-07 ***
## LocationType           1    12.12      1410       1757  0.00050 ***
## AgeGroup               1     7.14      1409       1750  0.00755 ** 
## assisted               1    11.57      1408       1738  0.00067 ***
## hospital               1     6.55      1407       1732  0.01048 *  
## WhiteMarsh             1     9.67      1406       1722  0.00187 ** 
## family                 1     6.08      1405       1716  0.01367 *  
## black:AgeGroup         1     2.80      1404       1713  0.09429 .  
## LocationType:AgeGroup  1    13.91      1403       1699  0.00019 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All variables contributed at the 0.10 level (black:AgeGroup, p = 0.094, is
# not significant at the 0.05 level).

# Model 2D: (black + AgeGroup) * LocationType expands to the three main
# effects plus the black:LocationType and AgeGroup:LocationType interactions;
# the remaining predictors enter additively.
m7_inter2D <- glm(Completed ~ (black + AgeGroup) * LocationType + assisted + 
    hospital + WhiteMarsh + family, data = gard, family = binomial("logit"))
summary(m7_inter2D)  # AIC = 1717.5, Residual deviance = 1697.5
## 
## Call:
## glm(formula = Completed ~ (black + AgeGroup) * LocationType + 
##     assisted + hospital + WhiteMarsh + family, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.584  -0.890  -0.727   1.196   2.082  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             -0.043      0.124   -0.35   0.7293    
## black                   -0.379      0.165   -2.29   0.0217 *  
## AgeGroup                -0.678      0.144   -4.71  2.5e-06 ***
## LocationType            -0.716      0.228   -3.14   0.0017 ** 
## assisted                -0.439      0.207   -2.12   0.0338 *  
## hospital                 0.463      0.249    1.86   0.0627 .  
## WhiteMarsh               0.499      0.198    2.52   0.0117 *  
## family                  -0.321      0.157   -2.05   0.0404 *  
## black:LocationType      -0.470      0.289   -1.62   0.1042    
## AgeGroup:LocationType    1.115      0.276    4.04  5.4e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1697.5  on 1403  degrees of freedom
## AIC: 1718
## 
## Number of Fisher Scoring iterations: 4
# Sequential likelihood-ratio tests for m7_inter2D (terms added first to last).
anova(m7_inter2D, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##                       Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                                   1412       1796             
## black                  1    27.12      1411       1769  1.9e-07 ***
## AgeGroup               1     3.34      1410       1766  0.06749 .  
## LocationType           1    15.91      1409       1750  6.6e-05 ***
## assisted               1    11.57      1408       1738  0.00067 ***
## hospital               1     6.55      1407       1732  0.01048 *  
## WhiteMarsh             1     9.67      1406       1722  0.00187 ** 
## family                 1     6.08      1405       1716  0.01367 *  
## black:LocationType     1     2.11      1404       1714  0.14665    
## AgeGroup:LocationType  1    16.10      1403       1698  6.0e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Not all variables contributed.

# The best interaction models can be compared to the best non-interaction
# model.
# Chi-square (likelihood-ratio) test of m7_inter1A against the nested additive
# model m7_percat; the single extra term is LocationType:AgeGroup.
anova(m7_inter1A, m7_percat, test = "Chisq")  # Null Rejected
## Analysis of Deviance Table
## 
## Model 1: Completed ~ black + LocationType * AgeGroup + WhiteMarsh + family + 
##     assisted + hospital
## Model 2: Completed ~ black + LocationType + AgeGroup + WhiteMarsh + family + 
##     hospital + assisted
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)    
## 1      1404       1700                         
## 2      1405       1716 -1    -15.5  8.1e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# m7_inter1A is nested in m7_inter2D, which adds only black:LocationType.
# With the null not rejected, the smaller model (m7_inter1A) is preferred.
anova(m7_inter2D, m7_inter1A, test = "Chisq")  # Null not Rejected
## Analysis of Deviance Table
## 
## Model 1: Completed ~ (black + AgeGroup) * LocationType + assisted + hospital + 
##     WhiteMarsh + family
## Model 2: Completed ~ black + LocationType * AgeGroup + WhiteMarsh + family + 
##     assisted + hospital
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1      1403       1698                     
## 2      1404       1700 -1    -2.67      0.1

Some Model Notes

## Comparing nested models ##
## anova(MODEL1, MODEL2, test='Chisq')
## Tests the null hypothesis that the proportions for each group are the
## same; the two models must be nested.  When the null is not rejected at
## the 0.05 level, go by parsimony (use the smaller model).

## Comparing variables in a single model ##
## Wald's Test: use summary(MODEL), then read the p-value.
## anova(MODEL, test='LRT')
## Additive -- Does adding the next independent variable improve the model
## based on the Deviance Test? (Null = no)
## drop1(MODEL, test='LRT')
## Subtractive -- Does removing an independent variable reduce the
## predictive power of the model based on the Deviance Test (i.e. does it
## increase the deviance)? (Null = no)

Summary of Best Models

# Best Non-Interacting Model:
# Additive logistic model; the coefficient table gives a Wald z-test for each
# predictor, and every predictor (not the intercept) has p < 0.05.
summary(m7_percat)  # AIC = 1731.8, Residual deviance = 1715.8
## 
## Call:
## glm(formula = Completed ~ black + LocationType + AgeGroup + WhiteMarsh + 
##     family + hospital + assisted, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.512  -0.963  -0.754   1.231   2.063  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.125      0.118   -1.06  0.28828    
## black          -0.493      0.135   -3.64  0.00027 ***
## LocationType   -0.402      0.177   -2.27  0.02305 *  
## AgeGroup       -0.398      0.124   -3.20  0.00136 ** 
## WhiteMarsh      0.383      0.195    1.97  0.04882 *  
## family         -0.382      0.155   -2.46  0.01406 *  
## hospital        0.500      0.248    2.02  0.04325 *  
## assisted       -0.584      0.205   -2.85  0.00437 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1715.8  on 1405  degrees of freedom
## AIC: 1732
## 
## Number of Fisher Scoring iterations: 4

# Predicted P(Completed) from m7_percat for selected 0/1 covariate profiles.
# The coefficients are hard-coded (rounded estimates of m7_percat) in the
# order: intercept, black, LocationType, AgeGroup, WhiteMarsh, family,
# assisted, hospital.
percat_coef <- c(-0.1253, -0.4929, -0.4016, -0.3981, 0.3835, -0.3816, -0.5843, 
    0.5004)

# Returns the inverse logit of the linear predictor for one profile.  Every
# indicator defaults to 0, so a call names only the indicators that are 1.
# plogis(eta) is exp(eta)/(1 + exp(eta)) without typing eta out twice.
percat_prob <- function(black = 0, LocationType = 0, AgeGroup = 0, WhiteMarsh = 0, 
    family = 0, assisted = 0, hospital = 0) {
    profile <- c(1, black, LocationType, AgeGroup, WhiteMarsh, family, assisted, 
        hospital)
    plogis(sum(percat_coef * profile))
}

# The five-digit section labels give black, WhiteMarsh, family, assisted,
# hospital; LocationType and AgeGroup vary within each section.

# 00000 Model
percat_prob()  # Not black, suburban, age 11-17, Not White Marsh, Not family, Not assisted or hospital insurance
## [1] 0.4687
percat_prob(AgeGroup = 1)  # Not black, suburban, age 18-26, Not White Marsh, Not family, Not assisted or hospital insurance
## [1] 0.3721
percat_prob(LocationType = 1)  # Not black, urban, age 11-17, Not White Marsh, Not family, Not assisted or hospital insurance
## [1] 0.3712
percat_prob(LocationType = 1, AgeGroup = 1)  # Not black, urban, age 18-26, Not White Marsh, Not family, Not assisted or hospital insurance
## [1] 0.2839

# 11110 Model
percat_prob(black = 1, WhiteMarsh = 1, family = 1, assisted = 1)  # black, suburban, age 11-17, attends White Marsh, family practice, assisted insurance
## [1] 0.2314
percat_prob(black = 1, AgeGroup = 1, WhiteMarsh = 1, family = 1, assisted = 1)  # black, suburban, age 18-26, attends White Marsh, family practice, assisted insurance
## [1] 0.1682
percat_prob(black = 1, LocationType = 1, WhiteMarsh = 1, family = 1, assisted = 1)  # black, urban, age 11-17, attends White Marsh, family practice, assisted insurance
## [1] 0.1677
percat_prob(black = 1, LocationType = 1, AgeGroup = 1, WhiteMarsh = 1, family = 1, 
    assisted = 1)  # black, urban, age 18-26, attends White Marsh, family practice, assisted insurance
## [1] 0.1192

# 01000 Model
percat_prob(WhiteMarsh = 1)  # Not black, suburban, age 11-17, White Marsh, Not family, Not assisted or hospital insurance
## [1] 0.5642
percat_prob(AgeGroup = 1, WhiteMarsh = 1)  # Not black, suburban, age 18-26, White Marsh, Not family, Not assisted or hospital insurance
## [1] 0.4651
percat_prob(LocationType = 1, WhiteMarsh = 1)  # Not black, urban, age 11-17, White Marsh, Not family, Not assisted or hospital insurance
## [1] 0.4642
percat_prob(LocationType = 1, AgeGroup = 1, WhiteMarsh = 1)  # Not black, urban, age 18-26, White Marsh, Not family, Not assisted or hospital insurance
## [1] 0.3678

# 10110 Model
percat_prob(black = 1, family = 1, assisted = 1)  # black, suburban, age 11-17, not White Marsh, family practice, assisted insurance
## [1] 0.1702
percat_prob(black = 1, AgeGroup = 1, family = 1, assisted = 1)  # black, suburban, age 18-26, not White Marsh, family practice, assisted insurance
## [1] 0.1211
percat_prob(black = 1, LocationType = 1, family = 1, assisted = 1)  # black, urban, age 11-17, not White Marsh, family practice, assisted insurance
## [1] 0.1207
percat_prob(black = 1, LocationType = 1, AgeGroup = 1, family = 1, assisted = 1)  # black, urban, age 18-26, not White Marsh, family practice, assisted insurance
## [1] 0.08442

# 01001 Model
percat_prob(WhiteMarsh = 1, hospital = 1)  # Not black, suburban, age 11-17, White Marsh, Not family, hospital insurance
## [1] 0.681
percat_prob(AgeGroup = 1, WhiteMarsh = 1, hospital = 1)  # Not black, suburban, age 18-26, White Marsh, Not family, hospital insurance
## [1] 0.5892
percat_prob(LocationType = 1, WhiteMarsh = 1, hospital = 1)  # Not black, urban, age 11-17, White Marsh, Not family, hospital insurance
## [1] 0.5883
percat_prob(LocationType = 1, AgeGroup = 1, WhiteMarsh = 1, hospital = 1)  # Not black, urban, age 18-26, White Marsh, Not family, hospital insurance
## [1] 0.4897

# Best Model With Interactions:
# Same predictors as m7_percat plus the LocationType:AgeGroup interaction.
summary(m7_inter1A)  # AIC = 1718.2, Residual deviance = 1700.2
## 
## Call:
## glm(formula = Completed ~ black + LocationType * AgeGroup + WhiteMarsh + 
##     family + assisted + hospital, family = binomial("logit"), 
##     data = gard)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.598  -0.903  -0.694   1.178   2.057  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -0.00137    0.12190   -0.01    0.991    
## black                 -0.53386    0.13689   -3.90  9.6e-05 ***
## LocationType          -0.85218    0.21299   -4.00  6.3e-05 ***
## AgeGroup              -0.68361    0.14420   -4.74  2.1e-06 ***
## WhiteMarsh             0.48413    0.19838    2.44    0.015 *  
## family                -0.32234    0.15718   -2.05    0.040 *  
## assisted              -0.44691    0.20625   -2.17    0.030 *  
## hospital               0.46642    0.24817    1.88    0.060 .  
## LocationType:AgeGroup  1.08611    0.27346    3.97  7.1e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1700.2  on 1404  degrees of freedom
## AIC: 1718
## 
## Number of Fisher Scoring iterations: 4
# All variables contributed.

# Predicted P(Completed) from m7_inter1A for selected 0/1 covariate profiles.
# The coefficients are hard-coded (rounded estimates of m7_inter1A) in the
# order: intercept, black, LocationType, AgeGroup, WhiteMarsh, family,
# assisted, hospital, LocationType:AgeGroup.
inter1A_coef <- c(-0.001367, -0.53386, -0.852176, -0.683608, 0.484134, -0.32234, 
    -0.446915, 0.46642, 1.08611)

# Returns the inverse logit of the linear predictor for one profile.  Every
# indicator defaults to 0, so a call names only the indicators that are 1.
# The interaction indicator is derived as LocationType * AgeGroup so it cannot
# get out of step with the two main-effect indicators.
inter1A_prob <- function(black = 0, LocationType = 0, AgeGroup = 0, WhiteMarsh = 0, 
    family = 0, assisted = 0, hospital = 0) {
    profile <- c(1, black, LocationType, AgeGroup, WhiteMarsh, family, assisted, 
        hospital, LocationType * AgeGroup)
    plogis(sum(inter1A_coef * profile))
}

# The five-digit section labels give black, WhiteMarsh, family, assisted,
# hospital; LocationType and AgeGroup vary within each section.

# 00000 Model
inter1A_prob()  # Not black, suburban, age 11-17, Not White Marsh, Not family, Not assisted or hospital insurance
## [1] 0.4997
inter1A_prob(LocationType = 1)  # Not black, urban, age 11-17, Not White Marsh, Not family, Not assisted or hospital insurance
## [1] 0.2987
inter1A_prob(AgeGroup = 1)  # Not black, suburban, age 18-26, Not White Marsh, Not family, Not assisted or hospital insurance
## [1] 0.3352
inter1A_prob(LocationType = 1, AgeGroup = 1)  # Not black, urban, age 18-26, Not White Marsh, Not family, Not assisted or hospital insurance
## [1] 0.3891

# 11110 Model
inter1A_prob(black = 1, WhiteMarsh = 1, family = 1, assisted = 1)  # black, suburban, age 11-17, attends White Marsh, family practice, assisted insurance
## [1] 0.3057
inter1A_prob(black = 1, LocationType = 1, WhiteMarsh = 1, family = 1, assisted = 1)  # black, urban, age 11-17, attends White Marsh, family practice, assisted insurance
## [1] 0.1581
inter1A_prob(black = 1, AgeGroup = 1, WhiteMarsh = 1, family = 1, assisted = 1)  # black, suburban, age 18-26, attends White Marsh, family practice, assisted insurance
## [1] 0.1818
inter1A_prob(black = 1, LocationType = 1, AgeGroup = 1, WhiteMarsh = 1, family = 1, 
    assisted = 1)  # black, urban, age 18-26, attends White Marsh, family practice, assisted insurance
## [1] 0.2193

# 01000 Model
inter1A_prob(WhiteMarsh = 1)  # Not black, suburban, age 11-17, White Marsh, Not family, Not assisted or hospital insurance
## [1] 0.6184
inter1A_prob(LocationType = 1, WhiteMarsh = 1)  # Not black, urban, age 11-17, White Marsh, Not family, Not assisted or hospital insurance
## [1] 0.4087
inter1A_prob(AgeGroup = 1, WhiteMarsh = 1)  # Not black, suburban, age 18-26, White Marsh, Not family, Not assisted or hospital insurance
## [1] 0.45
inter1A_prob(LocationType = 1, AgeGroup = 1, WhiteMarsh = 1)  # Not black, urban, age 18-26, White Marsh, Not family, Not assisted or hospital insurance
## [1] 0.5083

# 10110 Model
inter1A_prob(black = 1, family = 1, assisted = 1)  # black, suburban, age 11-17, not White Marsh, family practice, assisted insurance
## [1] 0.2134
inter1A_prob(black = 1, LocationType = 1, family = 1, assisted = 1)  # black, urban, age 11-17, not White Marsh, family practice, assisted insurance
## [1] 0.1037
inter1A_prob(black = 1, AgeGroup = 1, family = 1, assisted = 1)  # black, suburban, age 18-26, not White Marsh, family practice, assisted insurance
## [1] 0.1205
inter1A_prob(black = 1, LocationType = 1, AgeGroup = 1, family = 1, assisted = 1)  # black, urban, age 18-26, not White Marsh, family practice, assisted insurance
## [1] 0.1475

# 01001 Model
inter1A_prob(WhiteMarsh = 1, hospital = 1)  # Not black, suburban, age 11-17, White Marsh, Not family, hospital insurance
## [1] 0.721
inter1A_prob(LocationType = 1, WhiteMarsh = 1, hospital = 1)  # Not black, urban, age 11-17, White Marsh, Not family, hospital insurance
## [1] 0.5242
inter1A_prob(AgeGroup = 1, WhiteMarsh = 1, hospital = 1)  # Not black, suburban, age 18-26, White Marsh, Not family, hospital insurance
## [1] 0.566
inter1A_prob(LocationType = 1, AgeGroup = 1, WhiteMarsh = 1, hospital = 1)  # Not black, urban, age 18-26, White Marsh, Not family, hospital insurance
## [1] 0.6223

Other Code

# Odds ratios with their 2.5% / 97.5% confidence limits: bind the coefficient
# estimates to the confint() bounds, then exponentiate the whole table.
odds_ratio_table <- cbind(OR = coef(m8_allsigs), confint(m8_allsigs))
exp(odds_ratio_table)
## Waiting for profiling to be done...
##                  OR  2.5 % 97.5 %
## (Intercept)  0.7945 0.5629 1.1173
## white        1.0175 0.7425 1.4000
## black        0.6114 0.4289 0.8726
## LocationType 0.6567 0.4633 0.9276
## WhiteMarsh   1.3911 0.9296 2.0823
## obgyn        0.8502 0.6264 1.1511
## family       0.5799 0.4136 0.8114
## assisted     0.6007 0.3997 0.8961
## hospital     1.6800 1.0339 2.7248
# Residuals of m8_allsigs against its fitted values (for a glm, residuals()
# returns deviance residuals unless another type is requested).
plot(residuals(m8_allsigs) ~ fitted(m8_allsigs), main = "Residuals for m8_allsigs Model", 
    xlab = "Fitted Values - P(Completion)", ylab = "Residuals", pch = 15)

plot of chunk unnamed-chunk-20

# Fitted values of m8_allsigs plotted against the Location code of each row
# of gard.
plot(fitted(m8_allsigs) ~ gard$Location, main = "Fitted Values for m8_allsigs Model", 
    ylab = "P(Completion)", xlab = "Location", pch = 15)

plot of chunk unnamed-chunk-20


# Progressively narrower subsets of gard: each step filters the previous data
# frame on one more 0/1 indicator column.
black <- gard[gard[["black"]] == 1, ]  # rows with black == 1
black1 <- black[black[["assisted"]] == 1, ]  # ... and assisted == 1
black11 <- black1[black1[["LocationType"]] == 1, ]  # ... and LocationType == 1
black111 <- black11[black11[["WhiteMarsh"]] == 0, ]  # ... and WhiteMarsh == 0
black1111 <- black111[black111[["family"]] == 1, ]  # ... and family == 1
# Separate branch off the first subset: black == 1 and family == 1 only.
black2 <- black[black[["family"]] == 1, ]

# Progressively narrower subsets starting from the WhiteMarsh data frame.
# NOTE(review): WhiteMarsh is used here as a data frame, not as the 0/1 column
# of gard, and it is not created in this part of the file -- presumably it is
# the White Marsh subset of gard; confirm it is defined earlier.
hospital <- WhiteMarsh[WhiteMarsh[["hospital"]] == 1, ]  # hospital == 1
hospital1 <- hospital[hospital[["black"]] == 0, ]  # ... and black == 0
hospital2 <- hospital1[hospital1[["family"]] == 0, ]  # ... and family == 0
hospital3 <- hospital2[hospital2[["AgeGroup"]] == 1, ]  # ... and AgeGroup == 1
hospital4 <- hospital3[hospital3[["LocationType"]] == 1, ]  # ... and LocationType == 1
# Separate branch: every WhiteMarsh row with LocationType == 1.
hospital5 <- WhiteMarsh[WhiteMarsh[["LocationType"]] == 1, ]