Loading Data
# Read the tab-delimited Gardasil data file into `gard`; the first line of the
# file supplies the column names.
gard <- read.delim(
  file = "/Users/telekineticturtle/Desktop/Colorado 13/Quant Methods/Data/jh_gardasil.dat",
  header = TRUE
)
names(gard)
## [1] "Age" "AgeGroup" "Race" "Shots"
## [5] "Completed" "InsuranceType" "MedAssist" "Location"
## [9] "LocationType" "PracticeType" "X" "X.1"
head(gard)
## Age AgeGroup Race Shots Completed InsuranceType MedAssist Location
## 1 21 1 0 3 1 3 0 1
## 2 21 1 0 3 1 3 0 1
## 3 20 1 0 1 0 1 0 1
## 4 14 0 0 3 1 3 0 1
## 5 17 0 3 2 0 3 0 1
## 6 11 0 1 1 0 0 1 1
## LocationType PracticeType X X.1
## 1 0 1 NA NA
## 2 0 1 NA NA
## 3 0 1 NA NA
## 4 0 0 NA NA
## 5 0 1 NA NA
## 6 0 0 NA NA
# Keep only the first ten columns; columns 11-12 ("X", "X.1") show nothing but
# NA in the head() output above.
gard <- gard[seq_len(10)]
# Cross-tabulate every retained column of the data.frame.
gardtable <- table(gard)
# The mean of Completed is the overall completion probability (0.3319).
summary(gard$Completed)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 0.000 0.332 1.000 1.000
Creating Dummy Variables for each Category:
## Dummy (indicator) variables: as.numeric(LOGIC) gives 1 where the logic
## statement is true and 0 where it is false (NA stays NA). This yields the
## same 0/1 columns as ifelse(LOGIC, c(1), c(0)).
## Race Dummy Variables:
gard$white <- as.numeric(gard$Race == 0)
gard$black <- as.numeric(gard$Race == 1)
gard$hispanic <- as.numeric(gard$Race == 2)
gard$otherrace <- as.numeric(gard$Race == 3)
# Insurance Type Variables:
gard$assisted <- as.numeric(gard$InsuranceType == 0)
gard$private <- as.numeric(gard$InsuranceType == 1)
gard$hospital <- as.numeric(gard$InsuranceType == 2)
gard$military <- as.numeric(gard$InsuranceType == 3)
# Location Dummy Variables:
gard$Odenton <- as.numeric(gard$Location == 1)
gard$WhiteMarsh <- as.numeric(gard$Location == 2)
gard$JohnsHopkins <- as.numeric(gard$Location == 3)
gard$Bayview <- as.numeric(gard$Location == 4)
# Practice Type Dummy Variables:
gard$pediatric <- as.numeric(gard$PracticeType == 0)
gard$family <- as.numeric(gard$PracticeType == 1)
gard$obgyn <- as.numeric(gard$PracticeType == 2)
# Location Type Variable:
# NOTE(review): the plot label and probability comments for the location-type
# models treat LocationType 0 as Suburban and 1 as Urban. If that coding is
# right, this dummy is 1 for SUBURBAN rows despite its name -- confirm against
# the codebook. The name is kept because later models reference gard$urban.
gard$urban <- as.numeric(gard$LocationType == 0)
# Age Group Variable:
# AgeGroup 0 = ages 11-17, AgeGroup 1 = ages 18-26 (see head(gard): ages 20-21
# have AgeGroup 1; ages 11, 14 and 17 have AgeGroup 0). So young is 1 for
# ages 11-17.
gard$young <- as.numeric(gard$AgeGroup == 0)
Univariate Models for Characteristics with More than Two Groups
Age Group
## Age Group - Old
m1_agegrp <- glm(gard$Completed ~ gard$AgeGroup, family = binomial("logit"))
summary(m1_agegrp)
##
## Call:
## glm(formula = gard$Completed ~ gard$AgeGroup, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.932 -0.932 -0.865 1.444 1.527
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.6087 0.0791 -7.70 1.4e-14 ***
## gard$AgeGroup -0.1830 0.1131 -1.62 0.11
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1793.4 on 1411 degrees of freedom
## AIC: 1797
##
## Number of Fisher Scoring iterations: 4
plot(gard$AgeGroup, fitted(m1_agegrp), main = "Completion Probability by Age Group",
xlab = "Age Group", ylab = "P(Completion)", pch = 15)
exp(-0.60871 - 0.18302 * 0)/(1 + exp(-0.60871 - 0.18302 * 0)) # For ages 11-17
## [1] 0.3524
exp(-0.60871 - 0.18302 * 1)/(1 + exp(-0.60871 - 0.18302 * 1)) # For ages 18-26
## [1] 0.3118
exp(cbind(OR = coef(m1_agegrp), confint(m1_agegrp)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.5441 0.4654 0.6345
## gard$AgeGroup 0.8328 0.6669 1.0393
## Age Group - Young
m0_agegrp <- glm(gard$Completed ~ gard$young, family = binomial("logit"))
summary(m0_agegrp)
##
## Call:
## glm(formula = gard$Completed ~ gard$young, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.932 -0.932 -0.865 1.444 1.527
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7917 0.0809 -9.79 <2e-16 ***
## gard$young 0.1830 0.1131 1.62 0.11
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1793.4 on 1411 degrees of freedom
## AIC: 1797
##
## Number of Fisher Scoring iterations: 4
exp(-0.7917 + 0.183 * 0)/(1 + exp(-0.7917 + 0.183 * 0)) # For ages 18-26
## [1] 0.3118
exp(-0.7917 + 0.183 * 1)/(1 + exp(-0.7917 + 0.183 * 1)) # For ages 11-17
## [1] 0.3524
exp(cbind(OR = coef(m0_agegrp), confint(m0_agegrp)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.4531 0.3860 0.5301
## gard$young 1.2008 0.9622 1.4994
Race
## Race -- ALL
m1_race <- glm(gard$Completed ~ gard$white + gard$black + gard$hispanic, family = binomial("logit"))
summary(m1_race)
##
## Call:
## glm(formula = gard$Completed ~ gard$white + gard$black + gard$hispanic,
## family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.982 -0.982 -0.736 1.386 1.697
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.5744 0.1527 -3.76 0.00017 ***
## gard$white 0.0955 0.1706 0.56 0.57552
## gard$black -0.5947 0.1892 -3.14 0.00168 **
## gard$hispanic -0.1477 0.3328 -0.44 0.65712
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796 on 1412 degrees of freedom
## Residual deviance: 1768 on 1409 degrees of freedom
## AIC: 1776
##
## Number of Fisher Scoring iterations: 4
plot(gard$Race, fitted(m1_race), main = "Completion Probability by Race", xlab = "Race",
ylab = "P(Completion)", pch = 15)
exp(-0.57443 + 0.09554 * 1 - 0.59465 * 0 - 0.1477 * 0)/(1 + exp(-0.57443 + 0.09554 *
1 - 0.59465 * 0 - 0.1477 * 0)) # For White
## [1] 0.3825
exp(-0.57443 + 0.09554 * 0 - 0.59465 * 1 - 0.1477 * 0)/(1 + exp(-0.57443 + 0.09554 *
0 - 0.59465 * 1 - 0.1477 * 0)) # For Black
## [1] 0.237
exp(-0.57443 + 0.09554 * 0 - 0.59465 * 0 - 0.1477 * 1)/(1 + exp(-0.57443 + 0.09554 *
0 - 0.59465 * 0 - 0.1477 * 1)) # For Hispanic
## [1] 0.3269
exp(-0.57443 + 0.09554 * 0 - 0.59465 * 0 - 0.1477 * 0)/(1 + exp(-0.57443 + 0.09554 *
0 - 0.59465 * 0 - 0.1477 * 0)) # For OtherRace
## [1] 0.3602
exp(cbind(OR = coef(m1_race), confint(m1_race)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.5630 0.4154 0.7568
## gard$white 1.1003 0.7897 1.5428
## gard$black 0.5518 0.3811 0.8009
## gard$hispanic 0.8627 0.4414 1.6375
# White
m0_racew <- glm(gard$Completed ~ gard$white, family = binomial("logit"))
summary(m0_racew)
##
## Call:
## glm(formula = gard$Completed ~ gard$white, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.982 -0.982 -0.806 1.386 1.601
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.9567 0.0856 -11.18 <2e-16 ***
## gard$white 0.4778 0.1145 4.17 3e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1778.4 on 1411 degrees of freedom
## AIC: 1782
##
## Number of Fisher Scoring iterations: 4
exp(-0.95673 + 0.47784 * 0)/(1 + exp(-0.95673 + 0.47784 * 0)) # Not White
## [1] 0.2775
exp(-0.95673 + 0.47784 * 1)/(1 + exp(-0.95673 + 0.47784 * 1)) # White
## [1] 0.3825
exp(cbind(OR = coef(m0_racew), confint(m0_racew)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.3841 0.3241 0.4534
## gard$white 1.6126 1.2894 2.0200
# Black
m0_raceb <- glm(gard$Completed ~ gard$black, family = binomial("logit"))
summary(m0_raceb)
##
## Call:
## glm(formula = gard$Completed ~ gard$black, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.970 -0.970 -0.736 1.400 1.697
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.5097 0.0663 -7.69 1.5e-14 ***
## gard$black -0.6594 0.1299 -5.08 3.9e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1768.9 on 1411 degrees of freedom
## AIC: 1773
##
## Number of Fisher Scoring iterations: 4
exp(-0.50973 - 0.65936 * 0)/(1 + exp(-0.50973 - 0.65936 * 0)) # Not Black
## [1] 0.3753
exp(-0.50973 - 0.65936 * 1)/(1 + exp(-0.50973 - 0.65936 * 1)) # Black
## [1] 0.237
exp(cbind(OR = coef(m0_raceb), confint(m0_raceb)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.6007 0.5271 0.6836
## gard$black 0.5172 0.3997 0.6653
# Hispanic
m0_raceh <- glm(gard$Completed ~ gard$hispanic, family = binomial("logit"))
summary(m0_raceh)
##
## Call:
## glm(formula = gard$Completed ~ gard$hispanic, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.898 -0.898 -0.898 1.485 1.495
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.6987 0.0576 -12.14 <2e-16 ***
## gard$hispanic -0.0235 0.3012 -0.08 0.94
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796 on 1412 degrees of freedom
## Residual deviance: 1796 on 1411 degrees of freedom
## AIC: 1800
##
## Number of Fisher Scoring iterations: 4
exp(-0.69866 - 0.02347 * 0)/(1 + exp(-0.69866 - 0.02347 * 0)) # Not Hispanic
## [1] 0.3321
exp(-0.69866 - 0.02347 * 1)/(1 + exp(-0.69866 - 0.02347 * 1)) # Hispanic
## [1] 0.3269
exp(cbind(OR = coef(m0_raceh), confint(m0_raceh)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.4972 0.4439 0.5562
## gard$hispanic 0.9768 0.5291 1.7368
# Other Race
m0_raceo <- glm(gard$Completed ~ gard$otherrace, family = binomial("logit"))
summary(m0_raceo)
##
## Call:
## glm(formula = gard$Completed ~ gard$otherrace, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.945 -0.891 -0.891 1.494 1.494
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7189 0.0608 -11.82 <2e-16 ***
## gard$otherrace 0.1445 0.1644 0.88 0.38
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1795.2 on 1411 degrees of freedom
## AIC: 1799
##
## Number of Fisher Scoring iterations: 4
exp(-0.71893 + 0.1445 * 0)/(1 + exp(-0.71893 + 0.1445 * 0)) # Not Other (White, Black, or Hispanic)
## [1] 0.3276
exp(-0.71893 + 0.1445 * 1)/(1 + exp(-0.71893 + 0.1445 * 1)) # Other Race
## [1] 0.3602
exp(cbind(OR = coef(m0_raceo), confint(m0_raceo)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.4873 0.4321 0.5485
## gard$otherrace 1.1555 0.8338 1.5899
Location
## Location -- ALL
m1_location <- glm(gard$Completed ~ gard$Odenton + gard$WhiteMarsh + gard$JohnsHopkins,
family = binomial("logit"))
summary(m1_location)
##
## Call:
## glm(formula = gard$Completed ~ gard$Odenton + gard$WhiteMarsh +
## gard$JohnsHopkins, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.152 -0.919 -0.781 1.460 1.757
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.030 0.120 -8.61 <2e-16 ***
## gard$Odenton 0.387 0.141 2.75 0.006 **
## gard$WhiteMarsh 0.969 0.196 4.94 8e-07 ***
## gard$JohnsHopkins -0.274 0.285 -0.96 0.336
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1764.9 on 1409 degrees of freedom
## AIC: 1773
##
## Number of Fisher Scoring iterations: 4
plot(gard$Location, fitted(m1_location), main = "Completion Probability by Location",
xlab = "Location", ylab = "P(Completion)", pch = 15)
exp(-1.0296 + 0.3868 * 1 + 0.969 * 0 - 0.2744 * 0)/(1 + exp(-1.0296 + 0.3868 *
1 + 0.969 * 0 - 0.2744 * 0)) # For Odenton
## [1] 0.3446
exp(-1.0296 + 0.3868 * 0 + 0.969 * 1 - 0.2744 * 0)/(1 + exp(-1.0296 + 0.3868 *
0 + 0.969 * 1 - 0.2744 * 0)) # For White Marsh
## [1] 0.4849
exp(-1.0296 + 0.3868 * 0 + 0.969 * 0 - 0.2744 * 1)/(1 + exp(-1.0296 + 0.3868 *
0 + 0.969 * 0 - 0.2744 * 1)) # For Johns Hopkins
## [1] 0.2135
exp(-1.0296 + 0.3868 * 0 + 0.969 * 0 - 0.2744 * 0)/(1 + exp(-1.0296 + 0.3868 *
0 + 0.969 * 0 - 0.2744 * 0)) # For Bayview
## [1] 0.2632
exp(cbind(OR = coef(m1_location), confint(m1_location)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.3571 0.2812 0.4496
## gard$Odenton 1.4723 1.1200 1.9462
## gard$WhiteMarsh 2.6353 1.7951 3.8788
## gard$JohnsHopkins 0.7600 0.4251 1.3062
# Odenton
m0_locO <- glm(gard$Completed ~ gard$Odenton, family = binomial("logit"))
summary(m0_locO)
##
## Call:
## glm(formula = gard$Completed ~ gard$Odenton, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.919 -0.919 -0.871 1.460 1.519
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7748 0.0868 -8.93 <2e-16 ***
## gard$Odenton 0.1320 0.1144 1.15 0.25
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1794.7 on 1411 degrees of freedom
## AIC: 1799
##
## Number of Fisher Scoring iterations: 4
exp(-0.77477 + 0.13196 * 0)/(1 + exp(-0.77477 + 0.13196 * 0)) # Not Odenton
## [1] 0.3154
exp(-0.77477 + 0.13196 * 1)/(1 + exp(-0.77477 + 0.13196 * 1)) # Odenton
## [1] 0.3446
exp(cbind(OR = coef(m0_locO), confint(m0_locO)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.4608 0.3880 0.5453
## gard$Odenton 1.1411 0.9124 1.4287
# White Marsh
m0_locW <- glm(gard$Completed ~ gard$WhiteMarsh, family = binomial("logit"))
summary(m0_locW)
##
## Call:
## glm(formula = gard$Completed ~ gard$WhiteMarsh, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.152 -0.864 -0.864 1.527 1.527
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7922 0.0611 -12.96 < 2e-16 ***
## gard$WhiteMarsh 0.7316 0.1673 4.37 1.2e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1777.2 on 1411 degrees of freedom
## AIC: 1781
##
## Number of Fisher Scoring iterations: 4
exp(-0.79219 + 0.73156 * 0)/(1 + exp(-0.79219 + 0.73156 * 0)) # Not White Marsh
## [1] 0.3117
exp(-0.79219 + 0.73156 * 1)/(1 + exp(-0.79219 + 0.73156 * 1)) # White Marsh
## [1] 0.4848
exp(cbind(OR = coef(m0_locW), confint(m0_locW)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.4529 0.4014 0.510
## gard$WhiteMarsh 2.0783 1.4960 2.886
# Johns Hopkins
m0_locJ <- glm(gard$Completed ~ gard$JohnsHopkins, family = binomial("logit"))
summary(m0_locJ)
##
## Call:
## glm(formula = gard$Completed ~ gard$JohnsHopkins, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.911 -0.911 -0.911 1.469 1.757
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.664 0.058 -11.44 <2e-16 ***
## gard$JohnsHopkins -0.640 0.265 -2.41 0.016 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1789.5 on 1411 degrees of freedom
## AIC: 1794
##
## Number of Fisher Scoring iterations: 4
# Completion probabilities from the Johns Hopkins-only model (m0_locJ):
# inverse-logit of intercept + slope * indicator.
exp(-0.66383 - 0.64022 * 0)/(1 + exp(-0.66383 - 0.64022 * 0)) # Not Johns Hopkins
## [1] 0.3399
exp(-0.66383 - 0.64022 * 1)/(1 + exp(-0.66383 - 0.64022 * 1)) # Johns Hopkins
## [1] 0.2135
exp(cbind(OR = coef(m0_locJ), confint(m0_locJ)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.5149 0.4592 0.5765
## gard$JohnsHopkins 0.5272 0.3054 0.8683
# Bayview
m0_locB <- glm(gard$Completed ~ gard$Bayview, family = binomial("logit"))
summary(m0_locB)
##
## Call:
## glm(formula = gard$Completed ~ gard$Bayview, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.937 -0.937 -0.781 1.438 1.634
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.5949 0.0644 -9.24 <2e-16 ***
## gard$Bayview -0.4347 0.1358 -3.20 0.0014 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1785.4 on 1411 degrees of freedom
## AIC: 1789
##
## Number of Fisher Scoring iterations: 4
exp(-0.59489 - 0.43473 * 0)/(1 + exp(-0.59489 - 0.43473 * 0)) # Not Bayview
## [1] 0.3555
exp(-0.59489 - 0.43473 * 1)/(1 + exp(-0.59489 - 0.43473 * 1)) # Bayview
## [1] 0.2632
exp(cbind(OR = coef(m0_locB), confint(m0_locB)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.5516 0.4858 0.6254
## gard$Bayview 0.6474 0.4945 0.8423
Location Type
## LocationType - Urban
m1_locationtype <- glm(gard$Completed ~ gard$LocationType, family = binomial("logit"))
summary(m1_locationtype)
##
## Call:
## glm(formula = gard$Completed ~ gard$LocationType, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.959 -0.959 -0.764 1.413 1.657
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.5381 0.0668 -8.06 7.9e-16 ***
## gard$LocationType -0.5429 0.1273 -4.26 2.0e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1777.1 on 1411 degrees of freedom
## AIC: 1781
##
## Number of Fisher Scoring iterations: 4
plot(gard$LocationType, fitted(m1_locationtype), main = "Completion Probability by Location Type",
xlab = "Urban (1) or Suburban (0)", ylab = "P(Completion)", pch = 15)
exp(-0.5381 - 0.5429 * 0)/(1 + exp(-0.5381 - 0.5429 * 0)) # Suburban
## [1] 0.3686
exp(-0.5381 - 0.5429 * 1)/(1 + exp(-0.5381 - 0.5429 * 1)) # Urban
## [1] 0.2533
exp(cbind(OR = coef(m1_locationtype), confint(m1_locationtype)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.5839 0.5118 0.6651
## gard$LocationType 0.5811 0.4516 0.7441
## LocationType - Suburban
m0_locationtype <- glm(gard$Completed ~ gard$urban, family = binomial("logit"))
summary(m0_locationtype)
##
## Call:
## glm(formula = gard$Completed ~ gard$urban, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.959 -0.959 -0.764 1.413 1.657
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.081 0.108 -9.97 <2e-16 ***
## gard$urban 0.543 0.127 4.26 2e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1777.1 on 1411 degrees of freedom
## AIC: 1781
##
## Number of Fisher Scoring iterations: 4
exp(-1.0809 + 0.5429 * 0)/(1 + exp(-1.0809 + 0.5429 * 0)) # Urban
## [1] 0.2533
exp(-1.0809 + 0.5429 * 1)/(1 + exp(-1.0809 + 0.5429 * 1)) # Suburban
## [1] 0.3687
exp(cbind(OR = coef(m0_locationtype), confint(m0_locationtype)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.3393 0.2733 0.4181
## gard$urban 1.7209 1.3440 2.2146
Practice Type
## Practice Type -- ALL
m1_practice <- glm(gard$Completed ~ gard$obgyn + gard$family, family = binomial("logit"))
summary(m1_practice)
##
## Call:
## glm(formula = gard$Completed ~ gard$obgyn + gard$family, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.973 -0.869 -0.828 1.397 1.573
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7789 0.0949 -8.21 2.3e-16 ***
## gard$obgyn 0.2770 0.1304 2.13 0.034 *
## gard$family -0.1145 0.1493 -0.77 0.443
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1787.6 on 1410 degrees of freedom
## AIC: 1794
##
## Number of Fisher Scoring iterations: 4
plot(gard$PracticeType, fitted(m1_practice), main = " Completion Probability by Practice Type",
xlab = "Practice Type", ylab = "P(Completion)", pch = 15)
exp(-0.7789 + 0.277 * 1 - 0.1145 * 0)/(1 + exp(-0.7789 + 0.277 * 1 - 0.1145 *
0)) # For OB-GYN
## [1] 0.3771
exp(-0.7789 + 0.277 * 0 - 0.1145 * 1)/(1 + exp(-0.7789 + 0.277 * 0 - 0.1145 *
1)) # For Family
## [1] 0.2904
exp(-0.7789 + 0.277 * 0 - 0.1145 * 0)/(1 + exp(-0.7789 + 0.277 * 0 - 0.1145 *
0)) # For Pediatric
## [1] 0.3146
exp(cbind(OR = coef(m1_practice), confint(m1_practice)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.4589 0.3802 0.5516
## gard$obgyn 1.3192 1.0222 1.7044
## gard$family 0.8918 0.6645 1.1938
# OB-GYN
m0_pracO <- glm(gard$Completed ~ gard$obgyn, family = binomial("logit"))
summary(m0_pracO)
##
## Call:
## glm(formula = gard$Completed ~ gard$obgyn, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.973 -0.852 -0.852 1.397 1.542
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.8257 0.0732 -11.3 <2e-16 ***
## gard$obgyn 0.3239 0.1156 2.8 0.0051 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1788.2 on 1411 degrees of freedom
## AIC: 1792
##
## Number of Fisher Scoring iterations: 4
exp(-0.82575 + 0.32392 * 0)/(1 + exp(-0.82575 + 0.32392 * 0)) # Not OB-GYN
## [1] 0.3045
exp(-0.82575 + 0.32392 * 1)/(1 + exp(-0.82575 + 0.32392 * 1)) # OB-GYN
## [1] 0.3771
exp(cbind(OR = coef(m0_pracO), confint(m0_pracO)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.4379 0.3788 0.5049
## gard$obgyn 1.3825 1.1020 1.7337
# Family
m0_pracF <- glm(gard$Completed ~ gard$family, family = binomial("logit"))
summary(m0_pracF)
##
## Call:
## glm(formula = gard$Completed ~ gard$family, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.922 -0.922 -0.828 1.456 1.573
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.6350 0.0649 -9.78 <2e-16 ***
## gard$family -0.2584 0.1323 -1.95 0.051 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1792.1 on 1411 degrees of freedom
## AIC: 1796
##
## Number of Fisher Scoring iterations: 4
exp(-0.63502 - 0.25837 * 0)/(1 + exp(-0.63502 - 0.25837 * 0)) # Not Family
## [1] 0.3464
exp(-0.63502 - 0.25837 * 1)/(1 + exp(-0.63502 - 0.25837 * 1)) # Family
## [1] 0.2904
exp(cbind(OR = coef(m0_pracF), confint(m0_pracF)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.5299 0.4662 0.6014
## gard$family 0.7723 0.5943 0.9987
# Pediatric
m0_pracP <- glm(gard$Completed ~ gard$pediatric, family = binomial("logit"))
summary(m0_pracP)
##
## Call:
## glm(formula = gard$Completed ~ gard$pediatric, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.915 -0.915 -0.869 1.465 1.521
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.6550 0.0704 -9.31 <2e-16 ***
## gard$pediatric -0.1239 0.1181 -1.05 0.29
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1794.9 on 1411 degrees of freedom
## AIC: 1799
##
## Number of Fisher Scoring iterations: 4
exp(-0.65497 - 0.1239 * 0)/(1 + exp(-0.65497 - 0.1239 * 0)) # Not Pediatric
## [1] 0.3419
exp(-0.65497 - 0.1239 * 1)/(1 + exp(-0.65497 - 0.1239 * 1)) # Pediatric
## [1] 0.3146
exp(cbind(OR = coef(m0_pracP), confint(m0_pracP)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.5195 0.4521 0.5957
## gard$pediatric 0.8835 0.7001 1.1126
Insurance Type
## Insurance Type - ALL
m1_insurance <- glm(gard$Completed ~ gard$assisted + gard$private + gard$hospital,
family = binomial("logit"))
summary(m1_insurance)
##
## Call:
## glm(formula = gard$Completed ~ gard$assisted + gard$private +
## gard$hospital, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.117 -0.928 -0.928 1.413 1.794
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.538 0.114 -4.72 2.3e-06 ***
## gard$assisted -0.848 0.189 -4.49 7.2e-06 ***
## gard$private -0.081 0.138 -0.59 0.56
## gard$hospital 0.395 0.247 1.60 0.11
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1763.1 on 1409 degrees of freedom
## AIC: 1771
##
## Number of Fisher Scoring iterations: 4
plot(gard$InsuranceType, fitted(m1_insurance), main = "Completion Probability by Insurance Type",
xlab = "Insurance Type", ylab = "P(Completion)", pch = 15)
exp(-0.53831 - 0.84798 * 1 - 0.08103 * 0 + 0.39521 * 0)/(1 + exp(-0.53831 -
0.84798 * 1 - 0.08103 * 0 + 0.39521 * 0)) # For Assisted
## [1] 0.2
exp(-0.53831 - 0.84798 * 0 - 0.08103 * 1 + 0.39521 * 0)/(1 + exp(-0.53831 -
0.84798 * 0 - 0.08103 * 1 + 0.39521 * 0)) # For Private
## [1] 0.3499
exp(-0.53831 - 0.84798 * 0 - 0.08103 * 0 + 0.39521 * 1)/(1 + exp(-0.53831 -
0.84798 * 0 - 0.08103 * 0 + 0.39521 * 1)) # For Hospital
## [1] 0.4643
exp(-0.53831 - 0.84798 * 0 - 0.08103 * 0 + 0.39521 * 0)/(1 + exp(-0.53831 -
0.84798 * 0 - 0.08103 * 0 + 0.39521 * 0)) # For Military
## [1] 0.3686
exp(cbind(OR = coef(m1_insurance), confint(m1_insurance)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.5837 0.4658 0.7284
## gard$assisted 0.4283 0.2942 0.6176
## gard$private 0.9222 0.7042 1.2103
## gard$hospital 1.4847 0.9133 2.4075
# Military
m0_insm <- glm(gard$Completed ~ gard$military, family = binomial("logit"))
summary(m0_insm)
##
## Call:
## glm(formula = gard$Completed ~ gard$military, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.959 -0.879 -0.879 1.413 1.508
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7505 0.0651 -11.52 <2e-16 ***
## gard$military 0.2122 0.1312 1.62 0.11
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1793.4 on 1411 degrees of freedom
## AIC: 1797
##
## Number of Fisher Scoring iterations: 4
exp(-0.75055 + 0.21223 * 0)/(1 + exp(-0.75055 + 0.21223 * 0)) # Not Military
## [1] 0.3207
exp(-0.75055 + 0.21223 * 1)/(1 + exp(-0.75055 + 0.21223 * 1)) # Military
## [1] 0.3686
exp(cbind(OR = coef(m0_insm), confint(m0_insm)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.4721 0.4151 0.5359
## gard$military 1.2364 0.9545 1.5971
# Private
m0_insp <- glm(gard$Completed ~ gard$private, family = binomial("logit"))
summary(m0_insp)
##
## Call:
## glm(formula = gard$Completed ~ gard$private, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.928 -0.928 -0.867 1.449 1.524
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7859 0.0821 -9.57 <2e-16 ***
## gard$private 0.1666 0.1132 1.47 0.14
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1793.8 on 1411 degrees of freedom
## AIC: 1798
##
## Number of Fisher Scoring iterations: 4
exp(-0.78593 + 0.16659 * 0)/(1 + exp(-0.78593 + 0.16659 * 0)) # Not Private
## [1] 0.313
exp(-0.78593 + 0.16659 * 1)/(1 + exp(-0.78593 + 0.16659 * 1)) # Private
## [1] 0.3499
exp(cbind(OR = coef(m0_insp), confint(m0_insp)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.4557 0.3873 0.5344
## gard$private 1.1813 0.9464 1.4753
# Hospital
m0_insh <- glm(gard$Completed ~ gard$hospital, family = binomial("logit"))
summary(m0_insh)
##
## Call:
## glm(formula = gard$Completed ~ gard$hospital, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.117 -0.884 -0.884 1.502 1.502
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7375 0.0586 -12.58 <2e-16 ***
## gard$hospital 0.5944 0.2265 2.62 0.0087 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1789.3 on 1411 degrees of freedom
## AIC: 1793
##
## Number of Fisher Scoring iterations: 4
exp(-0.7375 + 0.5944 * 0)/(1 + exp(-0.7375 + 0.5944 * 0)) # Not Hospital
## [1] 0.3236
exp(-0.7375 + 0.5944 * 1)/(1 + exp(-0.7375 + 0.5944 * 1)) # Hospital
## [1] 0.4643
exp(cbind(OR = coef(m0_insh), confint(m0_insh)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.4783 0.426 0.5362
## gard$hospital 1.8119 1.158 2.8230
# Assisted
m0_insa <- glm(gard$Completed ~ gard$assisted, family = binomial("logit"))
summary(m0_insa)
##
## Call:
## glm(formula = gard$Completed ~ gard$assisted, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.951 -0.951 -0.951 1.422 1.794
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.5589 0.0616 -9.07 < 2e-16 ***
## gard$assisted -0.8274 0.1629 -5.08 3.8e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1767.3 on 1411 degrees of freedom
## AIC: 1771
##
## Number of Fisher Scoring iterations: 4
# Completion probabilities from the Assisted-only model (m0_insa), using the
# coefficients printed in its summary: intercept -0.5589, slope -0.8274.
# (The earlier version reused the pediatric model's coefficients by mistake.)
# Cross-check: the Assisted value matches the 0.2 from the full insurance model.
exp(-0.5589 - 0.8274 * 0)/(1 + exp(-0.5589 - 0.8274 * 0)) # Not Assisted
## [1] 0.3638
exp(-0.5589 - 0.8274 * 1)/(1 + exp(-0.5589 - 0.8274 * 1)) # Assisted
## [1] 0.2
exp(cbind(OR = coef(m0_insa), confint(m0_insa)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.5718 0.5064 0.6448
## gard$assisted 0.4372 0.3152 0.5975
Labeling Technique:
Use m#_data – m for model, # for the number of variables, and data for the independent characteristic being used.
Hypothesis Tests: Applicable for all models in this section.
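Null Hypothesis (Deviance Test): All slope coefficients are zero, i.e. the model explains no more of the variation in completion probability than the intercept-only (null) model.
Alt Hypothesis (Deviance Test): At least one slope coefficient is nonzero, i.e. the model explains some of the variation in completion probability.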
For each coefficient, use the Wald test:
Null Hypothesis (Wald Test): The coefficient is zero, i.e. the characteristic does not explain variation in completion probability.
Alt Hypothesis (Wald Test): The coefficient is nonzero, i.e. the characteristic explains some of the variation in completion probability.
Logistic Regression Models By Category
## Age Group
m1_agegrp <- glm(gard$Completed ~ gard$AgeGroup, family = binomial("logit"))
summary(m1_agegrp) # AIC = 1797.4, Residual deviance = 1793.4
##
## Call:
## glm(formula = gard$Completed ~ gard$AgeGroup, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.932 -0.932 -0.865 1.444 1.527
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.6087 0.0791 -7.70 1.4e-14 ***
## gard$AgeGroup -0.1830 0.1131 -1.62 0.11
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1793.4 on 1411 degrees of freedom
## AIC: 1797
##
## Number of Fisher Scoring iterations: 4
# Fitted completion probability for each age group.
plot(x = gard$AgeGroup, y = fitted(m1_agegrp), pch = 15,
    xlab = "Age Group", ylab = "P(Completion)",
    main = "Completion Probability by Age Group")
# Inverse logit of the linear predictor, using the coefficients reported by
# summary(m1_agegrp).
plogis(-0.60871 - 0.18302 * 0) # For ages 11-17
## [1] 0.3524
plogis(-0.60871 - 0.18302 * 1) # For ages 18-26
## [1] 0.3118
# Odds ratios with 95% confidence limits.
exp(cbind(OR = coef(m1_agegrp), confint(m1_agegrp, level = 0.95)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.5441 0.4654 0.6345
## gard$AgeGroup 0.8328 0.6669 1.0393
## Age
# Single-predictor logistic regression of Completed on Age in years.
# The trailing comment is corrected to this model's own fit statistics; it
# previously repeated the values from the age-group model.
m1_age <- glm(gard$Completed ~ gard$Age, family = binomial("logit"))
summary(m1_age) # AIC = 1797.8, Residual deviance = 1793.8
##
## Call:
## glm(formula = gard$Completed ~ gard$Age, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.954 -0.909 -0.873 1.454 1.552
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.3341 0.2533 -1.32 0.19
## gard$Age -0.0198 0.0134 -1.47 0.14
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1793.8 on 1411 degrees of freedom
## AIC: 1798
##
## Number of Fisher Scoring iterations: 4
# Fitted completion probability against age in years (title typo
# "Probaiblity" corrected).
plot(gard$Age, fitted(m1_age), main = "Completion Probability by Age", xlab = "Age (yrs)",
    ylab = "P(Completion)", pch = 15)
# NOTE(review): the commented-out lines below reuse the m1_agegrp coefficients
# and age-group labels; they do not describe m1_age and are kept only as
# originally written.
# exp(-0.60871 - 0.18302*0) / (1 + exp(-0.60871 - 0.18302*0)) # For ages
# 11-17 exp(-0.60871 - 0.18302*1) / (1 + exp(-0.60871 - 0.18302*1)) # For
# ages 18-26
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m1_age), confint(m1_age)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.7160 0.4353 1.176
## gard$Age 0.9804 0.9549 1.006
## Race
# Logistic regression of Completed on the race dummies. otherrace (Race == 3)
# is left out of the formula, so it is the reference level carried by the
# intercept.
m1_race <- glm(gard$Completed ~ gard$white + gard$black + gard$hispanic, family = binomial("logit"))
summary(m1_race) # AIC = 1776, Residual deviance = 1768
##
## Call:
## glm(formula = gard$Completed ~ gard$white + gard$black + gard$hispanic,
## family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.982 -0.982 -0.736 1.386 1.697
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.5744 0.1527 -3.76 0.00017 ***
## gard$white 0.0955 0.1706 0.56 0.57552
## gard$black -0.5947 0.1892 -3.14 0.00168 **
## gard$hispanic -0.1477 0.3328 -0.44 0.65712
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796 on 1412 degrees of freedom
## Residual deviance: 1768 on 1409 degrees of freedom
## AIC: 1776
##
## Number of Fisher Scoring iterations: 4
# Fitted completion probability for each race code.
plot(x = gard$Race, y = fitted(m1_race), pch = 15,
    xlab = "Race", ylab = "P(Completion)",
    main = "Completion Probability by Race")
# Inverse logit of intercept + dummy effects, one line per race; the
# coefficients are those reported by summary(m1_race).
plogis(-0.57443 + 0.09554 * 1 - 0.59465 * 0 - 0.1477 * 0) # For White
## [1] 0.3825
plogis(-0.57443 + 0.09554 * 0 - 0.59465 * 1 - 0.1477 * 0) # For Black
## [1] 0.237
plogis(-0.57443 + 0.09554 * 0 - 0.59465 * 0 - 0.1477 * 1) # For Hispanic
## [1] 0.3269
plogis(-0.57443 + 0.09554 * 0 - 0.59465 * 0 - 0.1477 * 0) # For OtherRace
## [1] 0.3602
# Odds ratios with 95% confidence limits.
exp(cbind(OR = coef(m1_race), confint(m1_race, level = 0.95)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.5630 0.4154 0.7568
## gard$white 1.1003 0.7897 1.5428
## gard$black 0.5518 0.3811 0.8009
## gard$hispanic 0.8627 0.4414 1.6375
## Insurance Type
# Logistic regression of Completed on the insurance-type dummies. assisted
# (InsuranceType == 0) is left out of the formula, so it is the reference
# level carried by the intercept.
m1_insurance <- glm(gard$Completed ~ gard$military + gard$private + gard$hospital,
family = binomial("logit"))
summary(m1_insurance) # AIC: 1771.1, Residual deviance: 1763.1
##
## Call:
## glm(formula = gard$Completed ~ gard$military + gard$private +
## gard$hospital, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.117 -0.928 -0.928 1.413 1.794
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.386 0.151 -9.20 < 2e-16 ***
## gard$military 0.848 0.189 4.49 7.2e-06 ***
## gard$private 0.767 0.170 4.52 6.2e-06 ***
## gard$hospital 1.243 0.266 4.68 2.9e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1763.1 on 1409 degrees of freedom
## AIC: 1771
##
## Number of Fisher Scoring iterations: 4
# Fitted completion probability for each insurance-type code.
plot(gard$InsuranceType, fitted(m1_insurance), main = "Completion Probability by Insurance Type",
    xlab = "Insurance Type", ylab = "P(Completion)", pch = 15)
# Predicted probabilities are derived from coef(m1_insurance), whose reference
# level is assisted. The numbers previously typed in here (-0.53831, -0.84798,
# -0.08103, 0.39521) came from a different parameterization with military as
# the reference and appear nowhere in the m1_insurance summary above.
plogis(coef(m1_insurance)[["(Intercept)"]]) # For Assisted (reference level)
## [1] 0.2
plogis(sum(coef(m1_insurance)[c("(Intercept)", "gard$private")])) # For Private
## [1] 0.3499
plogis(sum(coef(m1_insurance)[c("(Intercept)", "gard$hospital")])) # For Hospital
## [1] 0.4643
plogis(sum(coef(m1_insurance)[c("(Intercept)", "gard$military")])) # For Military
## [1] 0.3686
# Odds ratios (relative to assisted) with 95% profile-likelihood limits.
exp(cbind(OR = coef(m1_insurance), confint(m1_insurance)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.250 0.1843 0.3332
## gard$military 2.335 1.6191 3.3996
## gard$private 2.153 1.5534 3.0247
## gard$hospital 3.467 2.0598 5.8496
## Med Assisted
# Single-predictor logistic regression of Completed on the MedAssist
# indicator. The trailing comment is corrected: AIC and residual deviance
# were swapped relative to the summary output.
m1_medassist <- glm(gard$Completed ~ gard$MedAssist, family = binomial("logit"))
summary(m1_medassist) # AIC: 1771.3, Residual deviance: 1767.3
##
## Call:
## glm(formula = gard$Completed ~ gard$MedAssist, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.951 -0.951 -0.951 1.422 1.794
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.5589 0.0616 -9.07 < 2e-16 ***
## gard$MedAssist -0.8274 0.1629 -5.08 3.8e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1767.3 on 1411 degrees of freedom
## AIC: 1771
##
## Number of Fisher Scoring iterations: 4
# Fitted completion probability by medical-assistance status. Uses the
# already-fitted m1_medassist rather than refitting the same model inside
# plot().
plot(gard$MedAssist, fitted(m1_medassist),
    main = "Completion Probability by Assistance", xlab = "Medical Assistance",
    ylab = "P(Completion)", pch = 15)
# Inverse logit of the linear predictor, using the coefficients reported by
# summary(m1_medassist).
plogis(-0.55893 - 0.82737 * 0) # No Med Assist
## [1] 0.3638
plogis(-0.55893 - 0.82737 * 1) # Yes Med Assist
## [1] 0.2
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m1_medassist), confint(m1_medassist)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.5718 0.5064 0.6448
## gard$MedAssist 0.4372 0.3152 0.5975
## LocationType
# Single-predictor logistic regression of Completed on LocationType (the plot
# label further down codes it as Urban = 1, Suburban = 0). The trailing
# comment is corrected: AIC and residual deviance were swapped relative to the
# summary output.
m1_locationtype <- glm(gard$Completed ~ gard$LocationType, family = binomial("logit"))
summary(m1_locationtype) # AIC: 1781.1, Residual deviance: 1777.1
##
## Call:
## glm(formula = gard$Completed ~ gard$LocationType, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.959 -0.959 -0.764 1.413 1.657
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.5381 0.0668 -8.06 7.9e-16 ***
## gard$LocationType -0.5429 0.1273 -4.26 2.0e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1777.1 on 1411 degrees of freedom
## AIC: 1781
##
## Number of Fisher Scoring iterations: 4
# Fitted completion probability by location type. Uses the already-fitted
# m1_locationtype rather than refitting the same model inside plot().
plot(gard$LocationType, fitted(m1_locationtype),
    main = "Completion Probability by Location Type", xlab = "Urban (1) or Suburban (0)",
    ylab = "P(Completion)", pch = 15)
# Inverse logit of the linear predictor, using the coefficients reported by
# summary(m1_locationtype).
plogis(-0.5381 - 0.5429 * 0) # Suburban
## [1] 0.3686
plogis(-0.5381 - 0.5429 * 1) # Urban
## [1] 0.2533
# Odds ratios with 95% profile-likelihood confidence limits.
exp(cbind(OR = coef(m1_locationtype), confint(m1_locationtype)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.5839 0.5118 0.6651
## gard$LocationType 0.5811 0.4516 0.7441
## Location
# Logistic regression of Completed on the clinic dummies. Bayview
# (Location == 4) is left out of the formula, so it is the reference level
# carried by the intercept.
m1_location <- glm(gard$Completed ~ gard$Odenton + gard$WhiteMarsh + gard$JohnsHopkins,
family = binomial("logit"))
summary(m1_location) # AIC: 1772.9, Residual deviance: 1764.9
##
## Call:
## glm(formula = gard$Completed ~ gard$Odenton + gard$WhiteMarsh +
## gard$JohnsHopkins, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.152 -0.919 -0.781 1.460 1.757
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.030 0.120 -8.61 <2e-16 ***
## gard$Odenton 0.387 0.141 2.75 0.006 **
## gard$WhiteMarsh 0.969 0.196 4.94 8e-07 ***
## gard$JohnsHopkins -0.274 0.285 -0.96 0.336
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1764.9 on 1409 degrees of freedom
## AIC: 1773
##
## Number of Fisher Scoring iterations: 4
# Fitted completion probability for each clinic. The plot now uses the
# dummy-coded m1_location. It previously refit Completed ~ Location with the
# 1-4 clinic code as a numeric slope, which is a different model whose fitted
# values do not match the per-clinic probabilities computed below.
plot(gard$Location, fitted(m1_location), main = "Completion Probability by Location",
    xlab = "Location", ylab = "P(Completion)", pch = 15)
# Inverse logit of intercept + dummy effects, one line per clinic; the
# coefficients are those reported by summary(m1_location).
plogis(-1.0296 + 0.3868 * 1 + 0.969 * 0 - 0.2744 * 0) # For Odenton
## [1] 0.3446
plogis(-1.0296 + 0.3868 * 0 + 0.969 * 1 - 0.2744 * 0) # For White Marsh
## [1] 0.4849
plogis(-1.0296 + 0.3868 * 0 + 0.969 * 0 - 0.2744 * 1) # For Johns Hopkins
## [1] 0.2135
plogis(-1.0296 + 0.3868 * 0 + 0.969 * 0 - 0.2744 * 0) # For Bayview
## [1] 0.2632
# Odds ratios (relative to Bayview) with 95% profile-likelihood limits.
exp(cbind(OR = coef(m1_location), confint(m1_location)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.3571 0.2812 0.4496
## gard$Odenton 1.4723 1.1200 1.9462
## gard$WhiteMarsh 2.6353 1.7951 3.8788
## gard$JohnsHopkins 0.7600 0.4251 1.3062
## Practice Type
# Logistic regression of Completed on the practice-type dummies. pediatric
# (PracticeType == 0) is left out of the formula, so it is the reference
# level carried by the intercept.
m1_practice <- glm(gard$Completed ~ gard$obgyn + gard$family, family = binomial("logit"))
summary(m1_practice) # AIC: 1793.6, Residual deviance: 1787.6
##
## Call:
## glm(formula = gard$Completed ~ gard$obgyn + gard$family, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.973 -0.869 -0.828 1.397 1.573
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7789 0.0949 -8.21 2.3e-16 ***
## gard$obgyn 0.2770 0.1304 2.13 0.034 *
## gard$family -0.1145 0.1493 -0.77 0.443
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1787.6 on 1410 degrees of freedom
## AIC: 1794
##
## Number of Fisher Scoring iterations: 4
# Fitted completion probability for each practice type. The plot now uses the
# dummy-coded m1_practice. It previously refit Completed ~ PracticeType with
# the 0-2 code as a numeric slope, which is a different model whose fitted
# values do not match the per-practice probabilities computed below.
plot(gard$PracticeType, fitted(m1_practice),
    main = "Completion Probability by Practice Type", xlab = "Practice Type",
    ylab = "P(Completion)", pch = 15)
# Inverse logit of intercept + dummy effects, one line per practice type; the
# coefficients are those reported by summary(m1_practice).
plogis(-0.7789 + 0.277 * 1 - 0.1145 * 0) # For OB-GYN
## [1] 0.3771
plogis(-0.7789 + 0.277 * 0 - 0.1145 * 1) # For Family
## [1] 0.2904
plogis(-0.7789 + 0.277 * 0 - 0.1145 * 0) # For Pediatric
## [1] 0.3146
# Odds ratios (relative to pediatric) with 95% profile-likelihood limits.
exp(cbind(OR = coef(m1_practice), confint(m1_practice)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.4589 0.3802 0.5516
## gard$obgyn 1.3192 1.0222 1.7044
## gard$family 0.8918 0.6645 1.1938
The best model from the above options uses insurance type. Next, check to see if the full model is really better than all possible reduced models.
Residuals of Groups Logistic Regression Models
## Age Group
# Residuals against fitted values for the age-group model. The x-axis holds
# fitted probabilities, so it is labelled as such (it was labelled
# "Age Group").
# NOTE(review): the `residuals` component of a glm object holds working
# residuals; residuals(m1_agegrp, type = "deviance") is the usual diagnostic.
plot(m1_agegrp$residuals ~ m1_agegrp$fitted.values, main = "Residuals for Age Group",
    xlab = "Fitted P(Completion)", ylab = "Residuals")
summary(m1_agegrp$residuals)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.54 -1.54 -1.45 0.00 2.84 3.21
## Location
# Residuals against fitted values for the location model. The x-axis holds
# fitted probabilities, so it is labelled as such (it was labelled
# "Location").
# NOTE(review): the `residuals` component of a glm object holds working
# residuals; residuals(m1_location, type = "deviance") is the usual
# diagnostic.
plot(m1_location$residuals ~ m1_location$fitted.values, main = "Residuals for Location",
    xlab = "Fitted P(Completion)", ylab = "Residuals")
summary(m1_location$residuals)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.94 -1.53 -1.36 0.00 2.90 4.68
# Five-number summaries (plus mean) of the stored residuals of every
# single-characteristic model, for side-by-side comparison.
# Mean = 0.000 for all categorical x variables except age, which is the
# only continuous x variable.
# NOTE(review): `$residuals` on a glm object holds working residuals, not
# deviance or Pearson residuals - confirm this is the intended quantity.
summary(m1_agegrp$residuals)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.54 -1.54 -1.45 0.00 2.84 3.21
summary(m1_age$residuals) # Mean = 0.000179
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.58 -1.51 -1.46 0.00 2.88 3.33
summary(m1_insurance$residuals)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.87 -1.54 -1.54 0.00 2.71 5.00
summary(m1_location$residuals)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.94 -1.53 -1.36 0.00 2.90 4.68
summary(m1_locationtype$residuals)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.58 -1.58 -1.34 0.00 2.71 3.95
summary(m1_medassist$residuals)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.57 -1.57 -1.57 0.00 2.75 5.00
summary(m1_practice$residuals)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.61 -1.46 -1.41 0.00 2.65 3.44
summary(m1_race$residuals)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.62 -1.62 -1.31 0.00 2.61 4.22
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.62 -1.62 -1.31 0.00 2.61 4.22
Based on the above findings, residuals are not a useful tool for evaluating these models.
Alternative Multiple Logistic Regression Models Use Any Significant Binary
# Anything Significant - The idea here is to include every variable
# that had a significant coefficient.
# Subsetting Locations
# Split gard by clinic using the 0/1 location dummies, then summarise
# LocationType within each clinic to see whether it is constant there
# (0 = suburban, 1 = urban).
Odenton <- gard[gard[["Odenton"]] == 1, ]
WhiteMarsh <- gard[gard[["WhiteMarsh"]] == 1, ]
JohnsHopkins <- gard[gard[["JohnsHopkins"]] == 1, ]
Bayview <- gard[gard[["Bayview"]] == 1, ]
summary(Odenton[["LocationType"]]) # All Suburban
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 0 0 0
summary(WhiteMarsh[["LocationType"]]) # All Suburban
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 0 0 0
summary(JohnsHopkins[["LocationType"]]) # All Urban
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 1 1 1 1 1
summary(Bayview[["LocationType"]]) # All Urban
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 1 1 1 1 1
# Don't include both urban and suburban because location type is a binary
# characteristic. Since Odenton and WhiteMarsh are both suburban and
# Johns Hopkins and Bayview are both urban, including LocationType makes
# including both JohnsHopkins and Bayview redundant. These have also been
# omitted. WhiteMarsh is included because it is significant, whereas
# Odenton is not.
# Deviance Test Null Hypothesis: This model predicts the probability that
# an individual completed the Gardasil regimen no better than the overall
# (average) probability. Wald's Test: For each independent variable,
# there is also the null hypothesis that the coefficient for that variable
# = 0.
# Starting "all significant binaries" model: eight 0/1 predictors drawn from
# the race, location, practice and insurance groups, looked up in gard via
# data =. Later m*_allsigs models drop terms from this one.
m8_allsigs <- glm(Completed ~ white + black + LocationType + WhiteMarsh + obgyn +
family + assisted + hospital, data = gard, family = binomial("logit"))
summary(m8_allsigs) # AIC = 1743, Residual deviance = 1725
##
## Call:
## glm(formula = Completed ~ white + black + LocationType + WhiteMarsh +
## obgyn + family + assisted + hospital, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.384 -0.890 -0.738 1.269 1.983
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2301 0.1747 -1.32 0.1878
## white 0.0173 0.1616 0.11 0.9146
## black -0.4919 0.1810 -2.72 0.0066 **
## LocationType -0.4205 0.1770 -2.38 0.0175 *
## WhiteMarsh 0.3301 0.2055 1.61 0.1083
## obgyn -0.1623 0.1551 -1.05 0.2955
## family -0.5449 0.1718 -3.17 0.0015 **
## assisted -0.5096 0.2057 -2.48 0.0132 *
## hospital 0.5188 0.2466 2.10 0.0354 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796 on 1412 degrees of freedom
## Residual deviance: 1725 on 1404 degrees of freedom
## AIC: 1743
##
## Number of Fisher Scoring iterations: 4
# Sequential analysis of deviance: does each term, added in formula order,
# significantly reduce the deviance? (Null = no difference in deviance
# between the reduced and the full model.)
anova(object = m8_allsigs, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## white 1 17.62 1411 1778 2.7e-05 ***
## black 1 10.17 1410 1768 0.00142 **
## LocationType 1 12.34 1409 1756 0.00044 ***
## WhiteMarsh 1 8.20 1408 1748 0.00420 **
## obgyn 1 0.91 1407 1747 0.33915
## family 1 7.89 1406 1739 0.00496 **
## assisted 1 9.49 1405 1729 0.00207 **
## hospital 1 4.38 1404 1725 0.03630 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# This shows me that some variables with non-significant coefficients are
# still useful. obgyn is still insignificant, so investigate it for
# redundancies:
# Restrict to OB-GYN patients and inspect how they spread across the other
# characteristics, looking for a variable that obgyn might duplicate.
obgyn <- gard[gard[["obgyn"]] == 1, ]
# Both urban and suburban represented, but mostly suburban.
hist(obgyn[["LocationType"]], xlab = "Location Type", ylab = "Frequency",
    main = "OBGYN Location Type Count")
# All locations present.
hist(obgyn[["Location"]], xlab = "Location", ylab = "Frequency",
    main = "OBGYN Location Count")
# All insurance types, but mostly #1 (private).
hist(obgyn[["InsuranceType"]], xlab = "Insurance Type", ylab = "Frequency",
    main = "OBGYN Insurance Type Count")
# All races represented, well mixed, but more white than other races.
hist(obgyn[["Race"]], xlab = "Race", ylab = "Frequency",
    main = "OBGYN Race Count")
# No definite redundancies, but obgyn and suburban may be drawing from
# similar populations. Will try eliminating it.
# m8_allsigs with obgyn removed (seven predictors remain).
m7_allsigs <- glm(Completed ~ white + black + LocationType + WhiteMarsh + family +
assisted + hospital, data = gard, family = binomial("logit"))
summary(m7_allsigs) # AIC = 1742.2; Residual deviance = 1726.1
##
## Call:
## glm(formula = Completed ~ white + black + LocationType + WhiteMarsh +
## family + assisted + hospital, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.382 -0.892 -0.721 1.307 1.949
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.30563 0.15921 -1.92 0.0549 .
## white 0.00692 0.16130 0.04 0.9658
## black -0.49974 0.18089 -2.76 0.0057 **
## LocationType -0.41001 0.17665 -2.32 0.0203 *
## WhiteMarsh 0.25247 0.19141 1.32 0.1872
## family -0.46318 0.15328 -3.02 0.0025 **
## assisted -0.46878 0.20192 -2.32 0.0203 *
## hospital 0.51436 0.24625 2.09 0.0367 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1726.1 on 1405 degrees of freedom
## AIC: 1742
##
## Number of Fisher Scoring iterations: 4
# Decision rule: if the reduced model's AIC is lower, the fuller model is
# dropped. Then run the sequential analysis of deviance on m7_allsigs.
anova(object = m7_allsigs, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## white 1 17.62 1411 1778 2.7e-05 ***
## black 1 10.17 1410 1768 0.00142 **
## LocationType 1 12.34 1409 1756 0.00044 ***
## WhiteMarsh 1 8.20 1408 1748 0.00420 **
## family 1 8.64 1407 1739 0.00329 **
## assisted 1 8.62 1406 1730 0.00332 **
## hospital 1 4.32 1405 1726 0.03770 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All variables are significant in the anova, but white is essentially 0
# using Wald's test.
# m7_allsigs with white removed (six predictors remain).
m6_allsigs <- glm(Completed ~ black + LocationType + WhiteMarsh + family + assisted +
hospital, data = gard, family = binomial("logit"))
summary(m6_allsigs) # AIC = 1740.1; Residual deviance = 1726.1
##
## Call:
## glm(formula = Completed ~ black + LocationType + WhiteMarsh +
## family + assisted + hospital, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.381 -0.894 -0.721 1.307 1.950
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.300 0.105 -2.87 0.00404 **
## black -0.505 0.135 -3.74 0.00018 ***
## LocationType -0.410 0.176 -2.32 0.02019 *
## WhiteMarsh 0.254 0.189 1.34 0.18021
## family -0.464 0.153 -3.03 0.00241 **
## assisted -0.469 0.202 -2.33 0.01993 *
## hospital 0.514 0.246 2.09 0.03676 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1726.1 on 1406 degrees of freedom
## AIC: 1740
##
## Number of Fisher Scoring iterations: 4
# Decision rule: if the reduced model's AIC is lower, the fuller model is
# dropped. Then run the sequential analysis of deviance on m6_allsigs.
anova(object = m6_allsigs, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## black 1 27.12 1411 1769 1.9e-07 ***
## LocationType 1 12.12 1410 1757 0.0005 ***
## WhiteMarsh 1 8.93 1409 1748 0.0028 **
## family 1 8.77 1408 1739 0.0031 **
## assisted 1 8.65 1407 1730 0.0033 **
## hospital 1 4.32 1406 1726 0.0377 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# WhiteMarsh has the only non-significant co-efficient left.
# m6_allsigs with WhiteMarsh removed (five predictors remain).
m5_allsigs <- glm(Completed ~ black + LocationType + family + assisted + hospital,
data = gard, family = binomial("logit"))
summary(m5_allsigs) # AIC = 1739.9; Residual deviance = 1727.9
##
## Call:
## glm(formula = Completed ~ black + LocationType + family + assisted +
## hospital, family = binomial("logit"), data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.326 -0.892 -0.724 1.276 1.950
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2289 0.0894 -2.56 0.01048 *
## black -0.5191 0.1345 -3.86 0.00011 ***
## LocationType -0.4886 0.1660 -2.94 0.00324 **
## family -0.5339 0.1433 -3.73 0.00019 ***
## assisted -0.4585 0.2014 -2.28 0.02283 *
## hospital 0.5714 0.2415 2.37 0.01799 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1727.9 on 1407 degrees of freedom
## AIC: 1740
##
## Number of Fisher Scoring iterations: 4
# Decision rule: if the reduced model's AIC is lower, the fuller model is
# dropped. Then run the sequential analysis of deviance on m5_allsigs.
anova(object = m5_allsigs, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## black 1 27.12 1411 1769 1.9e-07 ***
## LocationType 1 12.12 1410 1757 0.00050 ***
## family 1 14.66 1409 1742 0.00013 ***
## assisted 1 8.69 1408 1733 0.00321 **
## hospital 1 5.53 1407 1728 0.01864 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# I cannot tell which would be better to remove: hospital or assisted.
# Try each.
# Candidate A: m5_allsigs with hospital removed (assisted kept).
m4A_allsigs <- glm(Completed ~ black + LocationType + family + assisted, data = gard,
family = binomial("logit"))
summary(m4A_allsigs) # AIC = 1743.4; Residual deviance = 1733.4
##
## Call:
## glm(formula = Completed ~ black + LocationType + family + assisted,
## family = binomial("logit"), data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.094 -0.893 -0.737 1.263 1.989
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1986 0.0884 -2.25 0.02460 *
## black -0.5139 0.1342 -3.83 0.00013 ***
## LocationType -0.3970 0.1605 -2.47 0.01336 *
## family -0.5480 0.1430 -3.83 0.00013 ***
## assisted -0.5685 0.1956 -2.91 0.00366 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1733.4 on 1408 degrees of freedom
## AIC: 1743
##
## Number of Fisher Scoring iterations: 4
# Sequential analysis of deviance for candidate A.
anova(object = m4A_allsigs, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## black 1 27.12 1411 1769 1.9e-07 ***
## LocationType 1 12.12 1410 1757 0.00050 ***
## family 1 14.66 1409 1742 0.00013 ***
## assisted 1 8.69 1408 1733 0.00321 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Candidate B: m5_allsigs with assisted removed (hospital kept).
m4B_allsigs <- glm(Completed ~ black + LocationType + family + hospital, data = gard,
family = binomial("logit"))
summary(m4B_allsigs) # AIC = 1743.2; Residual deviance = 1733.2
##
## Call:
## glm(formula = Completed ~ black + LocationType + family + hospital,
## family = binomial("logit"), data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.382 -0.873 -0.813 1.282 1.846
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2430 0.0892 -2.72 0.00647 **
## black -0.5644 0.1330 -4.24 2.2e-05 ***
## LocationType -0.6950 0.1412 -4.92 8.5e-07 ***
## family -0.5253 0.1432 -3.67 0.00024 ***
## hospital 0.7127 0.2358 3.02 0.00250 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1733.2 on 1408 degrees of freedom
## AIC: 1743
##
## Number of Fisher Scoring iterations: 4
# Sequential analysis of deviance for candidate B.
anova(object = m4B_allsigs, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## black 1 27.12 1411 1769 1.9e-07 ***
## LocationType 1 12.12 1410 1757 0.00050 ***
## family 1 14.66 1409 1742 0.00013 ***
## hospital 1 8.94 1408 1733 0.00279 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Chi-squared comparison of the m5 model against the better of the two m4
# candidates (m4B, which has the lower AIC).
# Null rejected, keep the m5 model.
anova(object = m5_allsigs, m4B_allsigs, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: Completed ~ black + LocationType + family + assisted + hospital
## Model 2: Completed ~ black + LocationType + family + hospital
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1407 1728
## 2 1408 1733 -1 -5.28 0.022 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Use Only Positive Significant Binaries
# I'm using urban instead of LocationType because urban has suburban = 1,
# and suburban is the group for location type characteristics that has an
# odds ratio over 1.
# Deviance Test Null Hypothesis: This model predicts the probability that
# an individual completed the Gardasil regimen no better than the overall
# (average) probability. Wald's Test: For each independent variable,
# there is also the null hypothesis that the coefficient for that variable
# = 0.
# Starting model restricted to binaries with a positive effect on completion.
# `urban` is looked up in gard via data =; its definition is not in this part
# of the file - presumably created earlier, TODO confirm its coding.
m5_possigs <- glm(Completed ~ white + urban + WhiteMarsh + obgyn + hospital,
data = gard, family = binomial("logit"))
summary(m5_possigs) # AIC = 1759.9, Residual deviance = 1747.9
##
## Call:
## glm(formula = Completed ~ white + urban + WhiteMarsh + obgyn +
## hospital, family = binomial("logit"), data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.422 -0.973 -0.795 1.355 1.783
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.3608 0.1341 -10.15 < 2e-16 ***
## white 0.3697 0.1185 3.12 0.00181 **
## urban 0.4893 0.1357 3.61 0.00031 ***
## WhiteMarsh 0.3284 0.2022 1.62 0.10438
## obgyn 0.0943 0.1353 0.70 0.48573
## hospital 0.6375 0.2386 2.67 0.00755 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1747.9 on 1407 degrees of freedom
## AIC: 1760
##
## Number of Fisher Scoring iterations: 4
# Sequential analysis of deviance for the five-term positive-binaries model.
anova(object = m5_possigs, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## white 1 17.62 1411 1778 2.7e-05 ***
## urban 1 15.56 1410 1763 8.0e-05 ***
## WhiteMarsh 1 7.23 1409 1756 0.0072 **
## obgyn 1 0.65 1408 1755 0.4187
## hospital 1 6.99 1407 1748 0.0082 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Again, obgyn is the only non-significant improvement.
# m5_possigs with obgyn removed (four predictors remain).
m4_possigs <- glm(Completed ~ white + urban + WhiteMarsh + hospital, data = gard,
family = binomial("logit"))
summary(m4_possigs) # AIC = 1758.4, Residual deviance = 1748.4
##
## Call:
## glm(formula = Completed ~ white + urban + WhiteMarsh + hospital,
## family = binomial("logit"), data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.425 -0.984 -0.807 1.383 1.770
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.333 0.128 -10.44 < 2e-16 ***
## white 0.376 0.118 3.19 0.00143 **
## urban 0.483 0.135 3.57 0.00035 ***
## WhiteMarsh 0.394 0.179 2.20 0.02814 *
## hospital 0.645 0.239 2.70 0.00684 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1748.4 on 1408 degrees of freedom
## AIC: 1758
##
## Number of Fisher Scoring iterations: 4
# The reduced model has the lower AIC, so the m5 model is eliminated.
# Sequential analysis of deviance for the four-term model follows.
anova(object = m4_possigs, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## white 1 17.62 1411 1778 2.7e-05 ***
## urban 1 15.56 1410 1763 8.0e-05 ***
## WhiteMarsh 1 7.23 1409 1756 0.0072 **
## hospital 1 7.16 1408 1748 0.0075 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# WhiteMarsh is the weakest link in the anova despite not being the last
# variable added and has the only coefficient not significant at the 0.01
# level.
# m4_possigs with WhiteMarsh removed (three predictors remain).
m3_possigs <- glm(Completed ~ white + urban + hospital, data = gard, family = binomial("logit"))
summary(m3_possigs) # AIC = 1761.2, Residual deviance = 1753.2
##
## Call:
## glm(formula = Completed ~ white + urban + hospital, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.328 -0.922 -0.811 1.345 1.786
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.367 0.127 -10.75 < 2e-16 ***
## white 0.423 0.116 3.65 0.00026 ***
## urban 0.559 0.131 4.27 2e-05 ***
## hospital 0.732 0.233 3.14 0.00171 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1753.2 on 1409 degrees of freedom
## AIC: 1761
##
## Number of Fisher Scoring iterations: 4
# Sequential analysis of deviance for the three-term positive-binaries model.
anova(object = m3_possigs, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## white 1 17.6 1411 1778 2.7e-05 ***
## urban 1 15.6 1410 1763 8.0e-05 ***
## hospital 1 9.6 1409 1753 0.0019 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# m3's AIC is higher than m4's, so compare the two nested models directly
# with a Chi-squared test.
# Null rejected; keep m4 model.
anova(object = m4_possigs, m3_possigs, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: Completed ~ white + urban + WhiteMarsh + hospital
## Model 2: Completed ~ white + urban + hospital
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1408 1748
## 2 1409 1753 -1 -4.78 0.029 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Use Only Negative Significant Binaries
# Deviance Test Null Hypothesis: This model predicts the probability that
# an individual completed the Gardasil regimen no better than the overall
# (average) probability. Wald's Test: For each independent variable, there
# is also the null hypothesis that the coefficient for that variable = 0.
# Bayview and Johns Hopkins are not included, because they would be
# redundant with urban.
# Logistic model built from four binaries whose coefficients are all negative.
m4_negsigs <- glm(
  Completed ~ black + LocationType + family + assisted,
  family = binomial("logit"),
  data = gard
)
# Reported fit: AIC = 1743.4, Residual deviance = 1733.4
summary(m4_negsigs)
##
## Call:
## glm(formula = Completed ~ black + LocationType + family + assisted,
## family = binomial("logit"), data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.094 -0.893 -0.737 1.263 1.989
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1986 0.0884 -2.25 0.02460 *
## black -0.5139 0.1342 -3.83 0.00013 ***
## LocationType -0.3970 0.1605 -2.47 0.01336 *
## family -0.5480 0.1430 -3.83 0.00013 ***
## assisted -0.5685 0.1956 -2.91 0.00366 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1733.4 on 1408 degrees of freedom
## AIC: 1743
##
## Number of Fisher Scoring iterations: 4
# Sequential likelihood-ratio analysis of deviance for m4_negsigs; each term
# is tested given the terms that precede it in the formula.
anova(m4_negsigs, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## black 1 27.12 1411 1769 1.9e-07 ***
## LocationType 1 12.12 1410 1757 0.00050 ***
## family 1 14.66 1409 1742 0.00013 ***
## assisted 1 8.69 1408 1733 0.00321 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All values contribute to the explanatory power of the model, but
# LocationType is the only one not significant at the 0.01 level.
# Same negative-predictor model with LocationType removed.
m3_negsigs <- glm(
  Completed ~ black + family + assisted,
  family = binomial("logit"),
  data = gard
)
# Reported fit: AIC = 1747.6, Residual deviance = 1739.6
summary(m3_negsigs)
##
## Call:
## glm(formula = Completed ~ black + family + assisted, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.054 -0.885 -0.757 1.306 2.097
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2970 0.0794 -3.74 0.00018 ***
## black -0.5394 0.1335 -4.04 5.4e-05 ***
## family -0.4390 0.1366 -3.21 0.00131 **
## assisted -0.8066 0.1702 -4.74 2.1e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1739.6 on 1409 degrees of freedom
## AIC: 1748
##
## Number of Fisher Scoring iterations: 4
# Sequential likelihood-ratio analysis of deviance for m3_negsigs (terms are
# added first to last, so the table depends on formula order).
anova(m3_negsigs, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## black 1 27.12 1411 1769 1.9e-07 ***
## family 1 4.89 1410 1764 0.027 *
## assisted 1 24.38 1409 1740 7.9e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# m3_negsigs has a higher AIC than m4_negsigs (1747.6 vs 1743.4), so check
# the difference between the nested models with a Chi-squared
# (likelihood-ratio) test.
# NOTE(review): this call used to compare m4_possigs with m3_possigs, a
# copy-paste from the positive-binaries section. The recorded output that
# follows still shows that old comparison and must be regenerated.
# The residual deviances reported above (1733.4 vs 1739.6, 1 df apart) suggest
# the null will again be rejected at 0.05 -- confirm on rerun before keeping m4.
anova(m4_negsigs, m3_negsigs, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: Completed ~ white + urban + WhiteMarsh + hospital
## Model 2: Completed ~ white + urban + hospital
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1408 1748
## 2 1409 1753 -1 -4.78 0.029 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Use One Binary per Category
# Deviance Test Null Hypothesis: This model predicts the probability that
# an individual completed the Gardasil regimen no better than the overall
# (average) probability. Wald's Test: For each independent variable, there
# is also the null hypothesis that the coefficient for that variable = 0.
# For Race, black has the greatest difference from the mean probability.
# For LocationType and AgeGroup, there is already only 1 binary. For
# Location, Bayview and Johns Hopkins together are redundant with urban,
# so use White Marsh. For PracticeType, obgyn has not been significant in
# the past, so family is preferred. For InsuranceType, assisted and
# hospital are both good candidates. Will try both.
# One binary per category, with `assisted` representing InsuranceType.
m6_percatA <- glm(
  Completed ~ black + LocationType + AgeGroup + WhiteMarsh + family + assisted,
  family = binomial("logit"),
  data = gard
)
# Reported fit: AIC = 1733.8, Residual deviance = 1719.8
summary(m6_percatA)
##
## Call:
## glm(formula = Completed ~ black + LocationType + AgeGroup + WhiteMarsh +
## family + assisted, family = binomial("logit"), data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.323 -0.954 -0.758 1.227 2.088
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.117 0.118 -0.99 0.32181
## black -0.485 0.135 -3.59 0.00033 ***
## LocationType -0.304 0.169 -1.80 0.07250 .
## AgeGroup -0.403 0.124 -3.24 0.00118 **
## WhiteMarsh 0.452 0.191 2.36 0.01805 *
## family -0.374 0.155 -2.41 0.01604 *
## assisted -0.681 0.199 -3.42 0.00063 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1719.8 on 1406 degrees of freedom
## AIC: 1734
##
## Number of Fisher Scoring iterations: 4
# Sequential likelihood-ratio analysis of deviance for m6_percatA; each term
# is tested given the terms listed before it.
anova(m6_percatA, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## black 1 27.12 1411 1769 1.9e-07 ***
## LocationType 1 12.12 1410 1757 0.00050 ***
## AgeGroup 1 7.14 1409 1750 0.00755 **
## WhiteMarsh 1 11.70 1408 1738 0.00062 ***
## family 1 6.06 1407 1732 0.01383 *
## assisted 1 12.06 1406 1720 0.00052 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All variables contributed.
# Same as m6_percatA, but `hospital` represents InsuranceType instead of
# `assisted`.
m6_percatB <- glm(
  Completed ~ black + LocationType + AgeGroup + WhiteMarsh + family + hospital,
  family = binomial("logit"),
  data = gard
)
# Reported fit: AIC = 1738.1, Residual deviance = 1724.1
summary(m6_percatB)
##
## Call:
## glm(formula = Completed ~ black + LocationType + AgeGroup + WhiteMarsh +
## family + hospital, family = binomial("logit"), data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.558 -0.892 -0.737 1.248 1.940
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.164 0.117 -1.40 0.1604
## black -0.551 0.134 -4.12 3.7e-05 ***
## LocationType -0.666 0.153 -4.36 1.3e-05 ***
## AgeGroup -0.335 0.122 -2.74 0.0062 **
## WhiteMarsh 0.341 0.194 1.76 0.0785 .
## family -0.390 0.155 -2.51 0.0121 *
## hospital 0.683 0.242 2.82 0.0048 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1724.1 on 1406 degrees of freedom
## AIC: 1738
##
## Number of Fisher Scoring iterations: 4
# Sequential likelihood-ratio analysis of deviance for m6_percatB; only the
# last row (hospital) differs from the m6_percatA table.
anova(m6_percatB, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## black 1 27.12 1411 1769 1.9e-07 ***
## LocationType 1 12.12 1410 1757 0.00050 ***
## AgeGroup 1 7.14 1409 1750 0.00755 **
## WhiteMarsh 1 11.70 1408 1738 0.00062 ***
## family 1 6.06 1407 1732 0.01383 *
## hospital 1 7.79 1406 1724 0.00524 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All variables contributed, but m6_percatA has a lower AIC. So try
# using both assisted and hospital.
# Seven-term model: the per-category binaries plus both insurance binaries
# (hospital and assisted).
m7_percat <- glm(
  Completed ~ black + LocationType + AgeGroup + WhiteMarsh +
    family + hospital + assisted,
  family = binomial("logit"),
  data = gard
)
# Reported fit: AIC = 1731.8, Residual deviance = 1715.8
summary(m7_percat)
##
## Call:
## glm(formula = Completed ~ black + LocationType + AgeGroup + WhiteMarsh +
## family + hospital + assisted, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.512 -0.963 -0.754 1.231 2.063
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.125 0.118 -1.06 0.28828
## black -0.493 0.135 -3.64 0.00027 ***
## LocationType -0.402 0.177 -2.27 0.02305 *
## AgeGroup -0.398 0.124 -3.20 0.00136 **
## WhiteMarsh 0.383 0.195 1.97 0.04882 *
## family -0.382 0.155 -2.46 0.01406 *
## hospital 0.500 0.248 2.02 0.04325 *
## assisted -0.584 0.205 -2.85 0.00437 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1715.8 on 1405 degrees of freedom
## AIC: 1732
##
## Number of Fisher Scoring iterations: 4
# Sequential likelihood-ratio analysis of deviance for m7_percat; terms are
# added first to last in formula order.
anova(m7_percat, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## black 1 27.12 1411 1769 1.9e-07 ***
## LocationType 1 12.12 1410 1757 0.00050 ***
## AgeGroup 1 7.14 1409 1750 0.00755 **
## WhiteMarsh 1 11.70 1408 1738 0.00062 ***
## family 1 6.06 1407 1732 0.01383 *
## hospital 1 7.79 1406 1724 0.00524 **
## assisted 1 8.31 1405 1716 0.00394 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All variables contributed. And it isn't clear which is the weak link.
# I'm also not sure that 7 variables are necessary, so I'll just try all m6
# possibilities.
# m7_percat with `family` left out.
m6_percatC <- glm(
  Completed ~ black + LocationType + AgeGroup + WhiteMarsh +
    hospital + assisted,
  family = binomial("logit"),
  data = gard
)
# Reported fit: AIC = 1735.8, Residual deviance = 1721.8
summary(m6_percatC)
##
## Call:
## glm(formula = Completed ~ black + LocationType + AgeGroup + WhiteMarsh +
## hospital + assisted, family = binomial("logit"), data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.523 -0.892 -0.724 1.292 2.079
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.266 0.104 -2.56 0.01054 *
## black -0.491 0.135 -3.64 0.00027 ***
## LocationType -0.239 0.165 -1.45 0.14647
## AgeGroup -0.449 0.123 -3.67 0.00025 ***
## WhiteMarsh 0.565 0.181 3.12 0.00180 **
## hospital 0.485 0.247 1.96 0.04985 *
## assisted -0.594 0.205 -2.89 0.00379 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1721.8 on 1406 degrees of freedom
## AIC: 1736
##
## Number of Fisher Scoring iterations: 4
# Sequential likelihood-ratio analysis of deviance for m6_percatC (each term
# tested given the ones before it).
anova(m6_percatC, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## black 1 27.12 1411 1769 1.9e-07 ***
## LocationType 1 12.12 1410 1757 0.00050 ***
## AgeGroup 1 7.14 1409 1750 0.00755 **
## WhiteMarsh 1 11.70 1408 1738 0.00062 ***
## hospital 1 7.50 1407 1730 0.00616 **
## assisted 1 8.58 1406 1722 0.00340 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All variables contributed.
# m7_percat with `WhiteMarsh` left out.
m6_percatD <- glm(
  Completed ~ black + LocationType + AgeGroup + family + hospital + assisted,
  family = binomial("logit"),
  data = gard
)
# Reported fit: AIC = 1733.6, Residual deviance = 1719.6
summary(m6_percatD)
##
## Call:
## glm(formula = Completed ~ black + LocationType + AgeGroup + family +
## hospital + assisted, family = binomial("logit"), data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.414 -0.951 -0.753 1.196 2.053
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.0442 0.1101 -0.40 0.68802
## black -0.5153 0.1348 -3.82 0.00013 ***
## LocationType -0.5155 0.1666 -3.09 0.00197 **
## AgeGroup -0.3473 0.1212 -2.87 0.00416 **
## family -0.4925 0.1443 -3.41 0.00064 ***
## hospital 0.5848 0.2423 2.41 0.01579 *
## assisted -0.5547 0.2044 -2.71 0.00666 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1719.6 on 1406 degrees of freedom
## AIC: 1734
##
## Number of Fisher Scoring iterations: 4
# Sequential likelihood-ratio analysis of deviance for m6_percatD (each term
# tested given the ones before it).
anova(m6_percatD, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## black 1 27.12 1411 1769 1.9e-07 ***
## LocationType 1 12.12 1410 1757 0.00050 ***
## AgeGroup 1 7.14 1409 1750 0.00755 **
## family 1 12.69 1408 1737 0.00037 ***
## hospital 1 9.78 1407 1727 0.00177 **
## assisted 1 7.52 1406 1720 0.00609 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All variables contributed.
# m7_percat with `AgeGroup` left out.
m6_percatE <- glm(
  Completed ~ black + LocationType + WhiteMarsh + family + hospital + assisted,
  family = binomial("logit"),
  data = gard
)
# Reported fit: AIC = 1740.1, Residual deviance = 1726.1
summary(m6_percatE)
##
## Call:
## glm(formula = Completed ~ black + LocationType + WhiteMarsh +
## family + hospital + assisted, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.381 -0.894 -0.721 1.307 1.950
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.300 0.105 -2.87 0.00404 **
## black -0.505 0.135 -3.74 0.00018 ***
## LocationType -0.410 0.176 -2.32 0.02019 *
## WhiteMarsh 0.254 0.189 1.34 0.18021
## family -0.464 0.153 -3.03 0.00241 **
## hospital 0.514 0.246 2.09 0.03676 *
## assisted -0.469 0.202 -2.33 0.01993 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1726.1 on 1406 degrees of freedom
## AIC: 1740
##
## Number of Fisher Scoring iterations: 4
# Sequential likelihood-ratio analysis of deviance for m6_percatE (each term
# tested given the ones before it).
anova(m6_percatE, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## black 1 27.12 1411 1769 1.9e-07 ***
## LocationType 1 12.12 1410 1757 0.0005 ***
## WhiteMarsh 1 8.93 1409 1748 0.0028 **
## family 1 8.77 1408 1739 0.0031 **
## hospital 1 7.45 1407 1732 0.0064 **
## assisted 1 5.52 1406 1726 0.0188 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All variables contributed.
# m7_percat with `LocationType` left out.
m6_percatF <- glm(
  Completed ~ black + AgeGroup + WhiteMarsh + family + hospital + assisted,
  family = binomial("logit"),
  data = gard
)
# Reported fit: AIC = 1735.0, Residual deviance = 1721.0
summary(m6_percatF)
##
## Call:
## glm(formula = Completed ~ black + AgeGroup + WhiteMarsh + family +
## hospital + assisted, family = binomial("logit"), data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.455 -0.919 -0.742 1.282 2.153
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.243 0.107 -2.28 0.02257 *
## black -0.504 0.135 -3.74 0.00019 ***
## AgeGroup -0.402 0.124 -3.24 0.00120 **
## WhiteMarsh 0.534 0.184 2.91 0.00364 **
## family -0.250 0.145 -1.72 0.08505 .
## hospital 0.342 0.236 1.45 0.14742
## assisted -0.814 0.178 -4.57 4.8e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796 on 1412 degrees of freedom
## Residual deviance: 1721 on 1406 degrees of freedom
## AIC: 1735
##
## Number of Fisher Scoring iterations: 4
# Sequential likelihood-ratio analysis of deviance for m6_percatF (each term
# tested given the ones before it).
anova(m6_percatF, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## black 1 27.12 1411 1769 1.9e-07 ***
## AgeGroup 1 3.34 1410 1766 0.067 .
## WhiteMarsh 1 17.18 1409 1748 3.4e-05 ***
## family 1 0.67 1408 1748 0.414
## hospital 1 4.28 1407 1743 0.038 *
## assisted 1 22.44 1406 1721 2.2e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Not all variables contributed: AgeGroup (p = 0.067) and family (p = 0.414)
# are not significant in the sequential anova.
# m7_percat with `black` left out.
m6_percatG <- glm(
  Completed ~ LocationType + AgeGroup + WhiteMarsh + family +
    hospital + assisted,
  family = binomial("logit"),
  data = gard
)
# Reported fit: AIC = 1743.4, Residual deviance = 1729.4
summary(m6_percatG)
##
## Call:
## glm(formula = Completed ~ LocationType + AgeGroup + WhiteMarsh +
## family + hospital + assisted, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.474 -0.916 -0.766 1.282 1.962
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.243 0.113 -2.15 0.03191 *
## LocationType -0.424 0.175 -2.43 0.01518 *
## AgeGroup -0.410 0.124 -3.32 0.00090 ***
## WhiteMarsh 0.445 0.193 2.30 0.02131 *
## family -0.379 0.155 -2.45 0.01424 *
## hospital 0.471 0.245 1.92 0.05431 .
## assisted -0.692 0.201 -3.45 0.00056 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1729.4 on 1406 degrees of freedom
## AIC: 1743
##
## Number of Fisher Scoring iterations: 4
# Sequential likelihood-ratio analysis of deviance for m6_percatG (each term
# tested given the ones before it).
anova(m6_percatG, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## LocationType 1 18.89 1411 1777 1.4e-05 ***
## AgeGroup 1 7.01 1410 1770 0.00811 **
## WhiteMarsh 1 14.38 1409 1756 0.00015 ***
## family 1 6.01 1408 1750 0.01420 *
## hospital 1 8.03 1407 1742 0.00460 **
## assisted 1 12.27 1406 1729 0.00046 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All variables contributed.
# m6_percatA omits hospital. Null (no difference in fit) rejected, p = 0.044.
anova(m7_percat, m6_percatA, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: Completed ~ black + LocationType + AgeGroup + WhiteMarsh + family +
## hospital + assisted
## Model 2: Completed ~ black + LocationType + AgeGroup + WhiteMarsh + family +
## assisted
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1405 1716
## 2 1406 1720 -1 -4.05 0.044 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# m6_percatB omits assisted. Null (no difference in fit) rejected, p = 0.0039.
anova(m7_percat, m6_percatB, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: Completed ~ black + LocationType + AgeGroup + WhiteMarsh + family +
## hospital + assisted
## Model 2: Completed ~ black + LocationType + AgeGroup + WhiteMarsh + family +
## hospital
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1405 1716
## 2 1406 1724 -1 -8.31 0.0039 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# m6_percatC omits family. Null (no difference in fit) rejected, p = 0.014.
anova(m7_percat, m6_percatC, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: Completed ~ black + LocationType + AgeGroup + WhiteMarsh + family +
## hospital + assisted
## Model 2: Completed ~ black + LocationType + AgeGroup + WhiteMarsh + hospital +
## assisted
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1405 1716
## 2 1406 1722 -1 -6.08 0.014 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# m6_percatD omits WhiteMarsh. Null (no difference in fit) rejected, but only
# barely at the 0.05 level (p = 0.049).
anova(m7_percat, m6_percatD, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: Completed ~ black + LocationType + AgeGroup + WhiteMarsh + family +
## hospital + assisted
## Model 2: Completed ~ black + LocationType + AgeGroup + family + hospital +
## assisted
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1405 1716
## 2 1406 1720 -1 -3.88 0.049 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# m6_percatE omits AgeGroup. Null (no difference in fit) rejected, p = 0.0013.
anova(m7_percat, m6_percatE, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: Completed ~ black + LocationType + AgeGroup + WhiteMarsh + family +
## hospital + assisted
## Model 2: Completed ~ black + LocationType + WhiteMarsh + family + hospital +
## assisted
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1405 1716
## 2 1406 1726 -1 -10.3 0.0013 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# m6_percatF omits LocationType. Null (no difference in fit) rejected,
# p = 0.022.
anova(m7_percat, m6_percatF, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: Completed ~ black + LocationType + AgeGroup + WhiteMarsh + family +
## hospital + assisted
## Model 2: Completed ~ black + AgeGroup + WhiteMarsh + family + hospital +
## assisted
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1405 1716
## 2 1406 1721 -1 -5.21 0.022 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# m6_percatG omits black. Null (no difference in fit) rejected, p = 0.00022.
anova(m7_percat, m6_percatG, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: Completed ~ black + LocationType + AgeGroup + WhiteMarsh + family +
## hospital + assisted
## Model 2: Completed ~ LocationType + AgeGroup + WhiteMarsh + family + hospital +
## assisted
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1405 1716
## 2 1406 1729 -1 -13.7 0.00022 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# m7 is significantly better than all m6 models, so it's worth keeping.
Everything (but with References)
# Full model: AgeGroup and LocationType plus the dummy variables for Race,
# Location, PracticeType and InsuranceType that appear in the formula below.
m12_everything <- glm(
  Completed ~ AgeGroup + LocationType + white + otherrace + hispanic +
    WhiteMarsh + Bayview + obgyn + pediatric + private + hospital + military,
  family = binomial("logit"),
  data = gard
)
# Reported fit: AIC = 1739.9, Residual deviance = 1713.9
summary(m12_everything)
##
## Call:
## glm(formula = Completed ~ AgeGroup + LocationType + white + otherrace +
## hispanic + WhiteMarsh + Bayview + obgyn + pediatric + private +
## hospital + military, family = binomial("logit"), data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.548 -0.945 -0.748 1.258 2.114
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.490 0.261 -5.70 1.2e-08 ***
## AgeGroup -0.513 0.159 -3.24 0.00121 **
## LocationType -0.498 0.312 -1.60 0.10983
## white 0.477 0.144 3.31 0.00094 ***
## otherrace 0.449 0.200 2.25 0.02430 *
## hispanic 0.522 0.325 1.61 0.10755
## WhiteMarsh 0.294 0.215 1.37 0.17036
## Bayview 0.158 0.326 0.49 0.62687
## obgyn 0.502 0.181 2.78 0.00542 **
## pediatric 0.221 0.198 1.12 0.26431
## private 0.557 0.209 2.67 0.00761 **
## hospital 1.056 0.283 3.73 0.00019 ***
## military 0.606 0.246 2.47 0.01365 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1713.9 on 1400 degrees of freedom
## AIC: 1740
##
## Number of Fisher Scoring iterations: 4
# Sequential likelihood-ratio analysis of deviance for m12_everything; rows
# follow formula order, so each p-value is conditional on the terms above it.
anova(m12_everything, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## AgeGroup 1 2.62 1411 1793 0.1055
## LocationType 1 23.27 1410 1770 1.4e-06 ***
## white 1 15.22 1409 1755 9.6e-05 ***
## otherrace 1 4.01 1408 1751 0.0451 *
## hispanic 1 2.52 1407 1748 0.1126
## WhiteMarsh 1 10.71 1406 1738 0.0011 **
## Bayview 1 0.20 1405 1737 0.6586
## obgyn 1 7.77 1404 1730 0.0053 **
## pediatric 1 0.78 1403 1729 0.3774
## private 1 0.12 1402 1729 0.7330
## hospital 1 8.73 1401 1720 0.0031 **
## military 1 6.17 1400 1714 0.0130 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Hispanic isn't significant for the summary or the anova, and its
# placement is fairly early in the anova.
# m12_everything with `hispanic` removed.
m11_everything <- glm(
  Completed ~ AgeGroup + LocationType + white + otherrace +
    WhiteMarsh + Bayview + obgyn + pediatric + private + hospital + military,
  family = binomial("logit"),
  data = gard
)
# Reported fit: AIC = 1740.4, Residual deviance = 1716.4
summary(m11_everything)
##
## Call:
## glm(formula = Completed ~ AgeGroup + LocationType + white + otherrace +
## WhiteMarsh + Bayview + obgyn + pediatric + private + hospital +
## military, family = binomial("logit"), data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.545 -0.950 -0.745 1.260 2.080
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.439 0.258 -5.57 2.5e-08 ***
## AgeGroup -0.508 0.158 -3.21 0.00132 **
## LocationType -0.497 0.311 -1.60 0.10977
## white 0.416 0.138 3.01 0.00257 **
## otherrace 0.394 0.196 2.01 0.04421 *
## WhiteMarsh 0.318 0.214 1.49 0.13730
## Bayview 0.186 0.324 0.57 0.56664
## obgyn 0.495 0.180 2.74 0.00607 **
## pediatric 0.217 0.197 1.10 0.27201
## private 0.556 0.208 2.68 0.00747 **
## hospital 1.043 0.282 3.69 0.00022 ***
## military 0.614 0.245 2.51 0.01216 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1716.4 on 1401 degrees of freedom
## AIC: 1740
##
## Number of Fisher Scoring iterations: 4
# The m12 model has a lower AIC, so checking difference with a Chi-squared
# test.
# m11 omits hispanic. Null (no difference in fit) not rejected (p = 0.12), so
# the extra term is not justified; eliminate m12 model.
anova(m12_everything, m11_everything, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: Completed ~ AgeGroup + LocationType + white + otherrace + hispanic +
## WhiteMarsh + Bayview + obgyn + pediatric + private + hospital +
## military
## Model 2: Completed ~ AgeGroup + LocationType + white + otherrace + WhiteMarsh +
## Bayview + obgyn + pediatric + private + hospital + military
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1400 1714
## 2 1401 1716 -1 -2.47 0.12
# Sequential likelihood-ratio analysis of deviance for m11_everything (each
# term tested given the ones before it).
anova(m11_everything, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## AgeGroup 1 2.62 1411 1793 0.10547
## LocationType 1 23.27 1410 1770 1.4e-06 ***
## white 1 15.22 1409 1755 9.6e-05 ***
## otherrace 1 4.01 1408 1751 0.04513 *
## WhiteMarsh 1 11.23 1407 1740 0.00081 ***
## Bayview 1 0.11 1406 1740 0.73713
## obgyn 1 7.55 1405 1732 0.00600 **
## pediatric 1 0.75 1404 1731 0.38579
## private 1 0.10 1403 1731 0.74727
## hospital 1 8.38 1402 1723 0.00379 **
## military 1 6.38 1401 1716 0.01157 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Bayview is the first variable that isn't significant for the summary or
# the anova. Removing...
# m11_everything with `Bayview` removed.
m10_everything <- glm(
  Completed ~ AgeGroup + LocationType + white + otherrace +
    WhiteMarsh + obgyn + pediatric + private + hospital + military,
  family = binomial("logit"),
  data = gard
)
# Reported fit: AIC = 1738.7, Residual deviance = 1716.7
summary(m10_everything)
##
## Call:
## glm(formula = Completed ~ AgeGroup + LocationType + white + otherrace +
## WhiteMarsh + obgyn + pediatric + private + hospital + military,
## family = binomial("logit"), data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.546 -0.944 -0.740 1.251 2.091
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.436 0.258 -5.57 2.5e-08 ***
## AgeGroup -0.511 0.158 -3.23 0.00125 **
## LocationType -0.355 0.185 -1.92 0.05438 .
## white 0.427 0.137 3.12 0.00180 **
## otherrace 0.401 0.195 2.05 0.04038 *
## WhiteMarsh 0.335 0.212 1.58 0.11341
## obgyn 0.473 0.177 2.68 0.00738 **
## pediatric 0.236 0.194 1.21 0.22473
## private 0.552 0.207 2.66 0.00784 **
## hospital 1.036 0.282 3.68 0.00024 ***
## military 0.602 0.244 2.47 0.01352 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1716.7 on 1402 degrees of freedom
## AIC: 1739
##
## Number of Fisher Scoring iterations: 4
# m10 model has lower AIC, so eliminate m11 model.
# Sequential likelihood-ratio analysis of deviance for m10_everything (each
# term tested given the ones before it).
anova(m10_everything, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## AgeGroup 1 2.62 1411 1793 0.10547
## LocationType 1 23.27 1410 1770 1.4e-06 ***
## white 1 15.22 1409 1755 9.6e-05 ***
## otherrace 1 4.01 1408 1751 0.04513 *
## WhiteMarsh 1 11.23 1407 1740 0.00081 ***
## obgyn 1 7.36 1406 1732 0.00665 **
## pediatric 1 0.91 1405 1731 0.34135
## private 1 0.12 1404 1731 0.73302
## hospital 1 8.38 1403 1723 0.00380 **
## military 1 6.18 1402 1717 0.01291 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# pediatric is the first variable that isn't significant for the summary
# or the anova. Removing...
# m10_everything with `pediatric` removed; LocationType is also moved to the
# end of the formula, which changes its position in the sequential anova.
m9_everything <- glm(
  Completed ~ AgeGroup + white + otherrace + WhiteMarsh +
    obgyn + private + hospital + military + LocationType,
  family = binomial("logit"),
  data = gard
)
# Reported fit: AIC = 1738.2, Residual deviance = 1718.2
summary(m9_everything)
##
## Call:
## glm(formula = Completed ~ AgeGroup + white + otherrace + WhiteMarsh +
## obgyn + private + hospital + military + LocationType, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.572 -0.935 -0.730 1.292 2.133
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.282 0.224 -5.73 1.0e-08 ***
## AgeGroup -0.600 0.140 -4.29 1.8e-05 ***
## white 0.426 0.137 3.11 0.00184 **
## otherrace 0.391 0.195 2.00 0.04518 *
## WhiteMarsh 0.356 0.212 1.68 0.09297 .
## obgyn 0.381 0.159 2.40 0.01649 *
## private 0.540 0.207 2.60 0.00923 **
## hospital 1.013 0.281 3.60 0.00031 ***
## military 0.591 0.244 2.42 0.01536 *
## LocationType -0.284 0.175 -1.62 0.10536
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1718.2 on 1403 degrees of freedom
## AIC: 1738
##
## Number of Fisher Scoring iterations: 4
# m9 model has lower AIC, so eliminate m10 model.
# Sequential likelihood-ratio analysis of deviance for m9_everything;
# LocationType is now the last term, so it is tested after all the others.
anova(m9_everything, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## AgeGroup 1 2.62 1411 1793 0.1055
## white 1 18.51 1410 1775 1.7e-05 ***
## otherrace 1 8.61 1409 1766 0.0033 **
## WhiteMarsh 1 17.12 1408 1749 3.5e-05 ***
## obgyn 1 5.02 1407 1744 0.0250 *
## private 1 0.66 1406 1743 0.4161
## hospital 1 5.44 1405 1738 0.0196 *
## military 1 17.19 1404 1721 3.4e-05 ***
## LocationType 1 2.64 1403 1718 0.1043
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# LocationType does not have a significant coefficient, and it only
# contributes if placed at the beginning of the model. Removing...
# m9_everything with `LocationType` removed.
m8_everything <- glm(
  Completed ~ AgeGroup + white + otherrace + WhiteMarsh +
    obgyn + private + hospital + military,
  family = binomial("logit"),
  data = gard
)
# Reported fit: AIC = 1738.8, Residual deviance = 1720.8
summary(m8_everything)
##
## Call:
## glm(formula = Completed ~ AgeGroup + white + otherrace + WhiteMarsh +
## obgyn + private + hospital + military, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.523 -0.925 -0.757 1.301 2.104
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.530 0.165 -9.26 < 2e-16 ***
## AgeGroup -0.568 0.138 -4.10 4.1e-05 ***
## white 0.427 0.137 3.13 0.00176 **
## otherrace 0.428 0.194 2.21 0.02694 *
## WhiteMarsh 0.473 0.199 2.37 0.01767 *
## obgyn 0.349 0.158 2.21 0.02708 *
## private 0.695 0.183 3.79 0.00015 ***
## hospital 1.065 0.278 3.83 0.00013 ***
## military 0.817 0.201 4.07 4.7e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1720.8 on 1404 degrees of freedom
## AIC: 1739
##
## Number of Fisher Scoring iterations: 4
# The m9 model has a slightly lower AIC (1738 vs. 1739), so checking whether
# the difference in deviance is significant with a Chi-squared test.
# Likelihood-ratio test of m8 against m9; the two models differ only by LocationType.
anova(m9_everything, m8_everything, test = "Chisq") # Null not rejected (p = 0.1); eliminate m9 model.
## Analysis of Deviance Table
##
## Model 1: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn +
## private + hospital + military + LocationType
## Model 2: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn +
## private + hospital + military
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1403 1718
## 2 1404 1721 -1 -2.64 0.1
# Sequential analysis of deviance for m8_everything (terms tested in formula order).
anova(m8_everything, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## AgeGroup 1 2.62 1411 1793 0.1055
## white 1 18.51 1410 1775 1.7e-05 ***
## otherrace 1 8.61 1409 1766 0.0033 **
## WhiteMarsh 1 17.12 1408 1749 3.5e-05 ***
## obgyn 1 5.02 1407 1744 0.0250 *
## private 1 0.66 1406 1743 0.4161
## hospital 1 5.44 1405 1738 0.0196 *
## military 1 17.19 1404 1721 3.4e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# No single term is clearly the weakest, so fit every seven-term model obtained
# by dropping one term from m8 (m7A through m7H).
# m7A: m8_everything with `private` dropped.
m7A_everything <- glm(Completed ~ AgeGroup + white + otherrace + WhiteMarsh +
    obgyn + military + hospital, data = gard, family = binomial("logit"))
# summary() for glm fits has no `test` argument (the original `test = "LRT"`
# was silently absorbed by `...`), so it is omitted; the printed summary is
# the same.
summary(m7A_everything) # AIC = 1752, Residual deviance = 1736
##
## Call:
## glm(formula = Completed ~ AgeGroup + white + otherrace + WhiteMarsh +
## obgyn + military + hospital, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.538 -0.923 -0.742 1.307 1.891
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.148 0.123 -9.35 < 2e-16 ***
## AgeGroup -0.456 0.136 -3.36 0.00077 ***
## white 0.514 0.134 3.83 0.00013 ***
## otherrace 0.549 0.191 2.87 0.00409 **
## WhiteMarsh 0.553 0.198 2.79 0.00521 **
## obgyn 0.380 0.157 2.41 0.01581 *
## military 0.301 0.143 2.11 0.03525 *
## hospital 0.517 0.236 2.19 0.02861 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796 on 1412 degrees of freedom
## Residual deviance: 1736 on 1405 degrees of freedom
## AIC: 1752
##
## Number of Fisher Scoring iterations: 4
# m7B: m8_everything with `hospital` dropped.
m7B_everything <- glm(Completed ~ AgeGroup + white + otherrace + WhiteMarsh +
    obgyn + military + private, data = gard, family = binomial("logit"))
# summary() for glm fits has no `test` argument (the original `test = "LRT"`
# was silently absorbed by `...`), so it is omitted; the printed summary is
# the same.
summary(m7B_everything) # AIC = 1751.3, Residual deviance = 1735.3
##
## Call:
## glm(formula = Completed ~ AgeGroup + white + otherrace + WhiteMarsh +
## obgyn + military + private, family = binomial("logit"), data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.389 -0.935 -0.767 1.311 1.981
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.284 0.144 -8.91 < 2e-16 ***
## AgeGroup -0.528 0.138 -3.82 0.00013 ***
## white 0.472 0.136 3.48 0.00050 ***
## otherrace 0.493 0.192 2.56 0.01039 *
## WhiteMarsh 0.559 0.198 2.83 0.00465 **
## obgyn 0.385 0.157 2.45 0.01414 *
## military 0.503 0.177 2.85 0.00443 **
## private 0.352 0.153 2.30 0.02153 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1735.3 on 1405 degrees of freedom
## AIC: 1751
##
## Number of Fisher Scoring iterations: 4
# m7C: m8_everything with `military` dropped.
m7C_everything <- glm(Completed ~ AgeGroup + white + otherrace + WhiteMarsh +
    obgyn + private + hospital, data = gard, family = binomial("logit"))
# summary() for glm fits has no `test` argument (the original `test = "LRT"`
# was silently absorbed by `...`), so it is omitted; the printed summary is
# the same.
summary(m7C_everything) # AIC = 1754.0, Residual deviance = 1738.0
##
## Call:
## glm(formula = Completed ~ AgeGroup + white + otherrace + WhiteMarsh +
## obgyn + private + hospital, family = binomial("logit"), data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.504 -0.934 -0.743 1.353 1.907
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.164 0.129 -9.01 < 2e-16 ***
## AgeGroup -0.476 0.137 -3.48 0.00050 ***
## white 0.561 0.132 4.25 2.2e-05 ***
## otherrace 0.624 0.187 3.33 0.00086 ***
## WhiteMarsh 0.465 0.199 2.34 0.01943 *
## obgyn 0.295 0.157 1.88 0.05992 .
## private 0.199 0.130 1.53 0.12607
## hospital 0.585 0.249 2.35 0.01864 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796 on 1412 degrees of freedom
## Residual deviance: 1738 on 1405 degrees of freedom
## AIC: 1754
##
## Number of Fisher Scoring iterations: 4
# m7D: m8_everything with `obgyn` dropped.
m7D_everything <- glm(Completed ~ AgeGroup + white + otherrace + WhiteMarsh +
    private + military + hospital, data = gard, family = binomial("logit"))
# summary() for glm fits has no `test` argument (the original `test = "LRT"`
# was silently absorbed by `...`), so it is omitted; the printed summary is
# the same.
summary(m7D_everything) # AIC = 1741.7, Residual deviance = 1725.7
##
## Call:
## glm(formula = Completed ~ AgeGroup + white + otherrace + WhiteMarsh +
## private + military + hospital, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.487 -0.891 -0.720 1.294 2.033
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.499 0.165 -9.10 < 2e-16 ***
## AgeGroup -0.433 0.123 -3.52 0.00043 ***
## white 0.448 0.136 3.29 0.00099 ***
## otherrace 0.414 0.193 2.15 0.03175 *
## WhiteMarsh 0.649 0.183 3.54 0.00040 ***
## private 0.715 0.183 3.90 9.5e-05 ***
## military 0.781 0.200 3.91 9.2e-05 ***
## hospital 1.105 0.278 3.97 7.1e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1725.7 on 1405 degrees of freedom
## AIC: 1742
##
## Number of Fisher Scoring iterations: 4
# m7E: m8_everything with `WhiteMarsh` dropped.
m7E_everything <- glm(Completed ~ AgeGroup + white + otherrace + private + obgyn +
    military + hospital, data = gard, family = binomial("logit"))
# summary() for glm fits has no `test` argument (the original `test = "LRT"`
# was silently absorbed by `...`), so it is omitted; the printed summary is
# the same.
summary(m7E_everything) # AIC = 1742.4, Residual deviance = 1726.4
##
## Call:
## glm(formula = Completed ~ AgeGroup + white + otherrace + private +
## obgyn + military + hospital, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.410 -0.991 -0.759 1.320 2.126
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.560 0.165 -9.45 < 2e-16 ***
## AgeGroup -0.589 0.138 -4.27 2.0e-05 ***
## white 0.464 0.135 3.43 0.00061 ***
## otherrace 0.419 0.194 2.16 0.03045 *
## private 0.736 0.182 4.04 5.3e-05 ***
## obgyn 0.492 0.145 3.40 0.00067 ***
## military 0.813 0.201 4.05 5.1e-05 ***
## hospital 1.136 0.275 4.12 3.7e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1726.4 on 1405 degrees of freedom
## AIC: 1742
##
## Number of Fisher Scoring iterations: 4
# m7F: m8_everything with `otherrace` dropped.
m7F_everything <- glm(Completed ~ AgeGroup + white + private + WhiteMarsh +
    obgyn + military + hospital, data = gard, family = binomial("logit"))
# summary() for glm fits has no `test` argument (the original `test = "LRT"`
# was silently absorbed by `...`), so it is omitted; the printed summary is
# the same.
summary(m7F_everything) # AIC = 1741.7, Residual deviance = 1725.7
##
## Call:
## glm(formula = Completed ~ AgeGroup + white + private + WhiteMarsh +
## obgyn + military + hospital, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.511 -0.921 -0.738 1.282 2.071
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.453 0.160 -9.06 < 2e-16 ***
## AgeGroup -0.568 0.138 -4.11 4.0e-05 ***
## white 0.292 0.121 2.42 0.016 *
## private 0.754 0.181 4.16 3.2e-05 ***
## WhiteMarsh 0.463 0.199 2.33 0.020 *
## obgyn 0.338 0.157 2.15 0.032 *
## military 0.919 0.195 4.71 2.5e-06 ***
## hospital 1.117 0.277 4.04 5.4e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1725.7 on 1405 degrees of freedom
## AIC: 1742
##
## Number of Fisher Scoring iterations: 4
# m7G: m8_everything with `white` dropped.
m7G_everything <- glm(Completed ~ AgeGroup + private + otherrace + WhiteMarsh +
    obgyn + military + hospital, data = gard, family = binomial("logit"))
# summary() for glm fits has no `test` argument (the original `test = "LRT"`
# was silently absorbed by `...`), so it is omitted; the printed summary is
# the same.
summary(m7G_everything) # AIC = 1746.7, Residual deviance = 1730.7
##
## Call:
## glm(formula = Completed ~ AgeGroup + private + otherrace + WhiteMarsh +
## obgyn + military + hospital, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.488 -0.943 -0.737 1.284 2.041
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.365 0.155 -8.82 < 2e-16 ***
## AgeGroup -0.585 0.138 -4.24 2.2e-05 ***
## private 0.784 0.180 4.35 1.4e-05 ***
## otherrace 0.153 0.172 0.89 0.3729
## WhiteMarsh 0.546 0.197 2.77 0.0056 **
## obgyn 0.384 0.157 2.45 0.0144 *
## military 0.966 0.195 4.95 7.3e-07 ***
## hospital 1.141 0.276 4.13 3.6e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1730.7 on 1405 degrees of freedom
## AIC: 1747
##
## Number of Fisher Scoring iterations: 4
# m7H: m8_everything with `AgeGroup` dropped.
m7H_everything <- glm(Completed ~ private + white + otherrace + WhiteMarsh +
    obgyn + military + hospital, data = gard, family = binomial("logit"))
# summary() for glm fits has no `test` argument (the original `test = "LRT"`
# was silently absorbed by `...`), so it is omitted; the printed summary is
# the same.
summary(m7H_everything) # AIC = 1754.0, Residual deviance = 1738.0
##
## Call:
## glm(formula = Completed ~ private + white + otherrace + WhiteMarsh +
## obgyn + military + hospital, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.353 -0.947 -0.771 1.382 1.888
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.598 0.165 -9.71 < 2e-16 ***
## private 0.537 0.178 3.01 0.00263 **
## white 0.449 0.136 3.30 0.00095 ***
## otherrace 0.426 0.192 2.22 0.02653 *
## WhiteMarsh 0.525 0.198 2.66 0.00793 **
## obgyn 0.060 0.139 0.43 0.66622
## military 0.681 0.197 3.46 0.00055 ***
## hospital 0.968 0.276 3.51 0.00045 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796 on 1412 degrees of freedom
## Residual deviance: 1738 on 1405 degrees of freedom
## AIC: 1754
##
## Number of Fisher Scoring iterations: 4
# The m8 model has a lower AIC than every m7 model. Checking whether each
# difference in deviance is significant with Chi-squared tests.
# Likelihood-ratio test for dropping `private` from m8 (model m7A).
anova(m8_everything, m7A_everything, test = "Chisq") # Null rejected (p = 1e-04)
## Analysis of Deviance Table
##
## Model 1: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn +
## private + hospital + military
## Model 2: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn +
## military + hospital
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1404 1721
## 2 1405 1736 -1 -15.1 1e-04 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Likelihood-ratio test for dropping `hospital` from m8 (model m7B).
anova(m8_everything, m7B_everything, test = "Chisq") # Null rejected (p = 0.00014)
## Analysis of Deviance Table
##
## Model 1: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn +
## private + hospital + military
## Model 2: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn +
## military + private
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1404 1721
## 2 1405 1735 -1 -14.5 0.00014 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Likelihood-ratio test for dropping `military` from m8 (model m7C).
anova(m8_everything, m7C_everything, test = "Chisq") # Null rejected (p = 3.4e-05)
## Analysis of Deviance Table
##
## Model 1: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn +
## private + hospital + military
## Model 2: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn +
## private + hospital
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1404 1721
## 2 1405 1738 -1 -17.2 3.4e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Likelihood-ratio test for dropping `obgyn` from m8 (model m7D).
anova(m8_everything, m7D_everything, test = "Chisq") # Null rejected (p = 0.027)
## Analysis of Deviance Table
##
## Model 1: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn +
## private + hospital + military
## Model 2: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + private +
## military + hospital
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1404 1721
## 2 1405 1726 -1 -4.88 0.027 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Likelihood-ratio test for dropping `WhiteMarsh` from m8 (model m7E).
anova(m8_everything, m7E_everything, test = "Chisq") # Null rejected (p = 0.018)
## Analysis of Deviance Table
##
## Model 1: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn +
## private + hospital + military
## Model 2: Completed ~ AgeGroup + white + otherrace + private + obgyn +
## military + hospital
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1404 1721
## 2 1405 1726 -1 -5.62 0.018 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Likelihood-ratio test for dropping `otherrace` from m8 (model m7F).
anova(m8_everything, m7F_everything, test = "Chisq") # Null rejected (p = 0.028)
## Analysis of Deviance Table
##
## Model 1: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn +
## private + hospital + military
## Model 2: Completed ~ AgeGroup + white + private + WhiteMarsh + obgyn +
## military + hospital
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1404 1721
## 2 1405 1726 -1 -4.83 0.028 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Likelihood-ratio test for dropping `white` from m8 (model m7G).
anova(m8_everything, m7G_everything, test = "Chisq") # Null rejected (p = 0.0016)
## Analysis of Deviance Table
##
## Model 1: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn +
## private + hospital + military
## Model 2: Completed ~ AgeGroup + private + otherrace + WhiteMarsh + obgyn +
## military + hospital
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1404 1721
## 2 1405 1731 -1 -9.91 0.0016 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Likelihood-ratio test for dropping `AgeGroup` from m8 (model m7H).
anova(m8_everything, m7H_everything, test = "Chisq") # Null rejected (p = 3.3e-05)
## Analysis of Deviance Table
##
## Model 1: Completed ~ AgeGroup + white + otherrace + WhiteMarsh + obgyn +
## private + hospital + military
## Model 2: Completed ~ private + white + otherrace + WhiteMarsh + obgyn +
## military + hospital
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1404 1721
## 2 1405 1738 -1 -17.2 3.3e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The null hypothesis was rejected in all cases; keep m8 model.
Regsubsets?
# Best-subset search: for each model size up to `nvmax`, keep the single best
# (`nbest = 1`) combination of predictors of Completed.
# regsubsets() is provided by the leaps package, which was never attached; that
# is what caused the original 'could not find function "regsubsets"' error.
# Calling it through its namespace makes the dependency explicit.
# Columns used: 2 = AgeGroup, 5 = Completed, 9 = LocationType, 11:25 = the
# race / insurance / location / practice dummy variables created above.
# NOTE(review): nvmax = 10 caps the search at 10 predictors, yet the models
# fitted below start at 11 predictors -- confirm the intended nvmax.
m_reg <- leaps::regsubsets(Completed ~ ., data = gard[, c(2, 5, 9, 11:25)],
    nbest = 1, nvmax = 10)
summary(m_reg)
## (No output was recorded: the original run stopped with
## 'Error: could not find function "regsubsets"' because leaps was not attached.)
# regsubsets() reports which combination of variables is best for each
# number of variables allowed in the model. Starting with the 11-variable
# model (m11) and working down until AIC reaches its minimum.
# m11_regsub: logistic regression on the 11-predictor subset.
m11_regsub <- glm(Completed ~ AgeGroup + LocationType + black + otherrace +
assisted + private + hospital + WhiteMarsh + Bayview + obgyn + pediatric,
data = gard, family = binomial("logit"))
summary(m11_regsub) # AIC = 1737.9, Residual deviance = 1713.9
##
## Call:
## glm(formula = Completed ~ AgeGroup + LocationType + black + otherrace +
## assisted + private + hospital + WhiteMarsh + Bayview + obgyn +
## pediatric, family = binomial("logit"), data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.549 -0.946 -0.750 1.257 2.113
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.4049 0.1804 -2.24 0.02482 *
## AgeGroup -0.5125 0.1585 -3.23 0.00122 **
## LocationType -0.4961 0.3112 -1.59 0.11094
## black -0.4801 0.1422 -3.38 0.00073 ***
## otherrace -0.0298 0.1786 -0.17 0.86765
## assisted -0.6035 0.2451 -2.46 0.01380 *
## private -0.0486 0.1561 -0.31 0.75538
## hospital 0.4499 0.2832 1.59 0.11220
## WhiteMarsh 0.2951 0.2145 1.38 0.16881
## Bayview 0.1581 0.3258 0.49 0.62740
## obgyn 0.5008 0.1803 2.78 0.00548 **
## pediatric 0.2206 0.1976 1.12 0.26421
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1713.9 on 1401 degrees of freedom
## AIC: 1738
##
## Number of Fisher Scoring iterations: 4
# m10_regsub: m11_regsub with `otherrace` removed.
m10_regsub <- glm(Completed ~ AgeGroup + LocationType + black + assisted + private +
hospital + WhiteMarsh + Bayview + obgyn + pediatric, data = gard, family = binomial("logit"))
summary(m10_regsub) # AIC = 1735.9, Residual deviance = 1713.9
##
## Call:
## glm(formula = Completed ~ AgeGroup + LocationType + black + assisted +
## private + hospital + WhiteMarsh + Bayview + obgyn + pediatric,
## family = binomial("logit"), data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.548 -0.945 -0.748 1.260 2.112
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.4139 0.1722 -2.40 0.01622 *
## AgeGroup -0.5122 0.1585 -3.23 0.00123 **
## LocationType -0.4942 0.3110 -1.59 0.11209
## black -0.4744 0.1379 -3.44 0.00058 ***
## assisted -0.6037 0.2451 -2.46 0.01377 *
## private -0.0483 0.1561 -0.31 0.75695
## hospital 0.4488 0.2832 1.59 0.11296
## WhiteMarsh 0.3001 0.2124 1.41 0.15767
## Bayview 0.1602 0.3255 0.49 0.62264
## obgyn 0.5033 0.1797 2.80 0.00509 **
## pediatric 0.2217 0.1974 1.12 0.26158
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1713.9 on 1402 degrees of freedom
## AIC: 1736
##
## Number of Fisher Scoring iterations: 4
# m9_regsub: m10_regsub with `Bayview` removed.
m9_regsub <- glm(Completed ~ AgeGroup + LocationType + black + assisted + private +
hospital + WhiteMarsh + obgyn + pediatric, data = gard, family = binomial("logit"))
summary(m9_regsub) # AIC = 1734.2, Residual deviance = 1714.2
##
## Call:
## glm(formula = Completed ~ AgeGroup + LocationType + black + assisted +
## private + hospital + WhiteMarsh + obgyn + pediatric, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.548 -0.940 -0.745 1.253 2.121
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.4138 0.1722 -2.40 0.0163 *
## AgeGroup -0.5145 0.1584 -3.25 0.0012 **
## LocationType -0.3716 0.1839 -2.02 0.0434 *
## black -0.4839 0.1366 -3.54 0.0004 ***
## assisted -0.5931 0.2440 -2.43 0.0151 *
## private -0.0419 0.1556 -0.27 0.7875
## hospital 0.4532 0.2829 1.60 0.1092
## WhiteMarsh 0.3153 0.2103 1.50 0.1338
## obgyn 0.4850 0.1760 2.76 0.0059 **
## pediatric 0.2384 0.1945 1.23 0.2203
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1714.2 on 1403 degrees of freedom
## AIC: 1734
##
## Number of Fisher Scoring iterations: 4
# m8_regsub: not a simple deletion from m9_regsub -- `private` and `WhiteMarsh`
# are removed and `Odenton` is added.
m8_regsub <- glm(Completed ~ AgeGroup + LocationType + black + assisted + hospital +
Odenton + obgyn + pediatric, data = gard, family = binomial("logit"))
summary(m8_regsub) # AIC = 1732.3, Residual deviance = 1714.3
##
## Call:
## glm(formula = Completed ~ AgeGroup + LocationType + black + assisted +
## hospital + Odenton + obgyn + pediatric, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.548 -0.943 -0.746 1.261 2.124
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.131 0.254 -0.51 0.60710
## AgeGroup -0.517 0.158 -3.27 0.00107 **
## LocationType -0.688 0.229 -3.00 0.00268 **
## black -0.489 0.135 -3.61 0.00031 ***
## assisted -0.558 0.206 -2.71 0.00679 **
## hospital 0.490 0.248 1.98 0.04777 *
## Odenton -0.303 0.205 -1.48 0.13998
## obgyn 0.480 0.175 2.74 0.00608 **
## pediatric 0.239 0.195 1.23 0.22009
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1714.3 on 1404 degrees of freedom
## AIC: 1732
##
## Number of Fisher Scoring iterations: 4
# m7_regsub: m8_regsub with `pediatric` removed.
m7_regsub <- glm(Completed ~ AgeGroup + LocationType + black + assisted + hospital +
Odenton + obgyn, data = gard, family = binomial("logit"))
summary(m7_regsub) # AIC = 1731.8, Residual deviance = 1715.8
##
## Call:
## glm(formula = Completed ~ AgeGroup + LocationType + black + assisted +
## hospital + Odenton + obgyn, family = binomial("logit"), data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.574 -0.933 -0.741 1.304 2.166
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.0314 0.2172 0.14 0.88520
## AgeGroup -0.6075 0.1395 -4.35 1.3e-05 ***
## LocationType -0.6370 0.2253 -2.83 0.00470 **
## black -0.4858 0.1354 -3.59 0.00033 ***
## assisted -0.5457 0.2060 -2.65 0.00807 **
## hospital 0.4782 0.2473 1.93 0.05320 .
## Odenton -0.3251 0.2052 -1.58 0.11310
## obgyn 0.3874 0.1573 2.46 0.01379 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1715.8 on 1405 degrees of freedom
## AIC: 1732
##
## Number of Fisher Scoring iterations: 4
# m7 is the AIC minimum. But is it significantly better than m6?
# m6_regsub: m7_regsub with `Odenton` removed.
m6_regsub <- glm(Completed ~ AgeGroup + LocationType + black + assisted + hospital +
obgyn, data = gard, family = binomial("logit"))
summary(m6_regsub) # AIC = 1732.3, Residual deviance = 1718.3
##
## Call:
## glm(formula = Completed ~ AgeGroup + LocationType + black + assisted +
## hospital + obgyn, family = binomial("logit"), data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.516 -0.914 -0.736 1.294 2.187
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.271 0.103 -2.62 0.00869 **
## AgeGroup -0.628 0.139 -4.52 6e-06 ***
## LocationType -0.387 0.161 -2.41 0.01609 *
## black -0.501 0.135 -3.71 0.00021 ***
## assisted -0.510 0.205 -2.49 0.01269 *
## hospital 0.538 0.243 2.21 0.02719 *
## obgyn 0.502 0.139 3.61 0.00030 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1718.3 on 1406 degrees of freedom
## AIC: 1732
##
## Number of Fisher Scoring iterations: 4
# Likelihood-ratio test for dropping `Odenton` from m7_regsub (nested models).
anova(m7_regsub, m6_regsub, test = "Chisq") # Null not rejected (p = 0.11); eliminate m7.
## Analysis of Deviance Table
##
## Model 1: Completed ~ AgeGroup + LocationType + black + assisted + hospital +
## Odenton + obgyn
## Model 2: Completed ~ AgeGroup + LocationType + black + assisted + hospital +
## obgyn
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1405 1716
## 2 1406 1718 -1 -2.51 0.11
# m5_regsub: five-predictor subset. It is not nested in m6_regsub: it contains
# `WhiteMarsh` and omits `LocationType` and `hospital`.
m5_regsub <- glm(Completed ~ AgeGroup + black + assisted + WhiteMarsh + obgyn,
data = gard, family = binomial("logit"))
summary(m5_regsub) # AIC = 1733.4, Residual deviance = 1721.4
##
## Call:
## glm(formula = Completed ~ AgeGroup + black + assisted + WhiteMarsh +
## obgyn, family = binomial("logit"), data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.386 -0.949 -0.756 1.320 2.139
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.3291 0.0989 -3.33 0.00087 ***
## AgeGroup -0.5858 0.1381 -4.24 2.2e-05 ***
## black -0.4885 0.1348 -3.62 0.00029 ***
## assisted -0.7763 0.1733 -4.48 7.5e-06 ***
## WhiteMarsh 0.4574 0.1957 2.34 0.01944 *
## obgyn 0.3495 0.1543 2.26 0.02352 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1721.4 on 1407 degrees of freedom
## AIC: 1733
##
## Number of Fisher Scoring iterations: 4
# NOTE(review): m6_regsub and m5_regsub are not nested (m5 contains WhiteMarsh,
# which m6 lacks), so this chi-squared comparison is not a valid likelihood-ratio
# test; treat the p-value as a rough guide and compare AIC as well.
anova(m6_regsub, m5_regsub, test = "Chisq") # Null not rejected (p = 0.077)
## Analysis of Deviance Table
##
## Model 1: Completed ~ AgeGroup + LocationType + black + assisted + hospital +
## obgyn
## Model 2: Completed ~ AgeGroup + black + assisted + WhiteMarsh + obgyn
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1406 1718
## 2 1407 1721 -1 -3.13 0.077 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# NOTE(review): m7_regsub and m5_regsub are not nested (m5 contains WhiteMarsh,
# which m7 lacks), so this chi-squared comparison is not a valid likelihood-ratio
# test; treat the p-value as a rough guide and compare AIC as well.
anova(m7_regsub, m5_regsub, test = "Chisq") # Null not rejected (p = 0.06)
## Analysis of Deviance Table
##
## Model 1: Completed ~ AgeGroup + LocationType + black + assisted + hospital +
## Odenton + obgyn
## Model 2: Completed ~ AgeGroup + black + assisted + WhiteMarsh + obgyn
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1405 1716
## 2 1407 1721 -2 -5.64 0.06 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Neither comparison rejects the null at the 0.05 level, so the smaller m5
# model is preferred. Eliminate m6.
# m4_regsub: m5_regsub with `obgyn` removed.
m4_regsub <- glm(Completed ~ AgeGroup + black + assisted + WhiteMarsh, data = gard,
family = binomial("logit"))
summary(m4_regsub) # AIC = 1736.5, Residual deviance = 1726.5
##
## Call:
## glm(formula = Completed ~ AgeGroup + black + assisted + WhiteMarsh,
## family = binomial("logit"), data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.338 -0.886 -0.718 1.302 2.064
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2888 0.0972 -2.97 0.00296 **
## AgeGroup -0.4450 0.1221 -3.65 0.00027 ***
## black -0.4901 0.1345 -3.64 0.00027 ***
## assisted -0.7794 0.1733 -4.50 6.9e-06 ***
## WhiteMarsh 0.6596 0.1746 3.78 0.00016 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1726.5 on 1408 degrees of freedom
## AIC: 1737
##
## Number of Fisher Scoring iterations: 4
# Likelihood-ratio test for dropping `obgyn` from m5_regsub (nested models).
anova(m5_regsub, m4_regsub, test = "Chisq") # Null rejected (p = 0.024); keep m5.
## Analysis of Deviance Table
##
## Model 1: Completed ~ AgeGroup + black + assisted + WhiteMarsh + obgyn
## Model 2: Completed ~ AgeGroup + black + assisted + WhiteMarsh
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1407 1721
## 2 1408 1727 -1 -5.12 0.024 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
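# A chi-squared (likelihood-ratio) test shows that m5_regsub fits
# significantly better than m4_regsub (p = 0.024). Although m6_regsub and
# m7_regsub have lower AIC values, their residual deviances are not
# significantly lower than that of m5_regsub at the 0.05 level. Note that
# m5_regsub is not nested in m6_regsub or m7_regsub (it contains WhiteMarsh,
# which they lack), so those two comparisons are only approximate.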
Including Interactions
# First, I'll use the best model without interactions as a basis. Logical
# interactions include age group with location type or race, and race with
# location type or insurance type.
# m7_inter1A: seven main effects plus the LocationType-by-AgeGroup interaction.
# `LocationType * AgeGroup` expands to both main effects and their interaction.
m7_inter1A <- glm(Completed ~ black + LocationType * AgeGroup + WhiteMarsh +
family + assisted + hospital, data = gard, family = binomial("logit"))
summary(m7_inter1A) # AIC = 1718.2, Residual deviance = 1700.2
##
## Call:
## glm(formula = Completed ~ black + LocationType * AgeGroup + WhiteMarsh +
## family + assisted + hospital, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.598 -0.903 -0.694 1.178 2.057
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.00137 0.12190 -0.01 0.991
## black -0.53386 0.13689 -3.90 9.6e-05 ***
## LocationType -0.85218 0.21299 -4.00 6.3e-05 ***
## AgeGroup -0.68361 0.14420 -4.74 2.1e-06 ***
## WhiteMarsh 0.48413 0.19838 2.44 0.015 *
## family -0.32234 0.15718 -2.05 0.040 *
## assisted -0.44691 0.20625 -2.17 0.030 *
## hospital 0.46642 0.24817 1.88 0.060 .
## LocationType:AgeGroup 1.08611 0.27346 3.97 7.1e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1700.2 on 1404 degrees of freedom
## AIC: 1718
##
## Number of Fisher Scoring iterations: 4
# Sequential analysis of deviance; the interaction term is entered last,
# after all main effects.
anova(m7_inter1A, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## black 1 27.12 1411 1769 1.9e-07 ***
## LocationType 1 12.12 1410 1757 0.00050 ***
## AgeGroup 1 7.14 1409 1750 0.00755 **
## WhiteMarsh 1 11.70 1408 1738 0.00062 ***
## family 1 6.06 1407 1732 0.01383 *
## assisted 1 12.06 1406 1720 0.00052 ***
## hospital 1 4.05 1405 1716 0.04423 *
## LocationType:AgeGroup 1 15.54 1404 1700 8.1e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All variables contributed.
# Exponentiate the coefficients and their confidence limits to report odds
# ratios with 95% confidence intervals.
exp(cbind(OR = coef(m7_inter1A), confint(m7_inter1A)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.9986 0.7861 1.2682
## black 0.5863 0.4471 0.7650
## LocationType 0.4265 0.2798 0.6454
## AgeGroup 0.5048 0.3800 0.6690
## WhiteMarsh 1.6228 1.1004 2.3968
## family 0.7245 0.5318 0.9852
## assisted 0.6396 0.4252 0.9556
## hospital 1.5943 0.9779 2.5936
## LocationType:AgeGroup 2.9627 1.7309 5.0609
# Build the interaction as an explicit column (element-wise product of
# LocationType and AgeGroup) and fit a model containing only that column, with
# no main effects. Assuming both inputs are 0/1 indicators, the product is 1
# only when both are 1 -- TODO confirm the coding of LocationType.
# Note: the column name `interaction` matches base R's interaction() function;
# inside the formula the column in `gard` is the one that is used.
gard$interaction <- gard$LocationType * gard$AgeGroup
m0_interaction <- glm(Completed ~ interaction, data = gard, family = binomial("logit"))
summary(m0_interaction)
##
## Call:
## glm(formula = Completed ~ interaction, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.901 -0.901 -0.901 1.482 1.513
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.6920 0.0598 -11.58 <2e-16 ***
## interaction -0.0702 0.1830 -0.38 0.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1795.9 on 1411 degrees of freedom
## AIC: 1800
##
## Number of Fisher Scoring iterations: 4
# Odds ratios with 95% confidence intervals for the interaction-only model.
exp(cbind(OR = coef(m0_interaction), confint(m0_interaction)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.5006 0.4449 0.5624
## interaction 0.9322 0.6467 1.3273
# m7_inter1B: same seven main effects as m7_inter1A, but with a
# black-by-assisted interaction instead of LocationType-by-AgeGroup.
m7_inter1B <- glm(Completed ~ black * assisted + LocationType + AgeGroup + WhiteMarsh +
family + hospital, data = gard, family = binomial("logit"))
summary(m7_inter1B) # AIC = 1733.7, Residual deviance = 1715.7
##
## Call:
## glm(formula = Completed ~ black * assisted + LocationType + AgeGroup +
## WhiteMarsh + family + hospital, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.510 -0.960 -0.767 1.233 2.083
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.130 0.119 -1.09 0.2748
## black -0.472 0.151 -3.13 0.0018 **
## assisted -0.537 0.255 -2.10 0.0357 *
## LocationType -0.407 0.178 -2.29 0.0220 *
## AgeGroup -0.396 0.124 -3.19 0.0014 **
## WhiteMarsh 0.384 0.195 1.97 0.0485 *
## family -0.383 0.155 -2.46 0.0138 *
## hospital 0.501 0.247 2.02 0.0429 *
## black:assisted -0.107 0.344 -0.31 0.7558
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1715.7 on 1404 degrees of freedom
## AIC: 1734
##
## Number of Fisher Scoring iterations: 4
# Sequential analysis of deviance; the black:assisted interaction is entered last.
anova(m7_inter1B, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## black 1 27.12 1411 1769 1.9e-07 ***
## assisted 1 18.67 1410 1750 1.6e-05 ***
## LocationType 1 1.76 1409 1748 0.18468
## AgeGroup 1 10.39 1408 1738 0.00127 **
## WhiteMarsh 1 12.41 1407 1726 0.00043 ***
## family 1 5.84 1406 1720 0.01562 *
## hospital 1 4.05 1405 1716 0.04423 *
## black:assisted 1 0.10 1404 1716 0.75554
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Not all variables contributed.
# Main-effects terms plus a black x AgeGroup interaction
# (black * AgeGroup expands to black + AgeGroup + black:AgeGroup).
m7_inter1C <- glm(Completed ~ black * AgeGroup + assisted + LocationType + WhiteMarsh +
family + hospital, data = gard, family = binomial("logit"))
summary(m7_inter1C) # AIC = 1731.0, Residual deviance = 1713.0
##
## Call:
## glm(formula = Completed ~ black * AgeGroup + assisted + LocationType +
## WhiteMarsh + family + hospital, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.537 -0.944 -0.731 1.204 1.986
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.0636 0.1235 -0.51 0.60683
## black -0.7052 0.1877 -3.76 0.00017 ***
## AgeGroup -0.5134 0.1424 -3.61 0.00031 ***
## assisted -0.5718 0.2055 -2.78 0.00539 **
## LocationType -0.4132 0.1774 -2.33 0.01987 *
## WhiteMarsh 0.3876 0.1947 1.99 0.04655 *
## family -0.3907 0.1556 -2.51 0.01206 *
## hospital 0.4915 0.2475 1.99 0.04701 *
## black:AgeGroup 0.4439 0.2651 1.67 0.09412 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796 on 1412 degrees of freedom
## Residual deviance: 1713 on 1404 degrees of freedom
## AIC: 1731
##
## Number of Fisher Scoring iterations: 4
# Sequential likelihood-ratio tests (order-dependent); the last row
# (black:AgeGroup) is the test of the interaction.
anova(m7_inter1C, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## black 1 27.12 1411 1769 1.9e-07 ***
## AgeGroup 1 3.34 1410 1766 0.06749 .
## assisted 1 24.87 1409 1741 6.1e-07 ***
## LocationType 1 2.61 1408 1738 0.10613
## WhiteMarsh 1 12.41 1407 1726 0.00043 ***
## family 1 5.84 1406 1720 0.01562 *
## hospital 1 4.05 1405 1716 0.04423 *
## black:AgeGroup 1 2.80 1404 1713 0.09429 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Not all variables contributed.
# Main-effects terms plus a black x LocationType interaction
# (black * LocationType expands to black + LocationType + black:LocationType).
m7_inter1D <- glm(Completed ~ black * LocationType + AgeGroup + assisted + WhiteMarsh +
family + hospital, data = gard, family = binomial("logit"))
summary(m7_inter1D) # AIC = 1731.6, Residual deviance = 1713.6
##
## Call:
## glm(formula = Completed ~ black * LocationType + AgeGroup + assisted +
## WhiteMarsh + family + hospital, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.500 -0.953 -0.758 1.247 2.137
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.162 0.121 -1.34 0.1792
## black -0.356 0.164 -2.17 0.0298 *
## LocationType -0.270 0.198 -1.36 0.1726
## AgeGroup -0.392 0.125 -3.15 0.0017 **
## assisted -0.582 0.205 -2.84 0.0045 **
## WhiteMarsh 0.396 0.195 2.04 0.0416 *
## family -0.382 0.155 -2.46 0.0138 *
## hospital 0.498 0.248 2.00 0.0450 *
## black:LocationType -0.415 0.287 -1.44 0.1489
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1713.6 on 1404 degrees of freedom
## AIC: 1732
##
## Number of Fisher Scoring iterations: 4
# Sequential likelihood-ratio tests (order-dependent); the last row
# (black:LocationType) is the test of the interaction.
anova(m7_inter1D, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## black 1 27.12 1411 1769 1.9e-07 ***
## LocationType 1 12.12 1410 1757 0.00050 ***
## AgeGroup 1 7.14 1409 1750 0.00755 **
## assisted 1 11.57 1408 1738 0.00067 ***
## WhiteMarsh 1 12.41 1407 1726 0.00043 ***
## family 1 5.84 1406 1720 0.01562 *
## hospital 1 4.05 1405 1716 0.04423 *
## black:LocationType 1 2.11 1404 1714 0.14665
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Not all variables contributed.
# black interacted with both insurance indicators:
# black * (assisted + hospital) expands to the three main effects plus
# black:assisted and black:hospital.
m7_inter1E <- glm(Completed ~ black * (assisted + hospital) + LocationType +
AgeGroup + WhiteMarsh + family, data = gard, family = binomial("logit"))
summary(m7_inter1E) # AIC = 1735.4, Residual deviance = 1715.4
##
## Call:
## glm(formula = Completed ~ black * (assisted + hospital) + LocationType +
## AgeGroup + WhiteMarsh + family, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.539 -0.958 -0.767 1.236 2.082
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.137 0.120 -1.14 0.2537
## black -0.449 0.158 -2.85 0.0044 **
## assisted -0.532 0.256 -2.08 0.0373 *
## hospital 0.575 0.289 1.99 0.0465 *
## LocationType -0.405 0.178 -2.28 0.0228 *
## AgeGroup -0.395 0.125 -3.17 0.0015 **
## WhiteMarsh 0.381 0.195 1.95 0.0507 .
## family -0.381 0.155 -2.45 0.0143 *
## black:assisted -0.130 0.346 -0.37 0.7077
## black:hospital -0.264 0.531 -0.50 0.6188
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1715.4 on 1403 degrees of freedom
## AIC: 1735
##
## Number of Fisher Scoring iterations: 4
# Sequential likelihood-ratio tests (order-dependent); the last two rows
# (black:assisted, black:hospital) are the tests of the interactions.
anova(m7_inter1E, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## black 1 27.12 1411 1769 1.9e-07 ***
## assisted 1 18.67 1410 1750 1.6e-05 ***
## hospital 1 4.39 1409 1746 0.0361 *
## LocationType 1 3.73 1408 1742 0.0536 .
## AgeGroup 1 10.59 1407 1732 0.0011 **
## WhiteMarsh 1 9.67 1406 1722 0.0019 **
## family 1 6.08 1405 1716 0.0137 *
## black:assisted 1 0.10 1404 1716 0.7555
## black:hospital 1 0.25 1403 1715 0.6165
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Not all variables contributed.
# Full three-way factorial of black, LocationType and AgeGroup: the `*` chain
# expands to all three main effects, all three two-way interactions and the
# three-way interaction.
m7_inter2A <- glm(Completed ~ black * LocationType * AgeGroup + assisted + hospital +
WhiteMarsh + family, data = gard, family = binomial("logit"))
summary(m7_inter2A) # AIC = 1720.5, Residual deviance = 1696.5
##
## Call:
## glm(formula = Completed ~ black * LocationType * AgeGroup + assisted +
## hospital + WhiteMarsh + family, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.601 -0.880 -0.734 1.177 2.101
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.000214 0.132261 0.00 0.99871
## black -0.538195 0.237439 -2.27 0.02341 *
## LocationType -0.743866 0.243793 -3.05 0.00228 **
## AgeGroup -0.748667 0.162287 -4.61 4e-06 ***
## assisted -0.430680 0.207913 -2.07 0.03832 *
## hospital 0.461043 0.248793 1.85 0.06387 .
## WhiteMarsh 0.496215 0.198313 2.50 0.01234 *
## family -0.330555 0.157201 -2.10 0.03549 *
## black:LocationType -0.377147 0.396865 -0.95 0.34195
## black:AgeGroup 0.309645 0.326496 0.95 0.34293
## LocationType:AgeGroup 1.134612 0.337040 3.37 0.00076 ***
## black:LocationType:AgeGroup -0.158033 0.584988 -0.27 0.78705
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1696.5 on 1401 degrees of freedom
## AIC: 1721
##
## Number of Fisher Scoring iterations: 4
# Sequential likelihood-ratio tests (order-dependent); the interaction terms
# are entered last, ending with the three-way black:LocationType:AgeGroup.
anova(m7_inter2A, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## black 1 27.12 1411 1769 1.9e-07 ***
## LocationType 1 12.12 1410 1757 0.00050 ***
## AgeGroup 1 7.14 1409 1750 0.00755 **
## assisted 1 11.57 1408 1738 0.00067 ***
## hospital 1 6.55 1407 1732 0.01048 *
## WhiteMarsh 1 9.67 1406 1722 0.00187 **
## family 1 6.08 1405 1716 0.01367 *
## black:LocationType 1 2.11 1404 1714 0.14665
## black:AgeGroup 1 2.07 1403 1712 0.15000
## LocationType:AgeGroup 1 14.96 1402 1697 0.00011 ***
## black:LocationType:AgeGroup 1 0.07 1401 1697 0.78711
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Not all variables contributed.
# black interacted separately with LocationType and AgeGroup
# (adds black:LocationType and black:AgeGroup, no LocationType:AgeGroup term).
m7_inter2B <- glm(Completed ~ black * (LocationType + AgeGroup) + assisted +
hospital + WhiteMarsh + family, data = gard, family = binomial("logit"))
summary(m7_inter2B) # AIC = 1731.6, Residual deviance = 1711.6
##
## Call:
## glm(formula = Completed ~ black * (LocationType + AgeGroup) +
## assisted + hospital + WhiteMarsh + family, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.524 -0.938 -0.738 1.221 2.058
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.102 0.128 -0.80 0.42636
## black -0.567 0.221 -2.56 0.01035 *
## LocationType -0.303 0.200 -1.52 0.12876
## AgeGroup -0.493 0.143 -3.44 0.00058 ***
## assisted -0.573 0.206 -2.78 0.00536 **
## hospital 0.491 0.248 1.98 0.04775 *
## WhiteMarsh 0.396 0.195 2.04 0.04165 *
## family -0.391 0.155 -2.51 0.01192 *
## black:LocationType -0.341 0.291 -1.17 0.24225
## black:AgeGroup 0.389 0.270 1.44 0.14952
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1711.6 on 1403 degrees of freedom
## AIC: 1732
##
## Number of Fisher Scoring iterations: 4
# Sequential likelihood-ratio tests (order-dependent); the last two rows
# (black:LocationType, black:AgeGroup) are the tests of the interactions.
anova(m7_inter2B, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## black 1 27.12 1411 1769 1.9e-07 ***
## LocationType 1 12.12 1410 1757 0.00050 ***
## AgeGroup 1 7.14 1409 1750 0.00755 **
## assisted 1 11.57 1408 1738 0.00067 ***
## hospital 1 6.55 1407 1732 0.01048 *
## WhiteMarsh 1 9.67 1406 1722 0.00187 **
## family 1 6.08 1405 1716 0.01367 *
## black:LocationType 1 2.11 1404 1714 0.14665
## black:AgeGroup 1 2.07 1403 1712 0.15000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Not all variables contributed.
# AgeGroup interacted separately with black and LocationType
# (adds black:AgeGroup and LocationType:AgeGroup).
m7_inter2C <- glm(Completed ~ (black + LocationType) * AgeGroup + assisted +
hospital + WhiteMarsh + family, data = gard, family = binomial("logit"))
summary(m7_inter2C) # AIC = 1719.0, Residual deviance = 1699.0
##
## Call:
## glm(formula = Completed ~ (black + LocationType) * AgeGroup +
## assisted + hospital + WhiteMarsh + family, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.612 -0.894 -0.709 1.162 2.032
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.0361 0.1269 0.28 0.77596
## black -0.6743 0.1899 -3.55 0.00038 ***
## LocationType -0.8444 0.2139 -3.95 7.9e-05 ***
## AgeGroup -0.7478 0.1562 -4.79 1.7e-06 ***
## assisted -0.4452 0.2065 -2.16 0.03106 *
## hospital 0.4625 0.2480 1.86 0.06221 .
## WhiteMarsh 0.4819 0.1984 2.43 0.01513 *
## family -0.3307 0.1575 -2.10 0.03572 *
## black:AgeGroup 0.2924 0.2697 1.08 0.27833
## LocationType:AgeGroup 1.0386 0.2767 3.75 0.00017 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796 on 1412 degrees of freedom
## Residual deviance: 1699 on 1403 degrees of freedom
## AIC: 1719
##
## Number of Fisher Scoring iterations: 4
# Sequential likelihood-ratio tests (order-dependent); the last two rows
# (black:AgeGroup, LocationType:AgeGroup) are the tests of the interactions.
anova(m7_inter2C, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## black 1 27.12 1411 1769 1.9e-07 ***
## LocationType 1 12.12 1410 1757 0.00050 ***
## AgeGroup 1 7.14 1409 1750 0.00755 **
## assisted 1 11.57 1408 1738 0.00067 ***
## hospital 1 6.55 1407 1732 0.01048 *
## WhiteMarsh 1 9.67 1406 1722 0.00187 **
## family 1 6.08 1405 1716 0.01367 *
## black:AgeGroup 1 2.80 1404 1713 0.09429 .
## LocationType:AgeGroup 1 13.91 1403 1699 0.00019 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Not all variables contributed (black:AgeGroup, p = 0.094, is not
# significant at the 0.05 level).
# LocationType interacted separately with black and AgeGroup
# (adds black:LocationType and AgeGroup:LocationType).
m7_inter2D <- glm(Completed ~ (black + AgeGroup) * LocationType + assisted +
hospital + WhiteMarsh + family, data = gard, family = binomial("logit"))
summary(m7_inter2D) # AIC = 1717.5, Residual deviance = 1697.5
##
## Call:
## glm(formula = Completed ~ (black + AgeGroup) * LocationType +
## assisted + hospital + WhiteMarsh + family, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.584 -0.890 -0.727 1.196 2.082
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.043 0.124 -0.35 0.7293
## black -0.379 0.165 -2.29 0.0217 *
## AgeGroup -0.678 0.144 -4.71 2.5e-06 ***
## LocationType -0.716 0.228 -3.14 0.0017 **
## assisted -0.439 0.207 -2.12 0.0338 *
## hospital 0.463 0.249 1.86 0.0627 .
## WhiteMarsh 0.499 0.198 2.52 0.0117 *
## family -0.321 0.157 -2.05 0.0404 *
## black:LocationType -0.470 0.289 -1.62 0.1042
## AgeGroup:LocationType 1.115 0.276 4.04 5.4e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1697.5 on 1403 degrees of freedom
## AIC: 1718
##
## Number of Fisher Scoring iterations: 4
# Sequential likelihood-ratio tests (order-dependent); the last two rows
# (black:LocationType, AgeGroup:LocationType) are the tests of the interactions.
anova(m7_inter2D, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## black 1 27.12 1411 1769 1.9e-07 ***
## AgeGroup 1 3.34 1410 1766 0.06749 .
## LocationType 1 15.91 1409 1750 6.6e-05 ***
## assisted 1 11.57 1408 1738 0.00067 ***
## hospital 1 6.55 1407 1732 0.01048 *
## WhiteMarsh 1 9.67 1406 1722 0.00187 **
## family 1 6.08 1405 1716 0.01367 *
## black:LocationType 1 2.11 1404 1714 0.14665
## AgeGroup:LocationType 1 16.10 1403 1698 6.0e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Not all variables contributed.
# The best interaction models can be compared to the best non-interaction
# model.
# Likelihood-ratio test of the single extra term in m7_inter1A
# (LocationType:AgeGroup) against the main-effects model. The larger model is
# listed first, which is why Df and Deviance print as negative.
anova(m7_inter1A, m7_percat, test = "Chisq") # Null Rejected
## Analysis of Deviance Table
##
## Model 1: Completed ~ black + LocationType * AgeGroup + WhiteMarsh + family +
## assisted + hospital
## Model 2: Completed ~ black + LocationType + AgeGroup + WhiteMarsh + family +
## hospital + assisted
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1404 1700
## 2 1405 1716 -1 -15.5 8.1e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Likelihood-ratio test of the extra black:LocationType term in m7_inter2D
# against m7_inter1A (larger model listed first, hence the negative Df).
# Not rejected at 0.05, so the smaller model m7_inter1A is retained.
anova(m7_inter2D, m7_inter1A, test = "Chisq") # Null not Rejected
## Analysis of Deviance Table
##
## Model 1: Completed ~ (black + AgeGroup) * LocationType + assisted + hospital +
## WhiteMarsh + family
## Model 2: Completed ~ black + LocationType * AgeGroup + WhiteMarsh + family +
## assisted + hospital
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1403 1698
## 2 1404 1700 -1 -2.67 0.1
Some Model Notes
## Comparing nested models:
##   anova(MODEL1, MODEL2, test='Chisq')
##     Tests the null hypothesis that proportions for each group are the same
##     (i.e. the extra terms add nothing) -- but the models must be nested.
##     When the null is not rejected at the 0.05 level, go by parsimony (use
##     the smaller model).
## Comparing variables in a single model:
##   Wald's Test: use summary(MODEL); then read the p-value.
##   anova(MODEL, test='LRT')
##     Additive -- Does adding the next independent variable improve the
##     model based on the Deviance Test? (Null = no)
##   drop1(MODEL, test='LRT')
##     Subtractive -- Does removing the next independent variable reduce the
##     predictive power of the model based on the Deviance Test (i.e. does it
##     increase the deviance)? (Null = 0)
Summary of Best Models
# Best Non-Interacting Model:
# Coefficient table (logit scale) with Wald z-tests for the main-effects model.
summary(m7_percat) # AIC = 1731.8, Residual deviance = 1715.8
##
## Call:
## glm(formula = Completed ~ black + LocationType + AgeGroup + WhiteMarsh +
## family + hospital + assisted, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.512 -0.963 -0.754 1.231 2.063
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.125 0.118 -1.06 0.28828
## black -0.493 0.135 -3.64 0.00027 ***
## LocationType -0.402 0.177 -2.27 0.02305 *
## AgeGroup -0.398 0.124 -3.20 0.00136 **
## WhiteMarsh 0.383 0.195 1.97 0.04882 *
## family -0.382 0.155 -2.46 0.01406 *
## hospital 0.500 0.248 2.02 0.04325 *
## assisted -0.584 0.205 -2.85 0.00437 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1715.8 on 1405 degrees of freedom
## AIC: 1732
##
## Number of Fisher Scoring iterations: 4
# Predicted P(Completed) under the main-effects model m7_percat.
#
# Every argument is a 0/1 indicator and defaults to 0, so only the indicators
# that are "on" need to be named in a call.
#   LocationType: 0 = suburban, 1 = urban
#   AgeGroup:     0 = age 11-17, 1 = age 18-26
#
# NOTE: the coefficients are hand-copied from the fitted m7_percat (they agree
# with summary(m7_percat) to three decimals). If the model is refit, update
# them here or switch to
# predict(m7_percat, newdata = ..., type = "response").
percat_prob <- function(black = 0, LocationType = 0, AgeGroup = 0,
                        WhiteMarsh = 0, family = 0, assisted = 0,
                        hospital = 0) {
  eta <- -0.1253 - 0.4929 * black - 0.4016 * LocationType -
    0.3981 * AgeGroup + 0.3835 * WhiteMarsh - 0.3816 * family -
    0.5843 * assisted + 0.5004 * hospital
  plogis(eta) # inverse logit: exp(eta) / (1 + exp(eta))
}

# Section labels give the (black, WhiteMarsh, family, assisted, hospital)
# indicators; within each section LocationType and AgeGroup are varied.

# 00000 Model
# Not black, suburban, age 11-17, Not White Marsh, Not family, Not assisted or hospital insurance
percat_prob()
## [1] 0.4687
# Not black, suburban, age 18-26, Not White Marsh, Not family, Not assisted or hospital insurance
percat_prob(AgeGroup = 1)
## [1] 0.3721
# Not black, urban, age 11-17, Not White Marsh, Not family, Not assisted or hospital insurance
percat_prob(LocationType = 1)
## [1] 0.3712
# Not black, urban, age 18-26, Not White Marsh, Not family, Not assisted or hospital insurance
percat_prob(LocationType = 1, AgeGroup = 1)
## [1] 0.2839
# 11110 Model
# black, suburban, age 11-17, attends White Marsh, family practice, assisted insurance
percat_prob(black = 1, WhiteMarsh = 1, family = 1, assisted = 1)
## [1] 0.2314
# black, suburban, age 18-26, attends White Marsh, family practice, assisted insurance
percat_prob(black = 1, AgeGroup = 1, WhiteMarsh = 1, family = 1, assisted = 1)
## [1] 0.1682
# black, urban, age 11-17, attends White Marsh, family practice, assisted insurance
percat_prob(black = 1, LocationType = 1, WhiteMarsh = 1, family = 1, assisted = 1)
## [1] 0.1677
# black, urban, age 18-26, attends White Marsh, family practice, assisted insurance
percat_prob(black = 1, LocationType = 1, AgeGroup = 1, WhiteMarsh = 1, family = 1,
            assisted = 1)
## [1] 0.1192
# 01000 Model
# Not black, suburban, age 11-17, White Marsh, Not family, Not assisted or hospital insurance
percat_prob(WhiteMarsh = 1)
## [1] 0.5642
# Not black, suburban, age 18-26, White Marsh, Not family, Not assisted or hospital insurance
percat_prob(AgeGroup = 1, WhiteMarsh = 1)
## [1] 0.4651
# Not black, urban, age 11-17, White Marsh, Not family, Not assisted or hospital insurance
percat_prob(LocationType = 1, WhiteMarsh = 1)
## [1] 0.4642
# Not black, urban, age 18-26, White Marsh, Not family, Not assisted or hospital insurance
percat_prob(LocationType = 1, AgeGroup = 1, WhiteMarsh = 1)
## [1] 0.3678
# 10110 Model
# black, suburban, age 11-17, not White Marsh, family practice, assisted insurance
percat_prob(black = 1, family = 1, assisted = 1)
## [1] 0.1702
# black, suburban, age 18-26, not White Marsh, family practice, assisted insurance
percat_prob(black = 1, AgeGroup = 1, family = 1, assisted = 1)
## [1] 0.1211
# black, urban, age 11-17, not White Marsh, family practice, assisted insurance
percat_prob(black = 1, LocationType = 1, family = 1, assisted = 1)
## [1] 0.1207
# black, urban, age 18-26, not White Marsh, family practice, assisted insurance
percat_prob(black = 1, LocationType = 1, AgeGroup = 1, family = 1, assisted = 1)
## [1] 0.08442
# 01001 Model
# Not black, suburban, age 11-17, White Marsh, Not family, hospital insurance
percat_prob(WhiteMarsh = 1, hospital = 1)
## [1] 0.681
# Not black, suburban, age 18-26, White Marsh, Not family, hospital insurance
percat_prob(AgeGroup = 1, WhiteMarsh = 1, hospital = 1)
## [1] 0.5892
# Not black, urban, age 11-17, White Marsh, Not family, hospital insurance
percat_prob(LocationType = 1, WhiteMarsh = 1, hospital = 1)
## [1] 0.5883
# Not black, urban, age 18-26, White Marsh, Not family, hospital insurance
percat_prob(LocationType = 1, AgeGroup = 1, WhiteMarsh = 1, hospital = 1)
## [1] 0.4897
# Best Model With Interactions:
# Coefficient table (logit scale) with Wald z-tests for the model that adds
# the LocationType:AgeGroup interaction to the main effects.
summary(m7_inter1A) # AIC = 1718.2, Residual deviance = 1700.2
##
## Call:
## glm(formula = Completed ~ black + LocationType * AgeGroup + WhiteMarsh +
## family + assisted + hospital, family = binomial("logit"),
## data = gard)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.598 -0.903 -0.694 1.178 2.057
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.00137 0.12190 -0.01 0.991
## black -0.53386 0.13689 -3.90 9.6e-05 ***
## LocationType -0.85218 0.21299 -4.00 6.3e-05 ***
## AgeGroup -0.68361 0.14420 -4.74 2.1e-06 ***
## WhiteMarsh 0.48413 0.19838 2.44 0.015 *
## family -0.32234 0.15718 -2.05 0.040 *
## assisted -0.44691 0.20625 -2.17 0.030 *
## hospital 0.46642 0.24817 1.88 0.060 .
## LocationType:AgeGroup 1.08611 0.27346 3.97 7.1e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1700.2 on 1404 degrees of freedom
## AIC: 1718
##
## Number of Fisher Scoring iterations: 4
# All variables contributed.
# Predicted P(Completed) under the interaction model m7_inter1A.
#
# Every argument is a 0/1 indicator and defaults to 0, so only the indicators
# that are "on" need to be named in a call.
#   LocationType: 0 = suburban, 1 = urban
#   AgeGroup:     0 = age 11-17, 1 = age 18-26
# The LocationType:AgeGroup term is derived as LocationType * AgeGroup, so it
# cannot get out of step with the two main-effect indicators.
#
# NOTE: the coefficients are hand-copied from the fitted m7_inter1A (they agree
# with summary(m7_inter1A) to the printed precision). If the model is refit,
# update them here or switch to
# predict(m7_inter1A, newdata = ..., type = "response").
inter1A_prob <- function(black = 0, LocationType = 0, AgeGroup = 0,
                         WhiteMarsh = 0, family = 0, assisted = 0,
                         hospital = 0) {
  eta <- -0.001367 - 0.53386 * black - 0.852176 * LocationType -
    0.683608 * AgeGroup + 0.484134 * WhiteMarsh - 0.32234 * family -
    0.446915 * assisted + 0.46642 * hospital +
    1.08611 * (LocationType * AgeGroup)
  plogis(eta) # inverse logit: exp(eta) / (1 + exp(eta))
}

# Section labels give the (black, WhiteMarsh, family, assisted, hospital)
# indicators; within each section LocationType and AgeGroup are varied.

# 00000 Model
# Not black, suburban, age 11-17, Not White Marsh, Not family, Not assisted or hospital insurance
inter1A_prob()
## [1] 0.4997
# Not black, urban, age 11-17, Not White Marsh, Not family, Not assisted or hospital insurance
inter1A_prob(LocationType = 1)
## [1] 0.2987
# Not black, suburban, age 18-26, Not White Marsh, Not family, Not assisted or hospital insurance
inter1A_prob(AgeGroup = 1)
## [1] 0.3352
# Not black, urban, age 18-26, Not White Marsh, Not family, Not assisted or hospital insurance
inter1A_prob(LocationType = 1, AgeGroup = 1)
## [1] 0.3891
# 11110 Model
# black, suburban, age 11-17, attends White Marsh, family practice, assisted insurance
inter1A_prob(black = 1, WhiteMarsh = 1, family = 1, assisted = 1)
## [1] 0.3057
# black, urban, age 11-17, attends White Marsh, family practice, assisted insurance
inter1A_prob(black = 1, LocationType = 1, WhiteMarsh = 1, family = 1, assisted = 1)
## [1] 0.1581
# black, suburban, age 18-26, attends White Marsh, family practice, assisted insurance
inter1A_prob(black = 1, AgeGroup = 1, WhiteMarsh = 1, family = 1, assisted = 1)
## [1] 0.1818
# black, urban, age 18-26, attends White Marsh, family practice, assisted insurance
inter1A_prob(black = 1, LocationType = 1, AgeGroup = 1, WhiteMarsh = 1, family = 1,
             assisted = 1)
## [1] 0.2193
# 01000 Model
# Not black, suburban, age 11-17, White Marsh, Not family, Not assisted or hospital insurance
inter1A_prob(WhiteMarsh = 1)
## [1] 0.6184
# Not black, urban, age 11-17, White Marsh, Not family, Not assisted or hospital insurance
inter1A_prob(LocationType = 1, WhiteMarsh = 1)
## [1] 0.4087
# Not black, suburban, age 18-26, White Marsh, Not family, Not assisted or hospital insurance
inter1A_prob(AgeGroup = 1, WhiteMarsh = 1)
## [1] 0.45
# Not black, urban, age 18-26, White Marsh, Not family, Not assisted or hospital insurance
inter1A_prob(LocationType = 1, AgeGroup = 1, WhiteMarsh = 1)
## [1] 0.5083
# 10110 Model
# black, suburban, age 11-17, not White Marsh, family practice, assisted insurance
inter1A_prob(black = 1, family = 1, assisted = 1)
## [1] 0.2134
# black, urban, age 11-17, not White Marsh, family practice, assisted insurance
inter1A_prob(black = 1, LocationType = 1, family = 1, assisted = 1)
## [1] 0.1037
# black, suburban, age 18-26, not White Marsh, family practice, assisted insurance
inter1A_prob(black = 1, AgeGroup = 1, family = 1, assisted = 1)
## [1] 0.1205
# black, urban, age 18-26, not White Marsh, family practice, assisted insurance
inter1A_prob(black = 1, LocationType = 1, AgeGroup = 1, family = 1, assisted = 1)
## [1] 0.1475
# 01001 Model
# Not black, suburban, age 11-17, White Marsh, Not family, hospital insurance
inter1A_prob(WhiteMarsh = 1, hospital = 1)
## [1] 0.721
# Not black, urban, age 11-17, White Marsh, Not family, hospital insurance
inter1A_prob(LocationType = 1, WhiteMarsh = 1, hospital = 1)
## [1] 0.5242
# Not black, suburban, age 18-26, White Marsh, Not family, hospital insurance
inter1A_prob(AgeGroup = 1, WhiteMarsh = 1, hospital = 1)
## [1] 0.566
# Not black, urban, age 18-26, White Marsh, Not family, hospital insurance
inter1A_prob(LocationType = 1, AgeGroup = 1, WhiteMarsh = 1, hospital = 1)
## [1] 0.6223
Other Code
# Odds ratios with 95% profile-likelihood confidence intervals for m8_allsigs:
# exponentiate the logit-scale coefficients and their confint() bounds.
exp(cbind(OR = coef(m8_allsigs), confint(m8_allsigs)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.7945 0.5629 1.1173
## white 1.0175 0.7425 1.4000
## black 0.6114 0.4289 0.8726
## LocationType 0.6567 0.4633 0.9276
## WhiteMarsh 1.3911 0.9296 2.0823
## obgyn 0.8502 0.6264 1.1511
## family 0.5799 0.4136 0.8114
## assisted 0.6007 0.3997 0.8961
## hospital 1.6800 1.0339 2.7248
# Diagnostic plot: residuals against fitted probabilities for m8_allsigs.
# residuals() is called with its default type (deviance residuals for a glm).
plot(residuals(m8_allsigs) ~ fitted(m8_allsigs), main = "Residuals for m8_allsigs Model",
xlab = "Fitted Values - P(Completion)", ylab = "Residuals", pch = 15)
# Fitted completion probabilities plotted against the Location column of gard.
plot(fitted(m8_allsigs) ~ gard$Location, main = "Fitted Values for m8_allsigs Model",
ylab = "P(Completion)", xlab = "Location", pch = 15)
# Ad-hoc nested subsets of gard: each data frame below filters the previous
# one by a further indicator column.
# black -> assisted -> LocationType == 1 -> not White Marsh -> family practice
black <- gard[gard$black == 1, ]
black1 <- black[black$assisted == 1, ]
black11 <- black1[black1$LocationType == 1, ]
black111 <- black11[black11$WhiteMarsh == 0, ]
black1111 <- black111[black111$family == 1, ]
# Separate branch: black patients in family practice.
black2 <- black[black$family == 1, ]
# NOTE(review): `WhiteMarsh` is used here as a data frame, but only the column
# gard$WhiteMarsh is visible in this part of the file -- presumably it was
# created earlier (e.g. as the White Marsh rows of gard); confirm before running.
# hospital insurance -> not black -> not family -> AgeGroup == 1 -> LocationType == 1
hospital <- WhiteMarsh[WhiteMarsh$hospital == 1, ]
hospital1 <- hospital[hospital$black == 0, ]
hospital2 <- hospital1[hospital1$family == 0, ]
hospital3 <- hospital2[hospital2$AgeGroup == 1, ]
hospital4 <- hospital3[hospital3$LocationType == 1, ]
# Separate branch: rows of WhiteMarsh with LocationType == 1.
hospital5 <- WhiteMarsh[WhiteMarsh$LocationType == 1, ]