X Wang - Lab 2 Appendix

GEOG 5023: Quantitative Methods in Geography

library(arm)
## Loading required package: MASS
## Loading required package: Matrix
## Loading required package: lattice
## Loading required package: lme4
## Attaching package: 'lme4'
## The following object(s) are masked from 'package:stats':
## 
## AIC, BIC
## Loading required package: R2WinBUGS
## Loading required package: coda
## Attaching package: 'coda'
## The following object(s) are masked from 'package:lme4':
## 
## HPDinterval
## Loading required package: abind
## Loading required package: foreign
## arm (Version 1.6-03, built: 2013-2-20)
## Working directory is /Users/xiwang/Dropbox/GEOG 5023 - offline/Labs/Lab 2
## Attaching package: 'arm'
## The following object(s) are masked from 'package:coda':
## 
## traceplot
# Load data
gardasil <- read.table("/Users/xiwang/Dropbox/GEOG 5023 - offline/Labs/Lab 2/jh_gardasil.dat", 
    header = TRUE)
# Data summaries
summary(gardasil)
##       Age          AgeGroup          Race           Shots     
##  Min.   :11.0   Min.   :0.000   Min.   :0.000   Min.   :1.00  
##  1st Qu.:15.0   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:1.00  
##  Median :18.0   Median :1.000   Median :0.000   Median :2.00  
##  Mean   :18.5   Mean   :0.504   Mean   :0.782   Mean   :2.07  
##  3rd Qu.:22.0   3rd Qu.:1.000   3rd Qu.:1.000   3rd Qu.:3.00  
##  Max.   :26.0   Max.   :1.000   Max.   :3.000   Max.   :3.00  
##    Completed     InsuranceType    MedAssist        Location   
##  Min.   :0.000   Min.   :0.00   Min.   :0.000   Min.   :1.00  
##  1st Qu.:0.000   1st Qu.:1.00   1st Qu.:0.000   1st Qu.:1.00  
##  Median :0.000   Median :1.00   Median :0.000   Median :1.00  
##  Mean   :0.332   Mean   :1.33   Mean   :0.195   Mean   :2.01  
##  3rd Qu.:1.000   3rd Qu.:2.00   3rd Qu.:0.000   3rd Qu.:4.00  
##  Max.   :1.000   Max.   :3.00   Max.   :1.000   Max.   :4.00  
##   LocationType    PracticeType 
##  Min.   :0.000   Min.   :0.00  
##  1st Qu.:0.000   1st Qu.:0.00  
##  Median :0.000   Median :1.00  
##  Mean   :0.319   Mean   :1.01  
##  3rd Qu.:1.000   3rd Qu.:2.00  
##  Max.   :1.000   Max.   :2.00

Creating dummy variables for each variable

# `ifelse(test, c(1), c(0))` returns a value that is the same shape as
# `test` which is filled with either 1 if it satisfies the test or 0 if it
# doesn't Race Dummy Variables
gardasil$White <- ifelse(gardasil$Race == 0, c(1), c(0))
gardasil$Black <- ifelse(gardasil$Race == 1, c(1), c(0))
gardasil$Hispanic <- ifelse(gardasil$Race == 2, c(1), c(0))
gardasil$Other <- ifelse(gardasil$Race == 3, c(1), c(0))
# Insurance Dummy Variables
gardasil$MedicalAssist <- ifelse(gardasil$InsuranceType == 0, c(1), c(0))  #MedicalAssist should match MedAssist, the original variable
gardasil$Private <- ifelse(gardasil$InsuranceType == 1, c(1), c(0))
gardasil$Hospital <- ifelse(gardasil$InsuranceType == 2, c(1), c(0))
gardasil$Military <- ifelse(gardasil$InsuranceType == 3, c(1), c(0))
# Location Dummy Variables
gardasil$Odenton <- ifelse(gardasil$Location == 1, c(1), c(0))
gardasil$WhiteMarsh <- ifelse(gardasil$Location == 2, c(1), c(0))
gardasil$JohnsHopkins <- ifelse(gardasil$Location == 3, c(1), c(0))
gardasil$Bayview <- ifelse(gardasil$Location == 4, c(1), c(0))
# Practice Type Dummy Variables
gardasil$Pediatric <- ifelse(gardasil$PracticeType == 0, c(1), c(0))
gardasil$Family <- ifelse(gardasil$PracticeType == 1, c(1), c(0))
gardasil$Obgyn <- ifelse(gardasil$PracticeType == 2, c(1), c(0))
# Medical Assistance Reverse Dummy Variable
gardasil$NonMedAssist <- ifelse(gardasil$MedAssist == 0, c(1), c(0))  # 0 = No, 1 = Yes
# Location Type Reverse Dummy Variable
gardasil$Suburban <- ifelse(gardasil$LocationType == 0, c(1), c(0))  # 0 = Urban, 1 = Suburban
# Check that MedAssist matches MedicalAssist
MACompare <- ifelse(gardasil$MedicalAssist == gardasil$MedAssist, c(0), c(1))  # Count mismatches
sum(MACompare)  # This will be > 0 if there are mismatches; MedicalAssist and MedAssist are equivalent
## [1] 0

Univariate logistic regression models

The naming convention for models is mData#.g, where m stands for model, Data is the category, # is the number of variables, g is the abbreviation of the group used under that variable if is the only one modeled.

The naming convention for probablity is pData#.g_Group, where p stands for probability of completion, Data#.g mirrors the naming convention for models, and Group is the group for which the probability is calculated.

Age

mAge1 <- glm(gardasil$Completed ~ gardasil$Age, family = binomial("logit"))
summary(mAge1)
## 
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Age, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.954  -0.909  -0.873   1.454   1.552  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept)   -0.3341     0.2533   -1.32     0.19
## gardasil$Age  -0.0198     0.0134   -1.47     0.14
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1793.8  on 1411  degrees of freedom
## AIC: 1798
## 
## Number of Fisher Scoring iterations: 4
# P(completion) = (exp(-0.33412 - 0.01976*x))/(1 + exp(-0.33412 -
# 0.01976*x)), where x is age
exp(cbind(OR = coef(mAge1), confint(mAge1)))  # The probability of completion does not apper to be significantly affected by age (the probability decreases slightly with age in this particular data)
## Waiting for profiling to be done...
##                  OR  2.5 % 97.5 %
## (Intercept)  0.7160 0.4353  1.176
## gardasil$Age 0.9804 0.9549  1.006
plot(gardasil$Age, fitted(mAge1), main = "Probability of Vaccine Completion by Age", 
    xlab = "Patient Age (years)", ylab = "Completion Probability", pch = 16)

plot of chunk unnamed-chunk-3

AgeGroup

mAgeGrp1 <- glm(gardasil$Completed ~ gardasil$AgeGroup, family = binomial("logit"))
summary(mAgeGrp1)
## 
## Call:
## glm(formula = gardasil$Completed ~ gardasil$AgeGroup, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.932  -0.932  -0.865   1.444   1.527  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -0.6087     0.0791   -7.70  1.4e-14 ***
## gardasil$AgeGroup  -0.1830     0.1131   -1.62     0.11    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1793.4  on 1411  degrees of freedom
## AIC: 1797
## 
## Number of Fisher Scoring iterations: 4
pAgeGrp1_younger <- (exp(-0.60871 - 0.18302 * 0))/(1 + exp(-0.60871 - 0.18302 * 
    0))
pAgeGrp1_older <- (exp(-0.60871 - 0.18302 * 1))/(1 + exp(-0.60871 - 0.18302 * 
    1))
pAgeGrp1_younger  # For ages 11-17
## [1] 0.3524
pAgeGrp1_older  # For ages 18-26
## [1] 0.3118
exp(cbind(OR = coef(mAgeGrp1), confint(mAgeGrp1)))  # The probability of completion does not apper to be significantly affected by age group (the probability decreases slightly with the older age group in this particular data)
## Waiting for profiling to be done...
##                       OR  2.5 % 97.5 %
## (Intercept)       0.5441 0.4654 0.6345
## gardasil$AgeGroup 0.8328 0.6669 1.0393
plot(gardasil$AgeGroup, fitted(mAgeGrp1), main = "Probability of Vaccine Completion by Age Group", 
    xlab = "Age Group\n(0 = ages 11-17, 1 = ages 18-26 yo)", ylab = "Completion Probability", 
    pch = 16)

plot of chunk unnamed-chunk-4

Race

# Race: All
mRace1 <- glm(gardasil$Completed ~ gardasil$White + gardasil$Hispanic + gardasil$Other, 
    family = binomial("logit"))  # Using black as reference
summary(mRace1)
## 
## Call:
## glm(formula = gardasil$Completed ~ gardasil$White + gardasil$Hispanic + 
##     gardasil$Other, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.982  -0.982  -0.736   1.386   1.697  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.169      0.112  -10.46  < 2e-16 ***
## gardasil$White       0.690      0.135    5.11  3.3e-07 ***
## gardasil$Hispanic    0.447      0.316    1.41   0.1573    
## gardasil$Other       0.595      0.189    3.14   0.0017 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796  on 1412  degrees of freedom
## Residual deviance: 1768  on 1409  degrees of freedom
## AIC: 1776
## 
## Number of Fisher Scoring iterations: 4
pRace1_White <- (exp(-1.1691 + 0.6902 * 1 + 0.447 * 0 + 0.5947 * 0))/(1 + exp(-1.1691 + 
    0.6902 * 1 + 0.447 * 0 + 0.5947 * 0))
pRace1_Black <- (exp(-1.1691 + 0.6902 * 0 + 0.447 * 0 + 0.5947 * 0))/(1 + exp(-1.1691 + 
    0.6902 * 0 + 0.447 * 0 + 0.5947 * 0))
pRace1_Hispanic <- (exp(-1.1691 + 0.6902 * 0 + 0.447 * 1 + 0.5947 * 0))/(1 + 
    exp(-1.1691 + 0.6902 * 0 + 0.447 * 1 + 0.5947 * 0))
pRace1_Other <- (exp(-1.1691 + 0.6902 * 0 + 0.447 * 0 + 0.5947 * 1))/(1 + exp(-1.1691 + 
    0.6902 * 0 + 0.447 * 0 + 0.5947 * 1))
pRace1_White
## [1] 0.3825
pRace1_Black
## [1] 0.237
pRace1_Hispanic
## [1] 0.3269
pRace1_Other
## [1] 0.3602
exp(cbind(OR = coef(mRace1), confint(mRace1)))
## Waiting for profiling to be done...
##                       OR  2.5 % 97.5 %
## (Intercept)       0.3107 0.2484 0.3852
## gardasil$White    1.9941 1.5337 2.6062
## gardasil$Hispanic 1.5635 0.8247 2.8674
## gardasil$Other    1.8124 1.2486 2.6241
plot(gardasil$Race, fitted(mRace1), main = "Probabiliy of Vaccine Completion by Race", 
    xlab = "Race\n(0 = white, 1 = black, 2 = Hispanice, 3 = other)", ylab = "Completion Probability", 
    pch = 16)

plot of chunk unnamed-chunk-5

# Race: White
mRace1.w <- glm(gardasil$Completed ~ gardasil$White, family = binomial("logit"))
summary(mRace1.w)
## 
## Call:
## glm(formula = gardasil$Completed ~ gardasil$White, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.982  -0.982  -0.806   1.386   1.601  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -0.9567     0.0856  -11.18   <2e-16 ***
## gardasil$White   0.4778     0.1145    4.17    3e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1778.4  on 1411  degrees of freedom
## AIC: 1782
## 
## Number of Fisher Scoring iterations: 4
pRace1.w_no = (exp(-0.95673 + 0.47784 * 0))/(1 + exp(-0.95673 + 0.47784 * 0))
pRace1.w_yes = (exp(-0.95673 + 0.47784 * 1))/(1 + exp(-0.95673 + 0.47784 * 1))
pRace1.w_no
## [1] 0.2775
pRace1.w_yes
## [1] 0.3825
exp(cbind(OR = coef(mRace1.w), confint(mRace1.w)))  # Whites are significantly more likely to complete than the other racial groups combined
## Waiting for profiling to be done...
##                    OR  2.5 % 97.5 %
## (Intercept)    0.3841 0.3241 0.4534
## gardasil$White 1.6126 1.2894 2.0200
# Race: Black
mRace1.b <- glm(gardasil$Completed ~ gardasil$Black, family = binomial("logit"))
summary(mRace1.b)
## 
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Black, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.970  -0.970  -0.736   1.400   1.697  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -0.5097     0.0663   -7.69  1.5e-14 ***
## gardasil$Black  -0.6594     0.1299   -5.08  3.9e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1768.9  on 1411  degrees of freedom
## AIC: 1773
## 
## Number of Fisher Scoring iterations: 4
pRace1.b_no = (exp(-0.50973 - 0.65936 * 0))/(1 + exp(-0.50973 - 0.65936 * 0))
pRace1.b_yes = (exp(-0.50973 - 0.65936 * 1))/(1 + exp(-0.50973 - 0.65936 * 1))
pRace1.b_no
## [1] 0.3753
pRace1.b_yes
## [1] 0.237
exp(cbind(OR = coef(mRace1.b), confint(mRace1.b)))  # Blacks are significantly less likely to complete than the other racial groups combined
## Waiting for profiling to be done...
##                    OR  2.5 % 97.5 %
## (Intercept)    0.6007 0.5271 0.6836
## gardasil$Black 0.5172 0.3997 0.6653
# Race: Hispanic
mRace1.h <- glm(gardasil$Completed ~ gardasil$Hispanic, family = binomial("logit"))
summary(mRace1.h)
## 
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Hispanic, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.898  -0.898  -0.898   1.485   1.495  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -0.6987     0.0576  -12.14   <2e-16 ***
## gardasil$Hispanic  -0.0235     0.3012   -0.08     0.94    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796  on 1412  degrees of freedom
## Residual deviance: 1796  on 1411  degrees of freedom
## AIC: 1800
## 
## Number of Fisher Scoring iterations: 4
pRace1.h_no = (exp(-0.69866 - 0.02347 * 0))/(1 + exp(-0.69866 - 0.02347 * 0))
pRace1.h_yes = (exp(-0.69866 - 0.02347 * 1))/(1 + exp(-0.69866 - 0.02347 * 1))
pRace1.h_no
## [1] 0.3321
pRace1.h_yes
## [1] 0.3269
exp(cbind(OR = coef(mRace1.h), confint(mRace1.h)))  # Hispanics are as likely to complete as the other racial groups combined
## Waiting for profiling to be done...
##                       OR  2.5 % 97.5 %
## (Intercept)       0.4972 0.4439 0.5562
## gardasil$Hispanic 0.9768 0.5291 1.7368
# Race: Other
mRace1.o <- glm(gardasil$Completed ~ gardasil$Other, family = binomial("logit"))
summary(mRace1.o)
## 
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Other, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.945  -0.891  -0.891   1.494   1.494  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -0.7189     0.0608  -11.82   <2e-16 ***
## gardasil$Other   0.1445     0.1644    0.88     0.38    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1795.2  on 1411  degrees of freedom
## AIC: 1799
## 
## Number of Fisher Scoring iterations: 4
pRace1.o_no = (exp(-0.71893 + 0.1445 * 0))/(1 + exp(-0.71893 + 0.1445 * 0))
pRace1.o_yes = (exp(-0.71893 + 0.1445 * 1))/(1 + exp(-0.71893 + 0.1445 * 1))
pRace1.o_no
## [1] 0.3276
pRace1.o_yes
## [1] 0.3602
exp(cbind(OR = coef(mRace1.o), confint(mRace1.o)))
## Waiting for profiling to be done...
##                    OR  2.5 % 97.5 %
## (Intercept)    0.4873 0.4321 0.5485
## gardasil$Other 1.1555 0.8338 1.5899

Insurance Type

# Insurance Type: All
mInsurance1 <- glm(gardasil$Completed ~ gardasil$Private + gardasil$Hospital + 
    gardasil$Military, family = binomial("logit"))  # Using MedicalAssist as reference
summary(mInsurance1)
## 
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Private + gardasil$Hospital + 
##     gardasil$Military, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.117  -0.928  -0.928   1.413   1.794  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.386      0.151   -9.20  < 2e-16 ***
## gardasil$Private     0.767      0.170    4.52  6.2e-06 ***
## gardasil$Hospital    1.243      0.266    4.68  2.9e-06 ***
## gardasil$Military    0.848      0.189    4.49  7.2e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1763.1  on 1409  degrees of freedom
## AIC: 1771
## 
## Number of Fisher Scoring iterations: 4
pInsurance1_MedicalAssist <- (exp(-1.3863 + 0.767 * 0 + 1.2432 * 0 + 0.848 * 
    0))/(1 + exp(-1.3863 + 0.767 * 0 + 1.2432 * 0 + 0.848 * 0))
pInsurance1_Private <- (exp(-1.3863 + 0.767 * 1 + 1.2432 * 0 + 0.848 * 0))/(1 + 
    exp(-1.3863 + 0.767 * 1 + 1.2432 * 0 + 0.848 * 0))
pInsurance1_Hospital <- (exp(-1.3863 + 0.767 * 0 + 1.2432 * 1 + 0.848 * 0))/(1 + 
    exp(-1.3863 + 0.767 * 0 + 1.2432 * 1 + 0.848 * 0))
pInsurance1_Military <- (exp(-1.3863 + 0.767 * 0 + 1.2432 * 0 + 0.848 * 1))/(1 + 
    exp(-1.3863 + 0.767 * 0 + 1.2432 * 0 + 0.848 * 1))
pInsurance1_MedicalAssist
## [1] 0.2
pInsurance1_Private
## [1] 0.3499
pInsurance1_Hospital
## [1] 0.4643
pInsurance1_Military
## [1] 0.3686
exp(cbind(OR = coef(mInsurance1), confint(mInsurance1)))  # Compared to medical assistance, all other insurance groups are more significantly likely to complete
## Waiting for profiling to be done...
##                      OR  2.5 % 97.5 %
## (Intercept)       0.250 0.1843 0.3332
## gardasil$Private  2.153 1.5534 3.0247
## gardasil$Hospital 3.467 2.0598 5.8496
## gardasil$Military 2.335 1.6191 3.3996
plot(gardasil$Insurance, fitted(mInsurance1), main = "Probability of Vaccine Completion by Insurance Type", 
    xlab = "Insurance Type\n(0 = medical assistance, 1 = private, 2 = hospital, 3 = military)", 
    ylab = "Completion Probability", pch = 16)

plot of chunk unnamed-chunk-6

# Insurance Type: Medical Assistance
mInsurance1.ma <- glm(gardasil$Completed ~ gardasil$MedicalAssist, family = binomial("logit"))
summary(mInsurance1.ma)
## 
## Call:
## glm(formula = gardasil$Completed ~ gardasil$MedicalAssist, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.951  -0.951  -0.951   1.422   1.794  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             -0.5589     0.0616   -9.07  < 2e-16 ***
## gardasil$MedicalAssist  -0.8274     0.1629   -5.08  3.8e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1767.3  on 1411  degrees of freedom
## AIC: 1771
## 
## Number of Fisher Scoring iterations: 4
pInsurance1.ma_no = (exp(-0.55893 - 0.82737 * 0))/(1 + exp(-0.55893 - 0.82737 * 
    0))
pInsurance1.ma_yes = (exp(-0.55893 - 0.82737 * 1))/(1 + exp(-0.55893 - 0.82737 * 
    1))
pInsurance1.ma_no
## [1] 0.3638
pInsurance1.ma_yes
## [1] 0.2
exp(cbind(OR = coef(mInsurance1.ma), confint(mInsurance1.ma)))  # Medical assistance insurance patients are less likely to complete than other groups combined
## Waiting for profiling to be done...
##                            OR  2.5 % 97.5 %
## (Intercept)            0.5718 0.5064 0.6448
## gardasil$MedicalAssist 0.4372 0.3152 0.5975
# Insurance Type: Private
mInsurance1.p <- glm(gardasil$Completed ~ gardasil$Private, family = binomial("logit"))
summary(mInsurance1.p)
## 
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Private, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.928  -0.928  -0.867   1.449   1.524  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -0.7859     0.0821   -9.57   <2e-16 ***
## gardasil$Private   0.1666     0.1132    1.47     0.14    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1793.8  on 1411  degrees of freedom
## AIC: 1798
## 
## Number of Fisher Scoring iterations: 4
pInsurance1.p_no = (exp(-0.78593 + 0.16659 * 0))/(1 + exp(-0.78593 + 0.16659 * 
    0))
pInsurance1.p_yes = (exp(-0.78593 + 0.16659 * 1))/(1 + exp(-0.78593 + 0.16659 * 
    1))
pInsurance1.p_no
## [1] 0.313
pInsurance1.p_yes
## [1] 0.3499
exp(cbind(OR = coef(mInsurance1.p), confint(mInsurance1.p)))  # Private insurance patients are not significantly less or more likely to complete than the other groups combined (though their probability is slightly higher in this particular data)
## Waiting for profiling to be done...
##                      OR  2.5 % 97.5 %
## (Intercept)      0.4557 0.3873 0.5344
## gardasil$Private 1.1813 0.9464 1.4753
# Insurance Type: Hospital
mInsurance1.h <- glm(gardasil$Completed ~ gardasil$Hospital, family = binomial("logit"))
summary(mInsurance1.h)
## 
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Hospital, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.117  -0.884  -0.884   1.502   1.502  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -0.7375     0.0586  -12.58   <2e-16 ***
## gardasil$Hospital   0.5944     0.2265    2.62   0.0087 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1789.3  on 1411  degrees of freedom
## AIC: 1793
## 
## Number of Fisher Scoring iterations: 4
pInsurance1.h_no = (exp(-0.7375 + 0.5944 * 0))/(1 + exp(-0.7375 + 0.5944 * 0))
pInsurance1.h_yes = (exp(-0.7375 + 0.5944 * 1))/(1 + exp(-0.7375 + 0.5944 * 
    1))
pInsurance1.h_no
## [1] 0.3236
pInsurance1.h_yes
## [1] 0.4643
exp(cbind(OR = coef(mInsurance1.h), confint(mInsurance1.h)))  # Hospital insurance patients are significantly more likely to complete than the other groups combined
## Waiting for profiling to be done...
##                       OR 2.5 % 97.5 %
## (Intercept)       0.4783 0.426 0.5362
## gardasil$Hospital 1.8119 1.158 2.8230
# Insurance Type: Military
mInsurance1.mi <- glm(gardasil$Completed ~ gardasil$Military, family = binomial("logit"))
summary(mInsurance1.mi)
## 
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Military, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.959  -0.879  -0.879   1.413   1.508  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -0.7505     0.0651  -11.52   <2e-16 ***
## gardasil$Military   0.2122     0.1312    1.62     0.11    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1793.4  on 1411  degrees of freedom
## AIC: 1797
## 
## Number of Fisher Scoring iterations: 4
pInsurance1.mi_no = (exp(-0.75055 + 0.21223 * 0))/(1 + exp(-0.75055 + 0.21223 * 
    0))
pInsurance1.mi_yes = (exp(-0.75055 + 0.21223 * 1))/(1 + exp(-0.75055 + 0.21223 * 
    1))
pInsurance1.mi_no
## [1] 0.3207
pInsurance1.mi_yes
## [1] 0.3686
exp(cbind(OR = coef(mInsurance1.mi), confint(mInsurance1.mi)))  # Military insurance patients are not significantly less or more likely to complete than the other groups combined (though their probability is slightly higher in this particular data)
## Waiting for profiling to be done...
##                       OR  2.5 % 97.5 %
## (Intercept)       0.4721 0.4151 0.5359
## gardasil$Military 1.2364 0.9545 1.5971

Medical Assistance

# The probabilities, odds ratio, and confidence interval should be the
# same as the 'mInsurance1.ma'' model because the 'MedAssist'' and
# 'MedicalAssist'' variables are exactly the same.
plot(gardasil$MedicalAssist, fitted(mInsurance1.ma), main = "Probability of Vaccine Completion by Medical Assistance", 
    xlab = "Medical Assistance \n(0 = no, 1 = yes)", ylab = "Completion Probability", 
    pch = 16)

plot of chunk unnamed-chunk-7

Location

# Location: All
mLocation1 <- glm(gardasil$Completed ~ gardasil$Odenton + gardasil$WhiteMarsh + 
    gardasil$Bayview, family = binomial("logit"))  # Using Johns Hopkins Outpatient Center as reference
summary(mLocation1)
## 
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Odenton + gardasil$WhiteMarsh + 
##     gardasil$Bayview, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.152  -0.919  -0.781   1.460   1.757  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -1.304      0.259   -5.04  4.6e-07 ***
## gardasil$Odenton       0.661      0.269    2.46    0.014 *  
## gardasil$WhiteMarsh    1.243      0.302    4.12  3.8e-05 ***
## gardasil$Bayview       0.274      0.285    0.96    0.336    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1764.9  on 1409  degrees of freedom
## AIC: 1773
## 
## Number of Fisher Scoring iterations: 4
pLocation1_Odenton <- (exp(-1.3041 + 0.6612 * 1 + 1.2434 * 0 + 0.2744 * 0))/(1 + 
    exp(-1.3041 + 0.6612 * 1 + 1.2434 * 0 + 0.2744 * 0))
pLocation1_WhiteMarsh <- (exp(-1.3041 + 0.6612 * 0 + 1.2434 * 1 + 0.2744 * 0))/(1 + 
    exp(-1.3041 + 0.6612 * 0 + 1.2434 * 1 + 0.2744 * 0))
pLocation1_JohnsHopkins <- (exp(-1.3041 + 0.6612 * 0 + 1.2434 * 0 + 0.2744 * 
    0))/(1 + exp(-1.3041 + 0.6612 * 0 + 1.2434 * 0 + 0.2744 * 0))
pLocation1_Bayview <- (exp(-1.3041 + 0.6612 * 0 + 1.2434 * 0 + 0.2744 * 1))/(1 + 
    exp(-1.3041 + 0.6612 * 0 + 1.2434 * 0 + 0.2744 * 1))
pLocation1_Odenton
## [1] 0.3446
pLocation1_WhiteMarsh
## [1] 0.4848
pLocation1_JohnsHopkins
## [1] 0.2135
pLocation1_Bayview
## [1] 0.2631
exp(cbind(OR = coef(mLocation1), confint(mLocation1)))  # Compared to Johns Hopkins, the Odenton and White Marsh groups are significantly more likely to complete while the Bayview group is not significantly less or more likely (though Bayview's probability is slightly higher in this particular data)
## Waiting for profiling to be done...
##                         OR  2.5 % 97.5 %
## (Intercept)         0.2714 0.1590  0.441
## gardasil$Odenton    1.9372 1.1660  3.369
## gardasil$WhiteMarsh 3.4675 1.9489  6.397
## gardasil$Bayview    1.3158 0.7656  2.352
plot(gardasil$Location, fitted(mLocation1), main = "Probability of Vaccine Completion by Location", 
    xlab = "Location\n(1 = Odenton, 2 = White Marsh, 3 = Johns Hopkins, 4 = Bayview)", 
    ylab = "Completion Probability", pch = 16)

plot of chunk unnamed-chunk-8

# Location: Odenton
mLocation1.o <- glm(gardasil$Completed ~ gardasil$Odenton, family = binomial("logit"))
summary(mLocation1.o)
## 
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Odenton, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.919  -0.919  -0.871   1.460   1.519  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -0.7748     0.0868   -8.93   <2e-16 ***
## gardasil$Odenton   0.1320     0.1144    1.15     0.25    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1794.7  on 1411  degrees of freedom
## AIC: 1799
## 
## Number of Fisher Scoring iterations: 4
pLocation1.o_no = (exp(-0.77477 + 0.13196 * 0))/(1 + exp(-0.77477 + 0.13196 * 
    0))
pLocation1.o_yes = (exp(-0.77477 + 0.13196 * 1))/(1 + exp(-0.77477 + 0.13196 * 
    1))
pLocation1.o_no
## [1] 0.3154
pLocation1.o_yes
## [1] 0.3446
exp(cbind(OR = coef(mLocation1.o), confint(mLocation1.o)))  # Odenton patients are not significantly less or more likely to complete than the other groups combined (though their probablity is lightly higher in this particular data)
## Waiting for profiling to be done...
##                      OR  2.5 % 97.5 %
## (Intercept)      0.4608 0.3880 0.5453
## gardasil$Odenton 1.1411 0.9124 1.4287
# Location: White Marsh
mLocation1.w <- glm(gardasil$Completed ~ gardasil$WhiteMarsh, family = binomial("logit"))
summary(mLocation1.w)
## 
## Call:
## glm(formula = gardasil$Completed ~ gardasil$WhiteMarsh, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.152  -0.864  -0.864   1.527   1.527  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -0.7922     0.0611  -12.96  < 2e-16 ***
## gardasil$WhiteMarsh   0.7316     0.1673    4.37  1.2e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1777.2  on 1411  degrees of freedom
## AIC: 1781
## 
## Number of Fisher Scoring iterations: 4
pLocation1.w_no = (exp(-0.79219 + 0.73156 * 0))/(1 + exp(-0.79219 + 0.73156 * 
    0))
pLocation1.w_yes = (exp(-0.79219 + 0.73156 * 1))/(1 + exp(-0.79219 + 0.73156 * 
    1))
pLocation1.w_no
## [1] 0.3117
pLocation1.w_yes
## [1] 0.4848
exp(cbind(OR = coef(mLocation1.w), confint(mLocation1.w)))  # White Marsh patients are significantly more likely to complete than the other groups combined
## Waiting for profiling to be done...
##                         OR  2.5 % 97.5 %
## (Intercept)         0.4529 0.4014  0.510
## gardasil$WhiteMarsh 2.0783 1.4960  2.886
# Location: Johns Hopkins Outpatient Center
mLocation1.j <- glm(gardasil$Completed ~ gardasil$JohnsHopkins, family = binomial("logit"))
summary(mLocation1.j)
## 
## Call:
## glm(formula = gardasil$Completed ~ gardasil$JohnsHopkins, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.911  -0.911  -0.911   1.469   1.757  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             -0.664      0.058  -11.44   <2e-16 ***
## gardasil$JohnsHopkins   -0.640      0.265   -2.41    0.016 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1789.5  on 1411  degrees of freedom
## AIC: 1794
## 
## Number of Fisher Scoring iterations: 4
pLocation1.j_no = (exp(-0.66383 - 0.64022 * 0))/(1 + exp(-0.66383 - 0.64022 * 
    0))
pLocation1.j_yes = (exp(-0.66383 - 0.64022 * 1))/(1 + exp(-0.66383 - 0.64022 * 
    1))
pLocation1.j_no
## [1] 0.3399
pLocation1.j_yes
## [1] 0.2135
exp(cbind(OR = coef(mLocation1.j), confint(mLocation1.j)))  # Johns Hopkins patients are significantly less likely to complete than the other groups combined
## Waiting for profiling to be done...
##                           OR  2.5 % 97.5 %
## (Intercept)           0.5149 0.4592 0.5765
## gardasil$JohnsHopkins 0.5272 0.3054 0.8683
# Location: Bayview
mLocation1.b <- glm(gardasil$Completed ~ gardasil$Bayview, family = binomial("logit"))
summary(mLocation1.b)
## 
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Bayview, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.937  -0.937  -0.781   1.438   1.634  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -0.5949     0.0644   -9.24   <2e-16 ***
## gardasil$Bayview  -0.4347     0.1358   -3.20   0.0014 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1785.4  on 1411  degrees of freedom
## AIC: 1789
## 
## Number of Fisher Scoring iterations: 4
pLocation1.b_no = (exp(-0.59489 - 0.43473 * 0))/(1 + exp(-0.59489 - 0.43473 * 
    0))
pLocation1.b_yes = (exp(-0.59489 - 0.43473 * 1))/(1 + exp(-0.59489 - 0.43473 * 
    1))
pLocation1.b_no
## [1] 0.3555
pLocation1.b_yes
## [1] 0.2632
exp(cbind(OR = coef(mLocation1.b), confint(mLocation1.b)))  # Bayview patients are significantly less likely to complete than the other groups combined
## Waiting for profiling to be done...
##                      OR  2.5 % 97.5 %
## (Intercept)      0.5516 0.4858 0.6254
## gardasil$Bayview 0.6474 0.4945 0.8423

Location Type

mLocationType1 <- glm(gardasil$Completed ~ gardasil$LocationType, family = binomial("logit"))
summary(mLocationType1)
## 
## Call:
## glm(formula = gardasil$Completed ~ gardasil$LocationType, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.959  -0.959  -0.764   1.413   1.657  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            -0.5381     0.0668   -8.06  7.9e-16 ***
## gardasil$LocationType  -0.5429     0.1273   -4.26  2.0e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1777.1  on 1411  degrees of freedom
## AIC: 1781
## 
## Number of Fisher Scoring iterations: 4
pLocationType1_suburban = (exp(-0.5381 - 0.5429 * 0))/(1 + exp(-0.5381 - 0.5429 * 
    0))
pLocationType1_urban = (exp(-0.5381 - 0.5429 * 1))/(1 + exp(-0.5381 - 0.5429 * 
    1))
pLocationType1_suburban
## [1] 0.3686
pLocationType1_urban
## [1] 0.2533
exp(cbind(OR = coef(mLocationType1), confint(mLocationType1)))  # Urban patients are significantly less likely to complete compared to suburban patients
## Waiting for profiling to be done...
##                           OR  2.5 % 97.5 %
## (Intercept)           0.5839 0.5118 0.6651
## gardasil$LocationType 0.5811 0.4516 0.7441
plot(gardasil$LocationType, fitted(mLocationType1), main = "Probability of Vaccine Completion by Location Type", 
    xlab = "Location Type\n(0 = suburban, 1 = urban)", ylab = "Completion Probability", 
    pch = 16)

plot of chunk unnamed-chunk-9

Practice Type

# Practice Type: All
mPractice1 <- glm(gardasil$Completed ~ gardasil$Pediatric + gardasil$Obgyn, 
    family = binomial("logit"))  # Using family practice as reference
summary(mPractice1)
## 
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Pediatric + gardasil$Obgyn, 
##     family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.973  -0.869  -0.828   1.397   1.573  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -0.893      0.115   -7.75  9.3e-15 ***
## gardasil$Pediatric    0.115      0.149    0.77   0.4432    
## gardasil$Obgyn        0.392      0.146    2.68   0.0073 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1787.6  on 1410  degrees of freedom
## AIC: 1794
## 
## Number of Fisher Scoring iterations: 4
pPractice1_Pediatric <- (exp(-0.8934 + 0.1145 * 1 + 0.3916 * 0))/(1 + exp(-0.8934 + 
    0.1145 * 1 + 0.3916 * 0))
pPractice1_Family <- (exp(-0.8934 + 0.1145 * 0 + 0.3916 * 0))/(1 + exp(-0.8934 + 
    0.1145 * 0 + 0.3916 * 0))
pPractice1_Obgyn <- (exp(-0.8934 + 0.1145 * 0 + 0.3916 * 1))/(1 + exp(-0.8934 + 
    0.1145 * 0 + 0.3916 * 1))
pPractice1_Pediatric
## [1] 0.3146
pPractice1_Family
## [1] 0.2904
pPractice1_Obgyn
## [1] 0.3771
exp(cbind(OR = coef(mPractice1), confint(mPractice1)))  # Compared to the family group, the OB-GYN group is significantly more likely to complete while the pediatric group is not significantly less or more likely (though pediatric's probability is slightly higher in this particular data)
## Waiting for profiling to be done...
##                        OR  2.5 % 97.5 %
## (Intercept)        0.4093 0.3252 0.5113
## gardasil$Pediatric 1.1213 0.8377 1.5048
## gardasil$Obgyn     1.4793 1.1132 1.9728
plot(gardasil$PracticeType, fitted(mPractice1), main = "Probability of Vaccine Completion by Practice Type", 
    xlab = "Practice Type\n(0 = pediatric, 1 = family, 2 = OB-GYN)", ylab = "Completion Probability", 
    pch = 16)

plot of chunk unnamed-chunk-10

# Practice Type: Pediatric
mPractice1.p <- glm(gardasil$Completed ~ gardasil$Pediatric, family = binomial("logit"))
summary(mPractice1.p)
## 
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Pediatric, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.915  -0.915  -0.869   1.465   1.521  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -0.6550     0.0704   -9.31   <2e-16 ***
## gardasil$Pediatric  -0.1239     0.1181   -1.05     0.29    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1794.9  on 1411  degrees of freedom
## AIC: 1799
## 
## Number of Fisher Scoring iterations: 4
pPractice1.p_no = (exp(-0.65497 - 0.1239 * 0))/(1 + exp(-0.65497 - 0.1239 * 
    0))
pPractice1.p_yes = (exp(-0.65497 - 0.1239 * 1))/(1 + exp(-0.65497 - 0.1239 * 
    1))
pPractice1.p_no
## [1] 0.3419
pPractice1.p_yes
## [1] 0.3146
exp(cbind(OR = coef(mPractice1.p), confint(mPractice1.p)))  # Pediatric practice patients are not significantly less or more likely to complete compared to the other groups combined
## Waiting for profiling to be done...
##                        OR  2.5 % 97.5 %
## (Intercept)        0.5195 0.4521 0.5957
## gardasil$Pediatric 0.8835 0.7001 1.1126
# Practice Type: Family
mPractice1.f <- glm(gardasil$Completed ~ gardasil$Family, family = binomial("logit"))
summary(mPractice1.f)
## 
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Family, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.922  -0.922  -0.828   1.456   1.573  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -0.6350     0.0649   -9.78   <2e-16 ***
## gardasil$Family  -0.2584     0.1323   -1.95    0.051 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1792.1  on 1411  degrees of freedom
## AIC: 1796
## 
## Number of Fisher Scoring iterations: 4
pPractice1.f_no = (exp(-0.63502 - 0.25837 * 0))/(1 + exp(-0.63502 - 0.25837 * 
    0))
pPractice1.f_yes = (exp(-0.63502 - 0.25837 * 1))/(1 + exp(-0.63502 - 0.25837 * 
    1))
pPractice1.f_no
## [1] 0.3464
pPractice1.f_yes
## [1] 0.2904
exp(cbind(OR = coef(mPractice1.f), confint(mPractice1.f)))  # Family practice patients are (borderline) significantly less likely to complete compared to the other groups combined
## Waiting for profiling to be done...
##                     OR  2.5 % 97.5 %
## (Intercept)     0.5299 0.4662 0.6014
## gardasil$Family 0.7723 0.5943 0.9987
# Practice Type: OB-GYN
mPractice1.o <- glm(gardasil$Completed ~ gardasil$Obgyn, family = binomial("logit"))
summary(mPractice1.o)
## 
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Obgyn, family = binomial("logit"))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.973  -0.852  -0.852   1.397   1.542  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -0.8257     0.0732   -11.3   <2e-16 ***
## gardasil$Obgyn   0.3239     0.1156     2.8   0.0051 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1788.2  on 1411  degrees of freedom
## AIC: 1792
## 
## Number of Fisher Scoring iterations: 4
pPractice1.o_no = (exp(-0.82575 + 0.32392 * 0))/(1 + exp(-0.82575 + 0.32392 * 
    0))
pPractice1.o_yes = (exp(-0.82575 + 0.32392 * 1))/(1 + exp(-0.82575 + 0.32392 * 
    1))
pPractice1.o_no
## [1] 0.3045
pPractice1.o_yes
## [1] 0.3771
exp(cbind(OR = coef(mPractice1.o), confint(mPractice1.o)))  # OB-GYN practice patients are significanly more likely to complete compared to the other groups combined
## Waiting for profiling to be done...
##                    OR  2.5 % 97.5 %
## (Intercept)    0.4379 0.3788 0.5049
## gardasil$Obgyn 1.3825 1.1020 1.7337

Residuals of Logistic Regression Models

summary(mAge1$residuals)  # Mean is 0.000179
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -1.58   -1.51   -1.46    0.00    2.88    3.33
summary(mAgeGrp1$residuals)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -1.54   -1.54   -1.45    0.00    2.84    3.21
summary(mRace1$residuals)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -1.62   -1.62   -1.31    0.00    2.61    4.22
summary(mInsurance1$residuals)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -1.87   -1.54   -1.54    0.00    2.71    5.00
summary(mInsurance1.ma$residuals)  # Equivalent model for the MedAssist variable
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -1.57   -1.57   -1.57    0.00    2.75    5.00
summary(mLocation1$residuals)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -1.94   -1.53   -1.36    0.00    2.90    4.68
summary(mLocationType1$residuals)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -1.58   -1.58   -1.34    0.00    2.71    3.95
summary(mPractice1$residuals)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -1.61   -1.46   -1.41    0.00    2.65    3.44
# For all categorical variables, the mean is 0.000

Multivariate logistic Regression Models

Using Any Significant Binaries

# Begin with a model that contains all parameters that were significant in
# univariate analysis. Since both Johns Hopkins and Bayview were
# significant and urban, I've included LocationType (urban = 1) instead.

# Null hypothesis for Deviance Test: The model predicts the probability of
# a patient's vaccine completion no better than the average probabiliy.

# Null hypothesis for Wald's Test: For each independent variabel, the
# coefficient is 0.
mAllSig8 <- glm(Completed ~ White + Black + MedicalAssist + Hospital + WhiteMarsh + 
    LocationType + Family + Obgyn, data = gardasil, family = binomial("logit"))
summary(mAllSig8)  # AIC: 1743, Residual deviance: 1725 on 1404  degrees of freedom
## 
## Call:
## glm(formula = Completed ~ White + Black + MedicalAssist + Hospital + 
##     WhiteMarsh + LocationType + Family + Obgyn, family = binomial("logit"), 
##     data = gardasil)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.384  -0.890  -0.738   1.269   1.983  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept)    -0.2301     0.1747   -1.32   0.1878   
## White           0.0173     0.1616    0.11   0.9146   
## Black          -0.4919     0.1810   -2.72   0.0066 **
## MedicalAssist  -0.5096     0.2057   -2.48   0.0132 * 
## Hospital        0.5188     0.2466    2.10   0.0354 * 
## WhiteMarsh      0.3301     0.2055    1.61   0.1083   
## LocationType   -0.4205     0.1770   -2.38   0.0175 * 
## Family         -0.5449     0.1718   -3.17   0.0015 **
## Obgyn          -0.1623     0.1551   -1.05   0.2955   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796  on 1412  degrees of freedom
## Residual deviance: 1725  on 1404  degrees of freedom
## AIC: 1743
## 
## Number of Fisher Scoring iterations: 4
# Use analysis of deviance table to see if adding the next independent
# variable improves the model.

# Null hypothesis: There is no difference in the deviance between the
# reduced and full models.
anova(mAllSig8, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##               Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                           1412       1796             
## White          1    17.62      1411       1778  2.7e-05 ***
## Black          1    10.17      1410       1768   0.0014 ** 
## MedicalAssist  1    18.52      1409       1750  1.7e-05 ***
## Hospital       1     4.29      1408       1745   0.0382 *  
## WhiteMarsh     1     8.43      1407       1737   0.0037 ** 
## LocationType   1     1.64      1406       1735   0.2010    
## Family         1     9.25      1405       1726   0.0024 ** 
## Obgyn          1     1.10      1404       1725   0.2946    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Obgyn is the least significant for both the Wald and LRT tests. Will
# eliminate it.
mAllSig7 <- glm(Completed ~ White + Black + MedicalAssist + Hospital + WhiteMarsh + 
    LocationType + Family, data = gardasil, family = binomial)
summary(mAllSig7)  # AIC: 1742.1, Residual deviance: 1726.1 on 1405  degrees
## 
## Call:
## glm(formula = Completed ~ White + Black + MedicalAssist + Hospital + 
##     WhiteMarsh + LocationType + Family, family = binomial, data = gardasil)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.382  -0.892  -0.721   1.307   1.949  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   -0.30563    0.15921   -1.92   0.0549 . 
## White          0.00692    0.16130    0.04   0.9658   
## Black         -0.49974    0.18089   -2.76   0.0057 **
## MedicalAssist -0.46878    0.20192   -2.32   0.0203 * 
## Hospital       0.51436    0.24625    2.09   0.0367 * 
## WhiteMarsh     0.25247    0.19141    1.32   0.1872   
## LocationType  -0.41001    0.17665   -2.32   0.0203 * 
## Family        -0.46318    0.15328   -3.02   0.0025 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1726.1  on 1405  degrees of freedom
## AIC: 1742
## 
## Number of Fisher Scoring iterations: 4
# The m7 (reduced) model has a smaller AIC, so keep it.
anova(mAllSig7, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Completed
## 
## Terms added sequentially (first to last)
## 
## 
##               Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                           1412       1796             
## White          1    17.62      1411       1778  2.7e-05 ***
## Black          1    10.17      1410       1768   0.0014 ** 
## MedicalAssist  1    18.52      1409       1750  1.7e-05 ***
## Hospital       1     4.29      1408       1745   0.0382 *  
## WhiteMarsh     1     8.43      1407       1737   0.0037 ** 
## LocationType   1     1.64      1406       1735   0.2010    
## Family         1     9.25      1405       1726   0.0024 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Though LocationType is insignificant in the LRT test, it is one of the
# last variables to be added. White is very insignificant in the Wald
# test. To further evaluate, I will try the single term deletion analysis
# so LocationType is not at a disadvantage for order of being added.

# Null hypothesis: There is no difference in the deviance between the
# reduced and full models.
drop1(mAllSig7, test = "LRT")
## Single term deletions
## 
## Model:
## Completed ~ White + Black + MedicalAssist + Hospital + WhiteMarsh + 
##     LocationType + Family
##               Df Deviance  AIC  LRT Pr(>Chi)   
## <none>               1726 1742                 
## White          1     1726 1740 0.00   0.9658   
## Black          1     1734 1748 7.58   0.0059 **
## MedicalAssist  1     1732 1746 5.50   0.0191 * 
## Hospital       1     1730 1744 4.32   0.0377 * 
## WhiteMarsh     1     1728 1742 1.74   0.1876   
## LocationType   1     1732 1746 5.44   0.0197 * 
## Family         1     1735 1749 9.25   0.0024 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# This confirms that White appears is not significant, so will eliminate
# it.
mAllSig6 <- glm(Completed ~ Black + MedicalAssist + Hospital + WhiteMarsh + 
    LocationType + Family, data = gardasil, family = binomial)
summary(mAllSig6)  # AIC: 1740.1, Residual deviance: 1726.1 on 1406  degrees
## 
## Call:
## glm(formula = Completed ~ Black + MedicalAssist + Hospital + 
##     WhiteMarsh + LocationType + Family, family = binomial, data = gardasil)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.381  -0.894  -0.721   1.307   1.950  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -0.300      0.105   -2.87  0.00404 ** 
## Black           -0.505      0.135   -3.74  0.00018 ***
## MedicalAssist   -0.469      0.202   -2.33  0.01993 *  
## Hospital         0.514      0.246    2.09  0.03676 *  
## WhiteMarsh       0.254      0.189    1.34  0.18021    
## LocationType    -0.410      0.176   -2.32  0.02019 *  
## Family          -0.464      0.153   -3.03  0.00241 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1726.1  on 1406  degrees of freedom
## AIC: 1740
## 
## Number of Fisher Scoring iterations: 4
# The reduced model has a smaller AIC, so keep it.
drop1(mAllSig6, test = "LRT")
## Single term deletions
## 
## Model:
## Completed ~ Black + MedicalAssist + Hospital + WhiteMarsh + LocationType + 
##     Family
##               Df Deviance  AIC   LRT Pr(>Chi)    
## <none>               1726 1740                   
## Black          1     1741 1753 14.43  0.00015 ***
## MedicalAssist  1     1732 1744  5.52  0.01876 *  
## Hospital       1     1730 1742  4.32  0.03774 *  
## WhiteMarsh     1     1728 1740  1.79  0.18069    
## LocationType   1     1732 1744  5.45  0.01958 *  
## Family         1     1735 1747  9.33  0.00226 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# WhiteMarsh is insignificant in the Wald and single term deletion tests,
# so will eliminate it.
mAllSig5 <- glm(Completed ~ Black + MedicalAssist + Hospital + LocationType + 
    Family, data = gardasil, family = binomial)
summary(mAllSig5)  # AIC: 1739.9, Residual deviance: 1727.9  on 1407  degrees
## 
## Call:
## glm(formula = Completed ~ Black + MedicalAssist + Hospital + 
##     LocationType + Family, family = binomial, data = gardasil)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.326  -0.892  -0.724   1.276   1.950  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.2289     0.0894   -2.56  0.01048 *  
## Black          -0.5191     0.1345   -3.86  0.00011 ***
## MedicalAssist  -0.4585     0.2014   -2.28  0.02283 *  
## Hospital        0.5714     0.2415    2.37  0.01799 *  
## LocationType   -0.4886     0.1660   -2.94  0.00324 ** 
## Family         -0.5339     0.1433   -3.73  0.00019 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1727.9  on 1407  degrees of freedom
## AIC: 1740
## 
## Number of Fisher Scoring iterations: 4
# The reduced model has a smaller AIC, so keep it.
drop1(mAllSig5, test = "LRT")
## Single term deletions
## 
## Model:
## Completed ~ Black + MedicalAssist + Hospital + LocationType + 
##     Family
##               Df Deviance  AIC   LRT Pr(>Chi)    
## <none>               1728 1740                   
## Black          1     1743 1753 15.38  8.8e-05 ***
## MedicalAssist  1     1733 1743  5.28  0.02156 *  
## Hospital       1     1733 1743  5.53  0.01864 *  
## LocationType   1     1737 1747  8.82  0.00299 ** 
## Family         1     1742 1752 14.21  0.00016 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# MedicalAssist and Hospital are similar in terms of signficance. Try
# dropping each for m4 model.
mAllSig4a <- glm(Completed ~ Black + MedicalAssist + LocationType + Family, 
    data = gardasil, family = binomial)
summary(mAllSig4a)  # AIC: 1743.4, Residual deviance: 1733.4  on 1408
## 
## Call:
## glm(formula = Completed ~ Black + MedicalAssist + LocationType + 
##     Family, family = binomial, data = gardasil)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.094  -0.893  -0.737   1.263   1.989  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.1986     0.0884   -2.25  0.02460 *  
## Black          -0.5139     0.1342   -3.83  0.00013 ***
## MedicalAssist  -0.5685     0.1956   -2.91  0.00366 ** 
## LocationType   -0.3970     0.1605   -2.47  0.01336 *  
## Family         -0.5480     0.1430   -3.83  0.00013 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1733.4  on 1408  degrees of freedom
## AIC: 1743
## 
## Number of Fisher Scoring iterations: 4
mAllSig4b <- glm(Completed ~ Black + Hospital + LocationType + Family, data = gardasil, 
    family = binomial)
summary(mAllSig4b)  # AIC: 1743.2, Residual deviance: 1733.2  on 1408
## 
## Call:
## glm(formula = Completed ~ Black + Hospital + LocationType + Family, 
##     family = binomial, data = gardasil)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.382  -0.873  -0.813   1.282   1.846  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -0.2430     0.0892   -2.72  0.00647 ** 
## Black         -0.5644     0.1330   -4.24  2.2e-05 ***
## Hospital       0.7127     0.2358    3.02  0.00250 ** 
## LocationType  -0.6950     0.1412   -4.92  8.5e-07 ***
## Family        -0.5253     0.1432   -3.67  0.00024 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1733.2  on 1408  degrees of freedom
## AIC: 1743
## 
## Number of Fisher Scoring iterations: 4
# The m4h model is slightly better. Use the Chi-squared test to compare
# this with the m5 model.

# Null hypothesis: There is no difference in the deviance between the
# reduced and full models.
anova(mAllSig5, mAllSig4b, test = "Chisq")  # Reject null: the residual deviance is significantly higher for reduced model. Keep the mAllSig5 model.
## Analysis of Deviance Table
## 
## Model 1: Completed ~ Black + MedicalAssist + Hospital + LocationType + 
##     Family
## Model 2: Completed ~ Black + Hospital + LocationType + Family
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1      1407       1728                       
## 2      1408       1733 -1    -5.28    0.022 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Using Positive Significant Binaries

# Model all parameters that were significantly positive in predicting the
# probability of completion. For Medical Assistance, using inverse
# NonMedAssist variable, where yes = 1. For Location Type, using inverse
# Suburban variable instead, where Suburban = 1.

# Null hypothesis for Deviance Test: The model predicts the probability of
# a patient's vaccine completion no better than the average probabiliy.

# Null hypothesis for Wald's Test: For each independent variabel, the
# coefficient is 0.
mPositiveSig6 <- glm(Completed ~ White + Hospital + NonMedAssist + WhiteMarsh + 
    Suburban + Obgyn, data = gardasil, family = binomial)
summary(mPositiveSig6)  # AIC: 1756.1, Residual deviance: 1742.1  on 1406 degrees
## 
## Call:
## glm(formula = Completed ~ White + Hospital + NonMedAssist + WhiteMarsh + 
##     Suburban + Obgyn, family = binomial, data = gardasil)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.367  -0.959  -0.767   1.370   1.871  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -1.5594     0.1622   -9.61   <2e-16 ***
## White          0.3314     0.1196    2.77   0.0056 ** 
## Hospital       0.4879     0.2444    2.00   0.0458 *  
## NonMedAssist   0.4864     0.2025    2.40   0.0163 *  
## WhiteMarsh     0.3908     0.2039    1.92   0.0553 .  
## Suburban       0.2562     0.1648    1.56   0.1199    
## Obgyn          0.0429     0.1372    0.31   0.7544    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1742.1  on 1406  degrees of freedom
## AIC: 1756
## 
## Number of Fisher Scoring iterations: 4
# Use single term deletion analysis to evaluate whether any variables can
# be dropped without affectint the model.

# Null hypothesis: There is no difference in the deviance between the
# reduced and full models.
drop1(mPositiveSig6, test = "LRT")
## Single term deletions
## 
## Model:
## Completed ~ White + Hospital + NonMedAssist + WhiteMarsh + Suburban + 
##     Obgyn
##              Df Deviance  AIC  LRT Pr(>Chi)   
## <none>              1742 1756                 
## White         1     1750 1762 7.70   0.0055 **
## Hospital      1     1746 1758 3.95   0.0470 * 
## NonMedAssist  1     1748 1760 5.89   0.0152 * 
## WhiteMarsh    1     1746 1758 3.67   0.0554 . 
## Suburban      1     1745 1757 2.45   0.1178   
## Obgyn         1     1742 1754 0.10   0.7546   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Both Suburban and Obgyn are insignicant in the Wald and single term
# deletion tests. Obgyn is more insigificant, so will try eliminate it
# first.
mPositiveSig5 <- glm(Completed ~ White + Hospital + NonMedAssist + WhiteMarsh + 
    Suburban, data = gardasil, family = binomial)
summary(mPositiveSig5)  # AIC: 1754.2, Residual deviance: 1742.4  on 1407  degrees
## 
## Call:
## glm(formula = Completed ~ White + Hospital + NonMedAssist + WhiteMarsh + 
##     Suburban, family = binomial, data = gardasil)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.368  -0.956  -0.773   1.383   1.867  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -1.551      0.160   -9.71   <2e-16 ***
## White           0.334      0.119    2.80   0.0052 ** 
## Hospital        0.488      0.244    2.00   0.0457 *  
## NonMedAssist    0.496      0.200    2.48   0.0132 *  
## WhiteMarsh      0.421      0.180    2.34   0.0191 *  
## Suburban        0.249      0.163    1.53   0.1269    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1742.2  on 1407  degrees of freedom
## AIC: 1754
## 
## Number of Fisher Scoring iterations: 4
# Reduced model has lower AIC, so keep it.
drop1(mPositiveSig5, test = "LRT")
## Single term deletions
## 
## Model:
## Completed ~ White + Hospital + NonMedAssist + WhiteMarsh + Suburban
##              Df Deviance  AIC  LRT Pr(>Chi)   
## <none>              1742 1754                 
## White         1     1750 1760 7.84   0.0051 **
## Hospital      1     1746 1756 3.95   0.0468 * 
## NonMedAssist  1     1748 1758 6.28   0.0122 * 
## WhiteMarsh    1     1748 1758 5.45   0.0195 * 
## Suburban      1     1745 1755 2.36   0.1248   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Suburban is insignificant in both tests, so eliminate it.
mPositiveSig4 <- glm(Completed ~ White + Hospital + NonMedAssist + WhiteMarsh, 
    data = gardasil, family = binomial)
summary(mPositiveSig4)  # AIC: 1754.5, Residual deviance: 1744.5 on 1408  degrees
## 
## Call:
## glm(formula = Completed ~ White + Hospital + NonMedAssist + WhiteMarsh, 
##     family = binomial, data = gardasil)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.327  -0.965  -0.844   1.406   1.850  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -1.513      0.158   -9.60   <2e-16 ***
## White           0.326      0.119    2.74   0.0062 ** 
## Hospital        0.377      0.232    1.62   0.1047    
## NonMedAssist    0.664      0.167    3.98    7e-05 ***
## WhiteMarsh      0.490      0.174    2.82   0.0048 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1744.5  on 1408  degrees of freedom
## AIC: 1755
## 
## Number of Fisher Scoring iterations: 4
# Reduced model has a slightly higher AIC, so use the Chi-squared test to
# compare it with the m5 model.

# Null hypothesis: There is no difference in the deviance between the
# reduced and full models.
anova(mPositiveSig5, mPositiveSig4, test = "Chisq")  # Cannot reject null: there is no statistical difference in the deviance of the two models. So keep the mPositiveSig5 model because of lower AIC.
## Analysis of Deviance Table
## 
## Model 1: Completed ~ White + Hospital + NonMedAssist + WhiteMarsh + Suburban
## Model 2: Completed ~ White + Hospital + NonMedAssist + WhiteMarsh
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1      1407       1742                     
## 2      1408       1745 -1    -2.36     0.12

Interactions

# Use lowest AIC model, mAllSig5, for parameters. Interactions seem likely
# between race, insurance, and location. For the naming convention,
# mInterac.#letter, where # is the number of interactions and the letter
# denotes the permutation.
mInteract5.1a <- glm(Completed ~ Black * MedicalAssist + Hospital + LocationType + 
    Family, data = gardasil, family = binomial)
summary(mInteract5.1a)  # Not all variables contributed
## 
## Call:
## glm(formula = Completed ~ Black * MedicalAssist + Hospital + 
##     LocationType + Family, family = binomial, data = gardasil)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.324  -0.889  -0.719   1.278   1.976  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -0.2337     0.0901   -2.59  0.00951 ** 
## Black                -0.4903     0.1499   -3.27  0.00107 ** 
## MedicalAssist        -0.3932     0.2513   -1.56  0.11766    
## Hospital              0.5723     0.2414    2.37  0.01774 *  
## LocationType         -0.4964     0.1671   -2.97  0.00298 ** 
## Family               -0.5353     0.1433   -3.74  0.00019 ***
## Black:MedicalAssist  -0.1470     0.3424   -0.43  0.66773    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1727.7  on 1406  degrees of freedom
## AIC: 1742
## 
## Number of Fisher Scoring iterations: 4
mInteract5.1b <- glm(Completed ~ Black * Hospital + MedicalAssist + LocationType + 
    Family, data = gardasil, family = binomial)
summary(mInteract5.1b)  # Not all variables contributed
## 
## Call:
## glm(formula = Completed ~ Black * Hospital + MedicalAssist + 
##     LocationType + Family, family = binomial, data = gardasil)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.360  -0.886  -0.726   1.279   1.946  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -0.2356     0.0902   -2.61  0.00899 ** 
## Black           -0.4973     0.1394   -3.57  0.00036 ***
## Hospital         0.6543     0.2816    2.32  0.02016 *  
## MedicalAssist   -0.4666     0.2018   -2.31  0.02079 *  
## LocationType    -0.4832     0.1662   -2.91  0.00365 ** 
## Family          -0.5300     0.1434   -3.70  0.00022 ***
## Black:Hospital  -0.3001     0.5219   -0.58  0.56526    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1727.5  on 1406  degrees of freedom
## AIC: 1742
## 
## Number of Fisher Scoring iterations: 4
mInteract5.1c <- glm(Completed ~ Black * LocationType + Hospital + MedicalAssist + 
    Family, data = gardasil, family = binomial)
summary(mInteract5.1c)
## 
## Call:
## glm(formula = Completed ~ Black * LocationType + Hospital + MedicalAssist + 
##     Family, family = binomial, data = gardasil)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.313  -0.921  -0.760   1.289   2.011  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -0.2594     0.0917   -2.83  0.00469 ** 
## Black               -0.3793     0.1622   -2.34  0.01934 *  
## LocationType        -0.3585     0.1864   -1.92  0.05442 .  
## Hospital             0.5727     0.2421    2.37  0.01801 *  
## MedicalAssist       -0.4560     0.2015   -2.26  0.02363 *  
## Family              -0.5368     0.1430   -3.76  0.00017 ***
## Black:LocationType  -0.4259     0.2859   -1.49  0.13627    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1725.6  on 1406  degrees of freedom
## AIC: 1740
## 
## Number of Fisher Scoring iterations: 4
mInteract5.1d <- glm(Completed ~ Black * Family + LocationType + Hospital + 
    MedicalAssist, data = gardasil, family = binomial)
summary(mInteract5.1d)
## 
## Call:
## glm(formula = Completed ~ Black * Family + LocationType + Hospital + 
##     MedicalAssist, family = binomial, data = gardasil)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.327  -0.894  -0.719   1.274   1.935  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.2246     0.0909   -2.47  0.01343 *  
## Black          -0.5385     0.1541   -3.49  0.00047 ***
## Family         -0.5525     0.1606   -3.44  0.00058 ***
## LocationType   -0.4858     0.1663   -2.92  0.00349 ** 
## Hospital        0.5688     0.2417    2.35  0.01862 *  
## MedicalAssist  -0.4573     0.2015   -2.27  0.02325 *  
## Black:Family    0.0811     0.3126    0.26  0.79535    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1727.8  on 1406  degrees of freedom
## AIC: 1742
## 
## Number of Fisher Scoring iterations: 4
mInteract5.1e <- glm(Completed ~ Black + MedicalAssist * LocationType + Hospital + 
    Family, data = gardasil, family = binomial)
summary(mInteract5.1e)  # All variables contributed, with AIC: 1738.7, Residual deviance: 1720.6  on 1406 degrees
## 
## Call:
## glm(formula = Completed ~ Black + MedicalAssist * LocationType + 
##     Hospital + Family, family = binomial, data = gardasil)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.352  -0.894  -0.710   1.270   2.240  
## 
## Coefficients:
##                            Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                 -0.2159     0.0898   -2.41  0.01613 *  
## Black                       -0.4939     0.1351   -3.65  0.00026 ***
## MedicalAssist               -1.1723     0.4919   -2.38  0.01716 *  
## LocationType                -0.6050     0.1815   -3.33  0.00086 ***
## Hospital                     0.6167     0.2440    2.53  0.01149 *  
## Family                      -0.5408     0.1437   -3.76  0.00017 ***
## MedicalAssist:LocationType   0.9046     0.5398    1.68  0.09378 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1724.7  on 1406  degrees of freedom
## AIC: 1739
## 
## Number of Fisher Scoring iterations: 4
mInteract5.1f <- glm(Completed ~ Black + Hospital * LocationType + MedicalAssist + 
    Family, data = gardasil, family = binomial)
summary(mInteract5.1f)  # All variables contributed, with AIC: 1734.6, Residual deviance: 1720.6  on 1406  degrees
## 
## Call:
## glm(formula = Completed ~ Black + Hospital * LocationType + MedicalAssist + 
##     Family, family = binomial, data = gardasil)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.618  -0.873  -0.740   1.288   2.005  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            -0.2566     0.0903   -2.84  0.00447 ** 
## Black                  -0.5130     0.1348   -3.81  0.00014 ***
## Hospital                1.2498     0.3642    3.43  0.00060 ***
## LocationType           -0.3273     0.1749   -1.87  0.06137 .  
## MedicalAssist          -0.5716     0.2051   -2.79  0.00532 ** 
## Family                 -0.5250     0.1439   -3.65  0.00026 ***
## Hospital:LocationType  -1.3398     0.5088   -2.63  0.00846 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1720.6  on 1406  degrees of freedom
## AIC: 1735
## 
## Number of Fisher Scoring iterations: 4
mInteract5.2a <- glm(Completed ~ Black * (MedicalAssist + LocationType) + Hospital + 
    Family, data = gardasil, family = binomial)
summary(mInteract5.2a)  # Not all variables contributed
## 
## Call:
## glm(formula = Completed ~ Black * (MedicalAssist + LocationType) + 
##     Hospital + Family, family = binomial, data = gardasil)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.313  -0.918  -0.755   1.289   1.994  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -0.2590     0.0917   -2.82  0.00475 ** 
## Black                -0.3881     0.1640   -2.37  0.01800 *  
## MedicalAssist        -0.5171     0.2642   -1.96  0.05029 .  
## LocationType         -0.3334     0.1988   -1.68  0.09348 .  
## Hospital              0.5718     0.2424    2.36  0.01835 *  
## Family               -0.5357     0.1430   -3.75  0.00018 ***
## Black:MedicalAssist   0.1421     0.3948    0.36  0.71895    
## Black:LocationType   -0.4841     0.3286   -1.47  0.14070    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1725.5  on 1405  degrees of freedom
## AIC: 1742
## 
## Number of Fisher Scoring iterations: 4
mInteract5.2b <- glm(Completed ~ Black * (Hospital + LocationType) + MedicalAssist + 
    Family, data = gardasil, family = binomial)
summary(mInteract5.2b)  # Not all varaibles contributed
## 
## Call:
## glm(formula = Completed ~ Black * (Hospital + LocationType) + 
##     MedicalAssist + Family, family = binomial, data = gardasil)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.332  -0.922  -0.760   1.290   2.004  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -0.2621     0.0921   -2.84  0.00445 ** 
## Black               -0.3724     0.1635   -2.28  0.02273 *  
## Hospital             0.6186     0.2818    2.20  0.02816 *  
## LocationType        -0.3603     0.1865   -1.93  0.05344 .  
## MedicalAssist       -0.4602     0.2019   -2.28  0.02261 *  
## Family              -0.5345     0.1431   -3.73  0.00019 ***
## Black:Hospital      -0.1696     0.5315   -0.32  0.74972    
## Black:LocationType  -0.4093     0.2902   -1.41  0.15852    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1725.5  on 1405  degrees of freedom
## AIC: 1742
## 
## Number of Fisher Scoring iterations: 4
mInteract5.2c <- glm(Completed ~ Black + LocationType * (Hospital + MedicalAssist) + 
    Family, data = gardasil, family = binomial)
summary(mInteract5.2c)  # Not all varaibles contributed
## 
## Call:
## glm(formula = Completed ~ Black + LocationType * (Hospital + 
##     MedicalAssist) + Family, family = binomial, data = gardasil)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.616  -0.884  -0.737   1.283   2.237  
## 
## Coefficients:
##                            Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                 -0.2441     0.0907   -2.69  0.00712 ** 
## Black                       -0.4935     0.1354   -3.65  0.00027 ***
## LocationType                -0.4277     0.1917   -2.23  0.02567 *  
## Hospital                     1.2342     0.3642    3.39  0.00070 ***
## MedicalAssist               -1.1466     0.4920   -2.33  0.01978 *  
## Family                      -0.5314     0.1443   -3.68  0.00023 ***
## LocationType:Hospital       -1.2428     0.5142   -2.42  0.01566 *  
## LocationType:MedicalAssist   0.7296     0.5431    1.34  0.17917    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1718.6  on 1405  degrees of freedom
## AIC: 1735
## 
## Number of Fisher Scoring iterations: 4
mInteract5.2d <- glm(Completed ~ (Black + LocationType) * Hospital + MedicalAssist + 
    Family, data = gardasil, family = binomial)
summary(mInteract5.2d)  # Not all varaibles contributed
## 
## Call:
## glm(formula = Completed ~ (Black + LocationType) * Hospital + 
##     MedicalAssist + Family, family = binomial, data = gardasil)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.633  -0.875  -0.739   1.289   2.002  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            -0.2594     0.0908   -2.86  0.00429 ** 
## Black                  -0.5032     0.1393   -3.61  0.00030 ***
## LocationType           -0.3272     0.1749   -1.87  0.06133 .  
## Hospital                1.2876     0.3912    3.29  0.00100 ***
## MedicalAssist          -0.5735     0.2052   -2.80  0.00518 ** 
## Family                 -0.5231     0.1441   -3.63  0.00028 ***
## Black:Hospital         -0.1511     0.5488   -0.28  0.78303    
## LocationType:Hospital  -1.3290     0.5119   -2.60  0.00943 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1720.5  on 1405  degrees of freedom
## AIC: 1737
## 
## Number of Fisher Scoring iterations: 4
mInteract5.3a <- glm(Completed ~ Black * LocationType * MedicalAssist + Hospital + 
    Family, data = gardasil, family = binomial)
summary(mInteract5.3a)  # Not all variables contributed
## 
## Call:
## glm(formula = Completed ~ Black * LocationType * MedicalAssist + 
##     Hospital + Family, family = binomial, data = gardasil)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.344  -0.912  -0.748   1.284   2.292  
## 
## Coefficients:
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                       -0.2483     0.0921   -2.70  0.00700 ** 
## Black                             -0.3390     0.1669   -2.03  0.04223 *  
## LocationType                      -0.4140     0.2071   -2.00  0.04559 *  
## MedicalAssist                     -1.7590     1.0599   -1.66  0.09699 .  
## Hospital                           0.6316     0.2463    2.56  0.01033 *  
## Family                            -0.5440     0.1435   -3.79  0.00015 ***
## Black:LocationType                -0.7008     0.3853   -1.82  0.06892 .  
## Black:MedicalAssist                0.6891     1.1995    0.57  0.56562    
## LocationType:MedicalAssist         1.3894     1.0962    1.27  0.20499    
## Black:LocationType:MedicalAssist  -0.2781     1.2907   -0.22  0.82941    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1720.5  on 1403  degrees of freedom
## AIC: 1740
## 
## Number of Fisher Scoring iterations: 4
mInteract5.3b <- glm(Completed ~ Black * LocationType * Hospital + MedicalAssist + 
    Family, data = gardasil, family = binomial)
summary(mInteract5.3b)  # Not all variables contributed
## 
## Call:
## glm(formula = Completed ~ Black * LocationType * Hospital + MedicalAssist + 
##     Family, family = binomial, data = gardasil)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.810  -0.930  -0.752   1.312   2.042  
## 
## Coefficients:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  -0.3108     0.0934   -3.33  0.00088 ***
## Black                        -0.3026     0.1649   -1.83  0.06653 .  
## LocationType                 -0.1227     0.1964   -0.62  0.53205    
## Hospital                      1.7333     0.4672    3.71  0.00021 ***
## MedicalAssist                -0.5677     0.2056   -2.76  0.00575 ** 
## Family                       -0.5063     0.1440   -3.52  0.00044 ***
## Black:LocationType           -0.6489     0.3027   -2.14  0.03208 *  
## Black:Hospital               -1.9169     0.9571   -2.00  0.04520 *  
## LocationType:Hospital        -2.2648     0.6487   -3.49  0.00048 ***
## Black:LocationType:Hospital   3.1403     1.1846    2.65  0.00803 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1710.7  on 1403  degrees of freedom
## AIC: 1731
## 
## Number of Fisher Scoring iterations: 4
# Compare the two best interaction models.

# Null hypothesis: There is no difference in the deviance between the two
# models.
anova(mInteract5.1e, mInteract5.1f, test = "Chisq")  # Cannot reject null: there is no statistical difference in the deviance of the two models. So keep the mInteract7.1f model because of lower AIC.
## Analysis of Deviance Table
## 
## Model 1: Completed ~ Black + MedicalAssist * LocationType + Hospital + 
##     Family
## Model 2: Completed ~ Black + Hospital * LocationType + MedicalAssist + 
##     Family
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1      1406       1725                     
## 2      1406       1721  0     4.12

Best Model Comparisons and Summaries

# Compare the best non-interaction and interaction models.

# Null hypothesis: There is no difference in the deviance between the two
# models.
anova(mAllSig5, mInteract5.1f, test = "Chisq")  # Reject null: the deviance difference between the two models are significant, the interactio model appears better.
## Analysis of Deviance Table
## 
## Model 1: Completed ~ Black + MedicalAssist + Hospital + LocationType + 
##     Family
## Model 2: Completed ~ Black + Hospital * LocationType + MedicalAssist + 
##     Family
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)   
## 1      1407       1728                        
## 2      1406       1721  1      7.3   0.0069 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Summaries for non-interaction model
summary(mAllSig5)
## 
## Call:
## glm(formula = Completed ~ Black + MedicalAssist + Hospital + 
##     LocationType + Family, family = binomial, data = gardasil)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.326  -0.892  -0.724   1.276   1.950  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.2289     0.0894   -2.56  0.01048 *  
## Black          -0.5191     0.1345   -3.86  0.00011 ***
## MedicalAssist  -0.4585     0.2014   -2.28  0.02283 *  
## Hospital        0.5714     0.2415    2.37  0.01799 *  
## LocationType   -0.4886     0.1660   -2.94  0.00324 ** 
## Family         -0.5339     0.1433   -3.73  0.00019 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1727.9  on 1407  degrees of freedom
## AIC: 1740
## 
## Number of Fisher Scoring iterations: 4
exp(cbind(OR = coef(mAllSig5), confint(mAllSig5)))
## Waiting for profiling to be done...
##                   OR  2.5 % 97.5 %
## (Intercept)   0.7954 0.6672 0.9474
## Black         0.5950 0.4559 0.7728
## MedicalAssist 0.6322 0.4241 0.9352
## Hospital      1.7707 1.1008 2.8449
## LocationType  0.6135 0.4420 0.8477
## Family        0.5863 0.4418 0.7750
summary(mAllSig5$residuals)  # Mean is 0.000519
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -2.410  -1.490  -1.300   0.001   2.260   6.700
# Summaries for interaction model
summary(mInteract5.1f)
## 
## Call:
## glm(formula = Completed ~ Black + Hospital * LocationType + MedicalAssist + 
##     Family, family = binomial, data = gardasil)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.618  -0.873  -0.740   1.288   2.005  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            -0.2566     0.0903   -2.84  0.00447 ** 
## Black                  -0.5130     0.1348   -3.81  0.00014 ***
## Hospital                1.2498     0.3642    3.43  0.00060 ***
## LocationType           -0.3273     0.1749   -1.87  0.06137 .  
## MedicalAssist          -0.5716     0.2051   -2.79  0.00532 ** 
## Family                 -0.5250     0.1439   -3.65  0.00026 ***
## Hospital:LocationType  -1.3398     0.5088   -2.63  0.00846 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1796.0  on 1412  degrees of freedom
## Residual deviance: 1720.6  on 1406  degrees of freedom
## AIC: 1735
## 
## Number of Fisher Scoring iterations: 4
exp(cbind(OR = coef(mInteract5.1f), confint(mInteract5.1f)))
## Waiting for profiling to be done...
##                           OR  2.5 % 97.5 %
## (Intercept)           0.7737 0.6477 0.9229
## Black                 0.5987 0.4585 0.7779
## Hospital              3.4896 1.7439 7.3646
## LocationType          0.7209 0.5103 1.0139
## MedicalAssist         0.5646 0.3760 0.8412
## Family                0.5915 0.4452 0.7829
## Hospital:LocationType 0.2619 0.0940 0.6961
summary(mInteract5.1f$residuals)  # Mean is 0.000347
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -3.70   -1.46   -1.31    0.00    2.29    7.46