library(arm)
## Loading required package: MASS
## Loading required package: Matrix
## Loading required package: lattice
## Loading required package: lme4
## Attaching package: 'lme4'
## The following object(s) are masked from 'package:stats':
##
## AIC, BIC
## Loading required package: R2WinBUGS
## Loading required package: coda
## Attaching package: 'coda'
## The following object(s) are masked from 'package:lme4':
##
## HPDinterval
## Loading required package: abind
## Loading required package: foreign
## arm (Version 1.6-03, built: 2013-2-20)
## Working directory is /Users/xiwang/Dropbox/GEOG 5023 - offline/Labs/Lab 2
## Attaching package: 'arm'
## The following object(s) are masked from 'package:coda':
##
## traceplot
# Load data
gardasil <- read.table("/Users/xiwang/Dropbox/GEOG 5023 - offline/Labs/Lab 2/jh_gardasil.dat",
header = TRUE)
# Data summaries
summary(gardasil)
## Age AgeGroup Race Shots
## Min. :11.0 Min. :0.000 Min. :0.000 Min. :1.00
## 1st Qu.:15.0 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:1.00
## Median :18.0 Median :1.000 Median :0.000 Median :2.00
## Mean :18.5 Mean :0.504 Mean :0.782 Mean :2.07
## 3rd Qu.:22.0 3rd Qu.:1.000 3rd Qu.:1.000 3rd Qu.:3.00
## Max. :26.0 Max. :1.000 Max. :3.000 Max. :3.00
## Completed InsuranceType MedAssist Location
## Min. :0.000 Min. :0.00 Min. :0.000 Min. :1.00
## 1st Qu.:0.000 1st Qu.:1.00 1st Qu.:0.000 1st Qu.:1.00
## Median :0.000 Median :1.00 Median :0.000 Median :1.00
## Mean :0.332 Mean :1.33 Mean :0.195 Mean :2.01
## 3rd Qu.:1.000 3rd Qu.:2.00 3rd Qu.:0.000 3rd Qu.:4.00
## Max. :1.000 Max. :3.00 Max. :1.000 Max. :4.00
## LocationType PracticeType
## Min. :0.000 Min. :0.00
## 1st Qu.:0.000 1st Qu.:0.00
## Median :0.000 Median :1.00
## Mean :0.319 Mean :1.01
## 3rd Qu.:1.000 3rd Qu.:2.00
## Max. :1.000 Max. :2.00
# `ifelse(test, c(1), c(0))` returns a value that is the same shape as
# `test` which is filled with either 1 if it satisfies the test or 0 if it
# doesn't Race Dummy Variables
gardasil$White <- ifelse(gardasil$Race == 0, c(1), c(0))
gardasil$Black <- ifelse(gardasil$Race == 1, c(1), c(0))
gardasil$Hispanic <- ifelse(gardasil$Race == 2, c(1), c(0))
gardasil$Other <- ifelse(gardasil$Race == 3, c(1), c(0))
# Insurance Dummy Variables
gardasil$MedicalAssist <- ifelse(gardasil$InsuranceType == 0, c(1), c(0)) #MedicalAssist should match MedAssist, the original variable
gardasil$Private <- ifelse(gardasil$InsuranceType == 1, c(1), c(0))
gardasil$Hospital <- ifelse(gardasil$InsuranceType == 2, c(1), c(0))
gardasil$Military <- ifelse(gardasil$InsuranceType == 3, c(1), c(0))
# Location Dummy Variables
gardasil$Odenton <- ifelse(gardasil$Location == 1, c(1), c(0))
gardasil$WhiteMarsh <- ifelse(gardasil$Location == 2, c(1), c(0))
gardasil$JohnsHopkins <- ifelse(gardasil$Location == 3, c(1), c(0))
gardasil$Bayview <- ifelse(gardasil$Location == 4, c(1), c(0))
# Practice Type Dummy Variables
gardasil$Pediatric <- ifelse(gardasil$PracticeType == 0, c(1), c(0))
gardasil$Family <- ifelse(gardasil$PracticeType == 1, c(1), c(0))
gardasil$Obgyn <- ifelse(gardasil$PracticeType == 2, c(1), c(0))
# Medical Assistance Reverse Dummy Variable
gardasil$NonMedAssist <- ifelse(gardasil$MedAssist == 0, c(1), c(0)) # 0 = No, 1 = Yes
# Location Type Reverse Dummy Variable
gardasil$Suburban <- ifelse(gardasil$LocationType == 0, c(1), c(0)) # 0 = Urban, 1 = Suburban
# Check that MedAssist matches MedicalAssist
MACompare <- ifelse(gardasil$MedicalAssist == gardasil$MedAssist, c(0), c(1)) # Count mismatches
sum(MACompare) # This will be > 0 if there are mismatches; MedicalAssist and MedAssist are equivalent
## [1] 0
The naming convention for models is mData#.g, where m stands for model, Data is the category, # is the number of variables, g is the abbreviation of the group used under that variable if is the only one modeled.
The naming convention for probablity is pData#.g_Group, where p stands for probability of completion, Data#.g mirrors the naming convention for models, and Group is the group for which the probability is calculated.
mAge1 <- glm(gardasil$Completed ~ gardasil$Age, family = binomial("logit"))
summary(mAge1)
##
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Age, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.954 -0.909 -0.873 1.454 1.552
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.3341 0.2533 -1.32 0.19
## gardasil$Age -0.0198 0.0134 -1.47 0.14
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1793.8 on 1411 degrees of freedom
## AIC: 1798
##
## Number of Fisher Scoring iterations: 4
# P(completion) = (exp(-0.33412 - 0.01976*x))/(1 + exp(-0.33412 -
# 0.01976*x)), where x is age
exp(cbind(OR = coef(mAge1), confint(mAge1))) # The probability of completion does not apper to be significantly affected by age (the probability decreases slightly with age in this particular data)
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.7160 0.4353 1.176
## gardasil$Age 0.9804 0.9549 1.006
plot(gardasil$Age, fitted(mAge1), main = "Probability of Vaccine Completion by Age",
xlab = "Patient Age (years)", ylab = "Completion Probability", pch = 16)
mAgeGrp1 <- glm(gardasil$Completed ~ gardasil$AgeGroup, family = binomial("logit"))
summary(mAgeGrp1)
##
## Call:
## glm(formula = gardasil$Completed ~ gardasil$AgeGroup, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.932 -0.932 -0.865 1.444 1.527
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.6087 0.0791 -7.70 1.4e-14 ***
## gardasil$AgeGroup -0.1830 0.1131 -1.62 0.11
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1793.4 on 1411 degrees of freedom
## AIC: 1797
##
## Number of Fisher Scoring iterations: 4
pAgeGrp1_younger <- (exp(-0.60871 - 0.18302 * 0))/(1 + exp(-0.60871 - 0.18302 *
0))
pAgeGrp1_older <- (exp(-0.60871 - 0.18302 * 1))/(1 + exp(-0.60871 - 0.18302 *
1))
pAgeGrp1_younger # For ages 11-17
## [1] 0.3524
pAgeGrp1_older # For ages 18-26
## [1] 0.3118
exp(cbind(OR = coef(mAgeGrp1), confint(mAgeGrp1))) # The probability of completion does not apper to be significantly affected by age group (the probability decreases slightly with the older age group in this particular data)
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.5441 0.4654 0.6345
## gardasil$AgeGroup 0.8328 0.6669 1.0393
plot(gardasil$AgeGroup, fitted(mAgeGrp1), main = "Probability of Vaccine Completion by Age Group",
xlab = "Age Group\n(0 = ages 11-17, 1 = ages 18-26 yo)", ylab = "Completion Probability",
pch = 16)
# Race: All
mRace1 <- glm(gardasil$Completed ~ gardasil$White + gardasil$Hispanic + gardasil$Other,
family = binomial("logit")) # Using black as reference
summary(mRace1)
##
## Call:
## glm(formula = gardasil$Completed ~ gardasil$White + gardasil$Hispanic +
## gardasil$Other, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.982 -0.982 -0.736 1.386 1.697
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.169 0.112 -10.46 < 2e-16 ***
## gardasil$White 0.690 0.135 5.11 3.3e-07 ***
## gardasil$Hispanic 0.447 0.316 1.41 0.1573
## gardasil$Other 0.595 0.189 3.14 0.0017 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796 on 1412 degrees of freedom
## Residual deviance: 1768 on 1409 degrees of freedom
## AIC: 1776
##
## Number of Fisher Scoring iterations: 4
pRace1_White <- (exp(-1.1691 + 0.6902 * 1 + 0.447 * 0 + 0.5947 * 0))/(1 + exp(-1.1691 +
0.6902 * 1 + 0.447 * 0 + 0.5947 * 0))
pRace1_Black <- (exp(-1.1691 + 0.6902 * 0 + 0.447 * 0 + 0.5947 * 0))/(1 + exp(-1.1691 +
0.6902 * 0 + 0.447 * 0 + 0.5947 * 0))
pRace1_Hispanic <- (exp(-1.1691 + 0.6902 * 0 + 0.447 * 1 + 0.5947 * 0))/(1 +
exp(-1.1691 + 0.6902 * 0 + 0.447 * 1 + 0.5947 * 0))
pRace1_Other <- (exp(-1.1691 + 0.6902 * 0 + 0.447 * 0 + 0.5947 * 1))/(1 + exp(-1.1691 +
0.6902 * 0 + 0.447 * 0 + 0.5947 * 1))
pRace1_White
## [1] 0.3825
pRace1_Black
## [1] 0.237
pRace1_Hispanic
## [1] 0.3269
pRace1_Other
## [1] 0.3602
exp(cbind(OR = coef(mRace1), confint(mRace1)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.3107 0.2484 0.3852
## gardasil$White 1.9941 1.5337 2.6062
## gardasil$Hispanic 1.5635 0.8247 2.8674
## gardasil$Other 1.8124 1.2486 2.6241
plot(gardasil$Race, fitted(mRace1), main = "Probabiliy of Vaccine Completion by Race",
xlab = "Race\n(0 = white, 1 = black, 2 = Hispanice, 3 = other)", ylab = "Completion Probability",
pch = 16)
# Race: White
mRace1.w <- glm(gardasil$Completed ~ gardasil$White, family = binomial("logit"))
summary(mRace1.w)
##
## Call:
## glm(formula = gardasil$Completed ~ gardasil$White, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.982 -0.982 -0.806 1.386 1.601
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.9567 0.0856 -11.18 <2e-16 ***
## gardasil$White 0.4778 0.1145 4.17 3e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1778.4 on 1411 degrees of freedom
## AIC: 1782
##
## Number of Fisher Scoring iterations: 4
pRace1.w_no = (exp(-0.95673 + 0.47784 * 0))/(1 + exp(-0.95673 + 0.47784 * 0))
pRace1.w_yes = (exp(-0.95673 + 0.47784 * 1))/(1 + exp(-0.95673 + 0.47784 * 1))
pRace1.w_no
## [1] 0.2775
pRace1.w_yes
## [1] 0.3825
exp(cbind(OR = coef(mRace1.w), confint(mRace1.w))) # Whites are significantly more likely to complete than the other racial groups combined
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.3841 0.3241 0.4534
## gardasil$White 1.6126 1.2894 2.0200
# Race: Black
mRace1.b <- glm(gardasil$Completed ~ gardasil$Black, family = binomial("logit"))
summary(mRace1.b)
##
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Black, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.970 -0.970 -0.736 1.400 1.697
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.5097 0.0663 -7.69 1.5e-14 ***
## gardasil$Black -0.6594 0.1299 -5.08 3.9e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1768.9 on 1411 degrees of freedom
## AIC: 1773
##
## Number of Fisher Scoring iterations: 4
pRace1.b_no = (exp(-0.50973 - 0.65936 * 0))/(1 + exp(-0.50973 - 0.65936 * 0))
pRace1.b_yes = (exp(-0.50973 - 0.65936 * 1))/(1 + exp(-0.50973 - 0.65936 * 1))
pRace1.b_no
## [1] 0.3753
pRace1.b_yes
## [1] 0.237
exp(cbind(OR = coef(mRace1.b), confint(mRace1.b))) # Blacks are significantly less likely to complete than the other racial groups combined
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.6007 0.5271 0.6836
## gardasil$Black 0.5172 0.3997 0.6653
# Race: Hispanic
mRace1.h <- glm(gardasil$Completed ~ gardasil$Hispanic, family = binomial("logit"))
summary(mRace1.h)
##
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Hispanic, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.898 -0.898 -0.898 1.485 1.495
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.6987 0.0576 -12.14 <2e-16 ***
## gardasil$Hispanic -0.0235 0.3012 -0.08 0.94
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796 on 1412 degrees of freedom
## Residual deviance: 1796 on 1411 degrees of freedom
## AIC: 1800
##
## Number of Fisher Scoring iterations: 4
pRace1.h_no = (exp(-0.69866 - 0.02347 * 0))/(1 + exp(-0.69866 - 0.02347 * 0))
pRace1.h_yes = (exp(-0.69866 - 0.02347 * 1))/(1 + exp(-0.69866 - 0.02347 * 1))
pRace1.h_no
## [1] 0.3321
pRace1.h_yes
## [1] 0.3269
exp(cbind(OR = coef(mRace1.h), confint(mRace1.h))) # Hispanics are as likely to complete as the other racial groups combined
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.4972 0.4439 0.5562
## gardasil$Hispanic 0.9768 0.5291 1.7368
# Race: Other
mRace1.o <- glm(gardasil$Completed ~ gardasil$Other, family = binomial("logit"))
summary(mRace1.o)
##
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Other, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.945 -0.891 -0.891 1.494 1.494
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7189 0.0608 -11.82 <2e-16 ***
## gardasil$Other 0.1445 0.1644 0.88 0.38
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1795.2 on 1411 degrees of freedom
## AIC: 1799
##
## Number of Fisher Scoring iterations: 4
pRace1.o_no = (exp(-0.71893 + 0.1445 * 0))/(1 + exp(-0.71893 + 0.1445 * 0))
pRace1.o_yes = (exp(-0.71893 + 0.1445 * 1))/(1 + exp(-0.71893 + 0.1445 * 1))
pRace1.o_no
## [1] 0.3276
pRace1.o_yes
## [1] 0.3602
exp(cbind(OR = coef(mRace1.o), confint(mRace1.o)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.4873 0.4321 0.5485
## gardasil$Other 1.1555 0.8338 1.5899
# Insurance Type: All
mInsurance1 <- glm(gardasil$Completed ~ gardasil$Private + gardasil$Hospital +
gardasil$Military, family = binomial("logit")) # Using MedicalAssist as reference
summary(mInsurance1)
##
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Private + gardasil$Hospital +
## gardasil$Military, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.117 -0.928 -0.928 1.413 1.794
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.386 0.151 -9.20 < 2e-16 ***
## gardasil$Private 0.767 0.170 4.52 6.2e-06 ***
## gardasil$Hospital 1.243 0.266 4.68 2.9e-06 ***
## gardasil$Military 0.848 0.189 4.49 7.2e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1763.1 on 1409 degrees of freedom
## AIC: 1771
##
## Number of Fisher Scoring iterations: 4
pInsurance1_MedicalAssist <- (exp(-1.3863 + 0.767 * 0 + 1.2432 * 0 + 0.848 *
0))/(1 + exp(-1.3863 + 0.767 * 0 + 1.2432 * 0 + 0.848 * 0))
pInsurance1_Private <- (exp(-1.3863 + 0.767 * 1 + 1.2432 * 0 + 0.848 * 0))/(1 +
exp(-1.3863 + 0.767 * 1 + 1.2432 * 0 + 0.848 * 0))
pInsurance1_Hospital <- (exp(-1.3863 + 0.767 * 0 + 1.2432 * 1 + 0.848 * 0))/(1 +
exp(-1.3863 + 0.767 * 0 + 1.2432 * 1 + 0.848 * 0))
pInsurance1_Military <- (exp(-1.3863 + 0.767 * 0 + 1.2432 * 0 + 0.848 * 1))/(1 +
exp(-1.3863 + 0.767 * 0 + 1.2432 * 0 + 0.848 * 1))
pInsurance1_MedicalAssist
## [1] 0.2
pInsurance1_Private
## [1] 0.3499
pInsurance1_Hospital
## [1] 0.4643
pInsurance1_Military
## [1] 0.3686
exp(cbind(OR = coef(mInsurance1), confint(mInsurance1))) # Compared to medical assistance, all other insurance groups are more significantly likely to complete
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.250 0.1843 0.3332
## gardasil$Private 2.153 1.5534 3.0247
## gardasil$Hospital 3.467 2.0598 5.8496
## gardasil$Military 2.335 1.6191 3.3996
plot(gardasil$Insurance, fitted(mInsurance1), main = "Probability of Vaccine Completion by Insurance Type",
xlab = "Insurance Type\n(0 = medical assistance, 1 = private, 2 = hospital, 3 = military)",
ylab = "Completion Probability", pch = 16)
# Insurance Type: Medical Assistance
mInsurance1.ma <- glm(gardasil$Completed ~ gardasil$MedicalAssist, family = binomial("logit"))
summary(mInsurance1.ma)
##
## Call:
## glm(formula = gardasil$Completed ~ gardasil$MedicalAssist, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.951 -0.951 -0.951 1.422 1.794
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.5589 0.0616 -9.07 < 2e-16 ***
## gardasil$MedicalAssist -0.8274 0.1629 -5.08 3.8e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1767.3 on 1411 degrees of freedom
## AIC: 1771
##
## Number of Fisher Scoring iterations: 4
pInsurance1.ma_no = (exp(-0.55893 - 0.82737 * 0))/(1 + exp(-0.55893 - 0.82737 *
0))
pInsurance1.ma_yes = (exp(-0.55893 - 0.82737 * 1))/(1 + exp(-0.55893 - 0.82737 *
1))
pInsurance1.ma_no
## [1] 0.3638
pInsurance1.ma_yes
## [1] 0.2
exp(cbind(OR = coef(mInsurance1.ma), confint(mInsurance1.ma))) # Medical assistance insurance patients are less likely to complete than other groups combined
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.5718 0.5064 0.6448
## gardasil$MedicalAssist 0.4372 0.3152 0.5975
# Insurance Type: Private
mInsurance1.p <- glm(gardasil$Completed ~ gardasil$Private, family = binomial("logit"))
summary(mInsurance1.p)
##
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Private, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.928 -0.928 -0.867 1.449 1.524
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7859 0.0821 -9.57 <2e-16 ***
## gardasil$Private 0.1666 0.1132 1.47 0.14
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1793.8 on 1411 degrees of freedom
## AIC: 1798
##
## Number of Fisher Scoring iterations: 4
pInsurance1.p_no = (exp(-0.78593 + 0.16659 * 0))/(1 + exp(-0.78593 + 0.16659 *
0))
pInsurance1.p_yes = (exp(-0.78593 + 0.16659 * 1))/(1 + exp(-0.78593 + 0.16659 *
1))
pInsurance1.p_no
## [1] 0.313
pInsurance1.p_yes
## [1] 0.3499
exp(cbind(OR = coef(mInsurance1.p), confint(mInsurance1.p))) # Private insurance patients are not significantly less or more likely to complete than the other groups combined (though their probability is slightly higher in this particular data)
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.4557 0.3873 0.5344
## gardasil$Private 1.1813 0.9464 1.4753
# Insurance Type: Hospital
mInsurance1.h <- glm(gardasil$Completed ~ gardasil$Hospital, family = binomial("logit"))
summary(mInsurance1.h)
##
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Hospital, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.117 -0.884 -0.884 1.502 1.502
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7375 0.0586 -12.58 <2e-16 ***
## gardasil$Hospital 0.5944 0.2265 2.62 0.0087 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1789.3 on 1411 degrees of freedom
## AIC: 1793
##
## Number of Fisher Scoring iterations: 4
pInsurance1.h_no = (exp(-0.7375 + 0.5944 * 0))/(1 + exp(-0.7375 + 0.5944 * 0))
pInsurance1.h_yes = (exp(-0.7375 + 0.5944 * 1))/(1 + exp(-0.7375 + 0.5944 *
1))
pInsurance1.h_no
## [1] 0.3236
pInsurance1.h_yes
## [1] 0.4643
exp(cbind(OR = coef(mInsurance1.h), confint(mInsurance1.h))) # Hospital insurance patients are significantly more likely to complete than the other groups combined
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.4783 0.426 0.5362
## gardasil$Hospital 1.8119 1.158 2.8230
# Insurance Type: Military
mInsurance1.mi <- glm(gardasil$Completed ~ gardasil$Military, family = binomial("logit"))
summary(mInsurance1.mi)
##
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Military, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.959 -0.879 -0.879 1.413 1.508
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7505 0.0651 -11.52 <2e-16 ***
## gardasil$Military 0.2122 0.1312 1.62 0.11
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1793.4 on 1411 degrees of freedom
## AIC: 1797
##
## Number of Fisher Scoring iterations: 4
pInsurance1.mi_no = (exp(-0.75055 + 0.21223 * 0))/(1 + exp(-0.75055 + 0.21223 *
0))
pInsurance1.mi_yes = (exp(-0.75055 + 0.21223 * 1))/(1 + exp(-0.75055 + 0.21223 *
1))
pInsurance1.mi_no
## [1] 0.3207
pInsurance1.mi_yes
## [1] 0.3686
exp(cbind(OR = coef(mInsurance1.mi), confint(mInsurance1.mi))) # Military insurance patients are not significantly less or more likely to complete than the other groups combined (though their probability is slightly higher in this particular data)
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.4721 0.4151 0.5359
## gardasil$Military 1.2364 0.9545 1.5971
# The probabilities, odds ratio, and confidence interval should be the
# same as the 'mInsurance1.ma'' model because the 'MedAssist'' and
# 'MedicalAssist'' variables are exactly the same.
plot(gardasil$MedicalAssist, fitted(mInsurance1.ma), main = "Probability of Vaccine Completion by Medical Assistance",
xlab = "Medical Assistance \n(0 = no, 1 = yes)", ylab = "Completion Probability",
pch = 16)
# Location: All
mLocation1 <- glm(gardasil$Completed ~ gardasil$Odenton + gardasil$WhiteMarsh +
gardasil$Bayview, family = binomial("logit")) # Using Johns Hopkins Outpatient Center as reference
summary(mLocation1)
##
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Odenton + gardasil$WhiteMarsh +
## gardasil$Bayview, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.152 -0.919 -0.781 1.460 1.757
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.304 0.259 -5.04 4.6e-07 ***
## gardasil$Odenton 0.661 0.269 2.46 0.014 *
## gardasil$WhiteMarsh 1.243 0.302 4.12 3.8e-05 ***
## gardasil$Bayview 0.274 0.285 0.96 0.336
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1764.9 on 1409 degrees of freedom
## AIC: 1773
##
## Number of Fisher Scoring iterations: 4
pLocation1_Odenton <- (exp(-1.3041 + 0.6612 * 1 + 1.2434 * 0 + 0.2744 * 0))/(1 +
exp(-1.3041 + 0.6612 * 1 + 1.2434 * 0 + 0.2744 * 0))
pLocation1_WhiteMarsh <- (exp(-1.3041 + 0.6612 * 0 + 1.2434 * 1 + 0.2744 * 0))/(1 +
exp(-1.3041 + 0.6612 * 0 + 1.2434 * 1 + 0.2744 * 0))
pLocation1_JohnsHopkins <- (exp(-1.3041 + 0.6612 * 0 + 1.2434 * 0 + 0.2744 *
0))/(1 + exp(-1.3041 + 0.6612 * 0 + 1.2434 * 0 + 0.2744 * 0))
pLocation1_Bayview <- (exp(-1.3041 + 0.6612 * 0 + 1.2434 * 0 + 0.2744 * 1))/(1 +
exp(-1.3041 + 0.6612 * 0 + 1.2434 * 0 + 0.2744 * 1))
pLocation1_Odenton
## [1] 0.3446
pLocation1_WhiteMarsh
## [1] 0.4848
pLocation1_JohnsHopkins
## [1] 0.2135
pLocation1_Bayview
## [1] 0.2631
exp(cbind(OR = coef(mLocation1), confint(mLocation1))) # Compared to Johns Hopkins, the Odenton and White Marsh groups are significantly more likely to complete while the Bayview group is not significantly less or more likely (though Bayview's probability is slightly higher in this particular data)
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.2714 0.1590 0.441
## gardasil$Odenton 1.9372 1.1660 3.369
## gardasil$WhiteMarsh 3.4675 1.9489 6.397
## gardasil$Bayview 1.3158 0.7656 2.352
plot(gardasil$Location, fitted(mLocation1), main = "Probability of Vaccine Completion by Location",
xlab = "Location\n(1 = Odenton, 2 = White Marsh, 3 = Johns Hopkins, 4 = Bayview)",
ylab = "Completion Probability", pch = 16)
# Location: Odenton
mLocation1.o <- glm(gardasil$Completed ~ gardasil$Odenton, family = binomial("logit"))
summary(mLocation1.o)
##
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Odenton, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.919 -0.919 -0.871 1.460 1.519
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7748 0.0868 -8.93 <2e-16 ***
## gardasil$Odenton 0.1320 0.1144 1.15 0.25
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1794.7 on 1411 degrees of freedom
## AIC: 1799
##
## Number of Fisher Scoring iterations: 4
pLocation1.o_no = (exp(-0.77477 + 0.13196 * 0))/(1 + exp(-0.77477 + 0.13196 *
0))
pLocation1.o_yes = (exp(-0.77477 + 0.13196 * 1))/(1 + exp(-0.77477 + 0.13196 *
1))
pLocation1.o_no
## [1] 0.3154
pLocation1.o_yes
## [1] 0.3446
exp(cbind(OR = coef(mLocation1.o), confint(mLocation1.o))) # Odenton patients are not significantly less or more likely to complete than the other groups combined (though their probablity is lightly higher in this particular data)
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.4608 0.3880 0.5453
## gardasil$Odenton 1.1411 0.9124 1.4287
# Location: White Marsh
mLocation1.w <- glm(gardasil$Completed ~ gardasil$WhiteMarsh, family = binomial("logit"))
summary(mLocation1.w)
##
## Call:
## glm(formula = gardasil$Completed ~ gardasil$WhiteMarsh, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.152 -0.864 -0.864 1.527 1.527
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7922 0.0611 -12.96 < 2e-16 ***
## gardasil$WhiteMarsh 0.7316 0.1673 4.37 1.2e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1777.2 on 1411 degrees of freedom
## AIC: 1781
##
## Number of Fisher Scoring iterations: 4
pLocation1.w_no = (exp(-0.79219 + 0.73156 * 0))/(1 + exp(-0.79219 + 0.73156 *
0))
pLocation1.w_yes = (exp(-0.79219 + 0.73156 * 1))/(1 + exp(-0.79219 + 0.73156 *
1))
pLocation1.w_no
## [1] 0.3117
pLocation1.w_yes
## [1] 0.4848
exp(cbind(OR = coef(mLocation1.w), confint(mLocation1.w))) # White Marsh patients are significantly more likely to complete than the other groups combined
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.4529 0.4014 0.510
## gardasil$WhiteMarsh 2.0783 1.4960 2.886
# Location: Johns Hopkins Outpatient Center
mLocation1.j <- glm(gardasil$Completed ~ gardasil$JohnsHopkins, family = binomial("logit"))
summary(mLocation1.j)
##
## Call:
## glm(formula = gardasil$Completed ~ gardasil$JohnsHopkins, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.911 -0.911 -0.911 1.469 1.757
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.664 0.058 -11.44 <2e-16 ***
## gardasil$JohnsHopkins -0.640 0.265 -2.41 0.016 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1789.5 on 1411 degrees of freedom
## AIC: 1794
##
## Number of Fisher Scoring iterations: 4
pLocation1.j_no = (exp(-0.66383 - 0.64022 * 0))/(1 + exp(-0.66383 - 0.64022 *
0))
pLocation1.j_yes = (exp(-0.66383 - 0.64022 * 1))/(1 + exp(-0.66383 - 0.64022 *
1))
pLocation1.j_no
## [1] 0.3399
pLocation1.j_yes
## [1] 0.2135
exp(cbind(OR = coef(mLocation1.j), confint(mLocation1.j))) # Johns Hopkins patients are significantly less likely to complete than the other groups combined
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.5149 0.4592 0.5765
## gardasil$JohnsHopkins 0.5272 0.3054 0.8683
# Location: Bayview
mLocation1.b <- glm(gardasil$Completed ~ gardasil$Bayview, family = binomial("logit"))
summary(mLocation1.b)
##
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Bayview, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.937 -0.937 -0.781 1.438 1.634
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.5949 0.0644 -9.24 <2e-16 ***
## gardasil$Bayview -0.4347 0.1358 -3.20 0.0014 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1785.4 on 1411 degrees of freedom
## AIC: 1789
##
## Number of Fisher Scoring iterations: 4
pLocation1.b_no = (exp(-0.59489 - 0.43473 * 0))/(1 + exp(-0.59489 - 0.43473 *
0))
pLocation1.b_yes = (exp(-0.59489 - 0.43473 * 1))/(1 + exp(-0.59489 - 0.43473 *
1))
pLocation1.b_no
## [1] 0.3555
pLocation1.b_yes
## [1] 0.2632
exp(cbind(OR = coef(mLocation1.b), confint(mLocation1.b))) # Bayview patients are significantly less likely to complete than the other groups combined
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.5516 0.4858 0.6254
## gardasil$Bayview 0.6474 0.4945 0.8423
mLocationType1 <- glm(gardasil$Completed ~ gardasil$LocationType, family = binomial("logit"))
summary(mLocationType1)
##
## Call:
## glm(formula = gardasil$Completed ~ gardasil$LocationType, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.959 -0.959 -0.764 1.413 1.657
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.5381 0.0668 -8.06 7.9e-16 ***
## gardasil$LocationType -0.5429 0.1273 -4.26 2.0e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1777.1 on 1411 degrees of freedom
## AIC: 1781
##
## Number of Fisher Scoring iterations: 4
pLocationType1_suburban = (exp(-0.5381 - 0.5429 * 0))/(1 + exp(-0.5381 - 0.5429 *
0))
pLocationType1_urban = (exp(-0.5381 - 0.5429 * 1))/(1 + exp(-0.5381 - 0.5429 *
1))
pLocationType1_suburban
## [1] 0.3686
pLocationType1_urban
## [1] 0.2533
exp(cbind(OR = coef(mLocationType1), confint(mLocationType1))) # Urban patients are significantly less likely to complete compared to suburban patients
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.5839 0.5118 0.6651
## gardasil$LocationType 0.5811 0.4516 0.7441
plot(gardasil$LocationType, fitted(mLocationType1), main = "Probability of Vaccine Completion by Location Type",
xlab = "Location Type\n(0 = suburban, 1 = urban)", ylab = "Completion Probability",
pch = 16)
# Practice Type: All
mPractice1 <- glm(gardasil$Completed ~ gardasil$Pediatric + gardasil$Obgyn,
family = binomial("logit")) # Using family practice as reference
summary(mPractice1)
##
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Pediatric + gardasil$Obgyn,
## family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.973 -0.869 -0.828 1.397 1.573
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.893 0.115 -7.75 9.3e-15 ***
## gardasil$Pediatric 0.115 0.149 0.77 0.4432
## gardasil$Obgyn 0.392 0.146 2.68 0.0073 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1787.6 on 1410 degrees of freedom
## AIC: 1794
##
## Number of Fisher Scoring iterations: 4
pPractice1_Pediatric <- (exp(-0.8934 + 0.1145 * 1 + 0.3916 * 0))/(1 + exp(-0.8934 +
0.1145 * 1 + 0.3916 * 0))
pPractice1_Family <- (exp(-0.8934 + 0.1145 * 0 + 0.3916 * 0))/(1 + exp(-0.8934 +
0.1145 * 0 + 0.3916 * 0))
pPractice1_Obgyn <- (exp(-0.8934 + 0.1145 * 0 + 0.3916 * 1))/(1 + exp(-0.8934 +
0.1145 * 0 + 0.3916 * 1))
pPractice1_Pediatric
## [1] 0.3146
pPractice1_Family
## [1] 0.2904
pPractice1_Obgyn
## [1] 0.3771
exp(cbind(OR = coef(mPractice1), confint(mPractice1))) # Compared to the family group, the OB-GYN group is significantly more likely to complete while the pediatric group is not significantly less or more likely (though pediatric's probability is slightly higher in this particular data)
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.4093 0.3252 0.5113
## gardasil$Pediatric 1.1213 0.8377 1.5048
## gardasil$Obgyn 1.4793 1.1132 1.9728
plot(gardasil$PracticeType, fitted(mPractice1), main = "Probability of Vaccine Completion by Practice Type",
xlab = "Practice Type\n(0 = pediatric, 1 = family, 2 = OB-GYN)", ylab = "Completion Probability",
pch = 16)
# Practice Type: Pediatric
mPractice1.p <- glm(gardasil$Completed ~ gardasil$Pediatric, family = binomial("logit"))
summary(mPractice1.p)
##
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Pediatric, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.915 -0.915 -0.869 1.465 1.521
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.6550 0.0704 -9.31 <2e-16 ***
## gardasil$Pediatric -0.1239 0.1181 -1.05 0.29
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1794.9 on 1411 degrees of freedom
## AIC: 1799
##
## Number of Fisher Scoring iterations: 4
pPractice1.p_no = (exp(-0.65497 - 0.1239 * 0))/(1 + exp(-0.65497 - 0.1239 *
0))
pPractice1.p_yes = (exp(-0.65497 - 0.1239 * 1))/(1 + exp(-0.65497 - 0.1239 *
1))
pPractice1.p_no
## [1] 0.3419
pPractice1.p_yes
## [1] 0.3146
exp(cbind(OR = coef(mPractice1.p), confint(mPractice1.p))) # Pediatric practice patients are not significantly less or more likely to complete compared to the other groups combined
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.5195 0.4521 0.5957
## gardasil$Pediatric 0.8835 0.7001 1.1126
# Practice Type: Family
mPractice1.f <- glm(gardasil$Completed ~ gardasil$Family, family = binomial("logit"))
summary(mPractice1.f)
##
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Family, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.922 -0.922 -0.828 1.456 1.573
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.6350 0.0649 -9.78 <2e-16 ***
## gardasil$Family -0.2584 0.1323 -1.95 0.051 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1792.1 on 1411 degrees of freedom
## AIC: 1796
##
## Number of Fisher Scoring iterations: 4
pPractice1.f_no = (exp(-0.63502 - 0.25837 * 0))/(1 + exp(-0.63502 - 0.25837 *
0))
pPractice1.f_yes = (exp(-0.63502 - 0.25837 * 1))/(1 + exp(-0.63502 - 0.25837 *
1))
pPractice1.f_no
## [1] 0.3464
pPractice1.f_yes
## [1] 0.2904
exp(cbind(OR = coef(mPractice1.f), confint(mPractice1.f))) # Family practice patients are (borderline) significantly less likely to complete compared to the other groups combined
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.5299 0.4662 0.6014
## gardasil$Family 0.7723 0.5943 0.9987
# Practice Type: OB-GYN
mPractice1.o <- glm(gardasil$Completed ~ gardasil$Obgyn, family = binomial("logit"))
summary(mPractice1.o)
##
## Call:
## glm(formula = gardasil$Completed ~ gardasil$Obgyn, family = binomial("logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.973 -0.852 -0.852 1.397 1.542
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.8257 0.0732 -11.3 <2e-16 ***
## gardasil$Obgyn 0.3239 0.1156 2.8 0.0051 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1788.2 on 1411 degrees of freedom
## AIC: 1792
##
## Number of Fisher Scoring iterations: 4
pPractice1.o_no = (exp(-0.82575 + 0.32392 * 0))/(1 + exp(-0.82575 + 0.32392 *
0))
pPractice1.o_yes = (exp(-0.82575 + 0.32392 * 1))/(1 + exp(-0.82575 + 0.32392 *
1))
pPractice1.o_no
## [1] 0.3045
pPractice1.o_yes
## [1] 0.3771
exp(cbind(OR = coef(mPractice1.o), confint(mPractice1.o))) # OB-GYN practice patients are significanly more likely to complete compared to the other groups combined
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.4379 0.3788 0.5049
## gardasil$Obgyn 1.3825 1.1020 1.7337
summary(mAge1$residuals) # Mean is 0.000179
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.58 -1.51 -1.46 0.00 2.88 3.33
summary(mAgeGrp1$residuals)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.54 -1.54 -1.45 0.00 2.84 3.21
summary(mRace1$residuals)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.62 -1.62 -1.31 0.00 2.61 4.22
summary(mInsurance1$residuals)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.87 -1.54 -1.54 0.00 2.71 5.00
summary(mInsurance1.ma$residuals) # Equivalent model for the MedAssist variable
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.57 -1.57 -1.57 0.00 2.75 5.00
summary(mLocation1$residuals)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.94 -1.53 -1.36 0.00 2.90 4.68
summary(mLocationType1$residuals)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.58 -1.58 -1.34 0.00 2.71 3.95
summary(mPractice1$residuals)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.61 -1.46 -1.41 0.00 2.65 3.44
# For all categorical variables, the mean is 0.000
# Begin with a model that contains all parameters that were significant in
# univariate analysis. Since both Johns Hopkins and Bayview were
# significant and urban, I've included LocationType (urban = 1) instead.
# Null hypothesis for Deviance Test: The model predicts the probability of
# a patient's vaccine completion no better than the average probabiliy.
# Null hypothesis for Wald's Test: For each independent variabel, the
# coefficient is 0.
mAllSig8 <- glm(Completed ~ White + Black + MedicalAssist + Hospital + WhiteMarsh +
LocationType + Family + Obgyn, data = gardasil, family = binomial("logit"))
summary(mAllSig8) # AIC: 1743, Residual deviance: 1725 on 1404 degrees of freedom
##
## Call:
## glm(formula = Completed ~ White + Black + MedicalAssist + Hospital +
## WhiteMarsh + LocationType + Family + Obgyn, family = binomial("logit"),
## data = gardasil)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.384 -0.890 -0.738 1.269 1.983
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2301 0.1747 -1.32 0.1878
## White 0.0173 0.1616 0.11 0.9146
## Black -0.4919 0.1810 -2.72 0.0066 **
## MedicalAssist -0.5096 0.2057 -2.48 0.0132 *
## Hospital 0.5188 0.2466 2.10 0.0354 *
## WhiteMarsh 0.3301 0.2055 1.61 0.1083
## LocationType -0.4205 0.1770 -2.38 0.0175 *
## Family -0.5449 0.1718 -3.17 0.0015 **
## Obgyn -0.1623 0.1551 -1.05 0.2955
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796 on 1412 degrees of freedom
## Residual deviance: 1725 on 1404 degrees of freedom
## AIC: 1743
##
## Number of Fisher Scoring iterations: 4
# Use analysis of deviance table to see if adding the next independent
# variable improves the model.
# Null hypothesis: There is no difference in the deviance between the
# reduced and full models.
anova(mAllSig8, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## White 1 17.62 1411 1778 2.7e-05 ***
## Black 1 10.17 1410 1768 0.0014 **
## MedicalAssist 1 18.52 1409 1750 1.7e-05 ***
## Hospital 1 4.29 1408 1745 0.0382 *
## WhiteMarsh 1 8.43 1407 1737 0.0037 **
## LocationType 1 1.64 1406 1735 0.2010
## Family 1 9.25 1405 1726 0.0024 **
## Obgyn 1 1.10 1404 1725 0.2946
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Obgyn is the least significant for both the Wald and LRT tests. Will
# eliminate it.
mAllSig7 <- glm(Completed ~ White + Black + MedicalAssist + Hospital + WhiteMarsh +
LocationType + Family, data = gardasil, family = binomial)
summary(mAllSig7) # AIC: 1742.1, Residual deviance: 1726.1 on 1405 degrees
##
## Call:
## glm(formula = Completed ~ White + Black + MedicalAssist + Hospital +
## WhiteMarsh + LocationType + Family, family = binomial, data = gardasil)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.382 -0.892 -0.721 1.307 1.949
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.30563 0.15921 -1.92 0.0549 .
## White 0.00692 0.16130 0.04 0.9658
## Black -0.49974 0.18089 -2.76 0.0057 **
## MedicalAssist -0.46878 0.20192 -2.32 0.0203 *
## Hospital 0.51436 0.24625 2.09 0.0367 *
## WhiteMarsh 0.25247 0.19141 1.32 0.1872
## LocationType -0.41001 0.17665 -2.32 0.0203 *
## Family -0.46318 0.15328 -3.02 0.0025 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1726.1 on 1405 degrees of freedom
## AIC: 1742
##
## Number of Fisher Scoring iterations: 4
# The m7 (reduced) model has a smaller AIC, so keep it.
anova(mAllSig7, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Completed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1412 1796
## White 1 17.62 1411 1778 2.7e-05 ***
## Black 1 10.17 1410 1768 0.0014 **
## MedicalAssist 1 18.52 1409 1750 1.7e-05 ***
## Hospital 1 4.29 1408 1745 0.0382 *
## WhiteMarsh 1 8.43 1407 1737 0.0037 **
## LocationType 1 1.64 1406 1735 0.2010
## Family 1 9.25 1405 1726 0.0024 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Though LocationType is insignificant in the LRT test, it is one of the
# last variables to be added. White is very insignificant in the Wald
# test. To further evaluate, I will try the single term deletion analysis
# so LocationType is not at a disadvantage for order of being added.
# Null hypothesis: There is no difference in the deviance between the
# reduced and full models.
drop1(mAllSig7, test = "LRT")
## Single term deletions
##
## Model:
## Completed ~ White + Black + MedicalAssist + Hospital + WhiteMarsh +
## LocationType + Family
## Df Deviance AIC LRT Pr(>Chi)
## <none> 1726 1742
## White 1 1726 1740 0.00 0.9658
## Black 1 1734 1748 7.58 0.0059 **
## MedicalAssist 1 1732 1746 5.50 0.0191 *
## Hospital 1 1730 1744 4.32 0.0377 *
## WhiteMarsh 1 1728 1742 1.74 0.1876
## LocationType 1 1732 1746 5.44 0.0197 *
## Family 1 1735 1749 9.25 0.0024 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# This confirms that White appears is not significant, so will eliminate
# it.
mAllSig6 <- glm(Completed ~ Black + MedicalAssist + Hospital + WhiteMarsh +
LocationType + Family, data = gardasil, family = binomial)
summary(mAllSig6) # AIC: 1740.1, Residual deviance: 1726.1 on 1406 degrees
##
## Call:
## glm(formula = Completed ~ Black + MedicalAssist + Hospital +
## WhiteMarsh + LocationType + Family, family = binomial, data = gardasil)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.381 -0.894 -0.721 1.307 1.950
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.300 0.105 -2.87 0.00404 **
## Black -0.505 0.135 -3.74 0.00018 ***
## MedicalAssist -0.469 0.202 -2.33 0.01993 *
## Hospital 0.514 0.246 2.09 0.03676 *
## WhiteMarsh 0.254 0.189 1.34 0.18021
## LocationType -0.410 0.176 -2.32 0.02019 *
## Family -0.464 0.153 -3.03 0.00241 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1726.1 on 1406 degrees of freedom
## AIC: 1740
##
## Number of Fisher Scoring iterations: 4
# The reduced model has a smaller AIC, so keep it.
drop1(mAllSig6, test = "LRT")
## Single term deletions
##
## Model:
## Completed ~ Black + MedicalAssist + Hospital + WhiteMarsh + LocationType +
## Family
## Df Deviance AIC LRT Pr(>Chi)
## <none> 1726 1740
## Black 1 1741 1753 14.43 0.00015 ***
## MedicalAssist 1 1732 1744 5.52 0.01876 *
## Hospital 1 1730 1742 4.32 0.03774 *
## WhiteMarsh 1 1728 1740 1.79 0.18069
## LocationType 1 1732 1744 5.45 0.01958 *
## Family 1 1735 1747 9.33 0.00226 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# WhiteMarsh is insignificant in the Wald and single term deletion tests,
# so will eliminate it.
mAllSig5 <- glm(Completed ~ Black + MedicalAssist + Hospital + LocationType +
Family, data = gardasil, family = binomial)
summary(mAllSig5) # AIC: 1739.9, Residual deviance: 1727.9 on 1407 degrees
##
## Call:
## glm(formula = Completed ~ Black + MedicalAssist + Hospital +
## LocationType + Family, family = binomial, data = gardasil)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.326 -0.892 -0.724 1.276 1.950
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2289 0.0894 -2.56 0.01048 *
## Black -0.5191 0.1345 -3.86 0.00011 ***
## MedicalAssist -0.4585 0.2014 -2.28 0.02283 *
## Hospital 0.5714 0.2415 2.37 0.01799 *
## LocationType -0.4886 0.1660 -2.94 0.00324 **
## Family -0.5339 0.1433 -3.73 0.00019 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1727.9 on 1407 degrees of freedom
## AIC: 1740
##
## Number of Fisher Scoring iterations: 4
# The reduced model has a smaller AIC, so keep it.
drop1(mAllSig5, test = "LRT")
## Single term deletions
##
## Model:
## Completed ~ Black + MedicalAssist + Hospital + LocationType +
## Family
## Df Deviance AIC LRT Pr(>Chi)
## <none> 1728 1740
## Black 1 1743 1753 15.38 8.8e-05 ***
## MedicalAssist 1 1733 1743 5.28 0.02156 *
## Hospital 1 1733 1743 5.53 0.01864 *
## LocationType 1 1737 1747 8.82 0.00299 **
## Family 1 1742 1752 14.21 0.00016 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# MedicalAssist and Hospital are similar in terms of signficance. Try
# dropping each for m4 model.
mAllSig4a <- glm(Completed ~ Black + MedicalAssist + LocationType + Family,
data = gardasil, family = binomial)
summary(mAllSig4a) # AIC: 1743.4, Residual deviance: 1733.4 on 1408
##
## Call:
## glm(formula = Completed ~ Black + MedicalAssist + LocationType +
## Family, family = binomial, data = gardasil)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.094 -0.893 -0.737 1.263 1.989
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1986 0.0884 -2.25 0.02460 *
## Black -0.5139 0.1342 -3.83 0.00013 ***
## MedicalAssist -0.5685 0.1956 -2.91 0.00366 **
## LocationType -0.3970 0.1605 -2.47 0.01336 *
## Family -0.5480 0.1430 -3.83 0.00013 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1733.4 on 1408 degrees of freedom
## AIC: 1743
##
## Number of Fisher Scoring iterations: 4
mAllSig4b <- glm(Completed ~ Black + Hospital + LocationType + Family, data = gardasil,
family = binomial)
summary(mAllSig4b) # AIC: 1743.2, Residual deviance: 1733.2 on 1408
##
## Call:
## glm(formula = Completed ~ Black + Hospital + LocationType + Family,
## family = binomial, data = gardasil)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.382 -0.873 -0.813 1.282 1.846
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2430 0.0892 -2.72 0.00647 **
## Black -0.5644 0.1330 -4.24 2.2e-05 ***
## Hospital 0.7127 0.2358 3.02 0.00250 **
## LocationType -0.6950 0.1412 -4.92 8.5e-07 ***
## Family -0.5253 0.1432 -3.67 0.00024 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1733.2 on 1408 degrees of freedom
## AIC: 1743
##
## Number of Fisher Scoring iterations: 4
# The m4h model is slightly better. Use the Chi-squared test to compare
# this with the m5 model.
# Null hypothesis: There is no difference in the deviance between the
# reduced and full models.
anova(mAllSig5, mAllSig4b, test = "Chisq") # Reject null: the residual deviance is significantly higher for reduced model. Keep the mAllSig5 model.
## Analysis of Deviance Table
##
## Model 1: Completed ~ Black + MedicalAssist + Hospital + LocationType +
## Family
## Model 2: Completed ~ Black + Hospital + LocationType + Family
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1407 1728
## 2 1408 1733 -1 -5.28 0.022 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Model all parameters that were significantly positive in predicting the
# probability of completion. For Medical Assistance, using inverse
# NonMedAssist variable, where yes = 1. For Location Type, using inverse
# Suburban variable instead, where Suburban = 1.
# Null hypothesis for Deviance Test: The model predicts the probability of
# a patient's vaccine completion no better than the average probabiliy.
# Null hypothesis for Wald's Test: For each independent variabel, the
# coefficient is 0.
mPositiveSig6 <- glm(Completed ~ White + Hospital + NonMedAssist + WhiteMarsh +
Suburban + Obgyn, data = gardasil, family = binomial)
summary(mPositiveSig6) # AIC: 1756.1, Residual deviance: 1742.1 on 1406 degrees
##
## Call:
## glm(formula = Completed ~ White + Hospital + NonMedAssist + WhiteMarsh +
## Suburban + Obgyn, family = binomial, data = gardasil)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.367 -0.959 -0.767 1.370 1.871
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.5594 0.1622 -9.61 <2e-16 ***
## White 0.3314 0.1196 2.77 0.0056 **
## Hospital 0.4879 0.2444 2.00 0.0458 *
## NonMedAssist 0.4864 0.2025 2.40 0.0163 *
## WhiteMarsh 0.3908 0.2039 1.92 0.0553 .
## Suburban 0.2562 0.1648 1.56 0.1199
## Obgyn 0.0429 0.1372 0.31 0.7544
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1742.1 on 1406 degrees of freedom
## AIC: 1756
##
## Number of Fisher Scoring iterations: 4
# Use single term deletion analysis to evaluate whether any variables can
# be dropped without affectint the model.
# Null hypothesis: There is no difference in the deviance between the
# reduced and full models.
drop1(mPositiveSig6, test = "LRT")
## Single term deletions
##
## Model:
## Completed ~ White + Hospital + NonMedAssist + WhiteMarsh + Suburban +
## Obgyn
## Df Deviance AIC LRT Pr(>Chi)
## <none> 1742 1756
## White 1 1750 1762 7.70 0.0055 **
## Hospital 1 1746 1758 3.95 0.0470 *
## NonMedAssist 1 1748 1760 5.89 0.0152 *
## WhiteMarsh 1 1746 1758 3.67 0.0554 .
## Suburban 1 1745 1757 2.45 0.1178
## Obgyn 1 1742 1754 0.10 0.7546
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Both Suburban and Obgyn are insignicant in the Wald and single term
# deletion tests. Obgyn is more insigificant, so will try eliminate it
# first.
mPositiveSig5 <- glm(Completed ~ White + Hospital + NonMedAssist + WhiteMarsh +
Suburban, data = gardasil, family = binomial)
summary(mPositiveSig5) # AIC: 1754.2, Residual deviance: 1742.4 on 1407 degrees
##
## Call:
## glm(formula = Completed ~ White + Hospital + NonMedAssist + WhiteMarsh +
## Suburban, family = binomial, data = gardasil)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.368 -0.956 -0.773 1.383 1.867
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.551 0.160 -9.71 <2e-16 ***
## White 0.334 0.119 2.80 0.0052 **
## Hospital 0.488 0.244 2.00 0.0457 *
## NonMedAssist 0.496 0.200 2.48 0.0132 *
## WhiteMarsh 0.421 0.180 2.34 0.0191 *
## Suburban 0.249 0.163 1.53 0.1269
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1742.2 on 1407 degrees of freedom
## AIC: 1754
##
## Number of Fisher Scoring iterations: 4
# Reduced model has lower AIC, so keep it.
drop1(mPositiveSig5, test = "LRT")
## Single term deletions
##
## Model:
## Completed ~ White + Hospital + NonMedAssist + WhiteMarsh + Suburban
## Df Deviance AIC LRT Pr(>Chi)
## <none> 1742 1754
## White 1 1750 1760 7.84 0.0051 **
## Hospital 1 1746 1756 3.95 0.0468 *
## NonMedAssist 1 1748 1758 6.28 0.0122 *
## WhiteMarsh 1 1748 1758 5.45 0.0195 *
## Suburban 1 1745 1755 2.36 0.1248
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Suburban is insignificant in both tests, so eliminate it.
mPositiveSig4 <- glm(Completed ~ White + Hospital + NonMedAssist + WhiteMarsh,
data = gardasil, family = binomial)
summary(mPositiveSig4) # AIC: 1754.5, Residual deviance: 1744.5 on 1408 degrees
##
## Call:
## glm(formula = Completed ~ White + Hospital + NonMedAssist + WhiteMarsh,
## family = binomial, data = gardasil)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.327 -0.965 -0.844 1.406 1.850
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.513 0.158 -9.60 <2e-16 ***
## White 0.326 0.119 2.74 0.0062 **
## Hospital 0.377 0.232 1.62 0.1047
## NonMedAssist 0.664 0.167 3.98 7e-05 ***
## WhiteMarsh 0.490 0.174 2.82 0.0048 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1744.5 on 1408 degrees of freedom
## AIC: 1755
##
## Number of Fisher Scoring iterations: 4
# Reduced model has a slightly higher AIC, so use the Chi-squared test to
# compare it with the m5 model.
# Null hypothesis: There is no difference in the deviance between the
# reduced and full models.
anova(mPositiveSig5, mPositiveSig4, test = "Chisq") # Cannot reject null: there is no statistical difference in the deviance of the two models. So keep the mPositiveSig5 model because of lower AIC.
## Analysis of Deviance Table
##
## Model 1: Completed ~ White + Hospital + NonMedAssist + WhiteMarsh + Suburban
## Model 2: Completed ~ White + Hospital + NonMedAssist + WhiteMarsh
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1407 1742
## 2 1408 1745 -1 -2.36 0.12
# Use lowest AIC model, mAllSig5, for parameters. Interactions seem likely
# between race, insurance, and location. For the naming convention,
# mInterac.#letter, where # is the number of interactions and the letter
# denotes the permutation.
mInteract5.1a <- glm(Completed ~ Black * MedicalAssist + Hospital + LocationType +
Family, data = gardasil, family = binomial)
summary(mInteract5.1a) # Not all variables contributed
##
## Call:
## glm(formula = Completed ~ Black * MedicalAssist + Hospital +
## LocationType + Family, family = binomial, data = gardasil)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.324 -0.889 -0.719 1.278 1.976
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2337 0.0901 -2.59 0.00951 **
## Black -0.4903 0.1499 -3.27 0.00107 **
## MedicalAssist -0.3932 0.2513 -1.56 0.11766
## Hospital 0.5723 0.2414 2.37 0.01774 *
## LocationType -0.4964 0.1671 -2.97 0.00298 **
## Family -0.5353 0.1433 -3.74 0.00019 ***
## Black:MedicalAssist -0.1470 0.3424 -0.43 0.66773
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1727.7 on 1406 degrees of freedom
## AIC: 1742
##
## Number of Fisher Scoring iterations: 4
mInteract5.1b <- glm(Completed ~ Black * Hospital + MedicalAssist + LocationType +
Family, data = gardasil, family = binomial)
summary(mInteract5.1b) # Not all variables contributed
##
## Call:
## glm(formula = Completed ~ Black * Hospital + MedicalAssist +
## LocationType + Family, family = binomial, data = gardasil)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.360 -0.886 -0.726 1.279 1.946
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2356 0.0902 -2.61 0.00899 **
## Black -0.4973 0.1394 -3.57 0.00036 ***
## Hospital 0.6543 0.2816 2.32 0.02016 *
## MedicalAssist -0.4666 0.2018 -2.31 0.02079 *
## LocationType -0.4832 0.1662 -2.91 0.00365 **
## Family -0.5300 0.1434 -3.70 0.00022 ***
## Black:Hospital -0.3001 0.5219 -0.58 0.56526
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1727.5 on 1406 degrees of freedom
## AIC: 1742
##
## Number of Fisher Scoring iterations: 4
mInteract5.1c <- glm(Completed ~ Black * LocationType + Hospital + MedicalAssist +
Family, data = gardasil, family = binomial)
summary(mInteract5.1c)
##
## Call:
## glm(formula = Completed ~ Black * LocationType + Hospital + MedicalAssist +
## Family, family = binomial, data = gardasil)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.313 -0.921 -0.760 1.289 2.011
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2594 0.0917 -2.83 0.00469 **
## Black -0.3793 0.1622 -2.34 0.01934 *
## LocationType -0.3585 0.1864 -1.92 0.05442 .
## Hospital 0.5727 0.2421 2.37 0.01801 *
## MedicalAssist -0.4560 0.2015 -2.26 0.02363 *
## Family -0.5368 0.1430 -3.76 0.00017 ***
## Black:LocationType -0.4259 0.2859 -1.49 0.13627
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1725.6 on 1406 degrees of freedom
## AIC: 1740
##
## Number of Fisher Scoring iterations: 4
mInteract5.1d <- glm(Completed ~ Black * Family + LocationType + Hospital +
MedicalAssist, data = gardasil, family = binomial)
summary(mInteract5.1d)
##
## Call:
## glm(formula = Completed ~ Black * Family + LocationType + Hospital +
## MedicalAssist, family = binomial, data = gardasil)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.327 -0.894 -0.719 1.274 1.935
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2246 0.0909 -2.47 0.01343 *
## Black -0.5385 0.1541 -3.49 0.00047 ***
## Family -0.5525 0.1606 -3.44 0.00058 ***
## LocationType -0.4858 0.1663 -2.92 0.00349 **
## Hospital 0.5688 0.2417 2.35 0.01862 *
## MedicalAssist -0.4573 0.2015 -2.27 0.02325 *
## Black:Family 0.0811 0.3126 0.26 0.79535
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1727.8 on 1406 degrees of freedom
## AIC: 1742
##
## Number of Fisher Scoring iterations: 4
mInteract5.1e <- glm(Completed ~ Black + MedicalAssist * LocationType + Hospital +
Family, data = gardasil, family = binomial)
summary(mInteract5.1e) # All variables contributed, with AIC: 1738.7, Residual deviance: 1720.6 on 1406 degrees
##
## Call:
## glm(formula = Completed ~ Black + MedicalAssist * LocationType +
## Hospital + Family, family = binomial, data = gardasil)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.352 -0.894 -0.710 1.270 2.240
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2159 0.0898 -2.41 0.01613 *
## Black -0.4939 0.1351 -3.65 0.00026 ***
## MedicalAssist -1.1723 0.4919 -2.38 0.01716 *
## LocationType -0.6050 0.1815 -3.33 0.00086 ***
## Hospital 0.6167 0.2440 2.53 0.01149 *
## Family -0.5408 0.1437 -3.76 0.00017 ***
## MedicalAssist:LocationType 0.9046 0.5398 1.68 0.09378 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1724.7 on 1406 degrees of freedom
## AIC: 1739
##
## Number of Fisher Scoring iterations: 4
mInteract5.1f <- glm(Completed ~ Black + Hospital * LocationType + MedicalAssist +
Family, data = gardasil, family = binomial)
summary(mInteract5.1f) # All variables contributed, with AIC: 1734.6, Residual deviance: 1720.6 on 1406 degrees
##
## Call:
## glm(formula = Completed ~ Black + Hospital * LocationType + MedicalAssist +
## Family, family = binomial, data = gardasil)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.618 -0.873 -0.740 1.288 2.005
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2566 0.0903 -2.84 0.00447 **
## Black -0.5130 0.1348 -3.81 0.00014 ***
## Hospital 1.2498 0.3642 3.43 0.00060 ***
## LocationType -0.3273 0.1749 -1.87 0.06137 .
## MedicalAssist -0.5716 0.2051 -2.79 0.00532 **
## Family -0.5250 0.1439 -3.65 0.00026 ***
## Hospital:LocationType -1.3398 0.5088 -2.63 0.00846 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1720.6 on 1406 degrees of freedom
## AIC: 1735
##
## Number of Fisher Scoring iterations: 4
mInteract5.2a <- glm(Completed ~ Black * (MedicalAssist + LocationType) + Hospital +
Family, data = gardasil, family = binomial)
summary(mInteract5.2a) # Not all variables contributed
##
## Call:
## glm(formula = Completed ~ Black * (MedicalAssist + LocationType) +
## Hospital + Family, family = binomial, data = gardasil)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.313 -0.918 -0.755 1.289 1.994
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2590 0.0917 -2.82 0.00475 **
## Black -0.3881 0.1640 -2.37 0.01800 *
## MedicalAssist -0.5171 0.2642 -1.96 0.05029 .
## LocationType -0.3334 0.1988 -1.68 0.09348 .
## Hospital 0.5718 0.2424 2.36 0.01835 *
## Family -0.5357 0.1430 -3.75 0.00018 ***
## Black:MedicalAssist 0.1421 0.3948 0.36 0.71895
## Black:LocationType -0.4841 0.3286 -1.47 0.14070
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1725.5 on 1405 degrees of freedom
## AIC: 1742
##
## Number of Fisher Scoring iterations: 4
mInteract5.2b <- glm(Completed ~ Black * (Hospital + LocationType) + MedicalAssist +
Family, data = gardasil, family = binomial)
summary(mInteract5.2b) # Not all varaibles contributed
##
## Call:
## glm(formula = Completed ~ Black * (Hospital + LocationType) +
## MedicalAssist + Family, family = binomial, data = gardasil)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.332 -0.922 -0.760 1.290 2.004
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2621 0.0921 -2.84 0.00445 **
## Black -0.3724 0.1635 -2.28 0.02273 *
## Hospital 0.6186 0.2818 2.20 0.02816 *
## LocationType -0.3603 0.1865 -1.93 0.05344 .
## MedicalAssist -0.4602 0.2019 -2.28 0.02261 *
## Family -0.5345 0.1431 -3.73 0.00019 ***
## Black:Hospital -0.1696 0.5315 -0.32 0.74972
## Black:LocationType -0.4093 0.2902 -1.41 0.15852
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1725.5 on 1405 degrees of freedom
## AIC: 1742
##
## Number of Fisher Scoring iterations: 4
mInteract5.2c <- glm(Completed ~ Black + LocationType * (Hospital + MedicalAssist) +
Family, data = gardasil, family = binomial)
summary(mInteract5.2c) # Not all varaibles contributed
##
## Call:
## glm(formula = Completed ~ Black + LocationType * (Hospital +
## MedicalAssist) + Family, family = binomial, data = gardasil)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.616 -0.884 -0.737 1.283 2.237
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2441 0.0907 -2.69 0.00712 **
## Black -0.4935 0.1354 -3.65 0.00027 ***
## LocationType -0.4277 0.1917 -2.23 0.02567 *
## Hospital 1.2342 0.3642 3.39 0.00070 ***
## MedicalAssist -1.1466 0.4920 -2.33 0.01978 *
## Family -0.5314 0.1443 -3.68 0.00023 ***
## LocationType:Hospital -1.2428 0.5142 -2.42 0.01566 *
## LocationType:MedicalAssist 0.7296 0.5431 1.34 0.17917
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1718.6 on 1405 degrees of freedom
## AIC: 1735
##
## Number of Fisher Scoring iterations: 4
mInteract5.2d <- glm(Completed ~ (Black + LocationType) * Hospital + MedicalAssist +
Family, data = gardasil, family = binomial)
summary(mInteract5.2d) # Not all varaibles contributed
##
## Call:
## glm(formula = Completed ~ (Black + LocationType) * Hospital +
## MedicalAssist + Family, family = binomial, data = gardasil)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.633 -0.875 -0.739 1.289 2.002
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2594 0.0908 -2.86 0.00429 **
## Black -0.5032 0.1393 -3.61 0.00030 ***
## LocationType -0.3272 0.1749 -1.87 0.06133 .
## Hospital 1.2876 0.3912 3.29 0.00100 ***
## MedicalAssist -0.5735 0.2052 -2.80 0.00518 **
## Family -0.5231 0.1441 -3.63 0.00028 ***
## Black:Hospital -0.1511 0.5488 -0.28 0.78303
## LocationType:Hospital -1.3290 0.5119 -2.60 0.00943 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1720.5 on 1405 degrees of freedom
## AIC: 1737
##
## Number of Fisher Scoring iterations: 4
mInteract5.3a <- glm(Completed ~ Black * LocationType * MedicalAssist + Hospital +
Family, data = gardasil, family = binomial)
summary(mInteract5.3a) # Not all variables contributed
##
## Call:
## glm(formula = Completed ~ Black * LocationType * MedicalAssist +
## Hospital + Family, family = binomial, data = gardasil)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.344 -0.912 -0.748 1.284 2.292
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2483 0.0921 -2.70 0.00700 **
## Black -0.3390 0.1669 -2.03 0.04223 *
## LocationType -0.4140 0.2071 -2.00 0.04559 *
## MedicalAssist -1.7590 1.0599 -1.66 0.09699 .
## Hospital 0.6316 0.2463 2.56 0.01033 *
## Family -0.5440 0.1435 -3.79 0.00015 ***
## Black:LocationType -0.7008 0.3853 -1.82 0.06892 .
## Black:MedicalAssist 0.6891 1.1995 0.57 0.56562
## LocationType:MedicalAssist 1.3894 1.0962 1.27 0.20499
## Black:LocationType:MedicalAssist -0.2781 1.2907 -0.22 0.82941
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1720.5 on 1403 degrees of freedom
## AIC: 1740
##
## Number of Fisher Scoring iterations: 4
mInteract5.3b <- glm(Completed ~ Black * LocationType * Hospital + MedicalAssist +
Family, data = gardasil, family = binomial)
summary(mInteract5.3b) # Not all variables contributed
##
## Call:
## glm(formula = Completed ~ Black * LocationType * Hospital + MedicalAssist +
## Family, family = binomial, data = gardasil)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.810 -0.930 -0.752 1.312 2.042
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.3108 0.0934 -3.33 0.00088 ***
## Black -0.3026 0.1649 -1.83 0.06653 .
## LocationType -0.1227 0.1964 -0.62 0.53205
## Hospital 1.7333 0.4672 3.71 0.00021 ***
## MedicalAssist -0.5677 0.2056 -2.76 0.00575 **
## Family -0.5063 0.1440 -3.52 0.00044 ***
## Black:LocationType -0.6489 0.3027 -2.14 0.03208 *
## Black:Hospital -1.9169 0.9571 -2.00 0.04520 *
## LocationType:Hospital -2.2648 0.6487 -3.49 0.00048 ***
## Black:LocationType:Hospital 3.1403 1.1846 2.65 0.00803 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1710.7 on 1403 degrees of freedom
## AIC: 1731
##
## Number of Fisher Scoring iterations: 4
# Compare the two best interaction models.
# Null hypothesis: There is no difference in the deviance between the two
# models.
anova(mInteract5.1e, mInteract5.1f, test = "Chisq") # Cannot reject null: there is no statistical difference in the deviance of the two models. So keep the mInteract7.1f model because of lower AIC.
## Analysis of Deviance Table
##
## Model 1: Completed ~ Black + MedicalAssist * LocationType + Hospital +
## Family
## Model 2: Completed ~ Black + Hospital * LocationType + MedicalAssist +
## Family
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1406 1725
## 2 1406 1721 0 4.12
# Compare the best non-interaction and interaction models.
# Null hypothesis: There is no difference in the deviance between the two
# models.
anova(mAllSig5, mInteract5.1f, test = "Chisq") # Reject null: the deviance difference between the two models are significant, the interactio model appears better.
## Analysis of Deviance Table
##
## Model 1: Completed ~ Black + MedicalAssist + Hospital + LocationType +
## Family
## Model 2: Completed ~ Black + Hospital * LocationType + MedicalAssist +
## Family
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 1407 1728
## 2 1406 1721 1 7.3 0.0069 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Summaries for non-interaction model
summary(mAllSig5)
##
## Call:
## glm(formula = Completed ~ Black + MedicalAssist + Hospital +
## LocationType + Family, family = binomial, data = gardasil)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.326 -0.892 -0.724 1.276 1.950
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2289 0.0894 -2.56 0.01048 *
## Black -0.5191 0.1345 -3.86 0.00011 ***
## MedicalAssist -0.4585 0.2014 -2.28 0.02283 *
## Hospital 0.5714 0.2415 2.37 0.01799 *
## LocationType -0.4886 0.1660 -2.94 0.00324 **
## Family -0.5339 0.1433 -3.73 0.00019 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1727.9 on 1407 degrees of freedom
## AIC: 1740
##
## Number of Fisher Scoring iterations: 4
exp(cbind(OR = coef(mAllSig5), confint(mAllSig5)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.7954 0.6672 0.9474
## Black 0.5950 0.4559 0.7728
## MedicalAssist 0.6322 0.4241 0.9352
## Hospital 1.7707 1.1008 2.8449
## LocationType 0.6135 0.4420 0.8477
## Family 0.5863 0.4418 0.7750
summary(mAllSig5$residuals) # Mean is 0.000519
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.410 -1.490 -1.300 0.001 2.260 6.700
# Summaries for interaction model
summary(mInteract5.1f)
##
## Call:
## glm(formula = Completed ~ Black + Hospital * LocationType + MedicalAssist +
## Family, family = binomial, data = gardasil)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.618 -0.873 -0.740 1.288 2.005
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2566 0.0903 -2.84 0.00447 **
## Black -0.5130 0.1348 -3.81 0.00014 ***
## Hospital 1.2498 0.3642 3.43 0.00060 ***
## LocationType -0.3273 0.1749 -1.87 0.06137 .
## MedicalAssist -0.5716 0.2051 -2.79 0.00532 **
## Family -0.5250 0.1439 -3.65 0.00026 ***
## Hospital:LocationType -1.3398 0.5088 -2.63 0.00846 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1796.0 on 1412 degrees of freedom
## Residual deviance: 1720.6 on 1406 degrees of freedom
## AIC: 1735
##
## Number of Fisher Scoring iterations: 4
exp(cbind(OR = coef(mInteract5.1f), confint(mInteract5.1f)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.7737 0.6477 0.9229
## Black 0.5987 0.4585 0.7779
## Hospital 3.4896 1.7439 7.3646
## LocationType 0.7209 0.5103 1.0139
## MedicalAssist 0.5646 0.3760 0.8412
## Family 0.5915 0.4452 0.7829
## Hospital:LocationType 0.2619 0.0940 0.6961
summary(mInteract5.1f$residuals) # Mean is 0.000347
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.70 -1.46 -1.31 0.00 2.29 7.46