library(car)
library(mice)
library(ggplot2)
library(haven)
library(readr)
BRFSS19<-read_xpt("C:/Users/adolp/Desktop/Statistics 2/BRFSS/LLCP2019.XPT")
set.seed(1234)
samp<-sample(1:dim(BRFSS19)[1], size = 20000) #smaller sample for brevity
BRFSS19<-BRFSS19[samp,]
#Fix BRFSS names
nams<-names(BRFSS19)
head(BRFSS19, n=10)
## # A tibble: 10 x 342
## `_STATE` FMONTH IDATE IMONTH IDAY IYEAR DISPCODE SEQNO `_PSU` CTELENM1
## <dbl> <dbl> <chr> <chr> <chr> <chr> <dbl> <chr> <dbl> <dbl>
## 1 31 8 110320~ 11 03 2019 1200 201900~ 2.02e9 NA
## 2 18 9 091220~ 09 12 2019 1100 201900~ 2.02e9 1
## 3 41 1 020820~ 02 08 2019 1100 201900~ 2.02e9 NA
## 4 56 3 030420~ 03 04 2019 1100 201900~ 2.02e9 NA
## 5 39 12 120320~ 12 03 2019 1100 201901~ 2.02e9 NA
## 6 20 10 111320~ 11 13 2019 1100 201900~ 2.02e9 1
## 7 53 1 013120~ 01 31 2019 1100 201900~ 2.02e9 1
## 8 48 9 092820~ 09 28 2019 1100 201900~ 2.02e9 1
## 9 48 1 021020~ 02 10 2019 1100 201900~ 2.02e9 1
## 10 48 2 021820~ 02 18 2019 1100 201900~ 2.02e9 NA
## # ... with 332 more variables: PVTRESD1 <dbl>, COLGHOUS <dbl>, STATERE1 <dbl>,
## # CELPHONE <dbl>, LADULT1 <dbl>, COLGSEX <dbl>, NUMADULT <dbl>,
## # LANDSEX <dbl>, NUMMEN <dbl>, NUMWOMEN <dbl>, RESPSLCT <dbl>,
## # SAFETIME <dbl>, CTELNUM1 <dbl>, CELLFON5 <dbl>, CADULT1 <dbl>,
## # CELLSEX <dbl>, PVTRESD3 <dbl>, CCLGHOUS <dbl>, CSTATE1 <dbl>,
## # LANDLINE <dbl>, HHADULT <dbl>, SEXVAR <dbl>, GENHLTH <dbl>, PHYSHLTH <dbl>,
## # MENTHLTH <dbl>, POORHLTH <dbl>, HLTHPLN1 <dbl>, PERSDOC2 <dbl>,
## # MEDCOST <dbl>, CHECKUP1 <dbl>, BPHIGH4 <dbl>, BPMEDS <dbl>, CHOLCHK2 <dbl>,
## # TOLDHI2 <dbl>, CHOLMED2 <dbl>, CVDINFR4 <dbl>, CVDCRHD4 <dbl>,
## # CVDSTRK3 <dbl>, ASTHMA3 <dbl>, ASTHNOW <dbl>, CHCSCNCR <dbl>,
## # CHCOCNCR <dbl>, CHCCOPD2 <dbl>, ADDEPEV3 <dbl>, CHCKDNY2 <dbl>,
## # DIABETE4 <dbl>, DIABAGE3 <dbl>, HAVARTH4 <dbl>, ARTHEXER <dbl>,
## # ARTHEDU <dbl>, LMTJOIN3 <dbl>, ARTHDIS2 <dbl>, JOINPAI2 <dbl>,
## # MARITAL <dbl>, EDUCA <dbl>, RENTHOM1 <dbl>, NUMHHOL3 <dbl>, NUMPHON3 <dbl>,
## # CPDEMO1B <dbl>, VETERAN3 <dbl>, EMPLOY1 <dbl>, CHILDREN <dbl>,
## # INCOME2 <dbl>, WEIGHT2 <dbl>, HEIGHT3 <dbl>, PREGNANT <dbl>, DEAF <dbl>,
## # BLIND <dbl>, DECIDE <dbl>, DIFFWALK <dbl>, DIFFDRES <dbl>, DIFFALON <dbl>,
## # SMOKE100 <dbl>, SMOKDAY2 <dbl>, STOPSMK2 <dbl>, LASTSMK2 <dbl>,
## # USENOW3 <dbl>, ALCDAY5 <dbl>, AVEDRNK3 <dbl>, DRNK3GE5 <dbl>,
## # MAXDRNKS <dbl>, EXERANY2 <dbl>, EXRACT11 <dbl>, EXEROFT1 <dbl>,
## # EXERHMM1 <dbl>, EXRACT21 <dbl>, EXEROFT2 <dbl>, EXERHMM2 <dbl>,
## # STRENGTH <dbl>, FRUIT2 <dbl>, FRUITJU2 <dbl>, FVGREEN1 <dbl>,
## # FRENCHF1 <dbl>, POTATOE1 <dbl>, VEGETAB2 <dbl>, FLUSHOT7 <dbl>,
## # FLSHTMY3 <dbl>, TETANUS1 <dbl>, PNEUVAC4 <dbl>, HIVTST7 <dbl>, ...
newnames<-tolower(gsub(pattern = "_",replacement = "",x = nams))
names(BRFSS19)<-newnames
#Recode and Dummy coded variables
BRFSS19$Male<-recode(BRFSS19$birthsex, recodes = "7:9=NA; 1=1;2=0")
BRFSS19$Age1824<-recode(BRFSS19$ageg, recodes="1=1; else=0")
BRFSS19$Age2534<-recode(BRFSS19$ageg, recodes="2=1;else=0")
BRFSS19$Age3544<-recode(BRFSS19$ageg, recodes="3=1;else=0")
BRFSS19$Age4554<-recode(BRFSS19$ageg, recodes="4=1;else=0")
BRFSS19$Age5564<-recode(BRFSS19$ageg, recodes="5=1;else=0")
BRFSS19$Age65<-recode(BRFSS19$ageg, recodes="6=1;else=0")
BRFSS19$Married<-recode(BRFSS19$marital, recodes="9=NA; 1=1; else=0")
BRFSS19$Divorced<-recode(BRFSS19$marital, recodes="9=NA; 2=1; else=0")
BRFSS19$Widowed<-recode(BRFSS19$marital, recodes="9=NA; 3=1; else=0")
BRFSS19$Seperated<-recode(BRFSS19$marital, recodes="9=NA; 4=1; else=0")
BRFSS19$Never<-recode(BRFSS19$marital, recodes="9=NA; 5=1; else=0")
BRFSS19$Unmarried<-recode(BRFSS19$marital, recodes="9=NA; 6=1; else=0")
BRFSS19$NoHS<-recode(BRFSS19$educag, recodes="9=NA; 1=1; else=0")
BRFSS19$HS<-recode(BRFSS19$educag, recodes="9=NA; 2=1; else=0")
BRFSS19$College<-recode(BRFSS19$educag, recodes="9=NA; 3=1; else=0")
BRFSS19$GradCollege<-recode(BRFSS19$educag, recodes="9=NA; 4=1; else=0")
BRFSS19$Employed<-recode(BRFSS19$employ1,recodes="9=NA; 1=1; else=0")
BRFSS19$SelfEmployed<-recode(BRFSS19$employ1,recodes="9=NA; 2=1; else=0")
BRFSS19$NotEmployed<-recode(BRFSS19$employ1,recodes="9=NA; 3=1; 4=1; else=0")
BRFSS19$Student<-recode(BRFSS19$employ1,recodes="9=NA; 6=1; else=0")
BRFSS19$Retired<-recode(BRFSS19$employ1,recodes="9=NA; 7=1; else=0")
BRFSS19$Unabletowork<-recode(BRFSS19$employ1,recodes="9=NA; 8=1; else=0")
BRFSS19$Homemaker<-recode(BRFSS19$employ1,recodes="9=NA; 5=1; else=0")
BRFSS19$Black<-recode(BRFSS19$racegr3, recodes="2=1; 9=NA; else=0")
BRFSS19$White<-recode(BRFSS19$racegr3, recodes="1=1; 9=NA; else=0")
BRFSS19$Other<-recode(BRFSS19$racegr3, recodes="3:4=1; 9=NA; else=0")
BRFSS19$Hispanic<-recode(BRFSS19$racegr3, recodes="5=1; 9=NA; else=0")
BRFSS19$Income15K<-recode(BRFSS19$incomg,recodes="9=NA; 1=1; else=0")
BRFSS19$Income1525K<-recode(BRFSS19$incomg,recodes="9=NA; 2=1; else=0")
BRFSS19$Income2535K<-recode(BRFSS19$incomg,recodes="9=NA; 3=1; else=0")
BRFSS19$Income3550K<-recode(BRFSS19$incomg,recodes="9=NA; 4=1; else=0")
BRFSS19$Income50K<-recode(BRFSS19$incomg,recodes="9=NA; 5=1; else=0")
BRFSS19$Gay<-recode(BRFSS19$somale,recodes="7:9=NA; 1=1; else=0")
BRFSS19$Straight<-recode(BRFSS19$somale,recodes="7:9=NA; 2=1; else=0")
BRFSS19$Bi<-recode(BRFSS19$somale,recodes="7:9=NA; 3=1; else=0")
BRFSS19$Else<-recode(BRFSS19$somale,recodes="7:9=NA; 4=1; else=0")
BRFSS19$HIVtest<-recode(BRFSS19$aidtst4,recodes="7:9=NA; 1=1; else=0")
summary(BRFSS19[, c("HIVtest", "Hispanic", "Black", "Gay", "Bi", "Male")])
## HIVtest Hispanic Black Gay
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.3218 Mean :0.0891 Mean :0.0767 Mean :0.00549
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## NA's :772 NA's :395 NA's :395 NA's :142
## Bi Male
## Min. :0.00000 Min. :0.000
## 1st Qu.:0.00000 1st Qu.:0.000
## Median :0.00000 Median :0.000
## Mean :0.00458 Mean :0.452
## 3rd Qu.:0.00000 3rd Qu.:1.000
## Max. :1.00000 Max. :1.000
## NA's :142 NA's :16945
Modal Imputation
table(BRFSS19$HIVtest)
##
## 0 1
## 13040 6188
#find the most common value
mcv.HIVtest<-factor(names(which.max(table(BRFSS19$HIVtest))), levels=levels(BRFSS19$HIVtest))
mcv.HIVtest
## [1] <NA>
## Levels:
#impute the cases
BRFSS19$HIVtest.imp<-as.factor(ifelse(is.na(BRFSS19$HIVtest)==T, mcv.HIVtest, BRFSS19$HIVtest))
levels(BRFSS19$HIVtest.imp)<-levels(as.factor(BRFSS19$HIVtest))
prop.table(table(BRFSS19$HIVtest))
##
## 0 1
## 0.6781777 0.3218223
prop.table(table(BRFSS19$HIVtest.imp))
##
## 0 1
## 0.6781777 0.3218223
barplot(prop.table(table(BRFSS19$HIVtest)), main="Original Data", ylim=c(0, .6))
barplot(prop.table(table(BRFSS19$HIVtest.imp)), main="Imputed Data",ylim=c(0, .6))
table(BRFSS19$HIVtest)
##
## 0 1
## 13040 6188
#find the most common value
mcv.HIV<-factor(names(which.max(table(BRFSS19$HIVtest))), levels = levels(BRFSS19$HIVtest))
mcv.HIV
## [1] <NA>
## Levels:
#impute the cases
BRFSS19$HIV.imp<-as.factor(ifelse(is.na(BRFSS19$HIVtest)==T, mcv.HIV, BRFSS19$HIVtest))
levels(BRFSS19$HIV.imp)<-levels(as.factor(BRFSS19$HIVtest))
prop.table(table(BRFSS19$HIVtest))
##
## 0 1
## 0.6781777 0.3218223
prop.table(table(BRFSS19$HIV.imp))
##
## 0 1
## 0.6781777 0.3218223
barplot(prop.table(table(BRFSS19$HIVtest)), main="Original Data", ylim=c(0, .6))
barplot(prop.table(table(BRFSS19$HIV.imp)), main="Imputed Data", ylim=c(0, .6))
table(BRFSS19$Hispanic)
##
## 0 1
## 17858 1747
#find the most common value
mcv.His<-factor(names(which.max(table(BRFSS19$Hispanic))), levels = levels(BRFSS19$Hispanic))
mcv.His
## [1] <NA>
## Levels:
#impute the cases
BRFSS19$His.imp<-as.factor(ifelse(is.na(BRFSS19$Hispanic)==T, mcv.His, BRFSS19$Hispanic))
levels(BRFSS19$His.imp)<-levels(as.factor(BRFSS19$Hispanic))
prop.table(table(BRFSS19$HIVtest))
##
## 0 1
## 0.6781777 0.3218223
prop.table(table(BRFSS19$His.imp))
##
## 0 1
## 0.91089008 0.08910992
barplot(prop.table(table(BRFSS19$HIVtest)), main="Original Data", ylim=c(0, .6))
barplot(prop.table(table(BRFSS19$His.imp)), main="Imputed Data", ylim=c(0, .6))
table(BRFSS19$Black)
##
## 0 1
## 18102 1503
#find the most common value
mcv.Blk<-factor(names(which.max(table(BRFSS19$Black))), levels = levels(BRFSS19$Black))
mcv.Blk
## [1] <NA>
## Levels:
#impute the cases
BRFSS19$Blk.imp<-as.factor(ifelse(is.na(BRFSS19$Black)==T, mcv.Blk, BRFSS19$Black))
levels(BRFSS19$Blk.imp)<-levels(as.factor(BRFSS19$Black))
prop.table(table(BRFSS19$Black))
##
## 0 1
## 0.92333588 0.07666412
prop.table(table(BRFSS19$Blk.imp))
##
## 0 1
## 0.92333588 0.07666412
barplot(prop.table(table(BRFSS19$Black)), main="Original Data", ylim=c(0, .6))
barplot(prop.table(table(BRFSS19$Blk.imp)), main="Imputed Data", ylim=c(0, .6))
table(BRFSS19$Gay)
##
## 0 1
## 19749 109
#find the most common value
mcv.Gay<-factor(names(which.max(table(BRFSS19$Gay))), levels = levels(BRFSS19$Gay))
mcv.Gay
## [1] <NA>
## Levels:
#impute the cases
BRFSS19$Gay.imp<-as.factor(ifelse(is.na(BRFSS19$Gay)==T, mcv.Gay, BRFSS19$Gay))
levels(BRFSS19$Gay.imp)<-levels(as.factor(BRFSS19$Gay))
prop.table(table(BRFSS19$Gay))
##
## 0 1
## 0.994511028 0.005488972
prop.table(table(BRFSS19$Gay.imp))
##
## 0 1
## 0.994511028 0.005488972
barplot(prop.table(table(BRFSS19$Gay)), main="Original Data", ylim=c(0, .6))
barplot(prop.table(table(BRFSS19$Gay.imp)), main="Imputed Data", ylim=c(0, .6))
table(BRFSS19$Bi)
##
## 0 1
## 19767 91
#find the most common value
mcv.Bi<-factor(names(which.max(table(BRFSS19$Bi))), levels = levels(BRFSS19$Bi))
mcv.Bi
## [1] <NA>
## Levels:
#impute the cases
BRFSS19$Bi.imp<-as.factor(ifelse(is.na(BRFSS19$Bi)==T, mcv.Bi, BRFSS19$Bi))
levels(BRFSS19$Bi.imp)<-levels(as.factor(BRFSS19$Bi))
prop.table(table(BRFSS19$Bi))
##
## 0 1
## 0.995417464 0.004582536
prop.table(table(BRFSS19$Bi.imp))
##
## 0 1
## 0.995417464 0.004582536
barplot(prop.table(table(BRFSS19$Bi)), main="Original Data", ylim=c(0, .6))
barplot(prop.table(table(BRFSS19$Bi.imp)), main="Imputed Data", ylim=c(0, .6))
table(BRFSS19$Male)
##
## 0 1
## 1674 1381
#find the most common value
mcv.Male<-factor(names(which.max(table(BRFSS19$Male))), levels = levels(BRFSS19$Male))
mcv.Male
## [1] <NA>
## Levels:
#impute the cases
BRFSS19$Male.imp<-as.factor(ifelse(is.na(BRFSS19$Male)==T, mcv.Male, BRFSS19$Male))
levels(BRFSS19$Male.imp)<-levels(as.factor(BRFSS19$Male))
prop.table(table(BRFSS19$Male))
##
## 0 1
## 0.5479542 0.4520458
prop.table(table(BRFSS19$Male.imp))
##
## 0 1
## 0.5479542 0.4520458
barplot(prop.table(table(BRFSS19$Male)), main="Original Data", ylim=c(0, .6))
barplot(prop.table(table(BRFSS19$Male.imp)), main="Imputed Data", ylim=c(0, .6))
Modal.HIV<-glm(HIV.imp~His.imp+Blk.imp+Gay.imp+Bi.imp+Male.imp,family = binomial, data = BRFSS19)
Modal.HIV
##
## Call: glm(formula = HIV.imp ~ His.imp + Blk.imp + Gay.imp + Bi.imp +
## Male.imp, family = binomial, data = BRFSS19)
##
## Coefficients:
## (Intercept) His.imp1 Blk.imp1 Gay.imp1 Bi.imp1 Male.imp1
## -0.8661 0.5053 0.9743 2.1381 0.9263 -0.2188
##
## Degrees of Freedom: 2863 Total (i.e. Null); 2858 Residual
## (17136 observations deleted due to missingness)
## Null Deviance: 3522
## Residual Deviance: 3435 AIC: 3447
Testing for MAR
#look at the patterns of missingness
md.pattern(BRFSS19[,c("HIVtest", "Hispanic", "Black", "Gay", "Bi", "Male")])
## Gay Bi Hispanic Black HIVtest Male
## 2864 1 1 1 1 1 1 0
## 15883 1 1 1 1 1 0 1
## 94 1 1 1 1 0 1 1
## 644 1 1 1 1 0 0 2
## 57 1 1 0 0 1 1 2
## 298 1 1 0 0 1 0 3
## 4 1 1 0 0 0 1 3
## 14 1 1 0 0 0 0 4
## 25 0 0 1 1 1 1 2
## 83 0 0 1 1 1 0 3
## 3 0 0 1 1 0 1 3
## 9 0 0 1 1 0 0 4
## 7 0 0 0 0 1 1 4
## 11 0 0 0 0 1 0 5
## 1 0 0 0 0 0 1 5
## 3 0 0 0 0 0 0 6
## 142 142 395 395 772 16945 18791
md.pairs(BRFSS19[,c("HIVtest", "Hispanic", "Black", "Gay", "Bi", "Male")])
## $rr
## HIVtest Hispanic Black Gay Bi Male
## HIVtest 19228 18855 18855 19102 19102 2953
## Hispanic 18855 19605 19605 19485 19485 2986
## Black 18855 19605 19605 19485 19485 2986
## Gay 19102 19485 19485 19858 19858 3019
## Bi 19102 19485 19485 19858 19858 3019
## Male 2953 2986 2986 3019 3019 3055
##
## $rm
## HIVtest Hispanic Black Gay Bi Male
## HIVtest 0 373 373 126 126 16275
## Hispanic 750 0 0 120 120 16619
## Black 750 0 0 120 120 16619
## Gay 756 373 373 0 0 16839
## Bi 756 373 373 0 0 16839
## Male 102 69 69 36 36 0
##
## $mr
## HIVtest Hispanic Black Gay Bi Male
## HIVtest 0 750 750 756 756 102
## Hispanic 373 0 0 373 373 69
## Black 373 0 0 373 373 69
## Gay 126 120 120 0 0 36
## Bi 126 120 120 0 0 36
## Male 16275 16619 16619 16839 16839 0
##
## $mm
## HIVtest Hispanic Black Gay Bi Male
## HIVtest 772 22 22 16 16 670
## Hispanic 22 395 395 22 22 326
## Black 22 395 395 22 22 326
## Gay 16 22 22 142 142 106
## Bi 16 22 22 142 142 106
## Male 670 326 326 106 106 16945
imp<-mice(data = BRFSS19[,c("HIVtest", "Hispanic", "Black", "Gay", "Bi", "Male")], m = 10, method = "logreg")
## Warning: Type mismatch for variable(s): HIVtest
## Imputation method logreg is for categorical data.
## Warning: Type mismatch for variable(s): Hispanic
## Imputation method logreg is for categorical data.
## Warning: Type mismatch for variable(s): Black
## Imputation method logreg is for categorical data.
## Warning: Type mismatch for variable(s): Gay
## Imputation method logreg is for categorical data.
## Warning: Type mismatch for variable(s): Bi
## Imputation method logreg is for categorical data.
## Warning: Type mismatch for variable(s): Male
## Imputation method logreg is for categorical data.
##
## iter imp variable
## 1 1 HIVtest Hispanic Black Gay Bi Male
## 1 2 HIVtest Hispanic Black Gay Bi Male
## 1 3 HIVtest Hispanic Black Gay Bi Male
## 1 4 HIVtest Hispanic Black Gay Bi Male
## 1 5 HIVtest Hispanic Black Gay Bi Male
## 1 6 HIVtest Hispanic Black Gay Bi Male
## 1 7 HIVtest Hispanic Black Gay Bi Male
## 1 8 HIVtest Hispanic Black Gay Bi Male
## 1 9 HIVtest Hispanic Black Gay Bi Male
## 1 10 HIVtest Hispanic Black Gay Bi Male
## 2 1 HIVtest Hispanic Black Gay Bi Male
## 2 2 HIVtest Hispanic Black Gay Bi Male
## 2 3 HIVtest Hispanic Black Gay Bi Male
## 2 4 HIVtest Hispanic Black Gay Bi Male
## 2 5 HIVtest Hispanic Black Gay Bi Male
## 2 6 HIVtest Hispanic Black Gay Bi Male
## 2 7 HIVtest Hispanic Black Gay Bi Male
## 2 8 HIVtest Hispanic Black Gay Bi Male
## 2 9 HIVtest Hispanic Black Gay Bi Male
## 2 10 HIVtest Hispanic Black Gay Bi Male
## 3 1 HIVtest Hispanic Black Gay Bi Male
## 3 2 HIVtest Hispanic Black Gay Bi Male
## 3 3 HIVtest Hispanic Black Gay Bi Male
## 3 4 HIVtest Hispanic Black Gay Bi Male
## 3 5 HIVtest Hispanic Black Gay Bi Male
## 3 6 HIVtest Hispanic Black Gay Bi Male
## 3 7 HIVtest Hispanic Black Gay Bi Male
## 3 8 HIVtest Hispanic Black Gay Bi Male
## 3 9 HIVtest Hispanic Black Gay Bi Male
## 3 10 HIVtest Hispanic Black Gay Bi Male
## 4 1 HIVtest Hispanic Black Gay Bi Male
## 4 2 HIVtest Hispanic Black Gay Bi Male
## 4 3 HIVtest Hispanic Black Gay Bi Male
## 4 4 HIVtest Hispanic Black Gay Bi Male
## 4 5 HIVtest Hispanic Black Gay Bi Male
## 4 6 HIVtest Hispanic Black Gay Bi Male
## 4 7 HIVtest Hispanic Black Gay Bi Male
## 4 8 HIVtest Hispanic Black Gay Bi Male
## 4 9 HIVtest Hispanic Black Gay Bi Male
## 4 10 HIVtest Hispanic Black Gay Bi Male
## 5 1 HIVtest Hispanic Black Gay Bi Male
## 5 2 HIVtest Hispanic Black Gay Bi Male
## 5 3 HIVtest Hispanic Black Gay Bi Male
## 5 4 HIVtest Hispanic Black Gay Bi Male
## 5 5 HIVtest Hispanic Black Gay Bi Male
## 5 6 HIVtest Hispanic Black Gay Bi Male
## 5 7 HIVtest Hispanic Black Gay Bi Male
## 5 8 HIVtest Hispanic Black Gay Bi Male
## 5 9 HIVtest Hispanic Black Gay Bi Male
## 5 10 HIVtest Hispanic Black Gay Bi Male
print(imp)
## Class: mids
## Number of multiple imputations: 10
## Imputation methods:
## HIVtest Hispanic Black Gay Bi Male
## "logreg" "logreg" "logreg" "logreg" "logreg" "logreg"
## PredictorMatrix:
## HIVtest Hispanic Black Gay Bi Male
## HIVtest 0 1 1 1 1 1
## Hispanic 1 0 1 1 1 1
## Black 1 1 0 1 1 1
## Gay 1 1 1 0 1 1
## Bi 1 1 1 1 0 1
## Male 1 1 1 1 1 0
head(imp$imp$HIVtest)
## 1 2 3 4 5 6 7 8 9 10
## 36 0 0 0 0 0 0 0 0 0 0
## 60 0 1 0 1 0 0 0 0 0 0
## 107 1 1 0 1 0 0 0 0 0 0
## 136 0 0 0 0 1 1 0 0 0 0
## 232 0 0 1 0 0 1 1 1 0 0
## 274 0 1 0 0 1 0 0 0 0 0
summary(imp$imp$HIVtest)
## 1 2 3 4
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.3199 Mean :0.3057 Mean :0.3303 Mean :0.2979
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## 5 6 7 8
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.3148 Mean :0.3212 Mean :0.2979 Mean :0.3109
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## 9 10
## Min. :0.000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.000 Median :0.0000
## Mean :0.329 Mean :0.3096
## 3rd Qu.:1.000 3rd Qu.:1.0000
## Max. :1.000 Max. :1.0000
library(lattice)
stripplot(imp,HIVtest~Hispanic|.imp, pch=20)
dat.imp<-complete(imp, action = 1)
head(dat.imp, n=10)
## HIVtest Hispanic Black Gay Bi Male
## 1 0 0 0 0 0 1
## 2 1 0 0 0 0 1
## 3 0 0 0 0 0 1
## 4 0 0 0 0 0 0
## 5 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## 7 1 0 0 0 0 0
## 8 0 1 0 0 0 1
## 9 0 0 0 0 0 0
## 10 1 0 0 0 0 1
#Compare to the original data
head(BRFSS19[,c("HIVtest", "Hispanic", "Black", "Gay", "Bi", "Male")], n=10)
## # A tibble: 10 x 6
## HIVtest Hispanic Black Gay Bi Male
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 0 0 0 0 NA
## 2 1 0 0 0 0 NA
## 3 0 0 0 0 0 NA
## 4 0 0 0 0 0 NA
## 5 0 0 0 0 0 NA
## 6 0 0 0 0 0 NA
## 7 1 0 0 0 0 NA
## 8 0 1 0 0 0 NA
## 9 0 0 0 0 0 NA
## 10 1 0 0 0 0 NA
#compare the original data to the imputed
head(dat.imp[is.na(BRFSS19$HIVtest)==T,], n=10)
## HIVtest Hispanic Black Gay Bi Male
## 36 0 0 0 0 0 1
## 60 0 0 0 0 0 1
## 107 1 0 0 0 0 1
## 136 0 0 0 0 0 1
## 232 0 0 0 0 0 0
## 274 0 0 0 0 0 0
## 275 0 0 0 0 0 1
## 317 0 0 0 0 0 1
## 372 0 0 1 0 0 0
## 374 0 0 0 0 0 0
head(BRFSS19[is.na(BRFSS19$HIVtest)==T,c("HIVtest", "Hispanic", "Black", "Gay", "Bi", "Male")], n=10)
## # A tibble: 10 x 6
## HIVtest Hispanic Black Gay Bi Male
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NA 0 0 0 0 NA
## 2 NA 0 0 0 0 NA
## 3 NA 0 0 0 0 NA
## 4 NA 0 0 0 0 NA
## 5 NA 0 0 0 0 NA
## 6 NA 0 0 0 0 NA
## 7 NA 0 0 0 0 NA
## 8 NA 0 0 NA NA 1
## 9 NA 0 1 0 0 NA
## 10 NA 0 0 0 0 NA
fit.HIV<-with(data=imp ,expr=glm(HIVtest~Hispanic+Black+Gay+Bi+Male, family = binomial))
fit.HIV
## call :
## with.mids(data = imp, expr = glm(HIVtest ~ Hispanic + Black +
## Gay + Bi + Male, family = binomial))
##
## call1 :
## mice(data = BRFSS19[, c("HIVtest", "Hispanic", "Black", "Gay",
## "Bi", "Male")], m = 10, method = "logreg")
##
## nmis :
## HIVtest Hispanic Black Gay Bi Male
## 772 395 395 142 142 16945
##
## analyses :
## [[1]]
##
## Call: glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial)
##
## Coefficients:
## (Intercept) Hispanic Black Gay Bi Male
## -0.8150 0.6186 0.8728 1.8917 0.9923 -0.1834
##
## Degrees of Freedom: 19999 Total (i.e. Null); 19994 Residual
## Null Deviance: 25130
## Residual Deviance: 24620 AIC: 24630
##
## [[2]]
##
## Call: glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial)
##
## Coefficients:
## (Intercept) Hispanic Black Gay Bi Male
## -0.7667 0.6283 0.8701 1.8971 0.7242 -0.2995
##
## Degrees of Freedom: 19999 Total (i.e. Null); 19994 Residual
## Null Deviance: 25110
## Residual Deviance: 24550 AIC: 24560
##
## [[3]]
##
## Call: glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial)
##
## Coefficients:
## (Intercept) Hispanic Black Gay Bi Male
## -0.7843 0.6166 0.8866 1.9696 0.9625 -0.2582
##
## Degrees of Freedom: 19999 Total (i.e. Null); 19994 Residual
## Null Deviance: 25140
## Residual Deviance: 24600 AIC: 24610
##
## [[4]]
##
## Call: glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial)
##
## Coefficients:
## (Intercept) Hispanic Black Gay Bi Male
## -0.8193 0.6046 0.8788 1.8067 0.9520 -0.1829
##
## Degrees of Freedom: 19999 Total (i.e. Null); 19994 Residual
## Null Deviance: 25100
## Residual Deviance: 24600 AIC: 24620
##
## [[5]]
##
## Call: glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial)
##
## Coefficients:
## (Intercept) Hispanic Black Gay Bi Male
## -0.8118 0.6398 0.8809 1.9002 0.7249 -0.1977
##
## Degrees of Freedom: 19999 Total (i.e. Null); 19994 Residual
## Null Deviance: 25120
## Residual Deviance: 24610 AIC: 24620
##
## [[6]]
##
## Call: glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial)
##
## Coefficients:
## (Intercept) Hispanic Black Gay Bi Male
## -0.8367 0.5965 0.8627 1.8161 0.9346 -0.1236
##
## Degrees of Freedom: 19999 Total (i.e. Null); 19994 Residual
## Null Deviance: 25130
## Residual Deviance: 24660 AIC: 24670
##
## [[7]]
##
## Call: glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial)
##
## Coefficients:
## (Intercept) Hispanic Black Gay Bi Male
## -0.8134 0.6356 0.8637 1.8149 0.9195 -0.2026
##
## Degrees of Freedom: 19999 Total (i.e. Null); 19994 Residual
## Null Deviance: 25100
## Residual Deviance: 24590 AIC: 24600
##
## [[8]]
##
## Call: glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial)
##
## Coefficients:
## (Intercept) Hispanic Black Gay Bi Male
## -0.8433 0.6196 0.8784 1.9060 0.9585 -0.1218
##
## Degrees of Freedom: 19999 Total (i.e. Null); 19994 Residual
## Null Deviance: 25120
## Residual Deviance: 24630 AIC: 24640
##
## [[9]]
##
## Call: glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial)
##
## Coefficients:
## (Intercept) Hispanic Black Gay Bi Male
## -0.7360 0.6188 0.8594 1.9794 1.0382 -0.3668
##
## Degrees of Freedom: 19999 Total (i.e. Null); 19994 Residual
## Null Deviance: 25140
## Residual Deviance: 24540 AIC: 24550
##
## [[10]]
##
## Call: glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial)
##
## Coefficients:
## (Intercept) Hispanic Black Gay Bi Male
## -0.8396 0.6293 0.8861 1.8735 0.8034 -0.1355
##
## Degrees of Freedom: 19999 Total (i.e. Null); 19994 Residual
## Null Deviance: 25120
## Residual Deviance: 24620 AIC: 24630
with (data=imp, exp=(prop.table(table(HIVtest))))
## call :
## with.mids(data = imp, expr = (prop.table(table(HIVtest))))
##
## call1 :
## mice(data = BRFSS19[, c("HIVtest", "Hispanic", "Black", "Gay",
## "Bi", "Male")], m = 10, method = "logreg")
##
## nmis :
## HIVtest Hispanic Black Gay Bi Male
## 772 395 395 142 142 16945
##
## analyses :
## [[1]]
## HIVtest
## 0 1
## 0.67825 0.32175
##
## [[2]]
## HIVtest
## 0 1
## 0.6788 0.3212
##
## [[3]]
## HIVtest
## 0 1
## 0.67785 0.32215
##
## [[4]]
## HIVtest
## 0 1
## 0.6791 0.3209
##
## [[5]]
## HIVtest
## 0 1
## 0.67845 0.32155
##
## [[6]]
## HIVtest
## 0 1
## 0.6782 0.3218
##
## [[7]]
## HIVtest
## 0 1
## 0.6791 0.3209
##
## [[8]]
## HIVtest
## 0 1
## 0.6786 0.3214
##
## [[9]]
## HIVtest
## 0 1
## 0.6779 0.3221
##
## [[10]]
## HIVtest
## 0 1
## 0.67865 0.32135
with (data=imp, exp=(prop.table(table(Hispanic))))
## call :
## with.mids(data = imp, expr = (prop.table(table(Hispanic))))
##
## call1 :
## mice(data = BRFSS19[, c("HIVtest", "Hispanic", "Black", "Gay",
## "Bi", "Male")], m = 10, method = "logreg")
##
## nmis :
## HIVtest Hispanic Black Gay Bi Male
## 772 395 395 142 142 16945
##
## analyses :
## [[1]]
## Hispanic
## 0 1
## 0.91025 0.08975
##
## [[2]]
## Hispanic
## 0 1
## 0.91085 0.08915
##
## [[3]]
## Hispanic
## 0 1
## 0.91085 0.08915
##
## [[4]]
## Hispanic
## 0 1
## 0.91105 0.08895
##
## [[5]]
## Hispanic
## 0 1
## 0.91065 0.08935
##
## [[6]]
## Hispanic
## 0 1
## 0.91055 0.08945
##
## [[7]]
## Hispanic
## 0 1
## 0.911 0.089
##
## [[8]]
## Hispanic
## 0 1
## 0.9109 0.0891
##
## [[9]]
## Hispanic
## 0 1
## 0.9109 0.0891
##
## [[10]]
## Hispanic
## 0 1
## 0.9108 0.0892
with (data=imp, exp=(prop.table(table(Black))))
## call :
## with.mids(data = imp, expr = (prop.table(table(Black))))
##
## call1 :
## mice(data = BRFSS19[, c("HIVtest", "Hispanic", "Black", "Gay",
## "Bi", "Male")], m = 10, method = "logreg")
##
## nmis :
## HIVtest Hispanic Black Gay Bi Male
## 772 395 395 142 142 16945
##
## analyses :
## [[1]]
## Black
## 0 1
## 0.9231 0.0769
##
## [[2]]
## Black
## 0 1
## 0.92345 0.07655
##
## [[3]]
## Black
## 0 1
## 0.92325 0.07675
##
## [[4]]
## Black
## 0 1
## 0.9233 0.0767
##
## [[5]]
## Black
## 0 1
## 0.9235 0.0765
##
## [[6]]
## Black
## 0 1
## 0.92375 0.07625
##
## [[7]]
## Black
## 0 1
## 0.92325 0.07675
##
## [[8]]
## Black
## 0 1
## 0.9231 0.0769
##
## [[9]]
## Black
## 0 1
## 0.92345 0.07655
##
## [[10]]
## Black
## 0 1
## 0.9232 0.0768
with (data=imp, exp=(prop.table(table(Gay))))
## call :
## with.mids(data = imp, expr = (prop.table(table(Gay))))
##
## call1 :
## mice(data = BRFSS19[, c("HIVtest", "Hispanic", "Black", "Gay",
## "Bi", "Male")], m = 10, method = "logreg")
##
## nmis :
## HIVtest Hispanic Black Gay Bi Male
## 772 395 395 142 142 16945
##
## analyses :
## [[1]]
## Gay
## 0 1
## 0.9945 0.0055
##
## [[2]]
## Gay
## 0 1
## 0.9945 0.0055
##
## [[3]]
## Gay
## 0 1
## 0.99455 0.00545
##
## [[4]]
## Gay
## 0 1
## 0.99455 0.00545
##
## [[5]]
## Gay
## 0 1
## 0.9945 0.0055
##
## [[6]]
## Gay
## 0 1
## 0.9945 0.0055
##
## [[7]]
## Gay
## 0 1
## 0.9945 0.0055
##
## [[8]]
## Gay
## 0 1
## 0.9945 0.0055
##
## [[9]]
## Gay
## 0 1
## 0.99435 0.00565
##
## [[10]]
## Gay
## 0 1
## 0.99445 0.00555
with (data=imp, exp=(prop.table(table(Bi))))
## call :
## with.mids(data = imp, expr = (prop.table(table(Bi))))
##
## call1 :
## mice(data = BRFSS19[, c("HIVtest", "Hispanic", "Black", "Gay",
## "Bi", "Male")], m = 10, method = "logreg")
##
## nmis :
## HIVtest Hispanic Black Gay Bi Male
## 772 395 395 142 142 16945
##
## analyses :
## [[1]]
## Bi
## 0 1
## 0.99545 0.00455
##
## [[2]]
## Bi
## 0 1
## 0.9953 0.0047
##
## [[3]]
## Bi
## 0 1
## 0.9954 0.0046
##
## [[4]]
## Bi
## 0 1
## 0.99545 0.00455
##
## [[5]]
## Bi
## 0 1
## 0.99535 0.00465
##
## [[6]]
## Bi
## 0 1
## 0.9954 0.0046
##
## [[7]]
## Bi
## 0 1
## 0.99545 0.00455
##
## [[8]]
## Bi
## 0 1
## 0.99545 0.00455
##
## [[9]]
## Bi
## 0 1
## 0.99535 0.00465
##
## [[10]]
## Bi
## 0 1
## 0.9954 0.0046
with (data=imp, exp=(prop.table(table(Male))))
## call :
## with.mids(data = imp, expr = (prop.table(table(Male))))
##
## call1 :
## mice(data = BRFSS19[, c("HIVtest", "Hispanic", "Black", "Gay",
## "Bi", "Male")], m = 10, method = "logreg")
##
## nmis :
## HIVtest Hispanic Black Gay Bi Male
## 772 395 395 142 142 16945
##
## analyses :
## [[1]]
## Male
## 0 1
## 0.5507 0.4493
##
## [[2]]
## Male
## 0 1
## 0.55125 0.44875
##
## [[3]]
## Male
## 0 1
## 0.5613 0.4387
##
## [[4]]
## Male
## 0 1
## 0.5648 0.4352
##
## [[5]]
## Male
## 0 1
## 0.5565 0.4435
##
## [[6]]
## Male
## 0 1
## 0.5558 0.4442
##
## [[7]]
## Male
## 0 1
## 0.5677 0.4323
##
## [[8]]
## Male
## 0 1
## 0.5497 0.4503
##
## [[9]]
## Male
## 0 1
## 0.555 0.445
##
## [[10]]
## Male
## 0 1
## 0.55825 0.44175
est.p<-pool(fit.HIV)
print(est.p)
## Class: mipo m = 10
## term m estimate ubar b t dfcom
## 1 (Intercept) 10 -0.8066149 0.0004677443 1.191558e-03 0.001778458 19994
## 2 Hispanic 10 0.6207687 0.0025943622 1.767635e-04 0.002788802 19994
## 3 Black 10 0.8739436 0.0029197620 9.526886e-05 0.003024558 19994
## 4 Gay 10 1.8855324 0.0484780009 3.649747e-03 0.052492723 19994
## 5 Bi 10 0.9010028 0.0448345591 1.223437e-02 0.058292368 19994
## 6 Male 10 -0.2072016 0.0009753271 6.364741e-03 0.007976542 19994
## df riv lambda fmi
## 1 16.51762 2.80220180 0.73699450 0.76394506
## 2 1683.80836 0.07494707 0.06972164 0.07082464
## 3 5399.43335 0.03589188 0.03464829 0.03500566
## 4 1420.25728 0.08281533 0.07648149 0.07777924
## 5 167.02212 0.30016598 0.23086743 0.23991487
## 6 11.62665 7.17832501 0.87772557 0.89444498
summary(est.p)
## term estimate std.error statistic df p.value
## 1 (Intercept) -0.8066149 0.04217177 -19.126893 16.51762 1.060929e-12
## 2 Hispanic 0.6207687 0.05280911 11.754955 1683.80836 0.000000e+00
## 3 Black 0.8739436 0.05499598 15.891046 5399.43335 0.000000e+00
## 4 Gay 1.8855324 0.22911290 8.229708 1420.25728 4.440892e-16
## 5 Bi 0.9010028 0.24143813 3.731817 167.02212 2.602868e-04
## 6 Male -0.2072016 0.08931149 -2.319989 11.62665 3.940558e-02
lam<-data.frame(lam=est.p$pooled$lambda, param=row.names(est.p$pooled))
ggplot(data=lam,aes(x=param, y=lam))+geom_col()+theme(axis.text.x = element_text(angle = 45, hjust = 1))
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
bnm<-BRFSS19%>%
select(HIVtest,Hispanic,Black,Gay,Bi,Male)%>%
filter(complete.cases(.))%>%
as.data.frame()
summary(glm(HIVtest~Hispanic+Black+Gay+Bi+Male, family = binomial, data = bnm))
##
## Call:
## glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial,
## data = bnm)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8705 -0.8379 -0.7631 1.3339 1.6589
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.86607 0.05711 -15.164 < 2e-16 ***
## Hispanic 0.50532 0.16491 3.064 0.00218 **
## Black 0.97435 0.15693 6.209 5.34e-10 ***
## Gay 2.13807 0.41181 5.192 2.08e-07 ***
## Bi 0.92634 0.41879 2.212 0.02697 *
## Male -0.21884 0.08469 -2.584 0.00976 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3522.1 on 2863 degrees of freedom
## Residual deviance: 3434.9 on 2858 degrees of freedom
## AIC: 3446.9
##
## Number of Fisher Scoring iterations: 4
Modal.HIV<-glm(HIV.imp~His.imp+Blk.imp+Gay.imp+Bi.imp+Male.imp,family = binomial, data = BRFSS19)
summary(Modal.HIV)
##
## Call:
## glm(formula = HIV.imp ~ His.imp + Blk.imp + Gay.imp + Bi.imp +
## Male.imp, family = binomial, data = BRFSS19)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8705 -0.8379 -0.7631 1.3339 1.6589
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.86607 0.05711 -15.164 < 2e-16 ***
## His.imp1 0.50532 0.16491 3.064 0.00218 **
## Blk.imp1 0.97435 0.15693 6.209 5.34e-10 ***
## Gay.imp1 2.13807 0.41181 5.192 2.08e-07 ***
## Bi.imp1 0.92634 0.41879 2.212 0.02697 *
## Male.imp1 -0.21884 0.08469 -2.584 0.00976 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3522.1 on 2863 degrees of freedom
## Residual deviance: 3434.9 on 2858 degrees of freedom
## (17136 observations deleted due to missingness)
## AIC: 3446.9
##
## Number of Fisher Scoring iterations: 4
fit1<-glm(HIVtest~Hispanic+Black+Gay+Bi+Male, family = binomial, data = bnm)
summary(fit1)
##
## Call:
## glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial,
## data = bnm)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8705 -0.8379 -0.7631 1.3339 1.6589
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.86607 0.05711 -15.164 < 2e-16 ***
## Hispanic 0.50532 0.16491 3.064 0.00218 **
## Black 0.97435 0.15693 6.209 5.34e-10 ***
## Gay 2.13807 0.41181 5.192 2.08e-07 ***
## Bi 0.92634 0.41879 2.212 0.02697 *
## Male -0.21884 0.08469 -2.584 0.00976 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3522.1 on 2863 degrees of freedom
## Residual deviance: 3434.9 on 2858 degrees of freedom
## AIC: 3446.9
##
## Number of Fisher Scoring iterations: 4
fit.imp<-glm(HIVtest~Hispanic+Black+Gay+Bi+Male, family = binomial, data = dat.imp)
summary(fit.imp)
##
## Call:
## glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial,
## data = dat.imp)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9616 -0.8561 -0.7921 1.3423 1.6199
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.81498 0.02180 -37.390 < 2e-16 ***
## Hispanic 0.61863 0.05076 12.187 < 2e-16 ***
## Black 0.87280 0.05396 16.175 < 2e-16 ***
## Gay 1.89175 0.21916 8.632 < 2e-16 ***
## Bi 0.99227 0.21317 4.655 3.24e-06 ***
## Male -0.18342 0.03120 -5.878 4.15e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 25127 on 19999 degrees of freedom
## Residual deviance: 24622 on 19994 degrees of freedom
## AIC: 24634
##
## Number of Fisher Scoring iterations: 4
Five categorical variables were selected from the BRFSS19: HIVtest (outcome variable) Hispanic + Black + Gay + Bi + Male (predictor variables). A random sample of 25,00 cases was subset from the BRFSS19 for imputation. Two methods were used: modal imputation and multiple imputations. For the multiple imputations, 10 iterations were specified. Modal imputations were conducted across the HIVtest ~ Hispanic + Black + Gay + Bi + Male. All betas were statistically significant. Multiple imputations were also conducted across the HIVtest ~ Hispanic + Black + Gay + Bi + Male. All betas were statistically significant. Results showed that the modal imputation improved the model fit based on the AIC = 3446.9 compared to the original dataset AIC = 24634. Multiple imputations also improved the model fit based on the AIC = 3446.9 compared to the original dataset. Modal imputation and multiple imputations yielded similar results betas and AICs.