library(car)
library(mice)
library(ggplot2)
library(haven)
library(readr)
BRFSS19<-read_xpt("C:/Users/adolp/Desktop/Statistics 2/BRFSS/LLCP2019.XPT")
set.seed(1234)
samp<-sample(1:dim(BRFSS19)[1], size = 20000) #smaller sample for brevity
BRFSS19<-BRFSS19[samp,]
#Fix BRFSS names
nams<-names(BRFSS19)
head(BRFSS19, n=10)
## # A tibble: 10 x 342
##    `_STATE` FMONTH IDATE   IMONTH IDAY  IYEAR DISPCODE SEQNO     `_PSU` CTELENM1
##       <dbl>  <dbl> <chr>   <chr>  <chr> <chr>    <dbl> <chr>      <dbl>    <dbl>
##  1       31      8 110320~ 11     03    2019      1200 201900~   2.02e9       NA
##  2       18      9 091220~ 09     12    2019      1100 201900~   2.02e9        1
##  3       41      1 020820~ 02     08    2019      1100 201900~   2.02e9       NA
##  4       56      3 030420~ 03     04    2019      1100 201900~   2.02e9       NA
##  5       39     12 120320~ 12     03    2019      1100 201901~   2.02e9       NA
##  6       20     10 111320~ 11     13    2019      1100 201900~   2.02e9        1
##  7       53      1 013120~ 01     31    2019      1100 201900~   2.02e9        1
##  8       48      9 092820~ 09     28    2019      1100 201900~   2.02e9        1
##  9       48      1 021020~ 02     10    2019      1100 201900~   2.02e9        1
## 10       48      2 021820~ 02     18    2019      1100 201900~   2.02e9       NA
## # ... with 332 more variables: PVTRESD1 <dbl>, COLGHOUS <dbl>, STATERE1 <dbl>,
## #   CELPHONE <dbl>, LADULT1 <dbl>, COLGSEX <dbl>, NUMADULT <dbl>,
## #   LANDSEX <dbl>, NUMMEN <dbl>, NUMWOMEN <dbl>, RESPSLCT <dbl>,
## #   SAFETIME <dbl>, CTELNUM1 <dbl>, CELLFON5 <dbl>, CADULT1 <dbl>,
## #   CELLSEX <dbl>, PVTRESD3 <dbl>, CCLGHOUS <dbl>, CSTATE1 <dbl>,
## #   LANDLINE <dbl>, HHADULT <dbl>, SEXVAR <dbl>, GENHLTH <dbl>, PHYSHLTH <dbl>,
## #   MENTHLTH <dbl>, POORHLTH <dbl>, HLTHPLN1 <dbl>, PERSDOC2 <dbl>,
## #   MEDCOST <dbl>, CHECKUP1 <dbl>, BPHIGH4 <dbl>, BPMEDS <dbl>, CHOLCHK2 <dbl>,
## #   TOLDHI2 <dbl>, CHOLMED2 <dbl>, CVDINFR4 <dbl>, CVDCRHD4 <dbl>,
## #   CVDSTRK3 <dbl>, ASTHMA3 <dbl>, ASTHNOW <dbl>, CHCSCNCR <dbl>,
## #   CHCOCNCR <dbl>, CHCCOPD2 <dbl>, ADDEPEV3 <dbl>, CHCKDNY2 <dbl>,
## #   DIABETE4 <dbl>, DIABAGE3 <dbl>, HAVARTH4 <dbl>, ARTHEXER <dbl>,
## #   ARTHEDU <dbl>, LMTJOIN3 <dbl>, ARTHDIS2 <dbl>, JOINPAI2 <dbl>,
## #   MARITAL <dbl>, EDUCA <dbl>, RENTHOM1 <dbl>, NUMHHOL3 <dbl>, NUMPHON3 <dbl>,
## #   CPDEMO1B <dbl>, VETERAN3 <dbl>, EMPLOY1 <dbl>, CHILDREN <dbl>,
## #   INCOME2 <dbl>, WEIGHT2 <dbl>, HEIGHT3 <dbl>, PREGNANT <dbl>, DEAF <dbl>,
## #   BLIND <dbl>, DECIDE <dbl>, DIFFWALK <dbl>, DIFFDRES <dbl>, DIFFALON <dbl>,
## #   SMOKE100 <dbl>, SMOKDAY2 <dbl>, STOPSMK2 <dbl>, LASTSMK2 <dbl>,
## #   USENOW3 <dbl>, ALCDAY5 <dbl>, AVEDRNK3 <dbl>, DRNK3GE5 <dbl>,
## #   MAXDRNKS <dbl>, EXERANY2 <dbl>, EXRACT11 <dbl>, EXEROFT1 <dbl>,
## #   EXERHMM1 <dbl>, EXRACT21 <dbl>, EXEROFT2 <dbl>, EXERHMM2 <dbl>,
## #   STRENGTH <dbl>, FRUIT2 <dbl>, FRUITJU2 <dbl>, FVGREEN1 <dbl>,
## #   FRENCHF1 <dbl>, POTATOE1 <dbl>, VEGETAB2 <dbl>, FLUSHOT7 <dbl>,
## #   FLSHTMY3 <dbl>, TETANUS1 <dbl>, PNEUVAC4 <dbl>, HIVTST7 <dbl>, ...
newnames<-tolower(gsub(pattern = "_",replacement =  "",x =  nams))
names(BRFSS19)<-newnames
#Recode and Dummy coded variables
BRFSS19$Male<-recode(BRFSS19$birthsex, recodes = "7:9=NA; 1=1;2=0")

BRFSS19$Age1824<-recode(BRFSS19$ageg, recodes="1=1; else=0")
BRFSS19$Age2534<-recode(BRFSS19$ageg, recodes="2=1;else=0")
BRFSS19$Age3544<-recode(BRFSS19$ageg, recodes="3=1;else=0")
BRFSS19$Age4554<-recode(BRFSS19$ageg, recodes="4=1;else=0")
BRFSS19$Age5564<-recode(BRFSS19$ageg, recodes="5=1;else=0")
BRFSS19$Age65<-recode(BRFSS19$ageg, recodes="6=1;else=0")

BRFSS19$Married<-recode(BRFSS19$marital, recodes="9=NA; 1=1; else=0")
BRFSS19$Divorced<-recode(BRFSS19$marital, recodes="9=NA; 2=1; else=0")
BRFSS19$Widowed<-recode(BRFSS19$marital, recodes="9=NA; 3=1; else=0")
BRFSS19$Seperated<-recode(BRFSS19$marital, recodes="9=NA; 4=1; else=0")
BRFSS19$Never<-recode(BRFSS19$marital, recodes="9=NA; 5=1; else=0")
BRFSS19$Unmarried<-recode(BRFSS19$marital, recodes="9=NA; 6=1; else=0")

BRFSS19$NoHS<-recode(BRFSS19$educag, recodes="9=NA; 1=1; else=0")
BRFSS19$HS<-recode(BRFSS19$educag, recodes="9=NA; 2=1; else=0")
BRFSS19$College<-recode(BRFSS19$educag, recodes="9=NA; 3=1; else=0")
BRFSS19$GradCollege<-recode(BRFSS19$educag, recodes="9=NA; 4=1; else=0")

BRFSS19$Employed<-recode(BRFSS19$employ1,recodes="9=NA; 1=1; else=0")
BRFSS19$SelfEmployed<-recode(BRFSS19$employ1,recodes="9=NA; 2=1; else=0")
BRFSS19$NotEmployed<-recode(BRFSS19$employ1,recodes="9=NA; 3=1; 4=1; else=0")
BRFSS19$Student<-recode(BRFSS19$employ1,recodes="9=NA; 6=1; else=0")
BRFSS19$Retired<-recode(BRFSS19$employ1,recodes="9=NA; 7=1; else=0")
BRFSS19$Unabletowork<-recode(BRFSS19$employ1,recodes="9=NA; 8=1; else=0")
BRFSS19$Homemaker<-recode(BRFSS19$employ1,recodes="9=NA; 5=1; else=0")

BRFSS19$Black<-recode(BRFSS19$racegr3, recodes="2=1; 9=NA; else=0")
BRFSS19$White<-recode(BRFSS19$racegr3, recodes="1=1; 9=NA; else=0")
BRFSS19$Other<-recode(BRFSS19$racegr3, recodes="3:4=1; 9=NA; else=0")
BRFSS19$Hispanic<-recode(BRFSS19$racegr3, recodes="5=1; 9=NA; else=0")

BRFSS19$Income15K<-recode(BRFSS19$incomg,recodes="9=NA; 1=1; else=0")
BRFSS19$Income1525K<-recode(BRFSS19$incomg,recodes="9=NA; 2=1; else=0")
BRFSS19$Income2535K<-recode(BRFSS19$incomg,recodes="9=NA; 3=1; else=0")
BRFSS19$Income3550K<-recode(BRFSS19$incomg,recodes="9=NA; 4=1; else=0")
BRFSS19$Income50K<-recode(BRFSS19$incomg,recodes="9=NA; 5=1; else=0")

BRFSS19$Gay<-recode(BRFSS19$somale,recodes="7:9=NA; 1=1; else=0")
BRFSS19$Straight<-recode(BRFSS19$somale,recodes="7:9=NA; 2=1; else=0")
BRFSS19$Bi<-recode(BRFSS19$somale,recodes="7:9=NA; 3=1; else=0")
BRFSS19$Else<-recode(BRFSS19$somale,recodes="7:9=NA; 4=1; else=0")

BRFSS19$HIVtest<-recode(BRFSS19$aidtst4,recodes="7:9=NA; 1=1; else=0")
summary(BRFSS19[, c("HIVtest", "Hispanic",  "Black", "Gay", "Bi",  "Male")])
##     HIVtest          Hispanic          Black             Gay         
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.00000  
##  Mean   :0.3218   Mean   :0.0891   Mean   :0.0767   Mean   :0.00549  
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.00000  
##  NA's   :772      NA's   :395      NA's   :395      NA's   :142      
##        Bi               Male      
##  Min.   :0.00000   Min.   :0.000  
##  1st Qu.:0.00000   1st Qu.:0.000  
##  Median :0.00000   Median :0.000  
##  Mean   :0.00458   Mean   :0.452  
##  3rd Qu.:0.00000   3rd Qu.:1.000  
##  Max.   :1.00000   Max.   :1.000  
##  NA's   :142       NA's   :16945

Modal Imputation

table(BRFSS19$HIVtest)
## 
##     0     1 
## 13040  6188
#find the most common value
mcv.HIVtest<-factor(names(which.max(table(BRFSS19$HIVtest))), levels=levels(BRFSS19$HIVtest))
mcv.HIVtest
## [1] <NA>
## Levels:
#impute the cases
BRFSS19$HIVtest.imp<-as.factor(ifelse(is.na(BRFSS19$HIVtest)==T, mcv.HIVtest, BRFSS19$HIVtest))
levels(BRFSS19$HIVtest.imp)<-levels(as.factor(BRFSS19$HIVtest))
prop.table(table(BRFSS19$HIVtest))
## 
##         0         1 
## 0.6781777 0.3218223
prop.table(table(BRFSS19$HIVtest.imp))
## 
##         0         1 
## 0.6781777 0.3218223
barplot(prop.table(table(BRFSS19$HIVtest)), main="Original Data", ylim=c(0, .6))

barplot(prop.table(table(BRFSS19$HIVtest.imp)), main="Imputed Data",ylim=c(0, .6))

table(BRFSS19$HIVtest)
## 
##     0     1 
## 13040  6188
#find the most common value
mcv.HIV<-factor(names(which.max(table(BRFSS19$HIVtest))), levels = levels(BRFSS19$HIVtest))
mcv.HIV
## [1] <NA>
## Levels:
#impute the cases
BRFSS19$HIV.imp<-as.factor(ifelse(is.na(BRFSS19$HIVtest)==T, mcv.HIV, BRFSS19$HIVtest))
levels(BRFSS19$HIV.imp)<-levels(as.factor(BRFSS19$HIVtest))

prop.table(table(BRFSS19$HIVtest))
## 
##         0         1 
## 0.6781777 0.3218223
prop.table(table(BRFSS19$HIV.imp))
## 
##         0         1 
## 0.6781777 0.3218223
barplot(prop.table(table(BRFSS19$HIVtest)), main="Original Data", ylim=c(0, .6))

barplot(prop.table(table(BRFSS19$HIV.imp)), main="Imputed Data", ylim=c(0, .6))

table(BRFSS19$Hispanic)
## 
##     0     1 
## 17858  1747
#find the most common value
mcv.His<-factor(names(which.max(table(BRFSS19$Hispanic))), levels = levels(BRFSS19$Hispanic))
mcv.His
## [1] <NA>
## Levels:
#impute the cases
BRFSS19$His.imp<-as.factor(ifelse(is.na(BRFSS19$Hispanic)==T, mcv.His, BRFSS19$Hispanic))
levels(BRFSS19$His.imp)<-levels(as.factor(BRFSS19$Hispanic))

prop.table(table(BRFSS19$HIVtest))
## 
##         0         1 
## 0.6781777 0.3218223
prop.table(table(BRFSS19$His.imp))
## 
##          0          1 
## 0.91089008 0.08910992
barplot(prop.table(table(BRFSS19$HIVtest)), main="Original Data", ylim=c(0, .6))

barplot(prop.table(table(BRFSS19$His.imp)), main="Imputed Data", ylim=c(0, .6))

table(BRFSS19$Black)
## 
##     0     1 
## 18102  1503
#find the most common value
mcv.Blk<-factor(names(which.max(table(BRFSS19$Black))), levels = levels(BRFSS19$Black))
mcv.Blk
## [1] <NA>
## Levels:
#impute the cases
BRFSS19$Blk.imp<-as.factor(ifelse(is.na(BRFSS19$Black)==T, mcv.Blk, BRFSS19$Black))
levels(BRFSS19$Blk.imp)<-levels(as.factor(BRFSS19$Black))

prop.table(table(BRFSS19$Black))
## 
##          0          1 
## 0.92333588 0.07666412
prop.table(table(BRFSS19$Blk.imp))
## 
##          0          1 
## 0.92333588 0.07666412
barplot(prop.table(table(BRFSS19$Black)), main="Original Data", ylim=c(0, .6))

barplot(prop.table(table(BRFSS19$Blk.imp)), main="Imputed Data", ylim=c(0, .6))

table(BRFSS19$Gay)
## 
##     0     1 
## 19749   109
#find the most common value
mcv.Gay<-factor(names(which.max(table(BRFSS19$Gay))), levels = levels(BRFSS19$Gay))
mcv.Gay
## [1] <NA>
## Levels:
#impute the cases
BRFSS19$Gay.imp<-as.factor(ifelse(is.na(BRFSS19$Gay)==T, mcv.Gay, BRFSS19$Gay))
levels(BRFSS19$Gay.imp)<-levels(as.factor(BRFSS19$Gay))

prop.table(table(BRFSS19$Gay))
## 
##           0           1 
## 0.994511028 0.005488972
prop.table(table(BRFSS19$Gay.imp))
## 
##           0           1 
## 0.994511028 0.005488972
barplot(prop.table(table(BRFSS19$Gay)), main="Original Data", ylim=c(0, .6))

barplot(prop.table(table(BRFSS19$Gay.imp)), main="Imputed Data", ylim=c(0, .6))

table(BRFSS19$Bi)
## 
##     0     1 
## 19767    91
#find the most common value
mcv.Bi<-factor(names(which.max(table(BRFSS19$Bi))), levels = levels(BRFSS19$Bi))
mcv.Bi
## [1] <NA>
## Levels:
#impute the cases
BRFSS19$Bi.imp<-as.factor(ifelse(is.na(BRFSS19$Bi)==T, mcv.Bi, BRFSS19$Bi))
levels(BRFSS19$Bi.imp)<-levels(as.factor(BRFSS19$Bi))

prop.table(table(BRFSS19$Bi))
## 
##           0           1 
## 0.995417464 0.004582536
prop.table(table(BRFSS19$Bi.imp))
## 
##           0           1 
## 0.995417464 0.004582536
barplot(prop.table(table(BRFSS19$Bi)), main="Original Data", ylim=c(0, .6))

barplot(prop.table(table(BRFSS19$Bi.imp)), main="Imputed Data", ylim=c(0, .6))

table(BRFSS19$Male)
## 
##    0    1 
## 1674 1381
#find the most common value
mcv.Male<-factor(names(which.max(table(BRFSS19$Male))), levels = levels(BRFSS19$Male))
mcv.Male
## [1] <NA>
## Levels:
#impute the cases
BRFSS19$Male.imp<-as.factor(ifelse(is.na(BRFSS19$Male)==T, mcv.Male, BRFSS19$Male))
levels(BRFSS19$Male.imp)<-levels(as.factor(BRFSS19$Male))

prop.table(table(BRFSS19$Male))
## 
##         0         1 
## 0.5479542 0.4520458
prop.table(table(BRFSS19$Male.imp))
## 
##         0         1 
## 0.5479542 0.4520458
barplot(prop.table(table(BRFSS19$Male)), main="Original Data", ylim=c(0, .6))

barplot(prop.table(table(BRFSS19$Male.imp)), main="Imputed Data", ylim=c(0, .6))

Modal.HIV<-glm(HIV.imp~His.imp+Blk.imp+Gay.imp+Bi.imp+Male.imp,family = binomial, data = BRFSS19)
Modal.HIV  
## 
## Call:  glm(formula = HIV.imp ~ His.imp + Blk.imp + Gay.imp + Bi.imp + 
##     Male.imp, family = binomial, data = BRFSS19)
## 
## Coefficients:
## (Intercept)     His.imp1     Blk.imp1     Gay.imp1      Bi.imp1    Male.imp1  
##     -0.8661       0.5053       0.9743       2.1381       0.9263      -0.2188  
## 
## Degrees of Freedom: 2863 Total (i.e. Null);  2858 Residual
##   (17136 observations deleted due to missingness)
## Null Deviance:       3522 
## Residual Deviance: 3435  AIC: 3447

Testing for MAR

#look at the patterns of missingness
md.pattern(BRFSS19[,c("HIVtest", "Hispanic",  "Black", "Gay", "Bi",  "Male")])

##       Gay  Bi Hispanic Black HIVtest  Male      
## 2864    1   1        1     1       1     1     0
## 15883   1   1        1     1       1     0     1
## 94      1   1        1     1       0     1     1
## 644     1   1        1     1       0     0     2
## 57      1   1        0     0       1     1     2
## 298     1   1        0     0       1     0     3
## 4       1   1        0     0       0     1     3
## 14      1   1        0     0       0     0     4
## 25      0   0        1     1       1     1     2
## 83      0   0        1     1       1     0     3
## 3       0   0        1     1       0     1     3
## 9       0   0        1     1       0     0     4
## 7       0   0        0     0       1     1     4
## 11      0   0        0     0       1     0     5
## 1       0   0        0     0       0     1     5
## 3       0   0        0     0       0     0     6
##       142 142      395   395     772 16945 18791
md.pairs(BRFSS19[,c("HIVtest", "Hispanic",  "Black", "Gay", "Bi",  "Male")])
## $rr
##          HIVtest Hispanic Black   Gay    Bi Male
## HIVtest    19228    18855 18855 19102 19102 2953
## Hispanic   18855    19605 19605 19485 19485 2986
## Black      18855    19605 19605 19485 19485 2986
## Gay        19102    19485 19485 19858 19858 3019
## Bi         19102    19485 19485 19858 19858 3019
## Male        2953     2986  2986  3019  3019 3055
## 
## $rm
##          HIVtest Hispanic Black Gay  Bi  Male
## HIVtest        0      373   373 126 126 16275
## Hispanic     750        0     0 120 120 16619
## Black        750        0     0 120 120 16619
## Gay          756      373   373   0   0 16839
## Bi           756      373   373   0   0 16839
## Male         102       69    69  36  36     0
## 
## $mr
##          HIVtest Hispanic Black   Gay    Bi Male
## HIVtest        0      750   750   756   756  102
## Hispanic     373        0     0   373   373   69
## Black        373        0     0   373   373   69
## Gay          126      120   120     0     0   36
## Bi           126      120   120     0     0   36
## Male       16275    16619 16619 16839 16839    0
## 
## $mm
##          HIVtest Hispanic Black Gay  Bi  Male
## HIVtest      772       22    22  16  16   670
## Hispanic      22      395   395  22  22   326
## Black         22      395   395  22  22   326
## Gay           16       22    22 142 142   106
## Bi            16       22    22 142 142   106
## Male         670      326   326 106 106 16945
imp<-mice(data = BRFSS19[,c("HIVtest", "Hispanic",  "Black", "Gay", "Bi",  "Male")], m = 10, method = "logreg")
## Warning: Type mismatch for variable(s): HIVtest
## Imputation method logreg is for categorical data.
## Warning: Type mismatch for variable(s): Hispanic
## Imputation method logreg is for categorical data.
## Warning: Type mismatch for variable(s): Black
## Imputation method logreg is for categorical data.
## Warning: Type mismatch for variable(s): Gay
## Imputation method logreg is for categorical data.
## Warning: Type mismatch for variable(s): Bi
## Imputation method logreg is for categorical data.
## Warning: Type mismatch for variable(s): Male
## Imputation method logreg is for categorical data.
## 
##  iter imp variable
##   1   1  HIVtest  Hispanic  Black  Gay  Bi  Male
##   1   2  HIVtest  Hispanic  Black  Gay  Bi  Male
##   1   3  HIVtest  Hispanic  Black  Gay  Bi  Male
##   1   4  HIVtest  Hispanic  Black  Gay  Bi  Male
##   1   5  HIVtest  Hispanic  Black  Gay  Bi  Male
##   1   6  HIVtest  Hispanic  Black  Gay  Bi  Male
##   1   7  HIVtest  Hispanic  Black  Gay  Bi  Male
##   1   8  HIVtest  Hispanic  Black  Gay  Bi  Male
##   1   9  HIVtest  Hispanic  Black  Gay  Bi  Male
##   1   10  HIVtest  Hispanic  Black  Gay  Bi  Male
##   2   1  HIVtest  Hispanic  Black  Gay  Bi  Male
##   2   2  HIVtest  Hispanic  Black  Gay  Bi  Male
##   2   3  HIVtest  Hispanic  Black  Gay  Bi  Male
##   2   4  HIVtest  Hispanic  Black  Gay  Bi  Male
##   2   5  HIVtest  Hispanic  Black  Gay  Bi  Male
##   2   6  HIVtest  Hispanic  Black  Gay  Bi  Male
##   2   7  HIVtest  Hispanic  Black  Gay  Bi  Male
##   2   8  HIVtest  Hispanic  Black  Gay  Bi  Male
##   2   9  HIVtest  Hispanic  Black  Gay  Bi  Male
##   2   10  HIVtest  Hispanic  Black  Gay  Bi  Male
##   3   1  HIVtest  Hispanic  Black  Gay  Bi  Male
##   3   2  HIVtest  Hispanic  Black  Gay  Bi  Male
##   3   3  HIVtest  Hispanic  Black  Gay  Bi  Male
##   3   4  HIVtest  Hispanic  Black  Gay  Bi  Male
##   3   5  HIVtest  Hispanic  Black  Gay  Bi  Male
##   3   6  HIVtest  Hispanic  Black  Gay  Bi  Male
##   3   7  HIVtest  Hispanic  Black  Gay  Bi  Male
##   3   8  HIVtest  Hispanic  Black  Gay  Bi  Male
##   3   9  HIVtest  Hispanic  Black  Gay  Bi  Male
##   3   10  HIVtest  Hispanic  Black  Gay  Bi  Male
##   4   1  HIVtest  Hispanic  Black  Gay  Bi  Male
##   4   2  HIVtest  Hispanic  Black  Gay  Bi  Male
##   4   3  HIVtest  Hispanic  Black  Gay  Bi  Male
##   4   4  HIVtest  Hispanic  Black  Gay  Bi  Male
##   4   5  HIVtest  Hispanic  Black  Gay  Bi  Male
##   4   6  HIVtest  Hispanic  Black  Gay  Bi  Male
##   4   7  HIVtest  Hispanic  Black  Gay  Bi  Male
##   4   8  HIVtest  Hispanic  Black  Gay  Bi  Male
##   4   9  HIVtest  Hispanic  Black  Gay  Bi  Male
##   4   10  HIVtest  Hispanic  Black  Gay  Bi  Male
##   5   1  HIVtest  Hispanic  Black  Gay  Bi  Male
##   5   2  HIVtest  Hispanic  Black  Gay  Bi  Male
##   5   3  HIVtest  Hispanic  Black  Gay  Bi  Male
##   5   4  HIVtest  Hispanic  Black  Gay  Bi  Male
##   5   5  HIVtest  Hispanic  Black  Gay  Bi  Male
##   5   6  HIVtest  Hispanic  Black  Gay  Bi  Male
##   5   7  HIVtest  Hispanic  Black  Gay  Bi  Male
##   5   8  HIVtest  Hispanic  Black  Gay  Bi  Male
##   5   9  HIVtest  Hispanic  Black  Gay  Bi  Male
##   5   10  HIVtest  Hispanic  Black  Gay  Bi  Male
print(imp)
## Class: mids
## Number of multiple imputations:  10 
## Imputation methods:
##  HIVtest Hispanic    Black      Gay       Bi     Male 
## "logreg" "logreg" "logreg" "logreg" "logreg" "logreg" 
## PredictorMatrix:
##          HIVtest Hispanic Black Gay Bi Male
## HIVtest        0        1     1   1  1    1
## Hispanic       1        0     1   1  1    1
## Black          1        1     0   1  1    1
## Gay            1        1     1   0  1    1
## Bi             1        1     1   1  0    1
## Male           1        1     1   1  1    0
head(imp$imp$HIVtest)
##     1 2 3 4 5 6 7 8 9 10
## 36  0 0 0 0 0 0 0 0 0  0
## 60  0 1 0 1 0 0 0 0 0  0
## 107 1 1 0 1 0 0 0 0 0  0
## 136 0 0 0 0 1 1 0 0 0  0
## 232 0 0 1 0 0 1 1 1 0  0
## 274 0 1 0 0 1 0 0 0 0  0
summary(imp$imp$HIVtest)
##        1                2                3                4         
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.3199   Mean   :0.3057   Mean   :0.3303   Mean   :0.2979  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##        5                6                7                8         
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.3148   Mean   :0.3212   Mean   :0.2979   Mean   :0.3109  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##        9               10        
##  Min.   :0.000   Min.   :0.0000  
##  1st Qu.:0.000   1st Qu.:0.0000  
##  Median :0.000   Median :0.0000  
##  Mean   :0.329   Mean   :0.3096  
##  3rd Qu.:1.000   3rd Qu.:1.0000  
##  Max.   :1.000   Max.   :1.0000
library(lattice)
stripplot(imp,HIVtest~Hispanic|.imp, pch=20)

dat.imp<-complete(imp, action = 1)
head(dat.imp, n=10)
##    HIVtest Hispanic Black Gay Bi Male
## 1        0        0     0   0  0    1
## 2        1        0     0   0  0    1
## 3        0        0     0   0  0    1
## 4        0        0     0   0  0    0
## 5        0        0     0   0  0    0
## 6        0        0     0   0  0    0
## 7        1        0     0   0  0    0
## 8        0        1     0   0  0    1
## 9        0        0     0   0  0    0
## 10       1        0     0   0  0    1
#Compare to the original data
head(BRFSS19[,c("HIVtest", "Hispanic",  "Black", "Gay", "Bi",  "Male")], n=10)
## # A tibble: 10 x 6
##    HIVtest Hispanic Black   Gay    Bi  Male
##      <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl>
##  1       0        0     0     0     0    NA
##  2       1        0     0     0     0    NA
##  3       0        0     0     0     0    NA
##  4       0        0     0     0     0    NA
##  5       0        0     0     0     0    NA
##  6       0        0     0     0     0    NA
##  7       1        0     0     0     0    NA
##  8       0        1     0     0     0    NA
##  9       0        0     0     0     0    NA
## 10       1        0     0     0     0    NA
#compare the original data to the imputed
head(dat.imp[is.na(BRFSS19$HIVtest)==T,], n=10)
##     HIVtest Hispanic Black Gay Bi Male
## 36        0        0     0   0  0    1
## 60        0        0     0   0  0    1
## 107       1        0     0   0  0    1
## 136       0        0     0   0  0    1
## 232       0        0     0   0  0    0
## 274       0        0     0   0  0    0
## 275       0        0     0   0  0    1
## 317       0        0     0   0  0    1
## 372       0        0     1   0  0    0
## 374       0        0     0   0  0    0
head(BRFSS19[is.na(BRFSS19$HIVtest)==T,c("HIVtest", "Hispanic",  "Black", "Gay", "Bi",  "Male")], n=10)
## # A tibble: 10 x 6
##    HIVtest Hispanic Black   Gay    Bi  Male
##      <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl>
##  1      NA        0     0     0     0    NA
##  2      NA        0     0     0     0    NA
##  3      NA        0     0     0     0    NA
##  4      NA        0     0     0     0    NA
##  5      NA        0     0     0     0    NA
##  6      NA        0     0     0     0    NA
##  7      NA        0     0     0     0    NA
##  8      NA        0     0    NA    NA     1
##  9      NA        0     1     0     0    NA
## 10      NA        0     0     0     0    NA
fit.HIV<-with(data=imp ,expr=glm(HIVtest~Hispanic+Black+Gay+Bi+Male, family = binomial))
fit.HIV                
## call :
## with.mids(data = imp, expr = glm(HIVtest ~ Hispanic + Black + 
##     Gay + Bi + Male, family = binomial))
## 
## call1 :
## mice(data = BRFSS19[, c("HIVtest", "Hispanic", "Black", "Gay", 
##     "Bi", "Male")], m = 10, method = "logreg")
## 
## nmis :
##  HIVtest Hispanic    Black      Gay       Bi     Male 
##      772      395      395      142      142    16945 
## 
## analyses :
## [[1]]
## 
## Call:  glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial)
## 
## Coefficients:
## (Intercept)     Hispanic        Black          Gay           Bi         Male  
##     -0.8150       0.6186       0.8728       1.8917       0.9923      -0.1834  
## 
## Degrees of Freedom: 19999 Total (i.e. Null);  19994 Residual
## Null Deviance:       25130 
## Residual Deviance: 24620     AIC: 24630
## 
## [[2]]
## 
## Call:  glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial)
## 
## Coefficients:
## (Intercept)     Hispanic        Black          Gay           Bi         Male  
##     -0.7667       0.6283       0.8701       1.8971       0.7242      -0.2995  
## 
## Degrees of Freedom: 19999 Total (i.e. Null);  19994 Residual
## Null Deviance:       25110 
## Residual Deviance: 24550     AIC: 24560
## 
## [[3]]
## 
## Call:  glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial)
## 
## Coefficients:
## (Intercept)     Hispanic        Black          Gay           Bi         Male  
##     -0.7843       0.6166       0.8866       1.9696       0.9625      -0.2582  
## 
## Degrees of Freedom: 19999 Total (i.e. Null);  19994 Residual
## Null Deviance:       25140 
## Residual Deviance: 24600     AIC: 24610
## 
## [[4]]
## 
## Call:  glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial)
## 
## Coefficients:
## (Intercept)     Hispanic        Black          Gay           Bi         Male  
##     -0.8193       0.6046       0.8788       1.8067       0.9520      -0.1829  
## 
## Degrees of Freedom: 19999 Total (i.e. Null);  19994 Residual
## Null Deviance:       25100 
## Residual Deviance: 24600     AIC: 24620
## 
## [[5]]
## 
## Call:  glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial)
## 
## Coefficients:
## (Intercept)     Hispanic        Black          Gay           Bi         Male  
##     -0.8118       0.6398       0.8809       1.9002       0.7249      -0.1977  
## 
## Degrees of Freedom: 19999 Total (i.e. Null);  19994 Residual
## Null Deviance:       25120 
## Residual Deviance: 24610     AIC: 24620
## 
## [[6]]
## 
## Call:  glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial)
## 
## Coefficients:
## (Intercept)     Hispanic        Black          Gay           Bi         Male  
##     -0.8367       0.5965       0.8627       1.8161       0.9346      -0.1236  
## 
## Degrees of Freedom: 19999 Total (i.e. Null);  19994 Residual
## Null Deviance:       25130 
## Residual Deviance: 24660     AIC: 24670
## 
## [[7]]
## 
## Call:  glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial)
## 
## Coefficients:
## (Intercept)     Hispanic        Black          Gay           Bi         Male  
##     -0.8134       0.6356       0.8637       1.8149       0.9195      -0.2026  
## 
## Degrees of Freedom: 19999 Total (i.e. Null);  19994 Residual
## Null Deviance:       25100 
## Residual Deviance: 24590     AIC: 24600
## 
## [[8]]
## 
## Call:  glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial)
## 
## Coefficients:
## (Intercept)     Hispanic        Black          Gay           Bi         Male  
##     -0.8433       0.6196       0.8784       1.9060       0.9585      -0.1218  
## 
## Degrees of Freedom: 19999 Total (i.e. Null);  19994 Residual
## Null Deviance:       25120 
## Residual Deviance: 24630     AIC: 24640
## 
## [[9]]
## 
## Call:  glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial)
## 
## Coefficients:
## (Intercept)     Hispanic        Black          Gay           Bi         Male  
##     -0.7360       0.6188       0.8594       1.9794       1.0382      -0.3668  
## 
## Degrees of Freedom: 19999 Total (i.e. Null);  19994 Residual
## Null Deviance:       25140 
## Residual Deviance: 24540     AIC: 24550
## 
## [[10]]
## 
## Call:  glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial)
## 
## Coefficients:
## (Intercept)     Hispanic        Black          Gay           Bi         Male  
##     -0.8396       0.6293       0.8861       1.8735       0.8034      -0.1355  
## 
## Degrees of Freedom: 19999 Total (i.e. Null);  19994 Residual
## Null Deviance:       25120 
## Residual Deviance: 24620     AIC: 24630
with (data=imp, exp=(prop.table(table(HIVtest))))
## call :
## with.mids(data = imp, expr = (prop.table(table(HIVtest))))
## 
## call1 :
## mice(data = BRFSS19[, c("HIVtest", "Hispanic", "Black", "Gay", 
##     "Bi", "Male")], m = 10, method = "logreg")
## 
## nmis :
##  HIVtest Hispanic    Black      Gay       Bi     Male 
##      772      395      395      142      142    16945 
## 
## analyses :
## [[1]]
## HIVtest
##       0       1 
## 0.67825 0.32175 
## 
## [[2]]
## HIVtest
##      0      1 
## 0.6788 0.3212 
## 
## [[3]]
## HIVtest
##       0       1 
## 0.67785 0.32215 
## 
## [[4]]
## HIVtest
##      0      1 
## 0.6791 0.3209 
## 
## [[5]]
## HIVtest
##       0       1 
## 0.67845 0.32155 
## 
## [[6]]
## HIVtest
##      0      1 
## 0.6782 0.3218 
## 
## [[7]]
## HIVtest
##      0      1 
## 0.6791 0.3209 
## 
## [[8]]
## HIVtest
##      0      1 
## 0.6786 0.3214 
## 
## [[9]]
## HIVtest
##      0      1 
## 0.6779 0.3221 
## 
## [[10]]
## HIVtest
##       0       1 
## 0.67865 0.32135
with (data=imp, exp=(prop.table(table(Hispanic))))
## call :
## with.mids(data = imp, expr = (prop.table(table(Hispanic))))
## 
## call1 :
## mice(data = BRFSS19[, c("HIVtest", "Hispanic", "Black", "Gay", 
##     "Bi", "Male")], m = 10, method = "logreg")
## 
## nmis :
##  HIVtest Hispanic    Black      Gay       Bi     Male 
##      772      395      395      142      142    16945 
## 
## analyses :
## [[1]]
## Hispanic
##       0       1 
## 0.91025 0.08975 
## 
## [[2]]
## Hispanic
##       0       1 
## 0.91085 0.08915 
## 
## [[3]]
## Hispanic
##       0       1 
## 0.91085 0.08915 
## 
## [[4]]
## Hispanic
##       0       1 
## 0.91105 0.08895 
## 
## [[5]]
## Hispanic
##       0       1 
## 0.91065 0.08935 
## 
## [[6]]
## Hispanic
##       0       1 
## 0.91055 0.08945 
## 
## [[7]]
## Hispanic
##     0     1 
## 0.911 0.089 
## 
## [[8]]
## Hispanic
##      0      1 
## 0.9109 0.0891 
## 
## [[9]]
## Hispanic
##      0      1 
## 0.9109 0.0891 
## 
## [[10]]
## Hispanic
##      0      1 
## 0.9108 0.0892
with (data=imp, exp=(prop.table(table(Black))))
## call :
## with.mids(data = imp, expr = (prop.table(table(Black))))
## 
## call1 :
## mice(data = BRFSS19[, c("HIVtest", "Hispanic", "Black", "Gay", 
##     "Bi", "Male")], m = 10, method = "logreg")
## 
## nmis :
##  HIVtest Hispanic    Black      Gay       Bi     Male 
##      772      395      395      142      142    16945 
## 
## analyses :
## [[1]]
## Black
##      0      1 
## 0.9231 0.0769 
## 
## [[2]]
## Black
##       0       1 
## 0.92345 0.07655 
## 
## [[3]]
## Black
##       0       1 
## 0.92325 0.07675 
## 
## [[4]]
## Black
##      0      1 
## 0.9233 0.0767 
## 
## [[5]]
## Black
##      0      1 
## 0.9235 0.0765 
## 
## [[6]]
## Black
##       0       1 
## 0.92375 0.07625 
## 
## [[7]]
## Black
##       0       1 
## 0.92325 0.07675 
## 
## [[8]]
## Black
##      0      1 
## 0.9231 0.0769 
## 
## [[9]]
## Black
##       0       1 
## 0.92345 0.07655 
## 
## [[10]]
## Black
##      0      1 
## 0.9232 0.0768
with (data=imp, exp=(prop.table(table(Gay))))
## call :
## with.mids(data = imp, expr = (prop.table(table(Gay))))
## 
## call1 :
## mice(data = BRFSS19[, c("HIVtest", "Hispanic", "Black", "Gay", 
##     "Bi", "Male")], m = 10, method = "logreg")
## 
## nmis :
##  HIVtest Hispanic    Black      Gay       Bi     Male 
##      772      395      395      142      142    16945 
## 
## analyses :
## [[1]]
## Gay
##      0      1 
## 0.9945 0.0055 
## 
## [[2]]
## Gay
##      0      1 
## 0.9945 0.0055 
## 
## [[3]]
## Gay
##       0       1 
## 0.99455 0.00545 
## 
## [[4]]
## Gay
##       0       1 
## 0.99455 0.00545 
## 
## [[5]]
## Gay
##      0      1 
## 0.9945 0.0055 
## 
## [[6]]
## Gay
##      0      1 
## 0.9945 0.0055 
## 
## [[7]]
## Gay
##      0      1 
## 0.9945 0.0055 
## 
## [[8]]
## Gay
##      0      1 
## 0.9945 0.0055 
## 
## [[9]]
## Gay
##       0       1 
## 0.99435 0.00565 
## 
## [[10]]
## Gay
##       0       1 
## 0.99445 0.00555
with (data=imp, exp=(prop.table(table(Bi))))
## call :
## with.mids(data = imp, expr = (prop.table(table(Bi))))
## 
## call1 :
## mice(data = BRFSS19[, c("HIVtest", "Hispanic", "Black", "Gay", 
##     "Bi", "Male")], m = 10, method = "logreg")
## 
## nmis :
##  HIVtest Hispanic    Black      Gay       Bi     Male 
##      772      395      395      142      142    16945 
## 
## analyses :
## [[1]]
## Bi
##       0       1 
## 0.99545 0.00455 
## 
## [[2]]
## Bi
##      0      1 
## 0.9953 0.0047 
## 
## [[3]]
## Bi
##      0      1 
## 0.9954 0.0046 
## 
## [[4]]
## Bi
##       0       1 
## 0.99545 0.00455 
## 
## [[5]]
## Bi
##       0       1 
## 0.99535 0.00465 
## 
## [[6]]
## Bi
##      0      1 
## 0.9954 0.0046 
## 
## [[7]]
## Bi
##       0       1 
## 0.99545 0.00455 
## 
## [[8]]
## Bi
##       0       1 
## 0.99545 0.00455 
## 
## [[9]]
## Bi
##       0       1 
## 0.99535 0.00465 
## 
## [[10]]
## Bi
##      0      1 
## 0.9954 0.0046
with (data=imp, exp=(prop.table(table(Male))))
## call :
## with.mids(data = imp, expr = (prop.table(table(Male))))
## 
## call1 :
## mice(data = BRFSS19[, c("HIVtest", "Hispanic", "Black", "Gay", 
##     "Bi", "Male")], m = 10, method = "logreg")
## 
## nmis :
##  HIVtest Hispanic    Black      Gay       Bi     Male 
##      772      395      395      142      142    16945 
## 
## analyses :
## [[1]]
## Male
##      0      1 
## 0.5507 0.4493 
## 
## [[2]]
## Male
##       0       1 
## 0.55125 0.44875 
## 
## [[3]]
## Male
##      0      1 
## 0.5613 0.4387 
## 
## [[4]]
## Male
##      0      1 
## 0.5648 0.4352 
## 
## [[5]]
## Male
##      0      1 
## 0.5565 0.4435 
## 
## [[6]]
## Male
##      0      1 
## 0.5558 0.4442 
## 
## [[7]]
## Male
##      0      1 
## 0.5677 0.4323 
## 
## [[8]]
## Male
##      0      1 
## 0.5497 0.4503 
## 
## [[9]]
## Male
##     0     1 
## 0.555 0.445 
## 
## [[10]]
## Male
##       0       1 
## 0.55825 0.44175
est.p<-pool(fit.HIV)
print(est.p)
## Class: mipo    m = 10 
##          term  m   estimate         ubar            b           t dfcom
## 1 (Intercept) 10 -0.8066149 0.0004677443 1.191558e-03 0.001778458 19994
## 2    Hispanic 10  0.6207687 0.0025943622 1.767635e-04 0.002788802 19994
## 3       Black 10  0.8739436 0.0029197620 9.526886e-05 0.003024558 19994
## 4         Gay 10  1.8855324 0.0484780009 3.649747e-03 0.052492723 19994
## 5          Bi 10  0.9010028 0.0448345591 1.223437e-02 0.058292368 19994
## 6        Male 10 -0.2072016 0.0009753271 6.364741e-03 0.007976542 19994
##           df        riv     lambda        fmi
## 1   16.51762 2.80220180 0.73699450 0.76394506
## 2 1683.80836 0.07494707 0.06972164 0.07082464
## 3 5399.43335 0.03589188 0.03464829 0.03500566
## 4 1420.25728 0.08281533 0.07648149 0.07777924
## 5  167.02212 0.30016598 0.23086743 0.23991487
## 6   11.62665 7.17832501 0.87772557 0.89444498
summary(est.p)
##          term   estimate  std.error  statistic         df      p.value
## 1 (Intercept) -0.8066149 0.04217177 -19.126893   16.51762 1.060929e-12
## 2    Hispanic  0.6207687 0.05280911  11.754955 1683.80836 0.000000e+00
## 3       Black  0.8739436 0.05499598  15.891046 5399.43335 0.000000e+00
## 4         Gay  1.8855324 0.22911290   8.229708 1420.25728 4.440892e-16
## 5          Bi  0.9010028 0.24143813   3.731817  167.02212 2.602868e-04
## 6        Male -0.2072016 0.08931149  -2.319989   11.62665 3.940558e-02
lam<-data.frame(lam=est.p$pooled$lambda, param=row.names(est.p$pooled))

ggplot(data=lam,aes(x=param, y=lam))+geom_col()+theme(axis.text.x = element_text(angle = 45, hjust = 1))

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
bnm<-BRFSS19%>%
  select(HIVtest,Hispanic,Black,Gay,Bi,Male)%>%
  filter(complete.cases(.))%>%
  as.data.frame()

summary(glm(HIVtest~Hispanic+Black+Gay+Bi+Male, family = binomial, data =  bnm))
## 
## Call:
## glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial, 
##     data = bnm)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8705  -0.8379  -0.7631   1.3339   1.6589  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.86607    0.05711 -15.164  < 2e-16 ***
## Hispanic     0.50532    0.16491   3.064  0.00218 ** 
## Black        0.97435    0.15693   6.209 5.34e-10 ***
## Gay          2.13807    0.41181   5.192 2.08e-07 ***
## Bi           0.92634    0.41879   2.212  0.02697 *  
## Male        -0.21884    0.08469  -2.584  0.00976 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 3522.1  on 2863  degrees of freedom
## Residual deviance: 3434.9  on 2858  degrees of freedom
## AIC: 3446.9
## 
## Number of Fisher Scoring iterations: 4
Modal.HIV<-glm(HIV.imp~His.imp+Blk.imp+Gay.imp+Bi.imp+Male.imp,family = binomial, data = BRFSS19)
summary(Modal.HIV)
## 
## Call:
## glm(formula = HIV.imp ~ His.imp + Blk.imp + Gay.imp + Bi.imp + 
##     Male.imp, family = binomial, data = BRFSS19)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8705  -0.8379  -0.7631   1.3339   1.6589  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.86607    0.05711 -15.164  < 2e-16 ***
## His.imp1     0.50532    0.16491   3.064  0.00218 ** 
## Blk.imp1     0.97435    0.15693   6.209 5.34e-10 ***
## Gay.imp1     2.13807    0.41181   5.192 2.08e-07 ***
## Bi.imp1      0.92634    0.41879   2.212  0.02697 *  
## Male.imp1   -0.21884    0.08469  -2.584  0.00976 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 3522.1  on 2863  degrees of freedom
## Residual deviance: 3434.9  on 2858  degrees of freedom
##   (17136 observations deleted due to missingness)
## AIC: 3446.9
## 
## Number of Fisher Scoring iterations: 4
fit1<-glm(HIVtest~Hispanic+Black+Gay+Bi+Male, family = binomial, data =  bnm)
summary(fit1)
## 
## Call:
## glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial, 
##     data = bnm)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8705  -0.8379  -0.7631   1.3339   1.6589  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.86607    0.05711 -15.164  < 2e-16 ***
## Hispanic     0.50532    0.16491   3.064  0.00218 ** 
## Black        0.97435    0.15693   6.209 5.34e-10 ***
## Gay          2.13807    0.41181   5.192 2.08e-07 ***
## Bi           0.92634    0.41879   2.212  0.02697 *  
## Male        -0.21884    0.08469  -2.584  0.00976 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 3522.1  on 2863  degrees of freedom
## Residual deviance: 3434.9  on 2858  degrees of freedom
## AIC: 3446.9
## 
## Number of Fisher Scoring iterations: 4
fit.imp<-glm(HIVtest~Hispanic+Black+Gay+Bi+Male, family = binomial, data = dat.imp)
summary(fit.imp)
## 
## Call:
## glm(formula = HIVtest ~ Hispanic + Black + Gay + Bi + Male, family = binomial, 
##     data = dat.imp)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9616  -0.8561  -0.7921   1.3423   1.6199  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.81498    0.02180 -37.390  < 2e-16 ***
## Hispanic     0.61863    0.05076  12.187  < 2e-16 ***
## Black        0.87280    0.05396  16.175  < 2e-16 ***
## Gay          1.89175    0.21916   8.632  < 2e-16 ***
## Bi           0.99227    0.21317   4.655 3.24e-06 ***
## Male        -0.18342    0.03120  -5.878 4.15e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 25127  on 19999  degrees of freedom
## Residual deviance: 24622  on 19994  degrees of freedom
## AIC: 24634
## 
## Number of Fisher Scoring iterations: 4

Five categorical variables were selected from the BRFSS19: HIVtest (outcome variable) Hispanic + Black + Gay + Bi + Male (predictor variables). A random sample of 25,00 cases was subset from the BRFSS19 for imputation. Two methods were used: modal imputation and multiple imputations. For the multiple imputations, 10 iterations were specified. Modal imputations were conducted across the HIVtest ~ Hispanic + Black + Gay + Bi + Male. All betas were statistically significant. Multiple imputations were also conducted across the HIVtest ~ Hispanic + Black + Gay + Bi + Male. All betas were statistically significant. Results showed that the modal imputation improved the model fit based on the AIC = 3446.9 compared to the original dataset AIC = 24634. Multiple imputations also improved the model fit based on the AIC = 3446.9 compared to the original dataset. Modal imputation and multiple imputations yielded similar results betas and AICs.