Using ECLS-K data, I will assess the relationship between child BMI and perceived child health status in children in the United States using poverty status, parent education, and parent race/ethnicity, and child sex as control variables.

#load required packages
library(car)
library(mice)
library(ggplot2)
library(dplyr)

#load eclsk data
load("~/OneDrive/UTSA/7283 Stats II/eclsk_k5.Rdata")

#extract variables for assignment
mydat<-c("childid", "x_chsex_r", "p2hscale", "x2povty", "x2bmi", "x12par1ed_i", "x2par1rac", "w2p0", "w2p0str", "w2p0psu")

eclsk<-data.frame(eclskk5[,mydat])

rm(eclskk5)

Recode Variables

#recode perceived child health
eclsk$chhealth<-Recode(eclsk$p2hscale, recodes = "1:2='0Exc/VGood'; 3='1Good'; 4='2Poor'; else=NA", as.factor=TRUE)

#recode poverty status
eclsk$pov<-Recode(eclsk$x2povty, recodes ="1=1; 2:3=0; -9=NA", as.factor=TRUE)

#child bmi
eclsk$bmi<-Recode(eclsk$x2bmi, recodes = "-9=NA")

#parent 1 education
eclsk$educ<-Recode(eclsk$x12par1ed_i, recodes = "1='0prim'; 2='1somehs'; 3='2hsgrad'; 4:5='3somecol'; 6:9='4colgrad'; -9=NA", as.factor=TRUE)
eclsk$educ<-relevel(eclsk$educ, ref='2hsgrad')

#race
eclsk$p_raceth<-Recode(eclsk$x2par1rac, recodes="1='nh white'; 2='nh black'; 3:4='hispanic'; 5:7='nh other'; 8='nh multirace'; -9=NA", as.factor=TRUE)
eclsk$p_raceth<-relevel(eclsk$p_raceth, ref='nh white')

#recode sex
eclsk$male<-Recode(eclsk$x_chsex_r, recodes = "1=1; 2=0; -9=NA", as.factor=TRUE)

Pattern of Missingness

#library(car)
#library(mice)
#library(ggplot2)
#library(dplyr)

summary(eclsk[, c("chhealth", "pov",  "bmi", "educ", "p_raceth", "male")])
##        chhealth       pov             bmi               educ     
##  0Exc/VGood:11304   0   :10076   Min.   : 7.605   2hsgrad :3543  
##  1Good     : 1418   1   : 3451   1st Qu.:15.086   0prim   : 781  
##  2Poor     :  293   NA's: 4647   Median :16.046   1somehs :1398  
##  NA's      : 5159                Mean   :16.602   3somecol:5135  
##                                  3rd Qu.:17.396   4colgrad:5148  
##                                  Max.   :49.136   NA's    :2169  
##                                  NA's   :1012                    
##          p_raceth      male     
##  nh white    :7388   0   :8847  
##  hispanic    :2882   1   :9288  
##  nh black    :1541   NA's:  39  
##  nh multirace: 234              
##  nh other    :1408              
##  NA's        :4721              
## 

We see that perceived child health, poverty status, and parent race/ethnicity all have more than 25% missing cases. Sex of child is the only variable with less than 1% missing cases.

Perceived child health = 28.387% Poverty status = 25.569% Child BMI = 5.568% Parent education = 11.935% Parent race ethnicity = 25.977% Sex of child = 0.215%

Mean imputation

summary(eclsk$bmi)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   7.605  15.086  16.046  16.602  17.396  49.136    1012
#replace missing with mean
eclsk$bmi.imp.mean<-ifelse(is.na(eclsk$bmi)==T, mean(eclsk$bmi, na.rm=T), eclsk$bmi)

#mean of non imputed and imputed
mean(eclsk$bmi, na.rm=T)
## [1] 16.60225
mean(eclsk$bmi.imp.mean)
## [1] 16.60225
#median of non imputed and imputed 
median(eclsk$bmi, na.rm=T)
## [1] 16.0458
median(eclsk$bmi.imp.mean)
## [1] 16.1684
#variance of non imputed and imputed
var(eclsk$bmi, na.rm=T)
## [1] 6.320484
var(eclsk$bmi.imp.mean)
## [1] 5.968515
#plot the histogram
library(reshape2)

eclsk%>%
  select(bmi.imp.mean, bmi)%>%
  melt()%>%
  ggplot()+geom_freqpoly(aes(x = value,
     y = ..density.., colour = variable))
## No id variables; using all as measure variables
## Warning: attributes are not identical across measure variables; they will
## be dropped
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1012 rows containing non-finite values (stat_bin).

## Modal imputation - child health

table(eclsk$chhealth)
## 
## 0Exc/VGood      1Good      2Poor 
##      11304       1418        293
#find most common
mcv.chhealth<-factor(names(which.max(table(eclsk$chhealth))), levels=levels(eclsk$chhealth))
mcv.chhealth
## [1] 0Exc/VGood
## Levels: 0Exc/VGood 1Good 2Poor
#impute the cases
eclsk$chhealth.imp<-as.factor(ifelse(is.na(eclsk$chhealth)==T, mcv.chhealth, eclsk$chhealth))
levels(eclsk$chhealth.imp)<-levels(eclsk$chhealth)

prop.table(table(eclsk$chhealth))
## 
## 0Exc/VGood      1Good      2Poor 
## 0.86853630 0.10895121 0.02251249
prop.table(table(eclsk$chhealth.imp))
## 
## 0Exc/VGood      1Good      2Poor 
## 0.90585452 0.07802355 0.01612193
#bar plots
barplot(prop.table(table(eclsk$chhealth)), main="Original Data", ylim=c(0, 1.0))

barplot(prop.table(table(eclsk$chhealth.imp)), main="Imputed Data",ylim=c(0, 1.0))

Multiple imputation

md.pattern(eclsk[,c("chhealth", "pov",  "bmi", "educ", "p_raceth", "male")])

##       male  bmi educ  pov p_raceth chhealth      
## 12530    1    1    1    1        1        1     0
## 469      1    1    1    1        1        0     1
## 64       1    1    1    1        0        1     1
## 4        1    1    1    1        0        0     2
## 2150     1    1    1    0        0        0     3
## 2        1    1    0    0        0        1     3
## 1912     1    1    0    0        0        0     4
## 413      1    0    1    1        1        1     1
## 41       1    0    1    1        1        0     2
## 6        1    0    1    1        0        1     2
## 328      1    0    1    0        0        0     4
## 216      1    0    0    0        0        0     5
## 31       0    1    0    0        0        0     5
## 8        0    0    0    0        0        0     6
##         39 1012 2169 4647     4721     5159 17747
md.pairs(eclsk[,c("chhealth", "pov",  "bmi", "educ", "p_raceth", "male")])
## $rr
##          chhealth   pov   bmi  educ p_raceth  male
## chhealth    13015 13013 12596 13013    12943 13015
## pov         13013 13527 13067 13527    13453 13527
## bmi         12596 13067 17162 15217    12999 17131
## educ        13013 13527 15217 16005    13453 16005
## p_raceth    12943 13453 12999 13453    13453 13453
## male        13015 13527 17131 16005    13453 18135
## 
## $rm
##          chhealth  pov  bmi educ p_raceth male
## chhealth        0    2  419    2       72    0
## pov           514    0  460    0       74    0
## bmi          4566 4095    0 1945     4163   31
## educ         2992 2478  788    0     2552    0
## p_raceth      510    0  454    0        0    0
## male         5120 4608 1004 2130     4682    0
## 
## $mr
##          chhealth pov  bmi educ p_raceth male
## chhealth        0 514 4566 2992      510 5120
## pov             2   0 4095 2478        0 4608
## bmi           419 460    0  788      454 1004
## educ            2   0 1945    0        0 2130
## p_raceth       72  74 4163 2552        0 4682
## male            0   0   31    0        0    0
## 
## $mm
##          chhealth  pov  bmi educ p_raceth male
## chhealth     5159 4645  593 2167     4649   39
## pov          4645 4647  552 2169     4647   39
## bmi           593  552 1012  224      558    8
## educ         2167 2169  224 2169     2169   39
## p_raceth     4649 4647  558 2169     4721   39
## male           39   39    8   39       39   39
mydat2<-eclsk
samp1<-sample(1:dim(mydat2)[1], replace = F, size = 500)
mydat2$bmiknock<-mydat2$bmi
mydat2$bmiknock[samp1]<-NA

head(mydat2[, c("bmiknock","bmi")])
##   bmiknock     bmi
## 1  16.3250 16.3250
## 2  22.9158 22.9158
## 3  15.1035 15.1035
## 4  19.4687 19.4687
## 5  15.5441 15.5441
## 6  17.1869 17.1869
imp<-mice(data = mydat2[,c("chhealth", "pov",  "bmi", "educ", "p_raceth", "male")], seed = 22, m = 5)
## 
##  iter imp variable
##   1   1  chhealth  pov  bmi  educ  p_raceth  male
##   1   2  chhealth  pov  bmi  educ  p_raceth  male
##   1   3  chhealth  pov  bmi  educ  p_raceth  male
##   1   4  chhealth  pov  bmi  educ  p_raceth  male
##   1   5  chhealth  pov  bmi  educ  p_raceth  male
##   2   1  chhealth  pov  bmi  educ  p_raceth  male
##   2   2  chhealth  pov  bmi  educ  p_raceth  male
##   2   3  chhealth  pov  bmi  educ  p_raceth  male
##   2   4  chhealth  pov  bmi  educ  p_raceth  male
##   2   5  chhealth  pov  bmi  educ  p_raceth  male
##   3   1  chhealth  pov  bmi  educ  p_raceth  male
##   3   2  chhealth  pov  bmi  educ  p_raceth  male
##   3   3  chhealth  pov  bmi  educ  p_raceth  male
##   3   4  chhealth  pov  bmi  educ  p_raceth  male
##   3   5  chhealth  pov  bmi  educ  p_raceth  male
##   4   1  chhealth  pov  bmi  educ  p_raceth  male
##   4   2  chhealth  pov  bmi  educ  p_raceth  male
##   4   3  chhealth  pov  bmi  educ  p_raceth  male
##   4   4  chhealth  pov  bmi  educ  p_raceth  male
##   4   5  chhealth  pov  bmi  educ  p_raceth  male
##   5   1  chhealth  pov  bmi  educ  p_raceth  male
##   5   2  chhealth  pov  bmi  educ  p_raceth  male
##   5   3  chhealth  pov  bmi  educ  p_raceth  male
##   5   4  chhealth  pov  bmi  educ  p_raceth  male
##   5   5  chhealth  pov  bmi  educ  p_raceth  male
print(imp)
## Class: mids
## Number of multiple imputations:  5 
## Imputation methods:
##  chhealth       pov       bmi      educ  p_raceth      male 
## "polyreg"  "logreg"     "pmm" "polyreg" "polyreg"  "logreg" 
## PredictorMatrix:
##          chhealth pov bmi educ p_raceth male
## chhealth        0   1   1    1        1    1
## pov             1   0   1    1        1    1
## bmi             1   1   0    1        1    1
## educ            1   1   1    0        1    1
## p_raceth        1   1   1    1        0    1
## male            1   1   1    1        1    0
head(imp$imp$bmi)
##          1       2       3       4       5
## 33 27.1212 15.9581 14.7976 17.1514 15.9486
## 44 17.0196 23.1611 15.6164 15.5909 15.9396
## 59 18.0612 16.1148 16.4499 15.3452 14.3810
## 74 16.7542 14.4433 14.5133 14.6193 15.9486
## 80 15.2689 17.3479 23.8018 15.2576 15.8123
## 96 15.6164 12.3555 16.4516 21.2501 19.0641
summary(imp$imp$bmi)
##        1               2               3               4        
##  Min.   :11.64   Min.   :11.48   Min.   :10.75   Min.   :12.09  
##  1st Qu.:15.04   1st Qu.:14.97   1st Qu.:15.13   1st Qu.:15.12  
##  Median :15.97   Median :16.13   Median :16.19   Median :16.16  
##  Mean   :16.68   Mean   :16.72   Mean   :16.72   Mean   :16.86  
##  3rd Qu.:17.46   3rd Qu.:17.69   3rd Qu.:17.59   3rd Qu.:17.51  
##  Max.   :49.14   Max.   :36.91   Max.   :30.39   Max.   :36.91  
##        5        
##  Min.   :10.28  
##  1st Qu.:15.11  
##  Median :16.20  
##  Mean   :16.87  
##  3rd Qu.:17.88  
##  Max.   :34.43
summary(eclsk$bmi)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   7.605  15.086  16.046  16.602  17.396  49.136    1012
head(imp$imp$chhealth)
##             1          2          3          4          5
## 9  0Exc/VGood 0Exc/VGood 0Exc/VGood 0Exc/VGood 0Exc/VGood
## 15 0Exc/VGood 0Exc/VGood 0Exc/VGood 0Exc/VGood 0Exc/VGood
## 18      1Good 0Exc/VGood 0Exc/VGood      1Good 0Exc/VGood
## 26      1Good 0Exc/VGood      1Good 0Exc/VGood      2Poor
## 31 0Exc/VGood      2Poor 0Exc/VGood      1Good 0Exc/VGood
## 33 0Exc/VGood 0Exc/VGood 0Exc/VGood 0Exc/VGood 0Exc/VGood
summary(imp$imp$chhealth)
##           1                 2                 3                 4       
##  0Exc/VGood:4408   0Exc/VGood:4411   0Exc/VGood:4450   0Exc/VGood:4413  
##  1Good     : 608   1Good     : 624   1Good     : 575   1Good     : 599  
##  2Poor     : 143   2Poor     : 124   2Poor     : 134   2Poor     : 147  
##           5       
##  0Exc/VGood:4398  
##  1Good     : 627  
##  2Poor     : 134
library(lattice)
stripplot(imp,bmi~chhealth|.imp, pch=20)

dat.imp<-complete(imp, action = 1)
head(dat.imp, n=10)
##      chhealth pov     bmi     educ p_raceth male
## 1  0Exc/VGood   0 16.3250 3somecol nh white    1
## 2  0Exc/VGood   0 22.9158 3somecol nh white    0
## 3  0Exc/VGood   0 15.1035 4colgrad nh white    1
## 4  0Exc/VGood   0 19.4687 3somecol nh white    1
## 5  0Exc/VGood   0 15.5441 3somecol nh white    0
## 6  0Exc/VGood   0 17.1869  2hsgrad nh white    0
## 7  0Exc/VGood   1 14.2873 3somecol nh white    0
## 8  0Exc/VGood   0 13.0043 4colgrad nh white    0
## 9  0Exc/VGood   1 15.6715  2hsgrad nh white    0
## 10 0Exc/VGood   1 19.5331    0prim hispanic    1
head(eclsk[,c("chhealth", "pov",  "bmi", "educ", "p_raceth", "male")], n=10)
##      chhealth  pov     bmi     educ p_raceth male
## 1  0Exc/VGood    0 16.3250 3somecol nh white    1
## 2  0Exc/VGood    0 22.9158 3somecol nh white    0
## 3  0Exc/VGood    0 15.1035 4colgrad nh white    1
## 4  0Exc/VGood    0 19.4687 3somecol nh white    1
## 5  0Exc/VGood    0 15.5441 3somecol nh white    0
## 6  0Exc/VGood    0 17.1869  2hsgrad nh white    0
## 7  0Exc/VGood    1 14.2873 3somecol nh white    0
## 8  0Exc/VGood    0 13.0043 4colgrad nh white    0
## 9        <NA> <NA> 15.6715  2hsgrad     <NA>    0
## 10 0Exc/VGood    1 19.5331    0prim hispanic    1
head(dat.imp[is.na(eclsk$chhealth)==T,], n=10)
##      chhealth pov     bmi     educ p_raceth male
## 9  0Exc/VGood   1 15.6715  2hsgrad nh white    0
## 15 0Exc/VGood   1 16.6762 3somecol nh white    0
## 18      1Good   0 14.9589 3somecol hispanic    1
## 26      1Good   0 22.4385 3somecol nh black    0
## 31 0Exc/VGood   1 19.4878  1somehs hispanic    1
## 33 0Exc/VGood   0 27.1212 3somecol nh white    1
## 36 0Exc/VGood   0 17.2372  2hsgrad hispanic    0
## 37 0Exc/VGood   0 17.0139 4colgrad nh white    1
## 39 0Exc/VGood   0 16.3011 4colgrad nh white    1
## 42      1Good   1 15.1203    0prim hispanic    1
head(eclsk[is.na(eclsk$chhealth)==T,c("chhealth", "pov",  "bmi", "educ", "p_raceth", "male")], n=10)
##    chhealth  pov     bmi     educ p_raceth male
## 9      <NA> <NA> 15.6715  2hsgrad     <NA>    0
## 15     <NA> <NA> 16.6762     <NA>     <NA>    0
## 18     <NA> <NA> 14.9589     <NA>     <NA>    1
## 26     <NA>    0 22.4385 3somecol nh black    0
## 31     <NA> <NA> 19.4878  1somehs     <NA>    1
## 33     <NA> <NA>      NA 3somecol     <NA>    1
## 36     <NA> <NA> 17.2372  2hsgrad     <NA>    0
## 37     <NA> <NA> 17.0139     <NA>     <NA>    1
## 39     <NA> <NA> 16.3011     <NA>     <NA>    1
## 42     <NA> <NA> 15.1203    0prim     <NA>    1

Analyzing imputed data

fit.bmi<-with(data=imp, expr=lm(bmi~chhealth+pov+educ+p_raceth+male))
fit.bmi
## call :
## with.mids(data = imp, expr = lm(bmi ~ chhealth + pov + educ + 
##     p_raceth + male))
## 
## call1 :
## mice(data = mydat2[, c("chhealth", "pov", "bmi", "educ", "p_raceth", 
##     "male")], m = 5, seed = 22)
## 
## nmis :
## chhealth      pov      bmi     educ p_raceth     male 
##     5159     4647     1012     2169     4721       39 
## 
## analyses :
## [[1]]
## 
## Call:
## lm(formula = bmi ~ chhealth + pov + educ + p_raceth + male)
## 
## Coefficients:
##          (Intercept)         chhealth1Good         chhealth2Poor  
##             16.52217               0.43907               0.26070  
##                 pov1             educ0prim           educ1somehs  
##              0.05024               0.08673               0.05171  
##         educ3somecol          educ4colgrad      p_racethhispanic  
##             -0.05037              -0.51079               0.50993  
##     p_racethnh black  p_racethnh multirace      p_racethnh other  
##              0.29632               0.26386              -0.18580  
##                male1  
##              0.10581  
## 
## 
## [[2]]
## 
## Call:
## lm(formula = bmi ~ chhealth + pov + educ + p_raceth + male)
## 
## Coefficients:
##          (Intercept)         chhealth1Good         chhealth2Poor  
##             16.50599               0.40703               0.19571  
##                 pov1             educ0prim           educ1somehs  
##              0.05390               0.16208               0.11886  
##         educ3somecol          educ4colgrad      p_racethhispanic  
##             -0.02307              -0.49654               0.52850  
##     p_racethnh black  p_racethnh multirace      p_racethnh other  
##              0.26237               0.21127              -0.19320  
##                male1  
##              0.10707  
## 
## 
## [[3]]
## 
## Call:
## lm(formula = bmi ~ chhealth + pov + educ + p_raceth + male)
## 
## Coefficients:
##          (Intercept)         chhealth1Good         chhealth2Poor  
##             16.51163               0.48385               0.50604  
##                 pov1             educ0prim           educ1somehs  
##              0.02082               0.08899               0.08158  
##         educ3somecol          educ4colgrad      p_racethhispanic  
##             -0.07448              -0.48505               0.50952  
##     p_racethnh black  p_racethnh multirace      p_racethnh other  
##              0.38694               0.35412              -0.19561  
##                male1  
##              0.09874  
## 
## 
## [[4]]
## 
## Call:
## lm(formula = bmi ~ chhealth + pov + educ + p_raceth + male)
## 
## Coefficients:
##          (Intercept)         chhealth1Good         chhealth2Poor  
##             16.46350               0.34555               0.27134  
##                 pov1             educ0prim           educ1somehs  
##              0.12272               0.12871               0.12603  
##         educ3somecol          educ4colgrad      p_racethhispanic  
##             -0.04177              -0.47831               0.61696  
##     p_racethnh black  p_racethnh multirace      p_racethnh other  
##              0.37603               0.37390              -0.16283  
##                male1  
##              0.10821  
## 
## 
## [[5]]
## 
## Call:
## lm(formula = bmi ~ chhealth + pov + educ + p_raceth + male)
## 
## Coefficients:
##          (Intercept)         chhealth1Good         chhealth2Poor  
##             16.49096               0.49237               0.49794  
##                 pov1             educ0prim           educ1somehs  
##              0.03290               0.10247               0.08047  
##         educ3somecol          educ4colgrad      p_racethhispanic  
##             -0.07251              -0.47109               0.60219  
##     p_racethnh black  p_racethnh multirace      p_racethnh other  
##              0.31389               0.35839              -0.18624  
##                male1  
##              0.10587
#variation in child bmi
with (data=imp, exp=(sd(bmi)))
## call :
## with.mids(data = imp, expr = (sd(bmi)))
## 
## call1 :
## mice(data = mydat2[, c("chhealth", "pov", "bmi", "educ", "p_raceth", 
##     "male")], m = 5, seed = 22)
## 
## nmis :
## chhealth      pov      bmi     educ p_raceth     male 
##     5159     4647     1012     2169     4721       39 
## 
## analyses :
## [[1]]
## [1] 2.533415
## 
## [[2]]
## [1] 2.529566
## 
## [[3]]
## [1] 2.508661
## 
## [[4]]
## [1] 2.535824
## 
## [[5]]
## [1] 2.537452

Frequency Tables for categorical variables

with (data=imp, exp=(prop.table(table(chhealth))))
## call :
## with.mids(data = imp, expr = (prop.table(table(chhealth))))
## 
## call1 :
## mice(data = mydat2[, c("chhealth", "pov", "bmi", "educ", "p_raceth", 
##     "male")], m = 5, seed = 22)
## 
## nmis :
## chhealth      pov      bmi     educ p_raceth     male 
##     5159     4647     1012     2169     4721       39 
## 
## analyses :
## [[1]]
## chhealth
## 0Exc/VGood      1Good      2Poor 
## 0.86453175 0.11147794 0.02399032 
## 
## [[2]]
## chhealth
## 0Exc/VGood      1Good      2Poor 
## 0.86469682 0.11235831 0.02294487 
## 
## [[3]]
## chhealth
## 0Exc/VGood      1Good      2Poor 
##  0.8668427  0.1096622  0.0234951 
## 
## [[4]]
## chhealth
## 0Exc/VGood      1Good      2Poor 
## 0.86480687 0.11098272 0.02421041 
## 
## [[5]]
## chhealth
## 0Exc/VGood      1Good      2Poor 
##  0.8639815  0.1125234  0.0234951
with (data=imp, exp=(prop.table(table(pov))))
## call :
## with.mids(data = imp, expr = (prop.table(table(pov))))
## 
## call1 :
## mice(data = mydat2[, c("chhealth", "pov", "bmi", "educ", "p_raceth", 
##     "male")], m = 5, seed = 22)
## 
## nmis :
## chhealth      pov      bmi     educ p_raceth     male 
##     5159     4647     1012     2169     4721       39 
## 
## analyses :
## [[1]]
## pov
##         0         1 
## 0.7374821 0.2625179 
## 
## [[2]]
## pov
##         0         1 
## 0.7353912 0.2646088 
## 
## [[3]]
## pov
##         0         1 
## 0.7362166 0.2637834 
## 
## [[4]]
## pov
##         0         1 
## 0.7342357 0.2657643 
## 
## [[5]]
## pov
##         0         1 
## 0.7309893 0.2690107
with (data=imp, exp=(prop.table(table(educ))))
## call :
## with.mids(data = imp, expr = (prop.table(table(educ))))
## 
## call1 :
## mice(data = mydat2[, c("chhealth", "pov", "bmi", "educ", "p_raceth", 
##     "male")], m = 5, seed = 22)
## 
## nmis :
## chhealth      pov      bmi     educ p_raceth     male 
##     5159     4647     1012     2169     4721       39 
## 
## analyses :
## [[1]]
## educ
##    2hsgrad      0prim    1somehs   3somecol   4colgrad 
## 0.22092000 0.04880599 0.08765269 0.32106306 0.32155827 
## 
## [[2]]
## educ
##    2hsgrad      0prim    1somehs   3somecol   4colgrad 
## 0.22097502 0.05001651 0.08743260 0.32040277 0.32117310 
## 
## [[3]]
## educ
##    2hsgrad      0prim    1somehs   3somecol   4colgrad 
## 0.22207549 0.04853087 0.08787279 0.31957742 0.32194344 
## 
## [[4]]
## educ
##    2hsgrad      0prim    1somehs   3somecol   4colgrad 
## 0.22345108 0.04990646 0.08721250 0.31853197 0.32089799 
## 
## [[5]]
## educ
##   2hsgrad     0prim   1somehs  3somecol  4colgrad 
## 0.2235611 0.0490811 0.0862771 0.3206779 0.3204028
with (data=imp, exp=(prop.table(table(p_raceth))))
## call :
## with.mids(data = imp, expr = (prop.table(table(p_raceth))))
## 
## call1 :
## mice(data = mydat2[, c("chhealth", "pov", "bmi", "educ", "p_raceth", 
##     "male")], m = 5, seed = 22)
## 
## nmis :
## chhealth      pov      bmi     educ p_raceth     male 
##     5159     4647     1012     2169     4721       39 
## 
## analyses :
## [[1]]
## p_raceth
##     nh white     hispanic     nh black nh multirace     nh other 
##   0.54143282   0.22191042   0.11654011   0.01760757   0.10250908 
## 
## [[2]]
## p_raceth
##     nh white     hispanic     nh black nh multirace     nh other 
##   0.54088258   0.22207549   0.11659514   0.01771762   0.10272917 
## 
## [[3]]
## p_raceth
##     nh white     hispanic     nh black nh multirace     nh other 
##   0.54165291   0.22080995   0.11720040   0.01848795   0.10184879 
## 
## [[4]]
## p_raceth
##     nh white     hispanic     nh black nh multirace     nh other 
##   0.54220315   0.21998459   0.11665016   0.01788269   0.10327941 
## 
## [[5]]
## p_raceth
##     nh white     hispanic     nh black nh multirace     nh other 
##   0.54011225   0.22235061   0.11835589   0.01733245   0.10184879
with (data=imp, exp=(prop.table(table(male))))
## call :
## with.mids(data = imp, expr = (prop.table(table(male))))
## 
## call1 :
## mice(data = mydat2[, c("chhealth", "pov", "bmi", "educ", "p_raceth", 
##     "male")], m = 5, seed = 22)
## 
## nmis :
## chhealth      pov      bmi     educ p_raceth     male 
##     5159     4647     1012     2169     4721       39 
## 
## analyses :
## [[1]]
## male
##         0         1 
## 0.4876197 0.5123803 
## 
## [[2]]
## male
##         0         1 
## 0.4878398 0.5121602 
## 
## [[3]]
## male
##         0         1 
## 0.4878398 0.5121602 
## 
## [[4]]
## male
##         0         1 
## 0.4877847 0.5122153 
## 
## [[5]]
## male
##         0         1 
## 0.4877297 0.5122703
est.p<-pool(fit.bmi)
print(est.p)
## Class: mipo    m = 5 
##                         estimate        ubar            b           t
## (Intercept)          16.49884943 0.002603675 5.175423e-04 0.003224726
## chhealth1Good         0.43357335 0.003533433 3.616930e-03 0.007873749
## chhealth2Poor         0.34634736 0.015129599 2.103364e-02 0.040369969
## pov1                  0.05611379 0.002321808 1.565009e-03 0.004199819
## educ0prim             0.11379681 0.009360334 1.007308e-03 0.010569103
## educ1somehs           0.09172839 0.005673895 9.358563e-04 0.006796923
## educ3somecol         -0.05244043 0.002689841 4.672687e-04 0.003250564
## educ4colgrad         -0.48835379 0.002946256 2.450869e-04 0.003240360
## p_racethhispanic      0.55341868 0.002777785 2.713893e-03 0.006034456
## p_racethnh black      0.32711030 0.003776916 2.821755e-03 0.007163022
## p_racethnh multirace  0.31230924 0.019855877 5.055344e-03 0.025922290
## p_racethnh other     -0.18473495 0.004015787 1.682867e-04 0.004217731
## male1                 0.10513898 0.001365107 1.378274e-05 0.001381646
##                      dfcom          df        riv     lambda        fmi
## (Intercept)          18161   107.05544 0.23852850 0.19259024 0.20726302
## chhealth1Good        18161    13.14254 1.22835662 0.55123880 0.60683861
## chhealth2Poor        18161    10.21722 1.66827745 0.62522638 0.68193627
## pov1                 18161    19.96459 0.80885715 0.44716475 0.49531151
## educ0prim            18161   300.10168 0.12913746 0.11436824 0.12021203
## educ1somehs          18161   145.11961 0.19792886 0.16522589 0.17649751
## educ3somecol         18161   133.23355 0.20845932 0.17250007 0.18464832
## educ4colgrad         18161   471.68946 0.09982305 0.09076283 0.09459370
## p_racethhispanic     18161    13.71120 1.17239869 0.53967934 0.59477062
## p_racethnh black     18161    17.86654 0.89652652 0.47272026 0.52325857
## p_racethnh multirace 18161    72.65548 0.30552225 0.23402301 0.25427209
## p_racethnh other     18161  1584.89524 0.05028753 0.04787977 0.04907899
## male1                18161 10921.70461 0.01211575 0.01197071 0.01215159
summary(est.p)
##                         estimate  std.error   statistic          df
## (Intercept)          16.49884943 0.05678667 290.5408880   107.05544
## chhealth1Good         0.43357335 0.08873415   4.8862064    13.14254
## chhealth2Poor         0.34634736 0.20092279   1.7237833    10.21722
## pov1                  0.05611379 0.06480601   0.8658733    19.96459
## educ0prim             0.11379681 0.10280614   1.1069067   300.10168
## educ1somehs           0.09172839 0.08244345   1.1126219   145.11961
## educ3somecol         -0.05244043 0.05701372  -0.9197862   133.23355
## educ4colgrad         -0.48835379 0.05692416  -8.5790249   471.68946
## p_racethhispanic      0.55341868 0.07768176   7.1241779    13.71120
## p_racethnh black      0.32711030 0.08463464   3.8649697    17.86654
## p_racethnh multirace  0.31230924 0.16100401   1.9397607    72.65548
## p_racethnh other     -0.18473495 0.06494406  -2.8445241  1584.89524
## male1                 0.10513898 0.03717050   2.8285596 10921.70461
##                           p.value
## (Intercept)          0.000000e+00
## chhealth1Good        2.881333e-04
## chhealth2Poor        1.148200e-01
## pov1                 3.968461e-01
## educ0prim            2.692207e-01
## educ1somehs          2.677105e-01
## educ3somecol         3.593468e-01
## educ4colgrad         0.000000e+00
## p_racethhispanic     5.774833e-06
## p_racethnh black     1.148009e-03
## p_racethnh multirace 5.629094e-02
## p_racethnh other     4.504913e-03
## male1                4.684316e-03
lam<-data.frame(lam=est.p$pooled$lambda, param=row.names(est.p$pooled))

ggplot(data=lam,aes(x=param, y=lam))+geom_col()+theme(axis.text.x = element_text(angle = 45, hjust = 1))

Some child health variables, race ethnicity variables, and poverty have higher variance. Migh explain variation between 5 imputed models.

Model Comparison - Original data

#Non imputated data
sub<-eclsk%>%
  select(chhealth,pov,bmi,educ,p_raceth,male)%>%
  filter(complete.cases(.))%>%
  as.data.frame()

fit1<-summary(lm(bmi~chhealth+pov+educ+p_raceth+male, sub))
fit1
## 
## Call:
## lm(formula = bmi ~ chhealth + pov + educ + p_raceth + male, data = sub)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.343 -1.453 -0.497  0.831 32.341 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          16.46814    0.06077 270.980  < 2e-16 ***
## chhealth1Good         0.42143    0.06990   6.029 1.70e-09 ***
## chhealth2Poor         0.30108    0.15001   2.007 0.044759 *  
## pov1                  0.10468    0.05769   1.815 0.069595 .  
## educ0prim             0.10770    0.11448   0.941 0.346802    
## educ1somehs           0.03137    0.09213   0.341 0.733446    
## educ3somecol         -0.07775    0.06193  -1.255 0.209380    
## educ4colgrad         -0.53332    0.06350  -8.399  < 2e-16 ***
## p_racethhispanic      0.47956    0.06201   7.733 1.13e-14 ***
## p_racethnh black      0.32710    0.07336   4.459 8.32e-06 ***
## p_racethnh multirace  0.23440    0.16396   1.430 0.152846    
## p_racethnh other     -0.20488    0.07370  -2.780 0.005444 ** 
## male1                 0.15794    0.04291   3.680 0.000234 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.4 on 12517 degrees of freedom
## Multiple R-squared:  0.03528,    Adjusted R-squared:  0.03436 
## F-statistic: 38.15 on 12 and 12517 DF,  p-value: < 2.2e-16

Model Comparison - Mean/Modal data

fit.mean.imp<-lm(bmi.imp.mean~chhealth.imp+pov.imp+educ.imp+p_raceth.imp+male.imp, data=eclsk)
summary(fit.mean.imp)
## 
## Call:
## lm(formula = bmi.imp.mean ~ chhealth.imp + pov.imp + educ.imp + 
##     p_raceth.imp + male.imp, data = eclsk)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.375 -1.439 -0.446  0.695 32.309 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              16.62515    0.04931 337.153  < 2e-16 ***
## chhealth.imp1Good         0.31936    0.06833   4.674 2.98e-06 ***
## chhealth.imp2Poor         0.19170    0.14433   1.328 0.184114    
## pov.imp1                  0.01159    0.05331   0.217 0.827837    
## educ.imp0prim             0.22206    0.09945   2.233 0.025565 *  
## educ.imp1somehs           0.15482    0.07731   2.002 0.045246 *  
## educ.imp3somecol         -0.09653    0.05336  -1.809 0.070498 .  
## educ.imp4colgrad         -0.38783    0.05207  -7.448 9.91e-14 ***
## p_raceth.imphispanic      0.35474    0.05701   6.223 4.99e-10 ***
## p_raceth.impnh black      0.20196    0.06845   2.951 0.003176 ** 
## p_raceth.impnh multirace  0.14551    0.16028   0.908 0.363956    
## p_raceth.impnh other     -0.35122    0.06866  -5.116 3.16e-07 ***
## male.imp1                 0.11850    0.03593   3.298 0.000976 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.42 on 18161 degrees of freedom
## Multiple R-squared:  0.01932,    Adjusted R-squared:  0.01868 
## F-statistic: 29.82 on 12 and 18161 DF,  p-value: < 2.2e-16

Model Comparison - Multiple imputation data

fit.imp<-lm(bmi~chhealth+pov+educ+p_raceth+male, data=dat.imp)
summary(fit.imp)
## 
## Call:
## lm(formula = bmi ~ chhealth + pov + educ + p_raceth + male, data = dat.imp)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.427 -1.513 -0.520  0.826 32.317 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          16.52217    0.05112 323.179  < 2e-16 ***
## chhealth1Good         0.43907    0.05959   7.368 1.81e-13 ***
## chhealth2Poor         0.26070    0.12233   2.131  0.03310 *  
## pov1                  0.05024    0.04833   1.040  0.29855    
## educ0prim             0.08673    0.09746   0.890  0.37352    
## educ1somehs           0.05171    0.07551   0.685  0.49345    
## educ3somecol         -0.05037    0.05205  -0.968  0.33320    
## educ4colgrad         -0.51079    0.05442  -9.386  < 2e-16 ***
## p_racethhispanic      0.50993    0.05283   9.652  < 2e-16 ***
## p_racethnh black      0.29632    0.06166   4.806 1.55e-06 ***
## p_racethnh multirace  0.26386    0.14206   1.857  0.06327 .  
## p_racethnh other     -0.18580    0.06358  -2.922  0.00348 ** 
## male1                 0.10581    0.03705   2.856  0.00430 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.496 on 18161 degrees of freedom
## Multiple R-squared:  0.03014,    Adjusted R-squared:  0.0295 
## F-statistic: 47.03 on 12 and 18161 DF,  p-value: < 2.2e-16

Across all models, what remained constant was:

-Kids with Good health are more likely to have higher BMIs compared to kids with Excellent or very good health. (Smaller effect in mean/modal imputation) -Kids with parents who graduated college are more likely to have lower BMIs compared to kids with parents who have a high school education. (Smaller effect in mean/modal imputation) -Kids with parents who are Hispanic or Black are more likely to haver higher BMIs thatn kids with parents who are White.(Somewhat smaller effect in mean/modal imputation) -Kids with parents who identify as “Other” are more likely to have lower BMIs than kids with parents who are White. (varying effects across three models) -Boys are likely to have higher BMIs than girls. (Somewhat similar across all models)

Overall, results are most similar between original data and multiple imputation data.