Using ECLS-K data, I will assess the relationship between child BMI and perceived child health status in children in the United States using poverty status, parent education, and parent race/ethnicity, and child sex as control variables.
#load required packages
library(car)
library(mice)
library(ggplot2)
library(dplyr)
#load eclsk data
load("~/OneDrive/UTSA/7283 Stats II/eclsk_k5.Rdata")
#extract variables for assignment
mydat<-c("childid", "x_chsex_r", "p2hscale", "x2povty", "x2bmi", "x12par1ed_i", "x2par1rac", "w2p0", "w2p0str", "w2p0psu")
eclsk<-data.frame(eclskk5[,mydat])
rm(eclskk5)
#recode perceived child health
eclsk$chhealth<-Recode(eclsk$p2hscale, recodes = "1:2='0Exc/VGood'; 3='1Good'; 4='2Poor'; else=NA", as.factor=TRUE)
#recode poverty status
eclsk$pov<-Recode(eclsk$x2povty, recodes ="1=1; 2:3=0; -9=NA", as.factor=TRUE)
#child bmi
eclsk$bmi<-Recode(eclsk$x2bmi, recodes = "-9=NA")
#parent 1 education
eclsk$educ<-Recode(eclsk$x12par1ed_i, recodes = "1='0prim'; 2='1somehs'; 3='2hsgrad'; 4:5='3somecol'; 6:9='4colgrad'; -9=NA", as.factor=TRUE)
eclsk$educ<-relevel(eclsk$educ, ref='2hsgrad')
#race
eclsk$p_raceth<-Recode(eclsk$x2par1rac, recodes="1='nh white'; 2='nh black'; 3:4='hispanic'; 5:7='nh other'; 8='nh multirace'; -9=NA", as.factor=TRUE)
eclsk$p_raceth<-relevel(eclsk$p_raceth, ref='nh white')
#recode sex
eclsk$male<-Recode(eclsk$x_chsex_r, recodes = "1=1; 2=0; -9=NA", as.factor=TRUE)
#library(car)
#library(mice)
#library(ggplot2)
#library(dplyr)
summary(eclsk[, c("chhealth", "pov", "bmi", "educ", "p_raceth", "male")])
## chhealth pov bmi educ
## 0Exc/VGood:11304 0 :10076 Min. : 7.605 2hsgrad :3543
## 1Good : 1418 1 : 3451 1st Qu.:15.086 0prim : 781
## 2Poor : 293 NA's: 4647 Median :16.046 1somehs :1398
## NA's : 5159 Mean :16.602 3somecol:5135
## 3rd Qu.:17.396 4colgrad:5148
## Max. :49.136 NA's :2169
## NA's :1012
## p_raceth male
## nh white :7388 0 :8847
## hispanic :2882 1 :9288
## nh black :1541 NA's: 39
## nh multirace: 234
## nh other :1408
## NA's :4721
##
We see that perceived child health, poverty status, and parent race/ethnicity all have more than 25% missing cases. Sex of child is the only variable with less than 1% missing cases.
Perceived child health = 28.387% Poverty status = 25.569% Child BMI = 5.568% Parent education = 11.935% Parent race ethnicity = 25.977% Sex of child = 0.215%
summary(eclsk$bmi)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 7.605 15.086 16.046 16.602 17.396 49.136 1012
#replace missing with mean
eclsk$bmi.imp.mean<-ifelse(is.na(eclsk$bmi)==T, mean(eclsk$bmi, na.rm=T), eclsk$bmi)
#mean of non imputed and imputed
mean(eclsk$bmi, na.rm=T)
## [1] 16.60225
mean(eclsk$bmi.imp.mean)
## [1] 16.60225
#median of non imputed and imputed
median(eclsk$bmi, na.rm=T)
## [1] 16.0458
median(eclsk$bmi.imp.mean)
## [1] 16.1684
#variance of non imputed and imputed
var(eclsk$bmi, na.rm=T)
## [1] 6.320484
var(eclsk$bmi.imp.mean)
## [1] 5.968515
#plot the histogram
library(reshape2)
eclsk%>%
select(bmi.imp.mean, bmi)%>%
melt()%>%
ggplot()+geom_freqpoly(aes(x = value,
y = ..density.., colour = variable))
## No id variables; using all as measure variables
## Warning: attributes are not identical across measure variables; they will
## be dropped
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1012 rows containing non-finite values (stat_bin).
## Modal imputation - child health
table(eclsk$chhealth)
##
## 0Exc/VGood 1Good 2Poor
## 11304 1418 293
#find most common
mcv.chhealth<-factor(names(which.max(table(eclsk$chhealth))), levels=levels(eclsk$chhealth))
mcv.chhealth
## [1] 0Exc/VGood
## Levels: 0Exc/VGood 1Good 2Poor
#impute the cases
eclsk$chhealth.imp<-as.factor(ifelse(is.na(eclsk$chhealth)==T, mcv.chhealth, eclsk$chhealth))
levels(eclsk$chhealth.imp)<-levels(eclsk$chhealth)
prop.table(table(eclsk$chhealth))
##
## 0Exc/VGood 1Good 2Poor
## 0.86853630 0.10895121 0.02251249
prop.table(table(eclsk$chhealth.imp))
##
## 0Exc/VGood 1Good 2Poor
## 0.90585452 0.07802355 0.01612193
#bar plots
barplot(prop.table(table(eclsk$chhealth)), main="Original Data", ylim=c(0, 1.0))
barplot(prop.table(table(eclsk$chhealth.imp)), main="Imputed Data",ylim=c(0, 1.0))
table(eclsk$pov)
##
## 0 1
## 10076 3451
#find most common
mcv.pov<-factor(names(which.max(table(eclsk$pov))), levels=levels(eclsk$pov))
mcv.pov
## [1] 0
## Levels: 0 1
#impute the cases
eclsk$pov.imp<-as.factor(ifelse(is.na(eclsk$pov)==T, mcv.pov, eclsk$pov))
levels(eclsk$pov.imp)<-levels(eclsk$pov)
prop.table(table(eclsk$pov))
##
## 0 1
## 0.7448806 0.2551194
prop.table(table(eclsk$pov.imp))
##
## 0 1
## 0.8101133 0.1898867
table(eclsk$educ)
##
## 2hsgrad 0prim 1somehs 3somecol 4colgrad
## 3543 781 1398 5135 5148
#find most common
mcv.educ<-factor(names(which.max(table(eclsk$educ))), levels=levels(eclsk$educ))
mcv.educ
## [1] 4colgrad
## Levels: 2hsgrad 0prim 1somehs 3somecol 4colgrad
#impute the cases
eclsk$educ.imp<-as.factor(ifelse(is.na(eclsk$educ)==T, mcv.educ, eclsk$educ))
levels(eclsk$educ.imp)<-levels(eclsk$educ)
prop.table(table(eclsk$educ))
##
## 2hsgrad 0prim 1somehs 3somecol 4colgrad
## 0.22136832 0.04879725 0.08734770 0.32083724 0.32164948
prop.table(table(eclsk$educ.imp))
##
## 2hsgrad 0prim 1somehs 3somecol 4colgrad
## 0.19494883 0.04297348 0.07692308 0.28254649 0.40260812
table(eclsk$p_raceth)
##
## nh white hispanic nh black nh multirace nh other
## 7388 2882 1541 234 1408
#find most common
mcv.p_raceth<-factor(names(which.max(table(eclsk$p_raceth))), levels=levels(eclsk$p_raceth))
mcv.p_raceth
## [1] nh white
## Levels: nh white hispanic nh black nh multirace nh other
#impute the cases
eclsk$p_raceth.imp<-as.factor(ifelse(is.na(eclsk$p_raceth)==T, mcv.p_raceth, eclsk$p_raceth))
levels(eclsk$p_raceth.imp)<-levels(eclsk$p_raceth)
prop.table(table(eclsk$p_raceth))
##
## nh white hispanic nh black nh multirace nh other
## 0.54917119 0.21422731 0.11454694 0.01739389 0.10466067
prop.table(table(eclsk$p_raceth.imp))
##
## nh white hispanic nh black nh multirace nh other
## 0.66628150 0.15857819 0.08479146 0.01287554 0.07747331
table(eclsk$male)
##
## 0 1
## 8847 9288
#find most common
mcv.male<-factor(names(which.max(table(eclsk$male))), levels=levels(eclsk$male))
mcv.male
## [1] 1
## Levels: 0 1
#impute the cases
eclsk$male.imp<-as.factor(ifelse(is.na(eclsk$male)==T, mcv.male, eclsk$male))
levels(eclsk$male.imp)<-levels(eclsk$male)
prop.table(table(eclsk$male))
##
## 0 1
## 0.4878412 0.5121588
prop.table(table(eclsk$male.imp))
##
## 0 1
## 0.4867943 0.5132057
md.pattern(eclsk[,c("chhealth", "pov", "bmi", "educ", "p_raceth", "male")])
## male bmi educ pov p_raceth chhealth
## 12530 1 1 1 1 1 1 0
## 469 1 1 1 1 1 0 1
## 64 1 1 1 1 0 1 1
## 4 1 1 1 1 0 0 2
## 2150 1 1 1 0 0 0 3
## 2 1 1 0 0 0 1 3
## 1912 1 1 0 0 0 0 4
## 413 1 0 1 1 1 1 1
## 41 1 0 1 1 1 0 2
## 6 1 0 1 1 0 1 2
## 328 1 0 1 0 0 0 4
## 216 1 0 0 0 0 0 5
## 31 0 1 0 0 0 0 5
## 8 0 0 0 0 0 0 6
## 39 1012 2169 4647 4721 5159 17747
md.pairs(eclsk[,c("chhealth", "pov", "bmi", "educ", "p_raceth", "male")])
## $rr
## chhealth pov bmi educ p_raceth male
## chhealth 13015 13013 12596 13013 12943 13015
## pov 13013 13527 13067 13527 13453 13527
## bmi 12596 13067 17162 15217 12999 17131
## educ 13013 13527 15217 16005 13453 16005
## p_raceth 12943 13453 12999 13453 13453 13453
## male 13015 13527 17131 16005 13453 18135
##
## $rm
## chhealth pov bmi educ p_raceth male
## chhealth 0 2 419 2 72 0
## pov 514 0 460 0 74 0
## bmi 4566 4095 0 1945 4163 31
## educ 2992 2478 788 0 2552 0
## p_raceth 510 0 454 0 0 0
## male 5120 4608 1004 2130 4682 0
##
## $mr
## chhealth pov bmi educ p_raceth male
## chhealth 0 514 4566 2992 510 5120
## pov 2 0 4095 2478 0 4608
## bmi 419 460 0 788 454 1004
## educ 2 0 1945 0 0 2130
## p_raceth 72 74 4163 2552 0 4682
## male 0 0 31 0 0 0
##
## $mm
## chhealth pov bmi educ p_raceth male
## chhealth 5159 4645 593 2167 4649 39
## pov 4645 4647 552 2169 4647 39
## bmi 593 552 1012 224 558 8
## educ 2167 2169 224 2169 2169 39
## p_raceth 4649 4647 558 2169 4721 39
## male 39 39 8 39 39 39
mydat2<-eclsk
samp1<-sample(1:dim(mydat2)[1], replace = F, size = 500)
mydat2$bmiknock<-mydat2$bmi
mydat2$bmiknock[samp1]<-NA
head(mydat2[, c("bmiknock","bmi")])
## bmiknock bmi
## 1 16.3250 16.3250
## 2 22.9158 22.9158
## 3 15.1035 15.1035
## 4 19.4687 19.4687
## 5 15.5441 15.5441
## 6 17.1869 17.1869
imp<-mice(data = mydat2[,c("chhealth", "pov", "bmi", "educ", "p_raceth", "male")], seed = 22, m = 5)
##
## iter imp variable
## 1 1 chhealth pov bmi educ p_raceth male
## 1 2 chhealth pov bmi educ p_raceth male
## 1 3 chhealth pov bmi educ p_raceth male
## 1 4 chhealth pov bmi educ p_raceth male
## 1 5 chhealth pov bmi educ p_raceth male
## 2 1 chhealth pov bmi educ p_raceth male
## 2 2 chhealth pov bmi educ p_raceth male
## 2 3 chhealth pov bmi educ p_raceth male
## 2 4 chhealth pov bmi educ p_raceth male
## 2 5 chhealth pov bmi educ p_raceth male
## 3 1 chhealth pov bmi educ p_raceth male
## 3 2 chhealth pov bmi educ p_raceth male
## 3 3 chhealth pov bmi educ p_raceth male
## 3 4 chhealth pov bmi educ p_raceth male
## 3 5 chhealth pov bmi educ p_raceth male
## 4 1 chhealth pov bmi educ p_raceth male
## 4 2 chhealth pov bmi educ p_raceth male
## 4 3 chhealth pov bmi educ p_raceth male
## 4 4 chhealth pov bmi educ p_raceth male
## 4 5 chhealth pov bmi educ p_raceth male
## 5 1 chhealth pov bmi educ p_raceth male
## 5 2 chhealth pov bmi educ p_raceth male
## 5 3 chhealth pov bmi educ p_raceth male
## 5 4 chhealth pov bmi educ p_raceth male
## 5 5 chhealth pov bmi educ p_raceth male
print(imp)
## Class: mids
## Number of multiple imputations: 5
## Imputation methods:
## chhealth pov bmi educ p_raceth male
## "polyreg" "logreg" "pmm" "polyreg" "polyreg" "logreg"
## PredictorMatrix:
## chhealth pov bmi educ p_raceth male
## chhealth 0 1 1 1 1 1
## pov 1 0 1 1 1 1
## bmi 1 1 0 1 1 1
## educ 1 1 1 0 1 1
## p_raceth 1 1 1 1 0 1
## male 1 1 1 1 1 0
head(imp$imp$bmi)
## 1 2 3 4 5
## 33 27.1212 15.9581 14.7976 17.1514 15.9486
## 44 17.0196 23.1611 15.6164 15.5909 15.9396
## 59 18.0612 16.1148 16.4499 15.3452 14.3810
## 74 16.7542 14.4433 14.5133 14.6193 15.9486
## 80 15.2689 17.3479 23.8018 15.2576 15.8123
## 96 15.6164 12.3555 16.4516 21.2501 19.0641
summary(imp$imp$bmi)
## 1 2 3 4
## Min. :11.64 Min. :11.48 Min. :10.75 Min. :12.09
## 1st Qu.:15.04 1st Qu.:14.97 1st Qu.:15.13 1st Qu.:15.12
## Median :15.97 Median :16.13 Median :16.19 Median :16.16
## Mean :16.68 Mean :16.72 Mean :16.72 Mean :16.86
## 3rd Qu.:17.46 3rd Qu.:17.69 3rd Qu.:17.59 3rd Qu.:17.51
## Max. :49.14 Max. :36.91 Max. :30.39 Max. :36.91
## 5
## Min. :10.28
## 1st Qu.:15.11
## Median :16.20
## Mean :16.87
## 3rd Qu.:17.88
## Max. :34.43
summary(eclsk$bmi)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 7.605 15.086 16.046 16.602 17.396 49.136 1012
head(imp$imp$chhealth)
## 1 2 3 4 5
## 9 0Exc/VGood 0Exc/VGood 0Exc/VGood 0Exc/VGood 0Exc/VGood
## 15 0Exc/VGood 0Exc/VGood 0Exc/VGood 0Exc/VGood 0Exc/VGood
## 18 1Good 0Exc/VGood 0Exc/VGood 1Good 0Exc/VGood
## 26 1Good 0Exc/VGood 1Good 0Exc/VGood 2Poor
## 31 0Exc/VGood 2Poor 0Exc/VGood 1Good 0Exc/VGood
## 33 0Exc/VGood 0Exc/VGood 0Exc/VGood 0Exc/VGood 0Exc/VGood
summary(imp$imp$chhealth)
## 1 2 3 4
## 0Exc/VGood:4408 0Exc/VGood:4411 0Exc/VGood:4450 0Exc/VGood:4413
## 1Good : 608 1Good : 624 1Good : 575 1Good : 599
## 2Poor : 143 2Poor : 124 2Poor : 134 2Poor : 147
## 5
## 0Exc/VGood:4398
## 1Good : 627
## 2Poor : 134
library(lattice)
stripplot(imp,bmi~chhealth|.imp, pch=20)
dat.imp<-complete(imp, action = 1)
head(dat.imp, n=10)
## chhealth pov bmi educ p_raceth male
## 1 0Exc/VGood 0 16.3250 3somecol nh white 1
## 2 0Exc/VGood 0 22.9158 3somecol nh white 0
## 3 0Exc/VGood 0 15.1035 4colgrad nh white 1
## 4 0Exc/VGood 0 19.4687 3somecol nh white 1
## 5 0Exc/VGood 0 15.5441 3somecol nh white 0
## 6 0Exc/VGood 0 17.1869 2hsgrad nh white 0
## 7 0Exc/VGood 1 14.2873 3somecol nh white 0
## 8 0Exc/VGood 0 13.0043 4colgrad nh white 0
## 9 0Exc/VGood 1 15.6715 2hsgrad nh white 0
## 10 0Exc/VGood 1 19.5331 0prim hispanic 1
head(eclsk[,c("chhealth", "pov", "bmi", "educ", "p_raceth", "male")], n=10)
## chhealth pov bmi educ p_raceth male
## 1 0Exc/VGood 0 16.3250 3somecol nh white 1
## 2 0Exc/VGood 0 22.9158 3somecol nh white 0
## 3 0Exc/VGood 0 15.1035 4colgrad nh white 1
## 4 0Exc/VGood 0 19.4687 3somecol nh white 1
## 5 0Exc/VGood 0 15.5441 3somecol nh white 0
## 6 0Exc/VGood 0 17.1869 2hsgrad nh white 0
## 7 0Exc/VGood 1 14.2873 3somecol nh white 0
## 8 0Exc/VGood 0 13.0043 4colgrad nh white 0
## 9 <NA> <NA> 15.6715 2hsgrad <NA> 0
## 10 0Exc/VGood 1 19.5331 0prim hispanic 1
head(dat.imp[is.na(eclsk$chhealth)==T,], n=10)
## chhealth pov bmi educ p_raceth male
## 9 0Exc/VGood 1 15.6715 2hsgrad nh white 0
## 15 0Exc/VGood 1 16.6762 3somecol nh white 0
## 18 1Good 0 14.9589 3somecol hispanic 1
## 26 1Good 0 22.4385 3somecol nh black 0
## 31 0Exc/VGood 1 19.4878 1somehs hispanic 1
## 33 0Exc/VGood 0 27.1212 3somecol nh white 1
## 36 0Exc/VGood 0 17.2372 2hsgrad hispanic 0
## 37 0Exc/VGood 0 17.0139 4colgrad nh white 1
## 39 0Exc/VGood 0 16.3011 4colgrad nh white 1
## 42 1Good 1 15.1203 0prim hispanic 1
head(eclsk[is.na(eclsk$chhealth)==T,c("chhealth", "pov", "bmi", "educ", "p_raceth", "male")], n=10)
## chhealth pov bmi educ p_raceth male
## 9 <NA> <NA> 15.6715 2hsgrad <NA> 0
## 15 <NA> <NA> 16.6762 <NA> <NA> 0
## 18 <NA> <NA> 14.9589 <NA> <NA> 1
## 26 <NA> 0 22.4385 3somecol nh black 0
## 31 <NA> <NA> 19.4878 1somehs <NA> 1
## 33 <NA> <NA> NA 3somecol <NA> 1
## 36 <NA> <NA> 17.2372 2hsgrad <NA> 0
## 37 <NA> <NA> 17.0139 <NA> <NA> 1
## 39 <NA> <NA> 16.3011 <NA> <NA> 1
## 42 <NA> <NA> 15.1203 0prim <NA> 1
fit.bmi<-with(data=imp, expr=lm(bmi~chhealth+pov+educ+p_raceth+male))
fit.bmi
## call :
## with.mids(data = imp, expr = lm(bmi ~ chhealth + pov + educ +
## p_raceth + male))
##
## call1 :
## mice(data = mydat2[, c("chhealth", "pov", "bmi", "educ", "p_raceth",
## "male")], m = 5, seed = 22)
##
## nmis :
## chhealth pov bmi educ p_raceth male
## 5159 4647 1012 2169 4721 39
##
## analyses :
## [[1]]
##
## Call:
## lm(formula = bmi ~ chhealth + pov + educ + p_raceth + male)
##
## Coefficients:
## (Intercept) chhealth1Good chhealth2Poor
## 16.52217 0.43907 0.26070
## pov1 educ0prim educ1somehs
## 0.05024 0.08673 0.05171
## educ3somecol educ4colgrad p_racethhispanic
## -0.05037 -0.51079 0.50993
## p_racethnh black p_racethnh multirace p_racethnh other
## 0.29632 0.26386 -0.18580
## male1
## 0.10581
##
##
## [[2]]
##
## Call:
## lm(formula = bmi ~ chhealth + pov + educ + p_raceth + male)
##
## Coefficients:
## (Intercept) chhealth1Good chhealth2Poor
## 16.50599 0.40703 0.19571
## pov1 educ0prim educ1somehs
## 0.05390 0.16208 0.11886
## educ3somecol educ4colgrad p_racethhispanic
## -0.02307 -0.49654 0.52850
## p_racethnh black p_racethnh multirace p_racethnh other
## 0.26237 0.21127 -0.19320
## male1
## 0.10707
##
##
## [[3]]
##
## Call:
## lm(formula = bmi ~ chhealth + pov + educ + p_raceth + male)
##
## Coefficients:
## (Intercept) chhealth1Good chhealth2Poor
## 16.51163 0.48385 0.50604
## pov1 educ0prim educ1somehs
## 0.02082 0.08899 0.08158
## educ3somecol educ4colgrad p_racethhispanic
## -0.07448 -0.48505 0.50952
## p_racethnh black p_racethnh multirace p_racethnh other
## 0.38694 0.35412 -0.19561
## male1
## 0.09874
##
##
## [[4]]
##
## Call:
## lm(formula = bmi ~ chhealth + pov + educ + p_raceth + male)
##
## Coefficients:
## (Intercept) chhealth1Good chhealth2Poor
## 16.46350 0.34555 0.27134
## pov1 educ0prim educ1somehs
## 0.12272 0.12871 0.12603
## educ3somecol educ4colgrad p_racethhispanic
## -0.04177 -0.47831 0.61696
## p_racethnh black p_racethnh multirace p_racethnh other
## 0.37603 0.37390 -0.16283
## male1
## 0.10821
##
##
## [[5]]
##
## Call:
## lm(formula = bmi ~ chhealth + pov + educ + p_raceth + male)
##
## Coefficients:
## (Intercept) chhealth1Good chhealth2Poor
## 16.49096 0.49237 0.49794
## pov1 educ0prim educ1somehs
## 0.03290 0.10247 0.08047
## educ3somecol educ4colgrad p_racethhispanic
## -0.07251 -0.47109 0.60219
## p_racethnh black p_racethnh multirace p_racethnh other
## 0.31389 0.35839 -0.18624
## male1
## 0.10587
#variation in child bmi
with (data=imp, exp=(sd(bmi)))
## call :
## with.mids(data = imp, expr = (sd(bmi)))
##
## call1 :
## mice(data = mydat2[, c("chhealth", "pov", "bmi", "educ", "p_raceth",
## "male")], m = 5, seed = 22)
##
## nmis :
## chhealth pov bmi educ p_raceth male
## 5159 4647 1012 2169 4721 39
##
## analyses :
## [[1]]
## [1] 2.533415
##
## [[2]]
## [1] 2.529566
##
## [[3]]
## [1] 2.508661
##
## [[4]]
## [1] 2.535824
##
## [[5]]
## [1] 2.537452
with (data=imp, exp=(prop.table(table(chhealth))))
## call :
## with.mids(data = imp, expr = (prop.table(table(chhealth))))
##
## call1 :
## mice(data = mydat2[, c("chhealth", "pov", "bmi", "educ", "p_raceth",
## "male")], m = 5, seed = 22)
##
## nmis :
## chhealth pov bmi educ p_raceth male
## 5159 4647 1012 2169 4721 39
##
## analyses :
## [[1]]
## chhealth
## 0Exc/VGood 1Good 2Poor
## 0.86453175 0.11147794 0.02399032
##
## [[2]]
## chhealth
## 0Exc/VGood 1Good 2Poor
## 0.86469682 0.11235831 0.02294487
##
## [[3]]
## chhealth
## 0Exc/VGood 1Good 2Poor
## 0.8668427 0.1096622 0.0234951
##
## [[4]]
## chhealth
## 0Exc/VGood 1Good 2Poor
## 0.86480687 0.11098272 0.02421041
##
## [[5]]
## chhealth
## 0Exc/VGood 1Good 2Poor
## 0.8639815 0.1125234 0.0234951
with (data=imp, exp=(prop.table(table(pov))))
## call :
## with.mids(data = imp, expr = (prop.table(table(pov))))
##
## call1 :
## mice(data = mydat2[, c("chhealth", "pov", "bmi", "educ", "p_raceth",
## "male")], m = 5, seed = 22)
##
## nmis :
## chhealth pov bmi educ p_raceth male
## 5159 4647 1012 2169 4721 39
##
## analyses :
## [[1]]
## pov
## 0 1
## 0.7374821 0.2625179
##
## [[2]]
## pov
## 0 1
## 0.7353912 0.2646088
##
## [[3]]
## pov
## 0 1
## 0.7362166 0.2637834
##
## [[4]]
## pov
## 0 1
## 0.7342357 0.2657643
##
## [[5]]
## pov
## 0 1
## 0.7309893 0.2690107
with (data=imp, exp=(prop.table(table(educ))))
## call :
## with.mids(data = imp, expr = (prop.table(table(educ))))
##
## call1 :
## mice(data = mydat2[, c("chhealth", "pov", "bmi", "educ", "p_raceth",
## "male")], m = 5, seed = 22)
##
## nmis :
## chhealth pov bmi educ p_raceth male
## 5159 4647 1012 2169 4721 39
##
## analyses :
## [[1]]
## educ
## 2hsgrad 0prim 1somehs 3somecol 4colgrad
## 0.22092000 0.04880599 0.08765269 0.32106306 0.32155827
##
## [[2]]
## educ
## 2hsgrad 0prim 1somehs 3somecol 4colgrad
## 0.22097502 0.05001651 0.08743260 0.32040277 0.32117310
##
## [[3]]
## educ
## 2hsgrad 0prim 1somehs 3somecol 4colgrad
## 0.22207549 0.04853087 0.08787279 0.31957742 0.32194344
##
## [[4]]
## educ
## 2hsgrad 0prim 1somehs 3somecol 4colgrad
## 0.22345108 0.04990646 0.08721250 0.31853197 0.32089799
##
## [[5]]
## educ
## 2hsgrad 0prim 1somehs 3somecol 4colgrad
## 0.2235611 0.0490811 0.0862771 0.3206779 0.3204028
with (data=imp, exp=(prop.table(table(p_raceth))))
## call :
## with.mids(data = imp, expr = (prop.table(table(p_raceth))))
##
## call1 :
## mice(data = mydat2[, c("chhealth", "pov", "bmi", "educ", "p_raceth",
## "male")], m = 5, seed = 22)
##
## nmis :
## chhealth pov bmi educ p_raceth male
## 5159 4647 1012 2169 4721 39
##
## analyses :
## [[1]]
## p_raceth
## nh white hispanic nh black nh multirace nh other
## 0.54143282 0.22191042 0.11654011 0.01760757 0.10250908
##
## [[2]]
## p_raceth
## nh white hispanic nh black nh multirace nh other
## 0.54088258 0.22207549 0.11659514 0.01771762 0.10272917
##
## [[3]]
## p_raceth
## nh white hispanic nh black nh multirace nh other
## 0.54165291 0.22080995 0.11720040 0.01848795 0.10184879
##
## [[4]]
## p_raceth
## nh white hispanic nh black nh multirace nh other
## 0.54220315 0.21998459 0.11665016 0.01788269 0.10327941
##
## [[5]]
## p_raceth
## nh white hispanic nh black nh multirace nh other
## 0.54011225 0.22235061 0.11835589 0.01733245 0.10184879
with (data=imp, exp=(prop.table(table(male))))
## call :
## with.mids(data = imp, expr = (prop.table(table(male))))
##
## call1 :
## mice(data = mydat2[, c("chhealth", "pov", "bmi", "educ", "p_raceth",
## "male")], m = 5, seed = 22)
##
## nmis :
## chhealth pov bmi educ p_raceth male
## 5159 4647 1012 2169 4721 39
##
## analyses :
## [[1]]
## male
## 0 1
## 0.4876197 0.5123803
##
## [[2]]
## male
## 0 1
## 0.4878398 0.5121602
##
## [[3]]
## male
## 0 1
## 0.4878398 0.5121602
##
## [[4]]
## male
## 0 1
## 0.4877847 0.5122153
##
## [[5]]
## male
## 0 1
## 0.4877297 0.5122703
est.p<-pool(fit.bmi)
print(est.p)
## Class: mipo m = 5
## estimate ubar b t
## (Intercept) 16.49884943 0.002603675 5.175423e-04 0.003224726
## chhealth1Good 0.43357335 0.003533433 3.616930e-03 0.007873749
## chhealth2Poor 0.34634736 0.015129599 2.103364e-02 0.040369969
## pov1 0.05611379 0.002321808 1.565009e-03 0.004199819
## educ0prim 0.11379681 0.009360334 1.007308e-03 0.010569103
## educ1somehs 0.09172839 0.005673895 9.358563e-04 0.006796923
## educ3somecol -0.05244043 0.002689841 4.672687e-04 0.003250564
## educ4colgrad -0.48835379 0.002946256 2.450869e-04 0.003240360
## p_racethhispanic 0.55341868 0.002777785 2.713893e-03 0.006034456
## p_racethnh black 0.32711030 0.003776916 2.821755e-03 0.007163022
## p_racethnh multirace 0.31230924 0.019855877 5.055344e-03 0.025922290
## p_racethnh other -0.18473495 0.004015787 1.682867e-04 0.004217731
## male1 0.10513898 0.001365107 1.378274e-05 0.001381646
## dfcom df riv lambda fmi
## (Intercept) 18161 107.05544 0.23852850 0.19259024 0.20726302
## chhealth1Good 18161 13.14254 1.22835662 0.55123880 0.60683861
## chhealth2Poor 18161 10.21722 1.66827745 0.62522638 0.68193627
## pov1 18161 19.96459 0.80885715 0.44716475 0.49531151
## educ0prim 18161 300.10168 0.12913746 0.11436824 0.12021203
## educ1somehs 18161 145.11961 0.19792886 0.16522589 0.17649751
## educ3somecol 18161 133.23355 0.20845932 0.17250007 0.18464832
## educ4colgrad 18161 471.68946 0.09982305 0.09076283 0.09459370
## p_racethhispanic 18161 13.71120 1.17239869 0.53967934 0.59477062
## p_racethnh black 18161 17.86654 0.89652652 0.47272026 0.52325857
## p_racethnh multirace 18161 72.65548 0.30552225 0.23402301 0.25427209
## p_racethnh other 18161 1584.89524 0.05028753 0.04787977 0.04907899
## male1 18161 10921.70461 0.01211575 0.01197071 0.01215159
summary(est.p)
## estimate std.error statistic df
## (Intercept) 16.49884943 0.05678667 290.5408880 107.05544
## chhealth1Good 0.43357335 0.08873415 4.8862064 13.14254
## chhealth2Poor 0.34634736 0.20092279 1.7237833 10.21722
## pov1 0.05611379 0.06480601 0.8658733 19.96459
## educ0prim 0.11379681 0.10280614 1.1069067 300.10168
## educ1somehs 0.09172839 0.08244345 1.1126219 145.11961
## educ3somecol -0.05244043 0.05701372 -0.9197862 133.23355
## educ4colgrad -0.48835379 0.05692416 -8.5790249 471.68946
## p_racethhispanic 0.55341868 0.07768176 7.1241779 13.71120
## p_racethnh black 0.32711030 0.08463464 3.8649697 17.86654
## p_racethnh multirace 0.31230924 0.16100401 1.9397607 72.65548
## p_racethnh other -0.18473495 0.06494406 -2.8445241 1584.89524
## male1 0.10513898 0.03717050 2.8285596 10921.70461
## p.value
## (Intercept) 0.000000e+00
## chhealth1Good 2.881333e-04
## chhealth2Poor 1.148200e-01
## pov1 3.968461e-01
## educ0prim 2.692207e-01
## educ1somehs 2.677105e-01
## educ3somecol 3.593468e-01
## educ4colgrad 0.000000e+00
## p_racethhispanic 5.774833e-06
## p_racethnh black 1.148009e-03
## p_racethnh multirace 5.629094e-02
## p_racethnh other 4.504913e-03
## male1 4.684316e-03
lam<-data.frame(lam=est.p$pooled$lambda, param=row.names(est.p$pooled))
ggplot(data=lam,aes(x=param, y=lam))+geom_col()+theme(axis.text.x = element_text(angle = 45, hjust = 1))
Some child health variables, race ethnicity variables, and poverty have higher variance. Migh explain variation between 5 imputed models.
#Non imputated data
sub<-eclsk%>%
select(chhealth,pov,bmi,educ,p_raceth,male)%>%
filter(complete.cases(.))%>%
as.data.frame()
fit1<-summary(lm(bmi~chhealth+pov+educ+p_raceth+male, sub))
fit1
##
## Call:
## lm(formula = bmi ~ chhealth + pov + educ + p_raceth + male, data = sub)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.343 -1.453 -0.497 0.831 32.341
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.46814 0.06077 270.980 < 2e-16 ***
## chhealth1Good 0.42143 0.06990 6.029 1.70e-09 ***
## chhealth2Poor 0.30108 0.15001 2.007 0.044759 *
## pov1 0.10468 0.05769 1.815 0.069595 .
## educ0prim 0.10770 0.11448 0.941 0.346802
## educ1somehs 0.03137 0.09213 0.341 0.733446
## educ3somecol -0.07775 0.06193 -1.255 0.209380
## educ4colgrad -0.53332 0.06350 -8.399 < 2e-16 ***
## p_racethhispanic 0.47956 0.06201 7.733 1.13e-14 ***
## p_racethnh black 0.32710 0.07336 4.459 8.32e-06 ***
## p_racethnh multirace 0.23440 0.16396 1.430 0.152846
## p_racethnh other -0.20488 0.07370 -2.780 0.005444 **
## male1 0.15794 0.04291 3.680 0.000234 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.4 on 12517 degrees of freedom
## Multiple R-squared: 0.03528, Adjusted R-squared: 0.03436
## F-statistic: 38.15 on 12 and 12517 DF, p-value: < 2.2e-16
fit.mean.imp<-lm(bmi.imp.mean~chhealth.imp+pov.imp+educ.imp+p_raceth.imp+male.imp, data=eclsk)
summary(fit.mean.imp)
##
## Call:
## lm(formula = bmi.imp.mean ~ chhealth.imp + pov.imp + educ.imp +
## p_raceth.imp + male.imp, data = eclsk)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.375 -1.439 -0.446 0.695 32.309
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.62515 0.04931 337.153 < 2e-16 ***
## chhealth.imp1Good 0.31936 0.06833 4.674 2.98e-06 ***
## chhealth.imp2Poor 0.19170 0.14433 1.328 0.184114
## pov.imp1 0.01159 0.05331 0.217 0.827837
## educ.imp0prim 0.22206 0.09945 2.233 0.025565 *
## educ.imp1somehs 0.15482 0.07731 2.002 0.045246 *
## educ.imp3somecol -0.09653 0.05336 -1.809 0.070498 .
## educ.imp4colgrad -0.38783 0.05207 -7.448 9.91e-14 ***
## p_raceth.imphispanic 0.35474 0.05701 6.223 4.99e-10 ***
## p_raceth.impnh black 0.20196 0.06845 2.951 0.003176 **
## p_raceth.impnh multirace 0.14551 0.16028 0.908 0.363956
## p_raceth.impnh other -0.35122 0.06866 -5.116 3.16e-07 ***
## male.imp1 0.11850 0.03593 3.298 0.000976 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.42 on 18161 degrees of freedom
## Multiple R-squared: 0.01932, Adjusted R-squared: 0.01868
## F-statistic: 29.82 on 12 and 18161 DF, p-value: < 2.2e-16
fit.imp<-lm(bmi~chhealth+pov+educ+p_raceth+male, data=dat.imp)
summary(fit.imp)
##
## Call:
## lm(formula = bmi ~ chhealth + pov + educ + p_raceth + male, data = dat.imp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.427 -1.513 -0.520 0.826 32.317
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.52217 0.05112 323.179 < 2e-16 ***
## chhealth1Good 0.43907 0.05959 7.368 1.81e-13 ***
## chhealth2Poor 0.26070 0.12233 2.131 0.03310 *
## pov1 0.05024 0.04833 1.040 0.29855
## educ0prim 0.08673 0.09746 0.890 0.37352
## educ1somehs 0.05171 0.07551 0.685 0.49345
## educ3somecol -0.05037 0.05205 -0.968 0.33320
## educ4colgrad -0.51079 0.05442 -9.386 < 2e-16 ***
## p_racethhispanic 0.50993 0.05283 9.652 < 2e-16 ***
## p_racethnh black 0.29632 0.06166 4.806 1.55e-06 ***
## p_racethnh multirace 0.26386 0.14206 1.857 0.06327 .
## p_racethnh other -0.18580 0.06358 -2.922 0.00348 **
## male1 0.10581 0.03705 2.856 0.00430 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.496 on 18161 degrees of freedom
## Multiple R-squared: 0.03014, Adjusted R-squared: 0.0295
## F-statistic: 47.03 on 12 and 18161 DF, p-value: < 2.2e-16
Across all models, what remained constant was:
-Kids with Good health are more likely to have higher BMIs compared to kids with Excellent or very good health. (Smaller effect in mean/modal imputation) -Kids with parents who graduated college are more likely to have lower BMIs compared to kids with parents who have a high school education. (Smaller effect in mean/modal imputation) -Kids with parents who are Hispanic or Black are more likely to haver higher BMIs thatn kids with parents who are White.(Somewhat smaller effect in mean/modal imputation) -Kids with parents who identify as “Other” are more likely to have lower BMIs than kids with parents who are White. (varying effects across three models) -Boys are likely to have higher BMIs than girls. (Somewhat similar across all models)
Overall, results are most similar between original data and multiple imputation data.