tao subdata
library("foreign", lib.loc="/Library/Frameworks/R.framework/Versions/3.4/Resources/library")
data=read.dta("/Users/phuongthao/Desktop/Thao1.dta")
data1=subset(data, select=c("b6new", "honnhan", "nghe", "thunhapcanhan", "hocvan1", "nhomtuoi", "h1new", "h6new", "sh1new", "b2", "b12", "b15new","b18new", "b5new", "q1", "q2","q3","q4"))
data1$honnhan=as.factor(data1$honnhan)
data1$nghe=as.factor(data1$nghe)
data1$thunhapcanhan=as.factor(data1$thunhapcanhan)
data1$hocvan1=as.factor(data1$hocvan1)
data1$nhomtuoi=as.factor(data1$nhomtuoi)
data1$sh1new=as.factor(data1$sh1new)
data1$b2=as.factor(data1$b2)
data1$b12=as.factor(data1$b12)
data1$sh1new=as.factor(data1$sh1new)
data1$b15new=as.factor(data1$b15new)
data1$b18new=as.factor(data1$b18new)
data1$b5new=as.factor(data1$b5new)
library(VIM);
## Loading required package: colorspace
## Loading required package: grid
## Loading required package: data.table
## VIM is ready to use.
## Since version 4.0.0 the GUI is in its own package VIMGUI.
##
## Please use the package to use the new (and old) GUI.
## Suggestions and bug-reports can be submitted at: https://github.com/alexkowa/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
library(mice)
head(data1)
## b6new honnhan nghe thunhapcanhan hocvan1 nhomtuoi h1new h6new sh1new b2
## 1 0 1 2 3 3 1 1 0 3 2
## 2 0 1 1 3 1 3 1 0 1 1
## 3 0 1 1 3 3 2 1 0 3 1
## 4 0 2 1 3 3 1 1 0 2 2
## 5 0 2 1 2 3 1 0 0 3 3
## 6 0 1 1 2 1 4 0 0 3 3
## b12 b15new b18new b5new q1 q2 q3 q4
## 1 3 3 1 2 0 0 0 0
## 2 3 3 1 3 0 0 0 1
## 3 2 1 1 2 0 1 0 1
## 4 2 2 1 1 0 0 1 1
## 5 3 3 1 1 0 0 0 0
## 6 2 2 2 2 0 0 0 1
xem xet bo so lieu
md.pattern(data1)
## h6new h1new sh1new nghe b2 q1 q4 nhomtuoi q2 q3 honnhan thunhapcanhan
## 699 1 1 1 1 1 1 1 1 1 1 1 1
## 27 1 1 1 1 1 1 1 1 1 1 1 1
## 8 1 1 1 1 1 1 1 1 1 1 0 1
## 2 1 1 1 0 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1 1 1 0
## 8 1 1 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 0 1 1 1 1
## 1 1 1 1 1 0 1 1 1 1 1 1 1
## 6 1 1 1 1 1 1 1 1 1 1 1 1
## 5 1 1 1 1 1 1 1 1 1 1 1 1
## 15 1 1 1 1 1 1 1 1 1 1 1 1
## 8 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 0 1 1 1
## 7 1 1 1 1 1 1 1 1 1 0 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 0
## 2 1 1 1 1 1 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 0 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 0 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 0
## 1 1 1 1 1 1 1 1 1 1 1 0 1
## 1 1 1 1 1 1 1 1 1 1 1 1 0
## 1 1 1 1 1 1 0 0 1 0 0 1 1
## 1 1 1 1 1 0 0 0 1 0 0 1 1
## 1 1 0 0 0 0 0 0 0 0 0 0 0
## 0 1 1 3 3 3 3 4 4 10 12 13
## b5new hocvan1 b15new b12 b18new b6new
## 699 1 1 1 1 1 1 0
## 27 1 1 1 1 1 0 1
## 8 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1
## 8 1 0 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 6 1 1 1 0 1 1 1
## 5 1 1 0 1 1 1 1
## 15 1 1 1 1 0 1 1
## 8 0 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 7 1 1 1 1 1 1 1
## 1 1 1 1 1 1 0 2
## 2 1 0 1 1 1 0 2
## 4 1 1 1 0 1 0 2
## 1 1 1 0 1 1 1 2
## 1 1 1 0 0 1 1 2
## 2 1 1 1 1 0 0 2
## 1 1 0 1 1 0 1 2
## 2 1 1 0 1 0 1 2
## 1 0 1 1 1 1 1 2
## 1 0 1 0 1 1 1 2
## 1 1 1 0 0 1 0 3
## 1 1 1 1 0 0 0 3
## 1 1 1 0 1 0 1 3
## 1 0 0 1 1 1 1 3
## 1 0 0 1 1 1 1 3
## 1 1 1 1 1 1 1 4
## 1 1 1 0 0 0 0 9
## 1 0 0 0 0 0 0 17
## 13 14 14 15 24 40 177
aggr(data1, prop=T, mumbers=T)

impute gia tri missing
mdata=mice(data1, seed=12345, printFlag = F)
mdata$imp$b6new
## 1 2 3 4 5
## 42 0 0 0 0 0
## 61 0 1 0 0 0
## 69 0 1 0 0 0
## 81 0 1 0 0 0
## 96 1 0 0 0 0
## 104 0 0 0 0 0
## 109 1 1 0 1 1
## 127 1 0 0 0 0
## 156 1 0 1 0 1
## 169 0 1 0 0 1
## 184 0 0 0 0 0
## 185 0 1 0 0 1
## 245 1 1 1 0 1
## 267 0 1 0 0 1
## 303 1 0 0 0 1
## 317 0 0 0 1 0
## 390 0 0 0 1 0
## 424 0 0 0 1 0
## 436 1 1 0 1 1
## 462 1 1 1 1 1
## 468 1 0 0 0 0
## 510 0 0 0 0 0
## 525 1 0 0 0 1
## 529 0 0 0 1 0
## 553 0 0 0 0 0
## 615 0 0 0 0 0
## 617 1 1 1 0 1
## 684 1 0 1 0 1
## 689 0 0 1 1 1
## 698 0 1 0 0 0
## 717 0 0 1 0 0
## 722 0 0 1 1 0
## 754 0 1 0 0 0
## 755 0 0 0 0 0
## 774 0 1 1 0 0
## 786 0 0 0 1 1
## 803 0 1 0 1 0
## 808 1 0 0 0 1
## 809 0 0 0 0 0
## 812 0 0 0 0 0
mdata$imp$honnhan
## 1 2 3 4 5
## 90 1 1 1 1 1
## 291 1 1 1 1 1
## 324 1 1 1 1 1
## 350 2 1 2 2 2
## 461 1 1 1 1 1
## 590 1 1 1 1 1
## 660 1 1 2 2 2
## 661 1 1 1 1 1
## 758 1 1 1 1 1
## 783 2 2 2 2 2
## 808 1 2 1 2 1
## 820 1 1 2 1 1
mdata$imp$nghe
## 1 2 3 4 5
## 103 1 1 1 1 1
## 792 2 1 1 1 1
## 808 2 2 1 2 2
mdata$imp$thunhapcanhan
## 1 2 3 4 5
## 799 2 1 2 1 2
## 800 2 2 2 2 2
## 802 1 2 2 2 2
## 806 2 2 3 2 2
## 808 2 2 2 1 3
## 811 2 2 2 1 1
## 812 1 1 2 2 2
## 813 2 2 1 2 1
## 814 1 1 1 2 2
## 815 2 2 2 1 2
## 819 2 2 2 1 2
## 821 3 1 3 1 1
## 823 2 1 2 2 2
mdata$imp$hocvan1
## 1 2 3 4 5
## 61 3 3 3 3 2
## 80 2 1 1 1 2
## 224 3 3 3 3 2
## 235 1 3 3 2 1
## 291 2 3 3 2 1
## 296 1 1 1 1 1
## 410 1 1 3 3 1
## 485 1 2 3 3 3
## 525 2 2 1 2 2
## 534 2 3 2 2 1
## 555 3 3 3 3 3
## 678 2 2 3 2 3
## 808 2 3 1 3 3
## 814 3 3 3 2 2
mdata$imp$nhomtuoi
## 1 2 3 4 5
## 266 3 3 3 3 3
## 515 1 1 1 1 1
## 621 3 3 3 3 3
## 808 3 2 3 1 2
mdata$imp$h1new
## 1 2 3 4 5
## 808 0 1 0 1 0
mdata$imp$h6new
## NULL
mdata$imp$sh1new
## 1 2 3 4 5
## 808 2 2 3 2 2
mdata$imp$b2
## 1 2 3 4 5
## 88 1 1 1 1 2
## 755 1 2 1 2 1
## 808 1 2 2 1 2
mdata$imp$b12
## 1 2 3 4 5
## 96 2 2 3 3 2
## 104 1 2 2 2 2
## 128 3 3 3 2 3
## 170 2 1 2 3 2
## 207 3 3 3 3 2
## 303 3 2 3 3 3
## 341 3 2 3 2 2
## 504 2 2 1 3 2
## 553 1 1 3 2 2
## 606 3 3 3 3 3
## 689 2 2 2 2 2
## 698 2 2 2 2 2
## 755 3 2 2 2 3
## 795 2 2 2 3 3
## 808 1 3 3 3 3
mdata$imp$b15new
## 1 2 3 4 5
## 155 1 1 1 1 1
## 205 2 3 2 1 3
## 210 3 3 2 3 3
## 269 3 3 3 1 3
## 369 1 1 2 1 1
## 377 2 3 3 3 3
## 553 1 1 1 1 2
## 590 1 1 2 1 1
## 709 2 3 3 2 3
## 728 2 1 2 3 2
## 755 3 1 3 3 3
## 795 2 2 2 3 2
## 802 3 3 2 3 3
## 808 1 2 2 3 2
mdata$imp$b18new
## 1 2 3 4 5
## 86 1 1 2 2 2
## 108 1 2 2 2 2
## 127 2 1 2 2 2
## 146 2 1 1 1 2
## 174 1 2 1 1 1
## 211 1 1 1 1 1
## 257 1 1 1 1 1
## 289 1 1 1 1 1
## 303 1 1 1 1 1
## 377 1 1 1 1 1
## 406 2 2 2 2 1
## 501 1 1 1 1 1
## 534 1 1 1 1 1
## 544 1 2 2 1 1
## 569 2 1 2 2 2
## 681 1 1 1 1 1
## 684 1 1 1 1 1
## 728 2 2 1 1 2
## 737 1 2 2 1 1
## 755 1 1 1 1 1
## 764 1 2 2 1 1
## 802 1 2 2 1 1
## 808 2 1 1 1 1
## 817 2 1 1 2 1
mdata$imp$b5new
## 1 2 3 4 5
## 51 2 2 1 2 2
## 72 3 2 2 2 2
## 145 3 3 3 3 3
## 197 1 1 2 2 1
## 250 2 3 1 3 2
## 291 2 1 3 2 2
## 350 1 1 1 1 1
## 369 3 2 2 2 2
## 526 3 3 3 3 3
## 535 2 2 2 3 2
## 566 1 1 1 1 1
## 808 3 2 3 1 2
## 814 1 1 1 1 1
mdata$imp$q1
## 1 2 3 4 5
## 20 0 0 1 0 1
## 755 0 0 0 0 0
## 808 1 0 0 0 0
mdata$imp$q2
## 1 2 3 4 5
## 20 0 0 0 0 0
## 150 0 0 0 0 0
## 755 0 0 0 0 0
## 808 1 0 0 0 0
mdata$imp$q3
## 1 2 3 4 5
## 17 0 1 0 0 0
## 20 0 0 1 1 1
## 33 0 0 1 0 0
## 142 1 0 0 0 0
## 209 1 1 1 0 1
## 270 1 1 1 1 0
## 331 1 0 1 0 1
## 707 0 0 0 0 1
## 755 0 0 0 0 0
## 808 0 1 1 1 1
mdata$imp$q4
## 1 2 3 4 5
## 20 0 1 1 0 0
## 755 0 0 1 1 1
## 808 0 0 0 0 0
tao dataset moi
newdata=complete(mdata, action=5)
head(newdata)
## b6new honnhan nghe thunhapcanhan hocvan1 nhomtuoi h1new h6new sh1new b2
## 1 0 1 2 3 3 1 1 0 3 2
## 2 0 1 1 3 1 3 1 0 1 1
## 3 0 1 1 3 3 2 1 0 3 1
## 4 0 2 1 3 3 1 1 0 2 2
## 5 0 2 1 2 3 1 0 0 3 3
## 6 0 1 1 2 1 4 0 0 3 3
## b12 b15new b18new b5new q1 q2 q3 q4
## 1 3 3 1 2 0 0 0 0
## 2 3 3 1 3 0 0 0 1
## 3 2 1 1 2 0 1 0 1
## 4 2 2 1 1 0 0 1 1
## 5 3 3 1 1 0 0 0 0
## 6 2 2 2 2 0 0 0 1
aggr(newdata, prop=T, mumbers=T)

chay logistic
yvar = newdata[,1]
xvars = newdata[,-1]
library(BMA)
## Loading required package: survival
## Loading required package: leaps
## Loading required package: robustbase
##
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
##
## heart
## Loading required package: inline
## Loading required package: rrcov
## Scalable Robust Estimators with High Breakdown Point (version 1.4-3)
bma.search=bic.glm(xvars, yvar, strict = T,OR=20, glm.family = "binomial")
bma.search
##
## Call:
## bic.glm.data.frame(x = xvars, y = yvar, glm.family = "binomial", strict = T, OR = 20)
##
##
## Posterior probabilities(%):
## honnhan nghe thunhapcanhan hocvan1 nhomtuoi
## 0.0 0.0 0.0 0.0 0.0
## h1new h6new sh1new b2 b12
## 100.0 91.0 0.0 100.0 77.8
## b15new b18new b5new q1 q2
## 22.2 0.0 100.0 0.0 54.8
## q3 q4
## 0.0 0.0
##
## Coefficient posterior expected values:
## (Intercept) honnhan. nghe. thunhapcanhan.
## -2.15180 0.00000 0.00000 0.00000
## thunhapcanhan. hocvan1. hocvan1. nhomtuoi.
## 0.00000 0.00000 0.00000 0.00000
## nhomtuoi. nhomtuoi. h1new h6new
## 0.00000 0.00000 0.83924 0.62911
## sh1new. sh1new. b2. b2.
## 0.00000 0.00000 1.23115 1.17462
## b2. b2. b12. b12.
## 1.97023 0.76201 -0.20209 0.48483
## b15new. b15new. b18new. b5new.
## -0.01166 0.16935 0.00000 -0.54657
## b5new. q1 q2 q3
## -1.08202 0.00000 0.45248 0.00000
## q4
## 0.00000
summary(bma.search)
##
## Call:
## bic.glm.data.frame(x = xvars, y = yvar, glm.family = "binomial", strict = T, OR = 20)
##
##
## 6 models were selected
## Best 5 models (cumulative posterior probability = 0.9659 ):
##
## p!=0 EV SD model 1 model 2
## Intercept 100 -2.15180 0.3711 -2.222e+00 -2.016e+00
## honnhan.x 0.0
## .2 0.00000 0.0000 . .
## nghe.x 0.0
## .2 0.00000 0.0000 . .
## thunhapcanhan.x 0.0
## .2 0.00000 0.0000 . .
## .3 0.00000 0.0000 . .
## hocvan1.x 0.0
## .2 0.00000 0.0000 . .
## .3 0.00000 0.0000 . .
## nhomtuoi.x 0.0
## .2 0.00000 0.0000 . .
## .3 0.00000 0.0000 . .
## .4 0.00000 0.0000 . .
## h1new.x 100.0 0.83924 0.2123 8.137e-01 8.247e-01
## h6new.x 91.0 0.62911 0.2868 7.141e-01 6.875e-01
## sh1new.x 0.0
## .2 0.00000 0.0000 . .
## .3 0.00000 0.0000 . .
## b2.x 100.0
## .2 1.23115 0.2067 1.238e+00 1.236e+00
## .3 1.17462 0.2853 1.165e+00 1.160e+00
## .4 1.97023 0.2966 1.940e+00 2.018e+00
## .5 0.76201 0.5663 7.851e-01 8.459e-01
## b12.x 77.8
## .2 -0.20209 0.3260 -1.830e-01 -3.477e-01
## .3 0.48483 0.3905 7.013e-01 5.376e-01
## b15new.x 22.2
## .2 -0.01166 0.1333 . .
## .3 0.16935 0.3419 . .
## b18new.x 0.0
## .2 0.00000 0.0000 . .
## b5new.x 100.0
## .2 -0.54657 0.2051 -5.662e-01 -5.713e-01
## .3 -1.08202 0.2243 -1.112e+00 -1.078e+00
## q1.x 0.0 0.00000 0.0000 . .
## q2.x 54.8 0.45248 0.4685 8.059e-01 .
## q3.x 0.0 0.00000 0.0000 . .
## q4.x 0.0 0.00000 0.0000 . .
##
## nVar 6 5
## BIC -4.623e+03 -4.622e+03
## post prob 0.385 0.337
## model 3 model 4 model 5
## Intercept -2.301e+00 -2.116e+00 -2.064e+00
## honnhan.x
## .2 . . .
## nghe.x
## .2 . . .
## thunhapcanhan.x
## .2 . . .
## .3 . . .
## hocvan1.x
## .2 . . .
## .3 . . .
## nhomtuoi.x
## .2 . . .
## .3 . . .
## .4 . . .
## h1new.x 8.166e-01 8.251e-01 1.051e+00
## h6new.x 6.583e-01 6.393e-01 .
## sh1new.x
## .2 . . .
## .3 . . .
## b2.x
## .2 1.220e+00 1.215e+00 1.216e+00
## .3 1.211e+00 1.191e+00 1.189e+00
## .4 1.921e+00 2.007e+00 2.003e+00
## .5 5.741e-01 6.444e-01 7.991e-01
## b12.x
## .2 . . -2.582e-01
## .3 . . 5.998e-01
## b15new.x
## .2 -2.941e-02 -1.640e-01 .
## .3 7.922e-01 6.421e-01 .
## b18new.x
## .2 . . .
## b5new.x
## .2 -4.589e-01 -4.651e-01 -5.915e-01
## .3 -1.047e+00 -1.005e+00 -1.081e+00
## q1.x . . .
## q2.x 8.764e-01 . .
## q3.x . . .
## q4.x . . .
##
## nVar 6 5 4
## BIC -4.620e+03 -4.619e+03 -4.619e+03
## post prob 0.129 0.059 0.056
imageplot.bma(bma.search)

#model1
log1<- glm(b6new ~ h1new +h6new+ b2+ b12+ b5new + q2 ,family=binomial(logit),data=newdata)
library ("epiDisplay")
## Loading required package: MASS
## Loading required package: nnet
##
## Attaching package: 'epiDisplay'
## The following objects are masked from 'package:mice':
##
## cc, cci
logistic.display(log1)
##
## Logistic regression predicting b6new
##
## crude OR(95%CI) adj. OR(95%CI) P(Wald's test)
## h1new: 1 vs 0 3.46 (2.46,4.88) 2.26 (1.51,3.36) < 0.001
##
## h6new: 1 vs 0 2.94 (2.05,4.2) 2.04 (1.33,3.13) 0.001
##
## b2: ref.=1
## 2 3.82 (2.63,5.57) 3.45 (2.3,5.18) < 0.001
## 3 3.94 (2.36,6.59) 3.2 (1.83,5.6) < 0.001
## 4 8.2 (4.81,13.97) 6.96 (3.9,12.42) < 0.001
## 5 1.64 (0.58,4.63) 2.19 (0.73,6.57) 0.161
##
## b12: ref.=1
## 2 1.2 (0.68,2.11) 0.83 (0.42,1.64) 0.596
## 3 2.76 (1.61,4.73) 2.02 (1.06,3.83) 0.032
##
## b5new: ref.=1
## 2 0.58 (0.41,0.83) 0.57 (0.38,0.84) 0.005
## 3 0.35 (0.24,0.51) 0.33 (0.21,0.51) < 0.001
##
## q2: 1 vs 0 1.91 (1.17,3.11) 2.24 (1.24,4.06) 0.008
##
## P(LR-test)
## h1new: 1 vs 0 < 0.001
##
## h6new: 1 vs 0 < 0.001
##
## b2: ref.=1 < 0.001
## 2
## 3
## 4
## 5
##
## b12: ref.=1 < 0.001
## 2
## 3
##
## b5new: ref.=1 < 0.001
## 2
## 3
##
## q2: 1 vs 0 0.008
##
## Log-likelihood = -410.7994
## No. of observations = 823
## AIC value = 845.5987
#model2
log2<- glm(b6new ~ h1new +h6new+ b2+ b12+b5new ,family=binomial(logit),data=newdata)
library("epiDisplay")
logistic.display(log2)
##
## Logistic regression predicting b6new
##
## crude OR(95%CI) adj. OR(95%CI) P(Wald's test)
## h1new: 1 vs 0 3.46 (2.46,4.88) 2.28 (1.53,3.4) < 0.001
##
## h6new: 1 vs 0 2.94 (2.05,4.2) 1.99 (1.3,3.03) 0.001
##
## b2: ref.=1
## 2 3.82 (2.63,5.57) 3.44 (2.3,5.16) < 0.001
## 3 3.94 (2.36,6.59) 3.19 (1.83,5.55) < 0.001
## 4 8.2 (4.81,13.97) 7.52 (4.24,13.34) < 0.001
## 5 1.64 (0.58,4.63) 2.33 (0.79,6.9) 0.127
##
## b12: ref.=1
## 2 1.2 (0.68,2.11) 0.71 (0.37,1.36) 0.298
## 3 2.76 (1.61,4.73) 1.71 (0.92,3.18) 0.089
##
## b5new: ref.=1
## 2 0.58 (0.41,0.83) 0.56 (0.38,0.84) 0.004
## 3 0.35 (0.24,0.51) 0.34 (0.22,0.53) < 0.001
##
## P(LR-test)
## h1new: 1 vs 0 < 0.001
##
## h6new: 1 vs 0 0.001
##
## b2: ref.=1 < 0.001
## 2
## 3
## 4
## 5
##
## b12: ref.=1 < 0.001
## 2
## 3
##
## b5new: ref.=1 < 0.001
## 2
## 3
##
## Log-likelihood = -414.2893
## No. of observations = 823
## AIC value = 850.5786