knitr::opts_chunk$set(echo = TRUE)
library(mice)
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## Loading required package: data.table
## Warning: package 'data.table' was built under R version 3.4.4
## VIM is ready to use. 
##  Since version 4.0.0 the GUI is in its own package VIMGUI.
## 
##           Please use the package to use the new (and old) GUI.
## Suggestions and bug-reports can be submitted at: https://github.com/alexkowa/VIM/issues
## 
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
## 
##     sleep
library(lattice)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4

Multiple imputation

Perform the multiple imputation on the first 20 records.

# Load the diabetes data set. read.csv() already defaults to header = TRUE and
# sep = ",", so only the path has to be given.
# NOTE(review): the path is absolute and machine-specific; the script will only
# run where this exact file location exists.
d <- read.csv(
  "C:/Users/atan/Desktop/All/1 study/1 HU/a.502/diabetes.csv"
)
# Per-column summary of the full data set.
summary(d)
##   Pregnancies        Glucose      BloodPressure    SkinThickness  
##  Min.   : 0.000   Min.   :  0.0   Min.   :  0.00   Min.   : 0.00  
##  1st Qu.: 1.000   1st Qu.: 99.0   1st Qu.: 62.00   1st Qu.: 0.00  
##  Median : 3.000   Median :117.0   Median : 72.00   Median :23.00  
##  Mean   : 3.845   Mean   :120.9   Mean   : 69.11   Mean   :20.54  
##  3rd Qu.: 6.000   3rd Qu.:140.2   3rd Qu.: 80.00   3rd Qu.:32.00  
##  Max.   :17.000   Max.   :199.0   Max.   :122.00   Max.   :99.00  
##     Insulin           BMI        DiabetesPedigreeFunction      Age       
##  Min.   :  0.0   Min.   : 0.00   Min.   :0.0780           Min.   :21.00  
##  1st Qu.:  0.0   1st Qu.:27.30   1st Qu.:0.2437           1st Qu.:24.00  
##  Median : 30.5   Median :32.00   Median :0.3725           Median :29.00  
##  Mean   : 79.8   Mean   :31.99   Mean   :0.4719           Mean   :33.24  
##  3rd Qu.:127.2   3rd Qu.:36.60   3rd Qu.:0.6262           3rd Qu.:41.00  
##  Max.   :846.0   Max.   :67.10   Max.   :2.4200           Max.   :81.00  
##     Outcome     
##  Min.   :0.000  
##  1st Qu.:0.000  
##  Median :0.000  
##  Mean   :0.349  
##  3rd Qu.:1.000  
##  Max.   :1.000
# Correlation matrix across all columns of d.
cor(d)
##                          Pregnancies    Glucose BloodPressure
## Pregnancies               1.00000000 0.12945867    0.14128198
## Glucose                   0.12945867 1.00000000    0.15258959
## BloodPressure             0.14128198 0.15258959    1.00000000
## SkinThickness            -0.08167177 0.05732789    0.20737054
## Insulin                  -0.07353461 0.33135711    0.08893338
## BMI                       0.01768309 0.22107107    0.28180529
## DiabetesPedigreeFunction -0.03352267 0.13733730    0.04126495
## Age                       0.54434123 0.26351432    0.23952795
## Outcome                   0.22189815 0.46658140    0.06506836
##                          SkinThickness     Insulin        BMI
## Pregnancies                -0.08167177 -0.07353461 0.01768309
## Glucose                     0.05732789  0.33135711 0.22107107
## BloodPressure               0.20737054  0.08893338 0.28180529
## SkinThickness               1.00000000  0.43678257 0.39257320
## Insulin                     0.43678257  1.00000000 0.19785906
## BMI                         0.39257320  0.19785906 1.00000000
## DiabetesPedigreeFunction    0.18392757  0.18507093 0.14064695
## Age                        -0.11397026 -0.04216295 0.03624187
## Outcome                     0.07475223  0.13054795 0.29269466
##                          DiabetesPedigreeFunction         Age    Outcome
## Pregnancies                           -0.03352267  0.54434123 0.22189815
## Glucose                                0.13733730  0.26351432 0.46658140
## BloodPressure                          0.04126495  0.23952795 0.06506836
## SkinThickness                          0.18392757 -0.11397026 0.07475223
## Insulin                                0.18507093 -0.04216295 0.13054795
## BMI                                    0.14064695  0.03624187 0.29269466
## DiabetesPedigreeFunction               1.00000000  0.03356131 0.17384407
## Age                                    0.03356131  1.00000000 0.23835598
## Outcome                                0.17384407  0.23835598 1.00000000
# Keep only the first 20 records for the imputation exercise.
d2 <- d[seq_len(20), ]
# Zeros in Insulin and SkinThickness are recoded to NA so that they are
# treated as missing values.
# NOTE(review): the summary of d also shows a minimum of 0 for Glucose,
# BloodPressure and BMI; those columns are not recoded here - confirm that
# this is intended.
d2$Insulin <- replace(d2$Insulin, d2$Insulin == 0, NA)
d2$SkinThickness <- replace(d2$SkinThickness, d2$SkinThickness == 0, NA)

Check patterns

# Tabulate the missing-data patterns of d2 (md.pattern() comes from mice).
md.pattern(d2)
##   Pregnancies Glucose BloodPressure BMI DiabetesPedigreeFunction Age
## 9           1       1             1   1                        1   1
## 2           1       1             1   1                        1   1
## 9           1       1             1   1                        1   1
##             0       0             0   0                        0   0
##   Outcome SkinThickness Insulin   
## 9       1             1       1  0
## 2       1             1       0  1
## 9       1             0       0  2
##         0             9      11 20
# Store the result of md.pairs(d2) (from mice) in p.
# NOTE(review): p is not referenced again in the visible part of this file.
p<-md.pairs(d2)

Plot the missing values

# Margin plot of BloodPressure against SkinThickness (columns 3 and 4 of d2).
marginplot(
  d2[c("BloodPressure", "SkinThickness")],
  col = c("blue", "red", "orange")
)

# VIM pbox() on d2 with pos = 2
# (presumably the column index of the variable of interest - confirm).
pbox(d2, pos = 2)
## Warning in createPlot(main, sub, xlab, ylab, labels, ca$at): not enough
## space to display frequencies

Run the multiple imputation

# Generate m = 5 imputed data sets for d2 using mice's default settings.
# A fixed seed makes the imputations reproducible: without it mice records the
# seed as NA and every run (or knit) produces different imputed values.
imp1 <- mice(d2, m = 5, seed = 502)
## 
##  iter imp variable
##   1   1  SkinThickness  Insulin
##   1   2  SkinThickness  Insulin
##   1   3  SkinThickness  Insulin
##   1   4  SkinThickness  Insulin
##   1   5  SkinThickness  Insulin
##   2   1  SkinThickness  Insulin
##   2   2  SkinThickness  Insulin
##   2   3  SkinThickness  Insulin
##   2   4  SkinThickness  Insulin
##   2   5  SkinThickness  Insulin
##   3   1  SkinThickness  Insulin
##   3   2  SkinThickness  Insulin
##   3   3  SkinThickness  Insulin
##   3   4  SkinThickness  Insulin
##   3   5  SkinThickness  Insulin
##   4   1  SkinThickness  Insulin
##   4   2  SkinThickness  Insulin
##   4   3  SkinThickness  Insulin
##   4   4  SkinThickness  Insulin
##   4   5  SkinThickness  Insulin
##   5   1  SkinThickness  Insulin
##   5   2  SkinThickness  Insulin
##   5   3  SkinThickness  Insulin
##   5   4  SkinThickness  Insulin
##   5   5  SkinThickness  Insulin
# Print the imputation object (call, missing cells per column, methods,
# predictor matrix).
print(imp1)
## Multiply imputed data set
## Call:
## mice(data = d2, m = 5)
## Number of multiple imputations:  5
## Missing cells per column:
##              Pregnancies                  Glucose            BloodPressure 
##                        0                        0                        0 
##            SkinThickness                  Insulin                      BMI 
##                        9                       11                        0 
## DiabetesPedigreeFunction                      Age                  Outcome 
##                        0                        0                        0 
## Imputation methods:
##              Pregnancies                  Glucose            BloodPressure 
##                       ""                       ""                       "" 
##            SkinThickness                  Insulin                      BMI 
##                    "pmm"                    "pmm"                       "" 
## DiabetesPedigreeFunction                      Age                  Outcome 
##                       ""                       ""                       "" 
## VisitSequence:
## SkinThickness       Insulin 
##             4             5 
## PredictorMatrix:
##                          Pregnancies Glucose BloodPressure SkinThickness
## Pregnancies                        0       0             0             0
## Glucose                            0       0             0             0
## BloodPressure                      0       0             0             0
## SkinThickness                      1       1             1             0
## Insulin                            1       1             1             1
## BMI                                0       0             0             0
## DiabetesPedigreeFunction           0       0             0             0
## Age                                0       0             0             0
## Outcome                            0       0             0             0
##                          Insulin BMI DiabetesPedigreeFunction Age Outcome
## Pregnancies                    0   0                        0   0       0
## Glucose                        0   0                        0   0       0
## BloodPressure                  0   0                        0   0       0
## SkinThickness                  1   1                        1   1       1
## Insulin                        0   1                        1   1       1
## BMI                            0   0                        0   0       0
## DiabetesPedigreeFunction       0   0                        0   0       0
## Age                            0   0                        0   0       0
## Outcome                        0   0                        0   0       0
## Random generator seed value:  NA
# Imputed Insulin values: one row per record with missing Insulin, one column
# per imputation.
imp1[["imp"]][["Insulin"]]
##      1   2   3   4   5
## 1  175 543 168 846 543
## 2  543 846 168 230 230
## 3   83  83 168  96  94
## 6  168 230  83  96 230
## 8   88  88  88 168  96
## 10 175 543 230 543 230
## 11  96  83 846  96  96
## 12 168  83  83  83 168
## 13 168 230 543 168 230
## 16  94  94 168  94  96
## 18  88  88  94 168 168
# Stack the imputed data sets in long format; include = TRUE also keeps the
# original (incomplete) data in the result.
# The argument is spelled out in full instead of relying on partial matching
# of `inc`, and `action` is named so the call stays unambiguous.
imp_tot2 <- complete(imp1, action = "long", include = TRUE)

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.