# Set the default knitr chunk option `echo = TRUE` for the chunks that follow.
knitr::opts_chunk$set(echo = TRUE)
library(mice)
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## Loading required package: data.table
## Warning: package 'data.table' was built under R version 3.4.4
## VIM is ready to use.
## Since version 4.0.0 the GUI is in its own package VIMGUI.
##
## Please use the package to use the new (and old) GUI.
## Suggestions and bug-reports can be submitted at: https://github.com/alexkowa/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
library(lattice)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4
# Load the diabetes data set into `d` and print a per-column summary.
# The file location can be overridden with the DIABETES_CSV environment
# variable so the script is not tied to one machine; when the variable is
# unset, the original absolute path is used unchanged.
diabetes_csv <- Sys.getenv(
  "DIABETES_CSV",
  unset = "C:/Users/atan/Desktop/All/1 study/1 HU/a.502/diabetes.csv"
)
d <- read.csv(file = diabetes_csv, header = TRUE, sep = ",")
summary(d)
## Pregnancies Glucose BloodPressure SkinThickness
## Min. : 0.000 Min. : 0.0 Min. : 0.00 Min. : 0.00
## 1st Qu.: 1.000 1st Qu.: 99.0 1st Qu.: 62.00 1st Qu.: 0.00
## Median : 3.000 Median :117.0 Median : 72.00 Median :23.00
## Mean : 3.845 Mean :120.9 Mean : 69.11 Mean :20.54
## 3rd Qu.: 6.000 3rd Qu.:140.2 3rd Qu.: 80.00 3rd Qu.:32.00
## Max. :17.000 Max. :199.0 Max. :122.00 Max. :99.00
## Insulin BMI DiabetesPedigreeFunction Age
## Min. : 0.0 Min. : 0.00 Min. :0.0780 Min. :21.00
## 1st Qu.: 0.0 1st Qu.:27.30 1st Qu.:0.2437 1st Qu.:24.00
## Median : 30.5 Median :32.00 Median :0.3725 Median :29.00
## Mean : 79.8 Mean :31.99 Mean :0.4719 Mean :33.24
## 3rd Qu.:127.2 3rd Qu.:36.60 3rd Qu.:0.6262 3rd Qu.:41.00
## Max. :846.0 Max. :67.10 Max. :2.4200 Max. :81.00
## Outcome
## Min. :0.000
## 1st Qu.:0.000
## Median :0.000
## Mean :0.349
## 3rd Qu.:1.000
## Max. :1.000
# Correlation matrix across all columns of `d` (Pearson is the default method).
stats::cor(x = d, method = "pearson")
## Pregnancies Glucose BloodPressure
## Pregnancies 1.00000000 0.12945867 0.14128198
## Glucose 0.12945867 1.00000000 0.15258959
## BloodPressure 0.14128198 0.15258959 1.00000000
## SkinThickness -0.08167177 0.05732789 0.20737054
## Insulin -0.07353461 0.33135711 0.08893338
## BMI 0.01768309 0.22107107 0.28180529
## DiabetesPedigreeFunction -0.03352267 0.13733730 0.04126495
## Age 0.54434123 0.26351432 0.23952795
## Outcome 0.22189815 0.46658140 0.06506836
## SkinThickness Insulin BMI
## Pregnancies -0.08167177 -0.07353461 0.01768309
## Glucose 0.05732789 0.33135711 0.22107107
## BloodPressure 0.20737054 0.08893338 0.28180529
## SkinThickness 1.00000000 0.43678257 0.39257320
## Insulin 0.43678257 1.00000000 0.19785906
## BMI 0.39257320 0.19785906 1.00000000
## DiabetesPedigreeFunction 0.18392757 0.18507093 0.14064695
## Age -0.11397026 -0.04216295 0.03624187
## Outcome 0.07475223 0.13054795 0.29269466
## DiabetesPedigreeFunction Age Outcome
## Pregnancies -0.03352267 0.54434123 0.22189815
## Glucose 0.13733730 0.26351432 0.46658140
## BloodPressure 0.04126495 0.23952795 0.06506836
## SkinThickness 0.18392757 -0.11397026 0.07475223
## Insulin 0.18507093 -0.04216295 0.13054795
## BMI 0.14064695 0.03624187 0.29269466
## DiabetesPedigreeFunction 1.00000000 0.03356131 0.17384407
## Age 0.03356131 1.00000000 0.23835598
## Outcome 0.17384407 0.23835598 1.00000000
# Work on the first 20 rows of `d`. head() clamps to the rows that actually
# exist, whereas d[1:20, ] would pad a shorter data frame with all-NA rows.
d2 <- head(d, 20L)
# Recode zeros in Insulin and SkinThickness as missing values (NA).
d2$Insulin[d2$Insulin == 0] <- NA
d2$SkinThickness[d2$SkinThickness == 0] <- NA
# Tabulate the missing-data patterns of the recoded subset.
md.pattern(d2)
## Pregnancies Glucose BloodPressure BMI DiabetesPedigreeFunction Age
## 9 1 1 1 1 1 1
## 2 1 1 1 1 1 1
## 9 1 1 1 1 1 1
## 0 0 0 0 0 0
## Outcome SkinThickness Insulin
## 9 1 1 1 0
## 2 1 1 0 1
## 9 1 0 0 2
## 0 9 11 20
# Pairwise observed/missing counts for every pair of columns in `d2`.
p <- mice::md.pairs(d2)
# Margin plot of columns 3 and 4 of `d2` with a three-colour palette.
VIM::marginplot(d2[, 3:4], col = c("blue", "red", "orange"))
# Parallel boxplots with the variable in column position 2 as the one of
# interest.
VIM::pbox(d2, pos = 2)
## Warning in createPlot(main, sub, xlab, ylab, labels, ca$at): not enough
## space to display frequencies
# Multiply impute the missing values in `d2`, producing m = 5 imputed data sets.
# A fixed seed makes the imputations reproducible from run to run; output
# recorded in this file from an earlier unseeded run will show different
# imputed values.
imp1 <- mice(d2, m = 5, seed = 502)
##
## iter imp variable
## 1 1 SkinThickness Insulin
## 1 2 SkinThickness Insulin
## 1 3 SkinThickness Insulin
## 1 4 SkinThickness Insulin
## 1 5 SkinThickness Insulin
## 2 1 SkinThickness Insulin
## 2 2 SkinThickness Insulin
## 2 3 SkinThickness Insulin
## 2 4 SkinThickness Insulin
## 2 5 SkinThickness Insulin
## 3 1 SkinThickness Insulin
## 3 2 SkinThickness Insulin
## 3 3 SkinThickness Insulin
## 3 4 SkinThickness Insulin
## 3 5 SkinThickness Insulin
## 4 1 SkinThickness Insulin
## 4 2 SkinThickness Insulin
## 4 3 SkinThickness Insulin
## 4 4 SkinThickness Insulin
## 4 5 SkinThickness Insulin
## 5 1 SkinThickness Insulin
## 5 2 SkinThickness Insulin
## 5 3 SkinThickness Insulin
## 5 4 SkinThickness Insulin
## 5 5 SkinThickness Insulin
# Show the summary of the multiply imputed data set object.
print(imp1)
## Multiply imputed data set
## Call:
## mice(data = d2, m = 5)
## Number of multiple imputations: 5
## Missing cells per column:
## Pregnancies Glucose BloodPressure
## 0 0 0
## SkinThickness Insulin BMI
## 9 11 0
## DiabetesPedigreeFunction Age Outcome
## 0 0 0
## Imputation methods:
## Pregnancies Glucose BloodPressure
## "" "" ""
## SkinThickness Insulin BMI
## "pmm" "pmm" ""
## DiabetesPedigreeFunction Age Outcome
## "" "" ""
## VisitSequence:
## SkinThickness Insulin
## 4 5
## PredictorMatrix:
## Pregnancies Glucose BloodPressure SkinThickness
## Pregnancies 0 0 0 0
## Glucose 0 0 0 0
## BloodPressure 0 0 0 0
## SkinThickness 1 1 1 0
## Insulin 1 1 1 1
## BMI 0 0 0 0
## DiabetesPedigreeFunction 0 0 0 0
## Age 0 0 0 0
## Outcome 0 0 0 0
## Insulin BMI DiabetesPedigreeFunction Age Outcome
## Pregnancies 0 0 0 0 0
## Glucose 0 0 0 0 0
## BloodPressure 0 0 0 0 0
## SkinThickness 1 1 1 1 1
## Insulin 0 1 1 1 1
## BMI 0 0 0 0 0
## DiabetesPedigreeFunction 0 0 0 0 0
## Age 0 0 0 0 0
## Outcome 0 0 0 0 0
## Random generator seed value: NA
# Imputed Insulin values stored in `imp1`.
imp1[["imp"]][["Insulin"]]
## 1 2 3 4 5
## 1 175 543 168 846 543
## 2 543 846 168 230 230
## 3 83 83 168 96 94
## 6 168 230 83 96 230
## 8 88 88 88 168 96
## 10 175 543 230 543 230
## 11 96 83 846 96 96
## 12 168 83 83 83 168
## 13 168 230 543 168 230
## 16 94 94 168 94 96
## 18 88 88 94 168 168
# Stack the imputed data sets in long format, including the original
# (unimputed) data. Argument names are spelled out in full rather than relying
# on partial matching (`inc`).
imp_tot2 <- complete(imp1, action = "long", include = TRUE)
# Note that the `echo = FALSE` parameter was added to the code chunk to prevent
# printing of the R code that generated the plot.