Description

Diabetes is a disease that affects your body’s ability to produce or use insulin. Insulin is a hormone. When your body turns the food you eat into energy (also called sugar or glucose), insulin is released to help transport this energy to the cells. Insulin acts as a “key.” Its chemical message tells the cell to open and receive glucose. If you produce little or no insulin, or are insulin resistant, too much sugar remains in your blood. Blood glucose levels are higher than normal for individuals with diabetes. There are two main types of diabetes: Type 1 and Type 2.

Dataset source

Based on several studies, a commonly used dataset is the Pima Indians Diabetes Dataset from the University of California, Irvine (UCI) Machine Learning Repository. This dataset describes the medical records of female patients of Pima Indian heritage who are at least 21 years old.

library(tidyverse)
## -- Attaching packages --------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.0     v purrr   0.3.2
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   0.8.3     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(DataExplorer)
library(funModeling)        
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## funModeling v.1.8 :)
## Examples and tutorials at livebook.datascienceheroes.com
library(ggplot2)
library(corrplot)
## corrplot 0.84 loaded
library(caret)
## 
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
## 
##     cluster
## The following object is masked from 'package:purrr':
## 
##     lift
library(caretEnsemble)
## 
## Attaching package: 'caretEnsemble'
## The following object is masked from 'package:ggplot2':
## 
##     autoplot
library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:Hmisc':
## 
##     describe
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(Amelia)
## Loading required package: Rcpp
## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.5, built: 2018-05-07)
## ## Copyright (C) 2005-2019 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
library(mice)
## 
## Attaching package: 'mice'
## The following object is masked from 'package:tidyr':
## 
##     complete
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:funModeling':
## 
##     range01
## The following object is masked from 'package:dplyr':
## 
##     nasa
library(rpart)
library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:psych':
## 
##     outlier
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(dplyr)
library(heplots)
## Loading required package: car
## Loading required package: carData
## Registered S3 methods overwritten by 'car':
##   method                          from
##   influence.merMod                lme4
##   cooks.distance.influence.merMod lme4
##   dfbeta.influence.merMod         lme4
##   dfbetas.influence.merMod        lme4
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
library(candisc)
## 
## Attaching package: 'candisc'
## The following object is masked from 'package:stats':
## 
##     cancor

##Loading dataset and overview

Field descriptions follow:

preg = Number of times pregnant

plas = Plasma glucose concentration at 2 hours in an oral glucose tolerance test

pres = Diastolic blood pressure (mm Hg)

skin = Triceps skin fold thickness (mm)

test = 2-Hour serum insulin (mu U/ml)

mass = Body mass index (weight in kg/(height in m)^2)

pedi = Diabetes pedigree function

age = Age (years)

outcome = Class variable (1:tested positive for diabetes, 0: tested negative for diabetes)

##——————–

# Read the Pima Indians diabetes records (.csv) into a tibble.
# NOTE(review): the path is absolute and machine-specific; adjust it before
# running this script on another computer.
diabetes <- readr::read_csv(
  file = "C:/Users/Joe/Documents/Datasets/diabetes.csv"
)
## Parsed with column specification:
## cols(
##   Pregnancies = col_double(),
##   Glucose = col_double(),
##   BloodPressure = col_double(),
##   SkinThickness = col_double(),
##   Insulin = col_double(),
##   BMI = col_double(),
##   DiabetesPedigreeFunction = col_double(),
##   Age = col_double(),
##   Outcome = col_double()
## )
# Recode the 0/1 target as a labelled factor, then report rows x columns.
diabetes[["Outcome"]] <- factor(
  diabetes[["Outcome"]],
  levels = c(0, 1),
  labels = c("False", "True")
)
dim(diabetes)
## [1] 768   9
# Per-column summary statistics; check the minimums for implausible zeros
# (e.g. Glucose, BloodPressure, BMI).
summary(diabetes)
##   Pregnancies        Glucose      BloodPressure    SkinThickness  
##  Min.   : 0.000   Min.   :  0.0   Min.   :  0.00   Min.   : 0.00  
##  1st Qu.: 1.000   1st Qu.: 99.0   1st Qu.: 62.00   1st Qu.: 0.00  
##  Median : 3.000   Median :117.0   Median : 72.00   Median :23.00  
##  Mean   : 3.845   Mean   :120.9   Mean   : 69.11   Mean   :20.54  
##  3rd Qu.: 6.000   3rd Qu.:140.2   3rd Qu.: 80.00   3rd Qu.:32.00  
##  Max.   :17.000   Max.   :199.0   Max.   :122.00   Max.   :99.00  
##     Insulin           BMI        DiabetesPedigreeFunction      Age       
##  Min.   :  0.0   Min.   : 0.00   Min.   :0.0780           Min.   :21.00  
##  1st Qu.:  0.0   1st Qu.:27.30   1st Qu.:0.2437           1st Qu.:24.00  
##  Median : 30.5   Median :32.00   Median :0.3725           Median :29.00  
##  Mean   : 79.8   Mean   :31.99   Mean   :0.4719           Mean   :33.24  
##  3rd Qu.:127.2   3rd Qu.:36.60   3rd Qu.:0.6262           3rd Qu.:41.00  
##  Max.   :846.0   Max.   :67.10   Max.   :2.4200           Max.   :81.00  
##   Outcome   
##  False:500  
##  True :268  
##             
##             
##             
## 
# Class counts for the Outcome factor (shows the class imbalance).
table(diabetes$Outcome)
## 
## False  True 
##   500   268
# Compact structure: column types, leading values and the readr column spec.
str(diabetes)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 768 obs. of  9 variables:
##  $ Pregnancies             : num  6 1 8 1 0 5 3 10 2 8 ...
##  $ Glucose                 : num  148 85 183 89 137 116 78 115 197 125 ...
##  $ BloodPressure           : num  72 66 64 66 40 74 50 0 70 96 ...
##  $ SkinThickness           : num  35 29 0 23 35 0 32 0 45 0 ...
##  $ Insulin                 : num  0 0 0 94 168 0 88 0 543 0 ...
##  $ BMI                     : num  33.6 26.6 23.3 28.1 43.1 25.6 31 35.3 30.5 0 ...
##  $ DiabetesPedigreeFunction: num  0.627 0.351 0.672 0.167 2.288 ...
##  $ Age                     : num  50 31 32 21 33 30 26 29 53 54 ...
##  $ Outcome                 : Factor w/ 2 levels "False","True": 2 1 2 1 2 1 2 1 2 2 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Pregnancies = col_double(),
##   ..   Glucose = col_double(),
##   ..   BloodPressure = col_double(),
##   ..   SkinThickness = col_double(),
##   ..   Insulin = col_double(),
##   ..   BMI = col_double(),
##   ..   DiabetesPedigreeFunction = col_double(),
##   ..   Age = col_double(),
##   ..   Outcome = col_double()
##   .. )
# Column-wise preview: one line per variable with its type and first values.
glimpse(diabetes)
## Observations: 768
## Variables: 9
## $ Pregnancies              <dbl> 6, 1, 8, 1, 0, 5, 3, 10, 2, 8, 4, 10,...
## $ Glucose                  <dbl> 148, 85, 183, 89, 137, 116, 78, 115, ...
## $ BloodPressure            <dbl> 72, 66, 64, 66, 40, 74, 50, 0, 70, 96...
## $ SkinThickness            <dbl> 35, 29, 0, 23, 35, 0, 32, 0, 45, 0, 0...
## $ Insulin                  <dbl> 0, 0, 0, 94, 168, 0, 88, 0, 543, 0, 0...
## $ BMI                      <dbl> 33.6, 26.6, 23.3, 28.1, 43.1, 25.6, 3...
## $ DiabetesPedigreeFunction <dbl> 0.627, 0.351, 0.672, 0.167, 2.288, 0....
## $ Age                      <dbl> 50, 31, 32, 21, 33, 30, 26, 29, 53, 5...
## $ Outcome                  <fct> True, False, True, False, True, False...
# First six rows of the data.
head(diabetes)
## # A tibble: 6 x 9
##   Pregnancies Glucose BloodPressure SkinThickness Insulin   BMI
##         <dbl>   <dbl>         <dbl>         <dbl>   <dbl> <dbl>
## 1           6     148            72            35       0  33.6
## 2           1      85            66            29       0  26.6
## 3           8     183            64             0       0  23.3
## 4           1      89            66            23      94  28.1
## 5           0     137            40            35     168  43.1
## 6           5     116            74             0       0  25.6
## # ... with 3 more variables: DiabetesPedigreeFunction <dbl>, Age <dbl>,
## #   Outcome <fct>
# Per-column data-quality table: count/percentage of zeros, NAs and infinite
# values, plus the column type and number of unique values.
df_status(diabetes)
##                   variable q_zeros p_zeros q_na p_na q_inf p_inf    type
## 1              Pregnancies     111   14.45    0    0     0     0 numeric
## 2                  Glucose       5    0.65    0    0     0     0 numeric
## 3            BloodPressure      35    4.56    0    0     0     0 numeric
## 4            SkinThickness     227   29.56    0    0     0     0 numeric
## 5                  Insulin     374   48.70    0    0     0     0 numeric
## 6                      BMI      11    1.43    0    0     0     0 numeric
## 7 DiabetesPedigreeFunction       0    0.00    0    0     0     0 numeric
## 8                      Age       0    0.00    0    0     0     0 numeric
## 9                  Outcome       0    0.00    0    0     0     0  factor
##   unique
## 1     17
## 2    136
## 3     47
## 4     51
## 5    186
## 6    248
## 7    517
## 8     52
## 9      2
# BMI distribution for each outcome class.
ggplot(data = diabetes, mapping = aes(x = Outcome, y = BMI)) +
  geom_boxplot()

# Glucose against number of pregnancies, coloured by outcome.
ggplot(
  data = diabetes,
  mapping = aes(x = Pregnancies, y = Glucose, colour = Outcome)
) +
  geom_point()

# Age against BMI, coloured by outcome.
ggplot(
  data = diabetes,
  mapping = aes(x = BMI, y = Age, colour = Outcome)
) +
  geom_point()

  1. Glucose and Outcome: notice the zero values in the Glucose column.
# Count of each Glucose value, one facet per outcome class.
ggplot(data = diabetes, mapping = aes(x = Glucose)) +
  geom_bar(mapping = aes(group = Outcome, fill = Outcome)) +
  facet_wrap(facets = ~Outcome)

2. Blood Pressure and Outcome

Notice the count of zero values.

# Count of each BloodPressure value, one facet per outcome class.
# The grouping and fill are set once on the plot and inherited by the layer.
ggplot(
  diabetes,
  aes(x = BloodPressure, group = Outcome, fill = Outcome)
) +
  geom_bar() +
  facet_wrap(~Outcome)

  3. Skin Thickness and Outcome

    Notice very high number of zero values

# Count of each SkinThickness value, one facet per outcome class.
ggplot(data = diabetes, mapping = aes(x = SkinThickness)) +
  geom_bar(mapping = aes(group = Outcome, fill = Outcome)) +
  facet_wrap(facets = ~Outcome)

4. Insulin and Outcome

Notice very high number of zero values

# Count of each Insulin value, one facet per outcome class.
# The grouping and fill are set once on the plot and inherited by the layer.
ggplot(
  diabetes,
  aes(x = Insulin, group = Outcome, fill = Outcome)
) +
  geom_bar() +
  facet_wrap(~Outcome)

  5. BMI and Outcome: notice the zero values in the BMI column.
# BMI is continuous (248 distinct values in this data), so bin it with a
# histogram instead of drawing one hairline bar per distinct value.
# binwidth = 1 keeps the zero-valued records visible as an isolated bar at 0.
ggplot(diabetes, aes(x = BMI, fill = Outcome)) +
  geom_histogram(binwidth = 1) +
  facet_wrap(~Outcome)

  6. Pedigree Function and Outcome
# DiabetesPedigreeFunction is continuous (517 distinct values in 768 rows), so
# a bar per distinct value is unreadable; bin it with a histogram instead.
# The observed range is roughly 0.08 to 2.42, so a bin width of 0.05 is used.
ggplot(diabetes, aes(x = DiabetesPedigreeFunction, fill = Outcome)) +
  geom_histogram(binwidth = 0.05) +
  facet_wrap(~Outcome)

  7. Age and Outcome

    Notice the imbalance

# Count of each Age value, one facet per outcome class.
ggplot(data = diabetes, mapping = aes(x = Age)) +
  geom_bar(mapping = aes(group = Outcome, fill = Outcome)) +
  facet_wrap(facets = ~Outcome)

Correlations between variables (excluding the Outcome column)

# Pairwise correlations of the numeric predictors; the Outcome factor is
# dropped by name before calling cor(). Shown as a matrix of coefficients.
dia_corr <- cor(diabetes[, setdiff(names(diabetes), "Outcome")])
corrplot(dia_corr, method = "number")

Zero is meaningful for the number of pregnancies; zero values in the other variables (columns) are replaced with NA.

# Zero is a legitimate value for Pregnancies, but in these measurement columns
# it is treated as a missing-value code and recoded to NA.
# Columns are selected by name: a positional range such as 2:7 would also cover
# DiabetesPedigreeFunction, which contains no zeros in this data.
zero_coded_cols <- c(
  "Glucose", "BloodPressure", "SkinThickness", "Insulin", "BMI"
)
diabetes[zero_coded_cols] <- lapply(
  diabetes[zero_coded_cols],
  function(x) replace(x, !is.na(x) & x == 0, NA)
)

# Visualise how much of each column is now missing.
plot_missing(diabetes)

library(naniar)
# Plot the number of missing values in each variable.
gg_miss_var(diabetes)

# Number of missing values per column, largest first.
# vapply() pins the result to exactly one integer per column, unlike sapply(),
# whose return type depends on its input.
sort(
  vapply(diabetes, function(x) sum(is.na(x)), integer(1)),
  decreasing = TRUE
)
##                  Insulin            SkinThickness            BloodPressure 
##                      374                      227                       35 
##                      BMI                  Glucose              Pregnancies 
##                       11                        5                        0 
## DiabetesPedigreeFunction                      Age                  Outcome 
##                        0                        0                        0

NA values: the Insulin column has the highest number of NA values, at 48.7%.

Skin Thickness zero values = 29.56%

Using MICE package to replace NA values

## Load mice package for Multivariate Imputation by Chained Equations (MICE)
## (mice is already attached near the top of this script; repeating is harmless)
library(mice)
# Tabulate the missing-data patterns: each row is one pattern (1 = observed,
# 0 = missing), with the number of rows showing it on the left and the number
# of missing variables on the right; the last row gives per-column NA totals.
md.pattern(diabetes)

##     Pregnancies DiabetesPedigreeFunction Age Outcome Glucose BMI
## 392           1                        1   1       1       1   1
## 140           1                        1   1       1       1   1
## 192           1                        1   1       1       1   1
## 2             1                        1   1       1       1   1
## 26            1                        1   1       1       1   1
## 1             1                        1   1       1       1   0
## 1             1                        1   1       1       1   0
## 2             1                        1   1       1       1   0
## 7             1                        1   1       1       1   0
## 1             1                        1   1       1       0   1
## 4             1                        1   1       1       0   1
##               0                        0   0       0       5  11
##     BloodPressure SkinThickness Insulin    
## 392             1             1       1   0
## 140             1             1       0   1
## 192             1             0       0   2
## 2               0             1       0   2
## 26              0             0       0   3
## 1               1             1       1   1
## 1               1             1       0   2
## 2               1             0       0   3
## 7               0             0       0   4
## 1               1             1       1   1
## 4               1             1       0   2
##                35           227     374 652
## in number
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## Loading required package: data.table
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
## VIM is ready to use. 
##  Since version 4.0.0 the GUI is in its own package VIMGUI.
## 
##           Please use the package to use the new (and old) GUI.
## Suggestions and bug-reports can be submitted at: https://github.com/alexkowa/VIM/issues
## 
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
## 
##     sleep
# Missingness overview: counts (prop = FALSE) of missing values per variable
# and per combination of variables, with the counts printed on the plot.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
aggr(diabetes, prop = FALSE, numbers = TRUE)

# Scatterplot matrix that highlights observations with missing Insulin.
scattmatrixMiss(diabetes, interactive = FALSE, highlight = "Insulin")

Imputing the missing data

The mice() function takes care of the imputation process, imputing values for every variable with missing data; the number of pregnancies and the outcome are excluded.

creating a separate dataset for outcome

# Impute the missing measurements with predictive mean matching ("pmm").
# Columns 1 (Pregnancies) and 9 (Outcome) are left out of the imputation data.
# m = 5 imputed data sets, 50 iterations each; the seed makes the run repeatable.
# `method` is written in full rather than relying on partial matching of `meth`.
tempData <- mice(
  diabetes[, -c(1, 9)],
  m = 5,
  maxit = 50,
  method = "pmm",
  seed = 500
)
## 
##  iter imp variable
##   1   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   1   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   1   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   1   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   1   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   2   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   2   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   2   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   2   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   2   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   3   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   3   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   3   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   3   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   3   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   4   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   4   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   4   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   4   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   4   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   5   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   5   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   5   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   5   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   5   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   6   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   6   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   6   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   6   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   6   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   7   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   7   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   7   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   7   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   7   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   8   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   8   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   8   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   8   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   8   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   9   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   9   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   9   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   9   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   9   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   10   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   10   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   10   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   10   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   10   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   11   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   11   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   11   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   11   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   11   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   12   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   12   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   12   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   12   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   12   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   13   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   13   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   13   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   13   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   13   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   14   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   14   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   14   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   14   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   14   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   15   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   15   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   15   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   15   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   15   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   16   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   16   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   16   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   16   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   16   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   17   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   17   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   17   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   17   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   17   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   18   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   18   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   18   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   18   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   18   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   19   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   19   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   19   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   19   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   19   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   20   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   20   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   20   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   20   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   20   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   21   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   21   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   21   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   21   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   21   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   22   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   22   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   22   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   22   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   22   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   23   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   23   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   23   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   23   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   23   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   24   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   24   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   24   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   24   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   24   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   25   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   25   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   25   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   25   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   25   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   26   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   26   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   26   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   26   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   26   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   27   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   27   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   27   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   27   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   27   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   28   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   28   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   28   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   28   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   28   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   29   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   29   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   29   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   29   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   29   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   30   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   30   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   30   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   30   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   30   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   31   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   31   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   31   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   31   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   31   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   32   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   32   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   32   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   32   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   32   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   33   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   33   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   33   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   33   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   33   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   34   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   34   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   34   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   34   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   34   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   35   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   35   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   35   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   35   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   35   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   36   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   36   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   36   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   36   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   36   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   37   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   37   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   37   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   37   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   37   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   38   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   38   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   38   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   38   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   38   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   39   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   39   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   39   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   39   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   39   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   40   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   40   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   40   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   40   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   40   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   41   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   41   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   41   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   41   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   41   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   42   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   42   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   42   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   42   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   42   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   43   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   43   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   43   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   43   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   43   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   44   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   44   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   44   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   44   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   44   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   45   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   45   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   45   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   45   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   45   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   46   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   46   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   46   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   46   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   46   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   47   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   47   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   47   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   47   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   47   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   48   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   48   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   48   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   48   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   48   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   49   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   49   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   49   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   49   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   49   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   50   1  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   50   2  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   50   3  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   50   4  Glucose  BloodPressure  SkinThickness  Insulin  BMI
##   50   5  Glucose  BloodPressure  SkinThickness  Insulin  BMI
# Print the mids object: number of imputations, the imputation method used for
# each column, and the predictor matrix.
summary(tempData)
## Class: mids
## Number of multiple imputations:  5 
## Imputation methods:
##                  Glucose            BloodPressure            SkinThickness 
##                    "pmm"                    "pmm"                    "pmm" 
##                  Insulin                      BMI DiabetesPedigreeFunction 
##                    "pmm"                    "pmm"                       "" 
##                      Age 
##                       "" 
## PredictorMatrix:
##                          Glucose BloodPressure SkinThickness Insulin BMI
## Glucose                        0             1             1       1   1
## BloodPressure                  1             0             1       1   1
## SkinThickness                  1             1             0       1   1
## Insulin                        1             1             1       0   1
## BMI                            1             1             1       1   0
## DiabetesPedigreeFunction       1             1             1       1   1
##                          DiabetesPedigreeFunction Age
## Glucose                                         1   1
## BloodPressure                                   1   1
## SkinThickness                                   1   1
## Insulin                                         1   1
## BMI                                             1   1
## DiabetesPedigreeFunction                        0   1
# Imputed Insulin values: one row per record that was imputed,
# one column per imputation (1 to 5)
tempData[["imp"]][["Insulin"]]
##       1   2   3   4   5
## 1   480 130 237 175 140
## 2    55  42  16  66  23
## 3   579 168 185 192  70
## 6    94  68  79 180  18
## 8    56 326 140 205 215
## 10   63 115 170 220  58
## 11  100  82 120 105  48
## 12  225 168 321 300 680
## 13  220 130 170 284 100
## 16   75 152  94 120  48
## 18   81 192  83 158 258
## 22   88 114 182  44 120
## 23  207 543 510 207 280
## 24   44  82 215 120  63
## 27  130 485 171 328 120
## 30   44 142 182 183 105
## 31   75 106 129 200  96
## 34   40  45  40  55  76
## 35  142  74  94  64 176
## 37  165 140 167 135 128
## 38  258  90 105 120 105
## 39   73 180 135  57  94
## 42  167 204  88 182 165
## 43   76  76  76 110  49
## 45  132 193 155 135 540
## 46  200 846 145 375 744
## 47  144  29  29 193 284
## 48   49  45  66  45  66
## 49  125  94 110 160  87
## 50   65 115 148 160 110
## 56   45  45  45  95  66
## 59  190 328 325 495 140
## 61   60  45  37  54  49
## 62  220 167 480 165 180
## 63   49  15  66  45  49
## 65  119 180  96 190 215
## 66   77  66  65  90  90
## 67   78 165 156 160  82
## 68   44 120  85 105 215
## 73  200  88  88 160 180
## 75   40  55  88  60  66
## 76  110 126 180  84 191
## 77   49  37  15  45  49
## 78  180  90 196  72  48
## 79  250  60 110 167 100
## 80   83 116 325 125 100
## 81   74 182  50  74 105
## 82   55  36  37  55  49
## 84   48  58 116  54  77
## 85  330 240 128 240 155
## 87  160 120 148 165 120
## 90   18 265  71 100  88
## 91   45  66  40  40  45
## 94   83 156 170 480  60
## 97   92  92  90  70  16
## 101 474 321 156  90 300
## 102 171 240 240 310  96
## 103  78 258  25 105 265
## 105  54 116  81  92  87
## 107  76  45  15  45  66
## 114  42  41  74  70  40
## 116  60 167  29 237 480
## 117 105  74 150 335  92
## 118  49  66 140  82  54
## 119  86  54 152  50  90
## 122 175 175  74 145  56
## 124 176 108 170 130 110
## 125 176 130 110  63 191
## 130  77 100 100 130  94
## 132 270 160 122 128 220
## 134  51  36  54  70  84
## 139  92 106  94 291 106
## 141  85 142 112 140  94
## 142 120 148 125 142 278
## 144 196  63 105  75 183
## 146  36  56  87 258  88
## 147  76  76  49  37  15
## 149 144 171 272 135 284
## 150  84  41  50  49  36
## 152 105  68 100 105  64
## 155 249 207 207  14 846
## 156 175 175 168 495 140
## 161 194 160 140 130 140
## 164 152 126  48 210  75
## 165 170 130 285 275  83
## 167 193 135 127 325 165
## 168 130 215 105  89  94
## 169 178 120 125  74 215
## 171  88  54 192 110  66
## 173  84  66  56  70  70
## 177  45  49  43  90  71
## 179 135 545 210 210 130
## 180 150 110 130 130 182
## 181  60  66  36  54  45
## 184  49  95  55  36  66
## 185 480 220 220 135 110
## 186 207 392 392  14 207
## 191 105 265 278 165  92
## 193 440 272 485 168 105
## 194 130 545 140 146 168
## 195  71  37  43 258  82
## 197 192  36  36 100  64
## 201 160 120  71  64  81
## 202 165 165 240 165  88
## 203 114 110 114 175 165
## 206  56  48  83 215  71
## 208 171 237 171 321 144
## 210 192 120 130 200 495
## 211  40  55  49  54  55
## 212 480 228 210 130 193
## 213 114 185 225 130 100
## 219  49  76  53  18  38
## 220 120  77 165 230 106
## 222 100 342 284 255 140
## 223 200 160 148 600 112
## 227  25  50 278 120 210
## 228 168 225 156 185 300
## 231 194 370 130 182 130
## 234 190  92 105  63 148
## 236 225  90 510 120 120
## 238 375 579 185 680 300
## 239 231 272 130  67 140
## 240 115  53  84 105  66
## 241  16  84  64  54  82
## 243  83 220 125 360 360
## 246 185 579 175 130 293
## 247  99 176 170 150 270
## 250 135  50  36 120  90
## 251  63  74 105 115 188
## 252  85 112  78 105 176
## 254 100  85  48  85  71
## 256 140 142  77 180  99
## 257  94  50 115  77 215
## 258  50  71 110 148 196
## 262 285 160 250 165 140
## 263 115  56  72 265  56
## 264 165  96 145 132 110
## 265 140  94 110  61 402
## 267 240 194 250 168 144
## 268 480 182 140 125  29
## 269  48  48  78  78 258
## 270 144 160 110 146 125
## 271 190 200 110  75 130
## 273  63  82 215 285 105
## 275 116  99  89 415 160
## 277  81 116  94 105 100
## 279  57  78 182  63 215
## 281 194 127 126 130 284
## 284 120 130 171 140 284
## 285 258 100 152 200 278
## 295 485 155 440 495 168
## 300 120 125  81 600 145
## 301 225 540 342 465 225
## 304  77 180 415 110 215
## 305  83 275  29 293 110
## 311  37  40 106  85  60
## 315 105  85  63 140 188
## 318 156 135 579 277 185
## 320 225 156  90 207 392
## 322 158 160 148 130 156
## 323 285  85  89 190 326
## 325 142  94 105 130 165
## 328 579 156 185 293 478
## 331 158 110  94 140 105
## 333 375 180 100 846 145
## 334 265  48 115  87  36
## 337 140 105 115 170 180
## 338 110 148 105 165  96
## 340 130 120  90 180 325
## 343  32  54  54 215  66
## 344 180  99 176 106 180
## 345  92 135  99  75 190
## 348  64  57  74  74  79
## 350  54 480 440 250 106
## 351  88  72  56  57  59
## 352 145  63 110 145 275
## 353  66  45  66  45  15
## 355  90 210 180  92  76
## 356 342 193 126 474 168
## 358 130 180 145 275 122
## 362 387 210 140 478 168
## 363  48 160  48 110  90
## 364  96 237 100 185 140
## 367 215 129  63 200 215
## 368  77  32  48  36  49
## 379 168 114 168 321 140
## 382  54  32 105 210  74
## 387  56 188 180  77  85
## 388 115  78  91 130 192
## 392 168 240 321 225 168
## 395 100 272 325 175 495
## 398 150 156 167  88 220
## 399  45  76  36  60  37
## 400 249 159 846 846 846
## 401  57  94 258 116  92
## 402 150 140 330 126 140
## 404  76  37  66  42  37
## 405 474 135 114 510 156
## 407 120 215  74  74 120
## 408  54  66 115 105  66
## 409 543 325 375 274 744
## 411  92 180 196 160 105
## 417  77  77  56  48  75
## 418 318 480 135 130 370
## 419  37  76  45  42  45
## 424 183 142  99  74  78
## 427  88 192  95  54  76
## 431  90 125  50  72  70
## 434 145  74  63 480  94
## 435  46  59  55  90  32
## 436 127 165 110 130 146
## 437 240 360 240 480 125
## 438 228 330 228  96  29
## 439  64  40  70  44  55
## 440 156  91 115 105 160
## 441 180 114 321 495 240
## 444 114 130 120 120 190
## 445 285 326 176  75 110
## 452 335 140 122  88 205
## 454  87 119  72 130 120
## 456 465 680 156 293 185
## 457 167 146  96 105 171
## 462  45  66  49  66  66
## 464  42  42  18  74  49
## 465  50  92  75  83 105
## 469 110 178 170 106 129
## 471 127 293 100 127 293
## 472  60 275  83 130 140
## 473 235 140 140  77 200
## 474 140 140 220 110 220
## 475  99  57  50 110 130
## 476 205 155 275 190  88
## 480 130 152 148 168 167
## 482 120 110 106  99 145
## 485 127 387 245 130 284
## 489  48  43 115  86  84
## 490 293 325 192 274 280
## 492  71  46  44  90  23
## 493 258  25 105 116  90
## 495  60  37  95  60  76
## 496 342 150 271 495 168
## 497 182 152  63 110  54
## 502  46  66  48  43  88
## 503  36 191  89 293 125
## 505  56  56  91 184 210
## 506  66  55  55  55  45
## 510 132 120  89 110 176
## 511  55  64  66  92  64
## 513  49 140  52 120  74
## 514  43  44  65  23 115
## 518 106 166  58 135 122
## 519  76  44  43  58  55
## 523  94 130  94 215  54
## 524 335 135 194  83  60
## 525 220 115 160 200 285
## 526  76  36  23  38  42
## 530 190 114 196  78  91
## 532 188 285  92 190  63
## 534  41  32  85 115  64
## 536 194 205 140  74 130
## 537  66 210  65  94 180
## 538  49  45  49  60  45
## 543  37  50  48 156  88
## 550 255  67 168 478 300
## 551  78 100 129 160  91
## 553 115  57  36 130  94
## 557  54  78 110 160  56
## 558 114 120  91  22  78
## 559 129  53 230 105  99
## 560  76  41  49  82  23
## 561 106 205 204 110 205
## 565 106  76  23  54  82
## 571  95  71  55  18  66
## 572  63 100  74  50 120
## 578 115  79 210 130 140
## 579 205  83 200 240 200
## 580 579 375 207 274 280
## 581 310 370 342 310 130
## 582 100  36 175 175  90
## 583 112 200 142  88  96
## 584  87  57 135  50 120
## 586  52  76 100  74  53
## 587 480 194 127 210 293
## 588 180  77 105  56  59
## 590  45  66  45  66  45
## 591  75 180  83 402 105
## 593  95  61 335 250 160
## 597  60  58  44  56  37
## 599 120 168 194 277 185
## 601  50  92  48  25  59
## 602  52  77 115  57  67
## 603 188 215 270 270 120
## 605 156 140 271 304 180
## 606 275 155 135 200 200
## 614 192 120 100 100  56
## 616  36 105  56 182  88
## 617 110 105  57 116  54
## 619  68  23 135 285  99
## 620 130  74 140 215 190
## 622  23 125  88  52  85
## 623 293 130 465 265 249
## 625  64  68 160 105 196
## 627 106 105 285  94 148
## 628 130 105 156 155 155
## 629  63 155 130 130 122
## 630  82  88  73 115  41
## 631 160 119  74 106 105
## 633  44  79 278  75  94
## 635  77  18  46  36  71
## 636  81 114 100  50 156
## 637 265 120  78 145 192
## 642 210  53 145 155 230
## 643 165 125 480 387 146
## 644 100  49  76  94  54
## 650 114  54 158 105  90
## 654  96 326 105  83 105
## 659 200 105 120 335  96
## 661 100 310 318 321 485
## 662 744  14 744 274 274
## 665  63 285 275  64 207
## 667  29 100 128 125 318
## 668 135 110 130 120  91
## 672  72  86  66  87  76
## 675  71 110  91 148  87
## 676 280 293 145 249 846
## 677 168 146 250 168 168
## 678  78  65 180  36 258
## 679 291 190 110 135 230
## 682 194  90 300 168 474
## 684 155  75  79 600 200
## 685 291 110  77 150 110
## 687 270  63 130 115 120
## 688 175 132  50  74 182
## 691  56 258  36 115  65
## 692 193 193 194 155 100
## 695  46  49  94  58  60
## 698 140  48  48  92  51
## 700 190 110 100 140  95
## 702 145  96  22 480 230
## 703 168 168 231 120 114
## 704 130 128 220 150 360
## 706  49 106  76  82  55
## 707 132 278 176 200 105
## 709 328 144 155 225 175
## 713 182 285 135 140 146
## 715  94 105  71  78  88
## 718  38  52  44  64  90
## 720 115  71  67 110  48
## 721  36  40  55  71  37
## 725 196 100  71  74 325
## 726  56  85 120 106 110
## 728 240 275  61 135 135
## 729 474 370 190 105 237
## 730  88  56 182  64  56
## 732  74  18 105  44 115
## 735  70 265  56 110  68
## 736 152 120  57 115 152
## 738  49  66  49  36  66
## 740  81 112 148 188 105
## 744 166  88 135 146 130
## 747 342 180 284 194 293
## 750 210 180 245 240 125
## 751 167 335 318 130 100
## 753  68  90 120 120  79
## 755 126 310 126 125 144
## 757  63 230 480 167  88
## 758  83 220 335 165 330
## 759  94 120 110  78 116
## 760 159 130 130 744 249
## 762 277 495 680 192  70
## 763  70  71  23 115  60
## 765 402 152 152 115  99
## 767 130 130  88 165 100
## 768  16  82 265  92 100
# Imputation method used for each variable; the empty string marks
# variables for which no method was applied
tempData[["method"]]
##                  Glucose            BloodPressure            SkinThickness 
##                    "pmm"                    "pmm"                    "pmm" 
##                  Insulin                      BMI DiabetesPedigreeFunction 
##                    "pmm"                    "pmm"                       "" 
##                      Age 
##                       ""
# Take the first of the imputed datasets as a plain data frame
completedData <- complete(tempData, action = 1)
# Keep the class label aside; it is attached to the imputed predictors later
Outcome <- diabetes[["Outcome"]]

##The missing values have been replaced with the imputed values in the first of the five datasets.

magenta points (imputed) - blue ones (observed)

# Plot the mids object itself rather than one completed data frame: only
# mice's xyplot method knows which values were imputed, so only it can draw
# observed points in blue and imputed points in magenta. Plotting
# completedData would colour points by right-hand-side variable instead.
xyplot(tempData, Insulin ~ BMI + BloodPressure + Glucose,
       scales = "free", layout = c(2, 2),
       auto.key = list(x = .6, y = .7, corner = c(0, 0)))

xyplot(tempData, Glucose ~ Insulin + BMI + BloodPressure, pch = 18, cex = .75,
       auto.key = list(x = .6, y = .7, corner = c(-0.5, 4)))

xyplot(tempData, SkinThickness ~ Glucose + BMI + BloodPressure,
       pch = 18, cex = .75)

xyplot(tempData, BloodPressure ~ Glucose + BMI + Insulin,
       pch = 18, cex = 0.75)

##—————- The density of the imputed data for each imputed dataset is shown in magenta, while the density of the observed data is shown in blue. Again, under our previous assumptions, we expect the distributions to be similar. ##—————

# Compare observed vs imputed Glucose: plotting the mids object lets mice's
# densityplot method overlay the observed density (blue) with the density of
# each imputed dataset (magenta). A single completed column cannot show this.
densityplot(tempData, ~Glucose)

# Linear regression of Insulin on Glucose, BMI, Age and BloodPressure,
# evaluated on the columns of the first completed dataset
modelFit1 <- with(
  completedData,
  lm(Insulin ~ Glucose + BMI + Age + BloodPressure)
)
# Coefficient table, residual summary and fit statistics
summary(modelFit1)
## 
## Call:
## lm(formula = Insulin ~ Glucose + BMI + Age + BloodPressure)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -319.39  -49.07  -13.86   31.44  531.75 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -117.51914   23.08886  -5.090 4.52e-07 ***
## Glucose          2.27724    0.11366  20.035  < 2e-16 ***
## BMI              2.09257    0.50304   4.160 3.55e-05 ***
## Age              0.06167    0.29996   0.206 0.837171    
## BloodPressure   -1.08759    0.29402  -3.699 0.000232 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 89.69 on 763 degrees of freedom
## Multiple R-squared:  0.3928, Adjusted R-squared:  0.3896 
## F-statistic: 123.4 on 4 and 763 DF,  p-value: < 2.2e-16

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# Covariance ellipses for columns 2:7 of the completed data, grouped by Outcome
# NOTE(review): 2:7 leaves out column 1 (Glucose) — confirm this is intended
covEllipses(completedData[, 2:7], Outcome)

# Same data with filled ellipses, no pooled ellipse, restricted to the
# first five of the selected columns
covEllipses(
  completedData[, 2:7],
  Outcome,
  variables = 1:5,
  pooled = FALSE,
  fill = TRUE,
  col = c("blue", "red", "darkgreen")
)

# Box's M test on columns 2:7 of the completed data, with Outcome as the
# grouping factor
diab.boxm <- boxM(completedData[, 2:7], group = Outcome)
# Plot the test result with enlarged axis labels
plot(diab.boxm, cex.lab = 1.5)

Preparing dataset for regression analysis

First: combine the completedData set with the outcome.

# Start from the imputed predictors and append the class label as a column
complete <- completedData
complete[["Outcome"]] <- Outcome
# Per-column summary of the assembled modelling dataset
summary(complete)
##     Glucose      BloodPressure    SkinThickness      Insulin     
##  Min.   : 44.0   Min.   : 24.00   Min.   : 7.00   Min.   : 14.0  
##  1st Qu.: 99.0   1st Qu.: 64.00   1st Qu.:20.00   1st Qu.: 74.0  
##  Median :117.0   Median : 72.00   Median :29.00   Median :120.0  
##  Mean   :121.5   Mean   : 72.35   Mean   :28.77   Mean   :150.4  
##  3rd Qu.:140.2   3rd Qu.: 80.00   3rd Qu.:36.00   3rd Qu.:188.5  
##  Max.   :199.0   Max.   :122.00   Max.   :99.00   Max.   :846.0  
##       BMI        DiabetesPedigreeFunction      Age         Outcome   
##  Min.   :18.20   Min.   :0.0780           Min.   :21.00   False:500  
##  1st Qu.:27.50   1st Qu.:0.2437           1st Qu.:24.00   True :268  
##  Median :32.05   Median :0.3725           Median :29.00              
##  Mean   :32.42   Mean   :0.4719           Mean   :33.24              
##  3rd Qu.:36.60   3rd Qu.:0.6262           3rd Qu.:41.00              
##  Max.   :67.10   Max.   :2.4200           Max.   :81.00

For convenience's sake, we will use a 70/30 split: 70% of the data forms the training set and the remaining 30% the testing set.

# Fix the RNG state so the partition is reproducible
set.seed(3456)
# Draw one 70% partition based on Outcome; list = FALSE returns the selected
# row indices as a matrix instead of a list
index <- createDataPartition(
  y = complete$Outcome,
  p = 0.7,
  times = 1,
  list = FALSE
)
# Selected rows form the training set, all remaining rows the test set
train <- complete[index, ]
test <- complete[-index, ]
head(train)
##   Glucose BloodPressure SkinThickness Insulin  BMI
## 3     183            64            14     579 23.3
## 4      89            66            23      94 28.1
## 6     116            74            19      94 25.6
## 7      78            50            32      88 31.0
## 8     115            70            45      56 35.3
## 9     197            70            45     543 30.5
##   DiabetesPedigreeFunction Age Outcome
## 3                    0.672  32    True
## 4                    0.167  21   False
## 6                    0.201  30   False
## 7                    0.248  26    True
## 8                    0.134  29   False
## 9                    0.158  53    True
# First six rows of the held-out test set
head(test, n = 6L)
##    Glucose BloodPressure SkinThickness Insulin  BMI
## 1      148            72            35     480 33.6
## 2       85            66            29      55 26.6
## 5      137            40            35     168 43.1
## 11     110            92            36     100 37.6
## 13     139            80            22     220 27.1
## 16     100            80            24      75 30.0
##    DiabetesPedigreeFunction Age Outcome
## 1                     0.627  50    True
## 2                     0.351  31   False
## 5                     2.288  33    True
## 11                    0.191  30   False
## 13                    1.441  57   False
## 16                    0.484  32    True

Normalize the data. Categorical variables are automatically ignored

# Estimate centering and scaling parameters from the training rows only
prep <- preProcess(train, method = c("center", "scale"))
# Apply those same training-set parameters to both partitions
train_prep <- predict(prep, train)
test_prep <- predict(prep, test)

LDA

library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
# Linear discriminant analysis of Outcome on all other columns of train
model_lda <- lda(Outcome ~ ., data = train)
# Show priors, group means and discriminant coefficients
print(model_lda)
## Call:
## lda(Outcome ~ ., data = train)
## 
## Prior probabilities of groups:
##     False      True 
## 0.6505576 0.3494424 
## 
## Group means:
##        Glucose BloodPressure SkinThickness  Insulin      BMI
## False 109.7286      71.21429      27.28571 123.7686 30.88400
## True  142.8883      75.15426      32.15426 197.5532 35.28191
##       DiabetesPedigreeFunction      Age
## False                0.4349457 30.67714
## True                 0.5311277 36.20745
## 
## Coefficients of linear discriminants:
##                                   LD1
## Glucose                   0.034099001
## BloodPressure            -0.010371262
## SkinThickness            -0.005636507
## Insulin                  -0.001693631
## BMI                       0.072542298
## DiabetesPedigreeFunction  0.437085722
## Age                       0.024375131
# Default plot of the fitted discriminant model
plot(model_lda)

library(klaR)
# Partition plots of LDA classification for four of the predictors
partimat(
  Outcome ~ Glucose + Insulin + BloodPressure + BMI,
  data = train,
  method = "lda"
)

Prediction

# model_lda was fitted on the raw (unscaled) `train` data, so it must be
# applied to the raw `test` data. Feeding it the centered/scaled `test_prep`
# would put the predictors on a different scale than the fitted coefficients
# and group means, giving meaningless class assignments.
predictions <- model_lda %>% predict(test)
# The result holds $class (predicted labels), $posterior (class
# probabilities) and $x (discriminant scores)
summary(predictions)
##           Length Class  Mode   
## class     230    factor numeric
## posterior 460    -none- numeric
## x         230    -none- numeric
# Resampling scheme: 10-fold cross-validation, repeated ten times
fitControl <- trainControl(method = "repeatedcv", number = 10, repeats = 10)

# Fix the RNG state so the resampling is reproducible
set.seed(825)
# Gradient boosting on all predictors. `verbose` is not a train() option;
# it passes through to gbm()
gbm_fit <- train(
  Outcome ~ .,
  data = train,
  method = "gbm",
  verbose = FALSE,
  trControl = fitControl
)
gbm_fit
## Stochastic Gradient Boosting 
## 
## 538 samples
##   7 predictor
##   2 classes: 'False', 'True' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 10 times) 
## Summary of sample sizes: 484, 484, 484, 485, 484, 484, ... 
## Resampling results across tuning parameters:
## 
##   interaction.depth  n.trees  Accuracy   Kappa    
##   1                   50      0.7753040  0.4803382
##   1                  100      0.7717890  0.4772438
##   1                  150      0.7646995  0.4633156
##   2                   50      0.7650908  0.4639867
##   2                  100      0.7578477  0.4509521
##   2                  150      0.7474249  0.4277235
##   3                   50      0.7661740  0.4671532
##   3                  100      0.7548358  0.4445688
##   3                  150      0.7477813  0.4302643
## 
## Tuning parameter 'shrinkage' was held constant at a value of 0.1
## 
## Tuning parameter 'n.minobsinnode' was held constant at a value of 10
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were n.trees = 50, interaction.depth
##  = 1, shrinkage = 0.1 and n.minobsinnode = 10.
# Classify the held-out test rows with the tuned boosting model
gbm_pred <- predict(gbm_fit, newdata = test)
# Cross-tabulate predicted labels against the true labels
confusionMatrix(data = gbm_pred, reference = test$Outcome)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction False True
##      False   131   38
##      True     19   42
##                                           
##                Accuracy : 0.7522          
##                  95% CI : (0.6912, 0.8066)
##     No Information Rate : 0.6522          
##     P-Value [Acc > NIR] : 0.0007075       
##                                           
##                   Kappa : 0.4217          
##                                           
##  Mcnemar's Test P-Value : 0.0171182       
##                                           
##             Sensitivity : 0.8733          
##             Specificity : 0.5250          
##          Pos Pred Value : 0.7751          
##          Neg Pred Value : 0.6885          
##              Prevalence : 0.6522          
##          Detection Rate : 0.5696          
##    Detection Prevalence : 0.7348          
##       Balanced Accuracy : 0.6992          
##                                           
##        'Positive' Class : False           
## 

Random Forest

# repeating fitcontrol function
library(naivebayes)
## naivebayes 0.9.6 loaded
## 
## Attaching package: 'naivebayes'
## The following object is masked from 'package:data.table':
## 
##     tables
# Resampling scheme: 10-fold cross-validation, repeated ten times
fitControl <- trainControl(method = "repeatedcv", number = 10, repeats = 10)

# Random forest on all predictors, fitted through caret
library(randomForest)
rf_fit <- train(
  Outcome ~ .,
  data = train,
  method = "rf",
  verbose = FALSE,
  trControl = fitControl
)
rf_fit
## Random Forest 
## 
## 538 samples
##   7 predictor
##   2 classes: 'False', 'True' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 10 times) 
## Summary of sample sizes: 484, 484, 484, 484, 485, 484, ... 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##   2     0.7451188  0.4216633
##   4     0.7460657  0.4265525
##   7     0.7442278  0.4244386
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 4.
# Classify the held-out test rows with the tuned random forest
rf_pred <- predict(rf_fit, newdata = test)
# Cross-tabulate predicted labels against the true labels
confusionMatrix(data = rf_pred, reference = test$Outcome)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction False True
##      False   125   37
##      True     25   43
##                                           
##                Accuracy : 0.7304          
##                  95% CI : (0.6682, 0.7866)
##     No Information Rate : 0.6522          
##     P-Value [Acc > NIR] : 0.006878        
##                                           
##                   Kappa : 0.3843          
##                                           
##  Mcnemar's Test P-Value : 0.162413        
##                                           
##             Sensitivity : 0.8333          
##             Specificity : 0.5375          
##          Pos Pred Value : 0.7716          
##          Neg Pred Value : 0.6324          
##              Prevalence : 0.6522          
##          Detection Rate : 0.5435          
##    Detection Prevalence : 0.7043          
##       Balanced Accuracy : 0.6854          
##                                           
##        'Positive' Class : False           
## 

Bootstrap

Bootstrap resampling involves taking random samples from the dataset (with replacement) against which to evaluate the model. In aggregate, the results provide an indication of the variance of the model's performance. Typically, a large number of resampling iterations are performed (thousands or tens of thousands).

LDA

The following example uses a bootstrap with 100 resamples to prepare a Linear Discriminant Analysis model.

library(MASS)
# Resampling scheme: bootstrap with 100 resamples
fitControl <- trainControl(number = 100, method = "boot")
# Fix the RNG state so the bootstrap samples are reproducible
set.seed(825)

# Fit LDA on all predictors through caret, evaluated with the bootstrap scheme
lda_fit <- train(
  Outcome ~ .,
  data = train,
  method = "lda",
  verbose = FALSE,
  trControl = fitControl
)

lda_fit
## Linear Discriminant Analysis 
## 
## 538 samples
##   7 predictor
##   2 classes: 'False', 'True' 
## 
## No pre-processing
## Resampling: Bootstrapped (100 reps) 
## Summary of sample sizes: 538, 538, 538, 538, 538, 538, ... 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.7701148  0.4704893
# Classify the held-out test rows with the bootstrapped LDA model
lda_pred <- predict(lda_fit, newdata = test)
# Cross-tabulate predicted labels against the true labels
confusionMatrix(data = lda_pred, reference = test$Outcome)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction False True
##      False   127   36
##      True     23   44
##                                           
##                Accuracy : 0.7435          
##                  95% CI : (0.6819, 0.7986)
##     No Information Rate : 0.6522          
##     P-Value [Acc > NIR] : 0.001872        
##                                           
##                   Kappa : 0.4123          
##                                           
##  Mcnemar's Test P-Value : 0.118225        
##                                           
##             Sensitivity : 0.8467          
##             Specificity : 0.5500          
##          Pos Pred Value : 0.7791          
##          Neg Pred Value : 0.6567          
##              Prevalence : 0.6522          
##          Detection Rate : 0.5522          
##    Detection Prevalence : 0.7087          
##       Balanced Accuracy : 0.6983          
##                                           
##        'Positive' Class : False           
##