Problem Definition :

The objective is to predict whether a patient has diabetes based on diagnostic measurements.

Setup

Adding Libraries

library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(corrgram)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(rms)
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:dplyr':
## 
##     combine, src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
library(Deducer)
## Loading required package: JGR
## Loading required package: rJava
## Loading required package: JavaGD
## Loading required package: iplots
## 
## Please type JGR() to launch console. Platform specific launchers (.exe and .app) can also be obtained at http://www.rforge.net/JGR/files/.
## Loading required package: car
## 
## Attaching package: 'car'
## The following object is masked from 'package:rms':
## 
##     vif
## The following object is masked from 'package:dplyr':
## 
##     recode
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## 
## Note Non-JGR console detected:
##  Deducer is best used from within JGR (http://jgr.markushelbig.org/).
##  To Bring up GUI dialogs, type deducer().
library(caret)
## 
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
## 
##     cluster

Functions to determine outliers and NA values

Dataset
Reading the data from a file

# Load the training data from CSV. TRUE/FALSE are spelled out because T and F
# are ordinary variables that can be reassigned, unlike the reserved words.
dfrDiabModel <- read.csv(
    "./data/diabetes-train.csv",
    header = TRUE,
    stringsAsFactors = FALSE
)
# Preview the first rows to confirm the columns were parsed as expected.
head(dfrDiabModel)
##   Pregnancies Glucose BloodPressure SkinThickness Insulin  BMI
## 1           6     148            72            35       0 33.6
## 2           1      85            66            29       0 26.6
## 3           8     183            64             0       0 23.3
## 4           1      89            66            23      94 28.1
## 5           0     137            40            35     168 43.1
## 6           5     116            74             0       0 25.6
##   DiabetesPedigreeFunction Age Outcome
## 1                    0.627  50       1
## 2                    0.351  31       0
## 3                    0.672  32       1
## 4                    0.167  21       0
## 5                    2.288  33       1
## 6                    0.201  30       0

Trying to find missing data before imputation

# Apply detect_na to every column of the training frame and print the
# per-column results as a named list.
# NOTE(review): detect_na is not defined in this part of the document; judging
# by the printed output it returns a count of NA entries -- confirm.
lapply(X = dfrDiabModel, FUN = detect_na)
## $Pregnancies
## [1] 0
## 
## $Glucose
## [1] 0
## 
## $BloodPressure
## [1] 0
## 
## $SkinThickness
## [1] 0
## 
## $Insulin
## [1] 0
## 
## $BMI
## [1] 0
## 
## $DiabetesPedigreeFunction
## [1] 0
## 
## $Age
## [1] 0
## 
## $Outcome
## [1] 0

Observation:
1. Hence we see that there are no NA values

Trying to find outliers in the data before imputation

# Apply detect_outliers to every column and print the per-column results as a
# named list.
# NOTE(review): detect_outliers is not defined in this part of the document;
# the printed output suggests it returns the outlying values themselves, but
# the rule it uses to flag them cannot be seen here -- confirm.
lapply(X = dfrDiabModel, FUN = detect_outliers)
## $Pregnancies
## integer(0)
## 
## $Glucose
## integer(0)
## 
## $BloodPressure
##  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 
## $SkinThickness
## integer(0)
## 
## $Insulin
##  [1] 543 846 495 485 495 478 744 680 545 465 579 474 480 600 540 480
## 
## $BMI
##  [1]  0.0  0.0  0.0  0.0  0.0 67.1  0.0  0.0  0.0  0.0  0.0
## 
## $DiabetesPedigreeFunction
## [1] 2.288 1.893 1.781 2.329 2.137 1.731 2.420 1.699 1.698
## 
## $Age
## [1] 81
## 
## $Outcome
## integer(0)

Observation:
1. Hence we see that there are outliers in our data.
2. For this particular case we will impute the data by replacing the outlier values with the average value of the column.

***********************************Imputing Data***********************************************

1. BMI Values

Plotting the Graph

# Histogram of BMI. ggplot() + geom_histogram() replaces the deprecated
# qplot(), and the column is mapped by bare name instead of dfrDiabModel$BMI
# (using `$` inside a mapping when `data` is supplied is discouraged).
# The measurement is on the x axis; the y axis of a histogram is the count.
ggplot(dfrDiabModel, aes(x = BMI)) +
    geom_histogram() +
    labs(x = "Levels of BMI", y = "Count")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Observation:
1. Therefore we see that we have outlier values at 0 and at BMI values greater than 50

Replacing the Outliers Value with the average value

# Mark implausible BMI readings (exactly 0, or above 50) as NA so that they
# can be filled in by the imputation step that follows.
dfrDiabModel$BMI <- with(
    dfrDiabModel,
    replace(BMI, BMI == 0 | BMI > 50, NA)
)
# Show the first rows after the replacement.
head(dfrDiabModel)
##   Pregnancies Glucose BloodPressure SkinThickness Insulin  BMI
## 1           6     148            72            35       0 33.6
## 2           1      85            66            29       0 26.6
## 3           8     183            64             0       0 23.3
## 4           1      89            66            23      94 28.1
## 5           0     137            40            35     168 43.1
## 6           5     116            74             0       0 25.6
##   DiabetesPedigreeFunction Age Outcome
## 1                    0.627  50       1
## 2                    0.351  31       0
## 3                    0.672  32       1
## 4                    0.167  21       0
## 5                    2.288  33       1
## 6                    0.201  30       0
# Fill the NA entries with the mean of the remaining BMI values, rounded to a
# whole number.
bmiFillValue <- round(mean(dfrDiabModel$BMI, na.rm = TRUE), digits = 0)
# NOTE(review): as.integer() truncates toward zero, so every BMI value (not
# only the imputed ones) loses its fractional part here, e.g. 33.6 becomes 33.
# Confirm that this loss of precision is intended.
dfrDiabModel$BMI <- as.integer(
    replace(dfrDiabModel$BMI, is.na(dfrDiabModel$BMI), bmiFillValue)
)
# Re-check the column for missing values after imputation.
detect_na(dfrDiabModel$BMI)
## [1] 0
#head(dfrDiabModel)

2. BloodPressure

Plotting the Graph

# Histogram of BloodPressure. ggplot() + geom_histogram() replaces the
# deprecated qplot(), and the column is mapped by bare name instead of
# dfrDiabModel$BloodPressure.
# The measurement is on the x axis; the y axis of a histogram is the count.
ggplot(dfrDiabModel, aes(x = BloodPressure)) +
    geom_histogram() +
    labs(x = "Levels of BloodPressure", y = "Count")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Observation: 1. Therefore we see that we have outliers values less than 40 and values greater than 110

Replacing the Outliers Value with the average value

# Mark BloodPressure readings below 40 or above 110 as NA so that they can be
# filled in by the imputation step that follows.
dfrDiabModel$BloodPressure <- with(
    dfrDiabModel,
    replace(BloodPressure, BloodPressure < 40 | BloodPressure > 110, NA)
)
# Show the first rows after the replacement.
head(dfrDiabModel)
##   Pregnancies Glucose BloodPressure SkinThickness Insulin BMI
## 1           6     148            72            35       0  33
## 2           1      85            66            29       0  26
## 3           8     183            64             0       0  23
## 4           1      89            66            23      94  28
## 5           0     137            40            35     168  43
## 6           5     116            74             0       0  25
##   DiabetesPedigreeFunction Age Outcome
## 1                    0.627  50       1
## 2                    0.351  31       0
## 3                    0.672  32       1
## 4                    0.167  21       0
## 5                    2.288  33       1
## 6                    0.201  30       0
# Fill the NA entries with the mean of the remaining BloodPressure values,
# rounded to a whole number.
bpFillValue <- round(mean(dfrDiabModel$BloodPressure, na.rm = TRUE), digits = 0)
# Store the column as integer again after the fill.
dfrDiabModel$BloodPressure <- as.integer(
    replace(
        dfrDiabModel$BloodPressure,
        is.na(dfrDiabModel$BloodPressure),
        bpFillValue
    )
)
# Re-check the column for missing values after imputation.
detect_na(dfrDiabModel$BloodPressure)
## [1] 0
#head(dfrDiabModel)

3. Insulin

Plotting the Graph

# Histogram of Insulin. ggplot() + geom_histogram() replaces the deprecated
# qplot(), and the column is mapped by bare name instead of
# dfrDiabModel$Insulin.
# The measurement is on the x axis; the y axis of a histogram is the count.
ggplot(dfrDiabModel, aes(x = Insulin)) +
    geom_histogram() +
    labs(x = "Levels of Insulin", y = "Count")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Observation:
1. Therefore we see that we have outliers values greater than 400

Replacing the Outliers Value with the average value

# Mark Insulin readings above 400 as NA so that they can be filled in by the
# imputation step that follows. Zero readings are left untouched here.
dfrDiabModel$Insulin <- with(
    dfrDiabModel,
    replace(Insulin, Insulin > 400, NA)
)
# Show the first rows after the replacement.
head(dfrDiabModel)
##   Pregnancies Glucose BloodPressure SkinThickness Insulin BMI
## 1           6     148            72            35       0  33
## 2           1      85            66            29       0  26
## 3           8     183            64             0       0  23
## 4           1      89            66            23      94  28
## 5           0     137            40            35     168  43
## 6           5     116            74             0       0  25
##   DiabetesPedigreeFunction Age Outcome
## 1                    0.627  50       1
## 2                    0.351  31       0
## 3                    0.672  32       1
## 4                    0.167  21       0
## 5                    2.288  33       1
## 6                    0.201  30       0
# Fill the NA entries with the mean of the remaining Insulin values, rounded
# to a whole number.
insulinFillValue <- round(mean(dfrDiabModel$Insulin, na.rm = TRUE), digits = 0)
# Store the column as integer again after the fill.
dfrDiabModel$Insulin <- as.integer(
    replace(dfrDiabModel$Insulin, is.na(dfrDiabModel$Insulin), insulinFillValue)
)
# Re-check the column for missing values after imputation.
detect_na(dfrDiabModel$Insulin)
## [1] 0
#head(dfrDiabModel)

4. DiabetesPedigreeFunction

Plotting the Graph

# Histogram of DiabetesPedigreeFunction. ggplot() + geom_histogram() replaces
# the deprecated qplot(), and the column is mapped by bare name instead of
# dfrDiabModel$DiabetesPedigreeFunction.
# The measurement is on the x axis; the y axis of a histogram is the count.
ggplot(dfrDiabModel, aes(x = DiabetesPedigreeFunction)) +
    geom_histogram() +
    labs(x = "Levels of DiabetesPedigreeFunction", y = "Count")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Observation:
1. Therefore we see that we have outliers values greater than 1.5

Replacing the Outliers Value with the average value

# Mark DiabetesPedigreeFunction values above 1.5 as NA so that they can be
# filled in by the imputation step that follows.
dfrDiabModel$DiabetesPedigreeFunction <- with(
    dfrDiabModel,
    replace(DiabetesPedigreeFunction, DiabetesPedigreeFunction > 1.5, NA)
)
# Show the first rows after the replacement (row 5 is now NA).
head(dfrDiabModel)
##   Pregnancies Glucose BloodPressure SkinThickness Insulin BMI
## 1           6     148            72            35       0  33
## 2           1      85            66            29       0  26
## 3           8     183            64             0       0  23
## 4           1      89            66            23      94  28
## 5           0     137            40            35     168  43
## 6           5     116            74             0       0  25
##   DiabetesPedigreeFunction Age Outcome
## 1                    0.627  50       1
## 2                    0.351  31       0
## 3                    0.672  32       1
## 4                    0.167  21       0
## 5                       NA  33       1
## 6                    0.201  30       0

Impute Data

# Fill the NA entries with the (unrounded) mean of the remaining
# DiabetesPedigreeFunction values.
dpfFillValue <- mean(dfrDiabModel$DiabetesPedigreeFunction, na.rm = TRUE)
# Keep the column as a double after the fill.
dfrDiabModel$DiabetesPedigreeFunction <- as.numeric(
    replace(
        dfrDiabModel$DiabetesPedigreeFunction,
        is.na(dfrDiabModel$DiabetesPedigreeFunction),
        dpfFillValue
    )
)
# Re-check the column for missing values after imputation.
detect_na(dfrDiabModel$DiabetesPedigreeFunction)
## [1] 0
#head(dfrDiabModel)

Trying to find missing Data

# Re-run the missing-value check on every column now that the outliers have
# been replaced; the results are printed as a named list.
# NOTE(review): detect_na is not defined in this part of the document.
lapply(X = dfrDiabModel, FUN = detect_na)
## $Pregnancies
## [1] 0
## 
## $Glucose
## [1] 0
## 
## $BloodPressure
## [1] 0
## 
## $SkinThickness
## [1] 0
## 
## $Insulin
## [1] 0
## 
## $BMI
## [1] 0
## 
## $DiabetesPedigreeFunction
## [1] 0
## 
## $Age
## [1] 0
## 
## $Outcome
## [1] 0

Observation:
1. Hence we see that there are no NA values

Trying to find outliers Data

# Re-run the outlier check on every column after imputation; the results are
# printed as a named list.
# NOTE(review): detect_outliers is not defined in this part of the document.
lapply(X = dfrDiabModel, FUN = detect_outliers)
## $Pregnancies
## integer(0)
## 
## $Glucose
## integer(0)
## 
## $BloodPressure
## integer(0)
## 
## $SkinThickness
## integer(0)
## 
## $Insulin
## integer(0)
## 
## $BMI
## integer(0)
## 
## $DiabetesPedigreeFunction
## numeric(0)
## 
## $Age
## [1] 81
## 
## $Outcome
## integer(0)

Observation:
1. Hence we see that, apart from a single Age value of 81, there are no outliers left in our data after the imputation operation.

Correlation

# Correlation (cor() default method) of every column with Outcome. Outcome is
# itself one of the columns, which is why its entry is exactly 1.
# vapply() replaces growing a vector with c() inside a for loop: it checks
# that each column yields exactly one numeric value and names the result
# after the columns automatically.
dfrCorr <- vapply(
    dfrDiabModel,
    function(col) cor(as.numeric(dfrDiabModel$Outcome), as.numeric(col)),
    FUN.VALUE = numeric(1)
)
# Unnamed copy of the same values, kept because the script previously exposed
# the raw vector under this name.
vctCorr <- unname(dfrCorr)
# Print the named correlations.
dfrCorr
##              Pregnancies                  Glucose            BloodPressure 
##                0.2278834                0.4593730                0.1650356 
##            SkinThickness                  Insulin                      BMI 
##                0.0862186                0.1203176                0.3055303 
## DiabetesPedigreeFunction                      Age                  Outcome 
##                0.1860203                0.2265074                1.0000000

Data For Visualization

# Reshape to long form for faceted plotting: one row per (observation,
# feature) pair, keeping Outcome as an identifier column. The feature name
# goes into `variable` and its reading into `value`.
dfrDiabGraph <- gather(
    dfrDiabModel,
    key = "variable",
    value = "value",
    -Outcome
)
# Preview the reshaped data.
head(dfrDiabGraph)
##   Outcome    variable value
## 1       1 Pregnancies     6
## 2       0 Pregnancies     1
## 3       1 Pregnancies     8
## 4       0 Pregnancies     1
## 5       1 Pregnancies     0
## 6       0 Pregnancies     5

Data Visualization

# One jittered scatter panel per feature: feature value on x, Outcome on y.
# Each panel gets its own x scale because the features have different ranges.
ggplot(dfrDiabGraph, aes(x = value, y = Outcome, colour = variable)) +
    geom_jitter() +
    facet_wrap(vars(variable), scales = "free_x") +
    ggtitle("Relation Of Outcome With Other Features")

Summary

# Six-number summary (min, quartiles, mean, max) for every column of the
# cleaned training data.
lapply(X = dfrDiabModel, FUN = summary)
## $Pregnancies
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   1.000   3.000   3.827   6.000  17.000 
## 
## $Glucose
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0    99.0   116.0   120.5   140.5   199.0 
## 
## $BloodPressure
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   40.00   64.00   72.00   72.38   80.00  110.00 
## 
## $SkinThickness
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00   23.00   20.41   32.00   99.00 
## 
## $Insulin
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00   37.00   67.24  115.00  375.00 
## 
## $BMI
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   18.00   27.00   32.00   31.61   36.00   50.00 
## 
## $DiabetesPedigreeFunction
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0780  0.2400  0.3750  0.4539  0.6125  1.4760 
## 
## $Age
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   21.00   24.00   29.00   33.13   40.00   81.00 
## 
## $Outcome
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.3448  1.0000  1.0000

Find Best Multi Logistic Model
Choose the best logistic model by using step().

# Start from the logistic model that uses every predictor ...
mLRDFullModel <- glm(Outcome ~ ., family = binomial, data = dfrDiabModel)
# ... and let step() search for a better subset, silently (trace = 0) and
# with at most 100 steps.
stpDiabModel <- step(mLRDFullModel, steps = 100, trace = 0)
# Coefficients and fit statistics of the selected model.
summary(stpDiabModel)
## 
## Call:
## glm(formula = Outcome ~ Pregnancies + Glucose + BMI + DiabetesPedigreeFunction, 
##     family = binomial, data = dfrDiabModel)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6934  -0.7366  -0.4020   0.7396   2.8487  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -9.041522   0.745678 -12.125  < 2e-16 ***
## Pregnancies               0.140305   0.028466   4.929 8.27e-07 ***
## Glucose                   0.033625   0.003498   9.613  < 2e-16 ***
## BMI                       0.093970   0.016265   5.777 7.59e-09 ***
## DiabetesPedigreeFunction  1.292168   0.336558   3.839 0.000123 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 900.53  on 698  degrees of freedom
## Residual deviance: 661.04  on 694  degrees of freedom
## AIC: 671.04
## 
## Number of Fisher Scoring iterations: 5

Observation:
1.Best results given by Outcome ~ Pregnancies + Glucose + BMI + DiabetesPedigreeFunction

Making the Final Multiple Logistic Regression Model

# Fit the final logistic regression with the four predictors kept by the
# stepwise search. The formula is written inline so that the printed Call
# shows the full specification.
mLRDModel <- glm(
    formula = Outcome ~ Pregnancies + Glucose + BMI + DiabetesPedigreeFunction,
    family = binomial(link = "logit"),
    data = dfrDiabModel
)
# Coefficient table, deviances and AIC of the final model.
summary(mLRDModel)
## 
## Call:
## glm(formula = Outcome ~ Pregnancies + Glucose + BMI + DiabetesPedigreeFunction, 
##     family = binomial(link = "logit"), data = dfrDiabModel)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6934  -0.7366  -0.4020   0.7396   2.8487  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -9.041522   0.745678 -12.125  < 2e-16 ***
## Pregnancies               0.140305   0.028466   4.929 8.27e-07 ***
## Glucose                   0.033625   0.003498   9.613  < 2e-16 ***
## BMI                       0.093970   0.016265   5.777 7.59e-09 ***
## DiabetesPedigreeFunction  1.292168   0.336558   3.839 0.000123 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 900.53  on 698  degrees of freedom
## Residual deviance: 661.04  on 694  degrees of freedom
## AIC: 671.04
## 
## Number of Fisher Scoring iterations: 5

Checking for R2 value

# Refit the same specification with rms::lrm to obtain its discrimination
# indexes (R2, C, Dxy, ...). x = TRUE and y = TRUE make lrm store the design
# matrix and the response in the fit object.
Diablrm <- lrm(
    formula = Outcome ~ Pregnancies + Glucose + BMI + DiabetesPedigreeFunction,
    data = dfrDiabModel,
    x = TRUE,
    y = TRUE
)
# Print the model summary table.
print(Diablrm)
## Logistic Regression Model
##  
##  lrm(formula = Outcome ~ Pregnancies + Glucose + BMI + DiabetesPedigreeFunction, 
##      data = dfrDiabModel, x = TRUE, y = TRUE)
##  
##                        Model Likelihood     Discrimination    Rank Discrim.    
##                           Ratio Test           Indexes           Indexes       
##  Obs           699    LR chi2     239.49    R2       0.401    C       0.835    
##   0            458    d.f.             4    g        1.776    Dxy     0.670    
##   1            241    Pr(> chi2) <0.0001    gr       5.908    gamma   0.671    
##  max |deriv| 3e-08                          gp       0.298    tau-a   0.303    
##                                             Brier    0.154                     
##  
##                           Coef    S.E.   Wald Z Pr(>|Z|)
##  Intercept                -9.0415 0.7457 -12.13 <0.0001 
##  Pregnancies               0.1403 0.0285   4.93 <0.0001 
##  Glucose                   0.0336 0.0035   9.61 <0.0001 
##  BMI                       0.0940 0.0163   5.78 <0.0001 
##  DiabetesPedigreeFunction  1.2922 0.3366   3.84 0.0001  
## 

Observation:
1. We find that the R2 value reported by lrm (a Nagelkerke pseudo-R2, not the R2 of a linear regression) is 0.401, which means this particular model has moderate strength

Confusion Matrix

# Fitted probabilities for the training rows (no new data is passed), so all
# figures below are in-sample.
PrdProbValue  <- predict(mLRDModel, type = "response")
# Classify as diabetic (1) when the fitted probability exceeds 0.5.
PrdOutLevels  <- ifelse(PrdProbValue > 0.5, 1, 0)
# Pin both class levels so the table stays 2 x 2 even if one class is never
# predicted.
cnfmtrx       <- table(
    Predicted = factor(PrdOutLevels, levels = c(0, 1)),
    ActUAL = factor(dfrDiabModel$Outcome, levels = c(0, 1))
)
# positive = "1": treat diabetes as the positive class. Without it caret uses
# the first level (0), so Sensitivity would describe how well NON-diabetic
# patients are recognised.
confusionMatrix(cnfmtrx, positive = "1")
## Confusion Matrix and Statistics
## 
##          ActUAL
## Predicted   0   1
##         0 403 103
##         1  55 138
##                                           
##                Accuracy : 0.774           
##                  95% CI : (0.7411, 0.8045)
##     No Information Rate : 0.6552          
##     P-Value [Acc > NIR] : 5.519e-12       
##                                           
##                   Kappa : 0.4749          
##  Mcnemar's Test P-Value : 0.0001847       
##                                           
##             Sensitivity : 0.5726          
##             Specificity : 0.8799          
##          Pos Pred Value : 0.7150          
##          Neg Pred Value : 0.7964          
##              Prevalence : 0.3448          
##          Detection Rate : 0.1974          
##    Detection Prevalence : 0.2761          
##       Balanced Accuracy : 0.7263          
##                                           
##        'Positive' Class : 1               
## 

Observation:
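1. We find that the accuracy of this model comes out to be 0.774. Note that this is measured on the training data itself, and that only 138 of the 241 diabetic patients (about 57%) are identified correctly.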

Regression Data

# Attach the fitted probability and the 0/1 prediction to the training data
# as two extra columns, for plotting.
dfrDiabPlot <- dfrDiabModel %>%
    mutate(
        PrdProbValue = PrdProbValue,
        PrdOutLevels = PrdOutLevels
    )
# Preview the augmented data.
head(dfrDiabPlot)
##   Pregnancies Glucose BloodPressure SkinThickness Insulin BMI
## 1           6     148            72            35       0  33
## 2           1      85            66            29       0  26
## 3           8     183            64             0       0  23
## 4           1      89            66            23      94  28
## 5           0     137            40            35     168  43
## 6           5     116            74             0       0  25
##   DiabetesPedigreeFunction Age Outcome PrdProbValue PrdOutLevels
## 1                 0.627000  50       1   0.66551782            1
## 2                 0.351000  31       0   0.04123596            0
## 3                 0.672000  32       1   0.77969800            1
## 4                 0.167000  21       0   0.04471700            0
## 5                 0.453894  33       1   0.54792737            1
## 6                 0.201000  30       0   0.13817377            0

Regression Visualization

#dfrPlot
# Plot the 0/1 prediction against the fitted probability and overlay a GAM
# smooth on log(probability), without a confidence band. Both axis titles are
# deliberately blank.
ggplot(dfrDiabPlot, aes(x = PrdProbValue, y = PrdOutLevels)) +
    geom_point(shape = 19, colour = "blue", fill = "blue") +
    geom_smooth(method = "gam", formula = y ~ s(log(x)), se = FALSE) +
    labs(title = "Binomial Regression Curve", x = "", y = "")

ROC Visualization

#rocplot(logistic.model,diag=TRUE,pred.prob.labels=FALSE,prob.label.digits=3,AUC=TRUE)
# Draw the ROC curve for the final logistic model, leaving every other
# argument at its default.
# NOTE(review): rocplot is presumably the function from the Deducer package
# loaded at the top of the document -- confirm.
rocplot(mLRDModel)

Test Data

# Load the test data from CSV. TRUE/FALSE are spelled out because T and F are
# ordinary variables that can be reassigned, unlike the reserved words.
dfrDiabTest <- read.csv(
    "./data/diabetes-test.csv",
    header = TRUE,
    stringsAsFactors = FALSE
)
# Preview the first rows; note that this file has no Outcome column.
head(dfrDiabTest)
##   Pregnancies Glucose BloodPressure SkinThickness Insulin  BMI
## 1           4     118            70             0       0 44.5
## 2           2     122            76            27     200 35.9
## 3           6     125            78            31       0 27.6
## 4           1     168            88            29       0 35.0
## 5           2     129             0             0       0 38.5
## 6           4     110            76            20     100 28.4
##   DiabetesPedigreeFunction Age
## 1                    0.904  26
## 2                    0.483  26
## 3                    0.565  49
## 4                    0.905  52
## 5                    0.304  41
## 6                    0.118  27

Predict

# Predicted probability of diabetes for every test row.
# NOTE(review): the test data is used as read from disk. It has not been
# through the outlier replacement applied to the training data (e.g. rows
# with BMI 0.0 are scored as-is) and BMI keeps its decimals here, whereas the
# training BMI was truncated to integers -- confirm whether that is intended.
resVal <- predict(mLRDModel, newdata = dfrDiabTest, type = "response")
# Classify at the 0.5 threshold and label the classes. Levels and labels are
# given together so that 0 is always "NoDiabetes" and 1 is always "Diabetes";
# relabelling with levels<- after as.factor() would attach "NoDiabetes" to
# whichever single class was present if only one class were predicted.
prdSur <- factor(
    ifelse(resVal > 0.5, 1, 0),
    levels = c(0, 1),
    labels = c("NoDiabetes", "Diabetes")
)
# Append the probability (Result) and the predicted class (Outcome).
dfrDiabTest <- mutate(dfrDiabTest, Result = resVal, Outcome = prdSur)
# Print the full scored test set.
dfrDiabTest
##    Pregnancies Glucose BloodPressure SkinThickness Insulin  BMI
## 1            4     118            70             0       0 44.5
## 2            2     122            76            27     200 35.9
## 3            6     125            78            31       0 27.6
## 4            1     168            88            29       0 35.0
## 5            2     129             0             0       0 38.5
## 6            4     110            76            20     100 28.4
## 7            6      80            80            36       0 39.8
## 8           10     115             0             0       0  0.0
## 9            2     127            46            21     335 34.4
## 10           9     164            78             0       0 32.8
## 11           2      93            64            32     160 38.0
## 12           3     158            64            13     387 31.2
## 13           5     126            78            27      22 29.6
## 14          10     129            62            36       0 41.2
## 15           0     134            58            20     291 26.4
## 16           3     102            74             0       0 29.5
## 17           7     187            50            33     392 33.9
## 18           3     173            78            39     185 33.8
## 19          10      94            72            18       0 23.1
## 20           1     108            60            46     178 35.5
## 21           5      97            76            27       0 35.6
## 22           4      83            86            19       0 29.3
## 23           1     114            66            36     200 38.1
## 24           1     149            68            29     127 29.3
## 25           5     117            86            30     105 39.1
## 26           1     111            94             0       0 32.8
## 27           4     112            78            40       0 39.4
## 28           1     116            78            29     180 36.1
## 29           0     141            84            26       0 32.4
## 30           2     175            88             0       0 22.9
## 31           2      92            52             0       0 30.1
## 32           3     130            78            23      79 28.4
## 33           8     120            86             0       0 28.4
## 34           2     174            88            37     120 44.5
## 35           2     106            56            27     165 29.0
## 36           2     105            75             0       0 23.3
## 37           4      95            60            32       0 35.4
## 38           0     126            86            27     120 27.4
## 39           8      65            72            23       0 32.0
## 40           2      99            60            17     160 36.6
## 41           1     102            74             0       0 39.5
## 42          11     120            80            37     150 42.3
## 43           3     102            44            20      94 30.8
## 44           1     109            58            18     116 28.5
## 45           9     140            94             0       0 32.7
## 46          13     153            88            37     140 40.6
## 47          12     100            84            33     105 30.0
## 48           1     147            94            41       0 49.3
## 49           1      81            74            41      57 46.3
## 50           3     187            70            22     200 36.4
## 51           6     162            62             0       0 24.3
## 52           4     136            70             0       0 31.2
## 53           1     121            78            39      74 39.0
## 54           3     108            62            24       0 26.0
## 55           0     181            88            44     510 43.3
## 56           8     154            78            32       0 32.4
## 57           1     128            88            39     110 36.5
## 58           7     137            90            41       0 32.0
## 59           0     123            72             0       0 36.3
## 60           1     106            76             0       0 37.5
## 61           6     190            92             0       0 35.5
## 62           2      88            58            26      16 28.4
## 63           9     170            74            31       0 44.0
## 64           9      89            62             0       0 22.5
## 65          10     101            76            48     180 32.9
## 66           2     122            70            27       0 36.8
## 67           5     121            72            23     112 26.2
## 68           1     126            60             0       0 30.1
## 69           1      93            70            31       0 30.4
##    DiabetesPedigreeFunction Age     Result    Outcome
## 1                     0.904  26 0.69787807   Diabetes
## 2                     0.483  26 0.34050577 NoDiabetes
## 3                     0.565  49 0.33783836 NoDiabetes
## 4                     0.905  52 0.76961152   Diabetes
## 5                     0.304  41 0.39827848 NoDiabetes
## 6                     0.118  27 0.12342951 NoDiabetes
## 7                     0.177  28 0.17639188 NoDiabetes
## 8                     0.261  30 0.03123857 NoDiabetes
## 9                     0.176  22 0.26297406 NoDiabetes
## 10                    0.148  45 0.73285378   Diabetes
## 11                    0.674  23 0.23289782 NoDiabetes
## 12                    0.295  24 0.50129268   Diabetes
## 13                    0.439  40 0.31984267 NoDiabetes
## 14                    0.441  38 0.75777856   Diabetes
## 15                    0.352  21 0.16796184 NoDiabetes
## 16                    0.121  32 0.09428466 NoDiabetes
## 17                    0.826  34 0.92282539   Diabetes
## 18                    0.970  31 0.83563179   Diabetes
## 19                    0.595  56 0.17679794 NoDiabetes
## 20                    0.415  24 0.19820536 NoDiabetes
## 21                    0.378  52 0.22363330 NoDiabetes
## 22                    0.317  34 0.07402098 NoDiabetes
## 23                    0.289  21 0.24707179 NoDiabetes
## 24                    0.349  42 0.33473911 NoDiabetes
## 25                    0.251  42 0.39955233 NoDiabetes
## 26                    0.265  45 0.14877788 NoDiabetes
## 27                    0.236  38 0.33027291 NoDiabetes
## 28                    0.496  25 0.27537859 NoDiabetes
## 29                    0.433  22 0.33263712 NoDiabetes
## 30                    0.326  22 0.42472516 NoDiabetes
## 31                    0.141  22 0.06557517 NoDiabetes
## 32                    0.323  34 0.23807540 NoDiabetes
## 33                    0.259  22 0.29303589 NoDiabetes
## 34                    0.646  24 0.89150589   Diabetes
## 35                    0.426  22 0.12774058 NoDiabetes
## 36                    0.560  53 0.08970935 NoDiabetes
## 37                    0.284  28 0.16904239 NoDiabetes
## 38                    0.515  21 0.17299519 NoDiabetes
## 39                    0.600  42 0.12443027 NoDiabetes
## 40                    0.453  21 0.19664471 NoDiabetes
## 41                    0.293  42 0.20083845 NoDiabetes
## 42                    0.785  48 0.82143392   Diabetes
## 43                    0.400  26 0.14433641 NoDiabetes
## 44                    0.219  22 0.09321648 NoDiabetes
## 45                    0.734  45 0.72110866   Diabetes
## 46                    1.174  39 0.96300322   Diabetes
## 47                    0.488  46 0.36686147 NoDiabetes
## 48                    0.358  27 0.75711972   Diabetes
## 49                    1.096  32 0.39877391 NoDiabetes
## 50                    0.408  36 0.83409753   Diabetes
## 51                    0.178  50 0.44053443 NoDiabetes
## 52                    1.182  22 0.63457214   Diabetes
## 53                    0.261  28 0.30353413 NoDiabetes
## 54                    0.223  25 0.09468293 NoDiabetes
## 55                    0.222  26 0.80223746   Diabetes
## 56                    0.443  45 0.70603391   Diabetes
## 57                    1.057  37 0.54946023   Diabetes
## 58                    0.391  39 0.51486844   Diabetes
## 59                    0.258  52 0.23845382 NoDiabetes
## 60                    0.197  26 0.17385465 NoDiabetes
## 61                    0.278  66 0.86809089   Diabetes
## 62                    0.766  22 0.10495172 NoDiabetes
## 63                    0.403  43 0.93040106   Diabetes
## 64                    0.142  33 0.07667843 NoDiabetes
## 65                    0.171  63 0.28296664 NoDiabetes
## 66                    0.340  27 0.31837533 NoDiabetes
## 67                    0.245  30 0.18350215 NoDiabetes
## 68                    0.349  47 0.20020349 NoDiabetes
## 69                    0.315  23 0.07513332 NoDiabetes

Observation:
1. When the predicted output was matched with the given output we found that 81 % of data was predicted correctly.