library(readr)
## Warning: package 'readr' was built under R version 4.0.3
library(treeheatr)
## Warning: package 'treeheatr' was built under R version 4.0.3
library(partykit)
## Warning: package 'partykit' was built under R version 4.0.3
## Loading required package: grid
## Loading required package: libcoin
## Warning: package 'libcoin' was built under R version 4.0.3
## Loading required package: mvtnorm
## Warning: package 'mvtnorm' was built under R version 4.0.3
library(explore)
## Warning: package 'explore' was built under R version 4.0.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(shiny)
## Warning: package 'shiny' was built under R version 4.0.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.5     v stringr 1.4.0
## v tidyr   1.1.2     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.3
## Warning: package 'tibble' was built under R version 4.0.3
## Warning: package 'tidyr' was built under R version 4.0.3
## Warning: package 'purrr' was built under R version 4.0.3
## Warning: package 'stringr' was built under R version 4.0.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
diabetes <- read_csv("C:/Users/Nikhil/Desktop/diabetes.csv")
## 
## -- Column specification --------------------------------------------------------
## cols(
##   Pregnancies = col_double(),
##   Glucose = col_double(),
##   BloodPressure = col_double(),
##   SkinThickness = col_double(),
##   Insulin = col_double(),
##   BMI = col_double(),
##   DiabetesPedigreeFunction = col_double(),
##   Age = col_double(),
##   Outcome = col_double()
## )
str(diabetes)
## tibble [768 x 9] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Pregnancies             : num [1:768] 6 1 8 1 0 5 3 10 2 8 ...
##  $ Glucose                 : num [1:768] 148 85 183 89 137 116 78 115 197 125 ...
##  $ BloodPressure           : num [1:768] 72 66 64 66 40 74 50 0 70 96 ...
##  $ SkinThickness           : num [1:768] 35 29 0 23 35 0 32 0 45 0 ...
##  $ Insulin                 : num [1:768] 0 0 0 94 168 0 88 0 543 0 ...
##  $ BMI                     : num [1:768] 33.6 26.6 23.3 28.1 43.1 25.6 31 35.3 30.5 0 ...
##  $ DiabetesPedigreeFunction: num [1:768] 0.627 0.351 0.672 0.167 2.288 ...
##  $ Age                     : num [1:768] 50 31 32 21 33 30 26 29 53 54 ...
##  $ Outcome                 : num [1:768] 1 0 1 0 1 0 1 0 1 1 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Pregnancies = col_double(),
##   ..   Glucose = col_double(),
##   ..   BloodPressure = col_double(),
##   ..   SkinThickness = col_double(),
##   ..   Insulin = col_double(),
##   ..   BMI = col_double(),
##   ..   DiabetesPedigreeFunction = col_double(),
##   ..   Age = col_double(),
##   ..   Outcome = col_double()
##   .. )
summary(diabetes)
##   Pregnancies        Glucose      BloodPressure    SkinThickness  
##  Min.   : 0.000   Min.   :  0.0   Min.   :  0.00   Min.   : 0.00  
##  1st Qu.: 1.000   1st Qu.: 99.0   1st Qu.: 62.00   1st Qu.: 0.00  
##  Median : 3.000   Median :117.0   Median : 72.00   Median :23.00  
##  Mean   : 3.845   Mean   :120.9   Mean   : 69.11   Mean   :20.54  
##  3rd Qu.: 6.000   3rd Qu.:140.2   3rd Qu.: 80.00   3rd Qu.:32.00  
##  Max.   :17.000   Max.   :199.0   Max.   :122.00   Max.   :99.00  
##     Insulin           BMI        DiabetesPedigreeFunction      Age       
##  Min.   :  0.0   Min.   : 0.00   Min.   :0.0780           Min.   :21.00  
##  1st Qu.:  0.0   1st Qu.:27.30   1st Qu.:0.2437           1st Qu.:24.00  
##  Median : 30.5   Median :32.00   Median :0.3725           Median :29.00  
##  Mean   : 79.8   Mean   :31.99   Mean   :0.4719           Mean   :33.24  
##  3rd Qu.:127.2   3rd Qu.:36.60   3rd Qu.:0.6262           3rd Qu.:41.00  
##  Max.   :846.0   Max.   :67.10   Max.   :2.4200           Max.   :81.00  
##     Outcome     
##  Min.   :0.000  
##  1st Qu.:0.000  
##  Median :0.000  
##  Mean   :0.349  
##  3rd Qu.:1.000  
##  Max.   :1.000
dim(diabetes)
## [1] 768   9
diabetes$Outcome<- as.factor(diabetes$Outcome)
View(diabetes)
diabetes <- na.omit(diabetes)
## # A tibble: 9 x 8
##   variable                 type     na na_pct unique   min   mean    max
##   <chr>                    <chr> <int>  <dbl>  <int> <dbl>  <dbl>  <dbl>
## 1 Pregnancies              dbl       0      0     17  0      3.85  17   
## 2 Glucose                  dbl       0      0    136  0    121.   199   
## 3 BloodPressure            dbl       0      0     47  0     69.1  122   
## 4 SkinThickness            dbl       0      0     51  0     20.5   99   
## 5 Insulin                  dbl       0      0    186  0     79.8  846   
## 6 BMI                      dbl       0      0    248  0     32.0   67.1 
## 7 DiabetesPedigreeFunction dbl       0      0    517  0.08   0.47   2.42
## 8 Age                      dbl       0      0     52 21     33.2   81   
## 9 Outcome                  fct       0      0      2 NA     NA     NA
## # A tibble: 1 x 8
##   variable type     na na_pct unique   min  mean   max
##   <chr>    <chr> <int>  <dbl>  <int> <dbl> <dbl> <dbl>
## 1 Outcome  fct       0      0      2    NA    NA    NA
## # A tibble: 0 x 8
## # ... with 8 variables: variable <chr>, type <chr>, na <int>, na_pct <dbl>,
## #   unique <int>, min <dbl>, mean <dbl>, max <dbl>

References : explore package

https://cran.r-project.org/web/packages/explore/vignettes/explore.html