R Markdown

This R Markdown file visualizes the diabetes data set using a variety of plots.

# Show the working directory that the relative path below is resolved against.
getwd()
## [1] "D:/KRUTHI/DV-LAB"
# Read the data set relative to the working directory. No setwd() call is
# needed: a hard-coded absolute path would only work on one machine.
data <- read.csv("diabetes.csv")
# Per-column five-number summary plus mean.
summary(data)
##   Pregnancies        Glucose      BloodPressure    SkinThickness  
##  Min.   : 0.000   Min.   :  0.0   Min.   :  0.00   Min.   : 0.00  
##  1st Qu.: 1.000   1st Qu.: 99.0   1st Qu.: 62.00   1st Qu.: 0.00  
##  Median : 3.000   Median :117.0   Median : 72.00   Median :23.00  
##  Mean   : 3.845   Mean   :120.9   Mean   : 69.11   Mean   :20.54  
##  3rd Qu.: 6.000   3rd Qu.:140.2   3rd Qu.: 80.00   3rd Qu.:32.00  
##  Max.   :17.000   Max.   :199.0   Max.   :122.00   Max.   :99.00  
##     Insulin           BMI        DiabetesPedigreeFunction      Age       
##  Min.   :  0.0   Min.   : 0.00   Min.   :0.0780           Min.   :21.00  
##  1st Qu.:  0.0   1st Qu.:27.30   1st Qu.:0.2437           1st Qu.:24.00  
##  Median : 30.5   Median :32.00   Median :0.3725           Median :29.00  
##  Mean   : 79.8   Mean   :31.99   Mean   :0.4719           Mean   :33.24  
##  3rd Qu.:127.2   3rd Qu.:36.60   3rd Qu.:0.6262           3rd Qu.:41.00  
##  Max.   :846.0   Max.   :67.10   Max.   :2.4200           Max.   :81.00  
##     Outcome     
##  Min.   :0.000  
##  1st Qu.:0.000  
##  Median :0.000  
##  Mean   :0.349  
##  3rd Qu.:1.000  
##  Max.   :1.000
# Show the structure of the data frame: column names, types and first values.
# NOTE(review): the zeros in columns such as BloodPressure, SkinThickness,
# Insulin and BMI look like placeholders for missing values -- confirm before
# interpreting the plots below.
str(data)
## 'data.frame':    768 obs. of  9 variables:
##  $ Pregnancies             : int  6 1 8 1 0 5 3 10 2 8 ...
##  $ Glucose                 : int  148 85 183 89 137 116 78 115 197 125 ...
##  $ BloodPressure           : int  72 66 64 66 40 74 50 0 70 96 ...
##  $ SkinThickness           : int  35 29 0 23 35 0 32 0 45 0 ...
##  $ Insulin                 : int  0 0 0 94 168 0 88 0 543 0 ...
##  $ BMI                     : num  33.6 26.6 23.3 28.1 43.1 25.6 31 35.3 30.5 0 ...
##  $ DiabetesPedigreeFunction: num  0.627 0.351 0.672 0.167 2.288 ...
##  $ Age                     : int  50 31 32 21 33 30 26 29 53 54 ...
##  $ Outcome                 : int  1 0 1 0 1 0 1 0 1 1 ...

# Plots

library(ggplot2)
# Scatter plot of Glucose (x) against BloodPressure (y), coloured by
# BloodPressure. A geom layer is required: ggplot() with only aesthetics and
# labels draws empty axes.
ggplot(data = data, aes(x = Glucose, y = BloodPressure, col = BloodPressure)) +
  geom_point() +
  labs(title = "Glucose vs BloodPressure Plot")

# Glucose (x) against BloodPressure (y); point colour encodes Age.
ggplot(data) +
  geom_point(mapping = aes(x = Glucose, y = BloodPressure, colour = Age)) +
  ggtitle("Glucose vs BloodPressure Plot") +
  xlab("Glucose") +
  ylab("BloodPressure")

# Pregnancies (x) against BloodPressure (y); point size encodes Age.
ggplot(data) +
  geom_point(mapping = aes(x = Pregnancies, y = BloodPressure, size = Age)) +
  ggtitle("Pregnancies vs BloodPressure ") +
  xlab("Pregnancies") +
  ylab("BloodPressure")

# Distribution of DiabetesPedigreeFunction in 20 bins.
ggplot(data) +
  geom_histogram(
    mapping = aes(x = DiabetesPedigreeFunction),
    bins = 20,
    colour = "black",
    fill = "yellow"
  ) +
  ggtitle("Histogram") +
  xlab("DiabetesPedigreeFunction") +
  ylab("total")

# Mean SkinThickness for each number of pregnancies.
# The mean is used rather than stacking the raw rows: stacked bars show the
# sum per group, which mostly reflects how many patients fall in that group.
ggplot(data, aes(x = Pregnancies, y = SkinThickness)) +
  stat_summary(fun = mean, geom = "bar", fill = "pink") +
  labs(
    title = "Mean SkinThickness by Pregnancies",
    x = "Pregnancies",
    y = "Mean SkinThickness"
  )

# Number of patients for each pregnancy count.
# Pregnancies is converted to a factor on the x axis so every count gets its
# own bar, while the numeric column itself drives the fill colour.
ggplot(data = data, aes(x = as.factor(Pregnancies), fill = Pregnancies)) +
  geom_bar(stat = "count") +
  # Explicit labels: otherwise the x axis shows the raw expression
  # "as.factor(Pregnancies)".
  labs(
    title = "Patients by Number of Pregnancies",
    x = "Pregnancies",
    y = "Count"
  )

# Box plot of BMI drawn along the x axis.
ggplot(data) +
  geom_boxplot(mapping = aes(x = BMI), colour = "black", fill = "lightblue") +
  ggtitle("Box Plot") +
  xlab("Body Mass Index")

# Pie chart of the share of each Outcome class.
# Count the rows per Outcome value; table() names the counts by outcome code.
Length <- table(data$Outcome)
# Look the labels up by outcome code rather than by position, so a label can
# never be attached to the wrong slice (e.g. if one class is absent).
outcome.names <- c("0" = "No Diabetes", "1" = "Diabetes")
data.labels <- unname(outcome.names[names(Length)])
# Percentage of rows in each class, rounded to whole numbers.
share <- round(Length / sum(Length) * 100)
data.labels <- paste0(data.labels, " ", share, "%")

pie(
  Length,
  labels = data.labels,
  clockwise = TRUE,
  col = heat.colors(length(data.labels)),
  main = "Diabetes Outcome"
)