This R Markdown file visualizes the diabetes data set using a variety of plots.
# Report the current working directory for reference.
getwd()
## [1] "D:/KRUTHI/DV-LAB"
# Load the diabetes data set without changing the session's working directory.
# The data directory defaults to the original location; set the
# DIABETES_DATA_DIR environment variable to read the file from elsewhere.
data_dir <- Sys.getenv("DIABETES_DATA_DIR", unset = "D:/KRUTHI/DV-LAB")
data_path <- file.path(data_dir, "diabetes.csv")
if (!file.exists(data_path)) {
  # Fail early with the full path instead of read.csv's generic error.
  stop("Cannot find data file: ", data_path, call. = FALSE)
}
data <- read.csv(data_path)
# Print per-column summary statistics (min, quartiles, median, mean, max).
# NOTE(review): the output below shows a minimum of 0 for Glucose,
# BloodPressure, SkinThickness, Insulin and BMI - presumably missing values
# recorded as 0; confirm before interpreting the plots.
summary(data)
## Pregnancies Glucose BloodPressure SkinThickness
## Min. : 0.000 Min. : 0.0 Min. : 0.00 Min. : 0.00
## 1st Qu.: 1.000 1st Qu.: 99.0 1st Qu.: 62.00 1st Qu.: 0.00
## Median : 3.000 Median :117.0 Median : 72.00 Median :23.00
## Mean : 3.845 Mean :120.9 Mean : 69.11 Mean :20.54
## 3rd Qu.: 6.000 3rd Qu.:140.2 3rd Qu.: 80.00 3rd Qu.:32.00
## Max. :17.000 Max. :199.0 Max. :122.00 Max. :99.00
## Insulin BMI DiabetesPedigreeFunction Age
## Min. : 0.0 Min. : 0.00 Min. :0.0780 Min. :21.00
## 1st Qu.: 0.0 1st Qu.:27.30 1st Qu.:0.2437 1st Qu.:24.00
## Median : 30.5 Median :32.00 Median :0.3725 Median :29.00
## Mean : 79.8 Mean :31.99 Mean :0.4719 Mean :33.24
## 3rd Qu.:127.2 3rd Qu.:36.60 3rd Qu.:0.6262 3rd Qu.:41.00
## Max. :846.0 Max. :67.10 Max. :2.4200 Max. :81.00
## Outcome
## Min. :0.000
## 1st Qu.:0.000
## Median :0.000
## Mean :0.349
## 3rd Qu.:1.000
## Max. :1.000
# Show the structure of the data frame: dimensions, column names, column
# types and the first few values of each column.
str(data)
## 'data.frame': 768 obs. of 9 variables:
## $ Pregnancies : int 6 1 8 1 0 5 3 10 2 8 ...
## $ Glucose : int 148 85 183 89 137 116 78 115 197 125 ...
## $ BloodPressure : int 72 66 64 66 40 74 50 0 70 96 ...
## $ SkinThickness : int 35 29 0 23 35 0 32 0 45 0 ...
## $ Insulin : int 0 0 0 94 168 0 88 0 543 0 ...
## $ BMI : num 33.6 26.6 23.3 28.1 43.1 25.6 31 35.3 30.5 0 ...
## $ DiabetesPedigreeFunction: num 0.627 0.351 0.672 0.167 2.288 ...
## $ Age : int 50 31 32 21 33 30 26 29 53 54 ...
## $ Outcome : int 1 0 1 0 1 0 1 0 1 1 ...
#Plots
library(ggplot2)
# Scatter plot of Glucose (x) against BloodPressure (y), with points coloured
# by BloodPressure. A geom layer is required here: ggplot() combined only with
# labs() draws empty axes and no data.
ggplot(data = data, aes(x = Glucose, y = BloodPressure, col = BloodPressure)) +
  geom_point() +
  labs(title = "Glucose vs BloodPressure Plot")
# Glucose (x) against BloodPressure (y); each point is coloured by Age.
ggplot(data) +
  geom_point(mapping = aes(x = Glucose, y = BloodPressure, colour = Age)) +
  ggtitle("Glucose vs BloodPressure Plot") +
  xlab("Glucose") +
  ylab("BloodPressure")
# Pregnancies (x) against BloodPressure (y); point size scales with Age.
ggplot(data, aes(Pregnancies, BloodPressure)) +
  geom_point(aes(size = Age)) +
  labs(
    x = "Pregnancies",
    y = "BloodPressure",
    title = "Pregnancies vs BloodPressure "
  )
# Distribution of DiabetesPedigreeFunction, binned into 20 bars drawn in
# yellow with black outlines.
ggplot(data) +
  geom_histogram(
    aes(x = DiabetesPedigreeFunction),
    bins = 20,
    colour = "black",
    fill = "yellow"
  ) +
  ggtitle("Histogram") +
  xlab("DiabetesPedigreeFunction") +
  ylab("total")
# Bar chart of SkinThickness by Pregnancies. Values are plotted as-is
# (identity stat), so rows sharing a Pregnancies value are stacked and each
# bar's height is the total SkinThickness for that value.
ggplot(data, aes(Pregnancies, SkinThickness)) +
  geom_col(fill = "pink") +
  ggtitle("Diabetes Plot") +
  xlab("Pregnancies") +
  ylab("SkinThickness")
# Number of rows for each Pregnancies value. Pregnancies is converted to a
# factor so every distinct value gets its own bar; the fill shade follows the
# numeric value. Explicit labels replace the default "as.factor(Pregnancies)"
# axis title so this plot is labelled like the others in this file.
ggplot(data = data, aes(x = as.factor(Pregnancies), fill = Pregnancies)) +
  geom_bar(stat = "count") +
  labs(title = "Count by Pregnancies", x = "Pregnancies", y = "Count")
# Horizontal box plot of BMI (BMI is mapped to the x axis).
ggplot(data) +
  geom_boxplot(aes(x = BMI), colour = "black", fill = "lightblue") +
  ggtitle("Box Plot") +
  xlab("Body Mass Index")
# Pie chart of the Outcome column.
# Length holds the count of each Outcome value; table() sorts the values, so
# 0 comes before 1 and lines up with the label order used below.
Length <- table(data$Outcome)
# Percentage share of each Outcome value, rounded to a whole number.
share <- round(prop.table(Length) * 100)
# Slice labels of the form "<name> <share>%".
data.labels <- paste0(c("No Diabetes", "Diabetes"), " ", share, "%")
pie(
  Length,
  labels = data.labels,
  clockwise = TRUE,
  col = heat.colors(length(data.labels)),
  main = "Diabetes Outcome"
)