Susmita Barua

Student, University of Chittagong

The ‘diabetes’ dataset records several ‘features’ for each suspected patient, together with an outcome coded either as ‘No diabetes = 0’ or as ‘Diabetes = 1’.

# Identify the numeric columns of the `diabetes` data frame.
#
# Load the data first when it is not already in the workspace, so that this
# chunk also runs in a fresh R session.
if (!exists("diabetes")) {
  diabetes <- read.csv("diabetes.csv")
}

# One TRUE/FALSE flag per column; vapply() guarantees a logical vector.
nume_column <- vapply(diabetes, is.numeric, logical(1))
# Keep only the names whose flag is TRUE. names(nume_column) on its own would
# list every column, whether it is numeric or not.
nume_column_name <- names(nume_column)[nume_column]
nume_column_name
[1] "Pregnancies"              "Glucose"                 
[3] "BloodPressure"            "SkinThickness"           
[5] "Insulin"                  "BMI"                     
[7] "DiabetesPedigreeFunction" "Age"                     
[9] "Outcome"                 

There are a total of 9 numerical columns and 0 categorical columns in the ‘diabetes’ dataset. The target variable of the dataset is the ‘Outcome’ column.

# Read the CSV from the working directory, then display the data frame.
diabetes <- read.csv(file = "diabetes.csv")
diabetes

Basic Plot

a) Scatter plot

# Base-graphics scatter plot of Glucose against BMI, colored by Outcome.
outcome_groups <- factor(diabetes$Outcome)
# Integer level codes (1, 2, ...) pick one palette color per outcome level.
result_color <- as.numeric(outcome_groups)
# NOTE(review): confirm the glucose unit against the data source; the axis
# label is kept as originally written.
plot(diabetes$BMI, diabetes$Glucose,
     col = result_color,
     pch = 20,
     # plotmath only draws a superscript when m^2 is outside the quotes.
     xlab = expression(bold("BMI (kg/" * m^2 * ")")),
     ylab = expression(bold("Glucose (mmol/L)")),
     main = "BMI Vs Glucose",
     col.main = "black",
     cex.main = 1.5
    )
# Without a legend the colors cannot be tied back to an outcome.
legend("topleft",
       legend = levels(outcome_groups),
       col = seq_along(levels(outcome_groups)),
       pch = 20,
       title = "Outcome")

The scatter plot shows the relationship between the BMI and the glucose level of suspected diabetes patients. Each dot is one patient, colored by outcome: no diabetes (0) or diabetes (1).

b) Histogram Plot

# Histogram of the Insulin column using hist()'s default binning.
# NOTE(review): confirm the insulin unit in the axis label against the data
# source.
hist(
  x = diabetes[["Insulin"]],
  xlab = "Insulin Level (IU/mL)",
  col = "pink",
  main = "Histogram plot of Insulin"
)

The histogram is a graphical representation of the distribution of insulin levels in the ‘diabetes’ dataset. Here, the x-axis represents ranges of insulin level and the y-axis shows the number of individuals falling within each range.

c) Box Plot

# Single box plot summarising the Age column; axis labels are drawn in bold
# through plotmath.
boxplot(
  x = diabetes[["Age"]],
  ylab = quote(bold("Years")),
  xlab = quote(bold("Age")),
  col = "aquamarine",
  main = "Box plot"
)

A boxplot displays key statistics such as the median, the quartiles and potential outliers. This boxplot represents the age distribution in the ‘diabetes’ dataset: the box spans the interquartile range, the line inside it marks the median age, and the whiskers extend to the smallest and largest values that lie within a certain range of the box. Points beyond the whiskers are considered outliers.

ggplot

a) Scatter Plot

library(ggplot2)

# Treat Outcome as a discrete label so it can be matched against the named
# colors below.
diabetes[["Outcome"]] <- as.character(diabetes[["Outcome"]])

# Scatter plot of Glucose against BMI with one fixed color per outcome.
ggplot(data = diabetes, mapping = aes(BMI, Glucose, colour = Outcome)) +
  geom_point(size = 1.5) +
  scale_colour_manual(values = c("0" = "maroon", "1" = "royalblue")) +
  labs(
    caption = 'Source: Iskulghar',
    y = 'Glucose (mmol/L)',
    x = 'BMI (Kg/m^2)',
    title = 'BMI Vs Glucose (Using ggplot)'
  ) +
  # The complete theme goes first so the tweaks below are not overwritten.
  theme_minimal() +
  theme(
    text = element_text(colour = "darkslategray", size = 13),
    legend.position = "top"
  )

The scatter plot shows the association between a suspected diabetes patient’s BMI and glucose level. Each dot represents one patient, colored by outcome as either no diabetes (0) or diabetes (1).

b) Box Plot

# Box plots of every feature column, split by diabetes outcome.

# Outcome is used as a discrete grouping variable; the conversion is a no-op
# when the column is already character.
diabetes$Outcome <- as.character(diabetes$Outcome)

# Build one box plot of a feature column grouped by Outcome.
#
# data:    data frame containing an `Outcome` column and the feature column.
# feature: name of the column to plot on the y axis, as a single string.
# Returns the ggplot object; the caller is responsible for printing it.
plot_outcome_boxplot <- function(data, feature) {
  # .data[[feature]] looks the column up by its string name inside aes().
  ggplot(data, aes(x = Outcome, y = .data[[feature]], fill = Outcome)) +
    geom_boxplot() +
    # The x axis shows the Outcome groups and the y axis shows the feature
    # values, so the labels name them accordingly.
    labs(
      title = paste("Boxplot of", feature, "by Outcome"),
      x = "Outcome",
      y = feature,
      caption = "Source: Iskulghar"
    ) +
    theme(
      legend.position = "top",
      text = element_text(colour = "black", size = 14)
    )
}

feature_columns <- c(
  "Pregnancies", "Glucose", "BloodPressure", "SkinThickness",
  "Insulin", "BMI", "DiabetesPedigreeFunction", "Age"
)

# ggplot objects are not auto-printed inside a loop, hence the explicit print().
for (feature in feature_columns) {
  print(plot_outcome_boxplot(diabetes, feature))
}

Each boxplot illustrates the distribution of one column of the “diabetes” dataset, split by outcome. The box spans the interquartile range, with a line inside representing the median of that column, and the whiskers show the minimum and maximum values within a certain range of the box. Any points that fall outside that range are considered outliers.

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KIyBTdXNtaXRhIEJhcnVhDQojIyBTdHVkZW50LCB1bml2ZXJzaXR5IG9mIENoaXR0YWdvbmcNCg0KIyMgVGhlICdkaWFiZXRlcycgaXMgYSBkYXRhc2V0IHRoYXQgZGV0ZXJtaW5lcyB0aGUgb3V0Y29tZSBvZiB0aGUgc3VzcGVjdGVkIHBhdGllbnRzIGJhc2VkIG9uIHNldmVyYWwgJ2ZlYXR1cmVzJyBlaXRoZXIgYXMgJ05vIGRpYWJldGVzID0gMCcgb3IgYXMgJ0RpYWJldGVzID0gMScuIA0KDQpgYGB7cn0NCm51bWVfY29sdW1uID0gc2FwcGx5KGRpYWJldGVzLCBpcy5udW1lcmljKQ0KbnVtZV9jb2x1bW5fbmFtZSA9IG5hbWVzKG51bWVfY29sdW1uKQ0KbnVtZV9jb2x1bW5fbmFtZQ0KYGBgDQoNCiMjIFRoZXJlIGFyZSB0b3RhbCA5IG51bWVyaWNhbCBjb2x1bW5zIGFuZCAwIGNhdGVnb3JpY2FsIGNvbHVtbiBpbiB0aGUgJ2RpYWJldGVzJyBkYXRhc2V0LlRoZSB0YXJnZXQgdmFyaWFibGUgb2YgdGhlIGRhdGFzZXQgaXMgdGhlICdPdXRjb21lJyBjb2x1bW4uDQoNCmBgYHtyfQ0KZGlhYmV0ZXMgPSByZWFkLmNzdignZGlhYmV0ZXMuY3N2JykNCmRpYWJldGVzDQpgYGANCiMgQmFzaWMgUGxvdA0KDQojIyBhKSBTY2F0dGVyIHBsb3QNCg0KYGBge3J9DQpyZXN1bHRfY29sb3IgPSBhcy5udW1lcmljKGZhY3RvcihkaWFiZXRlcyRPdXRjb21lKSkgDQpwbG90KGRpYWJldGVzJEJNSSwgZGlhYmV0ZXMkR2x1Y29zZSwgDQogICAgIGNvbCA9IHJlc3VsdF9jb2xvciwNCiAgICAgcGNoID0gMjAsDQogICAgIHhsYWIgPSBzdWJzdGl0dXRlKHBhc3RlKGJvbGQoIkJNSSAoa2cvbV4yKSIpKSksDQogICAgIHlsYWIgPSBzdWJzdGl0dXRlKHBhc3RlKGJvbGQoIkdsdWNvc2UgKG1tb2wvTCkiKSkpLA0KICAgICBtYWluID0gJ0JNSSBWcyBHbHVjb3NlJywNCiAgICAgY29sLm1haW4gPSAnYmxhY2snLA0KICAgICBjZXgubWFpbiA9IDEuNQ0KICAgICkgDQpgYGANCiMjIFRoZSBzY2F0dGVyIHBsb3QgcmVwcmVzZW50cyB0aGUgcmVsYXRpb25zaGlwIGJldHdlZW4gQk1JIGFuZCBHbHVjb3NlIGxldmVsIG9mIHN1c3BlY3RlZCBkaWFiZXRlcyBwYXRpZW50cy4gRWFjaCBkb3QgZGVtb25zdHJhdGVzIHRoZSBvdXRjb21lIGFzIG5vIGRpYWJldGVzICgwKSBhbmQgZGlhYmV0ZXMoMSkgYmFzZWQgb24gdGhlIHR3byBmZWF0dXJlcy4NCg0KIyMgYikgSGlzdG9ncmFtIFBsb3QNCg0KYGBge3J9DQpoaXN0KGRpYWJldGVzJEluc3VsaW4sDQogICAgIG1haW4gPSAiSGlzdG9ncmFtIHBsb3Qgb2YgSW5zdWxpbiIsDQogICAgIGNvbCA9ICJwaW5rIiwNCiAgICAgeGxhYiA9ICJJbnN1bGluIExldmVsIChJVS9tTCkiKQ0KYGBgDQojIyBUaGUgaGlzdG9ncmFtIGlzIGEgZ3JhcGhpY2FsIHJlcHJlc2VudGF0aW9uIG9mIHRoZSBkaXN0cmlidXRpb24gb2YgJ2RpYWJldGVzJyBkYXRhc2V0LiBIZXJlLCB0aGUgeC1heGlzIHJlcHJlc2VudHMgaW5zdWxpbiBsZXZlbCByYW5nZSBh
bmQgdGhlIHktYXhpcyBzaG93cyB0aGUgZnJlcXVlbmN5IG9mIGluZGl2aWR1YWxzIGZhbGxpbmcgd2l0aGluIGVhY2ggcmFuZ2UuDQoNCiMjIGMpIEJveCBQbG90DQpgYGB7cn0NCmJveHBsb3QoZGlhYmV0ZXMkQWdlLA0KICAgICAgICBtYWluID0gJ0JveCBwbG90JywNCiAgICAgICAgY29sID0gJ2FxdWFtYXJpbmUnLA0KICAgICAgICB4bGFiID0gc3Vic3RpdHV0ZShwYXN0ZShib2xkKCdBZ2UnKSkpLA0KICAgICAgICB5bGFiID0gc3Vic3RpdHV0ZShwYXN0ZShib2xkKCdZZWFycycpKSkNCikNCmBgYA0KIyMgVGhlIGJveHBsb3QgZGlzcGxheXMga2V5IHN0YXRpc3RpY3Mgc3VjaCBhcyBtZWRpYW4sIHF1YXJ0aWxlcyBhbmQgcG90ZW50aWFsIG91dGxpZXJzLiBUaGlzIGJveHBsb3QgcmVwcmVzZW50cyB0aGUgYWdlIGRpc3RyaWJ1dGlvbiBvZiAnZGlhYmV0ZXMnIGRhdGFzZXQgc2hvd2luZyB0aGVpciBtYXhpbXVtIGFuZCBtaW5pbXVtIHZhbHVlIHdpdGhpbiBhIGNlcnRhaW4gcmFuZ2UsIHRoZSBpbnRlcnF1YXJ0aWxlIHJhbmdlIHdpdGggIGEgbGluZSBpbnNpZGUgaW5kaWNhdGluZyB0aGUgbWVkaWFuIGFnZS4gVGhlIHBvaW50cyBiZXlvbmQgdGhlIG1heGltdW0gcmFuZ2UgYXJlIGNvbnNpZGVyZWQgYXMgb3V0bGllcnMuDQoNCiMgZ2dwbG90DQoNCiMjIGEpIFNjYXR0ZXIgUGxvdA0KYGBge3J9DQpsaWJyYXJ5KGdncGxvdDIpDQoNCmRpYWJldGVzJE91dGNvbWUgPSBhcy5jaGFyYWN0ZXIoZGlhYmV0ZXMkT3V0Y29tZSkNCmdncGxvdChkaWFiZXRlcywgYWVzKHggPSBCTUksIHkgPSBHbHVjb3NlLCBjb2xvciA9IE91dGNvbWUpKSArIGdlb21fcG9pbnQoc2l6ZSA9IDEuNSkgKyANCiAgbGFicyh0aXRsZSA9ICdCTUkgVnMgR2x1Y29zZSAoVXNpbmcgZ2dwbG90KScsDQogICAgICAgeCA9ICdCTUkgKEtnL21eMiknLA0KICAgICAgIHkgPSAnR2x1Y29zZSAobW1vbC9MKScsDQogICAgICAgY2FwdGlvbiA9ICdTb3VyY2U6IElza3VsZ2hhcicpICsNCiAgc2NhbGVfY29sb3JfbWFudWFsKHZhbHVlcyA9IGMoIjAiID0gIm1hcm9vbiIsICIxIiA9ICJyb3lhbGJsdWUiKSkrDQogIHRoZW1lX21pbmltYWwoKSArDQogIHRoZW1lKA0KICAgIGxlZ2VuZC5wb3NpdGlvbiA9ICJ0b3AiLA0KICAgIHRleHQgPSBlbGVtZW50X3RleHQoY29sb3VyID0gJ2RhcmtzbGF0ZWdyYXknLCBzaXplID0gMTMpLCANCiAgICAgICAgICkNCmBgYA0KIyMgVGhlIGFzc29jaWF0aW9uIGJldHdlZW4gYSBzdXNwZWN0ZWQgZGlhYmV0ZXMgcGF0aWVudCdzIEJNSSBhbmQgZ2x1Y29zZSBsZXZlbCBpcyBzaG93biBpbiBhIHNjYXR0ZXIgcGxvdC4gQmFzZWQgb24gdGhlIHR3byBhdHRyaWJ1dGVzLCBlYWNoIGRvdCByZXByZXNlbnRzIHRoZSBjb25jbHVzaW9uIGFzIGVpdGhlciBubyBkaWFiZXRlcyAoMCkgb3IgZGlhYmV0ZXMgKDEpLg0KDQojIyBiKSBCb3ggUGxvdA0KYGBge3J9DQpkaWFiZXRlcyRPdXRjb21lID0gYXMuY2hhcmFjdGVyKGRpYWJldGVzJE91dGNvbWUp
DQpnZ3Bsb3QoZGlhYmV0ZXMsIGFlcyh4ID0gT3V0Y29tZSwgeSA9IFByZWduYW5jaWVzLCBmaWxsID0gT3V0Y29tZSkpICsNCmdlb21fYm94cGxvdCgpICsNCg0KICBsYWJzKHRpdGxlID0gIkJsb3hwbG90IHVzaW5nIGdncGxvdCIsDQogICAgICAgeCA9ICJQcmVnbmFuY2llcyIsDQogICAgICAgeSA9ICJ2YWx1ZSIsDQogICAgICAgY2FwdGlvbiA9ICJTb3VyY2U6IElza3VsZ2hhciIpICsNCiAgDQogIHRoZW1lKA0KICAgIGxlZ2VuZC5wb3NpdGlvbiA9ICJ0b3AiLCANCiAgICB0ZXh0ID0gZWxlbWVudF90ZXh0KGNvbG91ciA9ICdibGFjaycsIHNpemUgPSAxNCkpDQoNCmdncGxvdChkaWFiZXRlcywgYWVzKHggPSBPdXRjb21lLCB5ID0gR2x1Y29zZSwgZmlsbCA9IE91dGNvbWUpKSArDQpnZW9tX2JveHBsb3QoKSArDQoNCiAgbGFicyh0aXRsZSA9ICJCbG94cGxvdCB1c2luZyBnZ3Bsb3QiLA0KICAgICAgIHggPSAiR2x1Y29zZSIsDQogICAgICAgeSA9ICJ2YWx1ZSIsDQogICAgICAgY2FwdGlvbiA9ICJTb3VyY2U6IElza3VsZ2hhciIpICsNCiAgdGhlbWUoDQogICAgbGVnZW5kLnBvc2l0aW9uID0gInRvcCIsIA0KICAgIHRleHQgPSBlbGVtZW50X3RleHQoY29sb3VyID0gJ2JsYWNrJywgc2l6ZSA9IDE0KSkNCg0KZ2dwbG90KGRpYWJldGVzLCBhZXMoeCA9IE91dGNvbWUsIHkgPSBCbG9vZFByZXNzdXJlLCBmaWxsID0gT3V0Y29tZSkpICsNCmdlb21fYm94cGxvdCgpICsNCg0KICBsYWJzKHRpdGxlID0gIkJsb3hwbG90IHVzaW5nIGdncGxvdCIsDQogICAgICAgeCA9ICJCbG9vZFByZXNzdXJlIiwNCiAgICAgICB5ID0gInZhbHVlIiwNCiAgICAgICBjYXB0aW9uID0gIlNvdXJjZTogSXNrdWxnaGFyIikgKw0KICB0aGVtZSgNCiAgICBsZWdlbmQucG9zaXRpb24gPSAidG9wIiwgDQogICAgdGV4dCA9IGVsZW1lbnRfdGV4dChjb2xvdXIgPSAnYmxhY2snLCBzaXplID0gMTQpKQ0KDQpnZ3Bsb3QoZGlhYmV0ZXMsIGFlcyh4ID0gT3V0Y29tZSwgeSA9IFNraW5UaGlja25lc3MsIGZpbGwgPSBPdXRjb21lKSkgKw0KZ2VvbV9ib3hwbG90KCkgKw0KDQogIGxhYnModGl0bGUgPSAiQmxveHBsb3QgdXNpbmcgZ2dwbG90IiwNCiAgICAgICB4ID0gIlNraW5UaGlja25lc3MiLA0KICAgICAgIHkgPSAidmFsdWUiLA0KICAgICAgIGNhcHRpb24gPSAiU291cmNlOiBJc2t1bGdoYXIiKSArDQogIHRoZW1lKA0KICAgIGxlZ2VuZC5wb3NpdGlvbiA9ICJ0b3AiLCANCiAgICB0ZXh0ID0gZWxlbWVudF90ZXh0KGNvbG91ciA9ICdibGFjaycsIHNpemUgPSAxNCkpDQoNCmdncGxvdChkaWFiZXRlcywgYWVzKHggPSBPdXRjb21lLCB5ID0gSW5zdWxpbiwgZmlsbCA9IE91dGNvbWUpKSArDQpnZW9tX2JveHBsb3QoKSArDQoNCiAgbGFicyh0aXRsZSA9ICJCbG94cGxvdCB1c2luZyBnZ3Bsb3QiLA0KICAgICAgIHggPSAiSW5zdWxpbiIsDQogICAgICAgeSA9ICJ2YWx1ZSIsDQogICAgICAgY2FwdGlvbiA9ICJTb3VyY2U6IElza3VsZ2hhciIpICsNCiAgdGhl
bWUoDQogICAgbGVnZW5kLnBvc2l0aW9uID0gInRvcCIsIA0KICAgIHRleHQgPSBlbGVtZW50X3RleHQoY29sb3VyID0gJ2JsYWNrJywgc2l6ZSA9IDE0KSkNCg0KZ2dwbG90KGRpYWJldGVzLCBhZXMoeCA9IE91dGNvbWUsIHkgPSBCTUksIGZpbGwgPSBPdXRjb21lKSkgKw0KZ2VvbV9ib3hwbG90KCkgKw0KDQogIGxhYnModGl0bGUgPSAiQmxveHBsb3QgdXNpbmcgZ2dwbG90IiwNCiAgICAgICB4ID0gIkJNSSIsDQogICAgICAgeSA9ICJ2YWx1ZSIsDQogICAgICAgY2FwdGlvbiA9ICJTb3VyY2U6IElza3VsZ2hhciIpICsNCiAgdGhlbWUoDQogICAgbGVnZW5kLnBvc2l0aW9uID0gInRvcCIsIA0KICAgIHRleHQgPSBlbGVtZW50X3RleHQoY29sb3VyID0gJ2JsYWNrJywgc2l6ZSA9IDE0KSkNCg0KZ2dwbG90KGRpYWJldGVzLCBhZXMoeCA9IE91dGNvbWUsIHkgPSBEaWFiZXRlc1BlZGlncmVlRnVuY3Rpb24sIGZpbGwgPSBPdXRjb21lKSkgKw0KZ2VvbV9ib3hwbG90KCkgKw0KDQogIGxhYnModGl0bGUgPSAiQmxveHBsb3QgdXNpbmcgZ2dwbG90IiwNCiAgICAgICB4ID0gIkRpYWJldGVzUGVkaWdyZWVGdW5jdGlvbiIsDQogICAgICAgeSA9ICJ2YWx1ZSIsDQogICAgICAgY2FwdGlvbiA9ICJTb3VyY2U6IElza3VsZ2hhciIpICsNCiAgdGhlbWUoDQogICAgbGVnZW5kLnBvc2l0aW9uID0gInRvcCIsIA0KICAgIHRleHQgPSBlbGVtZW50X3RleHQoY29sb3VyID0gJ2JsYWNrJywgc2l6ZSA9IDE0KSkNCg0KZ2dwbG90KGRpYWJldGVzLCBhZXMoeCA9IE91dGNvbWUsIHkgPSBBZ2UsIGZpbGwgPSBPdXRjb21lKSkgKw0KZ2VvbV9ib3hwbG90KCkgKw0KDQogIGxhYnModGl0bGUgPSAiQmxveHBsb3QgdXNpbmcgZ2dwbG90IiwNCiAgICAgICB4ID0gIkFnZSIsDQogICAgICAgeSA9ICJ2YWx1ZSIsDQogICAgICAgY2FwdGlvbiA9ICJTb3VyY2U6IElza3VsZ2hhciIpICsNCiAgdGhlbWUoDQogICAgbGVnZW5kLnBvc2l0aW9uID0gInRvcCIsIA0KICAgIHRleHQgPSBlbGVtZW50X3RleHQoY29sb3VyID0gJ2JsYWNrJywgc2l6ZSA9IDE0KSkNCmBgYA0KDQojIyBUaGUgaW50ZXJxdWFydGlsZSByYW5nZSwgd2hpY2ggaW5jbHVkZXMgYSBsaW5lIHJlcHJlc2VudGluZyB0aGUgbWVkaWFuIGFnZSwgaXMgdXNlZCB0byBpbGx1c3RyYXRlIHRoZSBkaXN0cmlidXRpb24gb2YgdGhlIGFsbCBjb2x1bW5zIG9mICJkaWFiZXRlcyIgZGF0YXNldC4gSXQgYWxzbyBzaG93cyB0aGUgbWF4aW11bSBhbmQgbWluaW11bSB2YWx1ZXMgd2l0aGluIHRoYXQgcmFuZ2UuIEFueSBwb2ludCB0aGF0IGZhbGxzIG91dHNpZGUgb2YgdGhlIHJhbmdlIGFyZSBjb25zaWRlcmVkIGFzIG91dGxpZXJzLg==