LA Assignment

Author

Team-AURA

Problem Statement

Plot a slope chart comparing values across two categories derived from the diabetes dataset.

Step 1: Load Required Libraries

We first load the ggplot2 package, which is used for data visualization, along with dplyr and tidyr, which are used to summarise and reshape the data. ggrepel automatically adjusts text labels so they don’t overlap.

library(ggplot2)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(tidyr)
library(ggrepel)
Warning: package 'ggrepel' was built under R version 4.5.3

Step 2: Load the Dataset

# Read the diabetes CSV into a data frame.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this exact file exists.
data <- read.csv(file = "C:/Users/rnaya/Downloads/diabetes (2).csv")

After loading, we inspect the structure and column types of the data, and then display the first few rows.

# Print a compact summary of the data frame: its dimensions and, for each
# column, the type and the first few values.
str(object = data)
'data.frame':   768 obs. of  9 variables:
 $ Pregnancies             : int  6 1 8 1 0 5 3 10 2 8 ...
 $ Glucose                 : int  148 85 183 89 137 116 78 115 197 125 ...
 $ BloodPressure           : int  72 66 64 66 40 74 50 0 70 96 ...
 $ SkinThickness           : int  35 29 0 23 35 0 32 0 45 0 ...
 $ Insulin                 : int  0 0 0 94 168 0 88 0 543 0 ...
 $ BMI                     : num  33.6 26.6 23.3 28.1 43.1 25.6 31 35.3 30.5 0 ...
 $ DiabetesPedigreeFunction: num  0.627 0.351 0.672 0.167 2.288 ...
 $ Age                     : int  50 31 32 21 33 30 26 29 53 54 ...
 $ Outcome                 : int  1 0 1 0 1 0 1 0 1 1 ...
# Preview the first six rows of the data frame.
head(x = data, n = 6L)
  Pregnancies Glucose BloodPressure SkinThickness Insulin  BMI
1           6     148            72            35       0 33.6
2           1      85            66            29       0 26.6
3           8     183            64             0       0 23.3
4           1      89            66            23      94 28.1
5           0     137            40            35     168 43.1
6           5     116            74             0       0 25.6
  DiabetesPedigreeFunction Age Outcome
1                    0.627  50       1
2                    0.351  31       0
3                    0.672  32       1
4                    0.167  21       0
5                    2.288  33       1
6                    0.201  30       0

Step 3: Creating Groups

Since our dataset is medical, we convert it into a comparison form: for each Outcome group (0 or 1) we compute the mean of each selected health measure.

# Mean of each selected health measure per Outcome group.
#
# Zeros in Glucose, BMI, Insulin and BloodPressure appear to encode missing
# measurements (a blood pressure or BMI of 0 is not physiologically
# possible), so `na.rm = TRUE` on its own never drops them and the group
# means are pulled downward. Recode those zeros to NA before averaging.
# NOTE(review): confirm against the dataset documentation that 0 means
# "not recorded" for every column listed here.
summary_data <- data %>%
  mutate(
    across(c(Glucose, BMI, Insulin, BloodPressure), ~ na_if(.x, 0))
  ) %>%
  group_by(Outcome) %>%
  summarise(
    Glucose = mean(Glucose, na.rm = TRUE),
    BMI = mean(BMI, na.rm = TRUE),
    Age = mean(Age, na.rm = TRUE),
    Insulin = mean(Insulin, na.rm = TRUE),
    BloodPressure = mean(BloodPressure, na.rm = TRUE)
  )

Step 4: Convert Wide Data → Long Format

# Reshape the per-group means from wide to long: every column except Outcome
# is stacked into a name column ("Variable") and a value column ("Value"),
# giving one row per Outcome/Variable pair.
long_data <- pivot_longer(
  summary_data,
  cols = !Outcome,
  names_to = "Variable",
  values_to = "Value"
)

Step 5: Convert Outcome to Labels

# Replace the numeric Outcome codes with readable labels (0 becomes
# "Non-Diabetic", any other value becomes "Diabetic") and store them as a
# factor whose level order puts "Non-Diabetic" first.
long_data <- long_data %>%
  mutate(
    Outcome = factor(
      ifelse(Outcome == 0, "Non-Diabetic", "Diabetic"),
      levels = c("Non-Diabetic", "Diabetic")
    )
  )

Step 6: Plot Slope Chart

# Base plot only: sets the data and the aesthetic mappings but adds no
# layers, so no lines or points are drawn yet.
ggplot(
  data = long_data,
  mapping = aes(x = Outcome, y = Value, group = Variable, color = Variable)
)

# First version of the slope chart: one line per Variable joining its two
# group means, with the variable name written beside each end point.
ggplot(long_data, aes(x = Outcome, y = Value, group = Variable, color = Variable)) +
  # `linewidth` replaces `size` for line widths (deprecated in ggplot2 3.4.0).
  geom_line(linewidth = 1.2) +
  geom_point(size = 3) +

  # Left-hand labels: hjust above 1 places the text to the left of the point.
  geom_text(data = long_data %>% filter(Outcome == "Non-Diabetic"),
            aes(label = Variable),
            hjust = 1.2) +

  # Right-hand labels: negative hjust places the text to the right of the point.
  geom_text(data = long_data %>% filter(Outcome == "Diabetic"),
            aes(label = Variable),
            hjust = -0.2)
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

# Final slope chart: one line per Variable joining its mean in the
# Non-Diabetic group to its mean in the Diabetic group, with repelled text
# labels at both ends, a title, axis labels, and the legend on top.
ggplot(long_data, aes(x = Outcome, y = Value, group = Variable, color = Variable)) +
  # `linewidth` replaces `size` for line widths (deprecated in ggplot2 3.4.0).
  geom_line(linewidth = 1.2) +
  geom_point(size = 3) +

  # Left-hand labels: nudged left of the points; repulsion is restricted to
  # the y direction.
  geom_text_repel(data = long_data %>% filter(Outcome == "Non-Diabetic"),
                  aes(label = Variable),
                  nudge_x = -0.2,
                  direction = "y") +

  # Right-hand labels: nudged right of the points; repulsion is restricted to
  # the y direction.
  geom_text_repel(data = long_data %>% filter(Outcome == "Diabetic"),
                  aes(label = Variable),
                  nudge_x = 0.2,
                  direction = "y") +

  labs(
    title = "Slope Chart Comparing Health Factors (Diabetic vs Non-Diabetic)",
    x = "Category",
    y = "Average Values"
  ) +

  theme_minimal() +
  theme(legend.position = "top")