# Read the semicolon-separated diabetes data set.
df <- read.csv("PenyakitDiabetes.csv", sep = ";")

# Label the measurement columns by position. In the printed head() of this
# data, column 1 holds small counts (0-2), column 2 holds glucose-range values
# (138, 84, 145, ...) and column 5 holds the insulin readings. Labelling
# columns 1 and 2 as "Glucose"/"Insulin" produced two "Insulin" columns and
# made every later `Glucose` / `Insulin` reference read the wrong data.
# NOTE(review): "Pregnancies" for column 1 is assumed from the usual layout of
# this data set -- confirm against the CSV header.
value_cols <- c(1, 2, 5)
colnames(df)[value_cols] <- c("Pregnancies", "Glucose", "Insulin")

# Coerce the labelled columns to numeric, accepting a decimal comma in the raw
# text (e.g. "3,5" -> 3.5). Values that are already numeric pass through
# unchanged.
df[value_cols] <- lapply(
  df[value_cols],
  function(x) as.numeric(gsub(",", ".", x, fixed = TRUE))
)

head(df)
## Glucose Insulin BloodPressure SkinThickness Insulin BMI
## 1 2 138 62 35 0 33.6
## 2 0 84 82 31 125 38.2
## 3 0 145 0 0 0 44.2
## 4 0 135 68 42 250 42.3
## 5 1 139 62 41 480 40.7
## 6 0 173 78 32 265 46.5
## Diabetes.PedigreeFunction Age Outcome X X.1
## 1 0.127 47 1 NA
## 2 0.233 23 0 NA
## 3 0.630 31 1 NA
## 4 0.365 24 1 NA
## 5 0.536 21 0 NA
## 6 1.159 58 0 NA
# Drop the two empty trailing columns. They are named "X" and "X.1"; column
# names are case-sensitive, so filtering on "x" / "x_1" removed nothing.
# drop = FALSE keeps the result a data frame no matter how many columns remain.
df <- df[, !(names(df) %in% c("X", "X.1")), drop = FALSE]
names(df)
## [1] "Glucose" "Insulin"
## [3] "BloodPressure" "SkinThickness"
## [5] "Insulin.1" "BMI"
## [7] "Diabetes.PedigreeFunction" "Age"
## [9] "Outcome" "X"
## [11] "X.1"
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Histogram of the `Glucose` column using 20 bins, drawn with pink bars.
# The title reads "Distribution of Glucose Levels"; the y-axis label reads
# "Frequency".
ggplot(data = df, mapping = aes(x = Glucose)) +
  geom_histogram(fill = "pink", bins = 20) +
  labs(title = "Distribusi Kadar Glukosa", x = "Glucose", y = "Frekuensi") +
  theme_minimal()

# Scatter plot of `Insulin` against `Glucose` (green points) with a black
# linear-model fit line drawn on top and no confidence band.
# The title reads "Relationship between Glucose and Insulin".
ggplot(data = df, mapping = aes(x = Glucose, y = Insulin)) +
  geom_point(colour = "green") +
  geom_smooth(method = "lm", colour = "black", se = FALSE) +
  labs(title = "Hubungan Glukosa dan Insulin", x = "Glucose", y = "Insulin") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
