# Load packages
library(class)
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Loading required package: lattice
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Read the dataset from CSV (comma-separated, first row holds column names)
# and print per-column summary statistics.
data <- read.csv(
  file = "C:/Users/nandh/Desktop/SEMESTER 6/DATMIN/dataset_knn_olahraga.csv",
  header = TRUE,
  sep = ","
)
summary(object = data)
## usia bmi jam_tidur kebutuhan_olahraga
## Min. :18.00 Min. :14.70 Min. : 3.000 Length:200
## 1st Qu.:31.00 1st Qu.:21.88 1st Qu.: 6.100 Class :character
## Median :41.00 Median :24.60 Median : 7.150 Mode :character
## Mean :41.79 Mean :25.03 Mean : 7.052
## 3rd Qu.:52.25 3rd Qu.:28.02 3rd Qu.: 8.100
## Max. :65.00 Max. :41.20 Max. :10.900
# Count how many cells of the data frame are missing.
length(which(is.na(data)))
## [1] 0
# Turn the character class label into a factor so it can be used as the
# classification target.
data[["kebutuhan_olahraga"]] <- factor(data[["kebutuhan_olahraga"]])
# Min-max normalisation: linearly rescale a numeric vector to [0, 1].
#
# Args:
#   x:     numeric vector to rescale.
#   na.rm: if TRUE, NAs are ignored when finding the min and max and stay NA
#          in the result; if FALSE (default), any NA in x makes the whole
#          result NA, matching the plain (x - min) / (max - min) formula.
#
# Returns:
#   A numeric vector the same length as x. A vector whose non-missing values
#   are all equal is mapped to 0 instead of NaN (0 / 0), so a constant column
#   does not introduce missing values downstream.
normalize <- function(x, na.rm = FALSE) {
  if (length(x) == 0L) {
    return(numeric(0))
  }
  rng <- range(x, na.rm = na.rm)
  span <- rng[2] - rng[1]
  if (!is.na(span) && span == 0) {
    # Multiplying by 0 keeps NA positions and names while forcing doubles.
    return(0 * (x - rng[1]))
  }
  (x - rng[1]) / span
}
# Rescale the three numeric predictors column by column with normalize().
# NOTE(review): the min/max come from the full dataset, before the split
# below, so held-out rows influence the scaling - confirm this is intended.
data_norm <- data[, c("usia", "bmi", "jam_tidur")]
data_norm[] <- lapply(data_norm, normalize)
# Split into training and test sets: createDataPartition() is asked for 80% of
# the rows, sampled with respect to the class label; the seed fixes the draw.
set.seed(123)
train_index <- createDataPartition(
  y = data[["kebutuhan_olahraga"]],
  p = 0.8,
  list = FALSE
)
# Labels for the selected rows and for the remaining rows.
train_labels <- data[["kebutuhan_olahraga"]][train_index]
test_labels <- data[["kebutuhan_olahraga"]][-train_index]
# Matching normalised predictors.
train_data <- data_norm[train_index, ]
test_data <- data_norm[-train_index, ]
# k-nearest-neighbour classification of the test rows with k = 5, using the
# normalised training rows and their labels.
predicted <- knn(train_data, test_data, train_labels, k = 5)
# Evaluation: compare predictions against the true test labels.
conf_matrix <- confusionMatrix(data = predicted, reference = test_labels)
print(conf_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Perlu Olahraga Tidak Perlu Olahraga
## Perlu Olahraga 11 11
## Tidak Perlu Olahraga 9 8
##
## Accuracy : 0.4872
## 95% CI : (0.3242, 0.6522)
## No Information Rate : 0.5128
## P-Value [Acc > NIR] : 0.6847
##
## Kappa : -0.029
##
## Mcnemar's Test P-Value : 0.8231
##
## Sensitivity : 0.5500
## Specificity : 0.4211
## Pos Pred Value : 0.5000
## Neg Pred Value : 0.4706
## Prevalence : 0.5128
## Detection Rate : 0.2821
## Detection Prevalence : 0.5641
## Balanced Accuracy : 0.4855
##
## 'Positive' Class : Perlu Olahraga
##
# Case-study prediction for a single new person ("Carmen").
# Per-feature minimum and maximum of the raw (unnormalised) data, so the new
# observation is rescaled with the same parameters used to build data_norm.
# vapply() guarantees one numeric value per column (sapply() is not
# type-stable).
min_vals <- vapply(data[, c("usia", "bmi", "jam_tidur")], min, numeric(1))
max_vals <- vapply(data[, c("usia", "bmi", "jam_tidur")], max, numeric(1))
input_Carmen <- data.frame(
  usia = 22,
  bmi = 30,
  jam_tidur = 7
)
# Min-max normalise the input: (value - min) / (max - min). Columns are
# selected by name so they line up with min_vals/max_vals regardless of the
# order in which input_Carmen was declared.
input_Carmen_norm <- as.data.frame(
  scale(
    input_Carmen[, names(min_vals)],
    center = min_vals,
    scale = max_vals - min_vals
  )
)
# KNN prediction (k = 5) against the normalised training set.
kebutuhan_Carmen <- knn(
  train = train_data,
  test = input_Carmen_norm,
  cl = train_labels,
  k = 5
)
cat("Kebutuhan Olahraga untuk Carmen:", as.character(kebutuhan_Carmen), "\n")
## Kebutuhan Olahraga untuk Carmen: Perlu Olahraga
library(ggplot2)
# Colour column for the plot: a copy of kebutuhan_olahraga.
data[["label"]] <- data[["kebutuhan_olahraga"]]
# Carmen's row keeps its raw (unnormalised) values; its label column holds the
# KNN prediction as text.
input_Carmen[["label"]] <- as.character(kebutuhan_Carmen)
# Scatter plot of age against BMI coloured by class, with Carmen drawn as a
# black triangle and annotated by name.
ggplot(data = data, mapping = aes(x = usia, y = bmi, color = label)) +
  geom_point(size = 3, alpha = 0.6) +
  geom_point(
    data = input_Carmen,
    mapping = aes(x = usia, y = bmi),
    shape = 17, size = 5, stroke = 2, color = "black"
  ) +
  annotate(
    geom = "text",
    x = input_Carmen$usia + 1, y = input_Carmen$bmi,
    label = "Carmen", size = 4.5, hjust = 0, vjust = -1
  ) +
  scale_color_manual(
    values = c(
      "Perlu Olahraga" = "#E74C3C",
      "Tidak Perlu Olahraga" = "#2ECC71"
    )
  ) +
  labs(
    title = "Visualisasi Kebutuhan Olahraga untuk Carmen",
    x = "Usia",
    y = "BMI",
    color = "Kebutuhan Olahraga"
  ) +
  theme_minimal()