library(neuralnet)
## Warning: package 'neuralnet' was built under R version 4.4.3
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Loading required package: lattice
library(mice)
## Warning: package 'mice' was built under R version 4.4.3
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(keras)
## Warning: package 'keras' was built under R version 4.4.3
## The keras package is deprecated. Please use the keras3 package instead.
## Alternatively, to continue using legacy keras, call `py_require_legacy_keras()`.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:neuralnet':
##
## compute
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
library(ROSE)
## Warning: package 'ROSE' was built under R version 4.4.3
## Loaded ROSE 0.0-4
library(class)
## Warning: package 'class' was built under R version 4.4.3
library(kknn)
## Warning: package 'kknn' was built under R version 4.4.3
##
## Attaching package: 'kknn'
## The following object is masked from 'package:caret':
##
## contr.dummy
# Load the diabetes workbook.
# The location was hard-coded to one machine. It can now be overridden with
# the DIABETES_DATA_PATH environment variable; when that variable is unset the
# original location is used, so existing runs behave as before.
data_path <- Sys.getenv(
  "DIABETES_DATA_PATH",
  unset = "C:/Users/62852/OneDrive/Documents/Semester 6/Jaringan Syaraf Tiruan/Diabetes Prediction.xlsx"
)
data <- read_excel(data_path)
# Show the imported table and its column names.
data
names(data)
## [1] "Age" "Gender" "BMI" "SBP"
## [5] "DBP" "FPG" "Chol" "Tri"
## [9] "HDL" "LDL" "ALT" "BUN"
## [13] "CCR" "FFPG" "smoking" "drinking"
## [17] "family_histroy" "Diabetes"
# Total number of missing cells in the table.
sum(is.na(data))
## [1] 0
# LVQ Balance: caret LVQ trained on the rebalanced training set ----
# Treat the outcome as a categorical class label.
data[["Diabetes"]] <- as.factor(data[["Diabetes"]])
# Keep the predictor columns only (the target is left out).
Variabel_Prediktor <- data[c(
  "Age", "Gender", "BMI", "SBP", "DBP", "FPG",
  "Chol", "Tri", "HDL", "LDL", "ALT", "BUN",
  "CCR", "FFPG", "smoking", "drinking", "family_histroy"
)]
# Min-max normalisation: linearly rescale a numeric vector to [0, 1].
#
# x: numeric vector (one predictor column).
# Returns a vector of the same length as x. Missing values stay missing and
# are ignored when the minimum and maximum are determined. A vector whose
# observed values are all equal is mapped to 0 instead of NaN (0 / 0), and an
# empty or all-missing vector is returned unchanged.
min_max_scaling <- function(x) {
  if (all(is.na(x))) {
    return(x)
  }
  lo <- min(x, na.rm = TRUE)
  span <- max(x, na.rm = TRUE) - lo
  # Exact comparison is intended: span is 0 only when min and max coincide.
  if (isTRUE(span == 0)) {
    # Constant column: avoid dividing by zero.
    return(x - lo)
  }
  (x - lo) / span
}
# Rescale every predictor column with min_max_scaling().
# NOTE(review): the minima/maxima are taken over all rows, before the
# train/test split below, so test rows influence the scaling. Consider
# deriving the scaling from the training rows only.
Normalisasi <- Variabel_Prediktor %>%
  lapply(min_max_scaling) %>%
  as.data.frame()
# Put the class label in front of the scaled predictors.
Data_norm <- data.frame(Diabetes = data[["Diabetes"]], Normalisasi)
# Seeded random split: 75 % of the rows for training, the rest for testing.
set.seed(476)
index <- sample(
  seq_len(nrow(Data_norm)),
  size = round(0.75 * nrow(Data_norm))
)
train_ <- Data_norm[index, , drop = FALSE]
test_ <- Data_norm[-index, , drop = FALSE]
# Handle class imbalance: resample the training rows with ROSE
# (method = "both"). Only the training set is resampled; test_ is untouched.
train_balanced <- ovun.sample(
  Diabetes ~ ., data = train_, method = "both"
)[["data"]]
# Resampling scheme: 10-fold cross-validation repeated 3 times.
control <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
# Fit the LVQ classifier through caret on the resampled training set.
model_lvq <- train(
  Diabetes ~ .,
  data = train_balanced, trControl = control, method = "lvq"
)
# Show the cross-validation results of the fitted model.
print(model_lvq)
## Learning Vector Quantization
##
## 3227 samples
## 17 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 3 times)
## Summary of sample sizes: 2904, 2904, 2905, 2905, 2904, 2905, ...
## Resampling results across tuning parameters:
##
## size k Accuracy Kappa
## 8 1 0.8849235 0.7707295
## 8 6 0.8702685 0.7419143
## 8 11 0.8758634 0.7533677
## 12 1 0.9018696 0.8043474
## 12 6 0.8863673 0.7736345
## 12 11 0.8936069 0.7883362
## 16 1 0.9044531 0.8096055
## 16 6 0.8915391 0.7842397
## 16 11 0.9037249 0.8083480
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 16 and k = 1.
# Classify the held-out test rows with the caret model.
predictions <- predict(model_lvq, newdata = test_)
# Confusion matrix against the true labels, with class "1" as positive.
confusionMatrix(
  data = predictions,
  reference = test_[["Diabetes"]],
  positive = "1"
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 727 55
## 1 29 265
##
## Accuracy : 0.9219
## 95% CI : (0.9043, 0.9373)
## No Information Rate : 0.7026
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8087
##
## Mcnemar's Test P-Value : 0.006377
##
## Sensitivity : 0.8281
## Specificity : 0.9616
## Pos Pred Value : 0.9014
## Neg Pred Value : 0.9297
## Prevalence : 0.2974
## Detection Rate : 0.2463
## Detection Prevalence : 0.2732
## Balanced Accuracy : 0.8949
##
## 'Positive' Class : 1
##
# LVQ with learning rate: balanced data, varying the olvq1 learning rate ----
# Min-max normalisation: linearly rescale a numeric vector to [0, 1].
#
# x: numeric vector (one predictor column).
# Returns a vector of the same length as x. Missing values stay missing and
# are ignored when the minimum and maximum are determined. A vector whose
# observed values are all equal is mapped to 0 instead of NaN (0 / 0), and an
# empty or all-missing vector is returned unchanged.
min_max_scaling <- function(x) {
  if (all(is.na(x))) {
    return(x)
  }
  lo <- min(x, na.rm = TRUE)
  span <- max(x, na.rm = TRUE) - lo
  # Exact comparison is intended: span is 0 only when min and max coincide.
  if (isTRUE(span == 0)) {
    # Constant column: avoid dividing by zero.
    return(x - lo)
  }
  (x - lo) / span
}
# Rescale every predictor column with min_max_scaling().
# NOTE(review): the minima/maxima are taken over all rows, before the
# train/test split below, so test rows influence the scaling. Consider
# deriving the scaling from the training rows only.
Normalisasi <- Variabel_Prediktor %>%
  lapply(min_max_scaling) %>%
  as.data.frame()
# Put the class label in front of the scaled predictors.
Data_norm <- data.frame(Diabetes = data[["Diabetes"]], Normalisasi)
# Seeded random split: 75 % of the rows for training, the rest for testing.
set.seed(476)
index <- sample(
  seq_len(nrow(Data_norm)),
  size = round(0.75 * nrow(Data_norm))
)
train_ <- Data_norm[index, , drop = FALSE]
test_ <- Data_norm[-index, , drop = FALSE]
# Handle class imbalance: resample the training rows with ROSE
# (method = "both"). Only the training set is resampled; test_ is untouched.
train_balanced <- ovun.sample(
  Diabetes ~ ., data = train_, method = "both"
)[["data"]]
# Numeric matrices for the LVQ functions. Column 1 is dropped as the label.
# NOTE(review): this presumes ovun.sample() keeps Diabetes as the first
# column of its output - confirm.
x_train <- as.matrix(train_balanced[-1])
y_train <- train_balanced[["Diabetes"]]
x_test <- as.matrix(test_[-1])
# Seeded initial codebook (size = 10), shared by the learning-rate runs below.
set.seed(123)
codebook <- lvqinit(x = x_train, cl = y_train, size = 10)
# ---- Learning rate 0.01 ----------------------------------------------
# Train from the shared initial codebook with alpha = 0.01, niter = 100.
model_lvq_001 <- olvq1(
  x = x_train, cl = y_train, codebk = codebook,
  niter = 100, alpha = 0.01
)
# Classify the test rows and align the factor levels with the reference.
pred_001 <- factor(
  lvqtest(model_lvq_001, x_test),
  levels = levels(test_[["Diabetes"]])
)
# Confusion matrix with class "1" as the positive class.
confusionMatrix(
  data = pred_001,
  reference = test_[["Diabetes"]],
  positive = "1"
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 756 79
## 1 0 241
##
## Accuracy : 0.9266
## 95% CI : (0.9093, 0.9414)
## No Information Rate : 0.7026
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8108
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.7531
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 0.9054
## Prevalence : 0.2974
## Detection Rate : 0.2240
## Detection Prevalence : 0.2240
## Balanced Accuracy : 0.8766
##
## 'Positive' Class : 1
##
# ---- Learning rate 0.05 ----------------------------------------------
# Train from the shared initial codebook with alpha = 0.05, niter = 100.
# NOTE(review): the RNG is not re-seeded before this fit. If olvq1() picks
# its training cases at random (presumably - confirm in the class package
# docs), this run sees different draws than the other learning rates, which
# confounds the comparison. Consider set.seed() before each fit.
model_lvq_005 <- olvq1(
  x = x_train, cl = y_train, codebk = codebook,
  niter = 100, alpha = 0.05
)
# Classify the test rows and align the factor levels with the reference.
pred_005 <- factor(
  lvqtest(model_lvq_005, x_test),
  levels = levels(test_[["Diabetes"]])
)
# Confusion matrix with class "1" as the positive class.
confusionMatrix(
  data = pred_005,
  reference = test_[["Diabetes"]],
  positive = "1"
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 755 76
## 1 1 244
##
## Accuracy : 0.9284
## 95% CI : (0.9114, 0.9431)
## No Information Rate : 0.7026
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8163
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.7625
## Specificity : 0.9987
## Pos Pred Value : 0.9959
## Neg Pred Value : 0.9085
## Prevalence : 0.2974
## Detection Rate : 0.2268
## Detection Prevalence : 0.2277
## Balanced Accuracy : 0.8806
##
## 'Positive' Class : 1
##
# ---- Learning rate 0.1 -----------------------------------------------
# Train from the shared initial codebook with alpha = 0.1, niter = 100.
# NOTE(review): the RNG is not re-seeded before this fit. If olvq1() picks
# its training cases at random (presumably - confirm in the class package
# docs), this run sees different draws than the other learning rates, which
# confounds the comparison. Consider set.seed() before each fit.
model_lvq_01 <- olvq1(
  x = x_train, cl = y_train, codebk = codebook,
  niter = 100, alpha = 0.1
)
# Classify the test rows and align the factor levels with the reference.
pred_01 <- factor(
  lvqtest(model_lvq_01, x_test),
  levels = levels(test_[["Diabetes"]])
)
# Confusion matrix with class "1" as the positive class.
confusionMatrix(
  data = pred_01,
  reference = test_[["Diabetes"]],
  positive = "1"
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 756 76
## 1 0 244
##
## Accuracy : 0.9294
## 95% CI : (0.9124, 0.9439)
## No Information Rate : 0.7026
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8186
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.7625
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 0.9087
## Prevalence : 0.2974
## Detection Rate : 0.2268
## Detection Prevalence : 0.2268
## Balanced Accuracy : 0.8812
##
## 'Positive' Class : 1
##
# LVQ Unbalance: caret LVQ trained on the original (unbalanced) training set ----
# Treat the outcome as a categorical class label.
data[["Diabetes"]] <- as.factor(data[["Diabetes"]])
# Keep the predictor columns only (the target is left out).
Variabel_Prediktor <- data[c(
  "Age", "Gender", "BMI", "SBP", "DBP", "FPG",
  "Chol", "Tri", "HDL", "LDL", "ALT", "BUN",
  "CCR", "FFPG", "smoking", "drinking", "family_histroy"
)]
# Min-max normalisation: linearly rescale a numeric vector to [0, 1].
#
# x: numeric vector (one predictor column).
# Returns a vector of the same length as x. Missing values stay missing and
# are ignored when the minimum and maximum are determined. A vector whose
# observed values are all equal is mapped to 0 instead of NaN (0 / 0), and an
# empty or all-missing vector is returned unchanged.
min_max_scaling <- function(x) {
  if (all(is.na(x))) {
    return(x)
  }
  lo <- min(x, na.rm = TRUE)
  span <- max(x, na.rm = TRUE) - lo
  # Exact comparison is intended: span is 0 only when min and max coincide.
  if (isTRUE(span == 0)) {
    # Constant column: avoid dividing by zero.
    return(x - lo)
  }
  (x - lo) / span
}
# Rescale every predictor column with min_max_scaling().
# NOTE(review): the minima/maxima are taken over all rows, before the
# train/test split below, so test rows influence the scaling. Consider
# deriving the scaling from the training rows only.
Normalisasi <- Variabel_Prediktor %>%
  lapply(min_max_scaling) %>%
  as.data.frame()
# Put the class label in front of the scaled predictors.
Data_norm <- data.frame(Diabetes = data[["Diabetes"]], Normalisasi)
# Seeded random split: 75 % of the rows for training, the rest for testing.
set.seed(476)
index <- sample(
  seq_len(nrow(Data_norm)),
  size = round(0.75 * nrow(Data_norm))
)
train_ <- Data_norm[index, , drop = FALSE]
test_ <- Data_norm[-index, , drop = FALSE]
# Resampling scheme: 10-fold cross-validation repeated 3 times.
control <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
# Fit the LVQ classifier through caret on the training rows as they are
# (no class rebalancing in this variant).
model_lvq_unbalance <- train(
  Diabetes ~ .,
  data = train_, trControl = control, method = "lvq"
)
# Classify the held-out test rows with the model fitted on unbalanced data.
predictions <- predict(model_lvq_unbalance, newdata = test_)
# Confusion matrix against the true labels, with class "1" as positive.
confusionMatrix(
  data = predictions,
  reference = test_[["Diabetes"]],
  positive = "1"
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 754 82
## 1 2 238
##
## Accuracy : 0.9219
## 95% CI : (0.9043, 0.9373)
## No Information Rate : 0.7026
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7987
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.7438
## Specificity : 0.9974
## Pos Pred Value : 0.9917
## Neg Pred Value : 0.9019
## Prevalence : 0.2974
## Detection Rate : 0.2212
## Detection Prevalence : 0.2230
## Balanced Accuracy : 0.8706
##
## 'Positive' Class : 1
##
# LVQ Unbalance with learning rate: unbalanced data, varying the olvq1 learning rate ----
# ==========================================
# LVQ ON UNBALANCED DATA WITH VARYING LEARNING RATE
# ==========================================
# Min-max normalisation: linearly rescale a numeric vector to [0, 1].
#
# x: numeric vector (one predictor column).
# Returns a vector of the same length as x. Missing values stay missing and
# are ignored when the minimum and maximum are determined. A vector whose
# observed values are all equal is mapped to 0 instead of NaN (0 / 0), and an
# empty or all-missing vector is returned unchanged.
min_max_scaling <- function(x) {
  if (all(is.na(x))) {
    return(x)
  }
  lo <- min(x, na.rm = TRUE)
  span <- max(x, na.rm = TRUE) - lo
  # Exact comparison is intended: span is 0 only when min and max coincide.
  if (isTRUE(span == 0)) {
    # Constant column: avoid dividing by zero.
    return(x - lo)
  }
  (x - lo) / span
}
# Rescale every predictor column with min_max_scaling().
# NOTE(review): the minima/maxima are taken over all rows, before the
# train/test split below, so test rows influence the scaling. Consider
# deriving the scaling from the training rows only.
Normalisasi <- Variabel_Prediktor %>%
  lapply(min_max_scaling) %>%
  as.data.frame()
# Put the class label in front of the scaled predictors.
Data_norm <- data.frame(Diabetes = data[["Diabetes"]], Normalisasi)
# Seeded random split: 75 % of the rows for training, the rest for testing.
set.seed(476)
index <- sample(
  seq_len(nrow(Data_norm)),
  size = round(0.75 * nrow(Data_norm))
)
train_ <- Data_norm[index, , drop = FALSE]
test_ <- Data_norm[-index, , drop = FALSE]
# Numeric matrices for the LVQ functions. Column 1 of train_/test_ is the
# Diabetes label and is dropped. The training rows are used as they are
# (no class rebalancing in this variant).
x_train <- as.matrix(train_[-1])
y_train <- train_[["Diabetes"]]
x_test <- as.matrix(test_[-1])
# Seeded initial codebook (size = 10), shared by the learning-rate runs below.
set.seed(123)
codebook <- lvqinit(x = x_train, cl = y_train, size = 10)
# ---- Learning rate 0.01 ----------------------------------------------
# Train from the shared initial codebook with alpha = 0.01, niter = 100.
model_lvq_001 <- olvq1(
  x = x_train, cl = y_train, codebk = codebook,
  niter = 100, alpha = 0.01
)
# Classify the test rows and align the factor levels with the reference.
pred_001 <- factor(
  lvqtest(model_lvq_001, x_test),
  levels = levels(test_[["Diabetes"]])
)
# Keep the confusion matrix (class "1" positive) and print it.
cm_001 <- confusionMatrix(
  data = pred_001,
  reference = test_[["Diabetes"]],
  positive = "1"
)
print(cm_001)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 756 102
## 1 0 218
##
## Accuracy : 0.9052
## 95% CI : (0.8861, 0.922)
## No Information Rate : 0.7026
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7502
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.6813
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 0.8811
## Prevalence : 0.2974
## Detection Rate : 0.2026
## Detection Prevalence : 0.2026
## Balanced Accuracy : 0.8406
##
## 'Positive' Class : 1
##
# ---- Learning rate 0.05 ----------------------------------------------
# Train from the shared initial codebook with alpha = 0.05, niter = 100.
# NOTE(review): the RNG is not re-seeded before this fit. If olvq1() picks
# its training cases at random (presumably - confirm in the class package
# docs), this run sees different draws than the other learning rates, which
# confounds the comparison. Consider set.seed() before each fit.
model_lvq_005 <- olvq1(
  x = x_train, cl = y_train, codebk = codebook,
  niter = 100, alpha = 0.05
)
# Classify the test rows and align the factor levels with the reference.
pred_005 <- factor(
  lvqtest(model_lvq_005, x_test),
  levels = levels(test_[["Diabetes"]])
)
# Keep the confusion matrix (class "1" positive) and print it.
cm_005 <- confusionMatrix(
  data = pred_005,
  reference = test_[["Diabetes"]],
  positive = "1"
)
print(cm_005)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 756 95
## 1 0 225
##
## Accuracy : 0.9117
## 95% CI : (0.8931, 0.928)
## No Information Rate : 0.7026
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.769
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.7031
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 0.8884
## Prevalence : 0.2974
## Detection Rate : 0.2091
## Detection Prevalence : 0.2091
## Balanced Accuracy : 0.8516
##
## 'Positive' Class : 1
##
# ---- Learning rate 0.1 -----------------------------------------------
# Train from the shared initial codebook with alpha = 0.1, niter = 100.
# NOTE(review): the RNG is not re-seeded before this fit. If olvq1() picks
# its training cases at random (presumably - confirm in the class package
# docs), this run sees different draws than the other learning rates, which
# confounds the comparison. Consider set.seed() before each fit.
model_lvq_01 <- olvq1(
  x = x_train, cl = y_train, codebk = codebook,
  niter = 100, alpha = 0.1
)
# Classify the test rows and align the factor levels with the reference.
pred_01 <- factor(
  lvqtest(model_lvq_01, x_test),
  levels = levels(test_[["Diabetes"]])
)
# Keep the confusion matrix (class "1" positive) and print it.
cm_01 <- confusionMatrix(
  data = pred_01,
  reference = test_[["Diabetes"]],
  positive = "1"
)
print(cm_01)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 756 97
## 1 0 223
##
## Accuracy : 0.9099
## 95% CI : (0.8911, 0.9263)
## No Information Rate : 0.7026
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7636
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.6969
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 0.8863
## Prevalence : 0.2974
## Detection Rate : 0.2072
## Detection Prevalence : 0.2072
## Balanced Accuracy : 0.8484
##
## 'Positive' Class : 1
##
# LVQ 1 ----
# Treat the outcome as a categorical class label.
data[["Diabetes"]] <- as.factor(data[["Diabetes"]])
# Keep the predictor columns only (the target is left out).
Variabel_Prediktor <- data[c(
  "Age", "Gender", "BMI", "SBP", "DBP", "FPG",
  "Chol", "Tri", "HDL", "LDL", "ALT", "BUN",
  "CCR", "FFPG", "smoking", "drinking", "family_histroy"
)]
# Min-max normalisation: linearly rescale a numeric vector to [0, 1].
#
# x: numeric vector (one predictor column).
# Returns a vector of the same length as x. Missing values stay missing and
# are ignored when the minimum and maximum are determined. A vector whose
# observed values are all equal is mapped to 0 instead of NaN (0 / 0), and an
# empty or all-missing vector is returned unchanged.
min_max_scaling <- function(x) {
  if (all(is.na(x))) {
    return(x)
  }
  lo <- min(x, na.rm = TRUE)
  span <- max(x, na.rm = TRUE) - lo
  # Exact comparison is intended: span is 0 only when min and max coincide.
  if (isTRUE(span == 0)) {
    # Constant column: avoid dividing by zero.
    return(x - lo)
  }
  (x - lo) / span
}
# Rescale every predictor column with min_max_scaling().
# NOTE(review): the minima/maxima are taken over all rows, i.e. before the
# data are split into train and test, so test rows influence the scaling.
# Consider deriving the scaling from the training rows only.
Normalisasi <- Variabel_Prediktor %>%
  lapply(min_max_scaling) %>%
  as.data.frame()
# Put the class label in front of the scaled predictors.
Data_norm <- data.frame(Diabetes = data[["Diabetes"]], Normalisasi)
# Seeded random split: 75 % of the rows for training, the rest for testing.
# The split was previously written out twice with the same seed. Both copies
# produced the same index and left the RNG in the same state, so a single
# copy gives identical train_/test_ and identical downstream results.
set.seed(476)
index <- sample(
  seq_len(nrow(Data_norm)),
  size = round(0.75 * nrow(Data_norm))
)
train_ <- Data_norm[index, , drop = FALSE]
test_ <- Data_norm[-index, , drop = FALSE]
# Resampling scheme: 10-fold cross-validation repeated 3 times.
# NOTE(review): `control` is not referenced by the olvq1/lvq2/lvq3 fits that
# follow in the visible part of the script. Drop it if nothing later needs it.
control <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
# Handle class imbalance: resample the training rows with ROSE
# (method = "both"). Only the training set is resampled; test_ is untouched.
train_balanced <- ovun.sample(
  Diabetes ~ ., data = train_, method = "both"
)[["data"]]
# Numeric matrices for the LVQ functions. Column 1 is dropped as the label.
# NOTE(review): this presumes ovun.sample() keeps Diabetes as the first
# column of its output - confirm.
x_train <- as.matrix(train_balanced[-1])
y_train <- train_balanced[["Diabetes"]]
x_test <- as.matrix(test_[-1])
# Build the "LVQ 1" model: seeded initial codebook (size = 10), then training
# with the function's default niter and alpha (none are passed here).
# NOTE(review): this step is labelled LVQ 1 but calls olvq1(). The class
# package also provides lvq1() - confirm which one is intended.
set.seed(123)
codebook1 <- lvqinit(x = x_train, cl = y_train, size = 10)
model_lvq1 <- olvq1(x = x_train, cl = y_train, codebk = codebook1)
# Classify the test rows with the trained codebook and align the factor
# levels with the reference labels.
predictions <- factor(
  lvqtest(model_lvq1, x_test),
  levels = levels(test_[["Diabetes"]])
)
# Confusion matrix against the true labels, with class "1" as positive.
confusionMatrix(
  data = predictions,
  reference = test_[["Diabetes"]],
  positive = "1"
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 756 76
## 1 0 244
##
## Accuracy : 0.9294
## 95% CI : (0.9124, 0.9439)
## No Information Rate : 0.7026
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8186
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.7625
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 0.9087
## Prevalence : 0.2974
## Detection Rate : 0.2268
## Detection Prevalence : 0.2268
## Balanced Accuracy : 0.8812
##
## 'Positive' Class : 1
##
# LVQ 2 ----
# Build the LVQ2 model: seeded initial codebook (size = 10), then lvq2()
# training with the function's default settings (none are passed here).
# Reuses x_train / y_train prepared earlier in the script.
set.seed(123)
codebook2 <- lvqinit(x = x_train, cl = y_train, size = 10)
model_lvq2 <- lvq2(x = x_train, cl = y_train, codebk = codebook2)
# Classify the test rows with the LVQ2 codebook and align the factor levels
# with the reference labels.
predictions <- factor(
  lvqtest(model_lvq2, x_test),
  levels = levels(test_[["Diabetes"]])
)
# Confusion matrix against the true labels, with class "1" as positive.
confusionMatrix(
  data = predictions,
  reference = test_[["Diabetes"]],
  positive = "1"
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 755 76
## 1 1 244
##
## Accuracy : 0.9284
## 95% CI : (0.9114, 0.9431)
## No Information Rate : 0.7026
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8163
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.7625
## Specificity : 0.9987
## Pos Pred Value : 0.9959
## Neg Pred Value : 0.9085
## Prevalence : 0.2974
## Detection Rate : 0.2268
## Detection Prevalence : 0.2277
## Balanced Accuracy : 0.8806
##
## 'Positive' Class : 1
##
# LVQ 3 ----
# Build the LVQ3 model: seeded initial codebook (size = 10), then lvq3()
# training with the function's default settings (none are passed here).
# Reuses x_train / y_train prepared earlier in the script.
# NOTE(review): unlike the LVQ 1 / LVQ 2 steps (codebook1, codebook2), this
# assigns to the shared name `codebook`, replacing the codebook built for
# the learning-rate runs. Consider a dedicated name.
set.seed(123)
codebook <- lvqinit(x = x_train, cl = y_train, size = 10)
model_lvq3 <- lvq3(x = x_train, cl = y_train, codebk = codebook)
# Classify the test rows with the LVQ3 codebook and align the factor levels
# with the reference labels.
predictions <- factor(
  lvqtest(model_lvq3, x_test),
  levels = levels(test_[["Diabetes"]])
)
# Confusion matrix against the true labels, with class "1" as positive.
confusionMatrix(
  data = predictions,
  reference = test_[["Diabetes"]],
  positive = "1"
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 755 76
## 1 1 244
##
## Accuracy : 0.9284
## 95% CI : (0.9114, 0.9431)
## No Information Rate : 0.7026
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8163
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.7625
## Specificity : 0.9987
## Pos Pred Value : 0.9959
## Neg Pred Value : 0.9085
## Prevalence : 0.2974
## Detection Rate : 0.2268
## Detection Prevalence : 0.2277
## Balanced Accuracy : 0.8806
##
## 'Positive' Class : 1
##