library(neuralnet)
## Warning: package 'neuralnet' was built under R version 4.4.3
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Loading required package: lattice
library(mice)
## Warning: package 'mice' was built under R version 4.4.3
## 
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
## 
##     filter
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
library(keras)
## Warning: package 'keras' was built under R version 4.4.3
## The keras package is deprecated. Please use the keras3 package instead.
## Alternatively, to continue using legacy keras, call `py_require_legacy_keras()`.
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:neuralnet':
## 
##     compute
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)
library(ROSE)
## Warning: package 'ROSE' was built under R version 4.4.3
## Loaded ROSE 0.0-4
library(class)
## Warning: package 'class' was built under R version 4.4.3
library(kknn)
## Warning: package 'kknn' was built under R version 4.4.3
## 
## Attaching package: 'kknn'
## The following object is masked from 'package:caret':
## 
##     contr.dummy
data <- read_excel("C:/Users/62852/OneDrive/Documents/Semester 6/Jaringan Syaraf Tiruan/Diabetes Prediction.xlsx")
data
names(data)
##  [1] "Age"            "Gender"         "BMI"            "SBP"           
##  [5] "DBP"            "FPG"            "Chol"           "Tri"           
##  [9] "HDL"            "LDL"            "ALT"            "BUN"           
## [13] "CCR"            "FFPG"           "smoking"        "drinking"      
## [17] "family_histroy" "Diabetes"
sum(is.na(data))
## [1] 0

LVQ Balance

# MENGUBAH TARGET MENJADI FAKTOR
data$Diabetes <- as.factor(data$Diabetes)

# Menghapus variabel target
Variabel_Prediktor <- data[, c(
  "Age",
  "Gender",
  "BMI",
  "SBP",
  "DBP",
  "FPG",
  "Chol",
  "Tri",
  "HDL",
  "LDL",
  "ALT",
  "BUN",
  "CCR",
  "FFPG",
  "smoking",
  "drinking",
  "family_histroy"
)]

# NORMALISASI MIN-MAX
min_max_scaling <- function(x) {
  (x - min(x)) / (max(x) - min(x))
}

Normalisasi <- as.data.frame(lapply(Variabel_Prediktor,
                                    min_max_scaling))

# Menggabungkan target dan data normalisasi
Data_norm <- data.frame(
  Diabetes = data$Diabetes,
  Normalisasi
)

# SPLITTING DATA
set.seed(476)

index <- sample(
  1:nrow(Data_norm),
  round(0.75 * nrow(Data_norm))
)

train_ <- Data_norm[index, ]
test_  <- Data_norm[-index, ]

# HANDLING IMBALANCE DATA
train_balanced <- ovun.sample(
  Diabetes ~ .,
  data = train_,
  method = "both"
)$data

# CROSS VALIDATION
control <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3
)

# MEMBANGUN MODEL LVQ
model_lvq <- train(
  Diabetes ~ .,
  data = train_balanced,
  method = "lvq",
  trControl = control
)

# HASIL MODEL
print(model_lvq)
## Learning Vector Quantization 
## 
## 3227 samples
##   17 predictor
##    2 classes: '0', '1' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 3 times) 
## Summary of sample sizes: 2904, 2904, 2905, 2905, 2904, 2905, ... 
## Resampling results across tuning parameters:
## 
##   size  k   Accuracy   Kappa    
##    8     1  0.8849235  0.7707295
##    8     6  0.8702685  0.7419143
##    8    11  0.8758634  0.7533677
##   12     1  0.9018696  0.8043474
##   12     6  0.8863673  0.7736345
##   12    11  0.8936069  0.7883362
##   16     1  0.9044531  0.8096055
##   16     6  0.8915391  0.7842397
##   16    11  0.9037249  0.8083480
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 16 and k = 1.
# PREDIKSI DATA TESTING
predictions <- predict(
  model_lvq,
  newdata = test_
)

# CONFUSION MATRIX
confusionMatrix(
  predictions,
  test_$Diabetes,
  positive = "1"
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 727  55
##          1  29 265
##                                           
##                Accuracy : 0.9219          
##                  95% CI : (0.9043, 0.9373)
##     No Information Rate : 0.7026          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8087          
##                                           
##  Mcnemar's Test P-Value : 0.006377        
##                                           
##             Sensitivity : 0.8281          
##             Specificity : 0.9616          
##          Pos Pred Value : 0.9014          
##          Neg Pred Value : 0.9297          
##              Prevalence : 0.2974          
##          Detection Rate : 0.2463          
##    Detection Prevalence : 0.2732          
##       Balanced Accuracy : 0.8949          
##                                           
##        'Positive' Class : 1               
## 

LVQ dengan Learning Rate

# NORMALISASI MIN-MAX
min_max_scaling <- function(x) {
  (x - min(x)) / (max(x) - min(x))
}

# NORMALISASI DATA
Normalisasi <- as.data.frame(
  lapply(
    Variabel_Prediktor,
    min_max_scaling
  )
)

# MENGGABUNGKAN TARGET DAN DATA NORMALISASI
Data_norm <- data.frame(
  Diabetes = data$Diabetes,
  Normalisasi
)

# SPLITTING DATA
set.seed(476)

index <- sample(
  1:nrow(Data_norm),
  round(0.75 * nrow(Data_norm))
)

train_ <- Data_norm[index, ]
test_  <- Data_norm[-index, ]

# HANDLING IMBALANCE DATA
train_balanced <- ovun.sample(
  Diabetes ~ .,
  data = train_,
  method = "both"
)$data

# MEMBUAT DATA TRAINING NUMERIK
x_train <- as.matrix(
  train_balanced[, -1]
)

y_train <- train_balanced$Diabetes

x_test <- as.matrix(
  test_[, -1]
)

# MEMBANGUN CODEBOOK AWAL
set.seed(123)

codebook <- lvqinit(
  x_train,
  y_train,
  size = 10
)

# ==========================================
# LEARNING RATE 0.01
# ==========================================

model_lvq_001 <- olvq1(
  x_train,
  y_train,
  codebook,
  alpha = 0.01,
  niter = 100
)

pred_001 <- lvqtest(
  model_lvq_001,
  x_test
)

pred_001 <- factor(
  pred_001,
  levels = levels(test_$Diabetes)
)

confusionMatrix(
  pred_001,
  test_$Diabetes,
  positive = "1"
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 756  79
##          1   0 241
##                                           
##                Accuracy : 0.9266          
##                  95% CI : (0.9093, 0.9414)
##     No Information Rate : 0.7026          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8108          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.7531          
##             Specificity : 1.0000          
##          Pos Pred Value : 1.0000          
##          Neg Pred Value : 0.9054          
##              Prevalence : 0.2974          
##          Detection Rate : 0.2240          
##    Detection Prevalence : 0.2240          
##       Balanced Accuracy : 0.8766          
##                                           
##        'Positive' Class : 1               
## 
# ==========================================
# LEARNING RATE 0.05
# ==========================================

model_lvq_005 <- olvq1(
  x_train,
  y_train,
  codebook,
  alpha = 0.05,
  niter = 100
)

pred_005 <- lvqtest(
  model_lvq_005,
  x_test
)

pred_005 <- factor(
  pred_005,
  levels = levels(test_$Diabetes)
)

confusionMatrix(
  pred_005,
  test_$Diabetes,
  positive = "1"
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 755  76
##          1   1 244
##                                           
##                Accuracy : 0.9284          
##                  95% CI : (0.9114, 0.9431)
##     No Information Rate : 0.7026          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8163          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.7625          
##             Specificity : 0.9987          
##          Pos Pred Value : 0.9959          
##          Neg Pred Value : 0.9085          
##              Prevalence : 0.2974          
##          Detection Rate : 0.2268          
##    Detection Prevalence : 0.2277          
##       Balanced Accuracy : 0.8806          
##                                           
##        'Positive' Class : 1               
## 
# ==========================================
# LEARNING RATE 0.1
# ==========================================

model_lvq_01 <- olvq1(
  x_train,
  y_train,
  codebook,
  alpha = 0.1,
  niter = 100
)

pred_01 <- lvqtest(
  model_lvq_01,
  x_test
)

pred_01 <- factor(
  pred_01,
  levels = levels(test_$Diabetes)
)

confusionMatrix(
  pred_01,
  test_$Diabetes,
  positive = "1"
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 756  76
##          1   0 244
##                                           
##                Accuracy : 0.9294          
##                  95% CI : (0.9124, 0.9439)
##     No Information Rate : 0.7026          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8186          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.7625          
##             Specificity : 1.0000          
##          Pos Pred Value : 1.0000          
##          Neg Pred Value : 0.9087          
##              Prevalence : 0.2974          
##          Detection Rate : 0.2268          
##    Detection Prevalence : 0.2268          
##       Balanced Accuracy : 0.8812          
##                                           
##        'Positive' Class : 1               
## 

LVQ Unbalance

# MENGUBAH TARGET MENJADI FAKTOR
data$Diabetes <- as.factor(data$Diabetes)

# Menghapus variabel target
Variabel_Prediktor <- data[, c(
  "Age",
  "Gender",
  "BMI",
  "SBP",
  "DBP",
  "FPG",
  "Chol",
  "Tri",
  "HDL",
  "LDL",
  "ALT",
  "BUN",
  "CCR",
  "FFPG",
  "smoking",
  "drinking",
  "family_histroy"
)]

# NORMALISASI MIN-MAX
min_max_scaling <- function(x) {
  (x - min(x)) / (max(x) - min(x))
}

Normalisasi <- as.data.frame(lapply(Variabel_Prediktor,
                                    min_max_scaling))

# Menggabungkan target dan data normalisasi
Data_norm <- data.frame(
  Diabetes = data$Diabetes,
  Normalisasi
)

# SPLITTING DATA
set.seed(476)

index <- sample(
  1:nrow(Data_norm),
  round(0.75 * nrow(Data_norm))
)

train_ <- Data_norm[index, ]
test_  <- Data_norm[-index, ]

# CROSS VALIDATION
control <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3
)

# MEMBANGUN MODEL LVQ
model_lvq_unbalance <- train(
  Diabetes ~ .,
  data = train_,
  method = "lvq",
  trControl = control
)

# PREDIKSI DATA TESTING
predictions <- predict(
  model_lvq_unbalance,
  newdata = test_
)

# CONFUSION MATRIX
confusionMatrix(
  predictions,
  test_$Diabetes,
  positive = "1"
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 754  82
##          1   2 238
##                                           
##                Accuracy : 0.9219          
##                  95% CI : (0.9043, 0.9373)
##     No Information Rate : 0.7026          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7987          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.7438          
##             Specificity : 0.9974          
##          Pos Pred Value : 0.9917          
##          Neg Pred Value : 0.9019          
##              Prevalence : 0.2974          
##          Detection Rate : 0.2212          
##    Detection Prevalence : 0.2230          
##       Balanced Accuracy : 0.8706          
##                                           
##        'Positive' Class : 1               
## 

LVQ unbalance learning rate

# ==========================================
# LVQ UNBALANCED DENGAN VARIASI LEARNING RATE
# ==========================================

# NORMALISASI MIN-MAX
min_max_scaling <- function(x) {
  (x - min(x)) / (max(x) - min(x))
}

# NORMALISASI DATA
Normalisasi <- as.data.frame(
  lapply(
    Variabel_Prediktor,
    min_max_scaling
  )
)

# MENGGABUNGKAN TARGET DAN DATA NORMALISASI
Data_norm <- data.frame(
  Diabetes = data$Diabetes,
  Normalisasi
)

# SPLITTING DATA
set.seed(476)

index <- sample(
  1:nrow(Data_norm),
  round(0.75 * nrow(Data_norm))
)

train_ <- Data_norm[index, ]
test_  <- Data_norm[-index, ]

# MEMBUAT DATA TRAINING NUMERIK
x_train <- as.matrix(
  train_[, -1]
)

y_train <- train_$Diabetes

x_test <- as.matrix(
  test_[, -1]
)

# MEMBANGUN CODEBOOK AWAL
set.seed(123)

codebook <- lvqinit(
  x_train,
  y_train,
  size = 10
)

# ==========================================
# LEARNING RATE 0.01
# ==========================================

model_lvq_001 <- olvq1(
  x_train,
  y_train,
  codebook,
  alpha = 0.01,
  niter = 100
)

pred_001 <- lvqtest(
  model_lvq_001,
  x_test
)

pred_001 <- factor(
  pred_001,
  levels = levels(test_$Diabetes)
)

cm_001 <- confusionMatrix(
  pred_001,
  test_$Diabetes,
  positive = "1"
)

print(cm_001)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 756 102
##          1   0 218
##                                          
##                Accuracy : 0.9052         
##                  95% CI : (0.8861, 0.922)
##     No Information Rate : 0.7026         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.7502         
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 0.6813         
##             Specificity : 1.0000         
##          Pos Pred Value : 1.0000         
##          Neg Pred Value : 0.8811         
##              Prevalence : 0.2974         
##          Detection Rate : 0.2026         
##    Detection Prevalence : 0.2026         
##       Balanced Accuracy : 0.8406         
##                                          
##        'Positive' Class : 1              
## 
# ==========================================
# LEARNING RATE 0.05
# ==========================================

model_lvq_005 <- olvq1(
  x_train,
  y_train,
  codebook,
  alpha = 0.05,
  niter = 100
)

pred_005 <- lvqtest(
  model_lvq_005,
  x_test
)

pred_005 <- factor(
  pred_005,
  levels = levels(test_$Diabetes)
)

cm_005 <- confusionMatrix(
  pred_005,
  test_$Diabetes,
  positive = "1"
)

print(cm_005)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 756  95
##          1   0 225
##                                          
##                Accuracy : 0.9117         
##                  95% CI : (0.8931, 0.928)
##     No Information Rate : 0.7026         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.769          
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 0.7031         
##             Specificity : 1.0000         
##          Pos Pred Value : 1.0000         
##          Neg Pred Value : 0.8884         
##              Prevalence : 0.2974         
##          Detection Rate : 0.2091         
##    Detection Prevalence : 0.2091         
##       Balanced Accuracy : 0.8516         
##                                          
##        'Positive' Class : 1              
## 
# ==========================================
# LEARNING RATE 0.1
# ==========================================

model_lvq_01 <- olvq1(
  x_train,
  y_train,
  codebook,
  alpha = 0.1,
  niter = 100
)

pred_01 <- lvqtest(
  model_lvq_01,
  x_test
)

pred_01 <- factor(
  pred_01,
  levels = levels(test_$Diabetes)
)

cm_01 <- confusionMatrix(
  pred_01,
  test_$Diabetes,
  positive = "1"
)

print(cm_01)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 756  97
##          1   0 223
##                                           
##                Accuracy : 0.9099          
##                  95% CI : (0.8911, 0.9263)
##     No Information Rate : 0.7026          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7636          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.6969          
##             Specificity : 1.0000          
##          Pos Pred Value : 1.0000          
##          Neg Pred Value : 0.8863          
##              Prevalence : 0.2974          
##          Detection Rate : 0.2072          
##    Detection Prevalence : 0.2072          
##       Balanced Accuracy : 0.8484          
##                                           
##        'Positive' Class : 1               
## 

LVQ 1

# MENGUBAH TARGET MENJADI FAKTOR
data$Diabetes <- as.factor(data$Diabetes)

# Menghapus variabel target
Variabel_Prediktor <- data[, c(
  "Age",
  "Gender",
  "BMI",
  "SBP",
  "DBP",
  "FPG",
  "Chol",
  "Tri",
  "HDL",
  "LDL",
  "ALT",
  "BUN",
  "CCR",
  "FFPG",
  "smoking",
  "drinking",
  "family_histroy"
)]

# NORMALISASI MIN-MAX
min_max_scaling <- function(x) {
  (x - min(x)) / (max(x) - min(x))
}

Normalisasi <- as.data.frame(lapply(Variabel_Prediktor,
                                    min_max_scaling))

# Menggabungkan target dan data normalisasi
Data_norm <- data.frame(
  Diabetes = data$Diabetes,
  Normalisasi
)

# SPLITTING DATA
set.seed(476)

index <- sample(
  1:nrow(Data_norm),
  round(0.75 * nrow(Data_norm))
)

train_ <- Data_norm[index, ]
test_  <- Data_norm[-index, ]

# CROSS VALIDATION
control <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3
)

# SPLITTING DATA
set.seed(476)

index <- sample(
  1:nrow(Data_norm),
  round(0.75 * nrow(Data_norm))
)

train_ <- Data_norm[index, ]
test_  <- Data_norm[-index, ]

# HANDLING IMBALANCE DATA
train_balanced <- ovun.sample(
  Diabetes ~ .,
  data = train_,
  method = "both"
)$data

# MEMBUAT DATA TRAINING NUMERIK
x_train <- as.matrix(
  train_balanced[, -1]
)

y_train <- train_balanced$Diabetes

x_test <- as.matrix(
  test_[, -1]
)

# MEMBANGUN MODEL LVQ 1
set.seed(123)

codebook1 <- lvqinit(
  x_train,
  y_train,
  size = 10
)

model_lvq1 <- olvq1(
  x_train,
  y_train,
  codebook1
)

# PREDIKSI DATA TESTING
predictions <- lvqtest(
  model_lvq1,
  x_test
)

#MENGUBAH HASIL MENJADI FAKTOR
predictions <- factor(
  predictions,
  levels = levels(test_$Diabetes)
)

# CONFUSION MATRIX
confusionMatrix(
  predictions,
  test_$Diabetes,
  positive = "1"
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 756  76
##          1   0 244
##                                           
##                Accuracy : 0.9294          
##                  95% CI : (0.9124, 0.9439)
##     No Information Rate : 0.7026          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8186          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.7625          
##             Specificity : 1.0000          
##          Pos Pred Value : 1.0000          
##          Neg Pred Value : 0.9087          
##              Prevalence : 0.2974          
##          Detection Rate : 0.2268          
##    Detection Prevalence : 0.2268          
##       Balanced Accuracy : 0.8812          
##                                           
##        'Positive' Class : 1               
## 

LVQ2

# MEMBANGUN MODEL LVQ2
set.seed(123)

codebook2 <- lvqinit(
  x_train,
  y_train,
  size = 10
)

model_lvq2 <- lvq2(
  x_train,
  y_train,
  codebook2
)

# PREDIKSI DATA TESTING
predictions <- lvqtest(
  model_lvq2,
  x_test
)

#MENGUBAH HASIL MENJADI FAKTOR
predictions <- factor(
  predictions,
  levels = levels(test_$Diabetes)
)

# CONFUSION MATRIX
confusionMatrix(
  predictions,
  test_$Diabetes,
  positive = "1"
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 755  76
##          1   1 244
##                                           
##                Accuracy : 0.9284          
##                  95% CI : (0.9114, 0.9431)
##     No Information Rate : 0.7026          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8163          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.7625          
##             Specificity : 0.9987          
##          Pos Pred Value : 0.9959          
##          Neg Pred Value : 0.9085          
##              Prevalence : 0.2974          
##          Detection Rate : 0.2268          
##    Detection Prevalence : 0.2277          
##       Balanced Accuracy : 0.8806          
##                                           
##        'Positive' Class : 1               
## 

LVQ 3

# MEMBANGUN MODEL LVQ3
set.seed(123)

codebook <- lvqinit(
  x_train,
  y_train,
  size = 10
)

model_lvq3 <- lvq3(
  x_train,
  y_train,
  codebook
)

# PREDIKSI DATA TESTING
predictions <- lvqtest(
  model_lvq3,
  x_test
)

#MENGUBAH HASIL MENJADI FAKTOR
predictions <- factor(
  predictions,
  levels = levels(test_$Diabetes)
)

# CONFUSION MATRIX
confusionMatrix(
  predictions,
  test_$Diabetes,
  positive = "1"
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 755  76
##          1   1 244
##                                           
##                Accuracy : 0.9284          
##                  95% CI : (0.9114, 0.9431)
##     No Information Rate : 0.7026          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8163          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.7625          
##             Specificity : 0.9987          
##          Pos Pred Value : 0.9959          
##          Neg Pred Value : 0.9085          
##              Prevalence : 0.2974          
##          Detection Rate : 0.2268          
##    Detection Prevalence : 0.2277          
##       Balanced Accuracy : 0.8806          
##                                           
##        'Positive' Class : 1               
##