Tugas LVQ

library(neuralnet)

## Warning: package 'neuralnet' was built under R version 4.4.3

library(caret)

## Warning: package 'caret' was built under R version 4.4.3

## Loading required package: ggplot2

## Loading required package: lattice

library(mice)

## Warning: package 'mice' was built under R version 4.4.3

## 
## Attaching package: 'mice'

## The following object is masked from 'package:stats':
## 
##     filter

## The following objects are masked from 'package:base':
## 
##     cbind, rbind

library(keras)

## Warning: package 'keras' was built under R version 4.4.3

## The keras package is deprecated. Please use the keras3 package instead.
## Alternatively, to continue using legacy keras, call `py_require_legacy_keras()`.

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:neuralnet':
## 
##     compute

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(readxl)
library(ROSE)

## Warning: package 'ROSE' was built under R version 4.4.3

## Loaded ROSE 0.0-4

library(class)

## Warning: package 'class' was built under R version 4.4.3

library(kknn)

## Warning: package 'kknn' was built under R version 4.4.3

## 
## Attaching package: 'kknn'

## The following object is masked from 'package:caret':
## 
##     contr.dummy

# Read the diabetes data set from the Excel workbook (absolute local path).
data <- read_excel("C:/Users/62852/OneDrive/Documents/Semester 6/Jaringan Syaraf Tiruan/Diabetes Prediction.xlsx")
# Print the loaded data.
data

# List the column names of the loaded data.
names(data)

##  [1] "Age"            "Gender"         "BMI"            "SBP"           
##  [5] "DBP"            "FPG"            "Chol"           "Tri"           
##  [9] "HDL"            "LDL"            "ALT"            "BUN"           
## [13] "CCR"            "FFPG"           "smoking"        "drinking"      
## [17] "family_histroy" "Diabetes"

# Count the missing values across the whole data set.
sum(is.na(data))

## [1] 0

LVQ Balance

# Treat the target as a class label by converting it to a factor.
data$Diabetes <- as.factor(data$Diabetes)

# Keep the 17 predictor columns; the target column "Diabetes" is left out.
# "family_histroy" is spelled exactly as it appears in names(data).
Variabel_Prediktor <- data[c(
  "Age", "Gender", "BMI", "SBP", "DBP", "FPG",
  "Chol", "Tri", "HDL", "LDL", "ALT", "BUN",
  "CCR", "FFPG", "smoking", "drinking", "family_histroy"
)]

# MIN-MAX NORMALISATION
# Rescale a numeric vector linearly so its minimum maps to 0 and its
# maximum maps to 1.
#
# x: numeric vector.
# Returns a numeric vector of the same length as x. When every element of x
# is equal (zero range) the result is all zeros instead of the NaN that
# 0 / 0 would produce. If x contains NA, min()/max() yield NA and the whole
# result is NA.
min_max_scaling <- function(x) {
  lo <- min(x)
  span <- max(x) - lo
  # isTRUE() keeps an NA span on the ordinary path below.
  if (isTRUE(span == 0)) {
    return(rep(0, length(x)))
  }
  (x - lo) / span
}

# Rescale every predictor column with min_max_scaling().
# NOTE(review): the min/max come from the full data set, before the
# train/test split below, so test rows influence the scaling - confirm
# this is intended.
Normalisasi <- as.data.frame(lapply(Variabel_Prediktor, min_max_scaling))

# Rebuild one data frame: target first, then the scaled predictors.
Data_norm <- data.frame(Diabetes = data$Diabetes, Normalisasi)

# Draw 75% of the row indices for training; the rest form the test set.
set.seed(476)
index <- sample.int(nrow(Data_norm), round(0.75 * nrow(Data_norm)))
train_ <- Data_norm[index, ]
test_ <- Data_norm[-index, ]

# Resample the training rows only with ROSE's ovun.sample(method = "both").
train_balanced <- ovun.sample(
  Diabetes ~ .,
  data = train_, method = "both"
)[["data"]]

# 10-fold cross-validation, repeated 3 times.
control <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

# Fit LVQ through caret on the resampled training data.
model_lvq <- train(
  Diabetes ~ .,
  data = train_balanced, method = "lvq", trControl = control
)

# Show the resampling results and the selected tuning values.
print(model_lvq)

## Learning Vector Quantization 
## 
## 3227 samples
##   17 predictor
##    2 classes: '0', '1' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 3 times) 
## Summary of sample sizes: 2904, 2904, 2905, 2905, 2904, 2905, ... 
## Resampling results across tuning parameters:
## 
##   size  k   Accuracy   Kappa    
##    8     1  0.8849235  0.7707295
##    8     6  0.8702685  0.7419143
##    8    11  0.8758634  0.7533677
##   12     1  0.9018696  0.8043474
##   12     6  0.8863673  0.7736345
##   12    11  0.8936069  0.7883362
##   16     1  0.9044531  0.8096055
##   16     6  0.8915391  0.7842397
##   16    11  0.9037249  0.8083480
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 16 and k = 1.

# Classify the held-out test rows with the caret LVQ model.
predictions <- predict(model_lvq, newdata = test_)

# Compare predictions with the true labels; "1" is the positive class.
confusionMatrix(
  data = predictions, reference = test_$Diabetes, positive = "1"
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 727  55
##          1  29 265
##                                           
##                Accuracy : 0.9219          
##                  95% CI : (0.9043, 0.9373)
##     No Information Rate : 0.7026          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8087          
##                                           
##  Mcnemar's Test P-Value : 0.006377        
##                                           
##             Sensitivity : 0.8281          
##             Specificity : 0.9616          
##          Pos Pred Value : 0.9014          
##          Neg Pred Value : 0.9297          
##              Prevalence : 0.2974          
##          Detection Rate : 0.2463          
##    Detection Prevalence : 0.2732          
##       Balanced Accuracy : 0.8949          
##                                           
##        'Positive' Class : 1               
##

LVQ dengan Learning Rate

# MIN-MAX NORMALISATION
# Rescale a numeric vector linearly so its minimum maps to 0 and its
# maximum maps to 1.
#
# x: numeric vector.
# Returns a numeric vector of the same length as x. When every element of x
# is equal (zero range) the result is all zeros instead of the NaN that
# 0 / 0 would produce. If x contains NA, min()/max() yield NA and the whole
# result is NA.
min_max_scaling <- function(x) {
  lo <- min(x)
  span <- max(x) - lo
  # isTRUE() keeps an NA span on the ordinary path below.
  if (isTRUE(span == 0)) {
    return(rep(0, length(x)))
  }
  (x - lo) / span
}

# Rescale every predictor column with min_max_scaling().
# NOTE(review): scaling uses min/max of the full data set, computed before
# the train/test split - confirm this is intended.
Normalisasi <- as.data.frame(lapply(Variabel_Prediktor, min_max_scaling))

# Target first, then the scaled predictors.
Data_norm <- data.frame(Diabetes = data$Diabetes, Normalisasi)

# 75/25 train/test split with a fixed seed.
set.seed(476)
index <- sample.int(nrow(Data_norm), round(0.75 * nrow(Data_norm)))
train_ <- Data_norm[index, ]
test_ <- Data_norm[-index, ]

# Resample the training rows only with ROSE's ovun.sample(method = "both").
train_balanced <- ovun.sample(
  Diabetes ~ .,
  data = train_, method = "both"
)[["data"]]

# Numeric predictor matrices; column 1 (the target) is dropped.
x_train <- as.matrix(train_balanced[, -1])
y_train <- train_balanced$Diabetes
x_test <- as.matrix(test_[, -1])

# Initial codebook, reused as the starting point for each learning rate.
set.seed(123)
codebook <- lvqinit(x = x_train, cl = y_train, size = 10)

# ------------------------------------------
# Learning rate 0.01
# ------------------------------------------
model_lvq_001 <- olvq1(
  x = x_train, cl = y_train, codebk = codebook,
  niter = 100, alpha = 0.01
)

# Predict the test rows and align the factor levels with the reference.
pred_001 <- lvqtest(codebk = model_lvq_001, test = x_test)
pred_001 <- factor(pred_001, levels = levels(test_$Diabetes))

confusionMatrix(
  data = pred_001, reference = test_$Diabetes, positive = "1"
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 756  79
##          1   0 241
##                                           
##                Accuracy : 0.9266          
##                  95% CI : (0.9093, 0.9414)
##     No Information Rate : 0.7026          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8108          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.7531          
##             Specificity : 1.0000          
##          Pos Pred Value : 1.0000          
##          Neg Pred Value : 0.9054          
##              Prevalence : 0.2974          
##          Detection Rate : 0.2240          
##    Detection Prevalence : 0.2240          
##       Balanced Accuracy : 0.8766          
##                                           
##        'Positive' Class : 1               
##

# ------------------------------------------
# Learning rate 0.05
# ------------------------------------------
# Train from the shared initial codebook with alpha = 0.05.
model_lvq_005 <- olvq1(
  x = x_train, cl = y_train, codebk = codebook,
  niter = 100, alpha = 0.05
)

# Predict the test rows and align the factor levels with the reference.
pred_005 <- lvqtest(codebk = model_lvq_005, test = x_test)
pred_005 <- factor(pred_005, levels = levels(test_$Diabetes))

confusionMatrix(
  data = pred_005, reference = test_$Diabetes, positive = "1"
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 755  76
##          1   1 244
##                                           
##                Accuracy : 0.9284          
##                  95% CI : (0.9114, 0.9431)
##     No Information Rate : 0.7026          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8163          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.7625          
##             Specificity : 0.9987          
##          Pos Pred Value : 0.9959          
##          Neg Pred Value : 0.9085          
##              Prevalence : 0.2974          
##          Detection Rate : 0.2268          
##    Detection Prevalence : 0.2277          
##       Balanced Accuracy : 0.8806          
##                                           
##        'Positive' Class : 1               
##

# ------------------------------------------
# Learning rate 0.1
# ------------------------------------------
# Train from the shared initial codebook with alpha = 0.1.
model_lvq_01 <- olvq1(
  x = x_train, cl = y_train, codebk = codebook,
  niter = 100, alpha = 0.1
)

# Predict the test rows and align the factor levels with the reference.
pred_01 <- lvqtest(codebk = model_lvq_01, test = x_test)
pred_01 <- factor(pred_01, levels = levels(test_$Diabetes))

confusionMatrix(
  data = pred_01, reference = test_$Diabetes, positive = "1"
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 756  76
##          1   0 244
##                                           
##                Accuracy : 0.9294          
##                  95% CI : (0.9124, 0.9439)
##     No Information Rate : 0.7026          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8186          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.7625          
##             Specificity : 1.0000          
##          Pos Pred Value : 1.0000          
##          Neg Pred Value : 0.9087          
##              Prevalence : 0.2974          
##          Detection Rate : 0.2268          
##    Detection Prevalence : 0.2268          
##       Balanced Accuracy : 0.8812          
##                                           
##        'Positive' Class : 1               
##

LVQ Unbalance

# Treat the target as a class label by converting it to a factor.
data$Diabetes <- as.factor(data$Diabetes)

# Keep the 17 predictor columns; the target column "Diabetes" is left out.
# "family_histroy" is spelled exactly as it appears in names(data).
Variabel_Prediktor <- data[c(
  "Age", "Gender", "BMI", "SBP", "DBP", "FPG",
  "Chol", "Tri", "HDL", "LDL", "ALT", "BUN",
  "CCR", "FFPG", "smoking", "drinking", "family_histroy"
)]

# MIN-MAX NORMALISATION
# Rescale a numeric vector linearly so its minimum maps to 0 and its
# maximum maps to 1.
#
# x: numeric vector.
# Returns a numeric vector of the same length as x. When every element of x
# is equal (zero range) the result is all zeros instead of the NaN that
# 0 / 0 would produce. If x contains NA, min()/max() yield NA and the whole
# result is NA.
min_max_scaling <- function(x) {
  lo <- min(x)
  span <- max(x) - lo
  # isTRUE() keeps an NA span on the ordinary path below.
  if (isTRUE(span == 0)) {
    return(rep(0, length(x)))
  }
  (x - lo) / span
}

# Rescale every predictor column with min_max_scaling().
# NOTE(review): scaling uses min/max of the full data set, computed before
# the train/test split - confirm this is intended.
Normalisasi <- as.data.frame(lapply(Variabel_Prediktor, min_max_scaling))

# Target first, then the scaled predictors.
Data_norm <- data.frame(Diabetes = data$Diabetes, Normalisasi)

# 75/25 train/test split with a fixed seed.
set.seed(476)
index <- sample.int(nrow(Data_norm), round(0.75 * nrow(Data_norm)))
train_ <- Data_norm[index, ]
test_ <- Data_norm[-index, ]

# 10-fold cross-validation, repeated 3 times.
control <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

# Fit LVQ through caret on the training rows as drawn (no resampling).
model_lvq_unbalance <- train(
  Diabetes ~ .,
  data = train_, method = "lvq", trControl = control
)

# Classify the held-out test rows.
predictions <- predict(model_lvq_unbalance, newdata = test_)

# Compare predictions with the true labels; "1" is the positive class.
confusionMatrix(
  data = predictions, reference = test_$Diabetes, positive = "1"
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 754  82
##          1   2 238
##                                           
##                Accuracy : 0.9219          
##                  95% CI : (0.9043, 0.9373)
##     No Information Rate : 0.7026          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7987          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.7438          
##             Specificity : 0.9974          
##          Pos Pred Value : 0.9917          
##          Neg Pred Value : 0.9019          
##              Prevalence : 0.2974          
##          Detection Rate : 0.2212          
##    Detection Prevalence : 0.2230          
##       Balanced Accuracy : 0.8706          
##                                           
##        'Positive' Class : 1               
##

LVQ Unbalance dengan Learning Rate

# ==========================================
# LVQ UNBALANCED DENGAN VARIASI LEARNING RATE
# ==========================================

# MIN-MAX NORMALISATION
# Rescale a numeric vector linearly so its minimum maps to 0 and its
# maximum maps to 1.
#
# x: numeric vector.
# Returns a numeric vector of the same length as x. When every element of x
# is equal (zero range) the result is all zeros instead of the NaN that
# 0 / 0 would produce. If x contains NA, min()/max() yield NA and the whole
# result is NA.
min_max_scaling <- function(x) {
  lo <- min(x)
  span <- max(x) - lo
  # isTRUE() keeps an NA span on the ordinary path below.
  if (isTRUE(span == 0)) {
    return(rep(0, length(x)))
  }
  (x - lo) / span
}

# Rescale every predictor column with min_max_scaling().
# NOTE(review): scaling uses min/max of the full data set, computed before
# the train/test split - confirm this is intended.
Normalisasi <- as.data.frame(lapply(Variabel_Prediktor, min_max_scaling))

# Target first, then the scaled predictors.
Data_norm <- data.frame(Diabetes = data$Diabetes, Normalisasi)

# 75/25 train/test split with a fixed seed.
set.seed(476)
index <- sample.int(nrow(Data_norm), round(0.75 * nrow(Data_norm)))
train_ <- Data_norm[index, ]
test_ <- Data_norm[-index, ]

# Numeric predictor matrices from the training rows as drawn (no
# resampling); column 1 (the target) is dropped.
x_train <- as.matrix(train_[, -1])
y_train <- train_$Diabetes
x_test <- as.matrix(test_[, -1])

# Initial codebook, reused as the starting point for each learning rate.
set.seed(123)
codebook <- lvqinit(x = x_train, cl = y_train, size = 10)

# ------------------------------------------
# Learning rate 0.01
# ------------------------------------------
model_lvq_001 <- olvq1(
  x = x_train, cl = y_train, codebk = codebook,
  niter = 100, alpha = 0.01
)

# Predict the test rows and align the factor levels with the reference.
pred_001 <- lvqtest(codebk = model_lvq_001, test = x_test)
pred_001 <- factor(pred_001, levels = levels(test_$Diabetes))

# Keep the confusion matrix object, then print it.
cm_001 <- confusionMatrix(
  data = pred_001, reference = test_$Diabetes, positive = "1"
)

print(cm_001)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 756 102
##          1   0 218
##                                          
##                Accuracy : 0.9052         
##                  95% CI : (0.8861, 0.922)
##     No Information Rate : 0.7026         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.7502         
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 0.6813         
##             Specificity : 1.0000         
##          Pos Pred Value : 1.0000         
##          Neg Pred Value : 0.8811         
##              Prevalence : 0.2974         
##          Detection Rate : 0.2026         
##    Detection Prevalence : 0.2026         
##       Balanced Accuracy : 0.8406         
##                                          
##        'Positive' Class : 1              
##

# ------------------------------------------
# Learning rate 0.05
# ------------------------------------------
# Train from the shared initial codebook with alpha = 0.05.
model_lvq_005 <- olvq1(
  x = x_train, cl = y_train, codebk = codebook,
  niter = 100, alpha = 0.05
)

# Predict the test rows and align the factor levels with the reference.
pred_005 <- lvqtest(codebk = model_lvq_005, test = x_test)
pred_005 <- factor(pred_005, levels = levels(test_$Diabetes))

# Keep the confusion matrix object, then print it.
cm_005 <- confusionMatrix(
  data = pred_005, reference = test_$Diabetes, positive = "1"
)

print(cm_005)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 756  95
##          1   0 225
##                                          
##                Accuracy : 0.9117         
##                  95% CI : (0.8931, 0.928)
##     No Information Rate : 0.7026         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.769          
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 0.7031         
##             Specificity : 1.0000         
##          Pos Pred Value : 1.0000         
##          Neg Pred Value : 0.8884         
##              Prevalence : 0.2974         
##          Detection Rate : 0.2091         
##    Detection Prevalence : 0.2091         
##       Balanced Accuracy : 0.8516         
##                                          
##        'Positive' Class : 1              
##

# ------------------------------------------
# Learning rate 0.1
# ------------------------------------------
# Train from the shared initial codebook with alpha = 0.1.
model_lvq_01 <- olvq1(
  x = x_train, cl = y_train, codebk = codebook,
  niter = 100, alpha = 0.1
)

# Predict the test rows and align the factor levels with the reference.
pred_01 <- lvqtest(codebk = model_lvq_01, test = x_test)
pred_01 <- factor(pred_01, levels = levels(test_$Diabetes))

# Keep the confusion matrix object, then print it.
cm_01 <- confusionMatrix(
  data = pred_01, reference = test_$Diabetes, positive = "1"
)

print(cm_01)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 756  97
##          1   0 223
##                                           
##                Accuracy : 0.9099          
##                  95% CI : (0.8911, 0.9263)
##     No Information Rate : 0.7026          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7636          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.6969          
##             Specificity : 1.0000          
##          Pos Pred Value : 1.0000          
##          Neg Pred Value : 0.8863          
##              Prevalence : 0.2974          
##          Detection Rate : 0.2072          
##    Detection Prevalence : 0.2072          
##       Balanced Accuracy : 0.8484          
##                                           
##        'Positive' Class : 1               
##

LVQ 1

# Treat the target as a class label by converting it to a factor.
data$Diabetes <- as.factor(data$Diabetes)

# Keep the 17 predictor columns; the target column "Diabetes" is left out.
# "family_histroy" is spelled exactly as it appears in names(data).
Variabel_Prediktor <- data[c(
  "Age", "Gender", "BMI", "SBP", "DBP", "FPG",
  "Chol", "Tri", "HDL", "LDL", "ALT", "BUN",
  "CCR", "FFPG", "smoking", "drinking", "family_histroy"
)]

# MIN-MAX NORMALISATION
# Rescale a numeric vector linearly so its minimum maps to 0 and its
# maximum maps to 1.
#
# x: numeric vector.
# Returns a numeric vector of the same length as x. When every element of x
# is equal (zero range) the result is all zeros instead of the NaN that
# 0 / 0 would produce. If x contains NA, min()/max() yield NA and the whole
# result is NA.
min_max_scaling <- function(x) {
  lo <- min(x)
  span <- max(x) - lo
  # isTRUE() keeps an NA span on the ordinary path below.
  if (isTRUE(span == 0)) {
    return(rep(0, length(x)))
  }
  (x - lo) / span
}

# Rescale every predictor column with min_max_scaling().
# NOTE(review): scaling uses min/max of the full data set, computed before
# the train/test split - confirm this is intended.
Normalisasi <- as.data.frame(lapply(Variabel_Prediktor, min_max_scaling))

# Target first, then the scaled predictors.
Data_norm <- data.frame(Diabetes = data$Diabetes, Normalisasi)

# 75/25 train/test split with a fixed seed. The split is drawn once:
# re-seeding with 476 and sampling again would only reproduce the same
# index, and no cross-validation control object is needed because this
# section does not call caret::train().
set.seed(476)
index <- sample(
  seq_len(nrow(Data_norm)),
  round(0.75 * nrow(Data_norm))
)

train_ <- Data_norm[index, ]
test_  <- Data_norm[-index, ]

# Resample the training rows only with ROSE's ovun.sample(method = "both").
train_balanced <- ovun.sample(
  Diabetes ~ .,
  data = train_,
  method = "both"
)$data

# Numeric predictor matrices; column 1 (the target) is dropped.
x_train <- as.matrix(
  train_balanced[, -1]
)

y_train <- train_balanced$Diabetes

x_test <- as.matrix(
  test_[, -1]
)

# Build the initial codebook and train the model.
# NOTE(review): this section is titled "LVQ 1" but calls olvq1(); the class
# package also provides lvq1() - confirm which one is intended.
set.seed(123)

codebook1 <- lvqinit(
  x_train,
  y_train,
  size = 10
)

model_lvq1 <- olvq1(
  x_train,
  y_train,
  codebook1
)

# Predict the test rows.
predictions <- lvqtest(
  model_lvq1,
  x_test
)

# Align the factor levels of the predictions with the reference labels.
predictions <- factor(
  predictions,
  levels = levels(test_$Diabetes)
)

# Compare predictions with the true labels; "1" is the positive class.
confusionMatrix(
  predictions,
  test_$Diabetes,
  positive = "1"
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 756  76
##          1   0 244
##                                           
##                Accuracy : 0.9294          
##                  95% CI : (0.9124, 0.9439)
##     No Information Rate : 0.7026          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8186          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.7625          
##             Specificity : 1.0000          
##          Pos Pred Value : 1.0000          
##          Neg Pred Value : 0.9087          
##              Prevalence : 0.2974          
##          Detection Rate : 0.2268          
##    Detection Prevalence : 0.2268          
##       Balanced Accuracy : 0.8812          
##                                           
##        'Positive' Class : 1               
##

LVQ 2

# Build a fresh initial codebook with a fixed seed.
set.seed(123)
codebook2 <- lvqinit(x = x_train, cl = y_train, size = 10)

# Train with lvq2(); no extra arguments are passed, so its defaults apply.
model_lvq2 <- lvq2(x = x_train, cl = y_train, codebk = codebook2)

# Predict the test rows and align the factor levels with the reference.
predictions <- lvqtest(codebk = model_lvq2, test = x_test)
predictions <- factor(predictions, levels = levels(test_$Diabetes))

# Compare predictions with the true labels; "1" is the positive class.
confusionMatrix(
  data = predictions, reference = test_$Diabetes, positive = "1"
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 755  76
##          1   1 244
##                                           
##                Accuracy : 0.9284          
##                  95% CI : (0.9114, 0.9431)
##     No Information Rate : 0.7026          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8163          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.7625          
##             Specificity : 0.9987          
##          Pos Pred Value : 0.9959          
##          Neg Pred Value : 0.9085          
##              Prevalence : 0.2974          
##          Detection Rate : 0.2268          
##    Detection Prevalence : 0.2277          
##       Balanced Accuracy : 0.8806          
##                                           
##        'Positive' Class : 1               
##

LVQ 3

# Build a fresh initial codebook with a fixed seed. It is stored as
# codebook3 (matching codebook1 / codebook2) so the `codebook` object used
# by the learning-rate experiments is not overwritten.
set.seed(123)

codebook3 <- lvqinit(
  x_train,
  y_train,
  size = 10
)

# Train with lvq3(); no extra arguments are passed, so its defaults apply.
model_lvq3 <- lvq3(
  x_train,
  y_train,
  codebook3
)

# Predict the test rows.
predictions <- lvqtest(
  model_lvq3,
  x_test
)

# Align the factor levels of the predictions with the reference labels.
predictions <- factor(
  predictions,
  levels = levels(test_$Diabetes)
)

# Compare predictions with the true labels; "1" is the positive class.
confusionMatrix(
  predictions,
  test_$Diabetes,
  positive = "1"
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 755  76
##          1   1 244
##                                           
##                Accuracy : 0.9284          
##                  95% CI : (0.9114, 0.9431)
##     No Information Rate : 0.7026          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8163          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.7625          
##             Specificity : 0.9987          
##          Pos Pred Value : 0.9959          
##          Neg Pred Value : 0.9085          
##              Prevalence : 0.2974          
##          Detection Rate : 0.2268          
##    Detection Prevalence : 0.2277          
##       Balanced Accuracy : 0.8806          
##                                           
##        'Positive' Class : 1               
##

Tugas LVQ

Firli Fiora

2026-05-01