# Load the cleaned bank training dataset from its Excel workbook.
library(readxl)
data <- read_excel(path = "C:/Users/ihsan/Downloads/bank_latih_clean.xlsx")
# Preview the first rows to confirm the import looks as expected.
head(x = data)
# Data preparation
# Convert column types
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Convert column types in a single mutate() using across(); the scoped
# mutate_if() verbs are superseded in current dplyr.
data <- data %>%
  mutate(
    # Character and logical columns become factors.
    across(where(is.character), as.factor),
    across(where(is.logical), as.factor),
    # Integer- and double-typed columns are passed through as.numeric(),
    # the same conversions the previous mutate_if() chain applied.
    across(where(is.integer), as.numeric),
    across(where(is.double), as.numeric)
  )
#KNN
library(class)
library(caret)
## Warning: package 'caret' was built under R version 4.3.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.3
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 4.3.3
# Split data: 80/20 train/test partition on the target `y`.
# createDataPartition() samples at random, so the seed is fixed first to make
# the partition (and everything computed from it) reproducible between runs.
set.seed(123)
trainIndex <- createDataPartition(data$y, p = 0.8,
                                  list = FALSE,
                                  times = 1)
train_data <- data[trainIndex, ]
test_data <- data[-trainIndex, ]

# Dummy-encode the predictors with model.matrix(); [, -1] drops the intercept
# column.
train_data_knn <- model.matrix(y ~ ., data = train_data)[, -1]
test_data_knn <- model.matrix(y ~ ., data = test_data)[, -1]

# Standardise the features.
# The centre and scale are estimated on the training matrix only and then
# applied to the test matrix. Scaling the test set with its own mean/sd would
# put the two matrices on different scales and distort the KNN distances.
train_data_knn <- scale(train_data_knn)
test_data_knn <- scale(
  test_data_knn,
  center = attr(train_data_knn, "scaled:center"),
  scale = attr(train_data_knn, "scaled:scale")
)
# NOTE: test-set results recorded further down in this file were produced when
# the test matrix was scaled with its own statistics; re-run to refresh them.
library(caret)

# Candidate values of k for the search.
grid_knn <- expand.grid(k = 1:10)

# Grid search with 10-fold cross-validation.
# KNN is distance based, and the KNN evaluated later in this script runs on
# standardised features, so the predictors are centred and scaled here as well.
# Without this, k would be tuned for a different (unscaled) model.
set.seed(123)
knn_grid_search <- train(
  y ~ .,                       # target y, all other columns as features
  data = train_data,
  method = "knn",
  preProcess = c("center", "scale"),
  trControl = trainControl(method = "cv", number = 10),  # 10-fold CV
  tuneGrid = grid_knn
)
print(knn_grid_search)
# NOTE: the console output recorded right after this call was produced before
# the preProcess step was added; re-run to refresh it.
## k-Nearest Neighbors 
## 
## 3617 samples
##   16 predictor
##    2 classes: 'no', 'yes' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 3256, 3255, 3255, 3256, 3255, 3256, ... 
## Resampling results across tuning parameters:
## 
##   k   Accuracy   Kappa    
##    1  0.8493197  0.2338938
##    2  0.8459979  0.2240576
##    3  0.8706103  0.2373902
##    4  0.8711674  0.2271440
##    5  0.8755888  0.1978661
##    6  0.8750363  0.1843586
##    7  0.8794585  0.1863001
##    8  0.8794585  0.1817617
##    9  0.8819470  0.1853185
##   10  0.8833305  0.1901213
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 10.
# Show the tuning value selected by the grid search.
best_k <- knn_grid_search[["bestTune"]]
print(best_k)
##     k
## 10 10
# Plot the resampling profile of the grid search.
plot(knn_grid_search)

# Start the timer for the training-set evaluation.
start_time <- Sys.time()

# KNN on the training data: each training row is classified against the
# training matrix itself.
# NOTE(review): k is fixed at 5 here rather than taken from best_k — confirm
# this is intentional.
model_knn_train <- knn(
  train = train_data_knn,
  test = train_data_knn,
  cl = train_data$y,
  k = 5
)

# Evaluation: compare predictions with the true training labels.
confusion_knn_train <- confusionMatrix(
  data = model_knn_train,
  reference = train_data$y
)
print(confusion_knn_train)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   no  yes
##        no  3151  285
##        yes   49  132
##                                           
##                Accuracy : 0.9077          
##                  95% CI : (0.8978, 0.9169)
##     No Information Rate : 0.8847          
##     P-Value [Acc > NIR] : 4.858e-06       
##                                           
##                   Kappa : 0.3996          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9847          
##             Specificity : 0.3165          
##          Pos Pred Value : 0.9171          
##          Neg Pred Value : 0.7293          
##              Prevalence : 0.8847          
##          Detection Rate : 0.8712          
##    Detection Prevalence : 0.9500          
##       Balanced Accuracy : 0.6506          
##                                           
##        'Positive' Class : no              
## 
# Compute metrics for the class confusionMatrix() treats as positive.
precision_knn_train <- confusion_knn_train$byClass["Precision"]
recall_knn_train <- confusion_knn_train$byClass["Recall"]
# The arithmetic inherits the name of its first operand ("Precision"), so the
# F1 value is relabelled explicitly.
f1_score_knn_train <- setNames(
  2 * ((precision_knn_train * recall_knn_train) /
    (precision_knn_train + recall_knn_train)),
  "F1"
)
accuracy_knn_train <- confusion_knn_train$overall["Accuracy"]

# Store the results in a one-row table.
# unname() stops data.frame() from using the vector name "Precision" as the
# row name of the table.
results_knn_train <- data.frame(
  Model = "KNN_Training",
  Precision = unname(precision_knn_train),
  Recall = unname(recall_knn_train),
  F1_Score = unname(f1_score_knn_train),
  Accuracy = unname(accuracy_knn_train)
)
print(results_knn_train)
##                  Model Precision    Recall  F1_Score  Accuracy
## Precision KNN_Training 0.9170547 0.9846875 0.9496685 0.9076583
# Stop the timer.
# Subtracting two Sys.time() values yields a difftime whose units are chosen
# automatically (secs, mins, ...). Request seconds explicitly so the printed
# label is always correct.
end_time <- Sys.time()
runtime <- as.numeric(difftime(end_time, start_time, units = "secs"))
cat("Runtime (in seconds):", runtime, "\n")
## Runtime (in seconds): 0.9905949
# Start the timer for the test-set evaluation.
start_time <- Sys.time()

# KNN on the test data: test rows are classified using the training matrix and
# training labels.
# NOTE(review): k is fixed at 5 here rather than taken from best_k — confirm
# this is intentional.
model_knn_test <- knn(
  train = train_data_knn,
  test = test_data_knn,
  cl = train_data$y,
  k = 5
)

# Evaluation: compare predictions with the true test labels, with "no" as the
# positive class.
confusion_knn_test <- confusionMatrix(
  data = model_knn_test,
  reference = test_data$y,
  positive = "no"
)
print(confusion_knn_test)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  no yes
##        no  778  79
##        yes  22  25
##                                           
##                Accuracy : 0.8883          
##                  95% CI : (0.8659, 0.9081)
##     No Information Rate : 0.885           
##     P-Value [Acc > NIR] : 0.402           
##                                           
##                   Kappa : 0.2795          
##                                           
##  Mcnemar's Test P-Value : 2.515e-08       
##                                           
##             Sensitivity : 0.9725          
##             Specificity : 0.2404          
##          Pos Pred Value : 0.9078          
##          Neg Pred Value : 0.5319          
##              Prevalence : 0.8850          
##          Detection Rate : 0.8606          
##    Detection Prevalence : 0.9480          
##       Balanced Accuracy : 0.6064          
##                                           
##        'Positive' Class : no              
## 
# Compute metrics for the class confusionMatrix() treats as positive.
precision_knn_test <- confusion_knn_test$byClass["Precision"]
recall_knn_test <- confusion_knn_test$byClass["Recall"]
# The arithmetic inherits the name of its first operand ("Precision"), so the
# F1 value is relabelled explicitly.
f1_score_knn_test <- setNames(
  2 * ((precision_knn_test * recall_knn_test) /
    (precision_knn_test + recall_knn_test)),
  "F1"
)
accuracy_knn_test <- confusion_knn_test$overall["Accuracy"]

# Store the results in a one-row table.
# unname() stops data.frame() from using the vector name "Precision" as the
# row name of the table.
results_knn_test <- data.frame(
  Model = "KNN_Testing",
  Precision = unname(precision_knn_test),
  Recall = unname(recall_knn_test),
  F1_Score = unname(f1_score_knn_test),
  Accuracy = unname(accuracy_knn_test)
)
print(results_knn_test)
##                 Model Precision Recall  F1_Score  Accuracy
## Precision KNN_Testing  0.907818 0.9725 0.9390465 0.8882743
# Stop the timer.
# Subtracting two Sys.time() values yields a difftime whose units are chosen
# automatically (secs, mins, ...). Request seconds explicitly so the printed
# label is always correct.
end_time <- Sys.time()
runtime <- as.numeric(difftime(end_time, start_time, units = "secs"))
cat("Runtime (in seconds):", runtime, "\n")
## Runtime (in seconds): 0.2705472
#ANN
library(nnet)
library(NeuralNetTools)
## Warning: package 'NeuralNetTools' was built under R version 4.3.3
library(caret)

# Split the data into training and testing sets (80/20 on the target `y`).
# createDataPartition() samples at random, so the seed is fixed first to make
# this partition reproducible between runs.
set.seed(123)
trainIndex <- createDataPartition(data$y, p = 0.8,
                                  list = FALSE,
                                  times = 1)
train_data <- data[trainIndex, ]
test_data <- data[-trainIndex, ]

# Make sure the target y is a factor in both partitions.
train_data[["y"]] <- as.factor(train_data[["y"]])
test_data[["y"]] <- as.factor(test_data[["y"]])

# ANN model: single hidden layer with 7 units, at most 100 iterations.
set.seed(123)
model_ann <- nnet(
  y ~ .,
  data = train_data,
  linout = FALSE,
  maxit = 100,
  size = 7
)
## # weights:  288
## initial  value 1875.656463 
## iter  10 value 1243.855610
## iter  20 value 1111.922190
## iter  30 value 1090.501227
## iter  40 value 1073.339694
## iter  50 value 1058.451986
## iter  60 value 1045.245536
## iter  70 value 1029.829836
## iter  80 value 1013.150776
## iter  90 value 990.834498
## iter 100 value 959.741722
## final  value 959.741722 
## stopped after 100 iterations
# Visualise the network structure.
plotnet(
  model_ann,
  circle_col = "lightblue",  # node fill colour
  alpha = 0.5,               # make the connection lines more transparent
  cex_val = 0.7,             # size of the value text
  cex_input = 0.6,           # shrink the input labels
  cex_output = 0.8           # size of the output label
)

# Resampling setup: 10-fold cross-validation with class probabilities and the
# two-class summary function (needed for the ROC metric used below).
ctrl <- trainControl(
  summaryFunction = twoClassSummary,
  classProbs = TRUE,
  number = 10,
  method = "cv"
)

# Candidate hidden-layer sizes; weight decay is held at a single value.
tunegrid <- expand.grid(
  size = c(1, 3, 5, 7, 10, 15, 20),
  decay = 0.1
)

# Tune the network, selecting by ROC ("Accuracy" would require a different
# summary function). maxit, trace and linout are forwarded to nnet().
set.seed(123)
model_tuning <- train(
  y ~ .,
  data = train_data,
  method = "nnet",
  metric = "ROC",
  tuneGrid = tunegrid,
  trControl = ctrl,
  linout = FALSE,
  trace = FALSE,
  maxit = 200
)

print(model_tuning)
## Neural Network 
## 
## 3617 samples
##   16 predictor
##    2 classes: 'no', 'yes' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 3256, 3255, 3255, 3256, 3255, 3256, ... 
## Resampling results across tuning parameters:
## 
##   size  ROC        Sens       Spec     
##    1    0.8540605  0.9665625  0.4078397
##    3    0.8931479  0.9603125  0.4316492
##    5    0.8947271  0.9653125  0.4123693
##    7    0.8861947  0.9609375  0.3979675
##   10    0.8876367  0.9615625  0.4197445
##   15    0.8695486  0.9646875  0.3493612
##   20    0.8681661  0.9650000  0.3383856
## 
## Tuning parameter 'decay' was held constant at a value of 0.1
## ROC was used to select the optimal model using the largest value.
## The final values used for the model were size = 5 and decay = 0.1.
# Plot the resampling profile of the tuning run.
plot(model_tuning)

# Start the timer for the training-set evaluation.
start_time <- Sys.time()

# Predict classes on the training data and align the factor levels with the
# reference labels.
# NOTE(review): this evaluates model_ann (size = 7), not the tuned
# model_tuning — confirm this is intentional.
predictions_ann <- factor(
  predict(model_ann, newdata = train_data, type = "class"),
  levels = levels(train_data$y)
)

# Confusion matrix for the training data.
confusion_ann <- confusionMatrix(
  data = predictions_ann,
  reference = train_data$y
)
print(confusion_ann)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   no  yes
##        no  3103  296
##        yes   97  121
##                                           
##                Accuracy : 0.8913          
##                  95% CI : (0.8807, 0.9013)
##     No Information Rate : 0.8847          
##     P-Value [Acc > NIR] : 0.1099          
##                                           
##                   Kappa : 0.3279          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.9697          
##             Specificity : 0.2902          
##          Pos Pred Value : 0.9129          
##          Neg Pred Value : 0.5550          
##              Prevalence : 0.8847          
##          Detection Rate : 0.8579          
##    Detection Prevalence : 0.9397          
##       Balanced Accuracy : 0.6299          
##                                           
##        'Positive' Class : no              
## 
# Precision, recall and F1 score on the training data, for the class
# confusionMatrix() treats as positive.
precision_ann <- confusion_ann$byClass["Precision"]
recall_ann <- confusion_ann$byClass["Recall"]
# The arithmetic inherits the name of its first operand ("Precision"), so the
# F1 value is relabelled explicitly.
f1_score_ann <- setNames(
  2 * ((precision_ann * recall_ann) / (precision_ann + recall_ann)),
  "F1"
)
precision_ann
## Precision 
## 0.9129156
recall_ann
##    Recall 
## 0.9696875
# The previously recorded output for this value was labelled "Precision"; it
# is omitted here because the label is now "F1".
f1_score_ann
# Accuracy on the training data.
accuracy_ann <- confusion_ann$overall["Accuracy"]

# Results table for the ANN on the training data.
# unname() stops data.frame() from using the vector name "Precision" as the
# row name of the table.
results_ann <- data.frame(
  Model = "ANN_Training",
  Precision = unname(precision_ann),
  Recall = unname(recall_ann),
  F1_Score = unname(f1_score_ann),
  Accuracy = unname(accuracy_ann)
)

# Show the results.
print(results_ann)
##                  Model Precision    Recall  F1_Score  Accuracy
## Precision ANN_Training 0.9129156 0.9696875 0.9404455 0.8913464
# Stop the timer.
# Subtracting two Sys.time() values yields a difftime whose units are chosen
# automatically (secs, mins, ...). Request seconds explicitly so the printed
# label is always correct.
end_time <- Sys.time()
runtime <- as.numeric(difftime(end_time, start_time, units = "secs"))
cat("Runtime (in seconds):", runtime, "\n")
## Runtime (in seconds): 0.06229401
# Start the timer for the test-set evaluation.
start_time <- Sys.time()

# Predict classes on the test data and align the factor levels with the
# reference labels.
# NOTE(review): this evaluates model_ann (size = 7), not the tuned
# model_tuning — confirm this is intentional.
predictions_ann <- factor(
  predict(model_ann, newdata = test_data, type = "class"),
  levels = levels(test_data$y)
)

# Confusion matrix for the test data.
confusion_ann <- confusionMatrix(
  data = predictions_ann,
  reference = test_data$y
)
print(confusion_ann)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  no yes
##        no  761  75
##        yes  39  29
##                                           
##                Accuracy : 0.8739          
##                  95% CI : (0.8505, 0.8948)
##     No Information Rate : 0.885           
##     P-Value [Acc > NIR] : 0.862637        
##                                           
##                   Kappa : 0.2709          
##                                           
##  Mcnemar's Test P-Value : 0.001045        
##                                           
##             Sensitivity : 0.9513          
##             Specificity : 0.2788          
##          Pos Pred Value : 0.9103          
##          Neg Pred Value : 0.4265          
##              Prevalence : 0.8850          
##          Detection Rate : 0.8418          
##    Detection Prevalence : 0.9248          
##       Balanced Accuracy : 0.6150          
##                                           
##        'Positive' Class : no              
## 
# Precision, recall and F1 score on the test data, for the class
# confusionMatrix() treats as positive.
precision_ann <- confusion_ann$byClass["Precision"]
recall_ann <- confusion_ann$byClass["Recall"]
# The arithmetic inherits the name of its first operand ("Precision"), so the
# F1 value is relabelled explicitly.
f1_score_ann <- setNames(
  2 * ((precision_ann * recall_ann) / (precision_ann + recall_ann)),
  "F1"
)
precision_ann
## Precision 
## 0.9102871
recall_ann
##  Recall 
## 0.95125
# The previously recorded output for this value was labelled "Precision"; it
# is omitted here because the label is now "F1".
f1_score_ann
# Accuracy on the test data.
accuracy_ann <- confusion_ann$overall["Accuracy"]

# Results table for the ANN on the test data.
# unname() stops data.frame() from using the vector name "Precision" as the
# row name of the table.
results_ann <- data.frame(
  Model = "ANN_Testing",
  Precision = unname(precision_ann),
  Recall = unname(recall_ann),
  F1_Score = unname(f1_score_ann),
  Accuracy = unname(accuracy_ann)
)
# Printed explicitly, matching how the other result tables are shown.
print(results_ann)
# Stop the timer.
# Subtracting two Sys.time() values yields a difftime whose units are chosen
# automatically (secs, mins, ...). Request seconds explicitly so the printed
# label is always correct.
end_time <- Sys.time()
runtime <- as.numeric(difftime(end_time, start_time, units = "secs"))
cat("Runtime (in seconds):", runtime, "\n")
## Runtime (in seconds): 0.12661
library(ggplot2)
library(pROC)
## Warning: package 'pROC' was built under R version 4.3.3
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
# ANN
# Heat map of the confusion matrix currently stored in confusion_ann.
cm_df <- setNames(
  as.data.frame(confusion_ann$table),
  c("Predicted", "Actual", "Freq")
)

ggplot(cm_df, aes(x = Actual, y = Predicted, fill = Freq)) +
  geom_tile(color = "white") +              # one tile per cell
  geom_text(aes(label = Freq), vjust = 1) + # cell count as text
  labs(title = "Confusion Matrix - ANN", fill = "Frequency") +
  scale_fill_gradient(low = "lightblue", high = "blue") +
  theme_minimal()

# Compute the metrics from the confusion matrix currently in confusion_ann.
precision <- confusion_ann$byClass["Precision"]
recall <- confusion_ann$byClass["Recall"]
f1 <- 2 * ((precision * recall) / (precision + recall))

# Build the plotting table: one row per metric.
metrics_df <- data.frame(
  Metric = c("Precision", "Recall", "F1 Score"),
  Value = unname(c(precision, recall, f1))
)

# Bar chart of the three scores, with the rounded value above each bar.
ggplot(metrics_df, aes(x = Metric, y = Value, fill = Metric)) +
  geom_col() +
  geom_text(aes(label = round(Value, 2)), vjust = -0.5) +
  ylim(0, 1) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "Evaluation Metrics - ANN", y = "Score") +
  theme_minimal()