# Import the training bank dataset
library(readxl)
data <- read_excel("C:/Users/ihsan/Downloads/bank_latih_clean.xlsx")
head(data)
# Data preparation
# Convert data types
library(dplyr)
data <- data %>%
mutate_if(is.character, as.factor) %>%
mutate_if(is.logical, as.factor) %>%
mutate_if(is.integer, as.numeric) %>%
mutate_if(is.double, as.numeric)
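# Sketch (equivalent alternative, not run for this report): mutate_if() is
# superseded in current dplyr; across() with where() performs the same
# conversions. data_converted is a hypothetical name used only here.
data_converted <- data %>%
  mutate(across(where(is.character), as.factor)) %>%
  mutate(across(where(is.logical), as.factor)) %>%
  mutate(across(where(is.integer), as.numeric))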
# KNN
library(class)
library(caret)
# Split data
trainIndex <- createDataPartition(data$y, p = .8,
list = FALSE,
times = 1)
train_data <- data[trainIndex, ]
test_data <- data[-trainIndex, ]
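# Sketch (optional check, not run for this report): createDataPartition samples
# within the levels of y, so the class balance should be similar in both
# subsets; class_balance is a hypothetical name used only here.
class_balance <- rbind(train = prop.table(table(train_data$y)),
                       test = prop.table(table(test_data$y)))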
# Dummy encoding with model.matrix
train_data_knn <- model.matrix(y ~ ., data = train_data)[, -1]
test_data_knn <- model.matrix(y ~ ., data = test_data)[, -1]
# Normalize the data
train_data_knn <- scale(train_data_knn)
test_data_knn <- scale(test_data_knn)
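# Sketch (not run for this report): scaling the test matrix with its own mean
# and standard deviation, as above, lets test-set statistics shape the features.
# An alternative is to reuse the training centre/scale; test_data_knn_ts is a
# hypothetical name introduced only for this illustration.
center_train <- attr(train_data_knn, "scaled:center")
scale_train <- attr(train_data_knn, "scaled:scale")
test_data_knn_ts <- scale(model.matrix(y ~ ., data = test_data)[, -1],
                          center = center_train, scale = scale_train)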
library(caret)
grid_knn <- expand.grid(k = 1:10) # Candidate values of k
# Grid search with cross-validation
set.seed(123)
knn_grid_search <- train(
y ~ ., # Target variable is y; all remaining columns are features
data = train_data,
method = "knn",
trControl = trainControl(method = "cv", number = 10), # 10-fold cross-validation
tuneGrid = grid_knn
)
print(knn_grid_search)
## k-Nearest Neighbors
##
## 3617 samples
## 16 predictor
## 2 classes: 'no', 'yes'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 3256, 3255, 3255, 3256, 3255, 3256, ...
## Resampling results across tuning parameters:
##
## k Accuracy Kappa
## 1 0.8493197 0.2338938
## 2 0.8459979 0.2240576
## 3 0.8706103 0.2373902
## 4 0.8711674 0.2271440
## 5 0.8755888 0.1978661
## 6 0.8750363 0.1843586
## 7 0.8794585 0.1863001
## 8 0.8794585 0.1817617
## 9 0.8819470 0.1853185
## 10 0.8833305 0.1901213
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 10.
# Show the best parameter found
best_k <- knn_grid_search$bestTune
print(best_k)
## k
## 10 10
plot(knn_grid_search)
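# Sketch (not run for this report): the grid search selects k = 10, while the
# fits below use k = 5 on the dummy-encoded, scaled matrices. The tuned value
# could be reused like this; model_knn_best and confusion_knn_best are
# hypothetical names introduced only for this illustration.
model_knn_best <- knn(train_data_knn, test_data_knn, train_data$y, k = best_k$k)
confusion_knn_best <- confusionMatrix(model_knn_best, test_data$y, positive = "no")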

# Start timing
start_time <- Sys.time()
# KNN on the training data with k = 5 (the grid search above selected k = 10)
model_knn_train <- knn(train_data_knn, train_data_knn, train_data$y, k = 5)
# Evaluation
confusion_knn_train <- confusionMatrix(model_knn_train, train_data$y)
print(confusion_knn_train)
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 3151 285
## yes 49 132
##
## Accuracy : 0.9077
## 95% CI : (0.8978, 0.9169)
## No Information Rate : 0.8847
## P-Value [Acc > NIR] : 4.858e-06
##
## Kappa : 0.3996
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9847
## Specificity : 0.3165
## Pos Pred Value : 0.9171
## Neg Pred Value : 0.7293
## Prevalence : 0.8847
## Detection Rate : 0.8712
## Detection Prevalence : 0.9500
## Balanced Accuracy : 0.6506
##
## 'Positive' Class : no
##
# Compute metrics
precision_knn_train <- confusion_knn_train$byClass['Precision']
recall_knn_train <- confusion_knn_train$byClass['Recall']
f1_score_knn_train <- 2 * ((precision_knn_train * recall_knn_train) / (precision_knn_train + recall_knn_train))
accuracy_knn_train <- confusion_knn_train$overall['Accuracy']
# Store the results
results_knn_train <- data.frame(
Model = "KNN_Training",
Precision = precision_knn_train,
Recall = recall_knn_train,
F1_Score = f1_score_knn_train,
Accuracy = accuracy_knn_train
)
print(results_knn_train)
## Model Precision Recall F1_Score Accuracy
## Precision KNN_Training 0.9170547 0.9846875 0.9496685 0.9076583
# Stop timing
end_time <- Sys.time()
runtime <- as.numeric(difftime(end_time, start_time, units = "secs"))
cat("Runtime (in seconds):", runtime, "\n")
## Runtime (in seconds): 0.9905949
# Start timing
start_time <- Sys.time()
# KNN on the test data (k = 5)
model_knn_test <- knn(train_data_knn, test_data_knn, train_data$y, k = 5)
# Evaluation
confusion_knn_test <- confusionMatrix(model_knn_test, test_data$y, positive = "no")
print(confusion_knn_test)
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 778 79
## yes 22 25
##
## Accuracy : 0.8883
## 95% CI : (0.8659, 0.9081)
## No Information Rate : 0.885
## P-Value [Acc > NIR] : 0.402
##
## Kappa : 0.2795
##
## Mcnemar's Test P-Value : 2.515e-08
##
## Sensitivity : 0.9725
## Specificity : 0.2404
## Pos Pred Value : 0.9078
## Neg Pred Value : 0.5319
## Prevalence : 0.8850
## Detection Rate : 0.8606
## Detection Prevalence : 0.9480
## Balanced Accuracy : 0.6064
##
## 'Positive' Class : no
##
# Compute metrics
precision_knn_test <- confusion_knn_test$byClass['Precision']
recall_knn_test <- confusion_knn_test$byClass['Recall']
f1_score_knn_test <- 2 * ((precision_knn_test * recall_knn_test) / (precision_knn_test + recall_knn_test))
accuracy_knn_test <- confusion_knn_test$overall['Accuracy']
# Store the results
results_knn_test <- data.frame(
Model = "KNN_Testing",
Precision = precision_knn_test,
Recall = recall_knn_test,
F1_Score = f1_score_knn_test,
Accuracy = accuracy_knn_test
)
print(results_knn_test)
## Model Precision Recall F1_Score Accuracy
## Precision KNN_Testing 0.907818 0.9725 0.9390465 0.8882743
# Stop timing
end_time <- Sys.time()
runtime <- as.numeric(difftime(end_time, start_time, units = "secs"))
cat("Runtime (in seconds):", runtime, "\n")
## Runtime (in seconds): 0.2705472
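# Sketch (hypothetical helper, not used in this report): the precision/recall/
# F1/accuracy block is repeated for every model; a small function such as
# cm_summary (a name introduced only here) would remove that duplication.
cm_summary <- function(cm, model_name) {
  p <- cm$byClass['Precision']
  r <- cm$byClass['Recall']
  data.frame(Model = model_name, Precision = p, Recall = r,
             F1_Score = 2 * p * r / (p + r),
             Accuracy = cm$overall['Accuracy'], row.names = NULL)
}
results_knn_all <- rbind(cm_summary(confusion_knn_train, "KNN_Training"),
                         cm_summary(confusion_knn_test, "KNN_Testing"))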
# ANN
library(nnet)
library(NeuralNetTools)
library(caret)
# Split the data into training and testing sets
trainIndex <- createDataPartition(data$y, p = .8,
list = FALSE,
times = 1)
train_data <- data[trainIndex, ]
test_data <- data[-trainIndex, ]
# Ensure the target variable y is a factor
train_data$y <- as.factor(train_data$y)
test_data$y <- as.factor(test_data$y)
# Fit the ANN model
set.seed(123)
model_ann <- nnet(y ~ ., data = train_data, size = 7, maxit = 100, linout = FALSE)
## # weights: 288
## initial value 1875.656463
## iter 10 value 1243.855610
## iter 20 value 1111.922190
## iter 30 value 1090.501227
## iter 40 value 1073.339694
## iter 50 value 1058.451986
## iter 60 value 1045.245536
## iter 70 value 1029.829836
## iter 80 value 1013.150776
## iter 90 value 990.834498
## iter 100 value 959.741722
## final value 959.741722
## stopped after 100 iterations
# Visualize the network structure
plotnet(model_ann,
alpha = 0.5, # More transparent connection lines
circle_col = "lightblue", # Softer node colour
cex_val = 0.7, # Smaller weight-value labels
cex_input = 0.6, # Smaller input labels
cex_output = 0.8 # Output label size
)

ctrl <- trainControl(
method = "cv", # Use k-fold cross-validation
number = 10, # 10-fold cross-validation
classProbs = TRUE, # Needed to compute ROC/AUC
summaryFunction = twoClassSummary # Summary metrics for binary classification
)
tunegrid <- expand.grid(
size = c(1, 3, 5, 7, 10, 15, 20), # Try several hidden-layer sizes
decay = 0.1 # Keep weight decay fixed for now
)
set.seed(123)
model_tuning <- train(
y ~ .,
data = train_data,
method = "nnet",
trControl = ctrl,
tuneGrid = tunegrid,
metric = "ROC", # Switch to "Accuracy" to optimize for accuracy instead
maxit = 200, # More iterations to help convergence
trace = FALSE,
linout = FALSE
)
print(model_tuning)
## Neural Network
##
## 3617 samples
## 16 predictor
## 2 classes: 'no', 'yes'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 3256, 3255, 3255, 3256, 3255, 3256, ...
## Resampling results across tuning parameters:
##
## size ROC Sens Spec
## 1 0.8540605 0.9665625 0.4078397
## 3 0.8931479 0.9603125 0.4316492
## 5 0.8947271 0.9653125 0.4123693
## 7 0.8861947 0.9609375 0.3979675
## 10 0.8876367 0.9615625 0.4197445
## 15 0.8695486 0.9646875 0.3493612
## 20 0.8681661 0.9650000 0.3383856
##
## Tuning parameter 'decay' was held constant at a value of 0.1
## ROC was used to select the optimal model using the largest value.
## The final values used for the model were size = 5 and decay = 0.1.
plot(model_tuning)
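# Sketch (not run for this report): the evaluation below uses model_ann
# (size = 7); the cross-validated winner stored in model_tuning
# (size = 5, decay = 0.1) could be checked on the test set the same way.
# pred_tuned and confusion_tuned are hypothetical names for this illustration.
pred_tuned <- predict(model_tuning, newdata = test_data)
confusion_tuned <- confusionMatrix(pred_tuned, test_data$y, positive = "no")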

# Start timing
start_time <- Sys.time()
# Class predictions on the training data
predictions_ann <- predict(model_ann, newdata = train_data, type = "class")
predictions_ann <- factor(predictions_ann, levels = levels(train_data$y))
# Confusion matrix for the training data
confusion_ann <- confusionMatrix(predictions_ann, train_data$y)
print(confusion_ann)
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 3103 296
## yes 97 121
##
## Accuracy : 0.8913
## 95% CI : (0.8807, 0.9013)
## No Information Rate : 0.8847
## P-Value [Acc > NIR] : 0.1099
##
## Kappa : 0.3279
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.9697
## Specificity : 0.2902
## Pos Pred Value : 0.9129
## Neg Pred Value : 0.5550
## Prevalence : 0.8847
## Detection Rate : 0.8579
## Detection Prevalence : 0.9397
## Balanced Accuracy : 0.6299
##
## 'Positive' Class : no
##
# Precision, recall, and F1-score on the training data
precision_ann <- confusion_ann$byClass['Precision']
recall_ann <- confusion_ann$byClass['Recall']
f1_score_ann <- 2 * ((precision_ann * recall_ann) / (precision_ann + recall_ann))
precision_ann
## Precision
## 0.9129156
recall_ann
## Recall
## 0.9696875
f1_score_ann
## Precision
## 0.9404455
# Accuracy on the training data
accuracy_ann <- confusion_ann$overall['Accuracy']
# Evaluation table for the ANN on the training data
results_ann <- data.frame(
Model = "ANN_Training",
Precision = precision_ann,
Recall = recall_ann,
F1_Score = f1_score_ann,
Accuracy = accuracy_ann
)
# Show the results
print(results_ann)
## Model Precision Recall F1_Score Accuracy
## Precision ANN_Training 0.9129156 0.9696875 0.9404455 0.8913464
# Stop timing
end_time <- Sys.time()
runtime <- as.numeric(difftime(end_time, start_time, units = "secs"))
cat("Runtime (in seconds):", runtime, "\n")
## Runtime (in seconds): 0.06229401
# Start timing
start_time <- Sys.time()
# ANN class predictions on the test data
predictions_ann <- predict(model_ann, newdata = test_data, type = "class")
predictions_ann <- factor(predictions_ann, levels = levels(test_data$y))
# Confusion matrix
confusion_ann <- confusionMatrix(predictions_ann, test_data$y)
print(confusion_ann)
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 761 75
## yes 39 29
##
## Accuracy : 0.8739
## 95% CI : (0.8505, 0.8948)
## No Information Rate : 0.885
## P-Value [Acc > NIR] : 0.862637
##
## Kappa : 0.2709
##
## Mcnemar's Test P-Value : 0.001045
##
## Sensitivity : 0.9513
## Specificity : 0.2788
## Pos Pred Value : 0.9103
## Neg Pred Value : 0.4265
## Prevalence : 0.8850
## Detection Rate : 0.8418
## Detection Prevalence : 0.9248
## Balanced Accuracy : 0.6150
##
## 'Positive' Class : no
##
# Compute precision, recall, and F1-score
precision_ann <- confusion_ann$byClass['Precision']
recall_ann <- confusion_ann$byClass['Recall']
f1_score_ann <- 2 * ((precision_ann * recall_ann) / (precision_ann + recall_ann))
precision_ann
## Precision
## 0.9102871
recall_ann
## Recall
## 0.95125
f1_score_ann
## Precision
## 0.9303178
# Extract accuracy
accuracy_ann <- confusion_ann$overall['Accuracy']
# Tabulate precision, recall, F1-score, and accuracy
results_ann <- data.frame(
Model = "ANN_Testing",
Precision = precision_ann,
Recall = recall_ann,
F1_Score = f1_score_ann,
Accuracy = accuracy_ann
)
results_ann
# Stop timing
end_time <- Sys.time()
runtime <- as.numeric(difftime(end_time, start_time, units = "secs"))
cat("Runtime (in seconds):", runtime, "\n")
## Runtime (in seconds): 0.12661
library(ggplot2)
library(pROC)
# ANN
# Plot the confusion matrix
cm_df <- as.data.frame(confusion_ann$table)
colnames(cm_df) <- c("Predicted", "Actual", "Freq")
ggplot(cm_df, aes(x = Actual, y = Predicted, fill = Freq)) +
geom_tile(color = "white") +
geom_text(aes(label = Freq), vjust = 1) +
scale_fill_gradient(low = "lightblue", high = "blue") +
theme_minimal() +
labs(title = "Confusion Matrix - ANN", fill = "Frequency")

# Compute metrics
precision <- confusion_ann$byClass['Precision']
recall <- confusion_ann$byClass['Recall']
f1 <- 2 * ((precision * recall) / (precision + recall))
# Build the metrics table and plot
metrics_df <- data.frame(
Metric = c("Precision", "Recall", "F1 Score"),
Value = c(precision, recall, f1)
)
ggplot(metrics_df, aes(x = Metric, y = Value, fill = Metric)) +
geom_col() +
geom_text(aes(label = round(Value, 2)), vjust = -0.5) +
ylim(0, 1) +
theme_minimal() +
labs(title = "Evaluation Metrics - ANN", y = "Score") +
scale_fill_brewer(palette = "Set2")
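# Sketch (not run for this report): pROC is loaded above but not yet used. An
# ROC curve and AUC for the ANN on the test data can be built from the raw nnet
# output, which for a two-level factor is the estimated probability of the
# second level ("yes"); prob_ann, roc_ann and auc_ann are hypothetical names.
prob_ann <- as.numeric(predict(model_ann, newdata = test_data, type = "raw"))
roc_ann <- roc(response = test_data$y, predictor = prob_ann,
               levels = c("no", "yes"), direction = "<")
auc_ann <- auc(roc_ann)
plot(roc_ann, main = "ROC Curve - ANN (Test Data)")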
