Menyiapkan library dan data
data <- read_xlsx("bank_latih_preprocessed.xlsx")
str(data)
## tibble [4,521 Ć 41] (S3: tbl_df/tbl/data.frame)
## $ Age : num [1:4521] 30 33 35 30 59 35 36 39 41 43 ...
## $ job : chr [1:4521] "unemployed" "services" "management" "management" ...
## $ marital : chr [1:4521] "married" "married" "single" "married" ...
## $ education : chr [1:4521] "primary" "secondary" "tertiary" "tertiary" ...
## $ default : chr [1:4521] "no" "no" "no" "no" ...
## $ balance : num [1:4521] 1787 4789 1350 1476 0 ...
## $ housing : chr [1:4521] "no" "yes" "yes" "yes" ...
## $ loan : chr [1:4521] "no" "yes" "no" "yes" ...
## $ contact : chr [1:4521] "cellular" "cellular" "cellular" "unknown" ...
## $ day : num [1:4521] 19 11 16 3 5 23 14 6 14 17 ...
## $ month : chr [1:4521] "oct" "may" "apr" "jun" ...
## $ duration : num [1:4521] 79 220 185 199 226 141 341 151 57 313 ...
## $ campaign : num [1:4521] 1 1 1 4 1 2 1 2 2 1 ...
## $ pdays : num [1:4521] -1 339 330 -1 -1 176 330 -1 -1 147 ...
## $ previous : num [1:4521] 0 4 1 0 0 3 2 0 0 2 ...
## $ poutcome : chr [1:4521] "unknown" "failure" "failure" "unknown" ...
## $ y : chr [1:4521] "no" "no" "no" "no" ...
## $ Age_z : num [1:4521] -1.056 -0.772 -0.583 -1.056 1.686 ...
## $ balance_z : num [1:4521] 0.1211 1.1185 -0.0241 0.0177 -0.4727 ...
## $ day_z : num [1:4521] 0.374 -0.596 0.0103 -1.5659 -1.3234 ...
## $ duration_z : num [1:4521] -0.712 -0.169 -0.304 -0.25 -0.146 ...
## $ campaign_z : num [1:4521] -0.577 -0.577 -0.577 0.388 -0.577 ...
## $ pdays_z : num [1:4521] -1 0.974 0.897 -1 -1 ...
## $ previous_z : num [1:4521] -0.32 2.04 0.27 -0.32 -0.32 ...
## $ Age_norm : num [1:4521] 0.162 0.206 0.235 0.162 0.588 ...
## $ balance_norm : num [1:4521] 0.0685 0.1088 0.0626 0.0643 0.0445 ...
## $ day_norm : num [1:4521] 0.6 0.3333 0.5 0.0667 0.1333 ...
## $ duration_norm : num [1:4521] 0.0248 0.0715 0.0599 0.0645 0.0735 ...
## $ campaign_norm : num [1:4521] 0 0 0 0.0612 0 ...
## $ pdays_norm : num [1:4521] -1 0.389 0.378 -1 -1 ...
## $ previous_norm : num [1:4521] 0 0.16 0.04 0 0 0.12 0.08 0 0 0.08 ...
## $ job_encoded : num [1:4521] 11 8 5 5 2 5 7 10 3 8 ...
## $ marital_encoded : num [1:4521] 2 2 3 2 2 3 2 2 2 2 ...
## $ education_encoded: num [1:4521] 1 2 3 3 2 3 3 2 3 1 ...
## $ default_encoded : num [1:4521] 1 1 1 1 1 1 1 1 1 1 ...
## $ housing_encoded : num [1:4521] 1 2 2 2 2 1 2 2 2 2 ...
## $ loan_encoded : num [1:4521] 1 2 1 2 1 1 1 1 1 2 ...
## $ contact_encoded : num [1:4521] 1 1 1 3 3 1 1 1 3 1 ...
## $ month_encoded : num [1:4521] 11 9 1 7 9 4 9 9 9 1 ...
## $ poutcome_encoded : num [1:4521] 4 1 1 4 4 1 2 4 4 1 ...
## $ y_encoded : num [1:4521] 1 1 1 1 1 1 1 1 1 1 ...
Mengubah variabel target menjadi bentuk biner
data$y_binary <- ifelse(data$y == "yes", 1, 0)
Memilih fitur
features.1 <- c("Age", "balance", "day", "duration", "campaign", "pdays", "previous", "job_encoded", "marital_encoded", "education_encoded", "default_encoded", "housing_encoded", "loan_encoded", "contact_encoded", "month_encoded", "poutcome_encoded")
features.2_1 <- c("Age_z", "balance_z", "day", "duration_z", "campaign_z", "pdays_z", "previous_z", "job_encoded", "marital_encoded", "education_encoded", "default_encoded", "housing_encoded", "loan_encoded", "contact_encoded", "month_encoded", "poutcome_encoded")
features.3_1 <- c("Age_norm", "balance_norm", "day", "duration_norm", "campaign_norm", "pdays_norm", "previous_norm", "job_encoded", "marital_encoded", "education_encoded", "default_encoded", "housing_encoded", "loan_encoded", "contact_encoded", "month_encoded", "poutcome_encoded")
model.1_data <- data[, c(features.1, "y_binary")]
model.2_1_data <- data[, c(features.2_1, "y_binary")]
model.3_1_data <- data[, c(features.3_1, "y_binary")]
Split data training dan testing
# Split data
set.seed(123)
index <- createDataPartition(model.1_data$y_binary, p = 0.8, list = FALSE)
trainData_1 <- model.1_data[index, ]
testData_1 <- model.1_data[-index, ]
trainData_2_1 <- model.2_1_data[index, ]
testData_2_1 <- model.2_1_data[-index, ]
trainData_3_1 <- model.3_1_data[index, ]
testData_3_1 <- model.3_1_data[-index, ]
Model dengan fitur asli
ann_model.1<- neuralnet(formula.1, data = trainData_1, hidden = c(5), linear.output = FALSE)
Model dengan fitur hasil standardisasi (Z-Score)
ann_model.2_1 <- neuralnet(formula.2_1, data = trainData_2_1, hidden = c(5), linear.output = FALSE)
Model dengan fitur hasil normalisasi (min-max)
ann_model.3_1 <- neuralnet(formula.3_1, data = trainData_3_1, hidden = c(5), linear.output = FALSE)
Plot model ANN dengan fitur asli
plot(ann_model.1)
Plot model ANN dengan fitur hasil standardisasi
plot(ann_model.2_1)
Plot model ANN dengan fitur hasil normalisasi
plot(ann_model.3_1)
Metrik evaluasi setiap model
# Simpan semua model, data, dan fitur dalam list
models <- list(ann_model.1, ann_model.2_1, ann_model.3_1)
train_data <- list(trainData_1, trainData_2_1, trainData_3_1)
test_data <- list(testData_1, testData_2_1, testData_3_1)
features_list <- list(features.1, features.2_1, features.3_1)
# Fungsi bantu evaluasi
evaluate_model <- function(model, train_set, test_set, features, index) {
cat("\n===============================\n")
cat(paste("š Evaluasi Model ANN -", index, "\n"))
cat("===============================\n")
# ---- TRAINING ----
pred_train <- predict(model, train_set[, features], type = "raw")
pred_train_class <- ifelse(pred_train > 0.5, 1, 0)
cm_train <- confusionMatrix(as.factor(pred_train_class), as.factor(train_set$y_binary))
precision_train <- Precision(pred_train_class, train_set$y_binary)
recall_train <- Recall(pred_train_class, train_set$y_binary)
f1_train <- F1_Score(pred_train_class, train_set$y_binary)
accuracy_train <- Accuracy(pred_train_class, train_set$y_binary)
cat("\nš TRAINING SET:\n")
print(cm_train$table)
cat(sprintf("Accuracy : %.4f | Precision: %.4f | Recall: %.4f | F1: %.4f\n",
accuracy_train, precision_train, recall_train, f1_train))
# ---- TESTING ----
pred_test <- predict(model, test_set[, features], type = "raw")
pred_test_class <- ifelse(pred_test > 0.5, 1, 0)
cm_test <- confusionMatrix(as.factor(pred_test_class), as.factor(test_set$y_binary))
precision_test <- Precision(pred_test_class, test_set$y_binary)
recall_test <- Recall(pred_test_class, test_set$y_binary)
f1_test <- F1_Score(pred_test_class, test_set$y_binary)
accuracy_test <- Accuracy(pred_test_class, test_set$y_binary)
cat("\nš TESTING SET:\n")
print(cm_test$table)
cat(sprintf("Accuracy : %.4f | Precision: %.4f | Recall: %.4f | F1: %.4f\n",
accuracy_test, precision_test, recall_test, f1_test))
}
# Loop untuk evaluasi semua model
for (i in 1:3) {
evaluate_model(models[[i]], train_data[[i]], test_data[[i]], features_list[[i]], i)
}
##
## ===============================
## š Evaluasi Model ANN - 1
## ===============================
##
## š TRAINING SET:
## Reference
## Prediction 0 1
## 0 3190 394
## 1 13 20
## Accuracy : 0.8875 | Precision: 0.9959 | Recall: 0.8901 | F1: 0.9400
##
## š TESTING SET:
## Reference
## Prediction 0 1
## 0 797 106
## 1 0 1
## Accuracy : 0.8827 | Precision: 1.0000 | Recall: 0.8826 | F1: 0.9376
##
## ===============================
## š Evaluasi Model ANN - 2
## ===============================
##
## š TRAINING SET:
## Reference
## Prediction 0 1
## 0 3103 188
## 1 100 226
## Accuracy : 0.9204 | Precision: 0.9688 | Recall: 0.9429 | F1: 0.9557
##
## š TESTING SET:
## Reference
## Prediction 0 1
## 0 761 73
## 1 36 34
## Accuracy : 0.8794 | Precision: 0.9548 | Recall: 0.9125 | F1: 0.9332
##
## ===============================
## š Evaluasi Model ANN - 3
## ===============================
##
## š TRAINING SET:
## Reference
## Prediction 0 1
## 0 3115 195
## 1 88 219
## Accuracy : 0.9218 | Precision: 0.9725 | Recall: 0.9411 | F1: 0.9565
##
## š TESTING SET:
## Reference
## Prediction 0 1
## 0 758 67
## 1 39 40
## Accuracy : 0.8827 | Precision: 0.9511 | Recall: 0.9188 | F1: 0.9346