Import Library
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(party)
## Warning: package 'party' was built under R version 4.4.3
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Warning: package 'strucchange' was built under R version 4.4.2
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.4.2
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Warning: package 'sandwich' was built under R version 4.4.2
##
## Attaching package: 'party'
## The following object is masked from 'package:dplyr':
##
## where
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.4.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
##
## combine
library(caret)
## Warning: package 'caret' was built under R version 4.4.2
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
##
## margin
## Loading required package: lattice
library(class)
library(nnet)
## Warning: package 'nnet' was built under R version 4.4.3
library(mlbench)
## Warning: package 'mlbench' was built under R version 4.4.3
library(e1071)
library(rJava)
##
## Attaching package: 'rJava'
## The following object is masked from 'package:modeltools':
##
## clone
library(RWeka)
## Warning: package 'RWeka' was built under R version 4.4.3
library(partykit)
## Warning: package 'partykit' was built under R version 4.4.3
## Loading required package: libcoin
## Warning: package 'libcoin' was built under R version 4.4.3
##
## Attaching package: 'partykit'
## The following objects are masked from 'package:party':
##
## cforest, ctree, ctree_control, edge_simple, mob, mob_control,
## node_barplot, node_bivplot, node_boxplot, node_inner, node_surv,
## node_terminal, varimp
library(kknn)
## Warning: package 'kknn' was built under R version 4.4.3
##
## Attaching package: 'kknn'
## The following object is masked from 'package:caret':
##
## contr.dummy
library(fastDummies)
## Warning: package 'fastDummies' was built under R version 4.4.3
library(MLmetrics)
## Warning: package 'MLmetrics' was built under R version 4.4.3
##
## Attaching package: 'MLmetrics'
## The following objects are masked from 'package:caret':
##
## MAE, RMSE
## The following object is masked from 'package:base':
##
## Recall
library(rpart)
## Warning: package 'rpart' was built under R version 4.4.3
library(RColorBrewer)
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.4.3
library(rattle)
## Warning: package 'rattle' was built under R version 4.4.3
## Loading required package: tibble
## Loading required package: bitops
## Rattle: A free graphical interface for data science with R.
## Version 5.5.1 Copyright (c) 2006-2021 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
##
## Attaching package: 'rattle'
## The following object is masked from 'package:randomForest':
##
## importance
library(neuralnet)
## Warning: package 'neuralnet' was built under R version 4.4.3
##
## Attaching package: 'neuralnet'
## The following object is masked from 'package:dplyr':
##
## compute
library(readxl)
library(NeuralNetTools)
## Warning: package 'NeuralNetTools' was built under R version 4.4.3
Preprocessing
# Check the consistency of the categorical entries by listing the distinct
# values of every character column.
data <- raw_data
# vapply() always yields a logical vector, and list-style `[` never collapses
# a single selected column into a bare vector, so this behaves the same for
# zero, one or many character columns.
lapply(data[vapply(data, is.character, logical(1))], unique)
## $job
## [1] "unemployed" "services" "management" "blue-collar"
## [5] "self-employed" "technician" "entrepreneur" "admin."
## [9] "student" "housemaid" "retired" "unknown"
##
## $marital
## [1] "married" "single" "menikah" "divorced"
##
## $education
## [1] "primary" "secondary" "tertiary" "tertier" "sekunder" "unknown"
##
## $default
## [1] "no" "yes"
##
## $housing
## [1] "no" "yes" "tidak"
##
## $loan
## [1] "no" "yes" "tidak"
##
## $contact
## [1] "cellular" "unknown" "seluler" "telephone"
##
## $month
## [1] "10" "may" "apr" "jun" "feb" "aug" "jan" "7" "nov" "jul" "oct" "sep"
## [13] "mar" "dec"
##
## $poutcome
## [1] "unknown" "failure" "other" "success"
##
## $y
## [1] "no" "iya" "yes"
# Harmonise inconsistent entries: replace the Indonesian spellings and the
# numeric month codes with the English labels already used in each column.
data$marital[data$marital %in% "menikah"] <- "married"
data$education[data$education %in% "tertier"] <- "tertiary"
data$education[data$education %in% "sekunder"] <- "secondary"
data$housing[data$housing %in% "tidak"] <- "no"
data$loan[data$loan %in% "tidak"] <- "no"
data$contact[data$contact %in% "seluler"] <- "cellular"
data$month[data$month %in% "10"] <- "oct"
data$month[data$month %in% "7"] <- "jul"
data$y[data$y %in% "iya"] <- "yes"
str(data)
## 'data.frame': 4521 obs. of 17 variables:
## $ Age : int 30 33 35 30 59 35 36 39 41 43 ...
## $ job : chr "unemployed" "services" "management" "management" ...
## $ marital : chr "married" "married" "single" "married" ...
## $ education: chr "primary" "secondary" "tertiary" "tertiary" ...
## $ default : chr "no" "no" "no" "no" ...
## $ balance : int 1787 4789 1350 1476 0 747 307 147 221 -88 ...
## $ housing : chr "no" "yes" "yes" "yes" ...
## $ loan : chr "no" "yes" "no" "yes" ...
## $ contact : chr "cellular" "cellular" "cellular" "unknown" ...
## $ day : int 19 11 16 3 5 23 14 6 14 17 ...
## $ month : chr "oct" "may" "apr" "jun" ...
## $ duration : int 79 220 185 199 226 141 341 151 57 313 ...
## $ campaign : int 1 1 1 4 1 2 1 2 2 1 ...
## $ pdays : int -1 339 330 -1 -1 176 330 -1 -1 147 ...
## $ previous : int 0 4 1 0 0 3 2 0 0 2 ...
## $ poutcome : chr "unknown" "failure" "failure" "unknown" ...
## $ y : chr "no" "no" "no" "no" ...
# Convert the categorical variables (including the target y) to factors.
# local() keeps the helper vector out of the global environment.
data <- local({
  categorical <- c(
    "job", "marital", "education", "default", "housing",
    "loan", "contact", "poutcome", "y", "month"
  )
  data[categorical] <- lapply(data[categorical], as.factor)
  data
})
head(data)
str(data)
## 'data.frame': 4521 obs. of 17 variables:
## $ Age : int 30 33 35 30 59 35 36 39 41 43 ...
## $ job : Factor w/ 12 levels "admin.","blue-collar",..: 11 8 5 5 2 5 7 10 3 8 ...
## $ marital : Factor w/ 3 levels "divorced","married",..: 2 2 3 2 2 3 2 2 2 2 ...
## $ education: Factor w/ 4 levels "primary","secondary",..: 1 2 3 3 2 3 3 2 3 1 ...
## $ default : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ balance : int 1787 4789 1350 1476 0 747 307 147 221 -88 ...
## $ housing : Factor w/ 2 levels "no","yes": 1 2 2 2 2 1 2 2 2 2 ...
## $ loan : Factor w/ 2 levels "no","yes": 1 2 1 2 1 1 1 1 1 2 ...
## $ contact : Factor w/ 3 levels "cellular","telephone",..: 1 1 1 3 3 1 1 1 3 1 ...
## $ day : int 19 11 16 3 5 23 14 6 14 17 ...
## $ month : Factor w/ 12 levels "apr","aug","dec",..: 11 9 1 7 9 4 9 9 9 1 ...
## $ duration : int 79 220 185 199 226 141 341 151 57 313 ...
## $ campaign : int 1 1 1 4 1 2 1 2 2 1 ...
## $ pdays : int -1 339 330 -1 -1 176 330 -1 -1 147 ...
## $ previous : int 0 4 1 0 0 3 2 0 0 2 ...
## $ poutcome : Factor w/ 4 levels "failure","other",..: 4 1 1 4 4 1 2 4 4 1 ...
## $ y : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
Pemisahan Data Training dan Testing
# Split used by the RF, DT, NB and KNN models: 80% of the rows go to the
# training set, the remainder to the testing set (partitioned on y).
set.seed(777)
inTrain <- createDataPartition(y = data$y, p = 0.8, list = FALSE)
train <- data[inTrain, ]
test <- data[-inTrain, ]
# Keep predictors and target separately for model evaluation.
X_train <- train[, names(train) != "y"]
X_test <- test[, names(test) != "y"]
y_train <- train[["y"]]
y_test <- test[["y"]]
head(train)
head(test)
Pemodelan
Naive Bayes
set.seed(777)
# 1. Fit the Naive Bayes model on every predictor in the training set
output.nb <- naiveBayes(y ~ ., data = train)

# 2. Class predictions for the training and testing sets
pred_train_nb <- predict(output.nb, newdata = train)
pred_test_nb <- predict(output.nb, newdata = test)

# NOTE(review): no positive/relevant class is passed below, so caret takes the
# first factor level ("no") as the positive class; precision, recall and F1
# therefore describe the "no" class.

# 3-6. Training-set evaluation: confusion matrix, accuracy, precision,
#      recall and F1 score
conf_matrix_train_nb <- confusionMatrix(data = pred_train_nb, reference = train$y)
acc_train_nb <- conf_matrix_train_nb$overall["Accuracy"]
precision_train_nb <- precision(data = pred_train_nb, reference = y_train)
recall_train_nb <- recall(data = pred_train_nb, reference = y_train)
f1_train_nb <- F_meas(data = pred_train_nb, reference = y_train)

# 3-6. Testing-set evaluation: same metrics
conf_matrix_test_nb <- confusionMatrix(data = pred_test_nb, reference = test$y)
acc_test_nb <- conf_matrix_test_nb$overall["Accuracy"]
precision_test_nb <- precision(data = pred_test_nb, reference = y_test)
recall_test_nb <- recall(data = pred_test_nb, reference = y_test)
f1_test_nb <- F_meas(data = pred_test_nb, reference = y_test)

# 7. Print the results
cat("=== TRAINING DATA ===\n")
## === TRAINING DATA ===
print(conf_matrix_train_nb )
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 2905 206
## yes 295 211
##
## Accuracy : 0.8615
## 95% CI : (0.8498, 0.8726)
## No Information Rate : 0.8847
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.3787
##
## Mcnemar's Test P-Value : 8.44e-05
##
## Sensitivity : 0.9078
## Specificity : 0.5060
## Pos Pred Value : 0.9338
## Neg Pred Value : 0.4170
## Prevalence : 0.8847
## Detection Rate : 0.8032
## Detection Prevalence : 0.8601
## Balanced Accuracy : 0.7069
##
## 'Positive' Class : no
##
cat(sprintf("Akurasi: %.4f\n", acc_train_nb ))
## Akurasi: 0.8615
cat(sprintf("Precision: %.4f\n", precision_train_nb ))
## Precision: 0.9338
cat(sprintf("Recall: %.4f\n", recall_train_nb ))
## Recall: 0.9078
cat(sprintf("F1 Score: %.4f\n\n", f1_train_nb ))
## F1 Score: 0.9206
cat("=== TESTING DATA ===\n")
## === TESTING DATA ===
print(conf_matrix_test_nb )
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 735 51
## yes 65 53
##
## Accuracy : 0.8717
## 95% CI : (0.8481, 0.8928)
## No Information Rate : 0.885
## P-Value [Acc > NIR] : 0.9022
##
## Kappa : 0.4047
##
## Mcnemar's Test P-Value : 0.2274
##
## Sensitivity : 0.9187
## Specificity : 0.5096
## Pos Pred Value : 0.9351
## Neg Pred Value : 0.4492
## Prevalence : 0.8850
## Detection Rate : 0.8131
## Detection Prevalence : 0.8695
## Balanced Accuracy : 0.7142
##
## 'Positive' Class : no
##
cat(sprintf("Akurasi: %.4f\n", acc_test_nb ))
## Akurasi: 0.8717
cat(sprintf("Precision: %.4f\n", precision_test_nb ))
## Precision: 0.9351
cat(sprintf("Recall: %.4f\n", recall_test_nb ))
## Recall: 0.9187
cat(sprintf("F1 Score: %.4f\n", f1_test_nb ))
## F1 Score: 0.9269
Random Forest
set.seed(777)
# 1. Fit the random forest: 50 trees, 3 candidate predictors per split.
#    The argument list must not end in a comma: a trailing comma hands an
#    empty argument to randomForest() through `...`.
output.rf <- randomForest(
  y ~ .,
  data = train,
  ntree = 50,
  mtry = 3
)

# 2. Class predictions for the training and testing sets
pred_train_rf <- predict(output.rf, newdata = train)
pred_test_rf <- predict(output.rf, newdata = test)

# NOTE(review): no positive/relevant class is passed below, so caret takes the
# first factor level ("no") as the positive class; precision, recall and F1
# therefore describe the "no" class.

# 3. Confusion matrices
conf_matrix_train_rf <- confusionMatrix(data = pred_train_rf, reference = train$y)
conf_matrix_test_rf <- confusionMatrix(data = pred_test_rf, reference = test$y)

# 4. Accuracy
acc_train_rf <- conf_matrix_train_rf$overall["Accuracy"]
acc_test_rf <- conf_matrix_test_rf$overall["Accuracy"]

# 5. Precision and recall
precision_train_rf <- precision(data = pred_train_rf, reference = y_train)
precision_test_rf <- precision(data = pred_test_rf, reference = y_test)
recall_train_rf <- recall(data = pred_train_rf, reference = y_train)
recall_test_rf <- recall(data = pred_test_rf, reference = y_test)

# 6. F1 score
f1_train_rf <- F_meas(data = pred_train_rf, reference = y_train)
f1_test_rf <- F_meas(data = pred_test_rf, reference = y_test)

# 7. Print the results
cat("=== TRAINING DATA ===\n")
## === TRAINING DATA ===
print(conf_matrix_train_rf )
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 3200 6
## yes 0 411
##
## Accuracy : 0.9983
## 95% CI : (0.9964, 0.9994)
## No Information Rate : 0.8847
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.9918
##
## Mcnemar's Test P-Value : 0.04123
##
## Sensitivity : 1.0000
## Specificity : 0.9856
## Pos Pred Value : 0.9981
## Neg Pred Value : 1.0000
## Prevalence : 0.8847
## Detection Rate : 0.8847
## Detection Prevalence : 0.8864
## Balanced Accuracy : 0.9928
##
## 'Positive' Class : no
##
cat(sprintf("Akurasi: %.4f\n", acc_train_rf ))
## Akurasi: 0.9983
cat(sprintf("Precision: %.4f\n", precision_train_rf ))
## Precision: 0.9981
cat(sprintf("Recall: %.4f\n", recall_train_rf ))
## Recall: 1.0000
cat(sprintf("F1 Score: %.4f\n\n", f1_train_rf ))
## F1 Score: 0.9991
cat("=== TESTING DATA ===\n")
## === TESTING DATA ===
print(conf_matrix_test_rf )
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 783 70
## yes 17 34
##
## Accuracy : 0.9038
## 95% CI : (0.8826, 0.9222)
## No Information Rate : 0.885
## P-Value [Acc > NIR] : 0.04018
##
## Kappa : 0.3927
##
## Mcnemar's Test P-Value : 2.476e-08
##
## Sensitivity : 0.9788
## Specificity : 0.3269
## Pos Pred Value : 0.9179
## Neg Pred Value : 0.6667
## Prevalence : 0.8850
## Detection Rate : 0.8662
## Detection Prevalence : 0.9436
## Balanced Accuracy : 0.6528
##
## 'Positive' Class : no
##
cat(sprintf("Akurasi: %.4f\n", acc_test_rf ))
## Akurasi: 0.9038
cat(sprintf("Precision: %.4f\n", precision_test_rf ))
## Precision: 0.9179
cat(sprintf("Recall: %.4f\n", recall_test_rf ))
## Recall: 0.9788
cat(sprintf("F1 Score: %.4f\n", f1_test_rf ))
## F1 Score: 0.9474
KNN
set.seed(777)
# 1. Train the model.
#    train.kknn() chooses k from 1..kmax by leave-one-out cross-validation.
#    caret-style `tuneGrid` / `trControl` are not parameters of train.kknn();
#    they would be swallowed by `...` and ignored, so they are not passed —
#    no 10-fold cross-validation takes place in this call.
output.knn <- train.kknn(
  y ~ .,
  data = train,
  kmax = 20,
  distance = 2,
  kernel = "gaussian"
)
# 2. Class predictions for the training and testing sets
pred_train_knn <- predict(output.knn, newdata = train)
## Warning in model.matrix.default(mt2, test, contrasts.arg = contrasts.arg):
## variable 'y' is absent, its contrast will be ignored
pred_test_knn <- predict(output.knn, newdata = test)
## Warning in model.matrix.default(mt2, test, contrasts.arg = contrasts.arg):
## variable 'y' is absent, its contrast will be ignored
# NOTE(review): no positive/relevant class is passed below, so caret takes the
# first factor level ("no") as the positive class; precision, recall and F1
# therefore describe the "no" class.

# 3-6. Training-set evaluation: confusion matrix, accuracy, precision,
#      recall and F1 score
conf_matrix_train_knn <- confusionMatrix(data = pred_train_knn, reference = train$y)
acc_train_knn <- conf_matrix_train_knn$overall["Accuracy"]
precision_train_knn <- precision(data = pred_train_knn, reference = y_train)
recall_train_knn <- recall(data = pred_train_knn, reference = y_train)
f1_train_knn <- F_meas(data = pred_train_knn, reference = y_train)

# 3-6. Testing-set evaluation: same metrics
conf_matrix_test_knn <- confusionMatrix(data = pred_test_knn, reference = test$y)
acc_test_knn <- conf_matrix_test_knn$overall["Accuracy"]
precision_test_knn <- precision(data = pred_test_knn, reference = y_test)
recall_test_knn <- recall(data = pred_test_knn, reference = y_test)
f1_test_knn <- F_meas(data = pred_test_knn, reference = y_test)

# 7. Print the results
cat("=== TRAINING DATA ===\n")
## === TRAINING DATA ===
print(conf_matrix_train_knn )
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 3188 283
## yes 12 134
##
## Accuracy : 0.9184
## 95% CI : (0.909, 0.9272)
## No Information Rate : 0.8847
## P-Value [Acc > NIR] : 1.719e-11
##
## Kappa : 0.4427
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9962
## Specificity : 0.3213
## Pos Pred Value : 0.9185
## Neg Pred Value : 0.9178
## Prevalence : 0.8847
## Detection Rate : 0.8814
## Detection Prevalence : 0.9596
## Balanced Accuracy : 0.6588
##
## 'Positive' Class : no
##
cat(sprintf("Akurasi: %.4f\n", acc_train_knn ))
## Akurasi: 0.9184
cat(sprintf("Precision: %.4f\n", precision_train_knn ))
## Precision: 0.9185
cat(sprintf("Recall: %.4f\n", recall_train_knn ))
## Recall: 0.9962
cat(sprintf("F1 Score: %.4f\n\n", f1_train_knn ))
## F1 Score: 0.9558
cat("=== TESTING DATA ===\n")
## === TESTING DATA ===
print(conf_matrix_test_knn )
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 792 85
## yes 8 19
##
## Accuracy : 0.8971
## 95% CI : (0.8755, 0.9162)
## No Information Rate : 0.885
## P-Value [Acc > NIR] : 0.1362
##
## Kappa : 0.2547
##
## Mcnemar's Test P-Value : 3.252e-15
##
## Sensitivity : 0.9900
## Specificity : 0.1827
## Pos Pred Value : 0.9031
## Neg Pred Value : 0.7037
## Prevalence : 0.8850
## Detection Rate : 0.8761
## Detection Prevalence : 0.9701
## Balanced Accuracy : 0.5863
##
## 'Positive' Class : no
##
# Report the KNN test metrics with four decimals, matching the format used
# for every other model's report.
cat(sprintf("Akurasi: %.4f\n", acc_test_knn))
## Akurasi: 0.8971
cat(sprintf("Precision: %.4f\n", precision_test_knn))
## Precision: 0.9031
cat(sprintf("Recall: %.4f\n", recall_test_knn))
## Recall: 0.9900
cat(sprintf("F1 Score: %.4f\n", f1_test_knn))
## F1 Score: 0.9445
Decision Tree
set.seed(777)
# 1. Fit the classification tree (minsplit = 10, complexity parameter 0.01)
tree_control <- rpart.control(minsplit = 10, cp = 0.01)
output.tree <- rpart(y ~ ., data = train, control = tree_control)
rm(tree_control)

# 2. Class predictions for the training and testing sets, re-levelled so the
#    predictions carry the same factor levels as the reference labels
pred_train_tree <- factor(
  predict(output.tree, X_train, type = "class"),
  levels = levels(y_train)
)
pred_test_tree <- factor(
  predict(output.tree, X_test, type = "class"),
  levels = levels(y_test)
)

# NOTE(review): no positive/relevant class is passed below, so caret takes the
# first factor level ("no") as the positive class; precision, recall and F1
# therefore describe the "no" class.

# 3-6. Training-set evaluation: confusion matrix, accuracy, precision,
#      recall and F1 score
conf_matrix_train_tree <- confusionMatrix(data = pred_train_tree, reference = train$y)
acc_train_tree <- conf_matrix_train_tree$overall["Accuracy"]
precision_train_tree <- precision(data = pred_train_tree, reference = y_train)
recall_train_tree <- recall(data = pred_train_tree, reference = y_train)
f1_train_tree <- F_meas(data = pred_train_tree, reference = y_train)

# 3-6. Testing-set evaluation: same metrics
conf_matrix_test_tree <- confusionMatrix(data = pred_test_tree, reference = test$y)
acc_test_tree <- conf_matrix_test_tree$overall["Accuracy"]
precision_test_tree <- precision(data = pred_test_tree, reference = y_test)
recall_test_tree <- recall(data = pred_test_tree, reference = y_test)
f1_test_tree <- F_meas(data = pred_test_tree, reference = y_test)

# 7. Print the results
cat("=== TRAINING DATA ===\n")
## === TRAINING DATA ===
print(conf_matrix_train_tree )
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 3099 222
## yes 101 195
##
## Accuracy : 0.9107
## 95% CI : (0.9009, 0.9198)
## No Information Rate : 0.8847
## P-Value [Acc > NIR] : 2.366e-07
##
## Kappa : 0.499
##
## Mcnemar's Test P-Value : 2.439e-11
##
## Sensitivity : 0.9684
## Specificity : 0.4676
## Pos Pred Value : 0.9332
## Neg Pred Value : 0.6588
## Prevalence : 0.8847
## Detection Rate : 0.8568
## Detection Prevalence : 0.9182
## Balanced Accuracy : 0.7180
##
## 'Positive' Class : no
##
cat(sprintf("Akurasi: %.4f\n", acc_train_tree ))
## Akurasi: 0.9107
cat(sprintf("Precision: %.4f\n", precision_train_tree ))
## Precision: 0.9332
cat(sprintf("Recall: %.4f\n", recall_train_tree ))
## Recall: 0.9684
cat(sprintf("F1 Score: %.4f\n\n", f1_train_tree ))
## F1 Score: 0.9505
cat("=== TESTING DATA ===\n")
## === TESTING DATA ===
print(conf_matrix_test_tree )
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 777 57
## yes 23 47
##
## Accuracy : 0.9115
## 95% CI : (0.8911, 0.9292)
## No Information Rate : 0.885
## P-Value [Acc > NIR] : 0.0057884
##
## Kappa : 0.4933
##
## Mcnemar's Test P-Value : 0.0002247
##
## Sensitivity : 0.9712
## Specificity : 0.4519
## Pos Pred Value : 0.9317
## Neg Pred Value : 0.6714
## Prevalence : 0.8850
## Detection Rate : 0.8595
## Detection Prevalence : 0.9226
## Balanced Accuracy : 0.7116
##
## 'Positive' Class : no
##
cat(sprintf("Akurasi: %.4f\n", acc_test_tree ))
## Akurasi: 0.9115
cat(sprintf("Precision: %.4f\n", precision_test_tree ))
## Precision: 0.9317
cat(sprintf("Recall: %.4f\n", recall_test_tree ))
## Recall: 0.9712
cat(sprintf("F1 Score: %.4f\n", f1_test_tree ))
## F1 Score: 0.9510
# Plot the fitted decision tree with automatic box colours and node numbers.
rpart.plot(
  output.tree,
  box.palette = "auto",
  nn = TRUE
)

ANN
set.seed(777)
# 1. Train the model
# NOTE(review): train_data / test_data are not created in this section;
# presumably a numerically encoded copy of the data with a 0/1 target y —
# confirm against the chunk that builds them.
n <- names(train_data)
# Formula of the form y ~ x1 + x2 + ..., using every column except y.
f <- as.formula(paste("y ~", paste(n[!n %in% "y"], collapse = " + ")))
# The argument list must not end in a comma: a trailing comma hands an empty
# positional argument to neuralnet().
nn <- neuralnet(
  formula = f,
  data = train_data,
  hidden = c(8, 5, 3),   # three hidden layers with 8, 5 and 3 neurons
  linear.output = FALSE  # apply the activation function to the output neuron
                         # as well (binary classification)
)
set.seed(777)
# 2. Predictions for the training and testing sets
# Network outputs above 0.5 are classified as 1, everything else as 0.
pr.nn_train <- compute(nn, train_data)
roundedresults_train <- ifelse(pr.nn_train$net.result > 0.5, 1, 0)
# Pair the predictions with the actual labels.
results_train <- data.frame(
  actual = train_data$y,
  prediction = roundedresults_train
)
# Both factors get the same levels in the same order.
actual_train <- factor(results_train$actual, levels = c(0, 1))
predicted_train <- factor(results_train$prediction, levels = c(0, 1))

pr.nn_test <- compute(nn, test_data)
roundedresults_test <- ifelse(pr.nn_test$net.result > 0.5, 1, 0)
results_test <- data.frame(
  actual = test_data$y,
  prediction = roundedresults_test
)
actual_test <- factor(results_test$actual, levels = c(0, 1))
predicted_test <- factor(results_test$prediction, levels = c(0, 1))

# NOTE(review): no positive class is passed below, so caret takes the first
# level (0) as the positive class for precision, recall and F1.

# 3. Confusion matrices
conf_matrix_train_ann <- confusionMatrix(data = predicted_train, reference = actual_train)
conf_matrix_test_ann <- confusionMatrix(data = predicted_test, reference = actual_test)

# 4. Accuracy
acc_train_ann <- conf_matrix_train_ann$overall["Accuracy"]
acc_test_ann <- conf_matrix_test_ann$overall["Accuracy"]

# 5. Precision and recall
precision_train_ann <- conf_matrix_train_ann$byClass["Precision"]
recall_train_ann <- conf_matrix_train_ann$byClass["Recall"]
precision_test_ann <- conf_matrix_test_ann$byClass["Precision"]
recall_test_ann <- conf_matrix_test_ann$byClass["Recall"]

# 6. F1 score (harmonic mean of precision and recall)
f1_train_ann <- 2 * ((precision_train_ann * recall_train_ann) / (precision_train_ann + recall_train_ann))
f1_test_ann <- 2 * ((precision_test_ann * recall_test_ann) / (precision_test_ann + recall_test_ann))

# 7. Print the results
cat("=== TRAINING DATA ===\n")
## === TRAINING DATA ===
print(conf_matrix_train_ann )
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 3034 83
## 1 166 3117
##
## Accuracy : 0.9611
## 95% CI : (0.9561, 0.9657)
## No Information Rate : 0.5
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9222
##
## Mcnemar's Test P-Value : 2.03e-07
##
## Sensitivity : 0.9481
## Specificity : 0.9741
## Pos Pred Value : 0.9734
## Neg Pred Value : 0.9494
## Prevalence : 0.5000
## Detection Rate : 0.4741
## Detection Prevalence : 0.4870
## Balanced Accuracy : 0.9611
##
## 'Positive' Class : 0
##
cat(sprintf("Akurasi: %.4f\n", acc_train_ann ))
## Akurasi: 0.9611
cat(sprintf("Precision: %.4f\n", precision_train_ann ))
## Precision: 0.9734
cat(sprintf("Recall: %.4f\n", recall_train_ann ))
## Recall: 0.9481
cat(sprintf("F1 Score: %.4f\n\n", f1_train_ann ))
## F1 Score: 0.9606
cat("=== TESTING DATA ===\n")
## === TESTING DATA ===
print(conf_matrix_test_ann )
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 710 21
## 1 90 779
##
## Accuracy : 0.9306
## 95% CI : (0.9171, 0.9426)
## No Information Rate : 0.5
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8613
##
## Mcnemar's Test P-Value : 1.087e-10
##
## Sensitivity : 0.8875
## Specificity : 0.9738
## Pos Pred Value : 0.9713
## Neg Pred Value : 0.8964
## Prevalence : 0.5000
## Detection Rate : 0.4437
## Detection Prevalence : 0.4569
## Balanced Accuracy : 0.9306
##
## 'Positive' Class : 0
##
cat(sprintf("Akurasi: %.4f\n", acc_test_ann ))
## Akurasi: 0.9306
cat(sprintf("Precision: %.4f\n", precision_test_ann ))
## Precision: 0.9713
cat(sprintf("Recall: %.4f\n", recall_test_ann ))
## Recall: 0.8875
cat(sprintf("F1 Score: %.4f\n", f1_test_ann ))
## F1 Score: 0.9275
# Plot the structure of the fitted neural network.
NeuralNetTools::plotnet(nn)

Perbandingan Metode
# Comparison table: train/test accuracy and F1 for every model, plus the
# precision and recall measured on each model's testing set.
# as.numeric() drops the names carried by some of the metric vectors.
# NOTE(review): the ANN confusion matrices show a 50/50 reference prevalence
# while the other models are evaluated at about 88.5% "no", so the ANN row is
# presumably not directly comparable with the others — confirm.
hasil_perbandingan <- data.frame(
  Metode = c("Random Forest", "Naive Bayes", "Decision Tree", "KNN", "ANN"),
  Akurasi_Train = as.numeric(c(
    acc_train_rf, acc_train_nb, acc_train_tree, acc_train_knn, acc_train_ann
  )),
  Akurasi_Test = as.numeric(c(
    acc_test_rf, acc_test_nb, acc_test_tree, acc_test_knn, acc_test_ann
  )),
  F1_Train = as.numeric(c(
    f1_train_rf, f1_train_nb, f1_train_tree, f1_train_knn, f1_train_ann
  )),
  F1_Test = as.numeric(c(
    f1_test_rf, f1_test_nb, f1_test_tree, f1_test_knn, f1_test_ann
  )),
  Precision = as.numeric(c(
    precision_test_rf, precision_test_nb, precision_test_tree,
    precision_test_knn, precision_test_ann
  )),
  Recall = as.numeric(c(
    recall_test_rf, recall_test_nb, recall_test_tree,
    recall_test_knn, recall_test_ann
  ))
)
print(hasil_perbandingan)
## Metode Akurasi_Train Akurasi_Test F1_Train F1_Test Precision
## 1 Random Forest 0.9983412 0.9037611 0.9990634 0.9473684 0.9179367
## 2 Naive Bayes 0.8614874 0.8716814 0.9206148 0.9268600 0.9351145
## 3 Decision Tree 0.9106995 0.9115044 0.9504677 0.9510404 0.9316547
## 4 KNN 0.9184407 0.8971239 0.9557787 0.9445438 0.9030787
## 5 ANN 0.9610937 0.9306250 0.9605826 0.9274984 0.9712722
## Recall
## 1 0.97875
## 2 0.91875
## 3 0.97125
## 4 0.99000
## 5 0.88750