Tugas Data Mining Pertemuan 6

# library
library(readxl)        # untuk membaca file Excel

## Warning: package 'readxl' was built under R version 4.4.3

library(dplyr)         # untuk manipulasi data

## Warning: package 'dplyr' was built under R version 4.4.3

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)       # untuk visualisasi

## Warning: package 'ggplot2' was built under R version 4.4.3

library(caret)         # untuk evaluasi model

## Loading required package: lattice

library(tidyr)         # untuk manipulasi data
library(mice)          # untuk imputasi data (jika diperlukan)

## Warning: package 'mice' was built under R version 4.4.3

## 
## Attaching package: 'mice'

## The following object is masked from 'package:stats':
## 
##     filter

## The following objects are masked from 'package:base':
## 
##     cbind, rbind

library(class)         # untuk KNN
library(kknn)          # untuk KNN dengan metode yang lebih fleksibel

## Warning: package 'kknn' was built under R version 4.4.3

## 
## Attaching package: 'kknn'

## The following object is masked from 'package:caret':
## 
##     contr.dummy

library(randomForest)  # untuk Random Forest

## Warning: package 'randomForest' was built under R version 4.4.3

## randomForest 4.7-1.2

## Type rfNews() to see new features/changes/bug fixes.

## 
## Attaching package: 'randomForest'

## The following object is masked from 'package:ggplot2':
## 
##     margin

## The following object is masked from 'package:dplyr':
## 
##     combine

library(e1071)         # untuk Naive Bayes dan SVM

## Warning: package 'e1071' was built under R version 4.4.3

library(nnet)          # untuk Artificial Neural Network
library(party)         # untuk Decision Tree

## Warning: package 'party' was built under R version 4.4.3

## Loading required package: grid

## Loading required package: mvtnorm

## Loading required package: modeltools

## Loading required package: stats4

## Loading required package: strucchange

## Warning: package 'strucchange' was built under R version 4.4.3

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## Loading required package: sandwich

## Warning: package 'sandwich' was built under R version 4.4.3

## 
## Attaching package: 'party'

## The following object is masked from 'package:dplyr':
## 
##     where

library(pROC)          # untuk ROC Curve

## Type 'citation("pROC")' for a citation.

## 
## Attaching package: 'pROC'

## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var

Ringkasan Dataset

# Load the training data from the Excel workbook (read_excel returns a tibble).
databank <- read_excel("bank latih.xlsx")
# Optional: lower-case all column names. Left disabled, so the `Age` column
# keeps its capital letter.
# colnames(databank) <- tolower(colnames(databank))

# Inspect the column types and the first values of every column.
str(databank)

## tibble [4,521 × 17] (S3: tbl_df/tbl/data.frame)
##  $ Age      : num [1:4521] 30 33 35 30 59 35 36 39 41 43 ...
##  $ job      : chr [1:4521] "unemployed" "services" "management" "management" ...
##  $ marital  : chr [1:4521] "married" "married" "single" "married" ...
##  $ education: chr [1:4521] "primary" "secondary" "tertiary" "tertier" ...
##  $ default  : chr [1:4521] "no" "no" "no" "no" ...
##  $ balance  : num [1:4521] 1787 4789 1350 1476 0 ...
##  $ housing  : chr [1:4521] "no" "yes" "yes" "yes" ...
##  $ loan     : chr [1:4521] "no" "yes" "no" "yes" ...
##  $ contact  : chr [1:4521] "cellular" "cellular" "cellular" "unknown" ...
##  $ day      : num [1:4521] 19 11 16 3 5 23 14 6 14 17 ...
##  $ month    : chr [1:4521] "10" "may" "apr" "jun" ...
##  $ duration : num [1:4521] 79 220 185 199 226 141 341 151 57 313 ...
##  $ campaign : num [1:4521] 1 1 1 4 1 2 1 2 2 1 ...
##  $ pdays    : num [1:4521] -1 339 330 -1 -1 176 330 -1 -1 147 ...
##  $ previous : num [1:4521] 0 4 1 0 0 3 2 0 0 2 ...
##  $ poutcome : chr [1:4521] "unknown" "failure" "failure" "unknown" ...
##  $ y        : chr [1:4521] "no" "no" "no" "no" ...

summary(databank)

##       Age            job              marital           education        
##  Min.   :19.00   Length:4521        Length:4521        Length:4521       
##  1st Qu.:33.00   Class :character   Class :character   Class :character  
##  Median :39.00   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :41.17                                                           
##  3rd Qu.:49.00                                                           
##  Max.   :87.00                                                           
##    default             balance        housing              loan          
##  Length:4521        Min.   :-3313   Length:4521        Length:4521       
##  Class :character   1st Qu.:   69   Class :character   Class :character  
##  Mode  :character   Median :  444   Mode  :character   Mode  :character  
##                     Mean   : 1423                                        
##                     3rd Qu.: 1480                                        
##                     Max.   :71188                                        
##    contact               day           month              duration   
##  Length:4521        Min.   : 1.00   Length:4521        Min.   :   4  
##  Class :character   1st Qu.: 9.00   Class :character   1st Qu.: 104  
##  Mode  :character   Median :16.00   Mode  :character   Median : 185  
##                     Mean   :15.92                      Mean   : 264  
##                     3rd Qu.:21.00                      3rd Qu.: 329  
##                     Max.   :31.00                      Max.   :3025  
##     campaign          pdays           previous         poutcome        
##  Min.   : 1.000   Min.   : -1.00   Min.   : 0.0000   Length:4521       
##  1st Qu.: 1.000   1st Qu.: -1.00   1st Qu.: 0.0000   Class :character  
##  Median : 2.000   Median : -1.00   Median : 0.0000   Mode  :character  
##  Mean   : 2.794   Mean   : 39.77   Mean   : 0.5426                     
##  3rd Qu.: 3.000   3rd Qu.: -1.00   3rd Qu.: 0.0000                     
##  Max.   :50.000   Max.   :871.00   Max.   :25.0000                     
##       y            
##  Length:4521       
##  Class :character  
##  Mode  :character  
##                    
##                    
##

paste("Jumlah baris sebelum preprocessing:", nrow(databank))

## [1] "Jumlah baris sebelum preprocessing: 4521"

Preprocessing Data

# Helper: keep the values of `x` that are listed in `allowed`; every other
# value is replaced by the catch-all category "unknown".
keep_or_unknown <- function(x, allowed) {
  ifelse(x %in% allowed, x, "unknown")
}

# 1. Missing values & duplicates: drop incomplete rows, then exact duplicates
databank <- databank %>%
  na.omit() %>%
  distinct()

# 2. Validate day and month
valid_months <- c("jan", "feb", "mar", "apr", "may", "jun",
                  "jul", "aug", "sep", "oct", "nov", "dec")
# Lookup from month number written as text ("1".."12") to abbreviation
month_map <- setNames(valid_months, as.character(1:12))

databank <- databank %>%
  mutate(day = as.numeric(day)) %>%
  filter(day >= 1, day <= 31) %>%
  mutate(
    month = tolower(as.character(month)),
    # Months given as numbers are translated; everything else is kept as is
    month = ifelse(month %in% names(month_map), month_map[month], month),
    month = keep_or_unknown(month, valid_months)
  ) %>%
  # Drop impossible February dates (day 30 or 31)
  filter(month != "feb" | day <= 29)

# 3. Validate marital, education and the yes/no variables
#    (Indonesian or misspelled labels are mapped to the English categories)
databank <- databank %>%
  mutate(
    marital = recode(tolower(marital),
                     "menikah" = "married", "cerai" = "divorced"),
    marital = keep_or_unknown(marital, c("married", "single", "divorced")),
    education = recode(
      tolower(education),
      "primari" = "primary", "sekunder" = "secondary", "tertier" = "tertiary"
    ),
    education = keep_or_unknown(
      education, c("primary", "secondary", "tertiary", "unknown")
    )
  )

yesno_vars <- c("default", "housing", "loan", "y")
databank <- databank %>%
  mutate(across(
    all_of(yesno_vars),
    function(x) {
      keep_or_unknown(
        recode(tolower(x), "iya" = "yes", "tidak" = "no"),
        c("yes", "no")
      )
    }
  ))

# 4. Validate the remaining variables
databank <- databank %>%
  mutate(
    contact = recode(tolower(contact), "seluler" = "cellular"),
    contact = keep_or_unknown(contact, c("cellular", "telephone", "unknown")),
    poutcome = keep_or_unknown(
      tolower(poutcome), c("success", "failure", "other", "unknown")
    ),
    # Force the count columns to numeric; rows that become NA are dropped below
    across(c(campaign, pdays, previous), as.numeric)
  ) %>%
  na.omit()

# 5. Convert every character column to a factor
databank_clean <- mutate(databank, across(where(is.character), as.factor))

# 6. Show the result of preprocessing.
# str() prints its report itself and returns NULL invisibly, so it is called
# directly: wrapping it in print() only appends a stray "NULL" line.
str(databank_clean)

## tibble [4,521 × 17] (S3: tbl_df/tbl/data.frame)
##  $ Age      : num [1:4521] 30 33 35 30 59 35 36 39 41 43 ...
##  $ job      : Factor w/ 12 levels "admin.","blue-collar",..: 11 8 5 5 2 5 7 10 3 8 ...
##  $ marital  : Factor w/ 3 levels "divorced","married",..: 2 2 3 2 2 3 2 2 2 2 ...
##  $ education: Factor w/ 4 levels "primary","secondary",..: 1 2 3 3 2 3 3 2 3 1 ...
##  $ default  : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ balance  : num [1:4521] 1787 4789 1350 1476 0 ...
##  $ housing  : Factor w/ 2 levels "no","yes": 1 2 2 2 2 1 2 2 2 2 ...
##  $ loan     : Factor w/ 2 levels "no","yes": 1 2 1 2 1 1 1 1 1 2 ...
##  $ contact  : Factor w/ 3 levels "cellular","telephone",..: 1 1 1 3 3 1 1 1 3 1 ...
##  $ day      : num [1:4521] 19 11 16 3 5 23 14 6 14 17 ...
##  $ month    : Factor w/ 12 levels "apr","aug","dec",..: 11 9 1 7 9 4 9 9 9 1 ...
##  $ duration : num [1:4521] 79 220 185 199 226 141 341 151 57 313 ...
##  $ campaign : num [1:4521] 1 1 1 4 1 2 1 2 2 1 ...
##  $ pdays    : num [1:4521] -1 339 330 -1 -1 176 330 -1 -1 147 ...
##  $ previous : num [1:4521] 0 4 1 0 0 3 2 0 0 2 ...
##  $ poutcome : Factor w/ 4 levels "failure","other",..: 4 1 1 4 4 1 2 4 4 1 ...
##  $ y        : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...

# Report the number of rows left after preprocessing
print(paste("Jumlah baris setelah preprocessing:", nrow(databank_clean)))

## [1] "Jumlah baris setelah preprocessing: 4521"

# Menampilkan data setelah preprocessing
# print(head(databank_clean))
#write.csv(databank_clean, file = "databank_cl.csv", row.names = FALSE)
#View(databank_clean)

# 7. Split the data into a training and a test partition
# Fix the RNG seed so the partition below is reproducible.
set.seed(123)
# Ask caret for the row indices of the training partition (p = 0.8 of the
# rows, sampled on the outcome `y`); list = FALSE returns them as a matrix
# instead of a list.
trainIndex <- createDataPartition(databank_clean$y, p = 0.8, list = FALSE)
train <- databank_clean[trainIndex, ]
# Every row that was not selected for training becomes test data.
test <- databank_clean[-trainIndex, ]

# 8. Keep the outcome labels of both partitions as factors; their levels are
#    reused below when building the confusion matrices
train_labels <- factor(train$y)
test_labels <- factor(test$y)

Newdata

head(databank_clean)

## # A tibble: 6 × 17
##     Age job        marital education default balance housing loan  contact   day
##   <dbl> <fct>      <fct>   <fct>     <fct>     <dbl> <fct>   <fct> <fct>   <dbl>
## 1    30 unemployed married primary   no         1787 no      no    cellul…    19
## 2    33 services   married secondary no         4789 yes     yes   cellul…    11
## 3    35 management single  tertiary  no         1350 yes     no    cellul…    16
## 4    30 management married tertiary  no         1476 yes     yes   unknown     3
## 5    59 blue-coll… married secondary no            0 yes     no    unknown     5
## 6    35 management single  tertiary  no          747 no      no    cellul…    23
## # ℹ 7 more variables: month <fct>, duration <dbl>, campaign <dbl>, pdays <dbl>,
## #   previous <dbl>, poutcome <fct>, y <fct>

# Load the unseen records that each model will score; the preview shows 16
# columns, i.e. the predictors without the outcome `y`.
newbank <- read_excel("unseen data.xlsx")
# Optional: lower-case all column names (left disabled).
# colnames(newbank) <- tolower(colnames(newbank))
head(newbank)

## # A tibble: 6 × 16
##     Age job        marital education default balance housing loan  contact   day
##   <dbl> <chr>      <chr>   <chr>     <chr>     <dbl> <chr>   <chr> <chr>   <dbl>
## 1    20 student    single  secondary no          502 no      no    cellul…    30
## 2    34 technician married tertiary  no         1539 yes     no    cellul…    15
## 3    27 admin.     divorc… secondary no          451 yes     no    cellul…    16
## 4    27 services   single  secondary no         -195 yes     no    cellul…    18
## 5    21 student    single  secondary no         2488 no      no    cellul…    30
## 6    35 management single  tertiary  no          747 no      no    cellul…    23
## # ℹ 6 more variables: month <chr>, duration <dbl>, campaign <dbl>, pdays <dbl>,
## #   previous <dbl>, poutcome <chr>

# Make newbank structurally identical to train: every column that is a factor
# in `train` is converted to a factor with exactly the training levels, so
# each model receives the level set it was fitted on.
# factor() silently turns values outside `levels` into NA, so any value that
# does not occur in the training data is reported with a warning instead of
# disappearing unnoticed.
for (col in intersect(names(newbank), names(train))) {
  if (!is.factor(train[[col]])) {
    next
  }
  raw_values <- newbank[[col]]
  aligned <- factor(raw_values, levels = levels(train[[col]]))
  # Values that were present before the conversion but are NA afterwards
  unseen <- unique(raw_values[is.na(aligned) & !is.na(raw_values)])
  if (length(unseen) > 0) {
    warning(
      "Column '", col, "' in newbank has values not seen in training ",
      "(converted to NA): ", paste(unseen, collapse = ", "),
      call. = FALSE
    )
  }
  newbank[[col]] <- aligned
}

Naive Bayes

# 1. Fit a Naive Bayes classifier: `y` is the outcome, all other columns of
#    `train` are predictors
model_nb <- naiveBayes(y ~ ., data = train)

# 2. Predict the rows the model was fitted on
pred_train_nb <- predict(model_nb, newdata = train)

# 3. Evaluate on the training data; predictions and reference are both forced
#    onto the level order of `train_labels`
CM_train_nb <- confusionMatrix(
  data = factor(pred_train_nb, levels = levels(train_labels)),
  reference = factor(train$y, levels = levels(train_labels))
)

cat("\n=== Naive Bayes - Training ===\n")

## 
## === Naive Bayes - Training ===

cat("Confusion Matrix:\n")

## Confusion Matrix:

print(CM_train_nb$table)

##           Reference
## Prediction   no  yes
##        no  2924  199
##        yes  276  218

cat("Akurasi:", round(CM_train_nb$overall["Accuracy"], 4), "\n")

## Akurasi: 0.8687

cat("F1-score:", round(CM_train_nb$byClass["F1"], 4), "\n")

## F1-score: 0.9249

cat("Precision:", round(CM_train_nb$byClass["Precision"], 4), "\n")

## Precision: 0.9363

cat("Recall:", round(CM_train_nb$byClass["Recall"], 4), "\n")

## Recall: 0.9138

# 4. Predict the held-out test rows
pred_test_nb <- predict(model_nb, newdata = test)

# 5. Evaluate on the test data, using the level order of `test_labels`
CM_test_nb <- confusionMatrix(
  data = factor(pred_test_nb, levels = levels(test_labels)),
  reference = factor(test$y, levels = levels(test_labels))
)

cat("\n=== Naive Bayes - Testing ===\n")

## 
## === Naive Bayes - Testing ===

cat("Confusion Matrix:\n")

## Confusion Matrix:

print(CM_test_nb$table)

##           Reference
## Prediction  no yes
##        no  740  54
##        yes  60  50

cat("Akurasi:", round(CM_test_nb$overall["Accuracy"], 4), "\n")

## Akurasi: 0.8739

cat("F1-score:", round(CM_test_nb$byClass["F1"], 4), "\n")

## F1-score: 0.9285

cat("Precision:", round(CM_test_nb$byClass["Precision"], 4), "\n")

## Precision: 0.932

cat("Recall:", round(CM_test_nb$byClass["Recall"], 4), "\n")

## Recall: 0.925

# Predict the unseen records in newbank (not the test partition)
pred_newbank_nb <- model_nb %>% predict(newdata = newbank)
# Show the predicted labels
print(pred_newbank_nb)

##  [1] no  no  no  no  yes no  yes no  no  no 
## Levels: no yes

Decision Tree

# 1. Fit a conditional inference tree (party::ctree) on all predictors
tree_party <- ctree(y ~ ., data = train)

# 2. Draw the fitted tree
plot(tree_party)

# 3. Predict the rows the tree was fitted on
pred_train_dtree <- predict(tree_party, newdata = train)

# 4. Evaluate on the training data, using the level order of `train_labels`
CM_train_dtree <- confusionMatrix(
  data = factor(pred_train_dtree, levels = levels(train_labels)),
  reference = factor(train$y, levels = levels(train_labels))
)
cat("\n=== Decision Tree - Training ===\n")

## 
## === Decision Tree - Training ===

cat("Confusion Matrix:\n")

## Confusion Matrix:

print(CM_train_dtree$table)

##           Reference
## Prediction   no  yes
##        no  3094  228
##        yes  106  189

cat("Akurasi:", round(CM_train_dtree$overall["Accuracy"], 4), "\n")

## Akurasi: 0.9077

cat("F1-score:", round(CM_train_dtree$byClass["F1"], 4), "\n")

## F1-score: 0.9488

cat("Precision:", round(CM_train_dtree$byClass["Precision"], 4), "\n")

## Precision: 0.9314

cat("Recall:", round(CM_train_dtree$byClass["Recall"], 4), "\n")

## Recall: 0.9669

# 5. Predict the held-out test rows
pred_test_dtree <- predict(tree_party, test)

# 6. Evaluate on the test data.
# Use the level order of `test_labels` ("no", "yes"), as every other confusion
# matrix in this report does, so the tables can be compared cell by cell.
# "no" stays the positive class, so accuracy, F1, precision and recall are
# unchanged; only the row/column order of the table differs.
CM_test_dtree <- confusionMatrix(factor(pred_test_dtree, levels = levels(test_labels)),
                                 factor(test$y, levels = levels(test_labels)),
                                 positive = "no")

cat("\n=== Decision Tree - Testing ===\n")

## 
## === Decision Tree - Testing ===

cat("Confusion Matrix:\n")

## Confusion Matrix:

print(CM_test_dtree$table)

##           Reference
## Prediction  no yes
##        no  775  67
##        yes  25  37

cat("Akurasi:", round(CM_test_dtree$overall["Accuracy"], 4), "\n")

## Akurasi: 0.8982

cat("F1-score:", round(CM_test_dtree$byClass["F1"], 4), "\n")

## F1-score: 0.944

cat("Precision:", round(CM_test_dtree$byClass["Precision"], 4), "\n")

## Precision: 0.9204

cat("Recall:", round(CM_test_dtree$byClass["Recall"], 4), "\n")

## Recall: 0.9688

# Predict the unseen records in newbank (not the test partition)
pred_newbank_dtree <- tree_party %>% predict(newdata = newbank)
# Show the predicted labels
print(pred_newbank_dtree)

##  [1] no  yes yes no  yes no  no  no  no  no 
## Levels: no yes

Random Forest

# 1. Fit a random forest on all predictors.
# maxnodes = 20 limits how many terminal nodes each tree may have. The fit
# draws random numbers, so the result depends on the RNG state left by the
# earlier set.seed(123) and the code that ran since.
model_rf <- randomForest(y ~ ., data = train, maxnodes = 20)

# 2. Show the variable importance
cat("\n=== Variable Importance ===\n")

## 
## === Variable Importance ===

print(importance(model_rf))

##           MeanDecreaseGini
## Age              9.4782036
## job              8.3573381
## marital          2.7382401
## education        1.6634201
## default          0.1933507
## balance          4.9653387
## housing          2.2947279
## loan             0.3397440
## contact          3.6819199
## day              4.9771176
## month           33.0832782
## duration        88.6101831
## campaign         1.9257391
## pdays           10.8628939
## previous         5.8133279
## poutcome        35.9489125

# Plot the variable importance of the fitted forest
varImpPlot(model_rf)

# 3. Predict the rows the forest was fitted on
pred_train_rf <- predict(model_rf, newdata = train)

# 4. Evaluate on the training data, using the level order of `train_labels`
CM_train_rf <- confusionMatrix(
  data = factor(pred_train_rf, levels = levels(train_labels)),
  reference = factor(train$y, levels = levels(train_labels))
)


cat("\n=== Random Forest - Training ===\n")

## 
## === Random Forest - Training ===

cat("Confusion Matrix:\n")

## Confusion Matrix:

print(CM_train_rf$table)

##           Reference
## Prediction   no  yes
##        no  3194  337
##        yes    6   80

cat("Akurasi:", round(CM_train_rf$overall["Accuracy"], 4), "\n")

## Akurasi: 0.9052

cat("F1-score:", round(CM_train_rf$byClass["F1"], 4), "\n")

## F1-score: 0.949

cat("Precision:", round(CM_train_rf$byClass["Precision"], 4), "\n")

## Precision: 0.9046

cat("Recall:", round(CM_train_rf$byClass["Recall"], 4), "\n")

## Recall: 0.9981

# 5. Predict the held-out test rows
pred_test_rf <- predict(model_rf, newdata = test)

# 6. Evaluate on the test data with the explicit level order "no", "yes"
CM_test_rf <- confusionMatrix(
  data = factor(pred_test_rf, levels = c("no", "yes")),
  reference = factor(test$y, levels = c("no", "yes"))
)

cat("\n=== Random Forest - Testing ===\n")

## 
## === Random Forest - Testing ===

cat("Confusion Matrix:\n")

## Confusion Matrix:

print(CM_test_rf$table)

##           Reference
## Prediction  no yes
##        no  792  92
##        yes   8  12

cat("Akurasi:", round(CM_test_rf$overall["Accuracy"], 4), "\n")

## Akurasi: 0.8894

cat("F1-score:", round(CM_test_rf$byClass["F1"], 4), "\n")

## F1-score: 0.9406

cat("Precision:", round(CM_test_rf$byClass["Precision"], 4), "\n")

## Precision: 0.8959

cat("Recall:", round(CM_test_rf$byClass["Recall"], 4), "\n")

## Recall: 0.99

# Predict the unseen records in newbank (not the test partition)
pred_newbank_rf <- model_rf %>% predict(newdata = newbank)
# Show the predicted labels
print(pred_newbank_rf)

##   1   2   3   4   5   6   7   8   9  10 
##  no  no  no  no yes  no  no  no  no  no 
## Levels: no yes

KNN Model

# 1. Fit the KNN model with kknn::train.kknn.
# kmax = 9 makes the search consider k = 1..9 (the misclassification table
# printed from model_knn$MISCLASS has one row per k); the printed model
# reports the best kernel and best k that were selected.
model_knn <- train.kknn(y ~ ., data = train, kmax = 9)
# Print the summary of the tuned model
model_knn

## 
## Call:
## train.kknn(formula = y ~ ., data = train, kmax = 9)
## 
## Type of response variable: nominal
## Minimal misclassification: 0.1050594
## Best kernel: optimal
## Best k: 9

model_knn$MISCLASS

##     optimal
## 1 0.1235831
## 2 0.1235831
## 3 0.1235831
## 4 0.1235831
## 5 0.1078242
## 6 0.1058889
## 7 0.1069947
## 8 0.1064418
## 9 0.1050594

# 2. Predictions
# Predict the rows the model was tuned on
pred_train_knn <- model_knn %>% predict(newdata = train)

## Warning in model.matrix.default(mt2, test, contrasts.arg = contrasts.arg):
## variable 'y' is absent, its contrast will be ignored

# Predict the held-out test rows
pred_test_knn <- model_knn %>% predict(newdata = test)

## Warning in model.matrix.default(mt2, test, contrasts.arg = contrasts.arg):
## variable 'y' is absent, its contrast will be ignored

# 4. Evaluate on the training data, using the level order of `train_labels`
CM_train_knn <- confusionMatrix(
  data = factor(pred_train_knn, levels = levels(train_labels)),
  reference = factor(train$y, levels = levels(train_labels))
)

cat("\n=== KNN - Training ===\n")

## 
## === KNN - Training ===

cat("Confusion Matrix:\n")

## Confusion Matrix:

print(CM_train_knn$table)

##           Reference
## Prediction   no  yes
##        no  3182  211
##        yes   18  206

cat("Akurasi:", round(CM_train_knn$overall["Accuracy"], 4), "\n")

## Akurasi: 0.9367

cat("F1-score:", round(CM_train_knn$byClass["F1"], 4), "\n")

## F1-score: 0.9653

cat("Precision:", round(CM_train_knn$byClass["Precision"], 4), "\n")

## Precision: 0.9378

cat("Recall:", round(CM_train_knn$byClass["Recall"], 4), "\n")

## Recall: 0.9944

# 5. Evaluate on the test data, using the level order of `test_labels`
CM_test_knn <- confusionMatrix(
  data = factor(pred_test_knn, levels = levels(test_labels)),
  reference = factor(test$y, levels = levels(test_labels))
)

cat("\n=== KNN - Testing ===\n")

## 
## === KNN - Testing ===

cat("Confusion Matrix:\n")

## Confusion Matrix:

print(CM_test_knn$table)

##           Reference
## Prediction  no yes
##        no  789  70
##        yes  11  34

cat("Akurasi:", round(CM_test_knn$overall["Accuracy"], 4), "\n")

## Akurasi: 0.9104

cat("F1-score:", round(CM_test_knn$byClass["F1"], 4), "\n")

## F1-score: 0.9512

cat("Precision:", round(CM_test_knn$byClass["Precision"], 4), "\n")

## Precision: 0.9185

cat("Recall:", round(CM_test_knn$byClass["Recall"], 4), "\n")

## Recall: 0.9862

# Predict the unseen records in newbank (not the test partition)
pred_newbank_knn <- model_knn %>% predict(newdata = newbank)

## Warning in model.matrix.default(mt2, test, contrasts.arg = contrasts.arg):
## variable 'y' is absent, its contrast will be ignored

# Show the predicted labels
print(pred_newbank_knn)

##  [1] yes yes no  no  yes no  no  no  no  no 
## Levels: no yes

ANN Model

# 1. Fit a single-hidden-layer neural network (nnet) on all predictors.
# size = 5 hidden units, decay = 0.01 weight decay, maxit = 200 caps the
# number of optimiser iterations. The recorded trace ends with "stopped after
# 200 iterations", i.e. this run hit the cap rather than converging earlier.
# NOTE(review): the predictors are passed unscaled - confirm this is intended.
model_ann <- nnet(y ~ ., data = train, size = 5, maxit = 200, decay = 0.01)

## # weights:  221
## initial  value 3067.322499 
## iter  10 value 1280.104809
## iter  20 value 1142.019148
## iter  30 value 1119.943483
## iter  40 value 1114.681670
## iter  50 value 1110.810002
## iter  60 value 1098.683884
## iter  70 value 1059.832411
## iter  80 value 1015.319682
## iter  90 value 926.932038
## iter 100 value 880.332896
## iter 110 value 858.432733
## iter 120 value 834.937659
## iter 130 value 818.128660
## iter 140 value 804.276290
## iter 150 value 798.328264
## iter 160 value 792.932068
## iter 170 value 790.855898
## iter 180 value 787.415832
## iter 190 value 786.127963
## iter 200 value 785.017779
## final  value 785.017779 
## stopped after 200 iterations

# 2. Predict class labels (type = "class") for both partitions
pred_train_ann <- model_ann %>% predict(newdata = train, type = "class")
pred_test_ann <- model_ann %>% predict(newdata = test, type = "class")

# 3. Evaluate on the training data; the predicted labels are turned into a
#    factor with the level order of `train_labels`
CM_train_ann <- confusionMatrix(
  data = factor(pred_train_ann, levels = levels(train_labels)),
  reference = train_labels
)
cat("\n=== ANN - Training ===\n")

## 
## === ANN - Training ===

print(CM_train_ann$table)

##           Reference
## Prediction   no  yes
##        no  3093  226
##        yes  107  191

cat("Akurasi:", round(CM_train_ann$overall["Accuracy"], 4), "\n")

## Akurasi: 0.9079

cat("F1-score:", round(CM_train_ann$byClass["F1"], 4), "\n")

## F1-score: 0.9489

cat("Precision:", round(CM_train_ann$byClass["Precision"], 4), "\n")

## Precision: 0.9319

cat("Recall:", round(CM_train_ann$byClass["Recall"], 4), "\n")

## Recall: 0.9666

# 4. Evaluate on the test data, using the level order of `test_labels`
CM_test_ann <- confusionMatrix(
  data = factor(pred_test_ann, levels = levels(test_labels)),
  reference = test_labels
)
cat("\n=== ANN - Testing ===\n")

## 
## === ANN - Testing ===

print(CM_test_ann$table)

##           Reference
## Prediction  no yes
##        no  768  59
##        yes  32  45

cat("Akurasi:", round(CM_test_ann$overall["Accuracy"], 4), "\n")

## Akurasi: 0.8993

cat("F1-score:", round(CM_test_ann$byClass["F1"], 4), "\n")

## F1-score: 0.9441

cat("Precision:", round(CM_test_ann$byClass["Precision"], 4), "\n")

## Precision: 0.9287

cat("Recall:", round(CM_test_ann$byClass["Recall"], 4), "\n")

## Recall: 0.96

# Predict the unseen records in newbank as class labels.
# Without type = "class", predict() on an nnet model returns the raw network
# output (one number per row) instead of a label. The training and test
# predictions of this model already use type = "class", and every other model
# in this report shows labels for newbank, so the same is done here.
# The labels are wrapped in a factor with the training levels so the result
# has the same form as the other models' predictions.
pred_newbank_ann <- factor(
  predict(model_ann, newdata = newbank, type = "class"),
  levels = levels(train_labels)
)
# Show the predicted labels
print(pred_newbank_ann)

##  [1] no  yes no  no  yes no  no  no  no  no 
## Levels: no yes

Support Vector Machine (SVM)

# 1. Fit a support vector machine with a linear kernel on all predictors
model_svm <- svm(y ~ ., data = train, kernel = "linear")

# 2. Predict the rows the model was fitted on
pred_train_svm <- predict(model_svm, newdata = train)

# 3. Evaluate on the training data, using the level order of `train_labels`
CM_train_svm <- confusionMatrix(
  data = factor(pred_train_svm, levels = levels(train_labels)),
  reference = factor(train$y, levels = levels(train_labels))
)

cat("\n=== SVM - Training ===\n")

## 
## === SVM - Training ===

cat("Confusion Matrix:\n")

## Confusion Matrix:

print(CM_train_svm$table)

##           Reference
## Prediction   no  yes
##        no  3162  347
##        yes   38   70

cat("Akurasi:", round(CM_train_svm$overall["Accuracy"], 4), "\n")

## Akurasi: 0.8936

cat("F1-score:", round(CM_train_svm$byClass["F1"], 4), "\n")

## F1-score: 0.9426

cat("Precision:", round(CM_train_svm$byClass["Precision"], 4), "\n")

## Precision: 0.9011

cat("Recall:", round(CM_train_svm$byClass["Recall"], 4), "\n")

## Recall: 0.9881

# 4. Predict the held-out test rows
pred_test_svm <- predict(model_svm, newdata = test)

# 5. Evaluate on the test data, using the level order of `test_labels`
CM_test_svm <- confusionMatrix(
  data = factor(pred_test_svm, levels = levels(test_labels)),
  reference = factor(test$y, levels = levels(test_labels))
)

cat("\n=== SVM - Testing ===\n")

## 
## === SVM - Testing ===

cat("Confusion Matrix:\n")

## Confusion Matrix:

print(CM_test_svm$table)

##           Reference
## Prediction  no yes
##        no  792  91
##        yes   8  13

cat("Akurasi:", round(CM_test_svm$overall["Accuracy"], 4), "\n")

## Akurasi: 0.8905

cat("F1-score:", round(CM_test_svm$byClass["F1"], 4), "\n")

## F1-score: 0.9412

cat("Precision:", round(CM_test_svm$byClass["Precision"], 4), "\n")

## Precision: 0.8969

cat("Recall:", round(CM_test_svm$byClass["Recall"], 4), "\n")

## Recall: 0.99

# Predict the unseen records in newbank (not the test partition)
pred_newbank_svm <- model_svm %>% predict(newdata = newbank)
# Show the predicted labels
print(pred_newbank_svm)

##   1   2   3   4   5   6   7   8   9  10 
##  no  no  no  no yes  no  no  no  no  no 
## Levels: no yes

Visualisasi Perbandingan Akurasi

# Compare the test-set accuracy of all models in one bar chart.
# Collect the test confusion matrices under their display names first, then
# pull the "Accuracy" entry out of each one.
test_results <- list(
  "ANN" = CM_test_ann,
  "KNN" = CM_test_knn,
  "Decision Tree" = CM_test_dtree,
  "Random Forest" = CM_test_rf,
  "Naive Bayes" = CM_test_nb,
  "SVM" = CM_test_svm
)
accuracy_results <- data.frame(
  Model = names(test_results),
  # unname() keeps the default 1..n row names of the data frame
  Accuracy = unname(vapply(
    test_results,
    function(cm) cm$overall["Accuracy"],
    numeric(1)
  ))
)

# One bar per model, bar height = accuracy, y axis fixed to [0, 1]
ggplot(accuracy_results, aes(x = Model, y = Accuracy, fill = Model)) +
  geom_col() +
  ylim(0, 1) +
  ggtitle("Perbandingan Akurasi Antar Model") +
  theme_minimal()

Tugas Data Mining Pertemuan 6

Kelompok 9 - 3SD2

2025-04-28

Ringkasan Dataset

Preprocessing Data

Newdata

Naive Bayes

Decision Tree

Random Forest

KNN Model

ANN Model

Support Vector Machine (SVM)

Visualisasi Perbandingan Akurasi