1 Load Libraries

library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
## Loading required package: lattice
library(e1071)
## Warning: package 'e1071' was built under R version 4.4.3
## 
## Attaching package: 'e1071'
## The following object is masked from 'package:ggplot2':
## 
##     element
library(moments)
## 
## Attaching package: 'moments'
## The following objects are masked from 'package:e1071':
## 
##     kurtosis, moment, skewness
library(smotefamily)
## Warning: package 'smotefamily' was built under R version 4.4.3

2 Load the Dataset

set.seed(123)
# Adjust the file paths if needed
train <- read_excel("C:/Users/LENOVO/Downloads/Data Training.xlsx")
test  <- read_excel("C:/Users/LENOVO/Downloads/Data Testing.xlsx")

train <- as.data.frame(train)
test  <- as.data.frame(test)

cat("Dimensi data training :", dim(train), "\n")
## Dimensi data training : 41188 21
cat("Dimensi data testing  :", dim(test), "\n")
## Dimensi data testing  : 4119 21
cat("\nNama kolom training:\n")
## 
## Nama kolom training:
print(names(train))
##  [1] "age"            "job"            "marital"        "education"     
##  [5] "default"        "housing"        "loan"           "contact"       
##  [9] "month"          "day_of_week"    "duration"       "campaign"      
## [13] "pdays"          "previous"       "poutcome"       "emp.var.rate"  
## [17] "cons.price.idx" "cons.conf.idx"  "euribor3m"      "nr.employed"   
## [21] "y"
cat("\nNama kolom testing:\n")
## 
## Nama kolom testing:
print(names(test))
##  [1] "age"            "job"            "marital"        "education"     
##  [5] "default"        "housing"        "loan"           "contact"       
##  [9] "month"          "day_of_week"    "duration"       "campaign"      
## [13] "pdays"          "previous"       "poutcome"       "emp.var.rate"  
## [17] "cons.price.idx" "cons.conf.idx"  "euribor3m"      "nr.employed"   
## [21] "y"

3 Data Preprocessing and Data Exploration

# Target
train$y <- factor(train$y, levels = c("no", "yes"))
test$y  <- factor(test$y, levels = c("no", "yes"))

cat_vars <- c("job", "marital", "education", "default", "housing", "loan",
              "contact", "month", "day_of_week", "poutcome")

for (v in cat_vars) {
  train[[v]] <- as.factor(train[[v]])
  test[[v]]  <- as.factor(test[[v]])
}

num_vars <- setdiff(names(train), c(cat_vars, "y"))

for (v in num_vars) {
  train[[v]] <- as.numeric(train[[v]])
  test[[v]]  <- as.numeric(test[[v]])
}

3.1 Check for Missing Values

# Initial data check
cat("\n===== Missing Value Training =====\n")
## 
## ===== Missing Value Training =====
print(colSums(is.na(train)))
##            age            job        marital      education        default 
##              0            330             80           1731           8597 
##        housing           loan        contact          month    day_of_week 
##            990            990              0              0              0 
##       duration       campaign          pdays       previous       poutcome 
##              0              0              0              0              0 
##   emp.var.rate cons.price.idx  cons.conf.idx      euribor3m    nr.employed 
##              0              0              0              0              0 
##              y 
##              0
cat("\n===== Missing Value Testing =====\n")
## 
## ===== Missing Value Testing =====
print(colSums(is.na(test)))
##            age            job        marital      education        default 
##              0             39             11            167            803 
##        housing           loan        contact          month    day_of_week 
##            105            105              0              0              0 
##       duration       campaign          pdays       previous       poutcome 
##              0              0              0              0              0 
##   emp.var.rate cons.price.idx  cons.conf.idx      euribor3m    nr.employed 
##              0              0              0              0              0 
##              y 
##              0
cat("\n===== Distribusi Target Training =====\n")
## 
## ===== Distribusi Target Training =====
print(table(train$y))
## 
##    no   yes 
## 36548  4640
print(prop.table(table(train$y)))
## 
##        no       yes 
## 0.8873458 0.1126542
cat("\n===== Distribusi Target Testing =====\n")
## 
## ===== Distribusi Target Testing =====
print(table(test$y))
## 
##   no  yes 
## 3668  451
print(prop.table(table(test$y)))
## 
##        no       yes 
## 0.8905074 0.1094926

The missing-value check shows that several variables contain missing values, namely job, marital, education, default, housing, and loan. In addition, the target distribution reveals a clear class imbalance, with the “no” class far more dominant than the “yes” class. Both issues must be addressed in the subsequent modeling stages, specifically through missing-value handling and class-imbalance handling.
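
As a quick illustrative check (not part of the original pipeline, and not run here), the class imbalance can be visualized with ggplot2, which is already loaded via caret; a minimal sketch:

ggplot(train, aes(x = y, fill = y)) +
  geom_bar() +
  labs(title = "Target class distribution (training)", x = "y", y = "count")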

3.2 Handling Missing Values

impute_mode <- function(x) {
  x <- x[!is.na(x)]
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

impute_data <- function(df, ref_df = NULL, cat_vars, target_var = "y") {
  if (is.null(ref_df)) ref_df <- df
  
  out <- df
  
  for (col in names(out)) {
    if (col == target_var) next
    
    if (col %in% cat_vars) {
      mode_value <- impute_mode(ref_df[[col]])
      out[[col]][is.na(out[[col]])] <- mode_value
      out[[col]] <- factor(out[[col]], levels = levels(ref_df[[col]]))
    } else {
      med_value <- median(ref_df[[col]], na.rm = TRUE)
      out[[col]][is.na(out[[col]])] <- med_value
    }
  }
  
  return(out)
}
train_imp <- impute_data(train, ref_df = train, cat_vars = cat_vars, target_var = "y")
test_imp  <- impute_data(test,  ref_df = train, cat_vars = cat_vars, target_var = "y")

cat("\n===== Missing Value Setelah Imputasi =====\n")
## 
## ===== Missing Value Setelah Imputasi =====
print(colSums(is.na(train_imp)))
##            age            job        marital      education        default 
##              0              0              0              0              0 
##        housing           loan        contact          month    day_of_week 
##              0              0              0              0              0 
##       duration       campaign          pdays       previous       poutcome 
##              0              0              0              0              0 
##   emp.var.rate cons.price.idx  cons.conf.idx      euribor3m    nr.employed 
##              0              0              0              0              0 
##              y 
##              0
print(colSums(is.na(test_imp)))
##            age            job        marital      education        default 
##              0              0              0              0              0 
##        housing           loan        contact          month    day_of_week 
##              0              0              0              0              0 
##       duration       campaign          pdays       previous       poutcome 
##              0              0              0              0              0 
##   emp.var.rate cons.price.idx  cons.conf.idx      euribor3m    nr.employed 
##              0              0              0              0              0 
##              y 
##              0

3.3 Outlier Handling

train_out <- train_imp
test_out  <- test_imp

for (v in num_vars) {
  # IQR-based capping (winsorizing): compute the fences from the TRAINING data only
  q1 <- quantile(train_out[[v]], 0.25, na.rm = TRUE)
  q3 <- quantile(train_out[[v]], 0.75, na.rm = TRUE)
  iqr_val <- q3 - q1
  
  lower <- q1 - 1.5 * iqr_val
  upper <- q3 + 1.5 * iqr_val
  
  # cap training values that fall outside the fences
  train_out[[v]][train_out[[v]] < lower] <- lower
  train_out[[v]][train_out[[v]] > upper] <- upper
  
  # apply the same training-derived fences to the testing data
  test_out[[v]][test_out[[v]] < lower] <- lower
  test_out[[v]][test_out[[v]] > upper] <- upper
}
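
A quick way to verify the capping is to compare the range of one capped variable against its training fences; an illustrative sketch (the choice of campaign as the example variable is arbitrary, and the chunk is not run here):

q <- quantile(train_imp$campaign, c(0.25, 0.75), na.rm = TRUE)
fences <- c(q[1] - 1.5 * diff(q), q[2] + 1.5 * diff(q))
fences                       # lower and upper capping limits for campaign
range(train_out$campaign)    # should lie inside the fences above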

3.4 Check Skewness and Transform If Needed

safe_skewness <- function(x) {
  x <- x[is.finite(x)]
  if (length(x) < 3) return(NA)
  if (sd(x) == 0) return(NA)
  moments::skewness(x)
}

skew_vals <- sapply(train_out[num_vars], safe_skewness)

cat("\n===== Skewness Variabel Numerik (Training) =====\n")
## 
## ===== Skewness Variabel Numerik (Training) =====
print(skew_vals)
##            age       duration       campaign          pdays       previous 
##      0.5675749      1.0448624      1.2121184             NA             NA 
##   emp.var.rate cons.price.idx  cons.conf.idx      euribor3m    nr.employed 
##     -0.7240692     -0.2308792      0.3008034     -0.7091621     -1.0442244
invalid_skew <- names(skew_vals[!is.finite(skew_vals)])
if (length(invalid_skew) > 0) {
  cat("\nVariabel dengan skewness tidak valid:\n")
  print(invalid_skew)
}
## 
## Variabel dengan skewness tidak valid:
## [1] "pdays"    "previous"

Based on the skewness check, most numeric variables in the training data have reasonably well-behaved distributions. Variables such as age, duration, and campaign are positively skewed, indicating distributions that lean slightly to the right, while emp.var.rate, euribor3m, and nr.employed are negatively skewed, indicating distributions that lean to the left. The variables cons.price.idx and cons.conf.idx are relatively symmetric, with small skewness values. However, pdays and previous have no valid skewness value (NA), most likely because their distributions are concentrated on a single value, as is typical for a variable such as pdays, which is coded 999 for customers who have never been contacted before; after the IQR capping in the previous step, both columns become constant, so their standard deviation is zero and the skewness is undefined.
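
An illustrative check of that concentration (assuming the usual coding of the bank marketing data, where pdays == 999 means never contacted and previous == 0 means no previous contacts; not run here):

mean(train_imp$pdays == 999)     # share of observations with pdays == 999
mean(train_imp$previous == 0)    # share of observations with previous == 0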

3.5 Skewness Transformation

skew_threshold <- 1

train_prep <- train_out
test_prep  <- test_out

for (v in num_vars) {
  if (v == "pdays") {
    cat("Transformasi dilewati untuk:", v, "\n")
    next
  }
  
  if (is.finite(skew_vals[v]) && abs(skew_vals[v]) > skew_threshold) {
    if (min(train_prep[[v]], na.rm = TRUE) >= 0) {
      cat("Transformasi log1p pada:", v, "\n")
      train_prep[[v]] <- log1p(train_prep[[v]])
      test_prep[[v]]  <- log1p(test_prep[[v]])
    } else {
      cat("Tidak ditransformasi (ada nilai negatif):", v, "\n")
    }
  } else {
    cat("Skewness tidak valid / tidak perlu transformasi:", v, "\n")
  }
}
## Skewness tidak valid / tidak perlu transformasi: age 
## Transformasi log1p pada: duration 
## Transformasi log1p pada: campaign 
## Transformasi dilewati untuk: pdays 
## Skewness tidak valid / tidak perlu transformasi: previous 
## Skewness tidak valid / tidak perlu transformasi: emp.var.rate 
## Skewness tidak valid / tidak perlu transformasi: cons.price.idx 
## Skewness tidak valid / tidak perlu transformasi: cons.conf.idx 
## Skewness tidak valid / tidak perlu transformasi: euribor3m 
## Transformasi log1p pada: nr.employed

Based on the skewness transformation step, only a few numeric variables required adjustment to reduce the asymmetry of their distributions, namely duration, campaign, and nr.employed, which were transformed with log1p(). The transformation is intended to make strongly skewed variables more stable and better suited for modeling.
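
A small illustrative follow-up (not run here) recomputes the skewness of the transformed columns so it can be compared with the values reported above:

transformed_vars <- c("duration", "campaign", "nr.employed")
cbind(before = skew_vals[transformed_vars],
      after  = sapply(train_prep[transformed_vars], safe_skewness))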

3.6 Feature Scenario Option and Aligning Train/Test Factor Levels

use_duration <- FALSE

if (!use_duration) {
  train_model <- train_prep %>% select(-duration)
  test_model  <- test_prep %>% select(-duration)
  cat("\nModel menggunakan skenario REALISTIS (tanpa duration)\n")
} else {
  train_model <- train_prep
  test_model  <- test_prep
  cat("\nModel menggunakan skenario BENCHMARK (dengan duration)\n")
}
## 
## Model menggunakan skenario REALISTIS (tanpa duration)
cat_vars_model <- names(train_model)[sapply(train_model, is.factor)]
cat_vars_model <- setdiff(cat_vars_model, "y")

for (v in cat_vars_model) {
  all_levels <- union(levels(train_model[[v]]), levels(test_model[[v]]))
  train_model[[v]] <- factor(train_model[[v]], levels = all_levels)
  test_model[[v]]  <- factor(test_model[[v]], levels = all_levels)
}

cat("\n===== Distribusi Kelas Sebelum SMOTE (Training) =====\n")
## 
## ===== Distribusi Kelas Sebelum SMOTE (Training) =====
print(table(train_model$y))
## 
##    no   yes 
## 36548  4640
print(prop.table(table(train_model$y)))
## 
##        no       yes 
## 0.8873458 0.1126542

3.7 Dummy Variables

y_train <- train_model$y
y_test  <- test_model$y

x_train_raw <- train_model[, setdiff(names(train_model), "y"), drop = FALSE]
x_test_raw  <- test_model[, setdiff(names(test_model), "y"), drop = FALSE]

gabung_data <- rbind(x_train_raw, x_test_raw)

dummy_all <- as.data.frame(model.matrix(~ . - 1, data = gabung_data))

n_train <- nrow(x_train_raw)
x_train <- dummy_all[1:n_train, , drop = FALSE]
x_test  <- dummy_all[(n_train + 1):nrow(dummy_all), , drop = FALSE]

cat("\nDimensi x_train setelah dummy:", dim(x_train), "\n")
## 
## Dimensi x_train setelah dummy: 41188 47
cat("Dimensi x_test setelah dummy :", dim(x_test), "\n")
## Dimensi x_test setelah dummy : 4119 47

3.8 SMOTE

y_train_num <- ifelse(y_train == "yes", 1, 0)

smote_result <- smotefamily::SMOTE(
  X = x_train,
  target = y_train_num,
  K = 5,
  dup_size = 2
)

train_smote_data <- smote_result$data

# the target column returned by SMOTE is the last column of the result
target_col <- ncol(train_smote_data)

x_train_smote <- train_smote_data[, -target_col, drop = FALSE]
y_train_smote_num <- train_smote_data[, target_col]

y_train_smote <- factor(ifelse(y_train_smote_num == 1, "yes", "no"),
                        levels = c("no", "yes"))

cat("\n===== Distribusi Kelas Setelah SMOTE =====\n")
## 
## ===== Distribusi Kelas Setelah SMOTE =====
print(table(y_train_smote))
## y_train_smote
##    no   yes 
## 36548 13920
print(prop.table(table(y_train_smote)))
## y_train_smote
##        no       yes 
## 0.7241817 0.2758183

The SMOTE result for y_train shows that the class distribution in the training data is more balanced than before oversampling, with 36,548 observations in the no class and 13,920 in the yes class. In proportional terms, the majority class no drops to 72.42%, while the minority class yes rises to 27.58%. This indicates that SMOTE successfully increased the representation of the minority class, so the class imbalance is reduced and the model should be better able to recognize customers who are likely to subscribe to a deposit.
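
The counts above are consistent with how dup_size behaves in smotefamily::SMOTE here: with dup_size = 2, two synthetic observations are generated per original minority case, so the expected number of yes cases is

sum(y_train == "yes") * (1 + 2)   # 4640 * 3 = 13920, matching the table above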

3.9 Remove Zero-Variance Columns and Scaling

zero_var_cols <- names(x_train_smote)[apply(x_train_smote, 2, function(z) var(z, na.rm = TRUE) == 0)]

cat("\nKolom zero variance:\n")
## 
## Kolom zero variance:
print(zero_var_cols)
## [1] "pdays"    "previous"
if (length(zero_var_cols) > 0) {
  x_train_smote <- x_train_smote[, !(names(x_train_smote) %in% zero_var_cols), drop = FALSE]
  x_test        <- x_test[, !(names(x_test) %in% zero_var_cols), drop = FALSE]
}

cat("\nDimensi x_train_smote setelah hapus zero variance:", dim(x_train_smote), "\n")
## 
## Dimensi x_train_smote setelah hapus zero variance: 50468 45
cat("Dimensi x_test setelah hapus zero variance       :", dim(x_test), "\n")
## Dimensi x_test setelah hapus zero variance       : 4119 45
train_means <- sapply(x_train_smote, mean, na.rm = TRUE)
train_sds   <- sapply(x_train_smote, sd, na.rm = TRUE)

train_sds[train_sds == 0] <- 1

x_train_sc <- as.data.frame(scale(x_train_smote, center = train_means, scale = train_sds))
x_test_sc  <- as.data.frame(scale(x_test, center = train_means, scale = train_sds))

# make sure there are no NA/Inf values left after scaling
x_train_sc[!is.finite(as.matrix(x_train_sc))] <- 0
x_test_sc[!is.finite(as.matrix(x_test_sc))] <- 0

4 SVM Model

model_svm <- svm(
  x = x_train_sc,
  y = y_train_smote,
  kernel = "radial",
  cost = 1,
  gamma = 1 / ncol(x_train_sc),
  probability = FALSE
)

cat("\n===== Ringkasan Model SVM =====\n")
## 
## ===== Ringkasan Model SVM =====
print(model_svm)
## 
## Call:
## svm.default(x = x_train_sc, y = y_train_smote, kernel = "radial", 
##     gamma = 1/ncol(x_train_sc), cost = 1, probability = FALSE)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
## 
## Number of Support Vectors:  18778

The SVM output above shows that the fitted model is a binary Support Vector Machine classifier with a radial kernel, which allows it to capture nonlinear relationships between the predictors and the target. The parameter cost = 1 applies a moderate penalty for misclassification, while gamma = 1/ncol(x_train_sc) is used as an initial setting based on the number of features produced by preprocessing. The model uses 18,778 support vectors, meaning a large share of the training observations contributes to shaping the decision boundary, which suggests that the separation between the yes and no classes is fairly complex.
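
For intuition, the radial (RBF) kernel used by e1071::svm computes K(x, x') = exp(-gamma * ||x - x'||^2); a minimal sketch of that formula applied to two scaled training rows (illustrative only, not run here):

rbf_kernel <- function(x1, x2, gamma) exp(-gamma * sum((x1 - x2)^2))
rbf_kernel(as.numeric(x_train_sc[1, ]), as.numeric(x_train_sc[2, ]),
           gamma = 1 / ncol(x_train_sc))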

4.1 Prediction

# Ensure the inputs are plain numeric data frames
x_train_sc <- as.data.frame(x_train_sc)
x_test_sc  <- as.data.frame(x_test_sc)

pred_train <- predict(model_svm, newdata = x_train_sc)
pred_test  <- predict(model_svm, newdata = x_test_sc)

pred_train <- factor(as.character(pred_train), levels = c("no", "yes"))
pred_test  <- factor(as.character(pred_test), levels = c("no", "yes"))

4.2 Evaluation

evaluate_model <- function(actual, predicted, positive_class = "yes") {
  actual <- factor(actual, levels = c("no", "yes"))
  predicted <- factor(predicted, levels = c("no", "yes"))
  
  cm <- confusionMatrix(predicted, actual, positive = positive_class)
  
  acc  <- as.numeric(cm$overall["Accuracy"])
  sens <- as.numeric(cm$byClass["Sensitivity"])
  spec <- as.numeric(cm$byClass["Specificity"])
  bal_acc <- (sens + spec) / 2
  precision <- as.numeric(cm$byClass["Pos Pred Value"])
  f1 <- ifelse((precision + sens) == 0, 0,
               2 * precision * sens / (precision + sens))
  
  metrics <- data.frame(
    Accuracy = acc,
    Sensitivity = sens,
    Specificity = spec,
    Balanced_Accuracy = bal_acc,
    F1_Score = f1
  )
  
  return(list(conf_matrix = cm, metrics = metrics))
}

eval_train <- evaluate_model(y_train_smote, pred_train)

cat("\n=============================\n")
## 
## =============================
cat("CONFUSION MATRIX - TRAINING SMOTE\n")
## CONFUSION MATRIX - TRAINING SMOTE
cat("=============================\n")
## =============================
print(eval_train$conf_matrix)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    no   yes
##        no  35461  4415
##        yes  1087  9505
##                                           
##                Accuracy : 0.891           
##                  95% CI : (0.8882, 0.8937)
##     No Information Rate : 0.7242          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7053          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.6828          
##             Specificity : 0.9703          
##          Pos Pred Value : 0.8974          
##          Neg Pred Value : 0.8893          
##              Prevalence : 0.2758          
##          Detection Rate : 0.1883          
##    Detection Prevalence : 0.2099          
##       Balanced Accuracy : 0.8265          
##                                           
##        'Positive' Class : yes             
## 
cat("\n=============================\n")
## 
## =============================
cat("METRICS - TRAINING SMOTE\n")
## METRICS - TRAINING SMOTE
cat("=============================\n")
## =============================
print(eval_train$metrics)
##    Accuracy Sensitivity Specificity Balanced_Accuracy  F1_Score
## 1 0.8909804   0.6828305   0.9702583         0.8265444 0.7755385
eval_test <- evaluate_model(y_test, pred_test)

cat("\n=============================\n")
## 
## =============================
cat("CONFUSION MATRIX - TESTING\n")
## CONFUSION MATRIX - TESTING
cat("=============================\n")
## =============================
print(eval_test$conf_matrix)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   no  yes
##        no  3567  250
##        yes  101  201
##                                           
##                Accuracy : 0.9148          
##                  95% CI : (0.9058, 0.9231)
##     No Information Rate : 0.8905          
##     P-Value [Acc > NIR] : 1.405e-07       
##                                           
##                   Kappa : 0.489           
##                                           
##  Mcnemar's Test P-Value : 2.797e-15       
##                                           
##             Sensitivity : 0.44568         
##             Specificity : 0.97246         
##          Pos Pred Value : 0.66556         
##          Neg Pred Value : 0.93450         
##              Prevalence : 0.10949         
##          Detection Rate : 0.04880         
##    Detection Prevalence : 0.07332         
##       Balanced Accuracy : 0.70907         
##                                           
##        'Positive' Class : yes             
## 
cat("\n=============================\n")
## 
## =============================
cat("METRICS - TESTING\n")
## METRICS - TESTING
cat("=============================\n")
## =============================
print(eval_test$metrics)
##    Accuracy Sensitivity Specificity Balanced_Accuracy  F1_Score
## 1 0.9147851   0.4456763   0.9724646         0.7090704 0.5338645

5 Summary of Results and Conclusion

hasil_akhir <- bind_rows(
  cbind(Model = "SVM + SMOTE", Data = "Training_SMOTE", eval_train$metrics),
  cbind(Model = "SVM + SMOTE", Data = "Testing", eval_test$metrics)
)

cat("\n=============================\n")
## 
## =============================
cat("RINGKASAN HASIL AKHIR\n")
## RINGKASAN HASIL AKHIR
cat("=============================\n")
## =============================
print(hasil_akhir)
##         Model           Data  Accuracy Sensitivity Specificity
## 1 SVM + SMOTE Training_SMOTE 0.8909804   0.6828305   0.9702583
## 2 SVM + SMOTE        Testing 0.9147851   0.4456763   0.9724646
##   Balanced_Accuracy  F1_Score
## 1         0.8265444 0.7755385
## 2         0.7090704 0.5338645

Based on the final summary, the SVM model with SMOTE performs reasonably well, reaching an accuracy of 0.9148 on the testing data. The specificity of 0.9725 shows that the model is very good at identifying customers who do not subscribe to a deposit (no). However, the sensitivity of 0.4457 indicates that the model's ability to detect customers likely to subscribe (yes) is still limited. A balanced accuracy of 0.7091 and an F1-score of 0.5339 suggest that the model handles the class imbalance to some extent, but its performance on the minority class is not yet optimal.

Comparing the SMOTE training set with the testing set, the testing performance remains stable and shows no extreme drop, so the model does not appear to suffer from severe overfitting. Overall, the SVM + SMOTE model is a reasonable classifier for this data, especially if the main goal is to predict the majority class well. However, if the research objective emphasizes detecting customers who will subscribe to a deposit (yes), the model still needs improvement, for example through SVM parameter tuning, adjusting the SMOTE ratio, or tuning the classification threshold so that sensitivity and the F1-score increase.
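
One hedged sketch of such a follow-up is to tune cost and gamma with e1071::tune.svm on a subsample to keep the grid search tractable (the grid values and subsample size are arbitrary assumptions, and the chunk is not run here):

set.seed(123)
idx <- sample(nrow(x_train_sc), 5000)           # subsample for a faster grid search
tune_res <- tune.svm(x = x_train_sc[idx, ], y = y_train_smote[idx],
                     kernel = "radial",
                     gamma  = c(0.005, 0.01, 0.02, 0.05),
                     cost   = c(0.5, 1, 2, 4))
summary(tune_res)                               # best gamma/cost by cross-validated error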