# Import library
library(readxl) # untuk import data excel
library(dplyr) # untuk memanipulasi/mengolah dataset
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(moments) # untuk menghitung momen statistik dari distribusi data
library(smotefamily) # untuk mengatasi imbalanced data
library(e1071) # untuk membuat model SVM
##
## Attaching package: 'e1071'
## The following objects are masked from 'package:moments':
##
## kurtosis, moment, skewness
library(caret) # untuk evaluasi metrik
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:e1071':
##
## element
## Loading required package: lattice
# Import data
set.seed(123)
train <- read_excel("C:/Users/Lenovo/Documents/Teknik Pembelajaran Mesin/Tugas Individu/Tugas Individu 1/Data Training.xlsx")
test <- read_excel("C:/Users/Lenovo/Documents/Teknik Pembelajaran Mesin/Tugas Individu/Tugas Individu 1/Data Testing.xlsx")
# Cek struktur data
str(train)
## tibble [41,188 × 21] (S3: tbl_df/tbl/data.frame)
## $ age : num [1:41188] 56 57 37 40 56 45 59 41 24 25 ...
## $ job : chr [1:41188] "housemaid" "services" "services" "admin." ...
## $ marital : chr [1:41188] "married" "married" "married" "married" ...
## $ education : chr [1:41188] "basic.4y" "high.school" "high.school" "basic.6y" ...
## $ default : chr [1:41188] "no" NA "no" "no" ...
## $ housing : chr [1:41188] "no" "no" "yes" "no" ...
## $ loan : chr [1:41188] "no" "no" "no" "no" ...
## $ contact : chr [1:41188] "telephone" "telephone" "telephone" "telephone" ...
## $ month : chr [1:41188] "may" "may" "may" "may" ...
## $ day_of_week : chr [1:41188] "mon" "mon" "mon" "mon" ...
## $ duration : num [1:41188] 261 149 226 151 307 198 139 217 380 50 ...
## $ campaign : num [1:41188] 1 1 1 1 1 1 1 1 1 1 ...
## $ pdays : num [1:41188] 999 999 999 999 999 999 999 999 999 999 ...
## $ previous : num [1:41188] 0 0 0 0 0 0 0 0 0 0 ...
## $ poutcome : chr [1:41188] "nonexistent" "nonexistent" "nonexistent" "nonexistent" ...
## $ emp.var.rate : num [1:41188] 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 ...
## $ cons.price.idx: num [1:41188] 94 94 94 94 94 ...
## $ cons.conf.idx : num [1:41188] -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 ...
## $ euribor3m : num [1:41188] 4.86 4.86 4.86 4.86 4.86 ...
## $ nr.employed : num [1:41188] 5191 5191 5191 5191 5191 ...
## $ y : chr [1:41188] "no" "no" "no" "no" ...
str(test)
## tibble [4,119 × 21] (S3: tbl_df/tbl/data.frame)
## $ age : num [1:4119] 30 39 25 38 47 32 32 41 31 35 ...
## $ job : chr [1:4119] "blue-collar" "services" "services" "services" ...
## $ marital : chr [1:4119] "married" "single" "married" "married" ...
## $ education : chr [1:4119] "basic.9y" "high.school" "high.school" "basic.9y" ...
## $ default : chr [1:4119] "no" "no" "no" "no" ...
## $ housing : chr [1:4119] "yes" "no" "yes" NA ...
## $ loan : chr [1:4119] "no" "no" "no" NA ...
## $ contact : chr [1:4119] "cellular" "telephone" "telephone" "telephone" ...
## $ month : chr [1:4119] "may" "may" "jun" "jun" ...
## $ day_of_week : chr [1:4119] "fri" "fri" "wed" "fri" ...
## $ duration : num [1:4119] 487 346 227 17 58 128 290 44 68 170 ...
## $ campaign : num [1:4119] 2 4 1 3 1 3 4 2 1 1 ...
## $ pdays : num [1:4119] 999 999 999 999 999 999 999 999 999 999 ...
## $ previous : num [1:4119] 0 0 0 0 0 2 0 0 1 0 ...
## $ poutcome : chr [1:4119] "nonexistent" "nonexistent" "nonexistent" "nonexistent" ...
## $ emp.var.rate : num [1:4119] -1.8 1.1 1.4 1.4 -0.1 -1.1 -1.1 -0.1 -0.1 1.1 ...
## $ cons.price.idx: num [1:4119] 92.9 94 94.5 94.5 93.2 ...
## $ cons.conf.idx : num [1:4119] -46.2 -36.4 -41.8 -41.8 -42 -37.5 -37.5 -42 -42 -36.4 ...
## $ euribor3m : num [1:4119] 1.31 4.86 4.96 4.96 4.19 ...
## $ nr.employed : num [1:4119] 5099 5191 5228 5228 5196 ...
## $ y : chr [1:4119] "no" "no" "no" "no" ...
# Tranformasi peubah respon jadi factor
train$y <- factor(train$y, levels = c("no", "yes"))
test$y <- factor(test$y, levels = c("no", "yes"))
# Identifikasi daftar peubah kategorik
cat_vars <- c("job", "marital", "education", "default"
, "housing", "loan", "contact", "month",
"day_of_week", "poutcome")
# Ubah semua peubah kategorik (dalam cat_vars) jadi tipe faktor
for (v in cat_vars) {
train[[v]] <- as.factor(train[[v]])
test[[v]] <- as.factor(test[[v]])
}
# Identifikasi daftar peubah numberik
num_vars <- setdiff(names(train), c(cat_vars, "y"))
# Pastikan semua peubah numerik diidentifikasi numerik di R
for (v in num_vars) {
train[[v]] <- as.numeric(train[[v]])
test[[v]] <- as.numeric(test[[v]])
}
Missing Value Data Training
print(colSums(is.na(train)))
## age job marital education default
## 0 330 80 1731 8597
## housing loan contact month day_of_week
## 990 990 0 0 0
## duration campaign pdays previous poutcome
## 0 0 0 0 0
## emp.var.rate cons.price.idx cons.conf.idx euribor3m nr.employed
## 0 0 0 0 0
## y
## 0Missing Value Data Testing
print(colSums(is.na(test)))
## age job marital education default
## 0 39 11 167 803
## housing loan contact month day_of_week
## 105 105 0 0 0
## duration campaign pdays previous poutcome
## 0 0 0 0 0
## emp.var.rate cons.price.idx cons.conf.idx euribor3m nr.employed
## 0 0 0 0 0
## y
## 0Identifikasi Distribusi Target
Distribusi Target Data Training
print(table(train$y))
##
## no yes
## 36548 4640
print(prop.table(table(train$y)))
##
## no yes
## 0.8873458 0.1126542Distribusi Target Data Testing
print(table(test$y))
##
## no yes
## 3668 451
print(prop.table(table(test$y)))
##
## no yes
## 0.8905074 0.1094926# Membuat fungsi impute_mode untuk mencari nilai modus dari sebuah vektor/peubah
impute_mode <- function(x) {
x <- x[!is.na(x)]
ux <- unique(x)
ux[which.max(tabulate(match(x, ux)))]
}
# Membuat fungsi impute_data untuk melakukan imputasi missing value pada dataset
impute_data <- function(df, ref_df = NULL, cat_vars, target_var = "y") {
if (is.null(ref_df)) ref_df <- df
out <- df
for (col in names(out)) {
if (col == target_var) next
if (col %in% cat_vars) {
mode_value <- impute_mode(ref_df[[col]])
out[[col]][is.na(out[[col]])] <- mode_value
out[[col]] <- factor(out[[col]], levels = levels(ref_df[[col]]))
} else {
med_value <- median(ref_df[[col]], na.rm = TRUE)
out[[col]][is.na(out[[col]])] <- med_value
}
}
return(out)
}
# Melakukan imputasi pada data training dan data testing dengan referensi dari data training
train_imp <- impute_data(train, ref_df = train, cat_vars = cat_vars, target_var = "y")
test_imp <- impute_data(test, ref_df = train, cat_vars = cat_vars, target_var = "y")
# Mengecek jumlah missing value setelah imputasi
print(colSums(is.na(train_imp)))
## age job marital education default
## 0 0 0 0 0
## housing loan contact month day_of_week
## 0 0 0 0 0
## duration campaign pdays previous poutcome
## 0 0 0 0 0
## emp.var.rate cons.price.idx cons.conf.idx euribor3m nr.employed
## 0 0 0 0 0
## y
## 0
# Menyalin data training dan data testing yang sudah dilakukan imputasi untuk ditangani outliernya
train_out <- train_imp
test_out <- test_imp
# Membuat fungsi untuk menangani outlier
# Nilai yang lebih kecil dari batas bawah data training akan diganti jadi nilai batas bawah data training.
# Nilai yang lebih besar dari batas atas data training akan diganti jadi nilai batas atas data training.
for (v in num_vars) {
q1 <- quantile(train_out[[v]], 0.25, na.rm = TRUE)
q3 <- quantile(train_out[[v]], 0.75, na.rm = TRUE)
iqr_val <- q3 - q1
lower <- q1 - 1.5 * iqr_val # Batas Bawah
upper <- q3 + 1.5 * iqr_val # Batas Atas
train_out[[v]][train_out[[v]] < lower] <- lower
train_out[[v]][train_out[[v]] > upper] <- upper
test_out[[v]][test_out[[v]] < lower] <- lower
test_out[[v]][test_out[[v]] > upper] <- upper
}
# Membuat fungsi untuk menghitung skewness peubah numerik
safe_skewness <- function(x) {
x <- x[is.finite(x)]
if (length(x) < 3) return(NA)
if (sd(x) == 0) return(NA)
moments::skewness(x)
}
skew_vals <- sapply(train_out[num_vars], safe_skewness)
print(skew_vals)
## age duration campaign pdays previous
## 0.5675749 1.0448624 1.2121184 NA NA
## emp.var.rate cons.price.idx cons.conf.idx euribor3m nr.employed
## -0.7240692 -0.2308792 0.3008034 -0.7091621 -1.0442244
# Identifikasi peubah yang skewness-nya tidak valid
invalid_skew <- names(skew_vals[!is.finite(skew_vals)])
if (length(invalid_skew) > 0) {
print(invalid_skew)
}
## [1] "pdays" "previous"
# Transformasi skewness yang tidak valid
skew_threshold <- 1
train_prep <- train_out
test_prep <- test_out
for (v in num_vars) {
if (v == "pdays") {
cat("Transformasi dilewati untuk:", v, "\n")
next
}
if (is.finite(skew_vals[v]) && abs(skew_vals[v]) > skew_threshold) {
if (min(train_prep[[v]], na.rm = TRUE) >= 0) {
cat("Transformasi log1p pada:", v, "\n")
train_prep[[v]] <- log1p(train_prep[[v]])
test_prep[[v]] <- log1p(test_prep[[v]])
} else {
cat("Tidak ditransformasi (ada nilai negatif):", v, "\n")
}
} else {
cat("Skewness valid / tidak perlu transformasi:", v, "\n")
}
}
## Skewness valid / tidak perlu transformasi: age
## Transformasi log1p pada: duration
## Transformasi log1p pada: campaign
## Transformasi dilewati untuk: pdays
## Skewness valid / tidak perlu transformasi: previous
## Skewness valid / tidak perlu transformasi: emp.var.rate
## Skewness valid / tidak perlu transformasi: cons.price.idx
## Skewness valid / tidak perlu transformasi: cons.conf.idx
## Skewness valid / tidak perlu transformasi: euribor3m
## Transformasi log1p pada: nr.employed
use_duration <- FALSE
if (!use_duration) {
train_model <- train_prep %>% select(-duration)
test_model <- test_prep %>% select(-duration)
cat("\nModel menggunakan skenario REALISTIS (tanpa duration)\n")
} else {
train_model <- train_prep
test_model <- test_prep
cat("\nModel menggunakan skenario BENCHMARK (dengan duration)\n")
}
##
## Model menggunakan skenario REALISTIS (tanpa duration)
cat_vars_model <- names(train_model)[sapply(train_model, is.factor)]
cat_vars_model <- setdiff(cat_vars_model, "y")
for (v in cat_vars_model) {
all_levels <- union(levels(train_model[[v]]), levels(test_model[[v]]))
train_model[[v]] <- factor(train_model[[v]], levels = all_levels)
test_model[[v]] <- factor(test_model[[v]], levels = all_levels)
}
Distribusi Kelas Sebelum SMOTE (Training)
print(table(train_model$y))
##
## no yes
## 36548 4640
print(prop.table(table(train_model$y)))
##
## no yes
## 0.8873458 0.1126542
y_train <- train_model$y
y_test <- test_model$y
x_train_raw <- train_model[, setdiff(names(train_model), "y"), drop = FALSE]
x_test_raw <- test_model[, setdiff(names(test_model), "y"), drop = FALSE]
gabung_data <- rbind(x_train_raw, x_test_raw)
dummy_all <- as.data.frame(model.matrix(~ . - 1, data = gabung_data))
n_train <- nrow(x_train_raw)
x_train <- dummy_all[1:n_train, , drop = FALSE]
x_test <- dummy_all[(n_train + 1):nrow(dummy_all), , drop = FALSE]
Cek dimensi data setelah menggunakan peubah dummy
cat("\nDimensi x_train setelah dummy:", dim(x_train), "\n")
##
## Dimensi x_train setelah dummy: 41188 47
cat("Dimensi x_test setelah dummy :", dim(x_test), "\n")
## Dimensi x_test setelah dummy : 4119 47
Cek ringkasan data setelah menggunakan peubah dummy
str(x_train)
## 'data.frame': 41188 obs. of 47 variables:
## $ age : num 56 57 37 40 56 45 59 41 24 25 ...
## $ jobadmin. : num 0 0 0 1 0 0 1 0 0 0 ...
## $ jobblue-collar : num 0 0 0 0 0 0 0 1 0 0 ...
## $ jobentrepreneur : num 0 0 0 0 0 0 0 0 0 0 ...
## $ jobhousemaid : num 1 0 0 0 0 0 0 0 0 0 ...
## $ jobmanagement : num 0 0 0 0 0 0 0 0 0 0 ...
## $ jobretired : num 0 0 0 0 0 0 0 0 0 0 ...
## $ jobself-employed : num 0 0 0 0 0 0 0 0 0 0 ...
## $ jobservices : num 0 1 1 0 1 1 0 0 0 1 ...
## $ jobstudent : num 0 0 0 0 0 0 0 0 0 0 ...
## $ jobtechnician : num 0 0 0 0 0 0 0 0 1 0 ...
## $ jobunemployed : num 0 0 0 0 0 0 0 0 0 0 ...
## $ maritalmarried : num 1 1 1 1 1 1 1 1 0 0 ...
## $ maritalsingle : num 0 0 0 0 0 0 0 0 1 1 ...
## $ educationbasic.6y : num 0 0 0 1 0 0 0 0 0 0 ...
## $ educationbasic.9y : num 0 0 0 0 0 1 0 0 0 0 ...
## $ educationhigh.school : num 0 1 1 0 1 0 0 0 0 1 ...
## $ educationilliterate : num 0 0 0 0 0 0 0 0 0 0 ...
## $ educationprofessional.course: num 0 0 0 0 0 0 1 0 1 0 ...
## $ educationuniversity.degree : num 0 0 0 0 0 0 0 1 0 0 ...
## $ defaultyes : num 0 0 0 0 0 0 0 0 0 0 ...
## $ housingyes : num 0 0 1 0 0 0 0 0 1 1 ...
## $ loanyes : num 0 0 0 0 1 0 0 0 0 0 ...
## $ contacttelephone : num 1 1 1 1 1 1 1 1 1 1 ...
## $ monthaug : num 0 0 0 0 0 0 0 0 0 0 ...
## $ monthdec : num 0 0 0 0 0 0 0 0 0 0 ...
## $ monthjul : num 0 0 0 0 0 0 0 0 0 0 ...
## $ monthjun : num 0 0 0 0 0 0 0 0 0 0 ...
## $ monthmar : num 0 0 0 0 0 0 0 0 0 0 ...
## $ monthmay : num 1 1 1 1 1 1 1 1 1 1 ...
## $ monthnov : num 0 0 0 0 0 0 0 0 0 0 ...
## $ monthoct : num 0 0 0 0 0 0 0 0 0 0 ...
## $ monthsep : num 0 0 0 0 0 0 0 0 0 0 ...
## $ day_of_weekmon : num 1 1 1 1 1 1 1 1 1 1 ...
## $ day_of_weekthu : num 0 0 0 0 0 0 0 0 0 0 ...
## $ day_of_weektue : num 0 0 0 0 0 0 0 0 0 0 ...
## $ day_of_weekwed : num 0 0 0 0 0 0 0 0 0 0 ...
## $ campaign : num 0.693 0.693 0.693 0.693 0.693 ...
## $ pdays : num 999 999 999 999 999 999 999 999 999 999 ...
## $ previous : num 0 0 0 0 0 0 0 0 0 0 ...
## $ poutcomenonexistent : num 1 1 1 1 1 1 1 1 1 1 ...
## $ poutcomesuccess : num 0 0 0 0 0 0 0 0 0 0 ...
## $ emp.var.rate : num 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 ...
## $ cons.price.idx : num 94 94 94 94 94 ...
## $ cons.conf.idx : num -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 ...
## $ euribor3m : num 4.86 4.86 4.86 4.86 4.86 ...
## $ nr.employed : num 8.55 8.55 8.55 8.55 8.55 ...
# Mengubah peubah target y_train menjadi bentuk numerik dengan "yes" = 1 dan "no" = 0
y_train_num <- ifelse(y_train == "yes", 1, 0)
# Menerapkan metode SMOTE dari package smotefamily untuk menyeimbangkan kelas pada data training
smote_result <- smotefamily::SMOTE(
X = x_train,
target = y_train_num,
K = 5,
dup_size = 2
)
# Mengambil dataset hasil SMOTE yang berisi peubah prediktor dan target
train_smote_data <- smote_result$data
# Menentukan posisi kolom terakhir sebagai kolom target pada data hasil SMOTE
target_col <- ncol(train_smote_data)
# Mengambil semua kolom kecuali kolom terakhir sebagai peubah prediktor
x_train_smote <- train_smote_data[, -target_col, drop = FALSE]
# Mengambil kolom terakhir sebagai peubah target dalam bentuk numerik
y_train_smote_num <- train_smote_data[, target_col]
# Mengubah kembali target numerik menjadi faktor dengan label "yes" dan "no"
y_train_smote <- factor(ifelse(y_train_smote_num == 1, "yes", "no"), levels = c("no", "yes"))
Distribusi Kelas Setelah SMOTE
print(table(y_train_smote))
## y_train_smote
## no yes
## 36548 13920
print(prop.table(table(y_train_smote)))
## y_train_smote
## no yes
## 0.7241817 0.2758183
Identifikasi peubah dengan zero variance (semua peubah nilainya sama)
zero_var_cols <- names(x_train_smote)[apply(x_train_smote, 2, function(z) var(z, na.rm = TRUE) == 0)]
print(zero_var_cols)
## [1] "pdays" "previous"
Penghapusan zero variance
if (length(zero_var_cols) > 0) {
x_train_smote <- x_train_smote[, !(names(x_train_smote) %in% zero_var_cols), drop = FALSE]
x_test <- x_test[, !(names(x_test) %in% zero_var_cols), drop = FALSE]
}
Cek dimensi data setelah penghapusan zero variance
cat("\nDimensi x_train_smote setelah hapus zero variance:", dim(x_train_smote), "\n")
##
## Dimensi x_train_smote setelah hapus zero variance: 50468 45
cat("Dimensi x_test setelah hapus zero variance :", dim(x_test), "\n")
## Dimensi x_test setelah hapus zero variance : 4119 45
train_means <- sapply(x_train_smote, mean, na.rm = TRUE)
train_sds <- sapply(x_train_smote, sd, na.rm = TRUE)
train_sds[train_sds == 0] <- 1
x_train_sc <- as.data.frame(scale(x_train_smote, center = train_means, scale = train_sds))
x_test_sc <- as.data.frame(scale(x_test, center = train_means, scale = train_sds))
# Memastikan tidak ada missing value setelah scaling
x_train_sc[!is.finite(as.matrix(x_train_sc))] <- 0
x_test_sc[!is.finite(as.matrix(x_test_sc))] <- 0
model_svm <- svm(
x = x_train_sc,
y = y_train_smote,
kernel = "radial",
cost = 1,
gamma = 1 / ncol(x_train_sc),
probability = FALSE
)
print(model_svm)
##
## Call:
## svm.default(x = x_train_sc, y = y_train_smote, kernel = "radial",
## gamma = 1/ncol(x_train_sc), cost = 1, probability = FALSE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 18778
x_train_sc <- as.data.frame(x_train_sc)
x_test_sc <- as.data.frame(x_test_sc)
pred_train <- e1071:::predict.svm(model_svm, newdata = x_train_sc)
pred_test <- e1071:::predict.svm(model_svm, newdata = x_test_sc)
pred_train <- factor(as.character(pred_train), levels = c("no", "yes"))
pred_test <- factor(as.character(pred_test), levels = c("no", "yes"))
evaluate_model <- function(actual, predicted, positive_class = "yes") {
actual <- factor(actual, levels = c("no", "yes"))
predicted <- factor(predicted, levels = c("no", "yes"))
cm <- confusionMatrix(predicted, actual, positive = positive_class)
acc <- as.numeric(cm$overall["Accuracy"])
sens <- as.numeric(cm$byClass["Sensitivity"])
spec <- as.numeric(cm$byClass["Specificity"])
bal_acc <- (sens + spec) / 2
precision <- as.numeric(cm$byClass["Pos Pred Value"])
f1 <- ifelse((precision + sens) == 0, 0,
2 * precision * sens / (precision + sens))
metrics <- data.frame(
Accuracy = acc,
Sensitivity = sens,
Specificity = spec,
Balanced_Accuracy = bal_acc,
F1_Score = f1
)
return(list(conf_matrix = cm, metrics = metrics))
}
eval_train <- evaluate_model(y_train_smote, pred_train)
eval_test <- evaluate_model(y_test, pred_test)
Confusion Matrix
print(eval_train$conf_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 35461 4415
## yes 1087 9505
##
## Accuracy : 0.891
## 95% CI : (0.8882, 0.8937)
## No Information Rate : 0.7242
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7053
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.6828
## Specificity : 0.9703
## Pos Pred Value : 0.8974
## Neg Pred Value : 0.8893
## Prevalence : 0.2758
## Detection Rate : 0.1883
## Detection Prevalence : 0.2099
## Balanced Accuracy : 0.8265
##
## 'Positive' Class : yes
## Metrics
print(eval_train$metrics)
## Accuracy Sensitivity Specificity Balanced_Accuracy F1_Score
## 1 0.8909804 0.6828305 0.9702583 0.8265444 0.7755385Confusion Matrix
print(eval_test$conf_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 3567 250
## yes 101 201
##
## Accuracy : 0.9148
## 95% CI : (0.9058, 0.9231)
## No Information Rate : 0.8905
## P-Value [Acc > NIR] : 1.405e-07
##
## Kappa : 0.489
##
## Mcnemar's Test P-Value : 2.797e-15
##
## Sensitivity : 0.44568
## Specificity : 0.97246
## Pos Pred Value : 0.66556
## Neg Pred Value : 0.93450
## Prevalence : 0.10949
## Detection Rate : 0.04880
## Detection Prevalence : 0.07332
## Balanced Accuracy : 0.70907
##
## 'Positive' Class : yes
## Metrics
print(eval_test$metrics)
## Accuracy Sensitivity Specificity Balanced_Accuracy F1_Score
## 1 0.9147851 0.4456763 0.9724646 0.7090704 0.5338645hasil_akhir <- bind_rows(
cbind(Model = "SVM + SMOTE", Data = "Training_SMOTE", eval_train$metrics),
cbind(Model = "SVM + SMOTE", Data = "Testing", eval_test$metrics)
)
print(hasil_akhir)
## Model Data Accuracy Sensitivity Specificity
## 1 SVM + SMOTE Training_SMOTE 0.8909804 0.6828305 0.9702583
## 2 SVM + SMOTE Testing 0.9147851 0.4456763 0.9724646
## Balanced_Accuracy F1_Score
## 1 0.8265444 0.7755385
## 2 0.7090704 0.5338645
Hasil evaluasi menunjukkan bahwa model Support Vector Machine yang dilatih dengan teknik SMOTE memiliki performa yang cukup baik pada data training maupun testing dari sisi akurasi dan spesifisitas. Pada data training SMOTE, model menghasilkan akurasi sebesar 0.891 dengan sensitivitas 0.683 dan spesifisitas 0.970, serta balanced accuracy 0.827 dan F1-score 0.776. Hal ini menunjukkan bahwa model mampu mengklasifikasikan sebagian besar data dengan benar, terutama dalam mengenali kelas mayoritas, yang terlihat dari nilai spesifisitas yang sangat tinggi.
Namun pada data testing, meskipun akurasi masih cukup tinggi yaitu 0.915 dan spesifisitas tetap tinggi (0.972), nilai sensitivitas menurun menjadi 0.446 dan F1-score menjadi 0.534. Penurunan ini menunjukkan bahwa model masih kurang baik dalam mendeteksi kelas positif (minoritas) pada data baru, meskipun teknik SMOTE telah digunakan untuk menyeimbangkan data saat pelatihan. Dengan demikian, model cenderung lebih baik dalam mengidentifikasi kelas negatif dibandingkan kelas positif, sehingga peningkatan performa terutama pada sensitivitas masih diperlukan agar model lebih seimbang dalam mendeteksi kedua kelas.