# Title   : Proyek Data Mining
# Nama    : Wullan
# NIM     : 4101422182
# Dataset : GermanCredit
library(mlbench)
## Warning: package 'mlbench' was built under R version 4.4.3
library(e1071)
## Warning: package 'e1071' was built under R version 4.4.3
library(class)
library(rpart)
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Loading required package: lattice
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.4.3
library(ggplot2)
# Load the GermanCredit data set and work on a copy named `df`.
data("GermanCredit")
df <- GermanCredit

# Seed the RNG so the partition below is reproducible, then split on `Class`:
# rows picked by createDataPartition (p = 0.7) form the training set and all
# remaining rows form the test set.
set.seed(42)
index <- createDataPartition(y = df[["Class"]], p = 0.7, list = FALSE)
train <- df[index, ]
test <- df[-index, ]
# SVM
# Radial-kernel SVM using every other column as a predictor of `Class`.
# Recorded warning from this fit (svm.default): variables 'Purpose.Vacation'
# and 'Personal.Female.Single' are constant, so they cannot be scaled.
svm_model <- svm(
  Class ~ .,
  data = train,
  kernel = "radial"
)

# Predicted classes for the held-out rows.
svm_pred <- predict(object = svm_model, newdata = test)
# KNN
# k-nearest-neighbour classification (k = 5) on standardised predictors.
#
# Two fixes over the naive call:
#   * Predictors are selected by name. `-which(names(x) == "Class")` silently
#     selects zero columns if the name is ever not found.
#   * Columns are centred and scaled with statistics from the TRAINING set
#     only (then applied to the test set). KNN is distance based, so without
#     this the columns with the largest numeric range dominate the distance.
#     Zero-variance columns get a scale of 1 to avoid division by zero.
#
# NOTE: standardising changes which neighbours are found, so any KNN accuracy
# figure recorded from an earlier run must be regenerated.
knn_pred <- local({
  predictors <- setdiff(names(train), "Class")
  train_x <- train[, predictors, drop = FALSE]
  test_x <- test[, predictors, drop = FALSE]

  centers <- colMeans(train_x)
  spreads <- vapply(train_x, sd, numeric(1))
  spreads[!is.finite(spreads) | spreads == 0] <- 1

  knn(
    train = scale(train_x, center = centers, scale = spreads),
    test = scale(test_x, center = centers, scale = spreads),
    cl = train$Class,
    k = 5
  )
})
# Decision Tree
# Classification tree (method = "class") using every other column as a
# predictor of `Class`.
dt_model <- rpart(
  formula = Class ~ .,
  data = train,
  method = "class"
)

# type = "class" returns predicted class labels for the held-out rows.
dt_pred <- predict(dt_model, newdata = test, type = "class")
# Test-set accuracy: the fraction of held-out rows whose predicted class
# equals the true class.
svm_acc <- mean(test[["Class"]] == svm_pred)
knn_acc <- mean(test[["Class"]] == knn_pred)
dt_acc <- mean(test[["Class"]] == dt_pred)

# Print each accuracy as a percentage rounded to two decimals; the `##` lines
# are the output recorded when the document was knitted.
cat("Akurasi SVM :", round(100 * svm_acc, 2), "%\n")
## Akurasi SVM : 69.33 %
cat("Akurasi KNN (k = 5) :", round(100 * knn_acc, 2), "%\n")
## Akurasi KNN (k = 5) : 67.67 %
cat("Akurasi Decision Tree:", round(100 * dt_acc, 2), "%\n")
## Akurasi Decision Tree: 70.33 %
# Confusion Matrix + Label Format
#' Summarise a binary confusion matrix as a long-format data frame
#'
#' Cross-tabulates predicted against actual labels and returns the four cell
#' counts, treating `positive` as the positive class.
#'
#' Both inputs are coerced to factors with the fixed level set
#' `c(negative, positive)`, so the 2 x 2 table always has every cell even when
#' a label never occurs (e.g. a model that never predicts the negative class)
#' or when the inputs are character vectors. Labels outside the two levels and
#' `NA`s are not counted in any cell.
#'
#' @param actual Vector (factor or character) of true class labels.
#' @param predicted Vector of predicted class labels, same length as `actual`.
#' @param model_name Value stored in the `Model` column of every row.
#' @param negative Label of the negative class. Defaults to `"Bad"`.
#' @param positive Label of the positive class. Defaults to `"Good"`.
#' @return A data frame with four rows (True Negative, False Positive,
#'   False Negative, True Positive) and columns `Kategori`, `Jumlah`, `Model`.
get_confusion_data <- function(actual, predicted, model_name,
                               negative = "Bad", positive = "Good") {
  class_levels <- c(negative, positive)

  # Fixed levels guarantee a full 2 x 2 table, so the lookups below cannot
  # fail with "subscript out of bounds".
  cm <- table(
    Predicted = factor(as.character(predicted), levels = class_levels),
    Actual = factor(as.character(actual), levels = class_levels)
  )

  data.frame(
    Kategori = c(
      "True Negative", "False Positive", "False Negative", "True Positive"
    ),
    # Rows of `cm` are predictions, columns are the true labels.
    Jumlah = c(
      cm[negative, negative],
      cm[positive, negative],
      cm[negative, positive],
      cm[positive, positive]
    ),
    Model = model_name
  )
}
# One confusion summary per model, each compared against the true test
# labels.
svm_data <- get_confusion_data(
  actual = test$Class,
  predicted = svm_pred,
  model_name = "SVM"
)
knn_data <- get_confusion_data(
  actual = test$Class,
  predicted = knn_pred,
  model_name = "KNN"
)
dt_data <- get_confusion_data(
  actual = test$Class,
  predicted = dt_pred,
  model_name = "Decision Tree"
)

# Stack the three summaries into a single long table for plotting.
cm_data <- do.call(rbind, list(svm_data, knn_data, dt_data))
# Bar chart of the confusion-matrix cell counts, one facet per model.
#
# The fill palette is keyed by category name. An unnamed `values` vector is
# matched to the categories in sorted order, which painted "False Negative"
# green and "True Negative" orange; naming the entries ties each colour to
# the category it was written for.
ggplot(cm_data, aes(x = Kategori, y = Jumlah, fill = Kategori)) +
  # geom_col() draws bar heights straight from `Jumlah`
  # (same as geom_bar(stat = "identity")).
  geom_col(position = "dodge", width = 0.7) +
  # Print the count just above each bar.
  geom_text(aes(label = Jumlah), vjust = -0.5, size = 4) +
  facet_wrap(~ Model, ncol = 3) +
  labs(
    title = "Confusion Matrix untuk Tiap Model",
    x = "Kategori",
    y = "Jumlah"
  ) +
  scale_fill_manual(
    values = c(
      "True Negative" = "green",
      "False Positive" = "red",
      "False Negative" = "orange",
      "True Positive" = "blue"
    )
  ) +
  theme_minimal(base_size = 14) +
  # Tilt the category labels so they do not overlap within narrow facets.
  theme(axis.text.x = element_text(angle = 30, hjust = 1))