library(e1071)
library(ggplot2)
Masukkan dataset, preprocessing, dan model Regresi Linear & SVR
# Load the built-in airquality dataset
data("airquality")
# Clean the data: drop every row that contains at least one NA
aq_clean <- na.omit(airquality)
# Split into training (70 %) and testing (30 %) sets; the seed makes the
# random split reproducible
set.seed(42)
n_rows <- nrow(aq_clean)
index <- sample(seq_len(n_rows), size = floor(0.7 * n_rows))
train_data <- aq_clean[index, , drop = FALSE]
test_data <- aq_clean[-index, , drop = FALSE]
# Both models predict ozone from temperature only
ozone_formula <- Ozone ~ Temp
# Linear regression model, fitted on the training split and evaluated on
# the test split
lm_model <- lm(ozone_formula, data = train_data)
lm_pred <- predict(lm_model, newdata = test_data)
# Support vector regression (SVR) model with the same formula and splits
svr_model <- svm(
  ozone_formula,
  data = train_data,
  type = "eps-regression"
)
svr_pred <- predict(svr_model, newdata = test_data)
# Visualise the results: actual test observations as points, with the
# predictions of both models drawn as lines over temperature.
# Collect everything in one data frame so aes() refers to columns instead of
# `test_data$...` vectors.
plot_data <- data.frame(
  Temp = test_data$Temp,
  Ozone = test_data$Ozone,
  lm_pred = as.numeric(lm_pred),
  svr_pred = as.numeric(svr_pred)
)
ggplot(plot_data, aes(x = Temp)) +
  # Actual data
  geom_point(aes(y = Ozone), color = "black", size = 2) +
  # Linear regression predictions
  geom_line(
    aes(y = lm_pred),
    color = "blue", linetype = "dashed", linewidth = 1
  ) +
  # SVR predictions
  geom_line(
    aes(y = svr_pred),
    color = "red", linetype = "solid", linewidth = 1
  ) +
  labs(
    title = "Perbandingan SVR vs Regresi Linear dalam Prediksi Ozone",
    x = "Temperature (Fahrenheit)",
    y = "Ozone (ppb)"
  ) +
  theme_minimal()
# Note: line thickness is set with `linewidth`; using the `size` aesthetic
# for lines was deprecated in ggplot2 3.4.0 and raised a warning.
# Compute the mean squared error (MSE) of each model on the test set
actual_ozone <- test_data$Ozone
lm_residuals <- lm_pred - actual_ozone
svr_residuals <- svr_pred - actual_ozone
lm_mse <- mean(lm_residuals^2)
svr_mse <- mean(svr_residuals^2)
# Print the MSE results, rounded to two decimals
lm_mse_text <- paste("MSE Regresi Linear:", round(lm_mse, 2))
print(lm_mse_text)
## [1] "MSE Regresi Linear: 609.59"
svr_mse_text <- paste("MSE SVR:", round(svr_mse, 2))
print(svr_mse_text)
## [1] "MSE SVR: 599.56"
Interpretasi Hasil: Visualisasi: - Titik-titik hitam = data aktual. - Garis biru putus-putus = hasil regresi linear. - Garis merah solid = hasil SVR. - Kalau pola data agak “membelok”, SVR biasanya bisa mengikuti lekukannya lebih baik.
MSE: - Menunjukkan rata-rata kesalahan kuadrat. Makin kecil nilainya, makin bagus prediksinya. - Berdasarkan hasil yang diperoleh, MSE Regresi Linear (609.59) > MSE SVR (599.56), sehingga untuk dataset airquality pada pembagian data ini SVR memberikan prediksi yang sedikit lebih baik daripada regresi linear (selisihnya relatif kecil, sekitar 1,6%).
Kesimpulan: - Regresi Linear lebih sederhana dan lebih mudah diinterpretasikan, namun hanya efektif jika hubungan antarvariabel bersifat linear. - SVR lebih fleksibel dan dapat menangani hubungan non-linear, yang sering kali lebih sesuai untuk data nyata yang lebih kompleks.
library(mlbench)
library(class)
library(rpart)
library(caret)
## Loading required package: lattice
library(gridExtra) # Untuk menggabungkan plot
Masukkan dataset, preprocessing, dan model SVM, KNN, Decision Tree
# Load the Pima Indians Diabetes data and work on a copy named `df`
data(PimaIndiansDiabetes)
df <- PimaIndiansDiabetes
# Split into training (70 %) and testing (30 %) sets; the partition is
# created from the `diabetes` outcome column and seeded for reproducibility
set.seed(42)
index <- createDataPartition(
  df[["diabetes"]],
  p = 0.7,
  list = FALSE
)
train <- df[index, , drop = FALSE]
test <- df[-index, , drop = FALSE]
# --- Model 1: SVM ---
# Radial-kernel SVM using every other column as a predictor of `diabetes`
svm_model <- svm(
  diabetes ~ .,
  data = train,
  kernel = "radial"
)
svm_pred <- predict(svm_model, newdata = test)
# Accuracy = share of test rows whose predicted class equals the true class
svm_hits <- svm_pred == test[["diabetes"]]
svm_acc <- mean(svm_hits)
# --- Model 2: KNN (k = 5) ---
# knn() takes the predictors and the class labels separately, so drop the
# label column (`diabetes`, the 9th column) from both feature sets
is_feature <- names(train) != "diabetes"
train_features <- train[, is_feature]
test_features <- test[, is_feature]
knn_pred <- knn(
  train = train_features,
  test = test_features,
  cl = train[["diabetes"]],
  k = 5
)
# Accuracy = share of test rows whose predicted class equals the true class
knn_hits <- knn_pred == test[["diabetes"]]
knn_acc <- mean(knn_hits)
# --- Model 3: Decision Tree ---
# Classification tree using every other column as a predictor of `diabetes`
dt_model <- rpart(
  diabetes ~ .,
  data = train,
  method = "class"
)
# type = "class" returns predicted class labels rather than probabilities
dt_pred <- predict(dt_model, newdata = test, type = "class")
# Accuracy = share of test rows whose predicted class equals the true class
dt_hits <- dt_pred == test[["diabetes"]]
dt_acc <- mean(dt_hits)
Cek akurasi SVM, KNN, dan Decision Tree
# --- Evaluation ---
# Accuracy of each model as a percentage rounded to two decimals; the names
# are the labels printed in front of each value
accuracy_pct <- c(
  "Akurasi SVM :" = round(svm_acc * 100, 2),
  "Akurasi KNN (k = 5) :" = round(knn_acc * 100, 2),
  "Akurasi Decision Tree:" = round(dt_acc * 100, 2)
)
for (label in names(accuracy_pct)) {
  cat(label, accuracy_pct[[label]], "%\n")
}
## Akurasi SVM : 76.09 %
## Akurasi KNN (k = 5) : 70 %
## Akurasi Decision Tree: 73.91 %
Visualisasi
# Build a data frame holding the true class and each model's predicted class
# for every test row
results <- data.frame(
  Actual = test$diabetes,
  SVM = svm_pred,
  KNN = knn_pred,
  DecisionTree = dt_pred
)
# Make sure every column is a factor with the same level order (neg, pos) so
# the fill colours mean the same thing in all three plots
results[] <- lapply(results, factor, levels = c("neg", "pos"))

# Draw a dodged bar chart of predicted class counts per actual class, with
# the count printed above each bar.
#   data     : data frame with an `Actual` column and the prediction column
#   pred_col : name (string) of the prediction column used for the fill
#   title    : plot title
# Returns the ggplot object.
plot_prediction_counts <- function(data, pred_col, title) {
  # Bars and labels share one dodge width (0.9, the default bar width) so
  # each count is centred over its own bar
  dodge <- position_dodge(width = 0.9)
  ggplot(data, aes(x = Actual, fill = .data[[pred_col]])) +
    geom_bar(position = dodge) +
    geom_text(
      stat = "count",
      aes(label = after_stat(count)),
      position = dodge,
      vjust = -0.5
    ) +
    # Set the legend title explicitly so it shows the column name
    labs(title = title, x = "Aktual", y = "Frekuensi", fill = pred_col) +
    scale_fill_manual(values = c("red", "blue"))
}

# One plot per model
svm_plot <- plot_prediction_counts(results, "SVM", "Prediksi SVM")
knn_plot <- plot_prediction_counts(results, "KNN", "Prediksi KNN (k = 5)")
dt_plot <- plot_prediction_counts(
  results, "DecisionTree", "Prediksi Decision Tree"
)
# Combine the three plots side by side using gridExtra
grid.arrange(svm_plot, knn_plot, dt_plot, ncol = 3)
# Note: the computed count is referenced with `after_stat(count)`; the
# dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0 and raised a
# warning.
Interpretasi SVM: Benar negatif (TN): 131, Salah positif (FP): 19, Salah negatif (FN): 36, Benar positif (TP): 44.
- SVM sangat kuat dalam mengklasifikasikan kasus negatif (131 dari 150 benar). Namun, masih banyak kasus positif yang tidak terdeteksi (36 dari 80 kasus positif salah diklasifikasikan sebagai negatif).
KNN: Benar negatif (TN): 112, Salah positif (FP): 38, Salah negatif (FN): 31, Benar positif (TP): 49.
Decision Tree: Benar negatif (TN): 119, Salah positif (FP): 31, Salah negatif (FN): 29, Benar positif (TP): 51.