Support Vector Regression (SVR) adalah ekstensi dari Support Vector Machine untuk masalah regresi. SVR menggunakan konsep epsilon-tube yang memungkinkan toleransi error dalam prediksi, berbeda dengan regresi linear biasa yang mencoba meminimalkan semua error.
# Install packages jika belum ada
# install.packages(c("e1071", "caret", "ggplot2", "dplyr", "gridExtra",
# "corrplot", "Metrics", "plotly", "GGally"))
# Load libraries
library(e1071) # untuk SVR
library(caret) # untuk machine learning
library(ggplot2) # untuk visualisasi
library(dplyr) # untuk manipulasi data
library(gridExtra) # untuk multiple plots
library(corrplot) # untuk correlation plot
library(Metrics) # untuk evaluation metrics
library(knitr) # untuk kable
library(GGally) # untuk pair plots
library(plotly) # untuk interactive plots
library(tidyr) # untuk data reshaping
Dataset mtcars berisi data tentang performa mobil dengan 32 observasi dan 11 variabel. Kita akan memprediksi konsumsi bahan bakar (mpg) berdasarkan variabel lainnya.
# Bring the built-in mtcars data into the session and work on a copy named df.
data("mtcars")
df <- mtcars
# Report the dataset dimensions: number of rows, then number of columns.
cat("Dimensi dataset:", nrow(df), ncol(df), "\n")
## Dimensi dataset: 32 11
## Struktur dataset:
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
## Deskripsi Variabel:
## mpg - Miles per gallon (target variable)
## cyl - Number of cylinders
## disp - Displacement (cu.in.)
## hp - Gross horsepower
## drat - Rear axle ratio
## wt - Weight (1000 lbs)
## qsec - 1/4 mile time
## vs - Engine (0=V-shaped, 1=straight)
## am - Transmission (0=automatic, 1=manual)
## gear - Number of forward gears
## carb - Number of carburetors
## mpg cyl disp hp
## Min. :10.40 Min. :4.000 Min. : 71.1 Min. : 52.0
## 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8 1st Qu.: 96.5
## Median :19.20 Median :6.000 Median :196.3 Median :123.0
## Mean :20.09 Mean :6.188 Mean :230.7 Mean :146.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0 3rd Qu.:180.0
## Max. :33.90 Max. :8.000 Max. :472.0 Max. :335.0
## drat wt qsec vs
## Min. :2.760 Min. :1.513 Min. :14.50 Min. :0.0000
## 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89 1st Qu.:0.0000
## Median :3.695 Median :3.325 Median :17.71 Median :0.0000
## Mean :3.597 Mean :3.217 Mean :17.85 Mean :0.4375
## 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90 3rd Qu.:1.0000
## Max. :4.930 Max. :5.424 Max. :22.90 Max. :1.0000
## am gear carb
## Min. :0.0000 Min. :3.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
## Median :0.0000 Median :4.000 Median :2.000
## Mean :0.4062 Mean :3.688 Mean :2.812
## 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1.0000 Max. :5.000 Max. :8.000
## Missing values per kolom:
## mpg cyl disp hp drat wt qsec vs am gear carb
## 0 0 0 0 0 0 0 0 0 0 0
##
## Tidak ada missing values dalam dataset
# Distribution of the target variable (mpg): a histogram of counts with a
# kernel density curve drawn on the same count scale.
#
# A density is converted to counts as density * n * bin width. stat_density
# exposes density * n as the computed variable `count`, so only the bin width
# has to be multiplied in. The bin width is set explicitly and shared by both
# layers; the overlay previously assumed a width of 2 while the histogram was
# built with `bins = 15`, which does not generally give bins of width 2.
mpg_binwidth <- 2
ggplot(df, aes(x = mpg)) +
  geom_histogram(binwidth = mpg_binwidth, fill = "skyblue", alpha = 0.7,
                 color = "black") +
  geom_density(aes(y = after_stat(count) * mpg_binwidth),
               color = "red", size = 1) +
  labs(title = "Distribusi Miles per Gallon (MPG)",
       x = "Miles per Gallon",
       y = "Frequency") +
  theme_minimal()
# Correlation matrix
# Pairwise correlations between all columns of df.
cor_matrix <- cor(x = df)
# Show the upper triangle as colored tiles, with variables ordered by
# hierarchical clustering and black axis labels rotated by 45 degrees.
corrplot(
  corr = cor_matrix,
  title = "Korelasi Antar Variabel",
  tl.srt = 45,
  tl.col = "black",
  order = "hclust",
  type = "upper",
  method = "color"
)
# Pairs plot to inspect the relationships between variables
# Restrict the scatter-plot matrix to the target and four key predictors.
important_vars <- c("mpg", "wt", "hp", "disp", "cyl")
ggpairs(
  data = df[, important_vars],
  title = "Scatter Plot Matrix - Variabel Penting"
) +
  theme_minimal()
# Plot the relationship between the key variables and mpg
# Panels 1-3: mpg against one numeric predictor each, as a scatter plot with a
# fitted straight line (method = "lm") and its standard-error band.
p1 <- ggplot(data = df, mapping = aes(x = wt, y = mpg)) +
  geom_point(color = "blue", alpha = 0.7, size = 3) +
  geom_smooth(color = "red", se = TRUE, method = "lm") +
  labs(y = "MPG", x = "Weight (1000 lbs)", title = "MPG vs Weight") +
  theme_minimal()

p2 <- ggplot(data = df, mapping = aes(x = hp, y = mpg)) +
  geom_point(color = "green", alpha = 0.7, size = 3) +
  geom_smooth(color = "red", se = TRUE, method = "lm") +
  labs(y = "MPG", x = "Horsepower", title = "MPG vs Horsepower") +
  theme_minimal()

p3 <- ggplot(data = df, mapping = aes(x = disp, y = mpg)) +
  geom_point(color = "purple", alpha = 0.7, size = 3) +
  geom_smooth(color = "red", se = TRUE, method = "lm") +
  labs(y = "MPG", x = "Displacement", title = "MPG vs Displacement") +
  theme_minimal()

# Panel 4: cyl is treated as a category, so mpg is summarized per cylinder
# count with box plots.
p4 <- ggplot(data = df, mapping = aes(x = factor(cyl), y = mpg)) +
  geom_boxplot(alpha = 0.7, fill = "orange") +
  labs(y = "MPG", x = "Number of Cylinders", title = "MPG vs Cylinders") +
  theme_minimal()

# Lay the four panels out in a two-column grid.
grid.arrange(p1, p2, p3, p4, ncol = 2)
# Set the seed for reproducibility
set.seed(123)
# The dataset is small (32 observations), so a 70/30 split is used.
# createDataPartition() is called with list = FALSE; its result is flattened
# to a plain vector before being used as a row selector.
train_index <- createDataPartition(y = df[["mpg"]], p = 0.7, list = FALSE)
train_data <- df[as.vector(train_index), ]
test_data <- df[-as.vector(train_index), ]
cat("Ukuran data training:", nrow(train_data), "\n")
## Ukuran data training: 24
## Ukuran data testing: 8
##
## Statistik MPG - Training:
## Mean: 20.23
## SD: 6.19
## Range: 10.4 33.9
##
## Statistik MPG - Testing:
## Mean: 19.68
## SD: 5.89
## Range: 14.3 30.4
# Feature scaling for SVR: standardize every predictor, leave mpg untouched.
# Centering and scaling parameters are estimated on the training data only and
# then applied unchanged to BOTH splits, so the two splits share one code path
# and no test-set statistics are used.
feature_cols <- setdiff(names(train_data), "mpg")
scale_params <- lapply(train_data[feature_cols], function(x) {
  col_sd <- sd(x)
  # A constant training column has sd 0 (NA with a single row). Dividing by it
  # would produce NaN/Inf, so fall back to a scale of 1; such a column simply
  # becomes all zeros after centering.
  if (is.na(col_sd) || col_sd == 0) {
    col_sd <- 1
  }
  list(center = mean(x), scale = col_sd)
})

# Apply the stored parameters to the training and test predictors.
train_scaled <- train_data
test_scaled <- test_data
for (col in feature_cols) {
  params <- scale_params[[col]]
  train_scaled[[col]] <- (train_data[[col]] - params$center) / params$scale
  test_scaled[[col]] <- (test_data[[col]] - params$center) / params$scale
}
# The target variable is not scaled, to keep errors interpretable in mpg.
cat("Data berhasil di-scale. Target variable (mpg) tetap dalam skala asli.\n")
## Data berhasil di-scale. Target variable (mpg) tetap dalam skala asli.
# Baseline for comparison: ordinary least squares of mpg on every other
# column, fitted on the unscaled training data.
ols_model <- lm(formula = mpg ~ ., data = train_data)
# Coefficient table and fit statistics.
summary(ols_model)
##
## Call:
## lm(formula = mpg ~ ., data = train_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.8335 -1.2145 -0.0044 1.1104 4.1136
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -14.72655 27.33755 -0.539 0.599
## cyl 1.33019 1.41806 0.938 0.365
## disp 0.01218 0.01887 0.645 0.530
## hp -0.01756 0.02588 -0.678 0.509
## drat 2.67080 2.51575 1.062 0.308
## wt -3.65531 2.37988 -1.536 0.149
## qsec 1.02957 0.86524 1.190 0.255
## vs -0.37848 3.00566 -0.126 0.902
## am 1.82482 2.74395 0.665 0.518
## gear 3.74182 2.99973 1.247 0.234
## carb -1.42190 1.42397 -0.999 0.336
##
## Residual standard error: 2.641 on 13 degrees of freedom
## Multiple R-squared: 0.8971, Adjusted R-squared: 0.818
## F-statistic: 11.33 on 10 and 13 DF, p-value: 6.948e-05
# OLS predictions on both splits.
pred_ols_train <- predict(ols_model, newdata = train_data)
pred_ols_test <- predict(ols_model, newdata = test_data)

# OLS evaluation. Note that the "R2" values are the squared correlation
# between observed and predicted mpg, not 1 - SSE/SST.
ols_train_rmse <- Metrics::rmse(actual = train_data$mpg, predicted = pred_ols_train)
ols_train_mae <- Metrics::mae(actual = train_data$mpg, predicted = pred_ols_train)
ols_train_r2 <- cor(train_data$mpg, pred_ols_train)^2

ols_test_rmse <- Metrics::rmse(actual = test_data$mpg, predicted = pred_ols_test)
ols_test_mae <- Metrics::mae(actual = test_data$mpg, predicted = pred_ols_test)
ols_test_r2 <- cor(test_data$mpg, pred_ols_test)^2

cat("=== OLS Performance ===\n")
## === OLS Performance ===
cat(
  "Training - RMSE:", round(ols_train_rmse, digits = 3),
  "MAE:", round(ols_train_mae, digits = 3),
  "R²:", round(ols_train_r2, digits = 3), "\n"
)
## Training - RMSE: 1.944 MAE: 1.527 R²: 0.897
cat(
  "Testing - RMSE:", round(ols_test_rmse, digits = 3),
  "MAE:", round(ols_test_mae, digits = 3),
  "R²:", round(ols_test_r2, digits = 3), "\n"
)
## Testing - RMSE: 3.989 MAE: 2.844 R²: 0.658
# Epsilon-regression SVR with a linear kernel on the standardized training
# data. Arguments are kept in this order because summary() prints the call.
svr_linear <- svm(
  mpg ~ .,
  data = train_scaled,
  type = "eps-regression",
  kernel = "linear",
  cost = 1,
  epsilon = 0.1,
  scale = FALSE # predictors were already standardized manually
)
# Model summary (parameters and number of support vectors).
summary(svr_linear)
##
## Call:
## svm(formula = mpg ~ ., data = train_scaled, type = "eps-regression",
## kernel = "linear", cost = 1, epsilon = 0.1, scale = FALSE)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: linear
## cost: 1
## gamma: 0.1
## epsilon: 0.1
##
##
## Number of Support Vectors: 24
# Linear-kernel SVR predictions on the standardized train/test predictors.
pred_svr_linear_train <- predict(svr_linear, newdata = train_scaled)
pred_svr_linear_test <- predict(svr_linear, newdata = test_scaled)

# Evaluation against the observed mpg. "R2" is the squared correlation
# between observed and predicted values.
svr_linear_train_rmse <- Metrics::rmse(actual = train_data$mpg, predicted = pred_svr_linear_train)
svr_linear_train_mae <- Metrics::mae(actual = train_data$mpg, predicted = pred_svr_linear_train)
svr_linear_train_r2 <- cor(train_data$mpg, pred_svr_linear_train)^2

svr_linear_test_rmse <- Metrics::rmse(actual = test_data$mpg, predicted = pred_svr_linear_test)
svr_linear_test_mae <- Metrics::mae(actual = test_data$mpg, predicted = pred_svr_linear_test)
svr_linear_test_r2 <- cor(test_data$mpg, pred_svr_linear_test)^2

cat("=== SVR Linear Performance ===\n")
## === SVR Linear Performance ===
cat(
  "Training - RMSE:", round(svr_linear_train_rmse, digits = 3),
  "MAE:", round(svr_linear_train_mae, digits = 3),
  "R²:", round(svr_linear_train_r2, digits = 3), "\n"
)
## Training - RMSE: 2.25 MAE: 1.601 R²: 0.877
cat(
  "Testing - RMSE:", round(svr_linear_test_rmse, digits = 3),
  "MAE:", round(svr_linear_test_mae, digits = 3),
  "R²:", round(svr_linear_test_r2, digits = 3), "\n"
)
## Testing - RMSE: 2.913 MAE: 2.328 R²: 0.776
## Support Vectors: 24
# Epsilon-regression SVR with a radial (RBF) kernel and hand-picked starting
# hyperparameters. Arguments are kept in this order because summary() prints
# the call.
svr_rbf <- svm(
  mpg ~ .,
  data = train_scaled,
  type = "eps-regression",
  kernel = "radial",
  cost = 1,
  epsilon = 0.1,
  gamma = 0.1,
  scale = FALSE # predictors were already standardized manually
)
# Model summary (parameters and number of support vectors).
summary(svr_rbf)
##
## Call:
## svm(formula = mpg ~ ., data = train_scaled, type = "eps-regression",
## kernel = "radial", cost = 1, epsilon = 0.1, gamma = 0.1, scale = FALSE)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: radial
## cost: 1
## gamma: 0.1
## epsilon: 0.1
##
##
## Number of Support Vectors: 24
# RBF-kernel SVR predictions on the standardized train/test predictors.
pred_svr_rbf_train <- predict(svr_rbf, newdata = train_scaled)
pred_svr_rbf_test <- predict(svr_rbf, newdata = test_scaled)

# Evaluation against the observed mpg. "R2" is the squared correlation
# between observed and predicted values.
svr_rbf_train_rmse <- Metrics::rmse(actual = train_data$mpg, predicted = pred_svr_rbf_train)
svr_rbf_train_mae <- Metrics::mae(actual = train_data$mpg, predicted = pred_svr_rbf_train)
svr_rbf_train_r2 <- cor(train_data$mpg, pred_svr_rbf_train)^2

svr_rbf_test_rmse <- Metrics::rmse(actual = test_data$mpg, predicted = pred_svr_rbf_test)
svr_rbf_test_mae <- Metrics::mae(actual = test_data$mpg, predicted = pred_svr_rbf_test)
svr_rbf_test_r2 <- cor(test_data$mpg, pred_svr_rbf_test)^2

cat("=== SVR RBF Performance ===\n")
## === SVR RBF Performance ===
cat(
  "Training - RMSE:", round(svr_rbf_train_rmse, digits = 3),
  "MAE:", round(svr_rbf_train_mae, digits = 3),
  "R²:", round(svr_rbf_train_r2, digits = 3), "\n"
)
## Training - RMSE: 4.186 MAE: 2.946 R²: 0.788
cat(
  "Testing - RMSE:", round(svr_rbf_test_rmse, digits = 3),
  "MAE:", round(svr_rbf_test_mae, digits = 3),
  "R²:", round(svr_rbf_test_r2, digits = 3), "\n"
)
## Testing - RMSE: 4.123 MAE: 3.177 R²: 0.82
## Support Vectors: 24
# Grid search for the RBF-kernel SVR using 5-fold cross-validation over
# cost x epsilon x gamma.
#
# scale = FALSE is forwarded to svm() so that tuning happens under the same
# conditions as the final fits, which all use scale = FALSE on the manually
# standardized predictors. Without it, svm()'s default scaling standardizes
# the response as well, so the selected epsilon and cost would be expressed
# on a different response scale than the one the final models are trained on.
tune_result <- tune(svm, mpg ~ .,
  data = train_scaled,
  type = "eps-regression",
  kernel = "radial",
  scale = FALSE,
  ranges = list(
    cost = c(0.1, 1, 10, 100),
    epsilon = c(0.01, 0.1, 0.2, 0.5),
    gamma = c(0.01, 0.1, 0.5, 1)
  ),
  tunecontrol = tune.control(cross = 5)
)
# Keep the winning combination; later steps read best_params$cost,
# best_params$epsilon and best_params$gamma, and no other visible line
# defines this object.
best_params <- tune_result$best.parameters
# NOTE: the recorded output that follows was produced before scale = FALSE was
# added to the search, so the reported values may differ on a re-run.
# Best parameters
print(tune_result)
##
## Parameter tuning of 'svm':
##
## - sampling method: 5-fold cross validation
##
## - best parameters:
## cost epsilon gamma
## 100 0.5 0.01
##
## - best performance: 6.517972
##
## Best parameters:
## Cost: 100
## Epsilon: 0.5
## Gamma: 0.01
# Refit the RBF-kernel SVR on the full standardized training set using the
# hyperparameters selected by the grid search.
svr_best <- svm(
  mpg ~ .,
  data = train_scaled,
  type = "eps-regression",
  kernel = "radial",
  cost = best_params$cost,
  epsilon = best_params$epsilon,
  gamma = best_params$gamma,
  scale = FALSE # predictors were already standardized manually
)
# Predictions of the tuned model on the standardized train/test predictors.
pred_svr_best_train <- predict(svr_best, newdata = train_scaled)
pred_svr_best_test <- predict(svr_best, newdata = test_scaled)

# Evaluation against the observed mpg. "R2" is the squared correlation
# between observed and predicted values.
svr_best_train_rmse <- Metrics::rmse(actual = train_data$mpg, predicted = pred_svr_best_train)
svr_best_train_mae <- Metrics::mae(actual = train_data$mpg, predicted = pred_svr_best_train)
svr_best_train_r2 <- cor(train_data$mpg, pred_svr_best_train)^2

svr_best_test_rmse <- Metrics::rmse(actual = test_data$mpg, predicted = pred_svr_best_test)
svr_best_test_mae <- Metrics::mae(actual = test_data$mpg, predicted = pred_svr_best_test)
svr_best_test_r2 <- cor(test_data$mpg, pred_svr_best_test)^2

cat("=== SVR Best Model Performance ===\n")
## === SVR Best Model Performance ===
cat(
  "Training - RMSE:", round(svr_best_train_rmse, digits = 3),
  "MAE:", round(svr_best_train_mae, digits = 3),
  "R²:", round(svr_best_train_r2, digits = 3), "\n"
)
## Training - RMSE: 1.912 MAE: 1.327 R²: 0.91
cat(
  "Testing - RMSE:", round(svr_best_test_rmse, digits = 3),
  "MAE:", round(svr_best_test_mae, digits = 3),
  "R²:", round(svr_best_test_r2, digits = 3), "\n"
)
## Testing - RMSE: 2.696 MAE: 2.135 R²: 0.78
## Support Vectors: 23
Untuk visualisasi epsilon-tube, kita akan menggunakan hubungan 1D antara weight (wt) dan mpg.
# One-predictor datasets for the epsilon-tube visualization: standardized
# weight as the only feature, mpg on its original scale as the target.
train_1d <- data.frame(
  wt = train_scaled$wt,
  mpg = train_data$mpg
)
test_1d <- data.frame(
  wt = test_scaled$wt,
  mpg = test_data$mpg
)

# 1-D SVR model for the visualization.
# scale = FALSE matters here: with svm()'s default scaling the response is
# standardized internally, so epsilon would be measured in standard deviations
# of mpg, while the plots that follow draw the tube as prediction +/- epsilon
# in mpg units. Disabling the internal scaling (wt is already standardized)
# makes the drawn tube the tube the model was actually fitted with, and keeps
# this model consistent with the other fits, which all use scale = FALSE.
svr_1d <- svm(mpg ~ wt,
  data = train_1d,
  type = "eps-regression",
  kernel = "radial",
  cost = best_params$cost,
  epsilon = best_params$epsilon,
  gamma = best_params$gamma,
  scale = FALSE
)
# NOTE: the recorded count below comes from a run made before scale = FALSE
# was added; expect a different number on a re-run.
cat("Support Vectors untuk model 1D:", svr_1d$tot.nSV, "\n")
## Support Vectors untuk model 1D: 8
#' Plot a fitted 1-D SVR curve with its epsilon-tube and support vectors
#'
#' @param model Fitted model. `predict(model, data.frame(wt = ...))` must
#'   return numeric predictions and `model$index` must hold the row positions
#'   of the support vectors within `train_data`. If the model carries a
#'   `y.scale` element (as e1071 models fitted with response scaling do), the
#'   tube width is converted to the response scale.
#' @param train_data Data frame with columns `wt` and `mpg`.
#' @param epsilon Epsilon the model was fitted with.
#' @param title Plot title.
#' @return A ggplot object.
visualize_epsilon_tube <- function(model, train_data, epsilon, title) {
  # Prediction grid extending 0.5 beyond the observed wt range on both sides.
  wt_range <- seq(min(train_data$wt) - 0.5, max(train_data$wt) + 0.5,
                  length.out = 100)
  curve_data <- data.frame(wt = wt_range)
  curve_data$pred <- as.numeric(predict(model, curve_data))

  # Half-width of the tube in the units of the plotted response. When the
  # model standardized the response internally, epsilon is in SD units and
  # must be multiplied by the stored scale; otherwise it is used as given.
  y_sd <- model$y.scale[["scaled:scale"]]
  tube_half_width <- if (is.null(y_sd)) epsilon else epsilon * y_sd

  # Split rows into support vectors and the rest with a logical mask.
  # Negative indexing (train_data[-idx, ]) would drop EVERY row when idx is
  # empty, so it is avoided here.
  sv_indices <- model$index
  is_sv <- seq_len(nrow(train_data)) %in% sv_indices
  sv_data <- train_data[is_sv, , drop = FALSE]
  non_sv_data <- train_data[!is_sv, , drop = FALSE]

  ggplot() +
    # Epsilon tube
    geom_ribbon(data = curve_data,
                aes(x = wt, ymin = pred - tube_half_width,
                    ymax = pred + tube_half_width),
                alpha = 0.2, fill = "gray") +
    # Regression line
    geom_line(data = curve_data,
              aes(x = wt, y = pred), color = "red", size = 1.2) +
    # Support vectors (hollow red circles)
    geom_point(data = sv_data, aes(x = wt, y = mpg),
               color = "red", size = 4, shape = 1, stroke = 2) +
    # Non-support vectors
    geom_point(data = non_sv_data, aes(x = wt, y = mpg),
               color = "blue", size = 3, alpha = 0.7) +
    # Epsilon boundaries
    geom_line(data = curve_data,
              aes(x = wt, y = pred + tube_half_width),
              linetype = "dashed", color = "gray40") +
    geom_line(data = curve_data,
              aes(x = wt, y = pred - tube_half_width),
              linetype = "dashed", color = "gray40") +
    labs(title = title,
         x = "Weight (scaled)",
         y = "Miles per Gallon",
         subtitle = paste("Epsilon =", epsilon, "| Support Vectors =",
                          length(sv_indices))) +
    theme_minimal() +
    theme(legend.position = "bottom")
}
# Draw the epsilon-tube for the 1-D model at the tuned epsilon.
epsilon_plot <- visualize_epsilon_tube(
  model = svr_1d,
  train_data = train_1d,
  epsilon = best_params$epsilon,
  title = "SVR Epsilon-Tube Visualization"
)
print(epsilon_plot)
# Residual analysis to inspect the support vectors
# Residuals of the 1-D model on its training data, plus a flag marking the
# rows the model kept as support vectors.
train_1d$predicted <- predict(svr_1d, train_1d)
train_1d$residuals <- train_1d$mpg - train_1d$predicted
train_1d$abs_residuals <- abs(train_1d$residuals)
# seq_len() instead of 1:nrow(): the latter yields c(1, 0) for an empty frame.
train_1d$is_sv <- seq_len(nrow(train_1d)) %in% svr_1d$index

# Half-width of the epsilon-tube in residual (mpg) units. If the model
# standardized the response internally, epsilon is in SD units and has to be
# multiplied by the stored scale; otherwise it already is in mpg.
y_sd <- svr_1d$y.scale[["scaled:scale"]]
tube_half_width <- if (is.null(y_sd)) {
  best_params$epsilon
} else {
  best_params$epsilon * y_sd
}

# Residual plot: dashed lines mark the tube limits, the red line marks zero.
ggplot(train_1d, aes(x = predicted, y = residuals, color = is_sv, size = is_sv)) +
  geom_hline(yintercept = c(-tube_half_width, tube_half_width),
             linetype = "dashed", color = "gray40") +
  geom_hline(yintercept = 0, color = "red") +
  geom_point(alpha = 0.7) +
  scale_color_manual(values = c("FALSE" = "blue", "TRUE" = "red"),
                     labels = c("Non-Support Vector", "Support Vector")) +
  scale_size_manual(values = c("FALSE" = 2, "TRUE" = 4), guide = "none") +
  labs(title = "Residuals Analysis - Support Vectors Identification",
       x = "Predicted MPG",
       y = "Residuals",
       color = "Point Type",
       subtitle = paste("Points outside epsilon-tube (±", signif(tube_half_width, 3),
                        ") become support vectors")) +
  theme_minimal()
# Try a range of epsilon values
# Sensitivity to epsilon: refit the RBF-kernel SVR for each candidate value
# (cost and gamma held at their tuned values) and record train/test RMSE and
# the number of support vectors.
epsilon_values <- c(0.01, 0.05, 0.1, 0.2, 0.5, 1.0)
# Result table, one row per candidate; metric columns start at 0 and are
# filled in by the loop.
epsilon_results <- data.frame(
  Epsilon = epsilon_values,
  RMSE_Train = 0,
  RMSE_Test = 0,
  Support_Vectors = 0
)
for (i in seq_along(epsilon_values)) {
  # Model with the i-th epsilon
  model_eps <- svm(
    mpg ~ .,
    data = train_scaled,
    type = "eps-regression",
    kernel = "radial",
    cost = best_params$cost,
    gamma = best_params$gamma,
    epsilon = epsilon_values[i],
    scale = FALSE
  )
  # Evaluation on both splits
  pred_train <- predict(model_eps, newdata = train_scaled)
  pred_test <- predict(model_eps, newdata = test_scaled)
  epsilon_results[["RMSE_Train"]][i] <- Metrics::rmse(actual = train_data$mpg, predicted = pred_train)
  epsilon_results[["RMSE_Test"]][i] <- Metrics::rmse(actual = test_data$mpg, predicted = pred_test)
  epsilon_results[["Support_Vectors"]][i] <- model_eps$tot.nSV
}
print(epsilon_results)
## Epsilon RMSE_Train RMSE_Test Support_Vectors
## 1 0.01 2.001340 2.883725 24
## 2 0.05 1.992592 2.900373 24
## 3 0.10 1.979962 2.889359 24
## 4 0.20 1.957760 2.866061 24
## 5 0.50 1.911806 2.696299 23
## 6 1.00 1.918997 2.376505 17
# Plot the effect of epsilon.
# Reshape to long form (one row per epsilon/metric pair) so both RMSE series
# can be mapped to color. pivot_longer() replaces the superseded gather().
eps_long <- epsilon_results %>%
  pivot_longer(cols = c(RMSE_Train, RMSE_Test),
               names_to = "Metric", values_to = "Value")

# Left panel: train and test RMSE against epsilon.
p1 <- ggplot(eps_long, aes(x = Epsilon, y = Value, color = Metric)) +
  geom_line(size = 1.2) +
  geom_point(size = 3) +
  labs(title = "Pengaruh Parameter Epsilon terhadap RMSE",
       x = "Epsilon",
       y = "RMSE",
       color = "Dataset") +
  theme_minimal()

# Right panel: number of support vectors against epsilon.
p2 <- ggplot(epsilon_results, aes(x = Epsilon, y = Support_Vectors)) +
  geom_line(size = 1.2, color = "darkgreen") +
  geom_point(size = 3, color = "darkgreen") +
  labs(title = "Pengaruh Parameter Epsilon terhadap Jumlah Support Vectors",
       x = "Epsilon",
       y = "Jumlah Support Vectors") +
  theme_minimal()

grid.arrange(p1, p2, ncol = 2)
# Try a range of C (cost) values
# Sensitivity to the cost parameter C: refit the RBF-kernel SVR for each
# candidate value (gamma and epsilon held at their tuned values) and record
# train/test RMSE and the number of support vectors.
cost_values <- c(0.01, 0.1, 1, 10, 100, 1000)
# Result table, one row per candidate; metric columns start at 0 and are
# filled in by the loop.
cost_results <- data.frame(
  Cost = cost_values,
  RMSE_Train = 0,
  RMSE_Test = 0,
  Support_Vectors = 0
)
for (i in seq_along(cost_values)) {
  # Model with the i-th cost
  model_cost <- svm(
    mpg ~ .,
    data = train_scaled,
    type = "eps-regression",
    kernel = "radial",
    cost = cost_values[i],
    gamma = best_params$gamma,
    epsilon = best_params$epsilon,
    scale = FALSE
  )
  # Evaluation on both splits
  pred_train <- predict(model_cost, newdata = train_scaled)
  pred_test <- predict(model_cost, newdata = test_scaled)
  cost_results[["RMSE_Train"]][i] <- Metrics::rmse(actual = train_data$mpg, predicted = pred_train)
  cost_results[["RMSE_Test"]][i] <- Metrics::rmse(actual = test_data$mpg, predicted = pred_test)
  cost_results[["Support_Vectors"]][i] <- model_cost$tot.nSV
}
print(cost_results)
## Cost RMSE_Train RMSE_Test Support_Vectors
## 1 1e-02 6.092428 5.498389 22
## 2 1e-01 5.937176 5.353396 22
## 3 1e+00 4.749888 4.222613 22
## 4 1e+01 2.608767 2.108564 22
## 5 1e+02 1.911806 2.696299 23
## 6 1e+03 1.420705 4.905910 21
# Plot the effect of C.
# Reshape to long form (one row per cost/metric pair) so both RMSE series can
# be mapped to color. pivot_longer() replaces the superseded gather().
cost_long <- cost_results %>%
  pivot_longer(cols = c(RMSE_Train, RMSE_Test),
               names_to = "Metric", values_to = "Value")

# The candidate costs span several orders of magnitude, so the x axis is
# log10(C); tick marks are placed at the candidates and labeled with the raw
# cost values.
p1 <- ggplot(cost_long, aes(x = log10(Cost), y = Value, color = Metric)) +
  geom_line(size = 1.2) +
  geom_point(size = 3) +
  labs(title = "Pengaruh Parameter C terhadap RMSE",
       x = "log10(C)",
       y = "RMSE",
       color = "Dataset") +
  theme_minimal() +
  scale_x_continuous(breaks = log10(cost_values), labels = cost_values)

# Number of support vectors against log10(C), same axis treatment.
p2 <- ggplot(cost_results, aes(x = log10(Cost), y = Support_Vectors)) +
  geom_line(size = 1.2, color = "purple") +
  geom_point(size = 3, color = "purple") +
  labs(title = "Pengaruh Parameter C terhadap Jumlah Support Vectors",
       x = "log10(C)",
       y = "Jumlah Support Vectors") +
  theme_minimal() +
  scale_x_continuous(breaks = log10(cost_values), labels = cost_values)

grid.arrange(p1, p2, ncol = 2)
# Try a range of gamma values
# Sensitivity to gamma: refit the RBF-kernel SVR for each candidate value
# (cost and epsilon held at their tuned values) and record train/test RMSE
# and the number of support vectors.
gamma_values <- c(0.001, 0.01, 0.1, 0.5, 1, 2, 5)
# Result table, one row per candidate; metric columns start at 0 and are
# filled in by the loop.
gamma_results <- data.frame(
  Gamma = gamma_values,
  RMSE_Train = 0,
  RMSE_Test = 0,
  Support_Vectors = 0
)
for (i in seq_along(gamma_values)) {
  # Model with the i-th gamma
  model_gamma <- svm(
    mpg ~ .,
    data = train_scaled,
    type = "eps-regression",
    kernel = "radial",
    cost = best_params$cost,
    gamma = gamma_values[i],
    epsilon = best_params$epsilon,
    scale = FALSE
  )
  # Evaluation on both splits
  pred_train <- predict(model_gamma, newdata = train_scaled)
  pred_test <- predict(model_gamma, newdata = test_scaled)
  gamma_results[["RMSE_Train"]][i] <- Metrics::rmse(actual = train_data$mpg, predicted = pred_train)
  gamma_results[["RMSE_Test"]][i] <- Metrics::rmse(actual = test_data$mpg, predicted = pred_test)
  gamma_results[["Support_Vectors"]][i] <- model_gamma$tot.nSV
}
print(gamma_results)
## Gamma RMSE_Train RMSE_Test Support_Vectors
## 1 0.001 2.5070875 2.055603 23
## 2 0.010 1.9118058 2.696299 23
## 3 0.100 0.6394558 5.765392 23
## 4 0.500 0.4999488 5.002868 24
## 5 1.000 0.4856315 4.781565 21
## 6 2.000 0.4940382 4.974310 22
## 7 5.000 0.4895685 5.365265 23
# Plot the effect of gamma.
# Reshape to long form (one row per gamma/metric pair) so both RMSE series
# can be mapped to color. pivot_longer() replaces the superseded gather().
gamma_long <- gamma_results %>%
  pivot_longer(cols = c(RMSE_Train, RMSE_Test),
               names_to = "Metric", values_to = "Value")

# Train and test RMSE against log10(gamma); tick marks are placed at the
# candidates and labeled with the raw gamma values.
p1 <- ggplot(gamma_long, aes(x = log10(Gamma), y = Value, color = Metric)) +
  geom_line(size = 1.2) +
  geom_point(size = 3) +
  labs(title = "Pengaruh Parameter Gamma terhadap RMSE",
       x = "log10(Gamma)",
       y = "RMSE",
       color = "Dataset") +
  theme_minimal() +
  scale_x_continuous(breaks = log10(gamma_values), labels = gamma_values)
p2 <- ggplot(gamma_results, aes(x = log10(Gamma), y = Support_Vectors)) +
geom_line(size = 1)