Support Vector Regression (SVR) extends the Support Vector Machine to regression problems. Its working principle can be summarized by the ε-insensitive formulation sketched below:
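The standard ε-SVR primal problem (the formulation behind the `eps-regression` type used in `e1071::svm` later in this document) minimizes the norm of the weight vector while slack variables $\xi_i, \xi_i^*$ absorb observations that fall outside the ε-tube, penalized by the cost parameter $C$:

$$
\min_{w,\,b,\,\xi,\,\xi^*} \; \frac{1}{2}\lVert w \rVert^2 + C \sum_{i=1}^{n} \left(\xi_i + \xi_i^*\right)
\quad \text{subject to} \quad
\begin{cases}
y_i - \langle w, x_i \rangle - b \le \varepsilon + \xi_i \\
\langle w, x_i \rangle + b - y_i \le \varepsilon + \xi_i^* \\
\xi_i,\; \xi_i^* \ge 0 .
\end{cases}
$$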
Advantages of SVR:

- Robust to outliers
- Effective for high-dimensional data
- Helps avoid overfitting through regularization
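As a small illustration of the robustness point (a standalone sketch; the function names below are ours and are not part of the analysis that follows), the ε-insensitive loss ignores residuals inside the tube and grows only linearly outside it, so a single extreme point contributes far less than under squared-error loss:

# Epsilon-insensitive loss vs squared-error loss (illustrative only)
eps_insensitive_loss <- function(residual, epsilon = 0.5) pmax(0, abs(residual) - epsilon)
squared_loss <- function(residual) residual^2
# For an outlier with residual 10, squared loss gives 100,
# while the epsilon-insensitive loss (epsilon = 0.5) gives only 9.5
eps_insensitive_loss(10)
squared_loss(10)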
# Load required libraries
library(MASS)      # for the Boston dataset
library(e1071)     # for SVR
library(ggplot2)   # for plotting
library(caret)     # for model evaluation
library(gridExtra) # for arranging multiple plots
library(knitr)     # for tables
library(corrplot)  # for the correlation plot
library(dplyr)     # for data manipulation
library(reshape2)  # for data reshaping

# Set seed for reproducibility
set.seed(123)

# Load the Boston housing data and inspect its structure
df <- Boston
str(df)
## 'data.frame': 506 obs. of 14 variables:
## $ crim : num 0.00632 0.02731 0.02729 0.03237 0.06905 ...
## $ zn : num 18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
## $ indus : num 2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
## $ chas : int 0 0 0 0 0 0 0 0 0 0 ...
## $ nox : num 0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
## $ rm : num 6.58 6.42 7.18 7 7.15 ...
## $ age : num 65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
## $ dis : num 4.09 4.97 4.97 6.06 6.06 ...
## $ rad : int 1 2 2 3 3 3 5 5 5 5 ...
## $ tax : num 296 242 242 222 222 222 311 311 311 311 ...
## $ ptratio: num 15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
## $ black : num 397 397 393 395 397 ...
## $ lstat : num 4.98 9.14 4.03 2.94 5.33 ...
## $ medv : num 24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
# Summary statistics for each variable
summary(df)
## crim zn indus chas
## Min. : 0.00632 Min. : 0.00 Min. : 0.46 Min. :0.00000
## 1st Qu.: 0.08205 1st Qu.: 0.00 1st Qu.: 5.19 1st Qu.:0.00000
## Median : 0.25651 Median : 0.00 Median : 9.69 Median :0.00000
## Mean : 3.61352 Mean : 11.36 Mean :11.14 Mean :0.06917
## 3rd Qu.: 3.67708 3rd Qu.: 12.50 3rd Qu.:18.10 3rd Qu.:0.00000
## Max. :88.97620 Max. :100.00 Max. :27.74 Max. :1.00000
## nox rm age dis
## Min. :0.3850 Min. :3.561 Min. : 2.90 Min. : 1.130
## 1st Qu.:0.4490 1st Qu.:5.886 1st Qu.: 45.02 1st Qu.: 2.100
## Median :0.5380 Median :6.208 Median : 77.50 Median : 3.207
## Mean :0.5547 Mean :6.285 Mean : 68.57 Mean : 3.795
## 3rd Qu.:0.6240 3rd Qu.:6.623 3rd Qu.: 94.08 3rd Qu.: 5.188
## Max. :0.8710 Max. :8.780 Max. :100.00 Max. :12.127
## rad tax ptratio black
## Min. : 1.000 Min. :187.0 Min. :12.60 Min. : 0.32
## 1st Qu.: 4.000 1st Qu.:279.0 1st Qu.:17.40 1st Qu.:375.38
## Median : 5.000 Median :330.0 Median :19.05 Median :391.44
## Mean : 9.549 Mean :408.2 Mean :18.46 Mean :356.67
## 3rd Qu.:24.000 3rd Qu.:666.0 3rd Qu.:20.20 3rd Qu.:396.23
## Max. :24.000 Max. :711.0 Max. :22.00 Max. :396.90
## lstat medv
## Min. : 1.73 Min. : 5.00
## 1st Qu.: 6.95 1st Qu.:17.02
## Median :11.36 Median :21.20
## Mean :12.65 Mean :22.53
## 3rd Qu.:16.95 3rd Qu.:25.00
## Max. :37.97 Max. :50.00
# Show the first few rows
kable(head(df), caption = "Boston Housing Dataset - First 6 Rows")

| crim | zn | indus | chas | nox | rm | age | dis | rad | tax | ptratio | black | lstat | medv |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0.00632 | 18 | 2.31 | 0 | 0.538 | 6.575 | 65.2 | 4.0900 | 1 | 296 | 15.3 | 396.90 | 4.98 | 24.0 |
| 0.02731 | 0 | 7.07 | 0 | 0.469 | 6.421 | 78.9 | 4.9671 | 2 | 242 | 17.8 | 396.90 | 9.14 | 21.6 |
| 0.02729 | 0 | 7.07 | 0 | 0.469 | 7.185 | 61.1 | 4.9671 | 2 | 242 | 17.8 | 392.83 | 4.03 | 34.7 |
| 0.03237 | 0 | 2.18 | 0 | 0.458 | 6.998 | 45.8 | 6.0622 | 3 | 222 | 18.7 | 394.63 | 2.94 | 33.4 |
| 0.06905 | 0 | 2.18 | 0 | 0.458 | 7.147 | 54.2 | 6.0622 | 3 | 222 | 18.7 | 396.90 | 5.33 | 36.2 |
| 0.02985 | 0 | 2.18 | 0 | 0.458 | 6.430 | 58.7 | 6.0622 | 3 | 222 | 18.7 | 394.12 | 5.21 | 28.7 |
# Histogram of the target variable (medv)
p1 <- ggplot(df, aes(x = medv)) +
geom_histogram(bins = 30, fill = "lightblue", alpha = 0.7) +
labs(title = "Distribution of House Prices (medv)", x = "Median Value ($1000s)", y = "Frequency") +
theme_bw()
# Boxplot for outlier detection
p2 <- ggplot(df, aes(y = medv)) +
geom_boxplot(fill = "lightpink", alpha = 0.7) +
labs(title = "Boxplot of House Prices", y = "Median Value ($1000s)") +
theme_bw()
grid.arrange(p1, p2, ncol = 2)

# Correlation matrix
cor_matrix <- cor(df)
corrplot(cor_matrix, method = "color", type = "upper",
tl.cex = 0.8, tl.col = "black", title = "Correlation Matrix")

# Scatter plots of key predictors vs medv
p1 <- ggplot(df, aes(x = lstat, y = medv)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = FALSE, color = "lightcoral") +
labs(title = "LSTAT vs MEDV", x = "% Lower Status Population", y = "Median Value") +
theme_bw()
p2 <- ggplot(df, aes(x = rm, y = medv)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = FALSE, color = "lightcyan") +
labs(title = "RM vs MEDV", x = "Average Rooms per Dwelling", y = "Median Value") +
theme_bw()
p3 <- ggplot(df, aes(x = crim, y = medv)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = FALSE, color = "lightgreen") +
labs(title = "CRIM vs MEDV", x = "Crime Rate", y = "Median Value") +
theme_bw()
p4 <- ggplot(df, aes(x = dis, y = medv)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = FALSE, color = "lightpink") +
labs(title = "DIS vs MEDV", x = "Distance to Employment Centers", y = "Median Value") +
theme_bw()
grid.arrange(p1, p2, p3, p4, ncol = 2, nrow = 2)

# Check for missing values per column
cat("Missing values per column:\n")
colSums(is.na(df))
## Missing values per column:
## crim zn indus chas nox rm age dis rad tax
## 0 0 0 0 0 0 0 0 0 0
## ptratio black lstat medv
## 0 0 0 0
# Normalize (scale) the predictors
df_scaled <- df
df_scaled[, -ncol(df_scaled)] <- scale(df_scaled[, -ncol(df_scaled)])
# Split into training (80%) and test (20%) sets
train_index <- createDataPartition(df_scaled$medv, p = 0.8, list = FALSE)
train_data <- df_scaled[train_index, ]
test_data <- df_scaled[-train_index, ]
cat("Ukuran data training:", nrow(train_data), "\n")## Ukuran data training: 407
## Ukuran data testing: 99
# SVR Linear
svr_linear <- svm(medv ~ ., data = train_data,
type = "eps-regression",
kernel = "linear",
cost = 1,
epsilon = 0.1)
# Predictions
pred_svr_linear_train <- predict(svr_linear, train_data)
pred_svr_linear_test <- predict(svr_linear, test_data)
cat("SVR Linear Model Summary:\n")## SVR Linear Model Summary:
##
## Call:
## svm(formula = medv ~ ., data = train_data, type = "eps-regression",
## kernel = "linear", cost = 1, epsilon = 0.1)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: linear
## cost: 1
## gamma: 0.07692308
## epsilon: 0.1
##
##
## Number of Support Vectors: 307
# SVR with RBF kernel
svr_rbf <- svm(medv ~ ., data = train_data,
type = "eps-regression",
kernel = "radial",
cost = 10,
gamma = 0.1,
epsilon = 0.1)
# Predictions
pred_svr_rbf_train <- predict(svr_rbf, train_data)
pred_svr_rbf_test <- predict(svr_rbf, test_data)
cat("SVR RBF Model Summary:\n")## SVR RBF Model Summary:
##
## Call:
## svm(formula = medv ~ ., data = train_data, type = "eps-regression",
## kernel = "radial", cost = 10, gamma = 0.1, epsilon = 0.1)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: radial
## cost: 10
## gamma: 0.1
## epsilon: 0.1
##
##
## Number of Support Vectors: 277
# OLS model for comparison
ols_model <- lm(medv ~ ., data = train_data)
# Predictions
pred_ols_train <- predict(ols_model, train_data)
pred_ols_test <- predict(ols_model, test_data)
cat("OLS Model Summary:\n")## OLS Model Summary:
##
## Call:
## lm(formula = medv ~ ., data = train_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.9550 -2.7996 -0.4647 1.7767 25.0993
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.5130 0.2385 94.377 < 2e-16 ***
## crim -0.8073 0.3368 -2.397 0.016999 *
## zn 0.9197 0.3729 2.467 0.014062 *
## indus -0.0891 0.4774 -0.187 0.852059
## chas 0.5817 0.2389 2.435 0.015346 *
## nox -1.9850 0.5032 -3.945 9.45e-05 ***
## rm 2.4586 0.3172 7.751 7.87e-14 ***
## age 0.2765 0.4366 0.633 0.526905
## dis -2.9286 0.4856 -6.031 3.77e-09 ***
## rad 2.8816 0.6716 4.290 2.25e-05 ***
## tax -2.0875 0.7318 -2.852 0.004568 **
## ptratio -2.0798 0.3254 -6.391 4.66e-10 ***
## black 0.8984 0.2679 3.353 0.000877 ***
## lstat -4.0140 0.4226 -9.498 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.801 on 393 degrees of freedom
## Multiple R-squared: 0.7346, Adjusted R-squared: 0.7258
## F-statistic: 83.68 on 13 and 393 DF, p-value: < 2.2e-16
# Function to compute evaluation metrics (RMSE, MAE, R²)
calculate_metrics <- function(actual, predicted) {
rmse <- sqrt(mean((actual - predicted)^2))
mae <- mean(abs(actual - predicted))
r2 <- 1 - sum((actual - predicted)^2) / sum((actual - mean(actual))^2)
return(c(RMSE = rmse, MAE = mae, R2 = r2))
}

# Evaluation on the training data
train_metrics <- data.frame(
Model = c("SVR Linear", "SVR RBF", "OLS"),
rbind(
calculate_metrics(train_data$medv, pred_svr_linear_train),
calculate_metrics(train_data$medv, pred_svr_rbf_train),
calculate_metrics(train_data$medv, pred_ols_train)
)
)
# Evaluation on the test data
test_metrics <- data.frame(
Model = c("SVR Linear", "SVR RBF", "OLS"),
rbind(
calculate_metrics(test_data$medv, pred_svr_linear_test),
calculate_metrics(test_data$medv, pred_svr_rbf_test),
calculate_metrics(test_data$medv, pred_ols_test)
)
)
cat("Hasil Evaluasi pada Data Training:\n")## Hasil Evaluasi pada Data Training:
| Model | RMSE | MAE | R2 |
|---|---|---|---|
| SVR Linear | 5.0158 | 3.0860 | 0.7000 |
| SVR RBF | 1.6612 | 1.0852 | 0.9671 |
| OLS | 4.7173 | 3.2781 | 0.7346 |
cat("\nEvaluation results on the test data:\n")
##
## Evaluation results on the test data:
kable(test_metrics, digits = 4)

| Model | RMSE | MAE | R2 |
|---|---|---|---|
| SVR Linear | 4.6854 | 3.1500 | 0.7470 |
| SVR RBF | 2.6743 | 1.9644 | 0.9176 |
| OLS | 4.5889 | 3.3655 | 0.7573 |
# Build a data frame for plotting
plot_data_test <- data.frame(
Actual = test_data$medv,
SVR_Linear = pred_svr_linear_test,
SVR_RBF = pred_svr_rbf_test,
OLS = pred_ols_test
)
# Reshape the data for ggplot
plot_data_melted <- melt(plot_data_test, id.vars = "Actual",
variable.name = "Model", value.name = "Predicted")
# Plot Actual vs Predicted
ggplot(plot_data_melted, aes(x = Actual, y = Predicted, color = Model)) +
geom_point(alpha = 0.6, size = 2) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "black") +
facet_wrap(~Model, ncol = 3) +
labs(title = "Actual vs Predicted Values - Test Data",
x = "Actual Values", y = "Predicted Values") +
theme_bw() +
theme(legend.position = "bottom")

# Residual plots
residuals_data <- data.frame(
Predicted_SVR_Linear = pred_svr_linear_test,
Residuals_SVR_Linear = test_data$medv - pred_svr_linear_test,
Predicted_SVR_RBF = pred_svr_rbf_test,
Residuals_SVR_RBF = test_data$medv - pred_svr_rbf_test,
Predicted_OLS = pred_ols_test,
Residuals_OLS = test_data$medv - pred_ols_test
)
p1 <- ggplot(residuals_data, aes(x = Predicted_SVR_Linear, y = Residuals_SVR_Linear)) +
geom_point(alpha = 0.6) +
geom_hline(yintercept = 0, linetype = "dashed", color = "lightpink") +
labs(title = "SVR Linear - Residuals", x = "Predicted Values", y = "Residuals") +
theme_bw()
p2 <- ggplot(residuals_data, aes(x = Predicted_SVR_RBF, y = Residuals_SVR_RBF)) +
geom_point(alpha = 0.6) +
geom_hline(yintercept = 0, linetype = "dashed", color = "lightblue") +
labs(title = "SVR RBF - Residuals", x = "Predicted Values", y = "Residuals") +
theme_bw()
p3 <- ggplot(residuals_data, aes(x = Predicted_OLS, y = Residuals_OLS)) +
geom_point(alpha = 0.6) +
geom_hline(yintercept = 0, linetype = "dashed", color = "lightcoral") +
labs(title = "OLS - Residuals", x = "Predicted Values", y = "Residuals") +
theme_bw()
grid.arrange(p1, p2, p3, ncol = 3)

# To visualize the epsilon-tube, we use a single-predictor subset,
# with lstat (% lower status) as the sole predictor
# Fit 1D SVR models
train_1d <- train_data[, c("lstat", "medv")]
test_1d <- test_data[, c("lstat", "medv")]
svr_1d_linear <- svm(medv ~ lstat, data = train_1d,
type = "eps-regression",
kernel = "linear",
cost = 1,
epsilon = 0.5)
svr_1d_rbf <- svm(medv ~ lstat, data = train_1d,
type = "eps-regression",
kernel = "radial",
cost = 10,
gamma = 0.1,
epsilon = 0.5)
# Predictions for plotting
lstat_range <- seq(min(train_1d$lstat), max(train_1d$lstat), length.out = 100)
pred_data <- data.frame(lstat = lstat_range)
pred_1d_linear <- predict(svr_1d_linear, pred_data)
pred_1d_rbf <- predict(svr_1d_rbf, pred_data)
# Plot SVR Linear with epsilon-tube
p1 <- ggplot() +
geom_point(data = train_1d, aes(x = lstat, y = medv), alpha = 0.6) +
geom_line(data = data.frame(lstat = lstat_range, pred = pred_1d_linear),
aes(x = lstat, y = pred), color = "lightblue", size = 1) +
geom_ribbon(data = data.frame(lstat = lstat_range, pred = pred_1d_linear),
aes(x = lstat, ymin = pred - 0.5, ymax = pred + 0.5),
alpha = 0.3, fill = "yellow") +
labs(title = "SVR Linear dengan Epsilon-Tube",
x = "LSTAT (scaled)", y = "MEDV",
subtitle = "Zona kuning = epsilon-tube (ε = 0.5)") +
theme_bw()
# Plot SVR RBF with epsilon-tube
p2 <- ggplot() +
geom_point(data = train_1d, aes(x = lstat, y = medv), alpha = 0.6) +
geom_line(data = data.frame(lstat = lstat_range, pred = pred_1d_rbf),
aes(x = lstat, y = pred), color = "lightcoral", size = 1) +
geom_ribbon(data = data.frame(lstat = lstat_range, pred = pred_1d_rbf),
aes(x = lstat, ymin = pred - 0.5, ymax = pred + 0.5),
alpha = 0.3, fill = "yellow") +
labs(title = "SVR RBF dengan Epsilon-Tube",
x = "LSTAT (scaled)", y = "MEDV",
subtitle = "Zona kuning = epsilon-tube (ε = 0.5)") +
theme_bw()
grid.arrange(p1, p2, ncol = 2)

# Test several epsilon values
epsilon_values <- c(0.01, 0.1, 0.5, 1.0, 2.0)
epsilon_results <- data.frame()
for (eps in epsilon_values) {
svr_temp <- svm(medv ~ ., data = train_data,
type = "eps-regression",
kernel = "radial",
cost = 10,
gamma = 0.1,
epsilon = eps)
pred_temp <- predict(svr_temp, test_data)
metrics_temp <- calculate_metrics(test_data$medv, pred_temp)
epsilon_results <- rbind(epsilon_results,
data.frame(Epsilon = eps,
RMSE = metrics_temp[1],
MAE = metrics_temp[2],
R2 = metrics_temp[3]))
}
kable(epsilon_results, digits = 4, caption = "Effect of the Epsilon Parameter")

| | Epsilon | RMSE | MAE | R2 |
|---|---|---|---|---|
| RMSE | 0.01 | 2.7857 | 2.0143 | 0.9106 |
| RMSE1 | 0.10 | 2.6743 | 1.9644 | 0.9176 |
| RMSE2 | 0.50 | 3.5178 | 2.5759 | 0.8574 |
| RMSE3 | 1.00 | 5.5122 | 4.6506 | 0.6498 |
| RMSE4 | 2.00 | 10.7648 | 9.6435 | -0.3356 |
# Plot the effect of epsilon
p1 <- ggplot(epsilon_results, aes(x = Epsilon, y = RMSE)) +
geom_line(color = "lightblue") + geom_point(color = "lightblue") +
labs(title = "Effect of Epsilon on RMSE") + theme_bw()
p2 <- ggplot(epsilon_results, aes(x = Epsilon, y = R2)) +
geom_line(color = "palevioletred1") + geom_point(color = "palevioletred1") +
labs(title = "Effect of Epsilon on R²") + theme_bw()
grid.arrange(p1, p2, ncol = 2)

# Test several cost values
cost_values <- c(0.1, 1, 10, 100, 1000)
cost_results <- data.frame()
for (c_val in cost_values) {
svr_temp <- svm(medv ~ ., data = train_data,
type = "eps-regression",
kernel = "radial",
cost = c_val,
gamma = 0.1,
epsilon = 0.1)
pred_temp <- predict(svr_temp, test_data)
metrics_temp <- calculate_metrics(test_data$medv, pred_temp)
cost_results <- rbind(cost_results,
data.frame(Cost = c_val,
RMSE = metrics_temp[1],
MAE = metrics_temp[2],
R2 = metrics_temp[3]))
}
kable(cost_results, digits = 4, caption = "Effect of the Cost Parameter")

| | Cost | RMSE | MAE | R2 |
|---|---|---|---|---|
| RMSE | 1e-01 | 6.1386 | 3.5381 | 0.5657 |
| RMSE1 | 1e+00 | 3.8619 | 2.3528 | 0.8281 |
| RMSE2 | 1e+01 | 2.6743 | 1.9644 | 0.9176 |
| RMSE3 | 1e+02 | 3.4411 | 2.5219 | 0.8635 |
| RMSE4 | 1e+03 | 4.0788 | 2.9768 | 0.8083 |
# Plot the effect of cost
p1 <- ggplot(cost_results, aes(x = log10(Cost), y = RMSE)) +
geom_line(color = "lightcyan") + geom_point(color = "lightcyan") +
labs(title = "Effect of Cost on RMSE", x = "log10(Cost)") + theme_bw()
p2 <- ggplot(cost_results, aes(x = log10(Cost), y = R2)) +
geom_line(color = "lightpink") + geom_point(color = "lightpink") +
labs(title = "Effect of Cost on R²", x = "log10(Cost)") + theme_bw()
grid.arrange(p1, p2, ncol = 2)

# Test several gamma values
gamma_values <- c(0.001, 0.01, 0.1, 1, 10)
gamma_results <- data.frame()
for (g_val in gamma_values) {
svr_temp <- svm(medv ~ ., data = train_data,
type = "eps-regression",
kernel = "radial",
cost = 10,
gamma = g_val,
epsilon = 0.1)
pred_temp <- predict(svr_temp, test_data)
metrics_temp <- calculate_metrics(test_data$medv, pred_temp)
gamma_results <- rbind(gamma_results,
data.frame(Gamma = g_val,
RMSE = metrics_temp[1],
MAE = metrics_temp[2],
R2 = metrics_temp[3]))
}
kable(gamma_results, digits = 4, caption = "Effect of the Gamma Parameter")

| | Gamma | RMSE | MAE | R2 |
|---|---|---|---|---|
| RMSE | 1e-03 | 4.6989 | 3.1067 | 0.7455 |
| RMSE1 | 1e-02 | 3.7887 | 2.3406 | 0.8346 |
| RMSE2 | 1e-01 | 2.6743 | 1.9644 | 0.9176 |
| RMSE3 | 1e+00 | 4.7893 | 3.3144 | 0.7356 |
| RMSE4 | 1e+01 | 8.7717 | 6.2238 | 0.1132 |
# Plot the effect of gamma
p1 <- ggplot(gamma_results, aes(x = log10(Gamma), y = RMSE)) +
geom_line(color = "lightblue") + geom_point(color = "lightblue") +
labs(title = "Effect of Gamma on RMSE", x = "log10(Gamma)") + theme_bw()
p2 <- ggplot(gamma_results, aes(x = log10(Gamma), y = R2)) +
geom_line(color = "lightpink") + geom_point(color = "lightpink") +
labs(title = "Effect of Gamma on R²", x = "log10(Gamma)") + theme_bw()
grid.arrange(p1, p2, ncol = 2)

# Grid search for optimal parameters
tune_result <- tune(svm, medv ~ ., data = train_data,
type = "eps-regression",
kernel = "radial",
ranges = list(cost = c(1, 10, 100),
gamma = c(0.01, 0.1, 1),
epsilon = c(0.01, 0.1, 0.5)))
cat("Parameter Optimal:\n")## Parameter Optimal:
## cost gamma epsilon
## 14 10 0.1 0.1
##
## Performance Terbaik:
## [1] 13.13684
# Model with the optimal parameters
svr_optimal <- tune_result$best.model
pred_optimal_test <- predict(svr_optimal, test_data)
metrics_optimal <- calculate_metrics(test_data$medv, pred_optimal_test)
cat("\nMetrik Model Optimal:\n")##
## Metrik Model Optimal:
## RMSE MAE R2
## 2.6742851 1.9643708 0.9175722
Based on the analysis above, we can draw the following conclusions.

SVR is particularly useful for:

- Predicting property prices with high accuracy
- Handling data that contain outliers
- Modeling nonlinear relationships between features and the target

For the Boston Housing dataset:

1. Use SVR with the RBF kernel for maximum accuracy.
2. Tune the hyperparameters for optimal results.
3. Consider data preprocessing to improve performance.
4. Monitor overfitting through cross-validation (see the sketch below).
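As a minimal sketch of point 4 (assuming 10-fold cross-validation on the training set; `svr_cv` is our own object name and this fit is not part of the results reported above), `e1071::svm` can report cross-validated error directly through its `cross` argument:

# 10-fold cross-validation on the training data (sketch only)
svr_cv <- svm(medv ~ ., data = train_data,
type = "eps-regression", kernel = "radial",
cost = 10, gamma = 0.1, epsilon = 0.1,
cross = 10)
# For regression, e1071 stores the cross-validated mean squared error in tot.MSE;
# a large gap between this value and the training error above suggests overfitting
cat("10-fold CV total MSE:", svr_cv$tot.MSE, "\n")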
# Final summary of all models
final_comparison <- data.frame(
Model = c("SVR Linear", "SVR RBF", "SVR Optimal", "OLS"),
RMSE = c(test_metrics$RMSE[1], test_metrics$RMSE[2], metrics_optimal[1], test_metrics$RMSE[3]),
MAE = c(test_metrics$MAE[1], test_metrics$MAE[2], metrics_optimal[2], test_metrics$MAE[3]),
R2 = c(test_metrics$R2[1], test_metrics$R2[2], metrics_optimal[3], test_metrics$R2[3])
)
kable(final_comparison, digits = 4, caption = "Summary Comparison of All Models")

| Model | RMSE | MAE | R2 |
|---|---|---|---|
| SVR Linear | 4.6854 | 3.1500 | 0.7470 |
| SVR RBF | 2.6743 | 1.9644 | 0.9176 |
| SVR Optimal | 2.6743 | 1.9644 | 0.9176 |
| OLS | 4.5889 | 3.3655 | 0.7573 |
# Identify the best model based on R²
best_model_name <- final_comparison$Model[which.max(final_comparison$R2)]
cat("Model terbaik berdasarkan R²:", best_model_name)## Model terbaik berdasarkan R²: SVR RBF
Note: this analysis shows that SVR handles regression problems on complex data effectively. Choosing an appropriate kernel and carefully tuning its parameters are essential for obtaining the best results.