library(e1071)
set.seed(123)
x = rnorm(100)
y = 4 * x^2 + 1 + rnorm(100)
class = sample(100, 50)
y[class] = y[class] + 3
y[-class] = y[-class] - 3
plot(x[class], y[class], col = "red", xlab = "X", ylab = "Y", ylim = c(-6, 30))
points(x[-class], y[-class], col = "blue")
# On training data:
z = rep(-1, 100)
z[class] = 1
data = data.frame(x = x, y = y, z = as.factor(z))
train = sample(100, 50)
data.train = data[train, ]
data.test = data[-train, ]
svm.linear = svm(z ~ ., data = data.train, kernel = "linear", cost = 10)
plot(svm.linear, data.train)
# Making predictions:
table(predict = predict(svm.linear, data.train), truth = data.train$z)
## truth
## predict -1 1
## -1 18 1
## 1 6 25
# On testing data:
plot(svm.linear, data.test)
# Making prediction:
table(predict = predict(svm.linear, data.test), truth = data.test$z)
## truth
## predict -1 1
## -1 20 2
## 1 6 22
# On training data:
svm.poly = svm(z ~ ., data = data.train, kernel = "polynomial", cost = 10)
plot(svm.poly, data.train)
# Making prediction:
table(predict = predict(svm.poly, data.train), truth = data.train$z)
## truth
## predict -1 1
## -1 18 0
## 1 6 26
# On testing data:
plot(svm.poly, data.test)
# Making predictions:
table(predict = predict(svm.poly, data.test), truth = data.test$z)
## truth
## predict -1 1
## -1 16 0
## 1 10 24
# On training data:
svm.radial = svm(z ~ ., data = data.train, kernel = "radial", gamma = 1, cost = 10)
plot(svm.radial, data.train)
# Making prediction:
table(predict = predict(svm.radial, data.train), truth = data.train$z)
## truth
## predict -1 1
## -1 24 0
## 1 0 26
# On testing data:
plot(svm.radial, data.test)
# Making predictions:
table(predict = predict(svm.radial, data.test), truth = data.test$z)
## truth
## predict -1 1
## -1 25 0
## 1 1 24
From the results above, the linear, polynomial and radial kernels misclassify 7, 6 and 0 training observations, respectively, so the radial kernel clearly outperforms both the linear and polynomial kernels on the training data.
Similarly, on the test data the linear, polynomial and radial kernels misclassify 8, 10 and 1 observations, respectively, so the radial kernel again performs best. The plots and confusion tables above support these conclusions.
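Rather than counting the off-diagonal table entries by hand, the same error rates can be computed directly from the predictions. A minimal sketch, assuming the three fitted models above are still in the workspace (err is a throwaway helper introduced only for this check):
# Train/test misclassification rates for the three kernels, computed directly.
err <- function(fit, d) mean(predict(fit, d) != d$z)
sapply(list(linear = svm.linear, poly = svm.poly, radial = svm.radial),
       function(fit) c(train = err(fit, data.train), test = err(fit, data.test)))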
library(ISLR)
library(e1071)
set.seed(1)
# Creating binary mpg variable
data(Auto)
Auto$high_mpg <- ifelse(Auto$mpg > median(Auto$mpg), 1, 0)
Auto$high_mpg <- factor(Auto$high_mpg, levels=c(0,1),
labels=c("Low","High"))
# Predictor set without mpg and name (for reference; note the tune() calls below use the full Auto data frame):
predictors <- subset(Auto, select = -c(mpg, name, high_mpg))
y <- Auto$high_mpg
# Support Vector Classifier:
cost_grid <- 10^seq(-2, 2, by=1)
tune_lin <- tune(svm, high_mpg~., data=Auto,
kernel="linear",
ranges=list(cost=cost_grid),
tunecontrol = tune.control(cross=10))
print(tune_lin) # best cost and its CV error (summary(tune_lin) shows the error for each cost)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.01025641
best_lin <- tune_lin$best.model
# Plotting the best linear SVM on horsepower vs. weight:
plot(best_lin, data=Auto, horsepower~weight)
From the above results, a cost of 1 achieves the lowest cross-validation error (about 0.0103).
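The full cross-validation curve, not just the winning cost, is stored in the tuning object. A quick sketch of how to inspect it:
# CV error for every candidate cost, taken from the tuning object itself.
tune_lin$performances[, c("cost", "error")]
plot(log10(tune_lin$performances$cost), tune_lin$performances$error, type = "b",
     xlab = "log10(cost)", ylab = "10-fold CV error")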
# Radial kernels:
gamma_grid <- 10^seq(-3, 0, by=1)
tune_rbf <- tune(svm, high_mpg~., data=Auto,
kernel="radial",
ranges=list(cost=cost_grid, gamma=gamma_grid),
tunecontrol = tune.control(cross=10))
print(tune_rbf)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 100 0.01
##
## - best performance: 0.01019231
# Polynomial Kernel:
degree_grid <- 2:4
tune_poly <- tune(svm, high_mpg~., data=Auto,
kernel="polynomial",
ranges=list(cost=cost_grid, degree=degree_grid),
tunecontrol = tune.control(cross=10))
print(tune_poly)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 100 2
##
## - best performance: 0.3167308
# Plotting CV error surfaces
par(mfrow=c(1,2))
plot(tune_rbf, main="RBF SVM: CV Error")
plot(tune_poly, main="Poly SVM: CV Error")
For the radial kernel, the lowest cross-validation error (about 0.0102) is obtained with gamma = 0.01 and cost = 100. For the polynomial kernel, the lowest cross-validation error is obtained with degree = 2 and cost = 100, but it is far higher (about 0.317). Since the tuned radial kernel only matches the linear kernel's CV error and the polynomial kernel does much worse, this suggests the linear kernel is good enough for this dataset: the relationship does not appear complex enough to require a non-linear kernel.
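For a side-by-side view, the best CV error of each kernel can be pulled from the three tuning objects. A small sketch, assuming tune_lin, tune_rbf and tune_poly are the objects fit above:
# Best 10-fold CV error per kernel, extracted from the tuning objects.
data.frame(kernel = c("linear", "radial", "polynomial"),
           best_cv_error = c(tune_lin$best.performance,
                             tune_rbf$best.performance,
                             tune_poly$best.performance))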
# 2D decision‐boundary plots for the best models
# picking two features, e.g., horsepower vs. weight:
best_rbf <- tune_rbf$best.model
best_poly <- tune_poly$best.model
par(mfrow=c(1,2), mar=c(4,4,2,1))
plot(best_rbf, data=Auto, horsepower~weight,
slice=list(acceleration=median(Auto$acceleration),
displacement=median(Auto$displacement),
year=median(Auto$year),
origin=median(Auto$origin)))
title("RBF SVM Decision Boundary")
plot(best_poly, data=Auto, horsepower~weight,
slice=list(acceleration=median(Auto$acceleration),
displacement=median(Auto$displacement),
year=median(Auto$year),
origin=median(Auto$origin)))
title("Poly SVM Decision Boundary")
set.seed(222)
# splitting into train (800) and test (remaining):
n <- nrow(OJ)
train_idx <- sample(1:n, 800)
OJ.train <- OJ[train_idx, ]
OJ.test <- OJ[-train_idx, ]
# Helper for computing the misclassification error rate of a model on a data set:
err_rate <- function(model, data) {
preds <- predict(model, data)
mean(preds != data$Purchase)
}
# Linear SVC with cost=0.01:
svc_lin01 <- svm(Purchase ~ ., data=OJ.train,
kernel="linear", cost=0.01, scale=TRUE)
summary(svc_lin01)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "linear", cost = 0.01,
## scale = TRUE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 431
##
## ( 216 215 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
With a cost of 0.01 and a linear kernel, the SVM uses 431 support vectors to separate the CH and MM classes; of these, 216 belong to class CH (Citrus Hill) and 215 to class MM (Minute Maid).
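The support-vector breakdown reported by summary() can be cross-checked directly on the fitted object, whose index component holds the training-row indices of the support vectors. A small sketch:
# Verifying the support-vector counts from summary().
length(svc_lin01$index)                    # total number of support vectors
table(OJ.train$Purchase[svc_lin01$index])  # how many come from CH vs. MM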
train_err_lin01 <- err_rate(svc_lin01, OJ.train)
test_err_lin01 <- err_rate(svc_lin01, OJ.test)
cat("Linear SVC (C=0.01):\n",
" Train error =", round(train_err_lin01,3), "\n",
" Test error =", round(test_err_lin01,3), "\n\n")
## Linear SVC (C=0.01):
## Train error = 0.16
## Test error = 0.178
With the linear kernel at cost = 0.01, the training and test error rates are 16% and 17.8%, respectively.
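The same test error can also be viewed as a confusion matrix, as was done for the simulated data earlier. A sketch for the cost = 0.01 linear fit:
# Confusion matrix on the held-out OJ observations (rows = predictions).
table(predict = predict(svc_lin01, OJ.test), truth = OJ.test$Purchase)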
tune_lin <- tune(svm, Purchase ~ ., data=OJ.train,
kernel="linear",
ranges=list(cost=c(0.01,0.1,1,5,10)),
tunecontrol = tune.control(cross=10))
best_cost_lin <- tune_lin$best.parameters$cost
cat("Best linear cost:", best_cost_lin, "\n\n")
## Best linear cost: 0.1
svc_lin_best <- svm(Purchase ~ ., data=OJ.train,
kernel="linear", cost=best_cost_lin)
cat("Linear SVC (C=", best_cost_lin, "):\n",
" Train error =", round(err_rate(svc_lin_best, OJ.train),3), "\n",
" Test error =", round(err_rate(svc_lin_best, OJ.test),3), "\n\n")
## Linear SVC (C= 0.1 ):
## Train error = 0.16
## Test error = 0.181
With the tuned cost (C = 0.1) and the linear kernel, the training and test error rates are 16% and 18.1%, respectively.
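The CV error for each candidate cost sits in the tuning object, which shows how flat the curve is around the chosen value. A quick sketch:
# 10-fold CV error for every cost in the grid, from the OJ linear tuning object.
tune_lin$performances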
library(e1071)
set.seed(1)
# RBF SVM with default gamma and cost = 0.01:
svm_rbf_001 <- svm(Purchase ~ ., data = OJ.train,
kernel = "radial", cost = 0.01)
# train & test error:
train_err_001 <- mean(predict(svm_rbf_001, OJ.train) != OJ.train$Purchase)
test_err_001 <- mean(predict(svm_rbf_001, OJ.test ) != OJ.test$Purchase)
cat("RBF SVM (cost=0.01):\n",
sprintf(" Train error = %.3f\n", train_err_001),
sprintf(" Test error = %.3f\n\n", test_err_001))
## RBF SVM (cost=0.01):
## Train error = 0.384
## Test error = 0.407
# Tuning cost (gamma left at its default = 1/(#features)):
cost.grid <- c(0.01, 0.1, 1, 5, 10)
tune_rbf <- tune(svm, Purchase ~ ., data = OJ.train,
kernel = "radial",
ranges = list(cost = cost.grid),
tunecontrol = tune.control(cross = 10))
best_cost_rbf <- tune_rbf$best.parameters$cost
cat("Optimal cost (RBF):", best_cost_rbf, "\n\n")
## Optimal cost (RBF): 1
# Refitting at the optimal cost and recomputing errors:
svm_rbf_best <- svm(Purchase ~ ., data = OJ.train,
kernel = "radial", cost = best_cost_rbf)
train_err_best <- mean(predict(svm_rbf_best, OJ.train) != OJ.train$Purchase)
test_err_best <- mean(predict(svm_rbf_best, OJ.test ) != OJ.test$Purchase)
cat(sprintf("RBF SVM (cost=%.2f) final:\n", best_cost_rbf),
sprintf(" Train error = %.3f\n", train_err_best),
sprintf(" Test error = %.3f\n", test_err_best))
## RBF SVM (cost=1.00) final:
## Train error = 0.145
## Test error = 0.193
With the radial kernel at cost = 0.01, the training and test error rates are 38.4% and 40.7%, respectively. After tuning, the optimal cost is 1, and the training and test error rates drop to 14.5% and 19.3%.
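As noted in the comment above, gamma was left at the e1071 default of 1 / (data dimension); the value actually used is stored on the fitted object. A quick check sketch:
# The gamma used by the tuned radial fit (e1071's default, since none was supplied).
svm_rbf_best$gamma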
library(e1071)
set.seed(1)
# Poly-2 SVM with cost = 0.01:
svm_poly_001 <- svm(Purchase ~ ., data = OJ.train,
kernel = "polynomial",
degree = 2,
cost = 0.01,
scale = TRUE)
# train & test error:
train_err_p001 <- mean(predict(svm_poly_001, OJ.train) != OJ.train$Purchase)
test_err_p001 <- mean(predict(svm_poly_001, OJ.test ) != OJ.test$Purchase)
cat("Poly-2 SVM (cost=0.01):\n",
sprintf(" Train error = %.3f\n", train_err_p001),
sprintf(" Test error = %.3f\n\n", test_err_p001))
## Poly-2 SVM (cost=0.01):
## Train error = 0.360
## Test error = 0.396
# Tuning cost for Poly-2 kernel:
cost.grid <- c(0.01, 0.1, 1, 5, 10)
tune_poly <- tune(svm, Purchase ~ ., data = OJ.train,
kernel = "polynomial",
degree = 2,
ranges = list(cost = cost.grid),
tunecontrol = tune.control(cross = 10))
best_cost_poly <- tune_poly$best.parameters$cost
cat("Optimal cost (Poly-2):", best_cost_poly, "\n\n")
## Optimal cost (Poly-2): 5
# Refitting at the optimal cost and recomputing errors:
svm_poly_best <- svm(Purchase ~ ., data = OJ.train,
kernel = "polynomial",
degree = 2,
cost = best_cost_poly,
scale = TRUE)
train_err_pbest <- mean(predict(svm_poly_best, OJ.train) != OJ.train$Purchase)
test_err_pbest <- mean(predict(svm_poly_best, OJ.test ) != OJ.test$Purchase)
cat(sprintf("Poly-2 SVM (cost=%.2f) final:\n", best_cost_poly),
sprintf(" Train error = %.3f\n", train_err_pbest),
sprintf(" Test error = %.3f\n", test_err_pbest))
## Poly-2 SVM (cost=5.00) final:
## Train error = 0.145
## Test error = 0.207
With the degree-2 polynomial kernel at cost = 0.01, the training and test error rates are 36% and 39.6%, respectively. After tuning, the optimal cost is 5, and the training and test error rates drop to 14.5% and 20.7%.
Kernel | Training & Test Error Rate (C = 0.01) | Training & Test Error Rate at Optimal Cost
---|---|---
Linear | 16% & 17.8% | 16% & 18.1% (C = 0.1)
Radial | 38.4% & 40.7% | 14.5% & 19.3% (C = 1)
Polynomial (degree 2) | 36% & 39.6% | 14.5% & 20.7% (C = 5)
From the table, tuning the cost matters most for the non-linear kernels, and among them the radial kernel performs best (14.5% training and 19.3% test error at C = 1). The linear support vector classifier, however, attains a slightly lower test error (17.8%-18.1%), so the radial kernel's advantage is confined to the training data.
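For reference, the same comparison can be assembled programmatically from the error rates computed above. A convenience sketch, assuming all of those objects are still in the workspace:
# Assembling the summary table from the stored error rates.
data.frame(
  kernel         = c("linear", "radial", "polynomial (deg 2)"),
  train_err_c001 = c(train_err_lin01, train_err_001, train_err_p001),
  test_err_c001  = c(test_err_lin01, test_err_001, test_err_p001),
  best_cost      = c(best_cost_lin, best_cost_rbf, best_cost_poly),
  train_err_best = c(err_rate(svc_lin_best, OJ.train), train_err_best, train_err_pbest),
  test_err_best  = c(err_rate(svc_lin_best, OJ.test), test_err_best, test_err_pbest))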