library(e1071)
set.seed(1)
# Simulate 100 points uniformly on the square [-2, 2] x [-2, 2]
n <- 100
X1 <- runif(n, min = -2, max = 2)
X2 <- runif(n, min = -2, max = 2)
# Points outside the unit circle are "Class1", points inside are "Class2"
Y <- factor(c("Class2", "Class1")[(X1^2 + X2^2 > 1) + 1L])
dat <- data.frame(X1 = X1, X2 = X2, Y = Y)
# 70 randomly chosen rows are used for training, the other 30 for testing
train_id <- sample.int(n, size = 70)
train <- dat[train_id, ]
test <- dat[-train_id, ]
# All three classifiers are fitted on the unscaled training data with cost = 1;
# only the kernel (and its own parameters) differs between them.

# Support vector classifier (linear kernel)
svm_lin <- svm(
  Y ~ X1 + X2,
  data = train,
  kernel = "linear",
  cost = 1,
  scale = FALSE
)

# SVM with a polynomial kernel of degree 2
svm_poly2 <- svm(
  Y ~ X1 + X2,
  data = train,
  kernel = "polynomial",
  degree = 2,
  cost = 1,
  scale = FALSE
)

# SVM with a radial (RBF) kernel, gamma = 1
svm_rbf <- svm(
  Y ~ X1 + X2,
  data = train,
  kernel = "radial",
  gamma = 1,
  cost = 1,
  scale = FALSE
)
# Misclassification rate: the share of rows in `df` whose predicted class
# (from `predict(model, df)`) differs from the observed label in `df$Y`.
err <- function(model, df) {
  predicted <- predict(model, df)
  mean(predicted != df$Y)
}
# Print the training and test misclassification rate of each fitted model,
# one line per kernel.
invisible(Map(
  function(label, model) {
    cat(label, "train err =", err(model, train),
        " test err =", err(model, test), "\n")
  },
  c("Linear SVC:", "Poly deg=2:", "RBF kernel:"),
  list(svm_lin, svm_poly2, svm_rbf)
))
## Linear SVC: train err = 0.2285714 test err = 0.3
## Poly deg=2: train err = 0 test err = 0.03333333
## RBF kernel: train err = 0 test err = 0.1
The polynomial kernel performs best, with the RBF kernel a close second. As expected, both clearly outperform the linear classifier, because the true class boundary (a circle) is not linear.
# Decision-boundary plots for the three classifiers on the training data.
#
# plot.svm() draws through filled.contour(), which builds its own layout and
# always uses a full page, so an mfrow grid has no effect here and each model
# gets its own page. filled.contour() also restores the graphics parameters on
# exit, which makes a later title() call unreliable, and the panel already
# carries the fixed title "SVM classification plot". The model label is
# therefore written into the outer top margin, which does not depend on the
# panel layout.
par(mar = c(4, 4, 2, 1), oma = c(0, 0, 2, 0))
#Linear SVC
plot(svm_lin, train, X1 ~ X2)
mtext("Linear SVC (linear kernel)", side = 3, outer = TRUE, line = 0.5, font = 2)
#Poly SVM
plot(svm_poly2, train, X1 ~ X2)
mtext("Polynomial SVM (deg=2)", side = 3, outer = TRUE, line = 0.5, font = 2)
#RBF SVM
plot(svm_rbf, train, X1 ~ X2)
mtext("RBF SVM (radial kernel)", side = 3, outer = TRUE, line = 0.5, font = 2)
library(ISLR2)
data(Auto)
# Binary response: 1 when a car's mpg is above the sample median, 0 otherwise,
# stored as a factor with levels "0" and "1"
mpg_med <- median(Auto$mpg)
Auto$mpg01 <- factor(as.integer(Auto$mpg > mpg_med), levels = c(0, 1))
# Check the class counts
table(Auto$mpg01)
##
## 0 1
## 196 196
set.seed(42)
# Predictors: every column of Auto except mpg and the mpg01 response built from it
vars <- setdiff(names(Auto), c("mpg", "mpg01"))
# NOTE(review): this presumably keeps the car `name` column as a predictor -
# confirm that is intended.

# Linear SVC: cross-validate over five cost values
svm_tune_lin <- tune(
  svm,
  mpg01 ~ .,
  data = Auto[c(vars, "mpg01")],
  kernel = "linear",
  ranges = list(cost = c(0.01, 0.1, 1, 10, 100))
)
summary(svm_tune_lin)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.01
##
## - best performance: 0.08916667
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-02 0.08916667 0.05258186
## 2 1e-01 0.09160256 0.05869690
## 3 1e+00 0.09173077 0.04357345
## 4 1e+01 0.11705128 0.05314992
## 5 1e+02 0.12993590 0.05797340
The cross-validation error is lowest for the linear SVC with a cost of 0.01, and it increases as the cost increases.
# RBF kernel: cross-validate over a 3 x 3 grid of cost and gamma values
svm_tune_rbf <- tune(
  svm,
  mpg01 ~ .,
  data = Auto[c(vars, "mpg01")],
  kernel = "radial",
  ranges = list(cost = c(0.1, 1, 10), gamma = c(0.01, 0.1, 1))
)
summary(svm_tune_rbf)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 10 0.1
##
## - best performance: 0.07865385
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 0.1 0.01 0.11474359 0.04974917
## 2 1.0 0.01 0.08653846 0.05507766
## 3 10.0 0.01 0.08403846 0.05237828
## 4 0.1 0.10 0.08910256 0.05383683
## 5 1.0 0.10 0.08910256 0.05105094
## 6 10.0 0.10 0.07865385 0.06205126
## 7 0.1 1.00 0.58429487 0.04703306
## 8 1.0 1.00 0.07884615 0.05530928
## 9 10.0 1.00 0.08141026 0.05580238
# Polynomial kernel, degree fixed at 2: cross-validate over three cost values
svm_tune_poly2 <- tune(
  svm,
  mpg01 ~ .,
  data = Auto[c(vars, "mpg01")],
  kernel = "polynomial",
  degree = 2,
  ranges = list(cost = c(0.1, 1, 10))
)
summary(svm_tune_poly2)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 10
##
## - best performance: 0.5662821
##
## - Detailed performance results:
## cost error dispersion
## 1 0.1 0.5841026 0.05435320
## 2 1.0 0.5841026 0.05435320
## 3 10.0 0.5662821 0.05808838
For the radial SVM, the best results are obtained with a cost of 10 and a gamma of 0.1. The polynomial SVM also performs best when the cost is 10. Although correlation does not necessarily imply causation, we can state that both kernels yield their best results when the cost is high, and that the radial SVM yields a much better result than the polynomial SVM.
# Make sure the cost column of each one-parameter tuning table is a plain
# numeric vector (round trip through character) before it is plotted.
svm_tune_lin$performances[["cost"]] <-
  as.numeric(as.character(svm_tune_lin$performances[["cost"]]))
svm_tune_poly2$performances[["cost"]] <-
  as.numeric(as.character(svm_tune_poly2$performances[["cost"]]))
# Close every open graphics device and start from a fresh wide one
# (without this reset the plots kept failing with an unknown error).
graphics.off()
if (.Platform$OS.type == "windows") {
  windows(12, 4)
} else {
  dev.new(width = 12, height = 4)
}
# One row of three panels with very slim margins; larger margins triggered
# the "margins are too large" error.
par(
  mfrow = c(1, 3),
  mar = c(2, 2, 1, 1),
  oma = c(0, 0, 0, 0),
  mgp = c(1, 0.5, 0)
)
#Grab best models
best_lin <- svm_tune_lin$best.model
best_rbf <- svm_tune_rbf$best.model
best_poly <- svm_tune_poly2$best.model

# Decision-boundary plots on the training data, in the horsepower/weight plane.
#
# Plot against the same columns the models were tuned on.
plot_frame <- Auto[, c(vars, "mpg01")]

# plot.svm() holds every predictor that is not on an axis at 0 unless `slice`
# says otherwise, which is far outside the observed data. Hold the remaining
# numeric predictors at their medians instead; `name` keeps plot.svm()'s
# default (first factor level).
slice_vars <- setdiff(vars, c("horsepower", "weight", "name"))
slice_at <- lapply(plot_frame[slice_vars], median)

# plot.svm() fixes its own panel title, so a `main` argument is silently
# dropped. Write the label into the outer top margin instead, and build it from
# the fitted model so it always matches the tuning result.
par(oma = c(0, 0, 2, 0))

# Draw one decision-boundary page for `model` and label it with `label`.
plot_boundary <- function(model, label) {
  plot(model, plot_frame, horsepower ~ weight, slice = slice_at)
  mtext(label, side = 3, outer = TRUE, line = 0.5, font = 2)
}

# NOTE(review): each plot.svm() call takes a full page, so on an interactive
# device the three plots replace one another rather than sharing the window.
plot_boundary(best_lin, sprintf("Linear SVC (cost = %g)", best_lin$cost))
plot_boundary(best_rbf, sprintf("RBF SVM (cost = %g, γ = %g)",
                                best_rbf$cost, best_rbf$gamma))
plot_boundary(best_poly, sprintf("Poly SVM deg=2 (cost = %g)", best_poly$cost))
if (.Platform$OS.type=="windows") windows(12,4) else dev.new(width=12, height=4)
# Slim margins for three panels, but with a 3-line top margin so the two-line
# titles are not clipped.
par(mfrow=c(1,3), mar=c(2,2,3,1), oma=c(0,0,0,0), mgp=c(1,0.5,0))
#CV-error vs parameter plots
# The title goes through plot.tune()'s own `main` argument, so it replaces the
# default "Performance of `svm'" heading instead of being printed over it.
plot(svm_tune_lin, main = "Linear SVC\nCV error vs cost")
# NOTE(review): with two tuned parameters plot.tune() draws a filled contour,
# which presumably takes a page of its own rather than the middle panel.
plot(svm_tune_rbf, main = "RBF SVM\nCV error vs cost & γ")
plot(svm_tune_poly2, main = "Poly SVM deg=2\nCV error vs cost")
library(ISLR2)
library(e1071)
set.seed(123)
# a.) Training set: 800 randomly chosen rows of OJ; the remaining rows form
# the test set
train_idx <- sample.int(nrow(OJ), size = 800)
oj_train <- OJ[train_idx, ]
oj_test <- OJ[-train_idx, ]
# Misclassification rate: the share of rows in `data` whose predicted class
# (from `predict(model, data)`) differs from the observed `data$Purchase`.
err_rate <- function(model, data) {
  predicted <- predict(model, data)
  mean(predicted != data$Purchase)
}
# b.) Support vector classifier (linear kernel, cost = 0.01) using every
# other column of the training set as a predictor of Purchase
svm_lin_OJ <- svm(
  Purchase ~ .,
  data = oj_train,
  kernel = "linear",
  cost = 0.01
)
summary(svm_lin_OJ)
##
## Call:
## svm(formula = Purchase ~ ., data = oj_train, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 442
##
## ( 220 222 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
# c.) Training and test error rates of the cost = 0.01 classifier
cat(
  "Linear SVC (cost=0.01):\n",
  " train error =", err_rate(svm_lin_OJ, oj_train), "\n",
  " test error =", err_rate(svm_lin_OJ, oj_test), "\n\n",
  sep = " "
)
## Linear SVC (cost=0.01):
## train error = 0.165
## test error = 0.1777778
# d.) Cross-validate the linear SVC over five cost values
tune_lin_OJ <- tune(
  svm,
  Purchase ~ .,
  data = oj_train,
  kernel = "linear",
  ranges = list(cost = c(0.01, 0.1, 1, 5, 10))
)
summary(tune_lin_OJ)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 5
##
## - best performance: 0.16625
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.17625 0.03143004
## 2 0.10 0.17250 0.03425801
## 3 1.00 0.16875 0.03596391
## 4 5.00 0.16625 0.03007514
## 5 10.00 0.17250 0.02751262
# e.) Training and test error rates of the model refitted at the best cost
best_lin_OJ <- tune_lin_OJ$best.model
cat(
  "Linear SVC (best cost =", best_lin_OJ$cost, "):\n",
  " train error =", err_rate(best_lin_OJ, oj_train), "\n",
  " test error =", err_rate(best_lin_OJ, oj_test), "\n\n",
  sep = " "
)
## Linear SVC (best cost = 5 ):
## train error = 0.16375
## test error = 0.162963
# f.) Same procedure with a radial (RBF) kernel: tune the cost, keep the best
# model, report its training and test error rates
tune_rbf_OJ <- tune(
  svm,
  Purchase ~ .,
  data = oj_train,
  kernel = "radial",
  ranges = list(cost = c(0.01, 0.1, 1, 5, 10))
)
best_rbf_OJ <- tune_rbf_OJ$best.model
cat(
  "RBF SVM (best cost =", best_rbf_OJ$cost, "):\n",
  " train error =", err_rate(best_rbf_OJ, oj_train), "\n",
  " test error =", err_rate(best_rbf_OJ, oj_test), "\n\n",
  sep = " "
)
## RBF SVM (best cost = 1 ):
## train error = 0.13875
## test error = 0.1888889
# g.) Same procedure with a polynomial kernel of degree 2: tune the cost, keep
# the best model, report its training and test error rates
tune_poly_OJ <- tune(
  svm,
  Purchase ~ .,
  data = oj_train,
  kernel = "polynomial",
  degree = 2,
  ranges = list(cost = c(0.01, 0.1, 1, 5, 10))
)
best_poly_OJ <- tune_poly_OJ$best.model
cat(
  "Poly SVM deg=2 (best cost =", best_poly_OJ$cost, "):\n",
  " train error =", err_rate(best_poly_OJ, oj_train), "\n",
  " test error =", err_rate(best_poly_OJ, oj_test), "\n\n",
  sep = " "
)
## Poly SVM deg=2 (best cost = 5 ):
## train error = 0.14625
## test error = 0.2037037
H.) The linear SVC with a cost of 5 yields the lowest test-set error (0.163). Although the RBF and polynomial kernels drive the training error lower (0.139 and 0.146, versus 0.164 for the linear SVC), they generalize worse than the linear method, with test errors of 0.189 and 0.204. Thus, on this data, the linear SVC is the top performer.