We have seen that we can fit an SVM with a non-linear kernel in order to perform classification using a non-linear decision boundary. We will now see that we can also obtain a non-linear decision boundary by performing logistic regression using non-linear transformations of the features. (a) Generate a data set with n = 500 and p = 2, such that the observations belong to two classes with a quadratic decision boundary between them. For instance, you can do this as follows:
set.seed(123)
x1 = runif(500) - 0.5
x2 = runif(500) - 0.5
y = 1 * (x1^2 - x2^2 > 0)
plot(x1[y == 0], x2[y == 0], col = "red", xlab = "X1", ylab = "X2", pch = "+")
points(x1[y == 1], x2[y == 1], col = "darkgreen", pch = 4)
lm.fit = glm(y ~ x1 + x2, family = binomial)
summary(lm.fit)
##
## Call:
## glm(formula = y ~ x1 + x2, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.227 -1.200 1.133 1.157 1.188
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.04792 0.08949 0.535 0.592
## x1 -0.03999 0.31516 -0.127 0.899
## x2 0.11509 0.30829 0.373 0.709
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 692.86 on 499 degrees of freedom
## Residual deviance: 692.71 on 497 degrees of freedom
## AIC: 698.71
##
## Number of Fisher Scoring iterations: 3
data = data.frame(x1 = x1, x2 = x2, y = y)
lm.prob = predict(lm.fit, data, type = "response")
lm.pred = ifelse(lm.prob > 0.52, 1, 0)
data.pos = data[lm.pred == 1, ]
data.neg = data[lm.pred == 0, ]
plot(data.pos$x1, data.pos$x2, col = "red", xlab = "X1", ylab = "X2", pch = "+")
points(data.neg$x1, data.neg$x2, col = "purple", pch = 4)
lm.fit = glm(y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2), data = data, family = binomial)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
These warnings arise because the quadratic terms can separate the two classes perfectly, so the fitted probabilities are pushed to 0 or 1.
lm.prob = predict(lm.fit, data, type = "response")
lm.pred = ifelse(lm.prob > 0.5, 1, 0)
data.pos = data[lm.pred == 1, ]
data.neg = data[lm.pred == 0, ]
plot(data.pos$x1, data.pos$x2, col = "red", xlab = "X1", ylab = "X2", pch = "+")
points(data.neg$x1, data.neg$x2, col = "darkgreen", pch = 4)
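As a quick check (a sketch using the objects defined above), the quadratic logistic fit should classify essentially every training point correctly:
table(y, lm.pred)   # counts should fall on the diagonal: complete separation
mean(lm.pred == y)  # training accuracy, expected to be 1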
library(e1071)
svm.fit = svm(as.factor(y) ~ x1 + x2, data, kernel = "linear", cost = 0.1)
svm.pred = predict(svm.fit, data)
data.pos = data[svm.pred == 1, ]
data.neg = data[svm.pred == 0, ]
plot(data.pos$x1, data.pos$x2, col = "purple", xlab = "X1", ylab = "X2", pch = "+")
points(data.neg$x1, data.neg$x2, col = "darkgreen", pch = 4)
svm.fit = svm(as.factor(y) ~ x1 + x2, data, gamma = 1)  # radial kernel (the svm default)
svm.pred = predict(svm.fit, data)
data.pos = data[svm.pred == 1, ]
data.neg = data[svm.pred == 0, ]
plot(data.pos$x1, data.pos$x2, col = "darkgreen", xlab = "X1", ylab = "X2", pch = "+")
points(data.neg$x1, data.neg$x2, col = "purple", pch = 4)
This shows that an SVM with a non-linear (radial) kernel can capture the non-linear decision boundary. Rather than fixing gamma by hand, it would be easier to choose gamma (and cost) by cross-validation.
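For instance, gamma and cost could be tuned jointly on the simulated data; the sketch below is one way to do this (the grid of values and the names tune.sim and svm.cv are arbitrary choices, not part of the original analysis):
set.seed(123)
tune.sim = tune(svm, as.factor(y) ~ x1 + x2, data = data, kernel = "radial",
                ranges = list(cost = c(0.1, 1, 10, 100), gamma = c(0.5, 1, 2)))
summary(tune.sim)  # pick the cost/gamma pair with the lowest CV error
svm.cv = svm(as.factor(y) ~ x1 + x2, data, kernel = "radial",
             cost = tune.sim$best.parameters$cost,
             gamma = tune.sim$best.parameters$gamma)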
library(ISLR)
gas_m = median(Auto$mpg)
new_v = ifelse(Auto$mpg > gas_m, 1, 0)
Auto$mpglevel = as.factor(new_v)
set.seed(123)
tune.out = tune(svm, mpglevel ~ ., data = Auto, kernel = "linear", ranges = list(cost = c(0.01, 0.1, 1, 5, 10, 100)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.01025641
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-02 0.07634615 0.03928191
## 2 1e-01 0.04333333 0.03191738
## 3 1e+00 0.01025641 0.01792836
## 4 5e+00 0.01538462 0.01792836
## 5 1e+01 0.01788462 0.01727588
## 6 1e+02 0.03320513 0.02720447
set.seed(123)
tune.out = tune(svm, mpglevel ~ ., data = Auto, kernel = "polynomial", ranges = list(cost = c(0.1, 1, 5, 10), degree = c(2, 3, 4)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 10 2
##
## - best performance: 0.5714744
##
## - Detailed performance results:
## cost degree error dispersion
## 1 0.1 2 0.5817308 0.04740051
## 2 1.0 2 0.5817308 0.04740051
## 3 5.0 2 0.5817308 0.04740051
## 4 10.0 2 0.5714744 0.04575370
## 5 0.1 3 0.5817308 0.04740051
## 6 1.0 3 0.5817308 0.04740051
## 7 5.0 3 0.5817308 0.04740051
## 8 10.0 3 0.5817308 0.04740051
## 9 0.1 4 0.5817308 0.04740051
## 10 1.0 4 0.5817308 0.04740051
## 11 5.0 4 0.5817308 0.04740051
## 12 10.0 4 0.5817308 0.04740051
set.seed(123)
tune.out = tune(svm, mpglevel ~ ., data = Auto, kernel = "radial", ranges = list(cost = c(0.1, 1, 5, 10), gamma = c(0.01, 0.1, 1, 5, 10, 100)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 10 0.01
##
## - best performance: 0.02032051
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 0.1 1e-02 0.08916667 0.04345384
## 2 1.0 1e-02 0.07378205 0.04185248
## 3 5.0 1e-02 0.04589744 0.03136327
## 4 10.0 1e-02 0.02032051 0.02305327
## 5 0.1 1e-01 0.07634615 0.03928191
## 6 1.0 1e-01 0.05852564 0.03960325
## 7 5.0 1e-01 0.03057692 0.02611396
## 8 10.0 1e-01 0.03314103 0.02942215
## 9 0.1 1e+00 0.58173077 0.04740051
## 10 1.0 1e+00 0.05865385 0.04942437
## 11 5.0 1e+00 0.05608974 0.04595880
## 12 10.0 1e+00 0.05608974 0.04595880
## 13 0.1 5e+00 0.58173077 0.04740051
## 14 1.0 5e+00 0.51544872 0.06790600
## 15 5.0 5e+00 0.51544872 0.06790600
## 16 10.0 5e+00 0.51544872 0.06790600
## 17 0.1 1e+01 0.58173077 0.04740051
## 18 1.0 1e+01 0.54602564 0.06355090
## 19 5.0 1e+01 0.54102564 0.06959451
## 20 10.0 1e+01 0.54102564 0.06959451
## 21 0.1 1e+02 0.58173077 0.04740051
## 22 1.0 1e+02 0.58173077 0.04740051
## 23 5.0 1e+02 0.58173077 0.04740051
## 24 10.0 1e+02 0.58173077 0.04740051
Using the best parameters found by cross-validation, we refit each model:
svm.linear = svm(mpglevel ~ ., data = Auto, kernel = "linear", cost = 1)
svm.poly = svm(mpglevel ~ ., data = Auto, kernel = "polynomial", cost = 10,
degree = 2)
svm.radial = svm(mpglevel ~ ., data = Auto, kernel = "radial", cost = 10, gamma = 0.01)
# Plot each fitted SVM against every predictor (paired with mpg), skipping the
# response mpglevel and the name variable
plotpairs = function(fit) {
    for (name in names(Auto)[!(names(Auto) %in% c("mpg", "mpglevel", "name"))]) {
        plot(fit, Auto, as.formula(paste("mpg~", name, sep = "")))
    }
}
plotpairs(svm.linear)
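The same helper can be applied to the other two fits (using the objects defined above):
plotpairs(svm.poly)
plotpairs(svm.radial)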
library(ISLR)
library(e1071)
set.seed(456)
train = sample(dim(OJ)[1], 800)
OJ.train = OJ[train, ]
OJ.test = OJ[-train, ]
svm.linear = svm(Purchase ~ ., kernel = "linear", data = OJ.train, cost = 0.01)
summary(svm.linear)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 439
##
## ( 219 220 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
The support vector classifier uses 439 support vectors out of the 800 training observations: 219 belong to the CH class and 220 to the MM class.
train.pred = predict(svm.linear, OJ.train)
table(OJ.train$Purchase, train.pred)
## train.pred
## CH MM
## CH 441 52
## MM 84 223
test.pred = predict(svm.linear, OJ.test)
table(OJ.test$Purchase, test.pred)
## test.pred
## CH MM
## CH 140 20
## MM 25 85
The training and test error rates are 17% and 16.67%, respectively.
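These rates can also be computed directly from the predictions rather than from the tables (a minimal sketch using the objects above):
mean(train.pred != OJ.train$Purchase)  # training error rate, about 0.17
mean(test.pred != OJ.test$Purchase)    # test error rate, about 0.1667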
set.seed(456)
tune.out = tune(svm, Purchase ~ ., data = OJ.train, kernel = "linear", ranges = list(cost = 10^seq(-2, 1, by = 0.25)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.03162278
##
## - best performance: 0.17125
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01000000 0.17375 0.05285265
## 2 0.01778279 0.17250 0.05296750
## 3 0.03162278 0.17125 0.05138701
## 4 0.05623413 0.17375 0.04803428
## 5 0.10000000 0.17375 0.05185785
## 6 0.17782794 0.17625 0.05285265
## 7 0.31622777 0.17625 0.05285265
## 8 0.56234133 0.17500 0.05204165
## 9 1.00000000 0.17500 0.04859127
## 10 1.77827941 0.17500 0.04859127
## 11 3.16227766 0.17625 0.04803428
## 12 5.62341325 0.17625 0.04581439
## 13 10.00000000 0.17625 0.04543387
Cross-validation selects cost = 0.03162278, with a cross-validation error of 0.17125.
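Instead of refitting by hand, the cross-validated model can also be taken straight from the tuning object (a sketch; best.linear is just a name chosen here):
best.linear = tune.out$best.model  # svm refit on all of OJ.train at the selected cost
summary(best.linear)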
svm.linear = svm(Purchase ~ ., kernel = "linear", data = OJ.train, cost = tune.out$best.parameters$cost)
train.pred = predict(svm.linear, OJ.train)
table(OJ.train$Purchase, train.pred)
## train.pred
## CH MM
## CH 439 54
## MM 80 227
test.pred = predict(svm.linear, OJ.test)
table(OJ.test$Purchase, test.pred)
## test.pred
## CH MM
## CH 139 21
## MM 22 88
With the tuned cost, the training and test error rates are 16.75% and 15.93%, respectively.
set.seed(456)
svm.radial = svm(Purchase ~ ., data = OJ.train, kernel = "radial")
summary(svm.radial)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "radial")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 372
##
## ( 189 183 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train.pred = predict(svm.radial, OJ.train)
table(OJ.train$Purchase, train.pred)
## train.pred
## CH MM
## CH 453 40
## MM 77 230
test.pred = predict(svm.radial, OJ.test)
table(OJ.test$Purchase, test.pred)
## test.pred
## CH MM
## CH 139 21
## MM 32 78
set.seed(456)
tune.out = tune(svm, Purchase ~ ., data = OJ.train, kernel = "radial", ranges = list(cost = 10^seq(-2, 1, by = 0.25)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.5623413
##
## - best performance: 0.16875
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01000000 0.38375 0.03775377
## 2 0.01778279 0.38375 0.03775377
## 3 0.03162278 0.38000 0.04257347
## 4 0.05623413 0.21500 0.03162278
## 5 0.10000000 0.18625 0.04693746
## 6 0.17782794 0.18375 0.04788949
## 7 0.31622777 0.17125 0.04210189
## 8 0.56234133 0.16875 0.04535738
## 9 1.00000000 0.17375 0.04143687
## 10 1.77827941 0.17625 0.03653860
## 11 3.16227766 0.18250 0.04090979
## 12 5.62341325 0.19000 0.03717451
## 13 10.00000000 0.19125 0.03438447
svm.radial = svm(Purchase ~ ., data = OJ.train, kernel = "radial", cost = tune.out$best.parameters$cost)
train.pred = predict(svm.radial, OJ.train)
table(OJ.train$Purchase, train.pred)
## train.pred
## CH MM
## CH 453 40
## MM 76 231
test.pred = predict(svm.radial, OJ.test)
table(OJ.test$Purchase, test.pred)
## test.pred
## CH MM
## CH 139 21
## MM 29 81
The radial kernel with the default cost uses 372 support vectors, 189 from the CH class and 183 from the MM class. Its training and test error rates are 14.63% and 19.63%, respectively. Cross-validation selects cost = 0.5623413 with a CV error of 0.16875; refitting at that cost gives training and test error rates of 14.5% and 18.52%.
set.seed(456)
svm.poly = svm(Purchase ~ ., data = OJ.train, kernel = "poly", degree = 2)
summary(svm.poly)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "poly", degree = 2)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 1
## degree: 2
## coef.0: 0
##
## Number of Support Vectors: 452
##
## ( 228 224 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train.pred = predict(svm.poly, OJ.train)
table(OJ.train$Purchase, train.pred)
## train.pred
## CH MM
## CH 463 30
## MM 105 202
test.pred = predict(svm.poly, OJ.test)
table(OJ.test$Purchase, test.pred)
## test.pred
## CH MM
## CH 144 16
## MM 42 68
The polynomial kernel produces 452 support vectors, 228 from the CH class and 224 from the MM class. Its training and test error rates are 16.88% and 21.48%, respectively.
The radial kernel gives the best fit on the training data, but on the test set the tuned linear kernel has the lowest error rate for this split (15.93%, versus 18.52% for the radial and 21.48% for the polynomial kernel).
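A compact way to compare the three final fits is to compute their test error rates side by side (a minimal sketch, assuming svm.linear, svm.radial, and svm.poly are the final fits above):
sapply(list(linear = svm.linear, radial = svm.radial, poly = svm.poly),
       function(fit) mean(predict(fit, OJ.test) != OJ.test$Purchase))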