## 5. We have seen that we can fit an SVM with a non-linear kernel in order to perform classification using a non-linear decision boundary. We will now see that we can also obtain a non-linear decision boundary by performing logistic regression using non-linear transformations of the features.
x1 <- runif(500) - 0.5
x2 <- runif(500) - 0.5
y <- 1 * (x1^2 - x2^2 > 0)
plot(x1[y == 0], x2[y == 0], col = "red", xlab = "X1", ylab = "X2", pch = "+")
points(x1[y == 1], x2[y == 1], col = "blue", pch = 4)
## (c) Fit a logistic regression model to the data, using X1 and X2 as predictors.
log_reg_fit = glm(y ~ x1 + x2, family = binomial)
summary(log_reg_fit)
##
## Call:
## glm(formula = y ~ x1 + x2, family = binomial)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.08536 0.09009 -0.947 0.343
## x1 0.47083 0.31875 1.477 0.140
## x2 -0.20178 0.31656 -0.637 0.524
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 692.18 on 499 degrees of freedom
## Residual deviance: 689.53 on 497 degrees of freedom
## AIC: 695.53
##
## Number of Fisher Scoring iterations: 3
log_trng_data = data.frame(x1 = x1, x2 = x2, y = y)
log_reg_pred = predict(log_reg_fit, log_trng_data, type = "response")
log_reg_pred_prob = ifelse(log_reg_pred > 0.52, 1, 0)
log_reg_pred_prob_pos = log_trng_data[log_reg_pred_prob == 1, ]
log_reg_pred_prob_neg = log_trng_data[log_reg_pred_prob == 0, ]
plot(log_reg_pred_prob_pos$x1, log_reg_pred_prob_pos$x2, col = "green", xlab = "X1", ylab = "X2", pch = "+")
points(log_reg_pred_prob_neg$x1, log_reg_pred_prob_neg$x2, col = "red", pch = 4)
## (e) Now fit a logistic regression model to the data using non-linear functions of X1 and X2 as predictors (e.g. X1^2, X1*X2, log(X2), and so forth).
log_reg_nonlinear = glm(y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2), data = log_trng_data, family = binomial)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
These warnings occur because the quadratic terms can separate the two classes (almost) perfectly, so the fitted probabilities are driven to 0 or 1 and the coefficient estimates do not converge.
log_reg_nonlinear_pred = predict(log_reg_nonlinear, log_trng_data, type = "response")
log_reg_nonlinear_pred_prob = ifelse(log_reg_nonlinear_pred > 0.5, 1, 0)
log_reg_nonlinear_pred_prob_pos = log_trng_data[log_reg_nonlinear_pred_prob == 1, ]
log_reg_nonlinear_pred_prob_neg = log_trng_data[log_reg_nonlinear_pred_prob == 0, ]
plot(log_reg_nonlinear_pred_prob_pos$x1, log_reg_nonlinear_pred_prob_pos$x2, col = "red", xlab = "X1", ylab = "X2", pch = "+")
points(log_reg_nonlinear_pred_prob_neg$x1, log_reg_nonlinear_pred_prob_neg$x2, col = "blue", pch = 4)
## (g) Fit a support vector classifier to the data with X1 and X2 as predictors. Obtain a class prediction for each training observation. Plot the observations, colored according to the predicted class labels.
library(e1071)
svm.fit = svm(as.factor(y) ~ x1 + x2, log_trng_data, kernel = "linear", cost = 0.1)
svm.pred = predict(svm.fit, log_trng_data)
data.pos = log_trng_data[svm.pred == 1, ]
data.neg = log_trng_data[svm.pred == 0, ]
plot(data.pos$x1, data.pos$x2, col = "blue", xlab = "X1", ylab = "X2", pch = "+")
points(data.neg$x1, data.neg$x2, col = "red", pch = 4)
## (h) Fit an SVM using a non-linear kernel to the data. Obtain a class prediction for each training observation. Plot the observations, colored according to the predicted class labels.
library(e1071)
svm_fit_radial = svm(as.factor(y) ~ x1 + x2, log_trng_data, kernel = "radial", gamma = 1)
svm_fit_pred_radial = predict(svm_fit_radial, log_trng_data)
svm_fit_pred_pos_radial = log_trng_data[svm_fit_pred_radial == 1, ]
svm_fit_pred_neg_radial = log_trng_data[svm_fit_pred_radial == 0, ]
plot(svm_fit_pred_pos_radial$x1, svm_fit_pred_pos_radial$x2, col = "red", xlab = "X1", ylab = "X2", pch = "+")
points(svm_fit_pred_neg_radial$x1, svm_fit_pred_neg_radial$x2, col = "blue", pch = 4)
## (i) Comment on your results.
The radial-kernel SVM in part (h) shows a far more distinct non-linear decision boundary than the other approaches, which either produce less clear-cut linear boundaries or fail to recover the class structure at all.
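As a rough numerical check on this visual comparison, a minimal sketch below computes the training misclassification rate of each fit, assuming the prediction objects created above (log_reg_pred_prob, log_reg_nonlinear_pred_prob, svm.pred, svm_fit_pred_radial).
train_errors <- c(
  logistic_linear    = mean(log_reg_pred_prob != y),            # linear terms only
  logistic_nonlinear = mean(log_reg_nonlinear_pred_prob != y),  # quadratic terms
  svc_linear         = mean(svm.pred != y),                     # support vector classifier
  svm_radial         = mean(svm_fit_pred_radial != y)           # radial-kernel SVM
)
round(train_errors, 3)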
## 7. In this problem, you will use support vector approaches in order to predict whether a given car gets high or low gas mileage based on the Auto data set.
library(ISLR)
## Warning: package 'ISLR' was built under R version 4.3.2
attach(Auto)
library(e1071)
med_auto_mpg = median(Auto$mpg)
vari_bin = ifelse(Auto$mpg > med_auto_mpg, 1, 0)
Auto$mpg_binary_var = as.factor(vari_bin)
set.seed(1)
svm_tune = tune(svm, mpg~., data = Auto, kernel = "linear", ranges = list(cost = c(0.01, 0.1, 1, 5, 10, 50, 100)))
summary(svm_tune)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.1
##
## - best performance: 8.981009
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-02 10.305990 5.295587
## 2 1e-01 8.981009 4.750742
## 3 1e+00 9.647184 4.313908
## 4 5e+00 10.149220 4.755080
## 5 1e+01 10.306219 4.953047
## 6 5e+01 10.631566 5.129439
## 7 1e+02 10.684083 5.080506
For the linear kernel, the lowest cross-validation error (8.98) occurs at cost = 0.1.
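As a side note, tune() also keeps the refit winning model, so the best fit can be extracted directly from svm_tune rather than refit by hand; a brief sketch (best_linear_fit is a name chosen here for illustration):
svm_tune$best.parameters                 # the cost value with the lowest CV error
best_linear_fit <- svm_tune$best.model   # the model refit at that cost
summary(best_linear_fit)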
set.seed(2)
svm_poly = tune(svm, mpg ~ ., data = Auto, kernel = "polynomial", ranges = list(cost = c(0.01, 0.1, 1, 5, 10, 50, 100), degree = c(2, 3, 4, 5)))
summary(svm_poly)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 100 2
##
## - best performance: 17.65639
##
## - Detailed performance results:
## cost degree error dispersion
## 1 1e-02 2 61.73797 13.574948
## 2 1e-01 2 61.59446 13.602921
## 3 1e+00 2 60.15304 13.792932
## 4 5e+00 2 55.06386 15.193907
## 5 1e+01 2 50.95606 15.723882
## 6 5e+01 2 28.51889 9.993313
## 7 1e+02 2 17.65639 6.118173
## 8 1e-02 3 61.75044 13.571592
## 9 1e-01 3 61.71831 13.569399
## 10 1e+00 3 61.39833 13.547581
## 11 5e+00 3 59.99304 13.432082
## 12 1e+01 3 58.28857 13.277604
## 13 5e+01 3 47.62254 12.244469
## 14 1e+02 3 41.11944 10.878007
## 15 1e-02 4 61.75395 13.571849
## 16 1e-01 4 61.75343 13.571969
## 17 1e+00 4 61.74822 13.573169
## 18 5e+00 4 61.72510 13.578511
## 19 1e+01 4 61.69626 13.585205
## 20 5e+01 4 61.46773 13.639401
## 21 1e+02 4 61.18770 13.708674
## 22 1e-02 5 61.75400 13.571835
## 23 1e-01 5 61.75395 13.571833
## 24 1e+00 5 61.75336 13.571812
## 25 5e+00 5 61.75076 13.571718
## 26 1e+01 5 61.74752 13.571601
## 27 5e+01 5 61.72156 13.570669
## 28 1e+02 5 61.68916 13.569517
set.seed(2)
svm_radial = tune(svm, mpg ~ ., data = Auto, kernel = "radial", ranges = list(gamma = c(0.01, 0.1, 1, 10, 100), cost = c(0.01, 0.1, 1, 5, 10, 50, 100), degree = c(2, 3, 4, 5)))
summary(svm_radial)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## gamma cost degree
## 0.1 5 2
##
## - best performance: 6.498043
##
## - Detailed performance results:
## gamma cost degree error dispersion
## 1 1e-02 1e-02 2 43.589029 12.099454
## 2 1e-01 1e-02 2 30.860013 10.718178
## 3 1e+00 1e-02 2 60.022394 13.337004
## 4 1e+01 1e-02 2 61.700791 13.484368
## 5 1e+02 1e-02 2 61.723405 13.484402
## 6 1e-02 1e-01 2 13.785012 5.925585
## 7 1e-01 1e-01 2 9.994363 4.477300
## 8 1e+00 1e-01 2 46.903846 11.752198
## 9 1e+01 1e-01 2 61.386759 13.250273
## 10 1e+02 1e-01 2 61.609286 13.250083
## 11 1e-02 1e+00 2 7.748636 3.531173
## 12 1e-01 1e+00 2 6.691829 2.643121
## 13 1e+00 1e+00 2 21.684672 6.769173
## 14 1e+01 1e+00 2 59.571108 12.871977
## 15 1e+02 1e+00 2 61.303858 12.780671
## 16 1e-02 5e+00 2 6.942622 2.873995
## 17 1e-01 5e+00 2 6.498043 1.915277
## 18 1e+00 5e+00 2 20.556407 5.817490
## 19 1e+01 5e+00 2 59.223154 11.676800
## 20 1e+02 5e+00 2 61.037610 11.751463
## 21 1e-02 1e+01 2 6.933472 2.745711
## 22 1e-01 1e+01 2 6.568396 1.851267
## 23 1e+00 1e+01 2 20.535666 5.787001
## 24 1e+01 1e+01 2 59.223154 11.676800
## 25 1e+02 1e+01 2 61.037610 11.751463
## 26 1e-02 5e+01 2 7.190176 2.258457
## 27 1e-01 5e+01 2 6.693108 1.681847
## 28 1e+00 5e+01 2 20.535666 5.787001
## 29 1e+01 5e+01 2 59.223154 11.676800
## 30 1e+02 5e+01 2 61.037610 11.751463
## 31 1e-02 1e+02 2 7.243347 2.386674
## 32 1e-01 1e+02 2 6.851118 1.622121
## 33 1e+00 1e+02 2 20.535666 5.787001
## 34 1e+01 1e+02 2 59.223154 11.676800
## 35 1e+02 1e+02 2 61.037610 11.751463
## 36 1e-02 1e-02 3 43.589029 12.099454
## 37 1e-01 1e-02 3 30.860013 10.718178
## 38 1e+00 1e-02 3 60.022394 13.337004
## 39 1e+01 1e-02 3 61.700791 13.484368
## 40 1e+02 1e-02 3 61.723405 13.484402
## 41 1e-02 1e-01 3 13.785012 5.925585
## 42 1e-01 1e-01 3 9.994363 4.477300
## 43 1e+00 1e-01 3 46.903846 11.752198
## 44 1e+01 1e-01 3 61.386759 13.250273
## 45 1e+02 1e-01 3 61.609286 13.250083
## 46 1e-02 1e+00 3 7.748636 3.531173
## 47 1e-01 1e+00 3 6.691829 2.643121
## 48 1e+00 1e+00 3 21.684672 6.769173
## 49 1e+01 1e+00 3 59.571108 12.871977
## 50 1e+02 1e+00 3 61.303858 12.780671
## 51 1e-02 5e+00 3 6.942622 2.873995
## 52 1e-01 5e+00 3 6.498043 1.915277
## 53 1e+00 5e+00 3 20.556407 5.817490
## 54 1e+01 5e+00 3 59.223154 11.676800
## 55 1e+02 5e+00 3 61.037610 11.751463
## 56 1e-02 1e+01 3 6.933472 2.745711
## 57 1e-01 1e+01 3 6.568396 1.851267
## 58 1e+00 1e+01 3 20.535666 5.787001
## 59 1e+01 1e+01 3 59.223154 11.676800
## 60 1e+02 1e+01 3 61.037610 11.751463
## 61 1e-02 5e+01 3 7.190176 2.258457
## 62 1e-01 5e+01 3 6.693108 1.681847
## 63 1e+00 5e+01 3 20.535666 5.787001
## 64 1e+01 5e+01 3 59.223154 11.676800
## 65 1e+02 5e+01 3 61.037610 11.751463
## 66 1e-02 1e+02 3 7.243347 2.386674
## 67 1e-01 1e+02 3 6.851118 1.622121
## 68 1e+00 1e+02 3 20.535666 5.787001
## 69 1e+01 1e+02 3 59.223154 11.676800
## 70 1e+02 1e+02 3 61.037610 11.751463
## 71 1e-02 1e-02 4 43.589029 12.099454
## 72 1e-01 1e-02 4 30.860013 10.718178
## 73 1e+00 1e-02 4 60.022394 13.337004
## 74 1e+01 1e-02 4 61.700791 13.484368
## 75 1e+02 1e-02 4 61.723405 13.484402
## 76 1e-02 1e-01 4 13.785012 5.925585
## 77 1e-01 1e-01 4 9.994363 4.477300
## 78 1e+00 1e-01 4 46.903846 11.752198
## 79 1e+01 1e-01 4 61.386759 13.250273
## 80 1e+02 1e-01 4 61.609286 13.250083
## 81 1e-02 1e+00 4 7.748636 3.531173
## 82 1e-01 1e+00 4 6.691829 2.643121
## 83 1e+00 1e+00 4 21.684672 6.769173
## 84 1e+01 1e+00 4 59.571108 12.871977
## 85 1e+02 1e+00 4 61.303858 12.780671
## 86 1e-02 5e+00 4 6.942622 2.873995
## 87 1e-01 5e+00 4 6.498043 1.915277
## 88 1e+00 5e+00 4 20.556407 5.817490
## 89 1e+01 5e+00 4 59.223154 11.676800
## 90 1e+02 5e+00 4 61.037610 11.751463
## 91 1e-02 1e+01 4 6.933472 2.745711
## 92 1e-01 1e+01 4 6.568396 1.851267
## 93 1e+00 1e+01 4 20.535666 5.787001
## 94 1e+01 1e+01 4 59.223154 11.676800
## 95 1e+02 1e+01 4 61.037610 11.751463
## 96 1e-02 5e+01 4 7.190176 2.258457
## 97 1e-01 5e+01 4 6.693108 1.681847
## 98 1e+00 5e+01 4 20.535666 5.787001
## 99 1e+01 5e+01 4 59.223154 11.676800
## 100 1e+02 5e+01 4 61.037610 11.751463
## 101 1e-02 1e+02 4 7.243347 2.386674
## 102 1e-01 1e+02 4 6.851118 1.622121
## 103 1e+00 1e+02 4 20.535666 5.787001
## 104 1e+01 1e+02 4 59.223154 11.676800
## 105 1e+02 1e+02 4 61.037610 11.751463
## 106 1e-02 1e-02 5 43.589029 12.099454
## 107 1e-01 1e-02 5 30.860013 10.718178
## 108 1e+00 1e-02 5 60.022394 13.337004
## 109 1e+01 1e-02 5 61.700791 13.484368
## 110 1e+02 1e-02 5 61.723405 13.484402
## 111 1e-02 1e-01 5 13.785012 5.925585
## 112 1e-01 1e-01 5 9.994363 4.477300
## 113 1e+00 1e-01 5 46.903846 11.752198
## 114 1e+01 1e-01 5 61.386759 13.250273
## 115 1e+02 1e-01 5 61.609286 13.250083
## 116 1e-02 1e+00 5 7.748636 3.531173
## 117 1e-01 1e+00 5 6.691829 2.643121
## 118 1e+00 1e+00 5 21.684672 6.769173
## 119 1e+01 1e+00 5 59.571108 12.871977
## 120 1e+02 1e+00 5 61.303858 12.780671
## 121 1e-02 5e+00 5 6.942622 2.873995
## 122 1e-01 5e+00 5 6.498043 1.915277
## 123 1e+00 5e+00 5 20.556407 5.817490
## 124 1e+01 5e+00 5 59.223154 11.676800
## 125 1e+02 5e+00 5 61.037610 11.751463
## 126 1e-02 1e+01 5 6.933472 2.745711
## 127 1e-01 1e+01 5 6.568396 1.851267
## 128 1e+00 1e+01 5 20.535666 5.787001
## 129 1e+01 1e+01 5 59.223154 11.676800
## 130 1e+02 1e+01 5 61.037610 11.751463
## 131 1e-02 5e+01 5 7.190176 2.258457
## 132 1e-01 5e+01 5 6.693108 1.681847
## 133 1e+00 5e+01 5 20.535666 5.787001
## 134 1e+01 5e+01 5 59.223154 11.676800
## 135 1e+02 5e+01 5 61.037610 11.751463
## 136 1e-02 1e+02 5 7.243347 2.386674
## 137 1e-01 1e+02 5 6.851118 1.622121
## 138 1e+00 1e+02 5 20.535666 5.787001
## 139 1e+01 1e+02 5 59.223154 11.676800
## 140 1e+02 1e+02 5 61.037610 11.751463
For the radial kernel, the lowest cross-validation error occurs at gamma = 0.1 and cost = 5 (the degree parameter has no effect on a radial kernel, which is why the results repeat identically across degrees); for the polynomial kernel, the lowest error occurs at degree = 2 and cost = 100.
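To compare the three kernels directly, a short sketch below collects the best cross-validated error from each tune() object fitted above (svm_tune, svm_poly, and svm_radial still hold the tuning results at this point):
data.frame(
  kernel        = c("linear", "polynomial", "radial"),
  best_cv_error = c(svm_tune$best.performance,
                    svm_poly$best.performance,
                    svm_radial$best.performance)
)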
svm_linear = svm(mpg_binary_var ~ ., data = Auto, kernel = "linear", cost = 0.1)
svm_poly = svm(mpg_binary_var ~ ., data = Auto, kernel = "polynomial", cost = 100,
degree = 2)
svm_radial = svm(mpg_binary_var ~ ., data = Auto, kernel = "radial", cost = 5, gamma = 0.1)
plotpairs = function(fit) {
  for (name in names(Auto)[!(names(Auto) %in% c("mpg", "mpg_binary_var", "name"))]) {
    plot(fit, Auto, as.formula(paste("mpg~", name, sep = "")))
  }
}
plotpairs(svm_linear)
plotpairs(svm_poly)
plotpairs(svm_radial)
## 8. This problem involves the OJ data set which is part of the ISLR2 package.
## (a) Create a training set containing a random sample of 800 observations, and a test set containing the remaining observations.
attach(OJ)
set.seed(1)
OJ_Split = sample(nrow(OJ), 800)
OJ_Split_train = OJ[OJ_Split,]
OJ_Split_test = OJ[-OJ_Split,]
linear_SVM = svm(Purchase ~ ., kernel = "linear", data = OJ_Split_train, cost = 0.01)
summary(linear_SVM)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ_Split_train, kernel = "linear",
## cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 435
##
## ( 219 216 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
There are a total of 435 support vectors, of which 219 belong to class CH and 216 to class MM.
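These counts can also be read off the fitted e1071 object itself; as a small sketch (nSV holds the per-class counts and tot.nSV the total):
linear_SVM$tot.nSV   # total number of support vectors
linear_SVM$nSV       # support vectors per class (CH, MM)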
pred_OJ_Split_train = predict(linear_SVM, OJ_Split_train)
(t<-table(OJ_Split_train$Purchase, pred_OJ_Split_train))
## pred_OJ_Split_train
## CH MM
## CH 420 65
## MM 75 240
misclassified <- sum(OJ_Split_train$Purchase != pred_OJ_Split_train)
training_error_rate <- misclassified / nrow(OJ_Split_train)
print(paste("Training Error Rate:", training_error_rate))
## [1] "Training Error Rate: 0.175"
pred_OJ_Split_test= predict(linear_SVM, OJ_Split_test)
(tt<-table(OJ_Split_test$Purchase, pred_OJ_Split_test))
## pred_OJ_Split_test
## CH MM
## CH 153 15
## MM 33 69
misclassified_Test <- sum(OJ_Split_test$Purchase != pred_OJ_Split_test)
test_error_rate <- misclassified_Test / nrow(OJ_Split_test)
print(paste("Test Error Rate:", test_error_rate))
## [1] "Test Error Rate: 0.177777777777778"
set.seed(1)
svm_optimal_tune_fit = tune(svm, Purchase ~ ., data = OJ_Split_train, kernel = "linear", ranges = list(cost = c(0.01,0.1,1,10)))
summary(svm_optimal_tune_fit)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.1
##
## - best performance: 0.1725
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.17625 0.02853482
## 2 0.10 0.17250 0.03162278
## 3 1.00 0.17500 0.02946278
## 4 10.00 0.17375 0.03197764
The lowest cross-validation error (0.1725) occurs at cost = 0.1, with a dispersion of 0.03162278.
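The full cross-validation results are stored in the tune object, so the error-versus-cost relationship can also be inspected graphically; a quick sketch:
perf <- svm_optimal_tune_fit$performances   # data frame with cost, error, dispersion
plot(perf$cost, perf$error, type = "b", log = "x",
     xlab = "cost", ylab = "10-fold CV error")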
svm_new_cost_fit = svm(Purchase ~ ., kernel = "linear", data = OJ_Split_train, cost = svm_optimal_tune_fit$best.parameters$cost)
pred_svm_new_cost_fit = predict(svm_new_cost_fit, OJ_Split_train)
table(OJ_Split_train$Purchase, pred_svm_new_cost_fit)
## pred_svm_new_cost_fit
## CH MM
## CH 422 63
## MM 69 246
misclassified_new_train <- sum(OJ_Split_train$Purchase != pred_svm_new_cost_fit)
training_error_rate_new <- misclassified_new_train / nrow(OJ_Split_train)
print(paste("Training Error Rate:", training_error_rate_new))
## [1] "Training Error Rate: 0.165"
pred_svm_new_cost_fit_test = predict(svm_new_cost_fit, OJ_Split_test)
table(OJ_Split_test$Purchase, pred_svm_new_cost_fit_test)
## pred_svm_new_cost_fit_test
## CH MM
## CH 155 13
## MM 31 71
misclassified_new_test <- sum(OJ_Split_test$Purchase != pred_svm_new_cost_fit_test)
test_error_rate_new <- misclassified_new_test / nrow(OJ_Split_test)
print(paste("Test Error Rate:", test_error_rate_new))
## [1] "Test Error Rate: 0.162962962962963"
set.seed(1)
svm_rad_fit1 = svm(Purchase ~ ., data = OJ_Split_train, kernel = "radial")
summary(svm_rad_fit1)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ_Split_train, kernel = "radial")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 373
##
## ( 188 185 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
There are a total of 373 support vectors, of which 188 belong to class CH and 185 to class MM.
pred_svm_rad_fit1 = predict(svm_rad_fit1, OJ_Split_train)
table(OJ_Split_train$Purchase, pred_svm_rad_fit1)
## pred_svm_rad_fit1
## CH MM
## CH 441 44
## MM 77 238
misclassified_svm_rad_fit1 <- sum(OJ_Split_train$Purchase != pred_svm_rad_fit1)
training_error_misclassified_svm_rad_fit1 <- misclassified_svm_rad_fit1 / nrow(OJ_Split_train)
print(paste("Training Error Rate:", training_error_misclassified_svm_rad_fit1))
## [1] "Training Error Rate: 0.15125"
pred_svm_rad_fit1_test = predict(svm_rad_fit1, OJ_Split_test)
table(OJ_Split_test$Purchase, pred_svm_rad_fit1_test)
## pred_svm_rad_fit1_test
## CH MM
## CH 151 17
## MM 33 69
misclassified_svm_rad_fit1_test <- sum(OJ_Split_test$Purchase != pred_svm_rad_fit1_test)
test_error_misclassified_svm_rad_fit1 <- misclassified_svm_rad_fit1_test / nrow(OJ_Split_test)
print(paste("Test Error Rate:", test_error_misclassified_svm_rad_fit1))
## [1] "Test Error Rate: 0.185185185185185"
svm_rad_1_tune = svm(Purchase ~ ., data = OJ_Split_train, kernel = "radial", cost = 1)
pred_train_tune_svm = predict(svm_rad_1_tune, OJ_Split_train)
table(OJ_Split_train$Purchase, pred_train_tune_svm)
## pred_train_tune_svm
## CH MM
## CH 441 44
## MM 77 238
misclassified2_train_tune_svm<- sum(OJ_Split_train$Purchase != pred_train_tune_svm)
training_error_misclassified_svm_rad2 <- misclassified2_train_tune_svm / nrow(OJ_Split_train)
print(paste("Training Error Rate:", training_error_misclassified_svm_rad2))
## [1] "Training Error Rate: 0.15125"
pred_svm_rad_fit1_test_tune = predict(svm_rad_1_tune, OJ_Split_test)
table(OJ_Split_test$Purchase, pred_svm_rad_fit1_test_tune)
## pred_svm_rad_fit1_test_tune
## CH MM
## CH 151 17
## MM 33 69
misclassified_svm_rad_tune_test <- sum(OJ_Split_test$Purchase != pred_svm_rad_fit1_test_tune)
test_error_misclassified_svm_rad2 <- misclassified_svm_rad_tune_test / nrow(OJ_Split_test)
print(paste("Test Error Rate:", test_error_misclassified_svm_rad2))
## [1] "Test Error Rate: 0.185185185185185"
svm_rad_1_tune_linear = svm(Purchase ~ ., data = OJ_Split_train, kernel = "linear", cost = 1)
pred_train_tune_svm_linear = predict(svm_rad_1_tune_linear, OJ_Split_train)
table(OJ_Split_train$Purchase, pred_train_tune_svm_linear)
## pred_train_tune_svm_linear
## CH MM
## CH 424 61
## MM 70 245
misclassified2_train_tune_svm_linear<- sum(OJ_Split_train$Purchase != pred_train_tune_svm_linear)
training_error_misclassified_svm_rad2_lin <- misclassified2_train_tune_svm_linear / nrow(OJ_Split_train)
print(paste("Training Error Rate:", training_error_misclassified_svm_rad2_lin))
## [1] "Training Error Rate: 0.16375"
pred_svm_rad_fit1_test_tune_linear = predict(svm_rad_1_tune_linear, OJ_Split_test)
table(OJ_Split_test$Purchase, pred_svm_rad_fit1_test_tune_linear)
## pred_svm_rad_fit1_test_tune_linear
## CH MM
## CH 155 13
## MM 29 73
misclassified_svm_rad_tune_test_lin <- sum(OJ_Split_test$Purchase != pred_svm_rad_fit1_test_tune_linear)
test_error_misclassified_svm_rad2_lin <- misclassified_svm_rad_tune_test_lin / nrow(OJ_Split_test)
print(paste("Test Error Rate:", test_error_misclassified_svm_rad2_lin))
## [1] "Test Error Rate: 0.155555555555556"
svm_pol_degree2 = svm(Purchase ~ ., kernel = "poly", data = OJ_Split_train, degree=2)
summary(svm_pol_degree2)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ_Split_train, kernel = "poly",
## degree = 2)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 1
## degree: 2
## coef.0: 0
##
## Number of Support Vectors: 447
##
## ( 225 222 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
There are a total of 447 support vectors, of which 225 belong to class CH and 222 to class MM, using a polynomial kernel with degree 2 and cost 1.
pred_train_tune_svm_poly = predict(svm_pol_degree2, OJ_Split_train)
table(OJ_Split_train$Purchase, pred_train_tune_svm_poly)
## pred_train_tune_svm_poly
## CH MM
## CH 449 36
## MM 110 205
misclassified2_train_tune_svm_poly<- sum(OJ_Split_train$Purchase != pred_train_tune_svm_poly)
training_error_misclassified_svm_poly <- misclassified2_train_tune_svm_poly / nrow(OJ_Split_train)
print(paste("Training Error Rate:", training_error_misclassified_svm_poly))
## [1] "Training Error Rate: 0.1825"
pred_svm_rad_fit1_test_tune_poly = predict(svm_pol_degree2, OJ_Split_test)
table(OJ_Split_test$Purchase, pred_svm_rad_fit1_test_tune_poly)
## pred_svm_rad_fit1_test_tune_poly
## CH MM
## CH 153 15
## MM 45 57
misclassified_svm_rad_tune_test_poly <- sum(OJ_Split_test$Purchase != pred_svm_rad_fit1_test_tune_poly)
test_error_misclassified_svm_poly <- misclassified_svm_rad_tune_test_poly / nrow(OJ_Split_test)
print(paste("Test Error Rate:", test_error_misclassified_svm_poly))
## [1] "Test Error Rate: 0.222222222222222"
set.seed(1)
tune_svm_poly = tune(svm, Purchase ~ ., data = OJ_Split_train, kernel = "poly", degree = 2, ranges = list(cost = c(0.01,0.1,1,10)))
summary(tune_svm_poly)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 10
##
## - best performance: 0.18125
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.39125 0.04210189
## 2 0.10 0.32125 0.05001736
## 3 1.00 0.20250 0.04116363
## 4 10.00 0.18125 0.02779513
The lowest cross-validation error (0.18125) occurs at cost = 10, with a dispersion of 0.02779513.
svm_poly_tune_cost_optim = svm(Purchase ~ ., data = OJ_Split_train, kernel = "poly", degree = 2, cost = tune_svm_poly$best.parameters$cost)
train_pred_optim_cost_svm_poly = predict(svm_poly_tune_cost_optim, OJ_Split_train)
table(OJ_Split_train$Purchase, train_pred_optim_cost_svm_poly)
## train_pred_optim_cost_svm_poly
## CH MM
## CH 447 38
## MM 82 233
misclassified2_train_tune_svm_poly_optim<- sum(OJ_Split_train$Purchase != train_pred_optim_cost_svm_poly)
training_error_misclassified_svm_poly_optim <- misclassified2_train_tune_svm_poly_optim / nrow(OJ_Split_train)
print(paste("Training Error Rate:", training_error_misclassified_svm_poly_optim))
## [1] "Training Error Rate: 0.15"
pred_svm_fit1_test_tune_poly_optim = predict(svm_poly_tune_cost_optim, OJ_Split_test)
table(OJ_Split_test$Purchase, pred_svm_fit1_test_tune_poly_optim)
## pred_svm_fit1_test_tune_poly_optim
## CH MM
## CH 154 14
## MM 37 65
misclassified_svm_tune_test_poly_optim <- sum(OJ_Split_test$Purchase != pred_svm_fit1_test_tune_poly_optim)
test_error_misclassified_svm_poly_optim <- misclassified_svm_tune_test_poly_optim / nrow(OJ_Split_test)
print(paste("Test Error Rate:", test_error_misclassified_svm_poly_optim))
## [1] "Test Error Rate: 0.188888888888889"
Comparing the test error rates, the linear-kernel SVM performs best on this data set (test errors of roughly 0.16 at both the tuned cost of 0.1 and at cost = 1), ahead of the radial kernel (about 0.185) and the tuned polynomial kernel (about 0.189).
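To back up that comparison, the test error rates computed above can be gathered into a single table (a small sketch using the variables already defined in the preceding parts):
data.frame(
  model      = c("linear (cost = 0.1)", "radial (cost = 1)",
                 "polynomial (degree = 2, cost = 10)"),
  test_error = c(test_error_rate_new,
                 test_error_misclassified_svm_rad2,
                 test_error_misclassified_svm_poly_optim)
)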