##5. We have seen that we can fit an SVM with a non-linear kernel in order to perform classification using a non-linear decision boundary. We will now see that we can also obtain a non-linear decision boundary by performing logistic regression using non-linear transformations of the features.
# Simulate 500 observations with two features, each uniform on [-0.5, 0.5].
x1 <- runif(500) - 0.5
x2 <- runif(500) - 0.5
# Class label: 1 where x1^2 > x2^2, otherwise 0 (a quadratic class boundary).
y <- 1 * (x1^2 - x2^2 > 0)
# Open an empty plot sized from ALL observations first. Plotting one class
# with plot() and adding the other with points() would size the axes from the
# first class only and clip any point of the second class outside that range.
plot(x1, x2, type = "n", xlab = "X1", ylab = "X2")
points(x1[y == 0], x2[y == 0], col = "green", pch = "+")
points(x1[y == 1], x2[y == 1], col = "blue", pch = 4)
## (c) Fit a logistic regression model to the data, using X1 and X2 as
## predictors.
# Logistic regression of the class label on the two raw features; no `data`
# argument is given, so y, x1 and x2 are looked up in the calling environment.
logr_model <- glm(y ~ x1 + x2, family = binomial)
# Show coefficient estimates and deviance statistics for the fit.
summary(logr_model)
##
## Call:
## glm(formula = y ~ x1 + x2, family = binomial)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.03729 0.08970 0.416 0.678
## x1 0.36158 0.31741 1.139 0.255
## x2 0.15267 0.31217 0.489 0.625
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 692.95 on 499 degrees of freedom
## Residual deviance: 691.40 on 497 degrees of freedom
## AIC: 697.4
##
## Number of Fisher Scoring iterations: 3
# Gather the simulated features and labels into one data frame so the same
# object can be passed to predict() and subset by predicted class.
logr_model_data <- data.frame(x1 = x1, x2 = x2, y = y)
# Fitted probability of class 1 for every training observation.
logr_model_data_pred <- predict(logr_model, logr_model_data, type = "response")
# Threshold at 0.5 to obtain predicted class labels (1 / 0); despite the
# "_prob" suffix this vector holds classes, not probabilities.
logr_model_data_pred_prob <- ifelse(logr_model_data_pred > 0.5, 1, 0)
logr_model_data_pred_prob_pos <- logr_model_data[logr_model_data_pred_prob == 1, ]
logr_model_data_pred_prob_neg <- logr_model_data[logr_model_data_pred_prob == 0, ]
# Size the axes from ALL observations before drawing either class. Calling
# plot() on the predicted-positive subset alone clips negatives that fall
# outside its range and fails outright when no observation is predicted
# positive.
plot(logr_model_data$x1, logr_model_data$x2, type = "n", xlab = "X1", ylab = "X2")
points(logr_model_data_pred_prob_pos$x1, logr_model_data_pred_prob_pos$x2, col = "green", pch = "+")
points(logr_model_data_pred_prob_neg$x1, logr_model_data_pred_prob_neg$x2, col = "blue", pch = 4)
## (e) Now fit a logistic regression model to the data using non-linear
## functions of X1 and X2 as predictors (e.g. X1^2, X1 × X2, log(X2), and so
## forth).
# Logistic regression on second-degree polynomial terms of each feature plus
# their interaction, so the fitted boundary can be non-linear in x1 and x2.
# NOTE(review): the warnings recorded below are presumably due to y being an
# exact function of x1^2 and x2^2, which makes the classes separable in these
# terms -- confirm.
log_reg_NLR <- glm(y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2), data = logr_model_data, family = binomial)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Fitted probabilities, thresholded at 0.5 into predicted class labels.
log_reg_NLR_pred <- predict(log_reg_NLR, logr_model_data, type = "response")
log_reg_NLR_pred_prob <- ifelse(log_reg_NLR_pred > 0.5, 1, 0)
log_reg_NLR_pred_prob_pos <- logr_model_data[log_reg_NLR_pred_prob == 1, ]
log_reg_NLR_pred_prob_neg <- logr_model_data[log_reg_NLR_pred_prob == 0, ]
# Size the axes from all observations first; plotting the positive subset
# alone would clip negatives outside its range and fail on an empty subset.
plot(logr_model_data$x1, logr_model_data$x2, type = "n", xlab = "X1", ylab = "X2")
points(log_reg_NLR_pred_prob_pos$x1, log_reg_NLR_pred_prob_pos$x2, col = "red", pch = "+")
points(log_reg_NLR_pred_prob_neg$x1, log_reg_NLR_pred_prob_neg$x2, col = "blue", pch = 4)
## (g) Fit a support vector classifier to the data with X1 and X2 as
## predictors. Obtain a class prediction for each training observation.
## Plot the observations, colored according to the predicted class
## labels.
library(e1071)
# Linear support vector classifier on the two raw features; the response is
# converted to a factor so svm() performs classification.
svm_fit <- svm(as.factor(y) ~ x1 + x2, logr_model_data, kernel = "linear", cost = 1)
# Predicted class for every training observation.
svm_pred <- predict(svm_fit, logr_model_data)
data.pos <- logr_model_data[svm_pred == 1, ]
data.neg <- logr_model_data[svm_pred == 0, ]
# Size the axes from all observations first. If the classifier assigns every
# observation to one class, data.pos or data.neg is empty and a plot() call on
# that subset would stop with an error; points() on an empty subset is
# harmless.
plot(logr_model_data$x1, logr_model_data$x2, type = "n", xlab = "x1", ylab = "x2")
points(data.pos$x1, data.pos$x2, col = "blue", pch = "+")
points(data.neg$x1, data.neg$x2, col = "red", pch = 4)
## (h) Fit a SVM using a non-linear kernel to the data. Obtain a class
## prediction for each training observation. Plot the observations, colored
## according to the predicted class labels.
library(e1071)
# Radial-kernel SVM (gamma = 1) on the two raw features.
svm_radial <- svm(as.factor(y) ~ x1 + x2, logr_model_data, kernel = "radial", gamma = 1)
# Predicted class for every training observation.
svm_pred_radial <- predict(svm_radial, logr_model_data)
svm_pred_pos_radial <- logr_model_data[svm_pred_radial == 1, ]
svm_pred_neg_radial <- logr_model_data[svm_pred_radial == 0, ]
# Size the axes from all observations first; plotting the positive subset
# alone would clip negatives outside its range and fail on an empty subset.
plot(logr_model_data$x1, logr_model_data$x2, type = "n", xlab = "X1", ylab = "X2")
points(svm_pred_pos_radial$x1, svm_pred_pos_radial$x2, col = "red", pch = "+")
points(svm_pred_neg_radial$x1, svm_pred_neg_radial$x2, col = "blue", pch = 4)
## (i) Comment on your results.
## Compared with the other methods, the support vector machine with a radial
## kernel in (h) produces a much more pronounced non-linear boundary. In
## contrast, the other methods either fail to detect the non-linear boundary
## at all or display a less distinct one.
library(ISLR)
## Warning: package 'ISLR' was built under R version 4.3.2
library(e1071)
# NOTE(review): attach() is kept because code outside this section may rely on
# it; nothing in the statements below needs it, since every column is reached
# through `Auto$`.
attach(Auto)
# Split cars at the median mileage: 1 = above the median, 0 = at or below it.
median_mpg <- median(Auto$mpg)
binary_var <- as.numeric(Auto$mpg > median_mpg)
# Store the indicator on the data frame as a factor.
Auto$mpg_binary <- factor(binary_var)
set.seed(1)
# Cross-validate the cost of a linear support vector classifier.
# The response must be the factor mpg_binary: with the numeric `mpg` as the
# response, svm() fits a support vector *regression* and tune() reports mean
# squared error rather than a misclassification rate.
# NOTE(review): `.` still includes mpg itself among the predictors, as in the
# final `svm(mpg_binary ~ ., ...)` fits later in this file; consider
# `. - mpg` if that leakage is not intended.
# NOTE: the output recorded after this call was produced by the earlier
# `mpg ~ .` version; re-run to refresh it.
svm_tune_lin <- tune(svm, mpg_binary ~ ., data = Auto, kernel = "linear",
  ranges = list(cost = c(0.01, 0.1, 1, 10, 100)))
summary(svm_tune_lin)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.1
##
## - best performance: 8.981009
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-02 10.305990 5.295587
## 2 1e-01 8.981009 4.750742
## 3 1e+00 9.647184 4.313908
## 4 1e+01 10.306219 4.953047
## 5 1e+02 10.684083 5.080506
## For the linear kernel, the lowest cross-validation error occurs at cost = 0.1.
set.seed(2)
# Cross-validate cost and degree for a polynomial-kernel classifier.
# The response is the factor mpg_binary so that tune() evaluates
# classification error; the numeric `mpg` would trigger SVM regression (MSE).
# NOTE(review): `.` still includes mpg itself among the predictors, as in the
# final `svm(mpg_binary ~ ., ...)` fits later in this file.
# NOTE: the output recorded after this call was produced by the earlier
# `mpg ~ .` version; re-run to refresh it.
svm_poly_tune_fit <- tune(svm, mpg_binary ~ ., data = Auto, kernel = "polynomial",
  ranges = list(cost = c(0.01, 0.1, 1, 10, 100), degree = c(2, 3, 4)))
summary(svm_poly_tune_fit)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 100 2
##
## - best performance: 17.65639
##
## - Detailed performance results:
## cost degree error dispersion
## 1 1e-02 2 61.73797 13.574948
## 2 1e-01 2 61.59446 13.602921
## 3 1e+00 2 60.15304 13.792932
## 4 1e+01 2 50.95606 15.723882
## 5 1e+02 2 17.65639 6.118173
## 6 1e-02 3 61.75044 13.571592
## 7 1e-01 3 61.71831 13.569399
## 8 1e+00 3 61.39833 13.547581
## 9 1e+01 3 58.28857 13.277604
## 10 1e+02 3 41.11944 10.878007
## 11 1e-02 4 61.75395 13.571849
## 12 1e-01 4 61.75343 13.571969
## 13 1e+00 4 61.74822 13.573169
## 14 1e+01 4 61.69626 13.585205
## 15 1e+02 4 61.18770 13.708674
set.seed(2)
# Cross-validate gamma and cost for a radial-kernel classifier.
# The response is the factor mpg_binary so that tune() evaluates
# classification error; the numeric `mpg` would trigger SVM regression (MSE).
# `degree` is not searched: the radial kernel does not use it (the previously
# recorded results are identical across degree = 2, 3, 4), so including it
# only tripled the number of fits.
# NOTE(review): `.` still includes mpg itself among the predictors, as in the
# final `svm(mpg_binary ~ ., ...)` fits later in this file.
# NOTE: the output recorded after this call was produced by the earlier
# `mpg ~ .` version with a degree grid; re-run to refresh it.
svm_radial_tune_fit <- tune(svm, mpg_binary ~ ., data = Auto, kernel = "radial",
  ranges = list(gamma = c(0.01, 0.1, 1, 10, 100), cost = c(0.01, 0.1, 1, 10, 100)))
summary(svm_radial_tune_fit)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## gamma cost degree
## 0.1 10 2
##
## - best performance: 6.568396
##
## - Detailed performance results:
## gamma cost degree error dispersion
## 1 1e-02 1e-02 2 43.589029 12.099454
## 2 1e-01 1e-02 2 30.860013 10.718178
## 3 1e+00 1e-02 2 60.022394 13.337004
## 4 1e+01 1e-02 2 61.700791 13.484368
## 5 1e+02 1e-02 2 61.723405 13.484402
## 6 1e-02 1e-01 2 13.785012 5.925585
## 7 1e-01 1e-01 2 9.994363 4.477300
## 8 1e+00 1e-01 2 46.903846 11.752198
## 9 1e+01 1e-01 2 61.386759 13.250273
## 10 1e+02 1e-01 2 61.609286 13.250083
## 11 1e-02 1e+00 2 7.748636 3.531173
## 12 1e-01 1e+00 2 6.691829 2.643121
## 13 1e+00 1e+00 2 21.684672 6.769173
## 14 1e+01 1e+00 2 59.571108 12.871977
## 15 1e+02 1e+00 2 61.303858 12.780671
## 16 1e-02 1e+01 2 6.933472 2.745711
## 17 1e-01 1e+01 2 6.568396 1.851267
## 18 1e+00 1e+01 2 20.535666 5.787001
## 19 1e+01 1e+01 2 59.223154 11.676800
## 20 1e+02 1e+01 2 61.037610 11.751463
## 21 1e-02 1e+02 2 7.243347 2.386674
## 22 1e-01 1e+02 2 6.851118 1.622121
## 23 1e+00 1e+02 2 20.535666 5.787001
## 24 1e+01 1e+02 2 59.223154 11.676800
## 25 1e+02 1e+02 2 61.037610 11.751463
## 26 1e-02 1e-02 3 43.589029 12.099454
## 27 1e-01 1e-02 3 30.860013 10.718178
## 28 1e+00 1e-02 3 60.022394 13.337004
## 29 1e+01 1e-02 3 61.700791 13.484368
## 30 1e+02 1e-02 3 61.723405 13.484402
## 31 1e-02 1e-01 3 13.785012 5.925585
## 32 1e-01 1e-01 3 9.994363 4.477300
## 33 1e+00 1e-01 3 46.903846 11.752198
## 34 1e+01 1e-01 3 61.386759 13.250273
## 35 1e+02 1e-01 3 61.609286 13.250083
## 36 1e-02 1e+00 3 7.748636 3.531173
## 37 1e-01 1e+00 3 6.691829 2.643121
## 38 1e+00 1e+00 3 21.684672 6.769173
## 39 1e+01 1e+00 3 59.571108 12.871977
## 40 1e+02 1e+00 3 61.303858 12.780671
## 41 1e-02 1e+01 3 6.933472 2.745711
## 42 1e-01 1e+01 3 6.568396 1.851267
## 43 1e+00 1e+01 3 20.535666 5.787001
## 44 1e+01 1e+01 3 59.223154 11.676800
## 45 1e+02 1e+01 3 61.037610 11.751463
## 46 1e-02 1e+02 3 7.243347 2.386674
## 47 1e-01 1e+02 3 6.851118 1.622121
## 48 1e+00 1e+02 3 20.535666 5.787001
## 49 1e+01 1e+02 3 59.223154 11.676800
## 50 1e+02 1e+02 3 61.037610 11.751463
## 51 1e-02 1e-02 4 43.589029 12.099454
## 52 1e-01 1e-02 4 30.860013 10.718178
## 53 1e+00 1e-02 4 60.022394 13.337004
## 54 1e+01 1e-02 4 61.700791 13.484368
## 55 1e+02 1e-02 4 61.723405 13.484402
## 56 1e-02 1e-01 4 13.785012 5.925585
## 57 1e-01 1e-01 4 9.994363 4.477300
## 58 1e+00 1e-01 4 46.903846 11.752198
## 59 1e+01 1e-01 4 61.386759 13.250273
## 60 1e+02 1e-01 4 61.609286 13.250083
## 61 1e-02 1e+00 4 7.748636 3.531173
## 62 1e-01 1e+00 4 6.691829 2.643121
## 63 1e+00 1e+00 4 21.684672 6.769173
## 64 1e+01 1e+00 4 59.571108 12.871977
## 65 1e+02 1e+00 4 61.303858 12.780671
## 66 1e-02 1e+01 4 6.933472 2.745711
## 67 1e-01 1e+01 4 6.568396 1.851267
## 68 1e+00 1e+01 4 20.535666 5.787001
## 69 1e+01 1e+01 4 59.223154 11.676800
## 70 1e+02 1e+01 4 61.037610 11.751463
## 71 1e-02 1e+02 4 7.243347 2.386674
## 72 1e-01 1e+02 4 6.851118 1.622121
## 73 1e+00 1e+02 4 20.535666 5.787001
## 74 1e+01 1e+02 4 59.223154 11.676800
## 75 1e+02 1e+02 4 61.037610 11.751463
## The lowest cross-validation error for the polynomial kernel is at
## degree = 2 and cost = 100, while for the radial kernel it is at gamma = 0.1
## and cost = 10 (the radial results are identical for every degree, so degree
## plays no role there).
# Final classifiers for each kernel, fitted with the hyper-parameters chosen
# by the corresponding tune() run instead of values copied by hand, so the
# fits stay in sync with the tuning results if those are re-run.
svm_linear <- svm(mpg_binary ~ ., data = Auto, kernel = "linear",
  cost = svm_tune_lin$best.parameters$cost)
svm_poly <- svm(mpg_binary ~ ., data = Auto, kernel = "polynomial",
  cost = svm_poly_tune_fit$best.parameters$cost,
  degree = svm_poly_tune_fit$best.parameters$degree)
# The radial kernel uses gamma and cost only.
svm_radial <- svm(mpg_binary ~ ., data = Auto, kernel = "radial",
  cost = svm_radial_tune_fit$best.parameters$cost,
  gamma = svm_radial_tune_fit$best.parameters$gamma)
# Draw one SVM classification plot per numeric predictor, each with mpg on the
# other axis.
#
# fit:  fitted svm object to plot.
# data: data frame handed to plot(); defaults to the global `Auto`, which is
#       what the function used before this argument existed.
# Returns NULL invisibly; called for its plotting side effect.
plotpairs <- function(fit, data = Auto) {
  # Columns never plotted against mpg: mpg itself, the derived class label
  # (stored as `mpg_binary`; the old name `binary_var` is kept for safety)
  # and the car-name column.
  skipped <- c("mpg", "mpg_binary", "binary_var", "name")
  for (name in setdiff(names(data), skipped)) {
    if (is.numeric(data[[name]])) {
      plot(fit, data, as.formula(paste0("mpg ~ ", name)))
    }
  }
  invisible(NULL)
}
# Produce the pairwise plots for each of the three fitted kernels.
plotpairs(svm_linear)
plotpairs(svm_poly)
plotpairs(svm_radial)
## 8. This problem involves the OJ data set which is part of the ISLR2
## package.
## (a) Create a training set containing a random sample of 800
## observations, and a test set containing the remaining observations.
# NOTE(review): attach() is kept because code outside this section may rely on
# it; the statements below always go through the data frames explicitly.
attach(OJ)
set.seed(1)
# Draw 800 row indices at random for training; the remaining rows form the
# test set.
OJ_Split_data <- sample.int(nrow(OJ), 800)
OJ_train_data <- OJ[OJ_Split_data, ]
OJ_test_data <- OJ[setdiff(seq_len(nrow(OJ)), OJ_Split_data), ]
# Linear support vector classifier for Purchase on all other columns,
# cost = 0.01.
linear_SVM1 <- svm(Purchase ~ ., kernel = "linear", data = OJ_train_data, cost = 0.01)
summary(linear_SVM1)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ_train_data, kernel = "linear",
## cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 435
##
## ( 219 216 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
## Of the 435 support vectors in total, 219 belong to class CH and 216 to class MM.
# Training-set predictions of the cost = 0.01 linear SVM, cross-tabulated
# against the true labels (rows: actual, columns: predicted).
pred_OJ_train_data <- predict(linear_SVM1, OJ_train_data)
t <- table(OJ_train_data$Purchase, pred_OJ_train_data)
t
## pred_OJ_train_data
## CH MM
## CH 420 65
## MM 75 240
# Training error: share of training rows whose prediction differs from the
# label.
misclassified_svm1 <- sum(pred_OJ_train_data != OJ_train_data$Purchase)
training_error_rate_svm1 <- misclassified_svm1 / nrow(OJ_train_data)
print(paste("Training Error Rate:", training_error_rate_svm1))
## [1] "Training Error Rate: 0.175"
# Same evaluation on the held-out test rows.
pred_OJ_test_data <- predict(linear_SVM1, OJ_test_data)
tt <- table(OJ_test_data$Purchase, pred_OJ_test_data)
tt
## pred_OJ_test_data
## CH MM
## CH 153 15
## MM 33 69
misclassified_Test_svm1 <- sum(pred_OJ_test_data != OJ_test_data$Purchase)
test_error_rate_svm1 <- misclassified_Test_svm1 / nrow(OJ_test_data)
print(paste("Test Error Rate:", test_error_rate_svm1))
## [1] "Test Error Rate: 0.177777777777778"
set.seed(1)
# Cross-validate the cost of the linear SVM on the training set over the grid
# 0.01, 0.1, 1, 10.
svm_optimal_tune1 <- tune(
  svm, Purchase ~ .,
  data = OJ_train_data,
  kernel = "linear",
  ranges = list(cost = c(0.01, 0.1, 1, 10))
)
summary(svm_optimal_tune1)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.1
##
## - best performance: 0.1725
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.17625 0.02853482
## 2 0.10 0.17250 0.03162278
## 3 1.00 0.17500 0.02946278
## 4 10.00 0.17375 0.03197764
## The lowest cross-validation error (0.1725) occurs at cost = 0.1, with a
## dispersion of 0.03162278.
# Refit the linear SVM at the cost selected by the cross-validation above.
svm_new_cost_fit1 <- svm(Purchase ~ ., kernel = "linear", data = OJ_train_data, cost = svm_optimal_tune1$best.parameters$cost)
# Training-set confusion matrix (rows: actual, columns: predicted).
pred_svm_new_cost_fit1 <- predict(svm_new_cost_fit1, OJ_train_data)
table(OJ_train_data$Purchase, pred_svm_new_cost_fit1)
## pred_svm_new_cost_fit1
## CH MM
## CH 422 63
## MM 69 246
# Training error: misclassified rows over all training rows.
misclassified_new_train1 <- sum(pred_svm_new_cost_fit1 != OJ_train_data$Purchase)
training_error_rate_new1 <- misclassified_new_train1 / nrow(OJ_train_data)
print(paste("Training Error Rate:", training_error_rate_new1))
## [1] "Training Error Rate: 0.165"
# Test-set confusion matrix and error rate for the same model.
pred_svm_new_cost_fit_test1 <- predict(svm_new_cost_fit1, OJ_test_data)
table(OJ_test_data$Purchase, pred_svm_new_cost_fit_test1)
## pred_svm_new_cost_fit_test1
## CH MM
## CH 155 13
## MM 31 71
misclassified_new_test1 <- sum(pred_svm_new_cost_fit_test1 != OJ_test_data$Purchase)
test_error_rate_new1 <- misclassified_new_test1 / nrow(OJ_test_data)
print(paste("Test Error Rate:", test_error_rate_new1))
## [1] "Test Error Rate: 0.162962962962963"
set.seed(1)
# Radial-kernel SVM for Purchase on all other columns, with svm()'s default
# cost and gamma.
svm_rad_fit2 <- svm(Purchase ~ ., data = OJ_train_data, kernel = "radial")
summary(svm_rad_fit2)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ_train_data, kernel = "radial")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 373
##
## ( 188 185 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
## There are 373 support vectors in total: 188 belong to class CH and 185 to class MM.
# Training-set confusion matrix for the default radial SVM
# (rows: actual, columns: predicted).
pred_svm_rad_fit2 <- predict(svm_rad_fit2, OJ_train_data)
table(OJ_train_data$Purchase, pred_svm_rad_fit2)
## pred_svm_rad_fit2
## CH MM
## CH 441 44
## MM 77 238
# Training error: misclassified rows over all training rows.
misclassified_svm_rad_fit2 <- sum(pred_svm_rad_fit2 != OJ_train_data$Purchase)
training_error_misclassified_svm_rad_fit2 <- misclassified_svm_rad_fit2 / nrow(OJ_train_data)
print(paste("Training Error Rate:", training_error_misclassified_svm_rad_fit2))
## [1] "Training Error Rate: 0.15125"
# Test-set confusion matrix and error rate for the same model.
pred_svm_rad_fit1_test1 <- predict(svm_rad_fit2, OJ_test_data)
table(OJ_test_data$Purchase, pred_svm_rad_fit1_test1)
## pred_svm_rad_fit1_test1
## CH MM
## CH 151 17
## MM 33 69
misclassified_svm_rad_fit1_test1 <- sum(pred_svm_rad_fit1_test1 != OJ_test_data$Purchase)
test_error_misclassified_svm_rad_fit2 <- misclassified_svm_rad_fit1_test1 / nrow(OJ_test_data)
print(paste("Test Error Rate:", test_error_misclassified_svm_rad_fit2))
## [1] "Test Error Rate: 0.185185185185185"
# Radial-kernel SVM with cost fixed at 1. No tuning grid is searched here
# despite the "tune" in the variable names.
svm_rad_1_tune2 <- svm(Purchase ~ ., data = OJ_train_data, kernel = "radial", cost = 1)
# Training-set confusion matrix (rows: actual, columns: predicted).
pred_train_tune_svm2 <- predict(svm_rad_1_tune2, OJ_train_data)
table(OJ_train_data$Purchase, pred_train_tune_svm2)
## pred_train_tune_svm2
## CH MM
## CH 441 44
## MM 77 238
# Training error: misclassified rows over all training rows.
misclassified2_train_tune_svm2 <- sum(pred_train_tune_svm2 != OJ_train_data$Purchase)
training_error_misclassified_svm_rad <- misclassified2_train_tune_svm2 / nrow(OJ_train_data)
print(paste("Training Error Rate:", training_error_misclassified_svm_rad))
## [1] "Training Error Rate: 0.15125"
# Test-set confusion matrix and error rate for the same model.
pred_svm_rad_fit1_test_tune1 <- predict(svm_rad_1_tune2, OJ_test_data)
table(OJ_test_data$Purchase, pred_svm_rad_fit1_test_tune1)
## pred_svm_rad_fit1_test_tune1
## CH MM
## CH 151 17
## MM 33 69
misclassified_svm_rad_tune_test2 <- sum(pred_svm_rad_fit1_test_tune1 != OJ_test_data$Purchase)
test_error_misclassified_svm_rad1 <- misclassified_svm_rad_tune_test2 / nrow(OJ_test_data)
print(paste("Test Error Rate:", test_error_misclassified_svm_rad1))
## [1] "Test Error Rate: 0.185185185185185"
# Linear-kernel SVM with cost fixed at 1 (the "rad" in the variable names
# notwithstanding, the kernel here is linear).
svm_rad_1_tune_linear1 <- svm(Purchase ~ ., data = OJ_train_data, kernel = "linear", cost = 1)
# Training-set confusion matrix (rows: actual, columns: predicted).
pred_train_tune_svm_linear1 <- predict(svm_rad_1_tune_linear1, OJ_train_data)
table(OJ_train_data$Purchase, pred_train_tune_svm_linear1)
## pred_train_tune_svm_linear1
## CH MM
## CH 424 61
## MM 70 245
# Training error: misclassified rows over all training rows.
misclassified2_train_tune_svm_linear2 <- sum(pred_train_tune_svm_linear1 != OJ_train_data$Purchase)
training_error_misclassified_svm_rad2_lin2 <- misclassified2_train_tune_svm_linear2 / nrow(OJ_train_data)
print(paste("Training Error Rate:", training_error_misclassified_svm_rad2_lin2))
## [1] "Training Error Rate: 0.16375"
# Test-set confusion matrix and error rate for the same model.
pred_svm_rad_fit1_test_tune_linear1 <- predict(svm_rad_1_tune_linear1, OJ_test_data)
table(OJ_test_data$Purchase, pred_svm_rad_fit1_test_tune_linear1)
## pred_svm_rad_fit1_test_tune_linear1
## CH MM
## CH 155 13
## MM 29 73
misclassified_svm_rad_tune_test_lin2 <- sum(pred_svm_rad_fit1_test_tune_linear1 != OJ_test_data$Purchase)
test_error_misclassified_svm_rad2_lin2 <- misclassified_svm_rad_tune_test_lin2 / nrow(OJ_test_data)
print(paste("Test Error Rate:", test_error_misclassified_svm_rad2_lin2))
## [1] "Test Error Rate: 0.155555555555556"
# Polynomial-kernel SVM of degree 2 for Purchase on all other columns, with
# svm()'s default cost.
svm_pol_degree21 <- svm(Purchase ~ ., kernel = "poly", data = OJ_train_data, degree = 2)
summary(svm_pol_degree21)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ_train_data, kernel = "poly",
## degree = 2)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 1
## degree: 2
## coef.0: 0
##
## Number of Support Vectors: 447
##
## ( 225 222 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
## With a polynomial kernel of degree 2 and cost 1 there are 447 support
## vectors in total: 225 belong to class CH and 222 to class MM.
# Training-set confusion matrix for the degree-2 polynomial SVM
# (rows: actual, columns: predicted).
pred_train_tune_svm_poly1 <- predict(svm_pol_degree21, OJ_train_data)
table(OJ_train_data$Purchase, pred_train_tune_svm_poly1)
## pred_train_tune_svm_poly1
## CH MM
## CH 449 36
## MM 110 205
# Training error: misclassified rows over all training rows.
misclassified2_train_tune_svm_poly1 <- sum(pred_train_tune_svm_poly1 != OJ_train_data$Purchase)
training_error_misclassified_svm_poly1 <- misclassified2_train_tune_svm_poly1 / nrow(OJ_train_data)
print(paste("Training Error Rate:", training_error_misclassified_svm_poly1))
## [1] "Training Error Rate: 0.1825"
# Test-set confusion matrix and error rate for the same model.
pred_svm_rad_fit1_test_tune_poly1 <- predict(svm_pol_degree21, OJ_test_data)
table(OJ_test_data$Purchase, pred_svm_rad_fit1_test_tune_poly1)
## pred_svm_rad_fit1_test_tune_poly1
## CH MM
## CH 153 15
## MM 45 57
misclassified_svm_rad_tune_test_poly1 <- sum(pred_svm_rad_fit1_test_tune_poly1 != OJ_test_data$Purchase)
test_error_misclassified_svm_poly1 <- misclassified_svm_rad_tune_test_poly1 / nrow(OJ_test_data)
print(paste("Test Error Rate:", test_error_misclassified_svm_poly1))
## [1] "Test Error Rate: 0.222222222222222"
set.seed(1)
# Cross-validate the cost of the degree-2 polynomial SVM on the training set
# over the grid 0.01, 0.1, 1, 10.
tune_svm_poly1 <- tune(
  svm, Purchase ~ .,
  data = OJ_train_data,
  kernel = "poly",
  degree = 2,
  ranges = list(cost = c(0.01, 0.1, 1, 10))
)
summary(tune_svm_poly1)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 10
##
## - best performance: 0.18125
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.39125 0.04210189
## 2 0.10 0.32125 0.05001736
## 3 1.00 0.20250 0.04116363
## 4 10.00 0.18125 0.02779513
## The lowest cross-validation error (0.18125) occurs at cost = 10, with a
## dispersion of 0.02779513.
# Refit the degree-2 polynomial SVM at the cost selected by cross-validation.
svm_poly_tune_cost_optim1 <- svm(Purchase ~ ., data = OJ_train_data, kernel = "poly", degree = 2, cost = tune_svm_poly1$best.parameters$cost)
# Training-set confusion matrix (rows: actual, columns: predicted).
train_pred_optim_cost_svm_poly1 <- predict(svm_poly_tune_cost_optim1, OJ_train_data)
table(OJ_train_data$Purchase, train_pred_optim_cost_svm_poly1)
## train_pred_optim_cost_svm_poly1
## CH MM
## CH 447 38
## MM 82 233
# Training error: misclassified rows over all training rows.
misclassified2_train_tune_svm_poly_optim1 <- sum(train_pred_optim_cost_svm_poly1 != OJ_train_data$Purchase)
training_error_misclassified_svm_poly_optim1 <- misclassified2_train_tune_svm_poly_optim1 / nrow(OJ_train_data)
print(paste("Training Error Rate:", training_error_misclassified_svm_poly_optim1))
## [1] "Training Error Rate: 0.15"
# Test-set predictions and confusion matrix for the same model.
pred_svm_fit1_test_tunes1 <- predict(svm_poly_tune_cost_optim1, OJ_test_data)
table(OJ_test_data$Purchase, pred_svm_fit1_test_tunes1)
## pred_svm_fit1_test_tunes1
## CH MM
## CH 154 14
## MM 37 65
# Test error of the tuned polynomial SVM. The test predictions must be
# compared with the TEST labels: comparing them with the 800 training labels
# recycled the 270 predictions (hence the length warnings) and produced a
# meaningless "error rate" above 1.
misclassified_svm_tune_test1 <- sum(OJ_test_data$Purchase != pred_svm_fit1_test_tunes1)
test_error_misclassified_svm_poly1 <- misclassified_svm_tune_test1 / nrow(OJ_test_data)
print(paste("Test Error Rate:", test_error_misclassified_svm_poly1))
# Expected output, taken from the test confusion matrix recorded just above:
# (14 + 37) / 270.
## [1] "Test Error Rate: 0.188888888888889"
## Overall, the linear-kernel SVM performs best on this dataset: its test
## error is lower than that of the radial and polynomial kernels.