Assignment #8
Chapter 9 (SVM): Exercises 5, 7, 8
set.seed(1)
x1 = runif(500) -.5
x2 = runif(500)-.5
y = 1 * (x1^2 - x2^2 > 0)
colors = c("blue", "yellow")
plot(x1, x2, col = colors[y + 1], pch = 20, xlab = "x1", ylab = "x2")
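Since y = 1 exactly when x1^2 > x2^2, the true decision boundary is the pair of lines x2 = x1 and x2 = -x1. As a quick sanity check (my own addition, not part of the exercise), we can overlay those lines on the plot and look at the class balance:
# Overlay the true boundary x1^2 = x2^2, i.e. the lines x2 = x1 and x2 = -x1
abline(0, 1, lty = 2)
abline(0, -1, lty = 2)
# The two classes should be roughly balanced
table(y)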
Just a for-fun visualization (not for grading; only the plot above is):
library(plotly)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.2.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
plot_ly(x = x1, y = x2, z = rep(0, length(x1)), color = factor(y),
colors = colors, type = "scatter3d", mode = "markers")
lm.fit = glm(y ~ x1 + x2, family = binomial)
summary(lm.fit)
##
## Call:
## glm(formula = y ~ x1 + x2, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.179 -1.139 -1.112 1.206 1.257
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.087260 0.089579 -0.974 0.330
## x1 0.196199 0.316864 0.619 0.536
## x2 -0.002854 0.305712 -0.009 0.993
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 692.18 on 499 degrees of freedom
## Residual deviance: 691.79 on 497 degrees of freedom
## AIC: 697.79
##
## Number of Fisher Scoring iterations: 3
set.seed(1)
y_pred <- predict(lm.fit, newdata = data.frame(x1 = x1, x2 = x2), type = "response")
y_pred_class <- ifelse(y_pred > 0.5, 1, 0)
# Plot observations colored according to true class labels
plot(x1, x2, col = ifelse(y == 0, "blue", "yellow"), pch = 20, xlab = "x1", ylab = "x2")
# Add linear decision boundary to plot
x_seq <- seq(-0.5, 0.5, length.out = 100)
y_seq <- seq(-0.5, 0.5, length.out = 100)
z <- matrix(0, nrow = length(x_seq), ncol = length(y_seq))
for (i in 1:length(x_seq)) {
  for (j in 1:length(y_seq)) {
    z[i, j] <- predict(lm.fit, newdata = data.frame(x1 = x_seq[i], x2 = y_seq[j]), type = "response")
  }
}
contour(x_seq, y_seq, z, levels = 0.5, add = TRUE)
# Add predicted class labels to plot
points(x1, x2, col = ifelse(y_pred_class == 0, "blue", "yellow"), pch = 20)
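As a sanity check (not part of the assignment), the training confusion matrix and error rate show how little the purely linear logistic model learns here:
# Training confusion matrix and error rate for the linear logistic fit
table(y, y_pred_class)
mean(y_pred_class != y)  # should be close to 0.5, i.e. barely better than guessing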
data_nl <- data.frame(x1, x2, x1sq = x1^2, x2sq = x2^2, x1x2 = x1*x2, log_x2 = log(x2), sqrt_x1 = sqrt(abs(x1)))
## Warning in log(x2): NaNs produced
lm.fit <- glm(y ~ ., data = data_nl, family = "binomial")
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(lm.fit)
##
## Call:
## glm(formula = y ~ ., family = "binomial", data = data_nl)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.847e-04 -2.100e-08 -2.100e-08 2.100e-08 2.808e-04
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -334.29 555325.21 -0.001 1.000
## x1 323.12 226513.64 0.001 0.999
## x2 -495.35 1366332.92 0.000 1.000
## x1sq 3590.71 1169539.15 0.003 0.998
## x2sq -3230.89 1483373.57 -0.002 0.998
## x1x2 -862.83 683186.93 -0.001 0.999
## log_x2 -66.07 132207.10 0.000 1.000
## sqrt_x1 639.41 479936.73 0.001 0.999
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3.4497e+02 on 249 degrees of freedom
## Residual deviance: 2.4489e-07 on 242 degrees of freedom
## (250 observations deleted due to missingness)
## AIC: 16
##
## Number of Fisher Scoring iterations: 25
# Obtain predicted probabilities; only the 250 complete cases were fitted,
# since log(x2) is NaN whenever x2 <= 0
y_pred <- predict(lm.fit, type = "response")
# Convert predicted probabilities to class labels
y_pred_class <- ifelse(y_pred > 0.5, 1, 0)
# Plot the complete cases, colored according to predicted class labels
ok <- complete.cases(data_nl)
plot(x1[ok], x2[ok], col = ifelse(y_pred_class == 1, "yellow", "blue"), pch = 20, xlab = "x1", ylab = "x2")
Well, this decision boundary is obviously not linear.
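The log and square-root terms are what trigger the NaN and convergence warnings; quadratic terms alone are enough for a boundary of this shape. A minimal sketch, assuming we refit on all 500 observations without the problematic transformations:
# Quadratic logistic model; expect the 'fitted probabilities numerically 0 or 1'
# warning again, since this model can separate the classes essentially perfectly
fit_quad <- glm(y ~ x1 + x2 + I(x1^2) + I(x2^2), family = binomial)
pred_quad <- ifelse(predict(fit_quad, type = "response") > 0.5, 1, 0)
plot(x1, x2, col = ifelse(pred_quad == 1, "yellow", "blue"), pch = 20, xlab = "x1", ylab = "x2")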
# Fit a support vector classifier to the data with X1 and X2 as predictors
library(e1071)
# Intended as a support vector classifier, but y is numeric here,
# so svm() silently fits a regression model rather than a classifier
svm.fit <- svm(as.matrix(cbind(x1, x2)), y)
# Obtain class prediction for each training observation
y_pred <- predict(svm.fit, as.matrix(cbind(x1, x2)))
# Plot observations colored according to predicted class labels
plot(x1, x2, col = ifelse(y_pred == 0, "blue", "yellow"), pch = 20, xlab = "x1", ylab = "x2")
This looks confusing, but the cause is in the call itself: because y was passed as a numeric vector, svm() fit a regression model, not a classifier. Its predictions are continuous values that essentially never equal 0 exactly, so the ifelse() paints every point yellow.
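For the support vector classifier the exercise actually intends, y must be a factor. A minimal sketch (with a linear kernel the classifier still predicts nearly everything as one class, since these classes are not linearly separable):
# Classification, not regression: y is passed as a factor
dat <- data.frame(x1 = x1, x2 = x2, y = as.factor(y))
svc <- svm(y ~ x1 + x2, data = dat, kernel = "linear")
plot(x1, x2, col = ifelse(predict(svc, dat) == 0, "blue", "yellow"), pch = 20, xlab = "x1", ylab = "x2")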
# Fit SVM with different kernels
svm_poly <- svm(y ~ x1 + x2, kernel = "polynomial", degree = 2)
svm_radial <- svm(y ~ x1 + x2, kernel = "radial", gamma = 1)
svm_sigmoid <- svm(y ~ x1 + x2, kernel = "sigmoid")
svm_rbf <- svm(as.factor(y) ~ x1 + x2, kernel = "radial", gamma = 10)
# Make predictions using SVM with different kernels
y_pred_poly <- predict(svm_poly, newdata = data.frame(x1 = x1, x2 = x2))
y_pred_radial <- predict(svm_radial, newdata = data.frame(x1 = x1, x2 = x2))
y_pred_sigmoid <- predict(svm_sigmoid, newdata = data.frame(x1 = x1, x2 = x2))
y_pred_rbf <- predict(svm_rbf, newdata = data.frame(x1 = x1, x2 = x2))
# Plot observations colored by predicted class labels using different kernels
par(mfrow = c(2,2))
plot(x1, x2, col = ifelse(y_pred_poly == 1, "yellow", "blue"), pch = 20, xlab = "x1", ylab = "x2", main = "Polynomial kernel")
plot(x1, x2, col = ifelse(y_pred_radial == 1, "yellow", "blue"), pch = 20, xlab = "x1", ylab = "x2", main = "Radial kernel")
plot(x1, x2, col = ifelse(y_pred_sigmoid == 1, "yellow", "blue"), pch = 20, xlab = "x1", ylab = "x2", main = "Sigmoid kernel")
svm.pred = predict(svm_rbf, newdata = data.frame(x1 = x1, x2 = x2))
data.pos = data.frame(x1 = x1, x2 = x2)[svm.pred == 1, ]
data.neg = data.frame(x1 = x1, x2 = x2)[svm.pred == 0, ]
plot(data.pos$x1, data.pos$x2, col = "yellow", xlab = "X1", ylab = "X2", pch = 20, main = "RBF Kernel")
points(data.neg$x1, data.neg$x2, col = "blue", pch = 20)
Of the four fits above, only svm_rbf passes as.factor(y), so it is the only true classifier; the other three again pass numeric y, so svm() performs regression, the == 1 comparisons essentially never hold, and those panels plot as a single color. The RBF (Gaussian) kernel classifier, by contrast, recovers the decision boundary well. This makes sense: the RBF kernel is flexible enough to capture the non-linear relationship between the predictors and the response in this data set.
It is also worth noting that a support vector classifier with a linear kernel cannot separate the two classes (unsurprising, since the true boundary is not linear), and that logistic regression without non-linear terms likewise fails to find a useful decision boundary.
Adding quadratic or interaction terms to the logistic model gives it roughly the same power as an SVM with a non-linear kernel.
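Rather than fixing gamma = 10 by hand, cost and gamma could also be chosen by cross-validation; a sketch using e1071::tune, with a grid of my own choosing:
# Tune the RBF classifier over a small cost/gamma grid
dat <- data.frame(x1 = x1, x2 = x2, y = as.factor(y))
tune_rbf <- tune(svm, y ~ x1 + x2, data = dat, kernel = "radial",
    ranges = list(cost = c(0.1, 1, 10), gamma = c(0.5, 1, 5, 10)))
tune_rbf$best.parameters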
#7) In this problem, you will use support vector approaches in order to predict whether a given car gets high or low gas mileage based on the Auto data set
library(ISLR)
## Warning: package 'ISLR' was built under R version 4.2.3
library(e1071)
temp = median(Auto$mpg)
med = ifelse(Auto$mpg > temp, 1,0)
Auto$mpglevel = as.factor(med)
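One caveat before tuning (my own note, not part of the exercise): mpg itself is still among the predictors, so mpglevel ~ . lets the model see the very variable the label was cut from, which is part of why the cross-validation errors below are so small. A stricter setup would drop it first:
# Optional: remove the leaking variable before fitting (not what the results below use)
Auto_noleak <- subset(Auto, select = -mpg)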
set.seed(1)
tune.out = tune(svm, mpglevel ~ ., data = Auto, kernel = "linear",
    ranges = list(cost = c(0.01, 0.1, 1, 5, 10, 100)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.01025641
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-02 0.07653846 0.03617137
## 2 1e-01 0.04596154 0.03378238
## 3 1e+00 0.01025641 0.01792836
## 4 5e+00 0.02051282 0.02648194
## 5 1e+01 0.02051282 0.02648194
## 6 1e+02 0.03076923 0.03151981
The lowest cross-validation error occurs at a cost of 1.
set.seed(1)
tune.out = tune(svm, mpglevel ~ ., data = Auto, kernel = "polynomial",
    ranges = list(cost = c(0.1, 1, 5, 10), degree = c(2, 3, 4)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 10 2
##
## - best performance: 0.5130128
##
## - Detailed performance results:
## cost degree error dispersion
## 1 0.1 2 0.5511538 0.04366593
## 2 1.0 2 0.5511538 0.04366593
## 3 5.0 2 0.5511538 0.04366593
## 4 10.0 2 0.5130128 0.08963366
## 5 0.1 3 0.5511538 0.04366593
## 6 1.0 3 0.5511538 0.04366593
## 7 5.0 3 0.5511538 0.04366593
## 8 10.0 3 0.5511538 0.04366593
## 9 0.1 4 0.5511538 0.04366593
## 10 1.0 4 0.5511538 0.04366593
## 11 5.0 4 0.5511538 0.04366593
## 12 10.0 4 0.5511538 0.04366593
tune.out = tune(svm, mpglevel ~ ., data = Auto, kernel = "radial",
    ranges = list(cost = c(0.1, 1, 5, 10), gamma = c(0.01, 0.1, 1, 5, 10, 100)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 10 0.01
##
## - best performance: 0.02294872
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 0.1 1e-02 0.08653846 0.06488131
## 2 1.0 1e-02 0.07134615 0.04769894
## 3 5.0 1e-02 0.04846154 0.03905899
## 4 10.0 1e-02 0.02294872 0.02534336
## 5 0.1 1e-01 0.07891026 0.05147085
## 6 1.0 1e-01 0.05096154 0.03995812
## 7 5.0 1e-01 0.02301282 0.03069264
## 8 10.0 1e-01 0.02294872 0.02807826
## 9 0.1 1e+00 0.60192308 0.06346118
## 10 1.0 1e+00 0.06365385 0.04845299
## 11 5.0 1e+00 0.06121795 0.04387918
## 12 10.0 1e+00 0.06121795 0.04387918
## 13 0.1 5e+00 0.60192308 0.06346118
## 14 1.0 5e+00 0.52814103 0.08413728
## 15 5.0 5e+00 0.52814103 0.08238251
## 16 10.0 5e+00 0.52814103 0.08238251
## 17 0.1 1e+01 0.60192308 0.06346118
## 18 1.0 1e+01 0.55615385 0.07526477
## 19 5.0 1e+01 0.55358974 0.07343728
## 20 10.0 1e+01 0.55358974 0.07343728
## 21 0.1 1e+02 0.60192308 0.06346118
## 22 1.0 1e+02 0.60192308 0.06346118
## 23 5.0 1e+02 0.60192308 0.06346118
## 24 10.0 1e+02 0.60192308 0.06346118
With the polynomial kernel, the lowest cross-validation error occurs at a cost of 10 and a degree of 2. (Note, though, that even this best polynomial fit has a CV error above 0.5, no better than random guessing here.)
With the radial kernel, the lowest cross-validation error occurs at a cost of 10 and a gamma of 0.01.
svm.linear = svm(mpglevel ~ ., data = Auto, kernel = "linear", cost = 1)
svm.poly = svm(mpglevel ~ ., data = Auto, kernel = "polynomial", cost = 10, degree = 2)
svm.radial = svm(mpglevel ~ ., data = Auto, kernel = "radial", cost = 10, gamma = 0.01)
plotpairs = function(fit) {
  for (name in names(Auto)[!(names(Auto) %in% c("mpg", "mpglevel", "name"))]) {
    plot(fit, Auto, as.formula(paste("mpg~", name, sep = "")))
  }
}
# Linear kernel
plotpairs(svm.linear)
# Polynomial kernel
plotpairs(svm.poly)
# Radial kernel
plotpairs(svm.radial)
These plots show the fitted decision regions in the plane of mpg and each remaining predictor, with points colored by the response we created (mpglevel, 1 or 0).
The red regions represent high MPG and the yellowish regions represent low MPG.
The plots demonstrate that the linear kernel model has a simple boundary, whereas the polynomial and radial kernels produce more complex decision regions that can capture non-linear relationships between the predictors and the response.
library(ISLR)
set.seed(1)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
oj = as.data.frame(OJ)
oj$id <- 1:nrow(oj)
# Use 800 of the 1070 observations (74.77%) as the training set and the rest as the test set
train <- oj %>% dplyr::sample_frac(0.7476635514)
test <- dplyr::anti_join(oj, train, by = 'id')
dim(train)
## [1] 800 19
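An equivalent and simpler split uses row indices directly, which also keeps the helper id column out of the model entirely (a sketch; the printed results use the dplyr split above, where id remains among the predictors):
# Index-based 800/270 split of the OJ data
idx <- sample(nrow(OJ), 800)
train2 <- OJ[idx, ]
test2 <- OJ[-idx, ]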
svm.linear = svm(Purchase~., kernel = "linear", data = train, cost = .01)
summary(svm.linear)
##
## Call:
## svm(formula = Purchase ~ ., data = train, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 433
##
## ( 218 215 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
The SVM model uses a linear kernel, so the decision boundary is a linear hyperplane in the feature space.
Cost = 0.01 is small, meaning some misclassification errors are tolerated in exchange for a wider margin between the decision boundary and the support vectors.
There are 433 support vectors, 218 belonging to class CH and 215 to MM. These are the points closest to (or on the wrong side of) the SVM decision boundary.
We are using the SVM as a binary classifier with two classes, CH and MM, and "Purchase" as the response variable.
train_pred = predict(svm.linear, train)
table(train$Purchase, train_pred)
## train_pred
## CH MM
## CH 422 63
## MM 76 239
n_errors = 63 + 76
training_error_rate = n_errors/nrow(train)
training_error_rate
## [1] 0.17375
The training error rate is approximately 0.174, or 17.4%. This means that the SVM classifier misclassifies about 17.4% of the observations in the training dataset.
test_pred = predict(svm.linear, test)
table(test$Purchase, test_pred)
## test_pred
## CH MM
## CH 151 17
## MM 35 67
n_errors = 35 + 17
test_error_rate = n_errors/nrow(test)
test_error_rate
## [1] 0.1925926
The test error rate is approximately 0.193, or 19.3%. This means that the SVM classifier misclassifies about 19.3% of the observations in the test dataset.
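Rather than summing the off-diagonal counts by hand each time, a small helper (my own addition) computes the same rates directly:
# Fraction of observations whose predicted class differs from the truth
err_rate <- function(truth, pred) mean(truth != pred)
err_rate(train$Purchase, train_pred)  # 0.17375
err_rate(test$Purchase, test_pred)    # 0.1925926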
set.seed(1)
tune.out = tune(svm, Purchase ~., data = train, kernel = "linear", ranges = list(cost = 10^seq(-2, 1, by = .25)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.1778279
##
## - best performance: 0.1725
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01000000 0.17500 0.03435921
## 2 0.01778279 0.18250 0.03016160
## 3 0.03162278 0.17750 0.03162278
## 4 0.05623413 0.17500 0.02946278
## 5 0.10000000 0.17375 0.02972676
## 6 0.17782794 0.17250 0.02687419
## 7 0.31622777 0.17375 0.02664713
## 8 0.56234133 0.17500 0.02568506
## 9 1.00000000 0.17500 0.02500000
## 10 1.77827941 0.17375 0.02853482
## 11 3.16227766 0.17250 0.02554952
## 12 5.62341325 0.17750 0.02687419
## 13 10.00000000 0.18125 0.03019037
tune.out$best.performance
## [1] 0.1725
tune.out$best.parameters
## cost
## 6 0.1778279
The optimal cost is 0.1778279.
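The cross-validation error is nearly flat across this cost grid; plotting the performances component that tune() returns makes that easy to see (a quick sketch):
# CV error as a function of cost, on a log scale
plot(tune.out$performances$cost, tune.out$performances$error,
    log = "x", type = "b", xlab = "cost", ylab = "CV error")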
svm.linear = svm(Purchase~., kernel = "linear", data = train, cost = tune.out$best.parameters$cost)
train_pred = predict(svm.linear, train)
table(train$Purchase, train_pred)
## train_pred
## CH MM
## CH 423 62
## MM 70 245
n_errors = 62 + 70
training_error_rate = n_errors/nrow(train)
training_error_rate
## [1] 0.165
test_pred = predict(svm.linear, test)
table(test$Purchase, test_pred)
## test_pred
## CH MM
## CH 154 14
## MM 31 71
n_errors = 31 + 14
test_error_rate = n_errors/nrow(test)
test_error_rate
## [1] 0.1666667
The train error rate with a cost of 0.1778279 is 0.165, which is lower than with a cost of 0.01 (that error rate was 0.17375).
The test error rate with a cost of 0.1778279 is 0.1666667, which is lower than with a cost of 0.01 (that error rate was 0.1925926).
set.seed(1)
svm.radial = svm(Purchase ~ ., data = train, kernel = "radial")
summary(svm.radial)
##
## Call:
## svm(formula = Purchase ~ ., data = train, kernel = "radial")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 375
##
## ( 188 187 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
The SVM model uses a radial (RBF) kernel, so the decision boundary is non-linear in the feature space.
The cost is 1 (the default), so some misclassification errors are tolerated in exchange for a wider margin between the decision boundary and the support vectors.
There are 375 support vectors, 188 belonging to class CH and 187 to MM. These are the points closest to (or on the wrong side of) the SVM decision boundary.
We are again using the SVM as a binary classifier with two classes, CH and MM, and "Purchase" as the response variable.
train_pred = predict(svm.radial, train)
table(train$Purchase, train_pred)
## train_pred
## CH MM
## CH 439 46
## MM 71 244
n_errors = 71 + 46
training_error_rate = n_errors/nrow(train)
training_error_rate
## [1] 0.14625
The training error rate is approximately 0.14625, or 14.6%. This means that the SVM classifier misclassifies about 14.6% of the observations in the training dataset.
test_pred = predict(svm.radial, test)
table(test$Purchase, test_pred)
## test_pred
## CH MM
## CH 153 15
## MM 34 68
n_errors = 15 + 34
test_error_rate = n_errors/nrow(test)
test_error_rate
## [1] 0.1814815
The test error rate is approximately 0.1815, or 18.1%. This means that the SVM classifier misclassifies about 18.1% of the observations in the test dataset.
set.seed(1)
tune.out = tune(svm, Purchase ~., data = train, kernel = "radial", ranges = list(cost = 10^seq(-2, 1, by = .25)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.5623413
##
## - best performance: 0.17
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01000000 0.39375 0.04007372
## 2 0.01778279 0.39375 0.04007372
## 3 0.03162278 0.36875 0.04973890
## 4 0.05623413 0.19875 0.02853482
## 5 0.10000000 0.19125 0.02949223
## 6 0.17782794 0.18000 0.02713137
## 7 0.31622777 0.17625 0.02316157
## 8 0.56234133 0.17000 0.02581989
## 9 1.00000000 0.17875 0.02360703
## 10 1.77827941 0.18250 0.02958040
## 11 3.16227766 0.18375 0.02703521
## 12 5.62341325 0.19000 0.02813657
## 13 10.00000000 0.19500 0.03184162
tune.out$best.performance
## [1] 0.17
tune.out$best.parameters
## cost
## 8 0.5623413
The optimal cost is 0.5623413.
svm.radial = svm(Purchase~., kernel = "radial", data = train, cost = tune.out$best.parameters$cost)
train_pred = predict(svm.radial, train)
table(train$Purchase, train_pred)
## train_pred
## CH MM
## CH 437 48
## MM 69 246
n_errors = 69 + 48
training_error_rate = n_errors/nrow(train)
training_error_rate
## [1] 0.14625
test_pred = predict(svm.radial, test)
table(test$Purchase, test_pred)
## test_pred
## CH MM
## CH 152 16
## MM 34 68
n_errors = 34 + 16
test_error_rate = n_errors/nrow(test)
test_error_rate
## [1] 0.1851852
The train error rate for a radial-kernel SVM with a cost of 0.5623413 is 0.14625, which is the same as with the default cost of 1 (that error rate was also 0.14625).
The test error rate with a cost of 0.5623413 is 0.1851852, which is slightly higher than with the default cost (that error rate was 0.1814815).
(I know this is a lot for the grader; cheer up, we are almost done.)
set.seed(1)
svm.poly = svm(Purchase ~ ., data = train, kernel = "poly", degree = 2)
summary(svm.poly)
##
## Call:
## svm(formula = Purchase ~ ., data = train, kernel = "poly", degree = 2)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 1
## degree: 2
## coef.0: 0
##
## Number of Support Vectors: 445
##
## ( 226 219 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
The SVM model uses a polynomial kernel, so the decision boundary is a polynomial surface in the feature space.
Here the kernel has degree 2, which means the decision boundary is a quadratic curve in the feature space.
The model has been trained with a cost parameter of 1 and an intercept term (coef.0) of 0. There are 445 support vectors, 226 belonging to class CH and 219 to MM.
We are again using the SVM as a binary classifier with two classes, CH and MM, and "Purchase" as the response variable.
train.pred = predict(svm.poly, train)
table(train$Purchase, train.pred)
## train.pred
## CH MM
## CH 451 34
## MM 110 205
n_errors = 110 + 34
training_error_rate = n_errors/nrow(train)
training_error_rate
## [1] 0.18
The training error rate is approximately 0.18, or 18%. This means that the SVM classifier misclassifies about 18% of the observations in the training dataset.
test.pred = predict(svm.poly, test)
table(test$Purchase, test.pred)
## test.pred
## CH MM
## CH 152 16
## MM 45 57
n_errors = 45 + 16
test_error_rate = n_errors/nrow(test)
test_error_rate
## [1] 0.2259259
The test error rate is approximately 0.226, or 22.6%. This means that the SVM classifier misclassifies about 22.6% of the observations in the test dataset.
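The tuning below holds the degree fixed at 2; cost and degree could also be tuned jointly (a sketch, with a grid of my own choosing):
# Joint search over cost and degree for the polynomial kernel
tune_poly2 <- tune(svm, Purchase ~ ., data = train, kernel = "polynomial",
    ranges = list(cost = 10^seq(-2, 1, by = 0.5), degree = 2:4))
tune_poly2$best.parameters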
set.seed(1)
tune.out = tune(svm, Purchase ~ ., data = train, kernel = "poly", degree = 2,
    ranges = list(cost = 10^seq(-2, 1, by = 0.25)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 3.162278
##
## - best performance: 0.1825
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01000000 0.39375 0.04007372
## 2 0.01778279 0.37125 0.03537988
## 3 0.03162278 0.36375 0.03606033
## 4 0.05623413 0.34125 0.04604120
## 5 0.10000000 0.32875 0.05337563
## 6 0.17782794 0.24750 0.04816061
## 7 0.31622777 0.20500 0.03736085
## 8 0.56234133 0.19375 0.03186887
## 9 1.00000000 0.20000 0.03679900
## 10 1.77827941 0.18875 0.03557562
## 11 3.16227766 0.18250 0.02898755
## 12 5.62341325 0.19125 0.03283481
## 13 10.00000000 0.18875 0.03197764
tune.out$best.performance
## [1] 0.1825
tune.out$best.parameters
## cost
## 11 3.162278
The best cost is 3.162278.
svm.poly = svm(Purchase ~ ., data = train, kernel = "poly", degree = 2, cost = tune.out$best.parameters$cost)
train.pred = predict(svm.poly, train)
table(train$Purchase, train.pred)
## train.pred
## CH MM
## CH 452 33
## MM 90 225
n_errors = 90 + 33
training_error_rate = n_errors/nrow(train)
training_error_rate
## [1] 0.15375
test.pred = predict(svm.poly, test)
table(test$Purchase, test.pred)
## test.pred
## CH MM
## CH 154 14
## MM 43 59
n_errors = 43 + 14
test_error_rate = n_errors/nrow(test)
test_error_rate
## [1] 0.2111111
The train error rate for a polynomial-kernel SVM with a cost of 3.162278 is 0.15375, which is lower than with the default cost of 1 (that error rate was 0.18).
The test error rate with a cost of 3.162278 is 0.2111111, which is lower than with the default cost (that error rate was 0.2259259).
To review:
The train error rate for a linear SVM with a cost of 0.1778279 is 0.165, lower than with a cost of 0.01 (0.17375).
The test error rate for a linear SVM with a cost of 0.1778279 is 0.1666667, lower than with a cost of 0.01 (0.1925926).
The train error rate for a radial-kernel SVM with a cost of 0.5623413 is 0.14625, the same as with the default cost of 1 (0.14625).
The test error rate for a radial-kernel SVM with a cost of 0.5623413 is 0.1851852, slightly higher than with the default cost (0.1814815).
The train error rate for a polynomial-kernel SVM with a cost of 3.162278 is 0.15375, lower than with the default cost of 1 (0.18).
The test error rate for a polynomial-kernel SVM with a cost of 3.162278 is 0.2111111, lower than with the default cost (0.2259259).
Overall, the lowest test error rate with a tuned cost comes from the linear SVM at a cost of 0.1778279, with a test error rate of 0.1666667.
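Collecting the corrected test error rates in one place (each computed from the confusion matrices above, with 270 test observations):
# Test error rates from the confusion matrices reported above
data.frame(
    kernel = c("linear (cost 0.01)", "linear (tuned)", "radial (default)",
        "radial (tuned)", "polynomial (default)", "polynomial (tuned)"),
    test_error = c(52, 45, 49, 50, 61, 57) / 270
)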