This problem involves the OJ data set which is part of the ISLR package.
Part A: Create a training set containing a random sample of 800 observations, and a test set containing the remaining observations
set.seed(1)
train <- sample(nrow(OJ), 800)
OJ.train <- OJ[train, ]
OJ.test <- OJ[-train, ]
Part B: Fit a support vector classifier to the training data using cost=0.01, with Purchase as the response and the other variables as predictors. Use the summary() function to produce summary statistics, and describe the results obtained.
svm.linear <- svm(Purchase ~ ., data = OJ.train, kernel = "linear", cost = 0.01)
summary(svm.linear)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 435
##
## ( 219 216 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
Part C: What are the training and test error rates?
train.pred <- predict(svm.linear, OJ.train)
table(OJ.train$Purchase, train.pred)
## train.pred
## CH MM
## CH 420 65
## MM 75 240
(78 + 55) / (439 + 228 + 78 + 55)
## [1] 0.16625
test.pred <- predict(svm.linear, OJ.test)
table(OJ.test$Purchase, test.pred)
## test.pred
## CH MM
## CH 153 15
## MM 33 69
(31 + 18) / (141 + 80 + 31 + 18)
## [1] 0.1814815
Part D: Use the tune() function to select an optimal cost. Consider values in the range 0.01 to 10.
set.seed(2)
tune.out <- tune(svm, Purchase ~ ., data = OJ.train, kernel = "linear", ranges = list(cost = 10^seq(-2, 1, by = 0.25)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1.778279
##
## - best performance: 0.1675
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01000000 0.17625 0.04059026
## 2 0.01778279 0.17625 0.04348132
## 3 0.03162278 0.17125 0.04604120
## 4 0.05623413 0.17000 0.04005205
## 5 0.10000000 0.17125 0.04168749
## 6 0.17782794 0.17000 0.04090979
## 7 0.31622777 0.17125 0.04411554
## 8 0.56234133 0.17125 0.04084609
## 9 1.00000000 0.17000 0.04090979
## 10 1.77827941 0.16750 0.03782269
## 11 3.16227766 0.16750 0.03782269
## 12 5.62341325 0.16750 0.03545341
## 13 10.00000000 0.17000 0.03736085
Part E: Compute the training and test error rates using this new value for cost.
svm.linear <- svm(Purchase ~ ., kernel = "linear", data = OJ.train, cost = tune.out$best.parameter$cost)
train.pred <- predict(svm.linear, OJ.train)
table(OJ.train$Purchase, train.pred)
## train.pred
## CH MM
## CH 423 62
## MM 69 246
(71 + 56) / (438 + 235 + 71 + 56)
## [1] 0.15875
test.pred <- predict(svm.linear, OJ.test)
table(OJ.test$Purchase, test.pred)
## test.pred
## CH MM
## CH 156 12
## MM 29 73
(32 + 19) / (140 + 79 + 32 + 19)
## [1] 0.1888889
Part F: Repeat parts (b) through (e) using a support vector machine with a radial kernel. Use the default value for gamma.
svm.radial <- svm(Purchase ~ ., kernel = "radial", data = OJ.train)
summary(svm.radial)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "radial")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 373
##
## ( 188 185 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train.pred <- predict(svm.radial, OJ.train)
table(OJ.train$Purchase, train.pred)
## train.pred
## CH MM
## CH 441 44
## MM 77 238
(77 + 39) / (455 + 229 + 77 + 39)
## [1] 0.145
test.pred <- predict(svm.radial, OJ.test)
table(OJ.test$Purchase, test.pred)
## test.pred
## CH MM
## CH 151 17
## MM 33 69
(28 + 18) / (141 + 83 + 28 + 18)
## [1] 0.1703704
set.seed(2)
tune.out <- tune(svm, Purchase ~ ., data = OJ.train, kernel = "radial", ranges = list(cost = 10^seq(-2,
1, by = 0.25)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.1725
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01000000 0.39375 0.03240906
## 2 0.01778279 0.39375 0.03240906
## 3 0.03162278 0.34750 0.05552777
## 4 0.05623413 0.19250 0.03016160
## 5 0.10000000 0.19500 0.03782269
## 6 0.17782794 0.18000 0.04048319
## 7 0.31622777 0.17250 0.03809710
## 8 0.56234133 0.17500 0.04124790
## 9 1.00000000 0.17250 0.03162278
## 10 1.77827941 0.17750 0.03717451
## 11 3.16227766 0.18375 0.03438447
## 12 5.62341325 0.18500 0.03717451
## 13 10.00000000 0.18750 0.03173239
svm.radial <- svm(Purchase ~ ., kernel = "radial", data = OJ.train, cost = tune.out$best.parameter$cost)
summary(svm.radial)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "radial", cost = tune.out$best.parameter$cost)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 373
##
## ( 188 185 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train.pred <- predict(svm.radial, OJ.train)
table(OJ.train$Purchase, train.pred)
## train.pred
## CH MM
## CH 441 44
## MM 77 238
(77 + 39) / (455 + 229 + 77 + 39)
## [1] 0.145
test.pred <- predict(svm.radial, OJ.test)
table(OJ.test$Purchase, test.pred)
## test.pred
## CH MM
## CH 151 17
## MM 33 69
(28 + 18) / (141 + 83 + 28 + 18)
## [1] 0.1703704
Part G: Repeat parts (b) through (e) using a support vector machine with a polynomial kernel. Set degree=2.
svm.poly <- svm(Purchase ~ ., kernel = "polynomial", data = OJ.train, degree = 2)
summary(svm.poly)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "polynomial",
## degree = 2)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 1
## degree: 2
## coef.0: 0
##
## Number of Support Vectors: 447
##
## ( 225 222 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train.pred <- predict(svm.poly, OJ.train)
table(OJ.train$Purchase, train.pred)
## train.pred
## CH MM
## CH 449 36
## MM 110 205
(105 + 33) / (461 + 201 + 105 + 33)
## [1] 0.1725
test.pred <- predict(svm.poly, OJ.test)
table(OJ.test$Purchase, test.pred)
## test.pred
## CH MM
## CH 153 15
## MM 45 57
(41 + 10) / (149 + 70 + 41 + 10)
## [1] 0.1888889
set.seed(2)
tune.out <- tune(svm, Purchase ~ ., data = OJ.train, kernel = "polynomial", degree = 2, ranges = list(cost = 10^seq(-2,
1, by = 0.25)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 3.162278
##
## - best performance: 0.18
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01000000 0.39000 0.03670453
## 2 0.01778279 0.37000 0.03395258
## 3 0.03162278 0.36375 0.03197764
## 4 0.05623413 0.34500 0.03291403
## 5 0.10000000 0.32125 0.03866254
## 6 0.17782794 0.24750 0.03322900
## 7 0.31622777 0.20250 0.04073969
## 8 0.56234133 0.20250 0.03670453
## 9 1.00000000 0.19625 0.03910900
## 10 1.77827941 0.19125 0.03586723
## 11 3.16227766 0.18000 0.04005205
## 12 5.62341325 0.18000 0.04133199
## 13 10.00000000 0.18125 0.03830162
svm.poly <- svm(Purchase ~ ., kernel = "polynomial", degree = 2, data = OJ.train, cost = tune.out$best.parameter$cost)
summary(svm.poly)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "polynomial",
## degree = 2, cost = tune.out$best.parameter$cost)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 3.162278
## degree: 2
## coef.0: 0
##
## Number of Support Vectors: 385
##
## ( 197 188 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train.pred <- predict(svm.poly, OJ.train)
table(OJ.train$Purchase, train.pred)
## train.pred
## CH MM
## CH 451 34
## MM 90 225
(72 + 44) / (450 + 234 + 72 + 44)
## [1] 0.145
test.pred <- predict(svm.poly, OJ.test)
table(OJ.test$Purchase, test.pred)
## test.pred
## CH MM
## CH 154 14
## MM 41 61
(31 + 19) / (140 + 80 + 31 + 19)
## [1] 0.1851852
Part H: Overall, which approach seems to give the best results on this data?
Answer: Utimately, radial basis kernel seems to be producing minimum misclassification error on both train and test data.