library(ggplot2)
library(e1071)
library(ISLR2)
library(caret)
## Loading required package: lattice
We have seen that we can fit an SVM with a non-linear kernel in order to perform classification using a non-linear decision boundary. We will now see that we can also obtain a non-linear decision boundary by performing logistic regression using non-linear transformations of the features.
set.seed(42)
x1 <- runif(500) - 0.5
x2 <- runif(500) - 0.5
y <- 1 * (x1^2 - x2^2 > 0)
plot(x1[y == 0], x2[y == 0], col = "red", xlab = "X1", ylab = "X2")
points(x1[y == 1], x2[y == 1], col = "blue")
logit.fit <- glm(y ~ x1 + x2, family = "binomial")
summary(logit.fit)
##
## Call:
## glm(formula = y ~ x1 + x2, family = "binomial")
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.09978 0.08976 1.112 0.266
## x1 -0.17659 0.30658 -0.576 0.565
## x2 -0.20067 0.30978 -0.648 0.517
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 691.79 on 499 degrees of freedom
## Residual deviance: 691.08 on 497 degrees of freedom
## AIC: 697.08
##
## Number of Fisher Scoring iterations: 3
df <- data.frame(x1 = x1, x2 = x2, y = as.factor(y))
logit.prob <- predict(logit.fit, newdata = df, type = "response")
logit.pred <- ifelse(logit.prob > 0.5, 1, 0)
plot(x1, x2, col = c("blue", "red")[logit.pred + 1])
logit.fit2 <- glm(y ~ poly(x1,2) + poly(x2,2), family = "binomial")
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(logit.fit2)
##
## Call:
## glm(formula = y ~ poly(x1, 2) + poly(x2, 2), family = "binomial")
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 241.2 2464.3 0.098 0.922
## poly(x1, 2)1 -1560.4 43432.8 -0.036 0.971
## poly(x1, 2)2 150754.9 1452847.9 0.104 0.917
## poly(x2, 2)1 3829.4 54613.8 0.070 0.944
## poly(x2, 2)2 -145721.8 1403130.1 -0.104 0.917
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6.9179e+02 on 499 degrees of freedom
## Residual deviance: 6.5043e-05 on 495 degrees of freedom
## AIC: 10
##
## Number of Fisher Scoring iterations: 25
logit.prob <- predict(logit.fit2, newdata = df, type = "response")
logit.pred <- ifelse(logit.prob > 0.5, 1, 0)
plot(x1, x2, col = c("blue", "red")[logit.pred + 1])
svm.fit <- svm(y ~ x1 + x2, data = df, kernel = "linear", cost = 0.01)
svm.pred = predict(svm.fit, df)
data.pos = df[svm.pred == 1, ]
data.neg = df[svm.pred == 0, ]
plot(data.pos$x1, data.pos$x2, col = "blue", xlab = "X1", ylab = "X2", pch = "+")
points(data.neg$x1, data.neg$x2, col = "red", pch = 4)
svm.fit <- svm(y ~ x1 + x2, df, kernel = 'radial', gamma=1)
svm.pred = predict(svm.fit, df)
data.pos = df[svm.pred == 1, ]
data.neg = df[svm.pred == 0, ]
plot(data.pos$x1, data.pos$x2, col = "blue", xlab = "X1", ylab = "X2", pch = "+")
points(data.neg$x1, data.neg$x2, col = "red", pch = 4)
The support vector classifier in part (g) was unable to capture the class boundary with a linear kernel, whereas the radial-kernel SVM in part (h) recovered the non-linear boundary present in the data.
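To put a number on that comparison, here is a minimal sketch (assuming `df`, `x1`, `x2`, and `y` from above are still in the workspace) that refits both kernels and reports their training error rates:
# Refit both kernels on the simulated data and compare training error rates.
svc.lin <- svm(y ~ x1 + x2, data = df, kernel = "linear", cost = 0.01)
svm.rad <- svm(y ~ x1 + x2, data = df, kernel = "radial", gamma = 1)
err.rate <- function(fit) mean(predict(fit, df) != df$y)
c(linear = err.rate(svc.lin), radial = err.rate(svm.rad))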
In this problem, you will use support vector approaches in order to predict whether a given car gets high or low gas mileage based on the Auto data set.
auto.df<-Auto
auto.df<-na.omit(auto.df)
auto.df$MPG_Abv_Med <- as.factor(ifelse(auto.df$mpg > median(auto.df$mpg),1,0))
head(auto.df)
## mpg cylinders displacement horsepower weight acceleration year origin
## 1 18 8 307 130 3504 12.0 70 1
## 2 15 8 350 165 3693 11.5 70 1
## 3 18 8 318 150 3436 11.0 70 1
## 4 16 8 304 150 3433 12.0 70 1
## 5 17 8 302 140 3449 10.5 70 1
## 6 15 8 429 198 4341 10.0 70 1
## name MPG_Abv_Med
## 1 chevrolet chevelle malibu 0
## 2 buick skylark 320 0
## 3 plymouth satellite 0
## 4 amc rebel sst 0
## 5 ford torino 0
## 6 ford galaxie 500 0
set.seed(42)
tune.fit <- tune(svm,MPG_Abv_Med ~ . - mpg ,data=auto.df,kernel="linear", ranges=list(cost=c(0.001, 0.01, 0.1, 1,5,10,100)))
summary(tune.fit)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.01
##
## - best performance: 0.08916667
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-03 0.12775641 0.06746999
## 2 1e-02 0.08916667 0.05258186
## 3 1e-01 0.09160256 0.05869690
## 4 1e+00 0.09173077 0.04357345
## 5 5e+00 0.10942308 0.04734731
## 6 1e+01 0.11705128 0.05314992
## 7 1e+02 0.12993590 0.05797340
The results above show that the best value for the cost parameter is 0.01, as it gives the lowest cross-validation error (0.08916667).
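The same values can be read directly from the tuning object returned by e1071's tune(); for example:
# Winning parameters and their cross-validation error, straight from the tune object.
tune.fit$best.parameters   # cost = 0.01
tune.fit$best.performance  # 0.08916667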
set.seed(42)
tune.fit2 <- tune(svm,MPG_Abv_Med ~ . - mpg ,data=auto.df,kernel="radial", ranges=list(cost=c(0.001, 0.01, 0.1, 1,5,10,100), gamma = c(0.01, 0.1, 1)))
summary(tune.fit2)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 1 1
##
## - best performance: 0.07891026
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 1e-03 0.01 0.59679487 0.05312225
## 2 1e-02 0.01 0.59679487 0.05312225
## 3 1e-01 0.01 0.11493590 0.05837054
## 4 1e+00 0.01 0.08660256 0.05519479
## 5 5e+00 0.01 0.09166667 0.05754492
## 6 1e+01 0.01 0.08660256 0.06001684
## 7 1e+02 0.01 0.10442308 0.05805399
## 8 1e-03 0.10 0.59679487 0.05312225
## 9 1e-02 0.10 0.29326923 0.08606908
## 10 1e-01 0.10 0.08666667 0.05390785
## 11 1e+00 0.10 0.08660256 0.05519479
## 12 5e+00 0.10 0.08647436 0.03940028
## 13 1e+01 0.10 0.08647436 0.04445518
## 14 1e+02 0.10 0.10173077 0.04696584
## 15 1e-03 1.00 0.59679487 0.05312225
## 16 1e-02 1.00 0.59679487 0.05312225
## 17 1e-01 1.00 0.59679487 0.05312225
## 18 1e+00 1.00 0.07891026 0.03633038
## 19 5e+00 1.00 0.08910256 0.04132724
## 20 1e+01 1.00 0.08910256 0.04132724
## 21 1e+02 1.00 0.08910256 0.04132724
With the radial kernel, the best parameters are cost = 1 and gamma = 1, which give a cross-validation error of 0.07891026.
set.seed(42)
tune.fit3 <- tune(svm,MPG_Abv_Med ~ . - mpg ,data=auto.df, kernel="polynomial", ranges=list(cost=c(0.001, 0.01, 0.1, 1,5,10,100), gamma = c(0.01, 0.1, 1), degree = c(2, 3, 4)))
summary(tune.fit3)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma degree
## 100 0.1 3
##
## - best performance: 0.08641026
##
## - Detailed performance results:
## cost gamma degree error dispersion
## 1 1e-03 0.01 2 0.59679487 0.05312225
## 2 1e-02 0.01 2 0.59679487 0.05312225
## 3 1e-01 0.01 2 0.59679487 0.05312225
## 4 1e+00 0.01 2 0.58653846 0.05488085
## 5 5e+00 0.01 2 0.39064103 0.10386602
## 6 1e+01 0.01 2 0.31698718 0.09209637
## 7 1e+02 0.01 2 0.28083333 0.06885413
## 8 1e-03 0.10 2 0.59679487 0.05312225
## 9 1e-02 0.10 2 0.58653846 0.05488085
## 10 1e-01 0.10 2 0.31698718 0.09209637
## 11 1e+00 0.10 2 0.28083333 0.06885413
## 12 5e+00 0.10 2 0.16775641 0.10359086
## 13 1e+01 0.10 2 0.17294872 0.08470264
## 14 1e+02 0.10 2 0.19839744 0.07771274
## 15 1e-03 1.00 2 0.31698718 0.09209637
## 16 1e-02 1.00 2 0.28083333 0.06885413
## 17 1e-01 1.00 2 0.17294872 0.08470264
## 18 1e+00 1.00 2 0.19839744 0.07771274
## 19 5e+00 1.00 2 0.21121795 0.08538310
## 20 1e+01 1.00 2 0.22391026 0.09572557
## 21 1e+02 1.00 2 0.25717949 0.09485533
## 22 1e-03 0.01 3 0.59679487 0.05312225
## 23 1e-02 0.01 3 0.59679487 0.05312225
## 24 1e-01 0.01 3 0.59679487 0.05312225
## 25 1e+00 0.01 3 0.59679487 0.05312225
## 26 5e+00 0.01 3 0.36064103 0.14593226
## 27 1e+01 0.01 3 0.30166667 0.10288776
## 28 1e+02 0.01 3 0.25301282 0.07977961
## 29 1e-03 0.10 3 0.59679487 0.05312225
## 30 1e-02 0.10 3 0.30166667 0.10288776
## 31 1e-01 0.10 3 0.25301282 0.07977961
## 32 1e+00 0.10 3 0.09679487 0.05029680
## 33 5e+00 0.10 3 0.09929487 0.05393973
## 34 1e+01 0.10 3 0.09166667 0.05101515
## 35 1e+02 0.10 3 0.08641026 0.05301971
## 36 1e-03 1.00 3 0.09679487 0.05029680
## 37 1e-02 1.00 3 0.09166667 0.05101515
## 38 1e-01 1.00 3 0.08641026 0.05301971
## 39 1e+00 1.00 3 0.10179487 0.04394941
## 40 5e+00 1.00 3 0.10442308 0.03816415
## 41 1e+01 1.00 3 0.10442308 0.03816415
## 42 1e+02 1.00 3 0.10442308 0.03816415
## 43 1e-03 0.01 4 0.59679487 0.05312225
## 44 1e-02 0.01 4 0.59679487 0.05312225
## 45 1e-01 0.01 4 0.59679487 0.05312225
## 46 1e+00 0.01 4 0.59679487 0.05312225
## 47 5e+00 0.01 4 0.59679487 0.05312225
## 48 1e+01 0.01 4 0.59679487 0.05312225
## 49 1e+02 0.01 4 0.41666667 0.13285341
## 50 1e-03 0.10 4 0.59679487 0.05312225
## 51 1e-02 0.10 4 0.41666667 0.13285341
## 52 1e-01 0.10 4 0.34256410 0.11428217
## 53 1e+00 0.10 4 0.27314103 0.06981031
## 54 5e+00 0.10 4 0.19602564 0.07480024
## 55 1e+01 0.10 4 0.21929487 0.08090842
## 56 1e+02 0.10 4 0.20416667 0.05433005
## 57 1e-03 1.00 4 0.21929487 0.08090842
## 58 1e-02 1.00 4 0.20416667 0.05433005
## 59 1e-01 1.00 4 0.21410256 0.06210638
## 60 1e+00 1.00 4 0.22166667 0.07114891
## 61 5e+00 1.00 4 0.22166667 0.07114891
## 62 1e+01 1.00 4 0.22166667 0.07114891
## 63 1e+02 1.00 4 0.22166667 0.07114891
With the polynomial kernel, the best parameters are cost = 100, gamma = 0.1, and degree = 3, which give a cross-validation error of 0.08641026.
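For a side-by-side view, the best cross-validation error for each kernel can be collected from the three tuning objects above (a minimal sketch):
# Best CV error per kernel, taken from the tuning objects fitted above.
data.frame(
  kernel   = c("linear", "radial", "polynomial"),
  cv.error = c(tune.fit$best.performance,
               tune.fit2$best.performance,
               tune.fit3$best.performance)
)
On this data the radial kernel achieves the lowest cross-validated error, with the linear and polynomial kernels close behind.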
auto.df.subset<- auto.df[, c("horsepower", "weight", "MPG_Abv_Med")]
svc.fit<- svm(MPG_Abv_Med ~ ., data=auto.df.subset, kernel="linear", cost = .01)
svm.fit.rad<- svm(MPG_Abv_Med ~ ., data=auto.df.subset, kernel="radial", cost = 1, gamma = 1)
svm.fit.poly<- svm(MPG_Abv_Med ~ ., data=auto.df.subset, kernel="polynomial", cost = 100, gamma = .1, degree = 3)
plot(svc.fit, data = auto.df.subset)
plot(svm.fit.rad, data = auto.df.subset)
plot(svm.fit.poly, data = auto.df.subset)
oj.df = OJ
set.seed(42)
oj_train_index <- sample(1:nrow(oj.df), 800)
oj_train_data <- oj.df[oj_train_index, ]
oj_test_data <- oj.df[-oj_train_index, ]
svc.fit<- svm(Purchase ~ ., data=oj_train_data, kernel="linear", cost = .01)
summary(svc.fit)
##
## Call:
## svm(formula = Purchase ~ ., data = oj_train_data, kernel = "linear",
## cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 432
##
## ( 215 217 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
The support vector classifier uses a total of 432 support vectors: 215 belong to class CH and 217 to class MM.
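These counts are also stored on the fitted svm object itself; for example:
# Total number of support vectors and the per-class breakdown
# (215 and 217 in the summary above).
svc.fit$tot.nSV
svc.fit$nSV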
# Confusion Matrix for prediction on train data
oj.preds = predict(svc.fit, oj_train_data)
confusionMatrix(oj.preds, oj_train_data$Purchase, positive = 'MM')
## Confusion Matrix and Statistics
##
## Reference
## Prediction CH MM
## CH 432 77
## MM 60 231
##
## Accuracy : 0.8288
## 95% CI : (0.8008, 0.8542)
## No Information Rate : 0.615
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.6346
##
## Mcnemar's Test P-Value : 0.1716
##
## Sensitivity : 0.7500
## Specificity : 0.8780
## Pos Pred Value : 0.7938
## Neg Pred Value : 0.8487
## Prevalence : 0.3850
## Detection Rate : 0.2888
## Detection Prevalence : 0.3638
## Balanced Accuracy : 0.8140
##
## 'Positive' Class : MM
##
# Confusion Matrix for prediction on test data
oj.preds = predict(svc.fit, oj_test_data)
confusionMatrix(oj.preds, oj_test_data$Purchase, positive = 'MM')
## Confusion Matrix and Statistics
##
## Reference
## Prediction CH MM
## CH 142 25
## MM 19 84
##
## Accuracy : 0.837
## 95% CI : (0.7875, 0.879)
## No Information Rate : 0.5963
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.6585
##
## Mcnemar's Test P-Value : 0.451
##
## Sensitivity : 0.7706
## Specificity : 0.8820
## Pos Pred Value : 0.8155
## Neg Pred Value : 0.8503
## Prevalence : 0.4037
## Detection Rate : 0.3111
## Detection Prevalence : 0.3815
## Balanced Accuracy : 0.8263
##
## 'Positive' Class : MM
##
The training error is 0.1712 and the test error is 0.163.
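Those error rates can also be computed directly from the predictions, without the full confusionMatrix() output; a minimal sketch:
# Misclassification rates for the linear SVC with cost = 0.01.
train.err <- mean(predict(svc.fit, oj_train_data) != oj_train_data$Purchase)
test.err  <- mean(predict(svc.fit, oj_test_data) != oj_test_data$Purchase)
c(train = train.err, test = test.err)  # approximately 0.1712 and 0.163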
set.seed(42)
tune.fit <- tune(svm,Purchase ~ ., data=oj_train_data, kernel="linear", ranges=list(cost=c(0.01, 0.1, 1,5,10)))
summary(tune.fit)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.175
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.17750 0.02415229
## 2 0.10 0.17625 0.03356689
## 3 1.00 0.17500 0.02886751
## 4 5.00 0.18375 0.02703521
## 5 10.00 0.18625 0.02729087
best.svc<- svm(Purchase ~ ., data=oj_train_data, kernel="linear", cost = 1)
# Confusion Matrix for prediction on train data
oj.preds = predict(best.svc, oj_train_data)
confusionMatrix(oj.preds, oj_train_data$Purchase, positive = 'MM')
## Confusion Matrix and Statistics
##
## Reference
## Prediction CH MM
## CH 434 76
## MM 58 232
##
## Accuracy : 0.8325
## 95% CI : (0.8048, 0.8577)
## No Information Rate : 0.615
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.6424
##
## Mcnemar's Test P-Value : 0.1419
##
## Sensitivity : 0.7532
## Specificity : 0.8821
## Pos Pred Value : 0.8000
## Neg Pred Value : 0.8510
## Prevalence : 0.3850
## Detection Rate : 0.2900
## Detection Prevalence : 0.3625
## Balanced Accuracy : 0.8177
##
## 'Positive' Class : MM
##
# Confusion Matrix for prediction on test data
oj.preds = predict(best.svc, oj_test_data)
confusionMatrix(oj.preds, oj_test_data$Purchase, positive = 'MM')
## Confusion Matrix and Statistics
##
## Reference
## Prediction CH MM
## CH 140 23
## MM 21 86
##
## Accuracy : 0.837
## 95% CI : (0.7875, 0.879)
## No Information Rate : 0.5963
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.6605
##
## Mcnemar's Test P-Value : 0.8802
##
## Sensitivity : 0.7890
## Specificity : 0.8696
## Pos Pred Value : 0.8037
## Neg Pred Value : 0.8589
## Prevalence : 0.4037
## Detection Rate : 0.3185
## Detection Prevalence : 0.3963
## Balanced Accuracy : 0.8293
##
## 'Positive' Class : MM
##
The training error is 0.1675 and the test error is 0.163. Tuning the cost improved the training error slightly, while the test error is unchanged.
svm.fit.rad<- svm(Purchase ~ ., data=oj_train_data, kernel="radial", cost = .01)
summary(svm.fit.rad)
##
## Call:
## svm(formula = Purchase ~ ., data = oj_train_data, kernel = "radial",
## cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 0.01
##
## Number of Support Vectors: 621
##
## ( 308 313 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
The radial-kernel SVM with cost = 0.01 uses a total of 621 support vectors: 308 belong to class CH and 313 to class MM.
# Confusion Matrix for prediction on train data
oj.preds = predict(svm.fit.rad, oj_train_data)
confusionMatrix(oj.preds, oj_train_data$Purchase, positive = 'MM')
## Confusion Matrix and Statistics
##
## Reference
## Prediction CH MM
## CH 492 308
## MM 0 0
##
## Accuracy : 0.615
## 95% CI : (0.5803, 0.6489)
## No Information Rate : 0.615
## P-Value [Acc > NIR] : 0.5156
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.000
## Specificity : 1.000
## Pos Pred Value : NaN
## Neg Pred Value : 0.615
## Prevalence : 0.385
## Detection Rate : 0.000
## Detection Prevalence : 0.000
## Balanced Accuracy : 0.500
##
## 'Positive' Class : MM
##
# Confusion Matrix for prediction on test data
oj.preds = predict(svm.fit.rad, oj_test_data)
confusionMatrix(oj.preds, oj_test_data$Purchase, positive = 'MM')
## Confusion Matrix and Statistics
##
## Reference
## Prediction CH MM
## CH 161 109
## MM 0 0
##
## Accuracy : 0.5963
## 95% CI : (0.5351, 0.6553)
## No Information Rate : 0.5963
## P-Value [Acc > NIR] : 0.5263
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.5963
## Prevalence : 0.4037
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : MM
##
The training error is 0.385 and the test error is 0.4037: with cost = 0.01 the radial SVM predicts CH for every observation, so both error rates simply equal the proportion of MM purchases in each set.
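A quick check of that claim (a minimal sketch using the objects above):
# With cost = 0.01 every observation is predicted as CH, so the training
# error equals the MM prevalence.
table(predict(svm.fit.rad, oj_train_data))  # all CH
mean(oj_train_data$Purchase == "MM")        # 0.385, matching the error above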
set.seed(42)
tune.fit <- tune(svm,Purchase ~ ., data=oj_train_data, kernel="radial", ranges=list(cost=c(0.01, 0.1, 1,5,10)))
summary(tune.fit)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.18
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.38500 0.04199868
## 2 0.10 0.18125 0.03784563
## 3 1.00 0.18000 0.03343734
## 4 5.00 0.18625 0.03701070
## 5 10.00 0.19375 0.03738408
best.svm<- svm(Purchase ~ ., data=oj_train_data, kernel="radial", cost = 1)
# Confusion Matrix for prediction on train data
oj.preds = predict(best.svm, oj_train_data)
confusionMatrix(oj.preds, oj_train_data$Purchase, positive = 'MM')
## Confusion Matrix and Statistics
##
## Reference
## Prediction CH MM
## CH 453 81
## MM 39 227
##
## Accuracy : 0.85
## 95% CI : (0.8233, 0.874)
## No Information Rate : 0.615
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.675
##
## Mcnemar's Test P-Value : 0.000182
##
## Sensitivity : 0.7370
## Specificity : 0.9207
## Pos Pred Value : 0.8534
## Neg Pred Value : 0.8483
## Prevalence : 0.3850
## Detection Rate : 0.2838
## Detection Prevalence : 0.3325
## Balanced Accuracy : 0.8289
##
## 'Positive' Class : MM
##
# Confusion Matrix for prediction on test data
oj.preds = predict(best.svm, oj_test_data)
confusionMatrix(oj.preds, oj_test_data$Purchase, positive = 'MM')
## Confusion Matrix and Statistics
##
## Reference
## Prediction CH MM
## CH 146 28
## MM 15 81
##
## Accuracy : 0.8407
## 95% CI : (0.7915, 0.8823)
## No Information Rate : 0.5963
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.6627
##
## Mcnemar's Test P-Value : 0.06725
##
## Sensitivity : 0.7431
## Specificity : 0.9068
## Pos Pred Value : 0.8438
## Neg Pred Value : 0.8391
## Prevalence : 0.4037
## Detection Rate : 0.3000
## Detection Prevalence : 0.3556
## Balanced Accuracy : 0.8250
##
## 'Positive' Class : MM
##
The training error is 0.15 and the test error is 0.1593. Both errors are far better than those of the untuned radial fit and slightly better than those of the tuned linear classifier.
svm.fit.poly<- svm(Purchase ~ ., data=oj_train_data, kernel="polynomial", cost = .01, degree = 2)
summary(svm.fit.poly)
##
## Call:
## svm(formula = Purchase ~ ., data = oj_train_data, kernel = "polynomial",
## cost = 0.01, degree = 2)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 0.01
## degree: 2
## coef.0: 0
##
## Number of Support Vectors: 621
##
## ( 308 313 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
The polynomial-kernel SVM (degree 2, cost = 0.01) uses a total of 621 support vectors: 308 belong to class CH and 313 to class MM.
# Confusion Matrix for prediction on train data
oj.preds = predict(svm.fit.poly, oj_train_data)
confusionMatrix(oj.preds, oj_train_data$Purchase, positive = 'MM')
## Confusion Matrix and Statistics
##
## Reference
## Prediction CH MM
## CH 492 308
## MM 0 0
##
## Accuracy : 0.615
## 95% CI : (0.5803, 0.6489)
## No Information Rate : 0.615
## P-Value [Acc > NIR] : 0.5156
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.000
## Specificity : 1.000
## Pos Pred Value : NaN
## Neg Pred Value : 0.615
## Prevalence : 0.385
## Detection Rate : 0.000
## Detection Prevalence : 0.000
## Balanced Accuracy : 0.500
##
## 'Positive' Class : MM
##
# Confusion Matrix for prediction on test data
oj.preds = predict(svm.fit.poly, oj_test_data)
confusionMatrix(oj.preds, oj_test_data$Purchase, positive = 'MM')
## Confusion Matrix and Statistics
##
## Reference
## Prediction CH MM
## CH 161 109
## MM 0 0
##
## Accuracy : 0.5963
## 95% CI : (0.5351, 0.6553)
## No Information Rate : 0.5963
## P-Value [Acc > NIR] : 0.5263
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.5963
## Prevalence : 0.4037
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : MM
##
The training error is 0.385 and the test error is 0.4037. As with the radial kernel at cost = 0.01, this model predicts CH for every observation, so the errors again equal the MM prevalence in each set.
set.seed(42)
tune.fit.poly <- tune(svm,Purchase ~ ., data=oj_train_data, kernel="polynomial", ranges=list(cost=c(0.01, 0.1, 1,5,10), degree = 2))
summary(tune.fit.poly)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 5 2
##
## - best performance: 0.18375
##
## - Detailed performance results:
## cost degree error dispersion
## 1 0.01 2 0.38625 0.04308019
## 2 0.10 2 0.31625 0.05529278
## 3 1.00 2 0.19250 0.04216370
## 4 5.00 2 0.18375 0.04041881
## 5 10.00 2 0.19000 0.03425801
best.svm.poly<- svm(Purchase ~ ., data=oj_train_data, kernel="polynomial", cost = 5, degree = 2)
# Confusion Matrix for prediction on train data
oj.preds = predict(best.svm.poly, oj_train_data)
confusionMatrix(oj.preds, oj_train_data$Purchase, positive = 'MM')
## Confusion Matrix and Statistics
##
## Reference
## Prediction CH MM
## CH 459 85
## MM 33 223
##
## Accuracy : 0.8525
## 95% CI : (0.826, 0.8764)
## No Information Rate : 0.615
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6784
##
## Mcnemar's Test P-Value : 2.667e-06
##
## Sensitivity : 0.7240
## Specificity : 0.9329
## Pos Pred Value : 0.8711
## Neg Pred Value : 0.8438
## Prevalence : 0.3850
## Detection Rate : 0.2787
## Detection Prevalence : 0.3200
## Balanced Accuracy : 0.8285
##
## 'Positive' Class : MM
##
# Confusion Matrix for prediction on test data
oj.preds = predict(best.svm.poly, oj_test_data)
confusionMatrix(oj.preds, oj_test_data$Purchase, positive = 'MM')
## Confusion Matrix and Statistics
##
## Reference
## Prediction CH MM
## CH 146 30
## MM 15 79
##
## Accuracy : 0.8333
## 95% CI : (0.7834, 0.8758)
## No Information Rate : 0.5963
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.646
##
## Mcnemar's Test P-Value : 0.03689
##
## Sensitivity : 0.7248
## Specificity : 0.9068
## Pos Pred Value : 0.8404
## Neg Pred Value : 0.8295
## Prevalence : 0.4037
## Detection Rate : 0.2926
## Detection Prevalence : 0.3481
## Balanced Accuracy : 0.8158
##
## 'Positive' Class : MM
##
The training error is 0.1475 and the test error is 0.1667, a large improvement over the untuned polynomial fit, though the test error remains slightly higher than that of the tuned radial SVM.
Overall, the model that produced the lowest test error (0.1593) was the tuned SVM with a radial kernel.
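As a final summary, the tuned fits can be compared side by side; a minimal sketch that refits the three tuned models so all are available at once:
# Refit the three tuned models and tabulate training and test error rates.
models <- list(
  linear     = svm(Purchase ~ ., data = oj_train_data, kernel = "linear", cost = 1),
  radial     = svm(Purchase ~ ., data = oj_train_data, kernel = "radial", cost = 1),
  polynomial = svm(Purchase ~ ., data = oj_train_data, kernel = "polynomial", cost = 5, degree = 2)
)
err <- function(fit, d) mean(predict(fit, d) != d$Purchase)
data.frame(
  train.error = sapply(models, err, d = oj_train_data),
  test.error  = sapply(models, err, d = oj_test_data)
)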