set.seed(123)
x1 = runif(500) - 0.5
x2 = runif(500) - 0.5
y = 1 * ((x1^2 - x2^2) > 0)
plot(x1[y == 0], x2[y == 0], col = "red", xlab = "X1", ylab = "X2", pch = 5)
points(x1[y == 1], x2[y == 1], col = "blue", pch = 2)
### (c) Fit a logistic regression model to the data, using X1 and X2 as predictors.
log.fit = glm(y ~ x1 + x2, family = binomial)
summary(log.fit)
##
## Call:
## glm(formula = y ~ x1 + x2, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.227 -1.200 1.133 1.157 1.188
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.04792 0.08949 0.535 0.592
## x1 -0.03999 0.31516 -0.127 0.899
## x2 0.11509 0.30829 0.373 0.709
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 692.86 on 499 degrees of freedom
## Residual deviance: 692.71 on 497 degrees of freedom
## AIC: 698.71
##
## Number of Fisher Scoring iterations: 3
dat = data.frame(x1 = x1, x2 = x2, y = y)
probs = predict(log.fit, dat, type = "response")
# mean(probs) is 0.512, the observed proportion of 1s; used as the classification cutoff below
preds = ifelse(probs >= 0.512, 1, 0)
dat.pos = dat[preds == 1,]
dat.neg = dat[preds == 0,]
plot(dat.pos$x1, dat.pos$x2, col = "blue", xlab = "X1", ylab = "X2", pch = 5, type = 'p')
points(dat.neg$x1, dat.neg$x2, col = "red", pch = 2)
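Because the fitted model is linear in X1 and X2, the decision boundary implied by any probability cutoff is a straight line. As a rough sketch (assuming the plot above is still the active device), it can be overlaid directly from the coefficients:
# boundary: b0 + b1*x1 + b2*x2 = log(p0/(1 - p0)), solved for x2
p0 = mean(probs)                 # the 0.512 cutoff used above
b = coef(log.fit)
abline(a = (log(p0 / (1 - p0)) - b["(Intercept)"]) / b["x2"],
       b = -b["x1"] / b["x2"], lty = 2)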
### (e) Now fit a logistic regression model to the data using non-linear functions of X1 and X2 as predictors (e.g. X1^2, X1*X2, log(X2), and so forth).
poly.fit = glm(y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2), dat, family = binomial)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(poly.fit)
##
## Call:
## glm(formula = y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2), family = binomial,
## data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.625e-04 -2.000e-08 2.000e-08 2.000e-08 9.604e-04
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -54.24 3335.32 -0.016 0.987
## poly(x1, 2)1 542.33 71411.93 0.008 0.994
## poly(x1, 2)2 20838.39 778877.62 0.027 0.979
## poly(x2, 2)1 2163.06 115506.63 0.019 0.985
## poly(x2, 2)2 -21646.31 811141.33 -0.027 0.979
## I(x1 * x2) 566.19 36927.06 0.015 0.988
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6.9286e+02 on 499 degrees of freedom
## Residual deviance: 2.3990e-06 on 494 degrees of freedom
## AIC: 12
##
## Number of Fisher Scoring iterations: 25
None of the terms are significant; all of their p-values are very close to 1. In addition, the warnings indicate fitted probabilities of numerically 0 or 1, so the model separates the training data perfectly.
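As a quick check (a sketch added here, not part of the original output), the fitted values show the separation directly: the probabilities sit at essentially 0 or 1, so rounding them should recover y.
summary(fitted(poly.fit))           # fitted probabilities pile up near 0 and 1
table(round(fitted(poly.fit)), y)   # rounded fits versus the true labels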
probs = predict(poly.fit, dat, type = "response")
mean(probs)
## [1] 0.512
preds = ifelse(probs > 0.512, 1, 0)
dat.pos = dat[preds == 1, ]
dat.neg = dat[preds == 0, ]
plot(dat.pos$x1, dat.pos$x2, col = "blue", xlab = "X1", ylab = "X2", pch = 5)
points(dat.neg$x1, dat.neg$x2, col = "red", pch = 2)
The predicted classes now follow a boundary much closer to the true non-linear shape separating the 0s and 1s in the original plot.
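For reference, the true boundary from the data-generating step is x1^2 - x2^2 = 0, i.e. the lines x2 = x1 and x2 = -x1. Overlaying them (assuming the previous plot is still the active device) makes the comparison explicit:
abline(a = 0, b = 1, lty = 2)    # x2 =  x1
abline(a = 0, b = -1, lty = 2)   # x2 = -x1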
library(e1071)
svm.fit = svm(as.factor(y) ~ x1 + x2, dat, kernel = "linear")
svm.preds = predict(svm.fit, dat)
dat.pos = dat[svm.preds == 1, ]
dat.neg = dat[svm.preds == 0, ]
plot(dat.pos$x1, dat.pos$x2, col = "blue", xlab = "X1", ylab = "X2", pch = 5)
points(dat.neg$x1, dat.neg$x2, col = "red", pch = 2)
The SVM with a linear kernel predicts class 1 for every observation, so no useful linear decision boundary was found.
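A one-line check of that claim (added here, not in the original output):
table(svm.preds)   # tabulate predictions; per the comment above, only one class should appear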
svm.fit = svm(as.factor(y) ~ x1 + x2, dat, kernel = "radial", gamma = 1)
svm.preds = predict(svm.fit, dat)
dat.pos = dat[svm.preds == 1, ]
dat.neg = dat[svm.preds == 0, ]
plot(dat.pos$x1, dat.pos$x2, col = "blue", xlab = "X1", ylab = "X2", pch = 5)
points(dat.neg$x1, dat.neg$x2, col = "red", pch = 2)
library(ISLR)
## Warning: package 'ISLR' was built under R version 4.0.3
library(e1071)
set.seed(123)
gas.med = median(Auto$mpg)
med.var = ifelse(Auto$mpg > gas.med, 1, 0)
Auto$mpglevel = as.factor(med.var)
lin.svm = tune(svm, mpglevel ~ ., data = Auto, kernel = "linear", ranges = list(cost = c(0.01, 0.1, 1, 5, 10, 100)))
summary(lin.svm)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.01025641
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-02 0.07634615 0.03928191
## 2 1e-01 0.04333333 0.03191738
## 3 1e+00 0.01025641 0.01792836
## 4 5e+00 0.01538462 0.01792836
## 5 1e+01 0.01788462 0.01727588
## 6 1e+02 0.03320513 0.02720447
The cross-validation error is 0.0763 at a cost of 0.01 and 0.0332 at a cost of 100, with a dip in between: the lowest error, 0.0103, occurs at a cost of 1.
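A quick way to visualize this dip (a sketch using e1071's plot method for tune objects, not part of the original output):
plot(lin.svm)   # cross-validation error as a function of cost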
set.seed(123)
poly.svm = tune(svm, mpglevel ~ ., data = Auto, kernel = "polynomial", ranges = list(cost=c(0.1,1,10,100), gamma=c(0.1,0.5,1,2,3,4), degree = c(0.1,1,2,3,4)))
summary(poly.svm)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma degree
## 10 0.1 1
##
## - best performance: 0.01025641
##
## - Detailed performance results:
## cost gamma degree error dispersion
## 1 0.1 0.1 0.1 0.58173077 0.04740051
## 2 1.0 0.1 0.1 0.58173077 0.04740051
## 3 10.0 0.1 0.1 0.58173077 0.04740051
## 4 100.0 0.1 0.1 0.58173077 0.04740051
## 5 0.1 0.5 0.1 0.58173077 0.04740051
## 6 1.0 0.5 0.1 0.58173077 0.04740051
## 7 10.0 0.5 0.1 0.58173077 0.04740051
## 8 100.0 0.5 0.1 0.58173077 0.04740051
## 9 0.1 1.0 0.1 0.58173077 0.04740051
## 10 1.0 1.0 0.1 0.58173077 0.04740051
## 11 10.0 1.0 0.1 0.58173077 0.04740051
## 12 100.0 1.0 0.1 0.58173077 0.04740051
## 13 0.1 2.0 0.1 0.58173077 0.04740051
## 14 1.0 2.0 0.1 0.58173077 0.04740051
## 15 10.0 2.0 0.1 0.58173077 0.04740051
## 16 100.0 2.0 0.1 0.58173077 0.04740051
## 17 0.1 3.0 0.1 0.58173077 0.04740051
## 18 1.0 3.0 0.1 0.58173077 0.04740051
## 19 10.0 3.0 0.1 0.58173077 0.04740051
## 20 100.0 3.0 0.1 0.58173077 0.04740051
## 21 0.1 4.0 0.1 0.58173077 0.04740051
## 22 1.0 4.0 0.1 0.58173077 0.04740051
## 23 10.0 4.0 0.1 0.58173077 0.04740051
## 24 100.0 4.0 0.1 0.58173077 0.04740051
## 25 0.1 0.1 1.0 0.07634615 0.03928191
## 26 1.0 0.1 1.0 0.04333333 0.03191738
## 27 10.0 0.1 1.0 0.01025641 0.01792836
## 28 100.0 0.1 1.0 0.01788462 0.01727588
## 29 0.1 0.5 1.0 0.05602564 0.03551922
## 30 1.0 0.5 1.0 0.01788462 0.01727588
## 31 10.0 0.5 1.0 0.01538462 0.01792836
## 32 100.0 0.5 1.0 0.03320513 0.02720447
## 33 0.1 1.0 1.0 0.04333333 0.03191738
## 34 1.0 1.0 1.0 0.01025641 0.01792836
## 35 10.0 1.0 1.0 0.01788462 0.01727588
## 36 100.0 1.0 1.0 0.03320513 0.02720447
## 37 0.1 2.0 1.0 0.02288462 0.01427008
## 38 1.0 2.0 1.0 0.01025641 0.01792836
## 39 10.0 2.0 1.0 0.03320513 0.02720447
## 40 100.0 2.0 1.0 0.03320513 0.02720447
## 41 0.1 3.0 1.0 0.02038462 0.01617396
## 42 1.0 3.0 1.0 0.01025641 0.01792836
## 43 10.0 3.0 1.0 0.03320513 0.02720447
## 44 100.0 3.0 1.0 0.03320513 0.02720447
## 45 0.1 4.0 1.0 0.01788462 0.01727588
## 46 1.0 4.0 1.0 0.01282051 0.01813094
## 47 10.0 4.0 1.0 0.03320513 0.02720447
## 48 100.0 4.0 1.0 0.03320513 0.02720447
## 49 0.1 0.1 2.0 0.30602564 0.10620333
## 50 1.0 0.1 2.0 0.25467949 0.10273137
## 51 10.0 0.1 2.0 0.15782051 0.07365927
## 52 100.0 0.1 2.0 0.16288462 0.08475580
## 53 0.1 0.5 2.0 0.16294872 0.06148264
## 54 1.0 0.5 2.0 0.16551282 0.08747687
## 55 10.0 0.5 2.0 0.17826923 0.07176717
## 56 100.0 0.5 2.0 0.19871795 0.06697346
## 57 0.1 1.0 2.0 0.15782051 0.07365927
## 58 1.0 1.0 2.0 0.16288462 0.08475580
## 59 10.0 1.0 2.0 0.19871795 0.06697346
## 60 100.0 1.0 2.0 0.19871795 0.06697346
## 61 0.1 2.0 2.0 0.16801282 0.08821472
## 62 1.0 2.0 2.0 0.18333333 0.06944214
## 63 10.0 2.0 2.0 0.19871795 0.06697346
## 64 100.0 2.0 2.0 0.19871795 0.06697346
## 65 0.1 3.0 2.0 0.16282051 0.08547489
## 66 1.0 3.0 2.0 0.19871795 0.06697346
## 67 10.0 3.0 2.0 0.19871795 0.06697346
## 68 100.0 3.0 2.0 0.19871795 0.06697346
## 69 0.1 4.0 2.0 0.17564103 0.08212248
## 70 1.0 4.0 2.0 0.19871795 0.06697346
## 71 10.0 4.0 2.0 0.19871795 0.06697346
## 72 100.0 4.0 2.0 0.19871795 0.06697346
## 73 0.1 0.1 3.0 0.23711538 0.09399789
## 74 1.0 0.1 3.0 0.07891026 0.03228186
## 75 10.0 0.1 3.0 0.04083333 0.03008810
## 76 100.0 0.1 3.0 0.03570513 0.03436393
## 77 0.1 0.5 3.0 0.03814103 0.02133598
## 78 1.0 0.5 3.0 0.03570513 0.03436393
## 79 10.0 0.5 3.0 0.03570513 0.02442271
## 80 100.0 0.5 3.0 0.03570513 0.02442271
## 81 0.1 1.0 3.0 0.03570513 0.03436393
## 82 1.0 1.0 3.0 0.03570513 0.02442271
## 83 10.0 1.0 3.0 0.03570513 0.02442271
## 84 100.0 1.0 3.0 0.03570513 0.02442271
## 85 0.1 2.0 3.0 0.03826923 0.02740388
## 86 1.0 2.0 3.0 0.03570513 0.02442271
## 87 10.0 2.0 3.0 0.03570513 0.02442271
## 88 100.0 2.0 3.0 0.03570513 0.02442271
## 89 0.1 3.0 3.0 0.03570513 0.02442271
## 90 1.0 3.0 3.0 0.03570513 0.02442271
## 91 10.0 3.0 3.0 0.03570513 0.02442271
## 92 100.0 3.0 3.0 0.03570513 0.02442271
## 93 0.1 4.0 3.0 0.03570513 0.02442271
## 94 1.0 4.0 3.0 0.03570513 0.02442271
## 95 10.0 4.0 3.0 0.03570513 0.02442271
## 96 100.0 4.0 3.0 0.03570513 0.02442271
## 97 0.1 0.1 4.0 0.31121795 0.09684710
## 98 1.0 0.1 4.0 0.23692308 0.09340281
## 99 10.0 0.1 4.0 0.19089744 0.06434271
## 100 100.0 0.1 4.0 0.18589744 0.07043897
## 101 0.1 0.5 4.0 0.19096154 0.06956790
## 102 1.0 0.5 4.0 0.19365385 0.05673827
## 103 10.0 0.5 4.0 0.19346154 0.07042458
## 104 100.0 0.5 4.0 0.19346154 0.07042458
## 105 0.1 1.0 4.0 0.19615385 0.06565708
## 106 1.0 1.0 4.0 0.19346154 0.07042458
## 107 10.0 1.0 4.0 0.19346154 0.07042458
## 108 100.0 1.0 4.0 0.19346154 0.07042458
## 109 0.1 2.0 4.0 0.19346154 0.07042458
## 110 1.0 2.0 4.0 0.19346154 0.07042458
## 111 10.0 2.0 4.0 0.19346154 0.07042458
## 112 100.0 2.0 4.0 0.19346154 0.07042458
## 113 0.1 3.0 4.0 0.19346154 0.07042458
## 114 1.0 3.0 4.0 0.19346154 0.07042458
## 115 10.0 3.0 4.0 0.19346154 0.07042458
## 116 100.0 3.0 4.0 0.19346154 0.07042458
## 117 0.1 4.0 4.0 0.19346154 0.07042458
## 118 1.0 4.0 4.0 0.19346154 0.07042458
## 119 10.0 4.0 4.0 0.19346154 0.07042458
## 120 100.0 4.0 4.0 0.19346154 0.07042458
poly.svm$best.parameters
## cost gamma degree
## 27 10 0.1 1
The best parameters are a cost of 10, a gamma of 0.1, and a degree of 1. A degree-1 polynomial kernel (with coef0 = 0) is effectively a scaled linear kernel, which is why these rows mirror the linear-kernel results above.
set.seed(123)
rad.svm = tune(svm, mpglevel ~ ., data = Auto, kernel = "radial", ranges = list(cost=c(0.1,1,10,100), gamma=c(0.1,0.5,1,2,3,4), degree = c(0.1,1,2,3,4)))
summary(rad.svm)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma degree
## 10 0.1 0.1
##
## - best performance: 0.03314103
##
## - Detailed performance results:
## cost gamma degree error dispersion
## 1 0.1 0.1 0.1 0.07634615 0.03928191
## 2 1.0 0.1 0.1 0.05852564 0.03960325
## 3 10.0 0.1 0.1 0.03314103 0.02942215
## 4 100.0 0.1 0.1 0.03326923 0.02434857
## 5 0.1 0.5 0.1 0.08147436 0.03707182
## 6 1.0 0.5 0.1 0.04576923 0.03903092
## 7 10.0 0.5 0.1 0.05339744 0.03440111
## 8 100.0 0.5 0.1 0.05339744 0.03440111
## 9 0.1 1.0 0.1 0.58173077 0.04740051
## 10 1.0 1.0 0.1 0.05865385 0.04942437
## 11 10.0 1.0 0.1 0.05608974 0.04595880
## 12 100.0 1.0 0.1 0.05608974 0.04595880
## 13 0.1 2.0 0.1 0.58173077 0.04740051
## 14 1.0 2.0 0.1 0.11474359 0.06630201
## 15 10.0 2.0 0.1 0.11474359 0.06630201
## 16 100.0 2.0 0.1 0.11474359 0.06630201
## 17 0.1 3.0 0.1 0.58173077 0.04740051
## 18 1.0 3.0 0.1 0.42878205 0.17823496
## 19 10.0 3.0 0.1 0.40839744 0.18573046
## 20 100.0 3.0 0.1 0.40839744 0.18573046
## 21 0.1 4.0 0.1 0.58173077 0.04740051
## 22 1.0 4.0 0.1 0.51538462 0.06959451
## 23 10.0 4.0 0.1 0.50012821 0.07022396
## 24 100.0 4.0 0.1 0.50012821 0.07022396
## 25 0.1 0.1 1.0 0.07634615 0.03928191
## 26 1.0 0.1 1.0 0.05852564 0.03960325
## 27 10.0 0.1 1.0 0.03314103 0.02942215
## 28 100.0 0.1 1.0 0.03326923 0.02434857
## 29 0.1 0.5 1.0 0.08147436 0.03707182
## 30 1.0 0.5 1.0 0.04576923 0.03903092
## 31 10.0 0.5 1.0 0.05339744 0.03440111
## 32 100.0 0.5 1.0 0.05339744 0.03440111
## 33 0.1 1.0 1.0 0.58173077 0.04740051
## 34 1.0 1.0 1.0 0.05865385 0.04942437
## 35 10.0 1.0 1.0 0.05608974 0.04595880
## 36 100.0 1.0 1.0 0.05608974 0.04595880
## 37 0.1 2.0 1.0 0.58173077 0.04740051
## 38 1.0 2.0 1.0 0.11474359 0.06630201
## 39 10.0 2.0 1.0 0.11474359 0.06630201
## 40 100.0 2.0 1.0 0.11474359 0.06630201
## 41 0.1 3.0 1.0 0.58173077 0.04740051
## 42 1.0 3.0 1.0 0.42878205 0.17823496
## 43 10.0 3.0 1.0 0.40839744 0.18573046
## 44 100.0 3.0 1.0 0.40839744 0.18573046
## 45 0.1 4.0 1.0 0.58173077 0.04740051
## 46 1.0 4.0 1.0 0.51538462 0.06959451
## 47 10.0 4.0 1.0 0.50012821 0.07022396
## 48 100.0 4.0 1.0 0.50012821 0.07022396
## 49 0.1 0.1 2.0 0.07634615 0.03928191
## 50 1.0 0.1 2.0 0.05852564 0.03960325
## 51 10.0 0.1 2.0 0.03314103 0.02942215
## 52 100.0 0.1 2.0 0.03326923 0.02434857
## 53 0.1 0.5 2.0 0.08147436 0.03707182
## 54 1.0 0.5 2.0 0.04576923 0.03903092
## 55 10.0 0.5 2.0 0.05339744 0.03440111
## 56 100.0 0.5 2.0 0.05339744 0.03440111
## 57 0.1 1.0 2.0 0.58173077 0.04740051
## 58 1.0 1.0 2.0 0.05865385 0.04942437
## 59 10.0 1.0 2.0 0.05608974 0.04595880
## 60 100.0 1.0 2.0 0.05608974 0.04595880
## 61 0.1 2.0 2.0 0.58173077 0.04740051
## 62 1.0 2.0 2.0 0.11474359 0.06630201
## 63 10.0 2.0 2.0 0.11474359 0.06630201
## 64 100.0 2.0 2.0 0.11474359 0.06630201
## 65 0.1 3.0 2.0 0.58173077 0.04740051
## 66 1.0 3.0 2.0 0.42878205 0.17823496
## 67 10.0 3.0 2.0 0.40839744 0.18573046
## 68 100.0 3.0 2.0 0.40839744 0.18573046
## 69 0.1 4.0 2.0 0.58173077 0.04740051
## 70 1.0 4.0 2.0 0.51538462 0.06959451
## 71 10.0 4.0 2.0 0.50012821 0.07022396
## 72 100.0 4.0 2.0 0.50012821 0.07022396
## 73 0.1 0.1 3.0 0.07634615 0.03928191
## 74 1.0 0.1 3.0 0.05852564 0.03960325
## 75 10.0 0.1 3.0 0.03314103 0.02942215
## 76 100.0 0.1 3.0 0.03326923 0.02434857
## 77 0.1 0.5 3.0 0.08147436 0.03707182
## 78 1.0 0.5 3.0 0.04576923 0.03903092
## 79 10.0 0.5 3.0 0.05339744 0.03440111
## 80 100.0 0.5 3.0 0.05339744 0.03440111
## 81 0.1 1.0 3.0 0.58173077 0.04740051
## 82 1.0 1.0 3.0 0.05865385 0.04942437
## 83 10.0 1.0 3.0 0.05608974 0.04595880
## 84 100.0 1.0 3.0 0.05608974 0.04595880
## 85 0.1 2.0 3.0 0.58173077 0.04740051
## 86 1.0 2.0 3.0 0.11474359 0.06630201
## 87 10.0 2.0 3.0 0.11474359 0.06630201
## 88 100.0 2.0 3.0 0.11474359 0.06630201
## 89 0.1 3.0 3.0 0.58173077 0.04740051
## 90 1.0 3.0 3.0 0.42878205 0.17823496
## 91 10.0 3.0 3.0 0.40839744 0.18573046
## 92 100.0 3.0 3.0 0.40839744 0.18573046
## 93 0.1 4.0 3.0 0.58173077 0.04740051
## 94 1.0 4.0 3.0 0.51538462 0.06959451
## 95 10.0 4.0 3.0 0.50012821 0.07022396
## 96 100.0 4.0 3.0 0.50012821 0.07022396
## 97 0.1 0.1 4.0 0.07634615 0.03928191
## 98 1.0 0.1 4.0 0.05852564 0.03960325
## 99 10.0 0.1 4.0 0.03314103 0.02942215
## 100 100.0 0.1 4.0 0.03326923 0.02434857
## 101 0.1 0.5 4.0 0.08147436 0.03707182
## 102 1.0 0.5 4.0 0.04576923 0.03903092
## 103 10.0 0.5 4.0 0.05339744 0.03440111
## 104 100.0 0.5 4.0 0.05339744 0.03440111
## 105 0.1 1.0 4.0 0.58173077 0.04740051
## 106 1.0 1.0 4.0 0.05865385 0.04942437
## 107 10.0 1.0 4.0 0.05608974 0.04595880
## 108 100.0 1.0 4.0 0.05608974 0.04595880
## 109 0.1 2.0 4.0 0.58173077 0.04740051
## 110 1.0 2.0 4.0 0.11474359 0.06630201
## 111 10.0 2.0 4.0 0.11474359 0.06630201
## 112 100.0 2.0 4.0 0.11474359 0.06630201
## 113 0.1 3.0 4.0 0.58173077 0.04740051
## 114 1.0 3.0 4.0 0.42878205 0.17823496
## 115 10.0 3.0 4.0 0.40839744 0.18573046
## 116 100.0 3.0 4.0 0.40839744 0.18573046
## 117 0.1 4.0 4.0 0.58173077 0.04740051
## 118 1.0 4.0 4.0 0.51538462 0.06959451
## 119 10.0 4.0 4.0 0.50012821 0.07022396
## 120 100.0 4.0 4.0 0.50012821 0.07022396
rad.svm$best.parameters
## cost gamma degree
## 3 10 0.1 0.1
The best parameters for the radial model are a cost of 10 and a gamma of 0.1. The radial kernel ignores the degree argument, which is why the results repeat identically across every value of degree.
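Since the radial kernel does not use degree, an equivalent and much faster search would tune over cost and gamma only. A minimal sketch (the object rad.svm2 is introduced here for illustration):
rad.svm2 = tune(svm, mpglevel ~ ., data = Auto, kernel = "radial",
                ranges = list(cost = c(0.1, 1, 10, 100),
                              gamma = c(0.1, 0.5, 1, 2, 3, 4)))
rad.svm2$best.parameters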
svm.lin = svm(mpglevel ~ ., data = Auto, kernel = "linear", cost = 1)
svm.poly = svm(mpglevel ~ ., data = Auto, kernel = "polynomial", cost = 10, gamma = 0.1, degree = 1)
svm.rad = svm(mpglevel ~ ., data = Auto, kernel = "radial", cost = 10, gamma = 0.1)
#plot(svm.lin, Auto, mpg~acceleration)
plotpairs = function(fit) {
  for (name in names(Auto)[!(names(Auto) %in% c("mpg", "mpglevel", "name"))]) {
    plot(fit, Auto, as.formula(paste("mpg~", name, sep = "")))
  }
}
plotpairs(svm.lin)
plotpairs(svm.poly)
plotpairs(svm.rad)
For each predictor, the plots above show the SVM's classification regions (mpglevel 0 vs. 1) in the plane of actual mpg versus that predictor, with the observations overlaid.
library(caret)
## Warning: package 'caret' was built under R version 4.0.3
## Loading required package: lattice
## Loading required package: ggplot2
set.seed(123)
data("OJ")
trainrows = createDataPartition(OJ$Purchase, p = (799/1070), list = FALSE)
traind = OJ[trainrows,]
testd = OJ[-trainrows,]
svm.lin = svm(Purchase ~ ., kernel = "linear", data = traind, cost = 0.01)
summary(svm.lin)
##
## Call:
## svm(formula = Purchase ~ ., data = traind, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 431
##
## ( 216 215 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
Using a linear SVM with a cost of 0.01 to predict whether a purchase is CH or MM returns a model with 431 support vectors: 216 belong to the CH class and the other 215 to MM.
train.preds = predict(svm.lin, traind)
table(traind$Purchase, train.preds)
## train.preds
## CH MM
## CH 433 55
## MM 75 237
(75 + 55) / (433 + 237 + 75 + 55)
## [1] 0.1625
test.preds = predict(svm.lin, testd)
table(testd$Purchase, test.preds)
## test.preds
## CH MM
## CH 136 29
## MM 25 80
(25 + 29) / (136 + 80 + 25 + 29)
## [1] 0.2
The training misclassification rate is 0.1625 while the testing misclassification rate is 0.2.
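Rather than summing the confusion-matrix cells by hand, a small helper can compute the same rates (err_rate is introduced here for convenience and is not part of the original write-up):
err_rate = function(model, data) mean(predict(model, data) != data$Purchase)
err_rate(svm.lin, traind)   # training misclassification rate (0.1625 above)
err_rate(svm.lin, testd)    # test misclassification rate (0.2 above)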
set.seed(123)
svm.lin.tuned = tune(svm, Purchase ~ ., data = traind, kernel = "linear", ranges = list(cost = c(0.01, 0.05, 0.1, 0.5, (seq(1, 10, length.out = 10)))))
summary(svm.lin.tuned)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.05
##
## - best performance: 0.16625
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.16875 0.05376453
## 2 0.05 0.16625 0.05622685
## 3 0.10 0.16625 0.05337563
## 4 0.50 0.17125 0.04966904
## 5 1.00 0.17250 0.05361903
## 6 2.00 0.17250 0.05614960
## 7 3.00 0.17000 0.05533986
## 8 4.00 0.17000 0.05533986
## 9 5.00 0.16875 0.05408648
## 10 6.00 0.16750 0.05277047
## 11 7.00 0.16750 0.05277047
## 12 8.00 0.16750 0.05277047
## 13 9.00 0.16750 0.05277047
## 14 10.00 0.16750 0.05277047
The best cost was 0.05, with a cross-validation error of 0.16625.
svm.lin.twoned = svm(Purchase~., kernel = 'linear', data = traind, cost = svm.lin.tuned$best.parameters$cost)
train.preds = predict(svm.lin.twoned, traind)
table(traind$Purchase, train.preds)
## train.preds
## CH MM
## CH 433 55
## MM 75 237
(75 + 55) / (433 + 237 + 75 + 55)
## [1] 0.1625
test.preds = predict(svm.lin.twoned, testd)
table(testd$Purchase, test.preds)
## test.preds
## CH MM
## CH 136 29
## MM 21 84
(21 + 29) / (136 + 84 + 21 + 29)
## [1] 0.1851852
The training error rate stayed the same, but the testing error rate decreased slightly from 0.2 to 0.185.
set.seed(123)
svm.rad = svm(Purchase ~ ., kernel = "radial", data = traind, cost = 0.01)
summary(svm.rad)
##
## Call:
## svm(formula = Purchase ~ ., data = traind, kernel = "radial", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 0.01
##
## Number of Support Vectors: 627
##
## ( 315 312 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train.preds = predict(svm.rad, traind)
table(traind$Purchase, train.preds)
## train.preds
## CH MM
## CH 488 0
## MM 312 0
(312) / (488 + 312)
## [1] 0.39
test.preds = predict(svm.rad, testd)
table(testd$Purchase, test.preds)
## test.preds
## CH MM
## CH 165 0
## MM 105 0
(105) / (165 + 105)
## [1] 0.3888889
The radial model with cost 0.01 does not predict any MM purchases, even though its support vectors are split nearly evenly between the two classes. The training misclassification rate is 0.39 and the test rate is 0.389.
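That 0.39 is simply the share of MM purchases in the training set, i.e. the error of always predicting the majority class CH. A one-line check (added here, not in the original output):
mean(traind$Purchase == "MM")   # proportion of MM = error of an all-CH classifier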
set.seed(123)
svm.rad.tuned = tune(svm, Purchase ~ ., data = traind, kernel = "radial", ranges = list(cost = c(0.01, 0.05, 0.1, 0.5, (seq(1, 10, length.out = 10)))))
summary(svm.rad.tuned)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.17
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.39000 0.07518496
## 2 0.05 0.22125 0.08760906
## 3 0.10 0.17375 0.05726704
## 4 0.50 0.17000 0.05075814
## 5 1.00 0.17000 0.05342440
## 6 2.00 0.18000 0.05041494
## 7 3.00 0.18375 0.05272110
## 8 4.00 0.18750 0.04787136
## 9 5.00 0.19250 0.04609772
## 10 6.00 0.19375 0.04759858
## 11 7.00 0.19625 0.05138701
## 12 8.00 0.19750 0.05096295
## 13 9.00 0.19625 0.05138701
## 14 10.00 0.19625 0.05239076
svm.rad.twoned = svm(Purchase~., kernel = 'radial', data = traind, cost = svm.rad.tuned$best.parameters$cost)
train.preds = predict(svm.rad.twoned, traind)
table(traind$Purchase, train.preds)
## train.preds
## CH MM
## CH 446 42
## MM 76 236
(76 + 42) / (446 + 236 + 76 + 42)
## [1] 0.1475
test.preds = predict(svm.rad.twoned, testd)
table(testd$Purchase, test.preds)
## test.preds
## CH MM
## CH 141 24
## MM 25 80
(25 + 24) / (141 + 80 + 25 + 24)
## [1] 0.1814815
The tuned model using the best cost parameter has a training misclassification rate of 0.1475 and a testing misclassification rate of 0.181. It performed much better than the untuned radial model and is about on par with the tuned linear model.
### (g) Repeat parts (b) through (e) using a support vector machine with a polynomial kernel. Set degree = 2.
set.seed(123)
svm.poly = svm(Purchase ~ ., kernel = "polynomial", data = traind, cost = 0.01, degree = 2)
summary(svm.poly)
##
## Call:
## svm(formula = Purchase ~ ., data = traind, kernel = "polynomial",
## cost = 0.01, degree = 2)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 0.01
## degree: 2
## coef.0: 0
##
## Number of Support Vectors: 630
##
## ( 318 312 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train.preds = predict(svm.poly, traind)
table(traind$Purchase, train.preds)
## train.preds
## CH MM
## CH 488 0
## MM 312 0
(312) / (488 + 312)
## [1] 0.39
test.preds = predict(svm.poly, testd)
table(testd$Purchase, test.preds)
## test.preds
## CH MM
## CH 165 0
## MM 105 0
(105) / (165 + 105)
## [1] 0.3888889
Using an SVM with the polynomial kernel at a cost of 0.01, the results are identical to those of the untuned radial model: no MM purchases are predicted.
set.seed(123)
svm.poly.tuned = tune(svm, Purchase ~ ., data = traind, kernel = "polynomial", ranges = list(cost = c(0.01, 0.05, 0.1, 0.5, (seq(1, 10, length.out = 10)))), degree = 2)
summary(svm.poly.tuned)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 5
##
## - best performance: 0.17375
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.39000 0.07518496
## 2 0.05 0.33750 0.06744339
## 3 0.10 0.33125 0.07482619
## 4 0.50 0.20375 0.07096801
## 5 1.00 0.18375 0.05239076
## 6 2.00 0.18125 0.05810969
## 7 3.00 0.17750 0.06061032
## 8 4.00 0.17875 0.05622685
## 9 5.00 0.17375 0.04466309
## 10 6.00 0.17875 0.04715886
## 11 7.00 0.18125 0.05212498
## 12 8.00 0.18000 0.05688683
## 13 9.00 0.18375 0.05804991
## 14 10.00 0.18500 0.05583955
svm.poly.twoned = svm(Purchase~., kernel = 'polynomial', data = traind, cost = svm.poly.tuned$best.parameters$cost)
train.preds = predict(svm.poly.twoned, traind)
table(traind$Purchase, train.preds)
## train.preds
## CH MM
## CH 460 28
## MM 79 233
(79 + 28) / (460 + 233 + 79 + 28)
## [1] 0.13375
test.preds = predict(svm.poly.twoned, testd)
table(testd$Purchase, test.preds)
## test.preds
## CH MM
## CH 142 23
## MM 28 77
(23 + 28) / (142 + 77 + 23 + 28)
## [1] 0.1888889
The tuned radial SVM gave the best testing misclassification rate at 0.1815, while the tuned linear and polynomial SVMs were close behind at 0.1852 and 0.1889, respectively.
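Using the err_rate helper defined earlier (an addition, not part of the original code), the three tuned models can be compared in one place:
c(linear = err_rate(svm.lin.twoned, testd),
  radial = err_rate(svm.rad.twoned, testd),
  poly = err_rate(svm.poly.twoned, testd))   # expected: roughly 0.185, 0.181, 0.189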
### Comment on your results.
The radial SVM captured the non-linear class boundary far better than the linear kernel, which failed to classify the simulated data properly. The logistic regression model with squared and interaction terms also performed fairly well.