library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
set.seed(1)
x1 <- runif (500) - 0.5
x2 <- runif (500) - 0.5
#Note: Y is the class!
y <- 1 * (x1 ^2 - x2 ^2 > 0)
plot(x=x1[y==0], y=x2[y==0], xlab="X1",ylab="X2", col="black")
points(x1[y==1],x2[y==1],col="lightblue")
logRegMod.fit <- glm(y~x1+x2, family = 'binomial')
summary(logRegMod.fit)
##
## Call:
## glm(formula = y ~ x1 + x2, family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.179 -1.139 -1.112 1.206 1.257
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.087260 0.089579 -0.974 0.330
## x1 0.196199 0.316864 0.619 0.536
## x2 -0.002854 0.305712 -0.009 0.993
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 692.18 on 499 degrees of freedom
## Residual deviance: 691.79 on 497 degrees of freedom
## AIC: 697.79
##
## Number of Fisher Scoring iterations: 3
df <- data.frame(x1 = x1, x2 = x2, y = y)
lm.prob <- predict(logRegMod.fit, newdata=df, type = "response")
lm.pred <- ifelse(lm.prob > 0.50, 1, 0)
ggplot(data = df, mapping = aes(x1, x2)) +
geom_point(data = df, mapping = aes(colour = lm.pred))
logRegMod.fit2 <- glm(y ~ poly(x1, 3) + poly(x2, 3), data = df, family = binomial)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(logRegMod.fit2)
##
## Call:
## glm(formula = y ~ poly(x1, 3) + poly(x2, 3), family = binomial,
## data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.116e-04 -2.000e-08 -2.000e-08 2.000e-08 1.098e-03
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -112.8 7700.8 -0.015 0.988
## poly(x1, 3)1 2320.9 202067.7 0.011 0.991
## poly(x1, 3)2 26012.9 817440.0 0.032 0.975
## poly(x1, 3)3 -238.4 100966.6 -0.002 0.998
## poly(x2, 3)1 200.2 85132.7 0.002 0.998
## poly(x2, 3)2 -27719.8 872877.7 -0.032 0.975
## poly(x2, 3)3 387.8 62341.3 0.006 0.995
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6.9218e+02 on 499 degrees of freedom
## Residual deviance: 3.3141e-06 on 493 degrees of freedom
## AIC: 14
##
## Number of Fisher Scoring iterations: 25
lm.prob2 <- predict(logRegMod.fit2, newdata=df, type = "response")
lm.pred2 <- ifelse(lm.prob2 > 0.50, 1, 0)
ggplot(data = df, mapping = aes(x1, x2)) +
geom_point(data = df, mapping = aes(colour = lm.pred2))
library(e1071)
# svm() needs a factor response for classification; otherwise it fits a regression
# model, and plot.svm() only works for classification fits
df$y <- as.factor(df$y)
svm_lin <- svm(y ~ ., data = df, kernel = 'linear', cost = 0.01)
plot(svm_lin, df)
svm_rad <- svm(y ~ ., data = df, kernel = 'radial', gamma = 1)
plot(svm_rad, df)
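For comparison with the logistic-regression plots above, the SVM predicted classes can be shown the same way; a minimal sketch assuming the factor-coded df from the chunk above:
# predicted classes for the training observations from each SVM fit
svm_lin_pred <- predict(svm_lin, newdata = df)
svm_rad_pred <- predict(svm_rad, newdata = df)
# colour the observations by predicted class, mirroring the logistic-regression plots
ggplot(df, aes(x1, x2)) + geom_point(aes(colour = svm_lin_pred))
ggplot(df, aes(x1, x2)) + geom_point(aes(colour = svm_rad_pred))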
In this problem, you will use support vector approaches in order to predict whether a given car gets high or low gas mileage based on the Auto data set.
library(ISLR)
attach(Auto)
## The following object is masked from package:ggplot2:
##
## mpg
Auto$MPG_Abv_Med <- as.factor(ifelse(Auto$mpg > median(Auto$mpg),1,0))
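As a quick sanity check, the median split should give a roughly even class balance:
# counts of cars at or below (0) versus above (1) the median mpg
table(Auto$MPG_Abv_Med)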
Fit a support vector classifier to the data with various values of cost, in order to predict whether a car gets high or low gas mileage. Report the cross-validation errors associated with different values of this parameter. Comment on your results. Note you will need to fit the classifier without the gas mileage variable to produce sensible results.
set.seed(1)
library(e1071)
tune.7b = tune(svm, MPG_Abv_Med ~ ., data = Auto, kernel = "linear", ranges = list(cost = c(0.01, 0.1, 1, 5, 10, 100)))
summary(tune.7b)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.01025641
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-02 0.07653846 0.03617137
## 2 1e-01 0.04596154 0.03378238
## 3 1e+00 0.01025641 0.01792836
## 4 5e+00 0.02051282 0.02648194
## 5 1e+01 0.02051282 0.02648194
## 6 1e+02 0.03076923 0.03151981
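The prompt notes that the classifier should be fit without the gas mileage variable, since MPG_Abv_Med is derived directly from mpg. A minimal sketch of that variant (the object names auto_no_mpg and tune.7b.nompg are my own; name is also dropped because it is a high-cardinality factor):
# drop mpg (and name) so the classifier cannot read the answer off the variable it came from
auto_no_mpg <- subset(Auto, select = -c(mpg, name))
set.seed(1)
tune.7b.nompg <- tune(svm, MPG_Abv_Med ~ ., data = auto_no_mpg, kernel = "linear",
                      ranges = list(cost = c(0.01, 0.1, 1, 5, 10, 100)))
summary(tune.7b.nompg)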
Now repeat (b), this time using SVMs with radial and polynomial basis kernels, with different values of gamma and degree and cost. Comment on your results.
tune.7c1 = tune(svm, MPG_Abv_Med ~ ., data = Auto, kernel = "polynomial", ranges = list(cost = c(0.1, 1, 5, 10), degree = c(2, 3, 4)))
summary(tune.7c1)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 10 2
##
## - best performance: 0.5841667
##
## - Detailed performance results:
## cost degree error dispersion
## 1 0.1 2 0.6019231 0.06346118
## 2 1.0 2 0.6019231 0.06346118
## 3 5.0 2 0.6019231 0.06346118
## 4 10.0 2 0.5841667 0.07806609
## 5 0.1 3 0.6019231 0.06346118
## 6 1.0 3 0.6019231 0.06346118
## 7 5.0 3 0.6019231 0.06346118
## 8 10.0 3 0.6019231 0.06346118
## 9 0.1 4 0.6019231 0.06346118
## 10 1.0 4 0.6019231 0.06346118
## 11 5.0 4 0.6019231 0.06346118
## 12 10.0 4 0.6019231 0.06346118
tune.7c2 = tune(svm, MPG_Abv_Med ~ ., data = Auto, kernel = "radial", ranges = list(cost = c(0.1, 1, 5, 10), gamma = c(0.01, 0.1, 1, 5, 10, 100)))
summary(tune.7c2)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 10 0.01
##
## - best performance: 0.02044872
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 0.1 1e-02 0.08698718 0.04560056
## 2 1.0 1e-02 0.07160256 0.03373099
## 3 5.0 1e-02 0.05121795 0.02967002
## 4 10.0 1e-02 0.02044872 0.01077927
## 5 0.1 1e-01 0.07673077 0.03419344
## 6 1.0 1e-01 0.05121795 0.03203768
## 7 5.0 1e-01 0.02557692 0.01709522
## 8 10.0 1e-01 0.02301282 0.02244393
## 9 0.1 1e+00 0.59461538 0.08083319
## 10 1.0 1e+00 0.06141026 0.03026776
## 11 5.0 1e+00 0.06397436 0.02789391
## 12 10.0 1e+00 0.06397436 0.02789391
## 13 0.1 5e+00 0.59461538 0.08083319
## 14 1.0 5e+00 0.52051282 0.09421163
## 15 5.0 5e+00 0.51538462 0.10051415
## 16 10.0 5e+00 0.51538462 0.10051415
## 17 0.1 1e+01 0.59461538 0.08083319
## 18 1.0 1e+01 0.55384615 0.09432787
## 19 5.0 1e+01 0.54358974 0.09085645
## 20 10.0 1e+01 0.54358974 0.09085645
## 21 0.1 1e+02 0.59461538 0.08083319
## 22 1.0 1e+02 0.59461538 0.08083319
## 23 5.0 1e+02 0.59461538 0.08083319
## 24 10.0 1e+02 0.59461538 0.08083319
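One way to back up these comments is to plot the tune object, which draws the cross-validated error over the (cost, gamma) grid; a minimal sketch, log-scaling both axes since the grids span several orders of magnitude:
# contour of CV error over cost and gamma
plot(tune.7c2, transform.x = log10, transform.y = log10)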
Make some plots to back up your assertions in (b) and (c). Hint: in the lab, we used the plot() function for svm objects only in cases with p = 2. When p > 2, you can use the plot() function to create plots displaying pairs of variables at a time. Essentially, instead of typing plot(svmfit, dat), where svmfit contains your fitted model and dat is a data frame containing your data, you can type plot(svmfit, dat, x1~x4) in order to plot just the first and fourth variables. However, you must replace x1 and x4 with the correct variable names. To find out more, type ?plot.svm.
svm.linear <- svm(MPG_Abv_Med ~ ., data = Auto, kernel = "linear", cost = 1)
svm.poly <- svm(MPG_Abv_Med ~ ., data = Auto, kernel = "polynomial", cost = 10, degree = 2)
svm.radial <- svm(MPG_Abv_Med ~ ., data = Auto, kernel = "radial", cost = 10, gamma = 0.01)
plotpairs <- function(fit) {
for (name in names(Auto)[!(names(Auto) %in% c("mpg", "MPG_Abv_Med", "name"))]) {
plot(fit, Auto, as.formula(paste("mpg~", name, sep = "")))
}
}
plotpairs(svm.linear)
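The same helper can be applied to the tuned polynomial and radial fits to back up the assertions in (b) and (c):
plotpairs(svm.poly)
plotpairs(svm.radial)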
This problem involves the OJ data set, which is part of the ISLR2 package.
library(ISLR2)
##
## Attaching package: 'ISLR2'
## The following object is masked _by_ '.GlobalEnv':
##
## Auto
## The following objects are masked from 'package:ISLR':
##
## Auto, Credit
attach(OJ)
set.seed(1)
train <- sample(1:nrow(OJ), 800)
oj.train <- OJ[train,]
oj.test <- OJ[-train,]
Fit a support vector classifier to the training data using cost = 0.01, with Purchase as the response and the other variables as predictors. Use the summary() function to produce summary statistics, and describe the results obtained.
svm.8b <- svm(Purchase ~ ., data = oj.train, kernel = "linear", cost = 0.01)
summary(svm.8b)
##
## Call:
## svm(formula = Purchase ~ ., data = oj.train, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 435
##
## ( 219 216 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
435 support vectors were used: 219 on the CH side and 216 on the MM side. CH and MM are the two classes of the Purchase response, not predictor variables.
train.pred<-predict(svm.8b, oj.train)
test.pred<-predict(svm.8b, oj.test)
mean(train.pred != oj.train$Purchase)
## [1] 0.175
mean(test.pred != oj.test$Purchase)
## [1] 0.1777778
Both error rates were just over 17%: about 17.5% on the training set and 17.8% on the test set.
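Beyond the overall error rates, a confusion matrix shows where the misclassifications fall; a minimal sketch using base R's table():
# rows are predicted classes, columns are observed Purchase values
table(predicted = test.pred, observed = oj.test$Purchase)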
Use the tune() function to select an optimal cost. Consider values in the range 0.01 to 10.
tune.8d = tune(svm, Purchase ~ ., data = oj.train, kernel = "linear", ranges = list(cost = seq(0.01, 10, by = 0.1)))
summary(tune.8d)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 6.51
##
## - best performance: 0.17
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.17375 0.03884174
## 2 0.11 0.18000 0.03184162
## 3 0.21 0.18000 0.03291403
## 4 0.31 0.17875 0.03438447
## 5 0.41 0.17875 0.03438447
## 6 0.51 0.17625 0.03197764
## 7 0.61 0.17625 0.03197764
## 8 0.71 0.17625 0.03197764
## 9 0.81 0.17625 0.03197764
## 10 0.91 0.17625 0.03197764
## 11 1.01 0.17500 0.03061862
## 12 1.11 0.17500 0.03061862
## 13 1.21 0.17500 0.03061862
## 14 1.31 0.17500 0.03061862
## 15 1.41 0.17500 0.03061862
## 16 1.51 0.17500 0.03061862
## 17 1.61 0.17500 0.03061862
## 18 1.71 0.17500 0.03061862
## 19 1.81 0.17375 0.02972676
## 20 1.91 0.17375 0.02972676
## 21 2.01 0.17375 0.02972676
## 22 2.11 0.17375 0.03030516
## 23 2.21 0.17375 0.03030516
## 24 2.31 0.17375 0.03030516
## 25 2.41 0.17375 0.03304563
## 26 2.51 0.17250 0.03374743
## 27 2.61 0.17125 0.03230175
## 28 2.71 0.17125 0.03230175
## 29 2.81 0.17250 0.03106892
## 30 2.91 0.17125 0.03175973
## 31 3.01 0.17250 0.03270236
## 32 3.11 0.17125 0.03175973
## 33 3.21 0.17250 0.03270236
## 34 3.31 0.17250 0.03270236
## 35 3.41 0.17250 0.03270236
## 36 3.51 0.17375 0.03408018
## 37 3.61 0.17250 0.03322900
## 38 3.71 0.17250 0.03322900
## 39 3.81 0.17250 0.03322900
## 40 3.91 0.17250 0.03322900
## 41 4.01 0.17250 0.03322900
## 42 4.11 0.17250 0.03322900
## 43 4.21 0.17250 0.03322900
## 44 4.31 0.17250 0.03322900
## 45 4.41 0.17250 0.03322900
## 46 4.51 0.17250 0.03322900
## 47 4.61 0.17250 0.03322900
## 48 4.71 0.17250 0.03322900
## 49 4.81 0.17250 0.03322900
## 50 4.91 0.17250 0.03322900
## 51 5.01 0.17250 0.03322900
## 52 5.11 0.17250 0.03322900
## 53 5.21 0.17250 0.03322900
## 54 5.31 0.17250 0.03322900
## 55 5.41 0.17250 0.03322900
## 56 5.51 0.17250 0.03322900
## 57 5.61 0.17250 0.03322900
## 58 5.71 0.17250 0.03322900
## 59 5.81 0.17250 0.03322900
## 60 5.91 0.17250 0.03322900
## 61 6.01 0.17125 0.03438447
## 62 6.11 0.17125 0.03438447
## 63 6.21 0.17125 0.03438447
## 64 6.31 0.17125 0.03438447
## 65 6.41 0.17125 0.03438447
## 66 6.51 0.17000 0.03593976
## 67 6.61 0.17125 0.03438447
## 68 6.71 0.17125 0.03438447
## 69 6.81 0.17125 0.03438447
## 70 6.91 0.17125 0.03438447
## 71 7.01 0.17125 0.03438447
## 72 7.11 0.17000 0.03593976
## 73 7.21 0.17125 0.03438447
## 74 7.31 0.17125 0.03438447
## 75 7.41 0.17125 0.03438447
## 76 7.51 0.17125 0.03438447
## 77 7.61 0.17125 0.03438447
## 78 7.71 0.17125 0.03438447
## 79 7.81 0.17125 0.03438447
## 80 7.91 0.17125 0.03438447
## 81 8.01 0.17000 0.03593976
## 82 8.11 0.17000 0.03593976
## 83 8.21 0.17000 0.03593976
## 84 8.31 0.17000 0.03593976
## 85 8.41 0.17000 0.03593976
## 86 8.51 0.17000 0.03593976
## 87 8.61 0.17000 0.03593976
## 88 8.71 0.17000 0.03593976
## 89 8.81 0.17125 0.03488573
## 90 8.91 0.17125 0.03488573
## 91 9.01 0.17125 0.03488573
## 92 9.11 0.17125 0.03488573
## 93 9.21 0.17125 0.03488573
## 94 9.31 0.17125 0.03488573
## 95 9.41 0.17125 0.03488573
## 96 9.51 0.17125 0.03488573
## 97 9.61 0.17125 0.03488573
## 98 9.71 0.17125 0.03488573
## 99 9.81 0.17125 0.03488573
## 100 9.91 0.17125 0.03488573
tune.8d$best.parameters$cost
## [1] 6.51
The best cross-validated performance is an error rate of 0.17, which occurs at several cost values (6.51, 7.11, and 8.01 through 8.71). Using tune.8d$best.parameters$cost, we get a cost value of 6.51, which I will use going forward.
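The cross-validated error curve is nearly flat over this cost range; plotting the tune object makes that easy to see:
# CV error as a function of cost
plot(tune.8d)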
Compute the training and test error rates using this new value for cost.
svm.8e<-svm(Purchase ~ ., kernel='linear', data=oj.train, cost=tune.8d$best.parameters$cost)
train.pred<-predict(svm.8e, oj.train)
test.pred<-predict(svm.8e, oj.test)
mean(train.pred != oj.train$Purchase)
## [1] 0.1625
mean(test.pred != oj.test$Purchase)
## [1] 0.1518519
Repeat parts (b) through (e) using a support vector machine with a radial kernel. Use the default value for gamma.
#8b repeat
svm.8b <- svm(Purchase ~ ., data = oj.train, kernel = "radial", cost = 0.01)
summary(svm.8b)
##
## Call:
## svm(formula = Purchase ~ ., data = oj.train, kernel = "radial", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 0.01
##
## Number of Support Vectors: 634
##
## ( 319 315 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
#8c repeat
train.pred<-predict(svm.8b, oj.train)
test.pred<-predict(svm.8b, oj.test)
mean(train.pred != oj.train$Purchase)
## [1] 0.39375
mean(test.pred != oj.test$Purchase)
## [1] 0.3777778
#8d repeat
tune.8d = tune(svm, Purchase ~ ., data = oj.train, kernel = "radial", ranges = list(cost = seq(0.01, 10, by = 0.1)))
summary(tune.8d)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.71
##
## - best performance: 0.1675
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.39375 0.06568284
## 2 0.11 0.18125 0.04759858
## 3 0.21 0.17500 0.04487637
## 4 0.31 0.17375 0.04505013
## 5 0.41 0.17375 0.04543387
## 6 0.51 0.17375 0.04767147
## 7 0.61 0.16750 0.04495368
## 8 0.71 0.16750 0.04005205
## 9 0.81 0.17125 0.03821086
## 10 0.91 0.17250 0.04031129
## 11 1.01 0.17625 0.03793727
## 12 1.11 0.17625 0.03793727
## 13 1.21 0.17375 0.04016027
## 14 1.31 0.17500 0.03864008
## 15 1.41 0.17875 0.03682259
## 16 1.51 0.17750 0.03717451
## 17 1.61 0.18000 0.03641962
## 18 1.71 0.17750 0.03622844
## 19 1.81 0.17750 0.03622844
## 20 1.91 0.17625 0.03606033
## 21 2.01 0.17625 0.03606033
## 22 2.11 0.17625 0.03606033
## 23 2.21 0.17875 0.03910900
## 24 2.31 0.17875 0.04084609
## 25 2.41 0.17875 0.04084609
## 26 2.51 0.18000 0.04048319
## 27 2.61 0.18125 0.03919768
## 28 2.71 0.18125 0.03919768
## 29 2.81 0.18125 0.03919768
## 30 2.91 0.18125 0.03919768
## 31 3.01 0.18500 0.03987829
## 32 3.11 0.18500 0.03987829
## 33 3.21 0.18250 0.03917553
## 34 3.31 0.18250 0.03917553
## 35 3.41 0.18375 0.03998698
## 36 3.51 0.18125 0.04379958
## 37 3.61 0.18125 0.04379958
## 38 3.71 0.18125 0.04379958
## 39 3.81 0.18000 0.04297932
## 40 3.91 0.18000 0.04297932
## 41 4.01 0.18000 0.04297932
## 42 4.11 0.18125 0.04299952
## 43 4.21 0.18000 0.04377975
## 44 4.31 0.18000 0.04377975
## 45 4.41 0.18000 0.04377975
## 46 4.51 0.18125 0.04497299
## 47 4.61 0.18250 0.04417453
## 48 4.71 0.18375 0.04168749
## 49 4.81 0.18250 0.04133199
## 50 4.91 0.18250 0.04133199
## 51 5.01 0.18125 0.04299952
## 52 5.11 0.18250 0.04090979
## 53 5.21 0.18250 0.04090979
## 54 5.31 0.18250 0.04090979
## 55 5.41 0.18250 0.04090979
## 56 5.51 0.18250 0.04090979
## 57 5.61 0.18250 0.04090979
## 58 5.71 0.18250 0.04090979
## 59 5.81 0.18125 0.04093101
## 60 5.91 0.18125 0.04093101
## 61 6.01 0.18125 0.04093101
## 62 6.11 0.18125 0.04093101
## 63 6.21 0.18000 0.04297932
## 64 6.31 0.18000 0.04297932
## 65 6.41 0.18000 0.04297932
## 66 6.51 0.18000 0.04297932
## 67 6.61 0.18125 0.04135299
## 68 6.71 0.18125 0.04135299
## 69 6.81 0.18125 0.04135299
## 70 6.91 0.18125 0.04135299
## 71 7.01 0.18125 0.04135299
## 72 7.11 0.18125 0.04135299
## 73 7.21 0.18125 0.04135299
## 74 7.31 0.18125 0.04007372
## 75 7.41 0.18125 0.04007372
## 76 7.51 0.18125 0.04007372
## 77 7.61 0.18125 0.04007372
## 78 7.71 0.18000 0.04005205
## 79 7.81 0.18000 0.04005205
## 80 7.91 0.18000 0.04005205
## 81 8.01 0.18000 0.04005205
## 82 8.11 0.18000 0.04005205
## 83 8.21 0.17875 0.04126894
## 84 8.31 0.17875 0.04126894
## 85 8.41 0.17875 0.04126894
## 86 8.51 0.17875 0.04126894
## 87 8.61 0.17875 0.04126894
## 88 8.71 0.17875 0.04126894
## 89 8.81 0.17875 0.04126894
## 90 8.91 0.17875 0.04126894
## 91 9.01 0.17875 0.04126894
## 92 9.11 0.17875 0.04126894
## 93 9.21 0.17875 0.04126894
## 94 9.31 0.17875 0.04126894
## 95 9.41 0.18000 0.04216370
## 96 9.51 0.18000 0.04216370
## 97 9.61 0.18125 0.04340139
## 98 9.71 0.18125 0.04340139
## 99 9.81 0.18125 0.04340139
## 100 9.91 0.18125 0.04340139
tune.8d$best.parameters$cost
## [1] 0.71
#8e repeat
svm.8e<-svm(Purchase ~ ., kernel='radial', data=oj.train, cost=tune.8d$best.parameters$cost)
train.pred<-predict(svm.8e, oj.train)
test.pred<-predict(svm.8e, oj.test)
mean(train.pred != oj.train$Purchase)
## [1] 0.15125
mean(test.pred != oj.test$Purchase)
## [1] 0.1851852
8b repeat: 634 support vectors were used, split between the CH and MM classes (319 and 315); CH and MM are the two classes of Purchase.
8c repeat: The training and test error rates were about 39% and 38%, respectively.
8d repeat: Using tune.8d$best.parameters$cost, we get a best cost value of 0.71 with a cross-validated error rate of 0.1675, which I will use going forward.
Repeat parts (b) through (e) using a support vector machine with a polynomial kernel. Set degree = 2.
#8b repeat
svm.8b <- svm(Purchase ~ ., data = oj.train, kernel = "polynomial", cost = 0.01, degree=2)
summary(svm.8b)
##
## Call:
## svm(formula = Purchase ~ ., data = oj.train, kernel = "polynomial",
## cost = 0.01, degree = 2)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 0.01
## degree: 2
## coef.0: 0
##
## Number of Support Vectors: 636
##
## ( 321 315 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
#8c repeat
train.pred<-predict(svm.8b, oj.train)
test.pred<-predict(svm.8b, oj.test)
mean(train.pred != oj.train$Purchase)
## [1] 0.3725
mean(test.pred != oj.test$Purchase)
## [1] 0.3666667
#8d repeat
tune.8d = tune(svm, Purchase ~ ., data = oj.train, kernel = "polynomial", degree=2, ranges = list(cost = seq(0.01, 10, by = 0.1)))
summary(tune.8d)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 5.11
##
## - best performance: 0.1825
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.39000 0.08287373
## 2 0.11 0.31125 0.06958458
## 3 0.21 0.23000 0.06826216
## 4 0.31 0.20875 0.06209592
## 5 0.41 0.20500 0.05470883
## 6 0.51 0.20750 0.05809475
## 7 0.61 0.20375 0.05466120
## 8 0.71 0.21000 0.05916080
## 9 0.81 0.20625 0.05628857
## 10 0.91 0.20500 0.05627314
## 11 1.01 0.20375 0.05653477
## 12 1.11 0.20375 0.05923412
## 13 1.21 0.20125 0.05726704
## 14 1.31 0.19500 0.05749396
## 15 1.41 0.19375 0.05311479
## 16 1.51 0.19125 0.05529278
## 17 1.61 0.19250 0.05898446
## 18 1.71 0.19375 0.05899918
## 19 1.81 0.19500 0.05809475
## 20 1.91 0.19375 0.05781015
## 21 2.01 0.19250 0.06043821
## 22 2.11 0.19125 0.05804991
## 23 2.21 0.18750 0.06123724
## 24 2.31 0.18625 0.06248611
## 25 2.41 0.18750 0.06236096
## 26 2.51 0.18750 0.06152010
## 27 2.61 0.18625 0.05964304
## 28 2.71 0.18625 0.05964304
## 29 2.81 0.18750 0.05833333
## 30 2.91 0.18625 0.05816941
## 31 3.01 0.18875 0.05756940
## 32 3.11 0.19000 0.06061032
## 33 3.21 0.18750 0.05833333
## 34 3.31 0.18625 0.05905800
## 35 3.41 0.18750 0.05803495
## 36 3.51 0.18625 0.05816941
## 37 3.61 0.18625 0.05816941
## 38 3.71 0.18625 0.05816941
## 39 3.81 0.18750 0.05621141
## 40 3.91 0.18750 0.05621141
## 41 4.01 0.18500 0.06202598
## 42 4.11 0.18500 0.06202598
## 43 4.21 0.18750 0.06038074
## 44 4.31 0.18750 0.06038074
## 45 4.41 0.18750 0.06152010
## 46 4.51 0.18500 0.05676462
## 47 4.61 0.18500 0.05676462
## 48 4.71 0.18625 0.05510407
## 49 4.81 0.18500 0.05329426
## 50 4.91 0.18375 0.05104804
## 51 5.01 0.18375 0.05104804
## 52 5.11 0.18250 0.04901814
## 53 5.21 0.18250 0.04901814
## 54 5.31 0.18250 0.04901814
## 55 5.41 0.18250 0.04901814
## 56 5.51 0.18250 0.04901814
## 57 5.61 0.18375 0.04825065
## 58 5.71 0.18375 0.04825065
## 59 5.81 0.18375 0.04825065
## 60 5.91 0.18500 0.04706674
## 61 6.01 0.18375 0.04788949
## 62 6.11 0.18375 0.04788949
## 63 6.21 0.18375 0.04788949
## 64 6.31 0.18375 0.04788949
## 65 6.41 0.18375 0.04788949
## 66 6.51 0.18375 0.04788949
## 67 6.61 0.18375 0.04788949
## 68 6.71 0.18375 0.04788949
## 69 6.81 0.18375 0.04788949
## 70 6.91 0.18375 0.04788949
## 71 7.01 0.18250 0.05041494
## 72 7.11 0.18250 0.05041494
## 73 7.21 0.18375 0.05239076
## 74 7.31 0.18375 0.05239076
## 75 7.41 0.18375 0.05239076
## 76 7.51 0.18375 0.05239076
## 77 7.61 0.18375 0.05239076
## 78 7.71 0.18375 0.05239076
## 79 7.81 0.18375 0.05239076
## 80 7.91 0.18375 0.05239076
## 81 8.01 0.18500 0.05230785
## 82 8.11 0.18375 0.05239076
## 83 8.21 0.18500 0.04993051
## 84 8.31 0.18500 0.04993051
## 85 8.41 0.18375 0.05036326
## 86 8.51 0.18375 0.05036326
## 87 8.61 0.18375 0.05205833
## 88 8.71 0.18375 0.05205833
## 89 8.81 0.18375 0.05205833
## 90 8.91 0.18375 0.05205833
## 91 9.01 0.18375 0.05205833
## 92 9.11 0.18375 0.05205833
## 93 9.21 0.18500 0.05130248
## 94 9.31 0.18500 0.05130248
## 95 9.41 0.18500 0.05130248
## 96 9.51 0.18500 0.05130248
## 97 9.61 0.18625 0.05084358
## 98 9.71 0.18625 0.05084358
## 99 9.81 0.18500 0.04958158
## 100 9.91 0.18500 0.04958158
tune.8d$best.parameters$cost
## [1] 5.11
#8e repeat
svm.8e<-svm(Purchase ~ ., kernel='polynomial', degree=2, data=oj.train, cost=tune.8d$best.parameters$cost)
train.pred<-predict(svm.8e, oj.train)
test.pred<-predict(svm.8e, oj.test)
mean(train.pred != oj.train$Purchase)
## [1] 0.15625
mean(test.pred != oj.test$Purchase)
## [1] 0.1814815
8b repeat: 636 support vectors were used, split between the CH and MM classes (321 and 315).
8c repeat: The training and test error rates were about 37% each.
8d repeat: Using tune.8d$best.parameters$cost, we get a best cost value of 5.11 with a cross-validated error rate of 0.1825, which I will use going forward.
Overall, the linear kernel gave the best results on this data: it had the lowest test error rate and the smallest gap between training and test error. The tuned linear SVM had training and test error rates of about 16% and 15%, the radial SVM about 15% and 19%, and the polynomial SVM about 16% and 18%.
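For reference, the tuned training and test error rates reported above can be collected into a small comparison table (values copied from the output above):
data.frame(
  kernel = c("linear", "radial", "polynomial (degree 2)"),
  train_error = c(0.1625, 0.15125, 0.15625),
  test_error = c(0.1519, 0.1852, 0.1815)
)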
detach(OJ)