x1 = runif(500) - 0.5
x2 = runif(500) - 0.5
y = 1 * (x1^2 - x2^2 > 0)
library(ggplot2)
ggplot() + geom_point(
mapping = aes(x1[y == 0], x2[y == 0]),
col = "blue",
alpha = 0.5,
size = 2
) + geom_point(
mapping = aes(x1[y == 1], x2[y == 1]),
col = "orange",
alpha = 0.8,
size = 2
) + labs(x = "x1", y = "x2")
df = as.data.frame(cbind(x1, x2, y))
glm.fit = glm(y ~ ., data = df, family = "binomial")
summary(glm.fit)
##
## Call:
## glm(formula = y ~ ., family = "binomial", data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.23819 -1.17504 -0.00174 1.16878 1.24401
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.0007834 0.0895614 0.009 0.993
## x1 0.2995040 0.3063108 0.978 0.328
## x2 -0.0351100 0.3040163 -0.115 0.908
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 693.15 on 499 degrees of freedom
## Residual deviance: 692.19 on 497 degrees of freedom
## AIC: 698.19
##
## Number of Fisher Scoring iterations: 3
glm.probs = predict(glm.fit, newdata = df, type = "response")
glm.preds = ifelse(glm.probs > 0.5, 1, 0)
pred1 = df[glm.preds == 1, ]
pred2 = df[glm.preds == 0, ]
ggplot() +
geom_point(
data = pred1,
mapping = aes(x1, x2),
col = "slateblue",
alpha = 0.8,
size = 2
) +
geom_point(
data = pred2,
mapping = aes(x1, x2),
col = "skyblue2",
alpha = 0.8,
size = 2
)
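As a quick sanity check (not part of the original output), we can compute the training error rate of the linear logistic fit; because the true boundary x1^2 - x2^2 = 0 is non-linear, it should sit near 50%.
mean(glm.preds != df$y)  # expected to be close to 0.5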
glm.fit2 = glm(y ~ I(x1^2) + I(x2^2) + I(x1^3) + I(x2^3) + (x1 * x2), family = "binomial", data=df)
summary(glm.fit2)
##
## Call:
## glm(formula = y ~ I(x1^2) + I(x2^2) + I(x1^3) + I(x2^3) + (x1 *
## x2), family = "binomial", data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.001709 0.000000 0.000000 0.000000 0.008549
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.485 594.422 0.006 0.995
## I(x1^2) 24078.366 554808.978 0.043 0.965
## I(x2^2) -24657.267 565474.427 -0.044 0.965
## I(x1^3) -2910.808 178065.069 -0.016 0.987
## I(x2^3) 2033.627 144241.483 0.014 0.989
## x1 175.599 21641.734 0.008 0.994
## x2 -72.037 14395.457 -0.005 0.996
## x1:x2 727.445 37194.532 0.020 0.984
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6.9315e+02 on 499 degrees of freedom
## Residual deviance: 8.0412e-05 on 492 degrees of freedom
## AIC: 16
##
## Number of Fisher Scoring iterations: 25
glm.probs2 = predict(glm.fit2, newdata = df, type = "response")
glm.preds2 = ifelse(glm.probs2 > 0.5, 1, 0)
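The near-zero residual deviance, the enormous coefficients and standard errors, and the 25 Fisher scoring iterations in the summary above are symptoms of (quasi-)complete separation: the polynomial terms split the classes almost perfectly, so the maximum-likelihood estimates diverge. The same optional check (output not shown) confirms an essentially perfect training fit.
mean(glm.preds2 != df$y)  # expected to be at or near 0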
pred3 = df[glm.preds2 == 1, ]
pred4 = df[glm.preds2 == 0, ]
ggplot() +
geom_point(data = pred3, mapping = aes(x1, x2), col = "yellowgreen",
alpha = 0.8, size = 2) +
geom_point(data = pred4, mapping = aes(x1, x2), col = "turquoise4",
alpha = 0.5, size = 2)
library(e1071)
svm.fit <- svm(as.factor(y) ~ ., data = df, kernel = "linear", cost = 0.1)
summary(svm.fit)
##
## Call:
## svm(formula = as.factor(y) ~ ., data = df, kernel = "linear", cost = 0.1)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.1
##
## Number of Support Vectors: 493
##
## ( 246 247 )
##
##
## Number of Classes: 2
##
## Levels:
## 0 1
svm.preds = predict(svm.fit, df)  # predict() returns class labels here, not probabilities
pred5 = df[svm.preds == 1, ]
pred6 = df[svm.preds == 0, ]
ggplot() +
geom_point(
data = pred5,
mapping = aes(x1, x2),
col = "skyblue" ,
size = 2, alpha=0.8
) +
geom_point(
data = pred6,
mapping = aes(x1, x2),
col = "orange2",
size = 2, alpha=0.6
)
plot(svm.fit, df)
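As with the linear logistic fit, an optional check (output not shown) confirms that the linear kernel cannot capture the quadratic boundary.
mean(svm.preds != df$y)  # expected to be close to 0.5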
# "kernel" (not "method") is the argument that selects the kernel; radial is also the svm() default
svm.fit2 <- svm(as.factor(y) ~ x1 + x2, data = df, kernel = "radial", cost = 0.1)
summary(svm.fit2)
##
## Call:
## svm(formula = as.factor(y) ~ x1 + x2, data = df, kernel = "radial",
##     cost = 0.1)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 0.1
##
## Number of Support Vectors: 320
##
## ( 160 160 )
##
##
## Number of Classes: 2
##
## Levels:
## 0 1
svm.preds2 = predict(svm.fit2, df)  # again, class labels
pred7 = df[svm.preds2 == 1, ]
pred8 = df[svm.preds2 == 0, ]
ggplot() +
geom_point(
data = pred7,
mapping = aes(x1, x2),
col = "skyblue" ,
size = 2, alpha=0.8
) +
geom_point(
data = pred8,
mapping = aes(x1, x2),
col = "orange2",
size = 2, alpha=0.6
)
plot(svm.fit2, df)
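One more optional check (output not shown): the radial kernel recovers the non-linear boundary almost exactly on the training data.
mean(svm.preds2 != df$y)  # expected to be far below 0.5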
library(ISLR)
data("Auto")
mpg01 = ifelse(Auto$mpg > median(Auto$mpg), 1, 0)
Auto01 = data.frame(Auto[-c(1, 9)], mpg01)  # drop mpg and name, add the binary response
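By construction, the median split yields (nearly) balanced classes, which a quick tabulation would confirm (output not shown).
table(Auto01$mpg01)  # should be roughly a 196/196 split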
set.seed(1)
tune.lin = tune(svm, mpg01 ~ ., data = Auto01, kernel = "linear",
                ranges = list(cost = c(0.001, 0.01, 0.1, 1, 5, 10, 100)))
summary(tune.lin)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.01
##
## - best performance: 0.1053223
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-03 0.1088870 0.02541796
## 2 1e-02 0.1053223 0.03162078
## 3 1e-01 0.1083165 0.03461157
## 4 1e+00 0.1100350 0.03552713
## 5 5e+00 0.1102002 0.03559065
## 6 1e+01 0.1101804 0.03557982
## 7 1e+02 0.1101708 0.03556061
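One caveat: because mpg01 is numeric here, tune() fits eps-regression SVMs, so the errors above are cross-validated mean squared errors rather than misclassification rates. A hypothetical classification variant (results not shown; they would differ from the table above) would wrap the response in as.factor():
tune.lin.cls = tune(svm, as.factor(mpg01) ~ ., data = Auto01, kernel = "linear",
                    ranges = list(cost = c(0.001, 0.01, 0.1, 1, 5, 10, 100)))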
Cost = 0.01 results in the lowest cross-validation error rate.

#### (c) Now repeat (b), this time using SVMs with radial and polynomial basis kernels, with different values of gamma and degree and cost. Comment on your results.
tune.rad <- tune(svm, mpg01 ~ ., data = Auto01, kernel = "radial", ranges = list(cost = c(0.1, 1, 5, 10), gamma = c(0.01, 0.1, 1, 5, 10, 100)))
summary(tune.rad)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 1 1
##
## - best performance: 0.06031574
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 0.1 1e-02 0.09893598 0.036615111
## 2 1.0 1e-02 0.08754791 0.042547338
## 3 5.0 1e-02 0.08480617 0.045493029
## 4 10.0 1e-02 0.08642087 0.046948300
## 5 0.1 1e-01 0.07233360 0.037968503
## 6 1.0 1e-01 0.07477629 0.041412967
## 7 5.0 1e-01 0.07091706 0.037875175
## 8 10.0 1e-01 0.06693812 0.034121153
## 9 0.1 1e+00 0.08114160 0.026627777
## 10 1.0 1e+00 0.06031574 0.028798351
## 11 5.0 1e+00 0.06735518 0.027624894
## 12 10.0 1e+00 0.06986628 0.028134940
## 13 0.1 5e+00 0.31384670 0.049494935
## 14 1.0 5e+00 0.10575613 0.014517033
## 15 5.0 5e+00 0.11102705 0.021541907
## 16 10.0 5e+00 0.11186421 0.022222646
## 17 0.1 1e+01 0.42414373 0.055935817
## 18 1.0 1e+01 0.15301406 0.011420254
## 19 5.0 1e+01 0.15472405 0.015294250
## 20 10.0 1e+01 0.15472408 0.015294221
## 21 0.1 1e+02 0.49304390 0.052133504
## 22 1.0 1e+02 0.24947282 0.002756306
## 23 5.0 1e+02 0.24947267 0.002755672
## 24 10.0 1e+02 0.24947267 0.002755672
Cost = 1 and gamma = 1 result in the lowest cross-validation error rate.
tune.poly <- tune(svm, mpg01 ~ ., data = Auto01, kernel = "polynomial", ranges = list(cost = c(0.1, 1, 5, 10), degree = c(2, 3, 4)))
summary(tune.poly)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 10 3
##
## - best performance: 0.1113738
##
## - Detailed performance results:
## cost degree error dispersion
## 1 0.1 2 0.2269920 0.03627644
## 2 1.0 2 0.1651022 0.02910030
## 3 5.0 2 0.1565969 0.04467648
## 4 10.0 2 0.1592405 0.04433953
## 5 0.1 3 0.1376712 0.03070863
## 6 1.0 3 0.1228948 0.03188548
## 7 5.0 3 0.1130214 0.02872703
## 8 10.0 3 0.1113738 0.02617738
## 9 0.1 4 0.1943853 0.04498174
## 10 1.0 4 0.1546828 0.03542809
## 11 5.0 4 0.1626235 0.06784320
## 12 10.0 4 0.1665109 0.08650442
Cost = 10 and degree = 3 result in the lowest cross-validation error rate.
svm.lin = svm(as.factor(mpg01) ~ ., data = Auto01, kernel = "linear", cost = 0.01)
svm.poly = svm(as.factor(mpg01) ~ ., data = Auto01, kernel = "polynomial", cost = 10,
degree = 3)
svm.rad = svm(as.factor(mpg01) ~ ., data = Auto01, kernel = "radial", cost = 1, gamma = 1)
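Before plotting, an optional comparison (output not shown) of training confusion tables gives a feel for how differently the three tuned fits behave.
# training confusion tables; note that training error understates test error,
# especially for the radial fit with gamma = 1
table(predicted = predict(svm.lin, Auto01), actual = Auto01$mpg01)
table(predicted = predict(svm.rad, Auto01), actual = Auto01$mpg01)
table(predicted = predict(svm.poly, Auto01), actual = Auto01$mpg01)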
Auto02 = data.frame(Auto01, Auto[c(1)])  # add mpg back in for the plots below
# hold the remaining predictors at their means when slicing the 2-D plots
m.acc <- mean(Auto$acceleration)
m.wt <- mean(Auto$weight)
m.hp <- mean(Auto$horsepower)
m.dp <- mean(Auto$displacement)
m.mpg <- mean(Auto$mpg)
# linear
plot(
svm.lin,
Auto02,
mpg ~ acceleration,
main = "Linear: mpg~acceleration",
slice = list(displacement = m.dp, horsepower = m.hp)
)
plot(
svm.lin,
Auto02,
mpg ~ displacement,
main = "Linear: mpg~displacement",
slice = list(acceleration = m.acc, horsepower = m.hp)
)
plot(
svm.lin,
Auto02,
mpg ~ horsepower,
main = "Linear: mpg~horsepower",
slice = list(acceleration = m.acc, displacement = m.dp)
)
plot(
svm.lin,
Auto02,
mpg ~ weight,
main = "Linear: mpg~weight",
slice = list(acceleration = m.acc, displacement = m.dp)
)
plot(
svm.lin,
Auto02,
mpg ~ cylinders,
main = "Linear: mpg~cylinders",
slice = list(displacement = m.dp, horsepower = m.hp)
)
plot(
svm.lin,
Auto02,
mpg ~ origin,
main = "Linear: mpg~origin",
slice = list(displacement = m.dp, horsepower = m.hp)
)
plot(
svm.lin,
Auto02,
mpg ~ year,
main = "Linear: mpg~year",
slice = list(displacement = m.dp, horsepower = m.hp)
)
# Radial plots
plot(
svm.rad,
Auto02,
mpg ~ acceleration,
main = "Radial: mpg~acceleration",
slice = list(displacement = m.dp, horsepower = m.hp)
)
plot(
svm.rad,
Auto02,
mpg ~ displacement,
main = "Radial: mpg~displacement",
slice = list(acceleration = m.acc, horsepower = m.hp)
)
plot(
svm.rad,
Auto02,
mpg ~ horsepower,
main = "Radial: mpg~horsepower",
slice = list(acceleration = m.acc, displacement = m.dp)
)
plot(
svm.rad,
Auto02,
mpg ~ weight,
main = "Radial: mpg~weight",
slice = list(acceleration = m.acc, displacement = m.dp)
)
plot(
svm.rad,
Auto02,
mpg ~ cylinders,
main = "Radial: mpg~cylinders",
slice = list(displacement = m.dp, horsepower = m.hp)
)
plot(
svm.rad,
Auto02,
mpg ~ origin,
main = "Radial: mpg~origin",
slice = list(displacement = m.dp, horsepower = m.hp)
)
plot(
svm.rad,
Auto02,
mpg ~ year,
main = "Radial: mpg~year",
slice = list(displacement = m.dp, horsepower = m.hp)
)
# Polynomial
plot(
svm.poly,
Auto02,
mpg ~ acceleration,
main = "Polynomial: mpg~acceleration",
slice = list(displacement = m.dp, horsepower = m.hp)
)
plot(
svm.poly,
Auto02,
mpg ~ displacement,
main = "Polynomial: mpg~displacement",
slice = list(acceleration = m.acc, horsepower = m.hp)
)
plot(
svm.poly,
Auto02,
mpg ~ horsepower,
main = "Polynomial: mpg~horsepower",
slice = list(acceleration = m.acc, displacement = m.dp)
)
plot(
svm.poly,
Auto02,
mpg ~ weight,
main = "Polynomial: mpg~weight",
slice = list(acceleration = m.acc, displacement = m.dp)
)
plot(
svm.poly,
Auto02,
mpg ~ cylinders,
main = "Polynomial: mpg~cylinders",
slice = list(displacement = m.dp, horsepower = m.hp)
)
plot(
svm.poly,
Auto02,
mpg ~ origin,
main = "Polynomial: mpg~origin",
slice = list(displacement = m.dp, horsepower = m.hp)
)
plot(
svm.poly,
Auto02,
mpg ~ year,
main = "Polynomial: mpg~year",
slice = list(displacement = m.dp, horsepower = m.hp)
)
library(ISLR)
data("OJ")
oj = OJ
inTrain = sample(nrow(oj), 800)  # no seed is set, so this split (and the error rates below) varies between runs
train = oj[inTrain, ]
test = oj[-inTrain, ]
svm.lin2 = svm(Purchase ~ ., kernel = "linear", data = train, cost = 0.01)
summary(svm.lin2)
##
## Call:
## svm(formula = Purchase ~ ., data = train, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 426
##
## ( 213 213 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
test.pred1 = predict(svm.lin2, test)
train.pred1 = predict(svm.lin2, train)
TrainErr = mean(train.pred1 != train$Purchase)
TrainErr
## [1] 0.16625
TestErr = mean(test.pred1 != test$Purchase)
TestErr
## [1] 0.1851852
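Error rates alone hide which class is being misclassified; a confusion matrix on the test set (output not shown) is a useful supplement.
table(predicted = test.pred1, actual = test$Purchase)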
set.seed(1)
tune.lin2 = tune(svm, Purchase ~ ., data = train, kernel = "linear",
                 ranges = list(cost = seq(0.01, 10, 0.333)))
op = tune.lin2$best.parameters$cost
op
## [1] 1.009
svm.lin4 = svm(Purchase ~ ., kernel = "linear", data = train, cost = op)
test.pred2 = predict(svm.lin4, test)
train.pred2 = predict(svm.lin4, train)
TrainErr = mean(train.pred2 != train$Purchase)
TrainErr
## [1] 0.16375
TestErr = mean(test.pred2 != test$Purchase)
TestErr
## [1] 0.1814815
svm.rad2 = svm(Purchase ~ ., kernel = "radial", data = train, cost = 0.01)
test.pred3 = predict(svm.rad2, test)
train.pred3 = predict(svm.rad2, train)
TrainErr = mean(train.pred3 != train$Purchase)
TrainErr
## [1] 0.38125
TestErr = mean(test.pred3 != test$Purchase)
TestErr
## [1] 0.4148148
Accuracy = 1 - TestErr
Accuracy
## [1] 0.5851852
# note: cost is tuned here with the linear kernel and then reused for the radial
# fit; tuning with kernel = "radial" directly would be more consistent
tune.lin3 = tune(svm, Purchase ~ ., data = train, kernel = "linear",
                 ranges = list(cost = seq(0.01, 10, 0.333)))
op = tune.lin3$best.parameters$cost
svm.rad3 = svm(Purchase ~ ., kernel = "radial", data = train, cost = op)
train.pred4 = predict(svm.rad3, train)
test.pred4 = predict(svm.rad3, test)
TestErr = mean(test.pred4 != test$Purchase)
TestErr
## [1] 0.2
TrainErr = mean(train.pred4 != train$Purchase)
TrainErr
## [1] 0.14625
svm.poly2 = svm(Purchase ~ ., kernel = "polynomial", data = train, cost = 0.01, degree = 2)
train.pred5 = predict(svm.poly2, train)
test.pred5 = predict(svm.poly2, test)
TestErr = mean(test.pred5 != test$Purchase)
TestErr
## [1] 0.4074074
TrainErr = mean(train.pred5 != train$Purchase)
TrainErr
## [1] 0.35625
tune.poly2 = tune(svm, Purchase ~ ., data = train, kernel = "polynomial",
                  ranges = list(cost = seq(0.01, 10, 0.333)), degree = 2)
op = tune.poly2$best.parameters$cost
svm.poly3 = svm(Purchase ~ ., kernel = "polynomial", data = train, cost = op, degree = 2)
train.pred6 = predict(svm.poly3, train)
test.pred6 = predict(svm.poly3, test)
TrainErr = mean(train.pred6 != train$Purchase)
TrainErr
## [1] 0.13625
TestErr = mean(test.pred6 != test$Purchase)
TestErr
## [1] 0.2296296
Accuracy = 1 - TestErr
Accuracy
## [1] 0.7703704
# Radial model (recap of the tuned radial fit, for comparison)
TestErr = mean(test.pred4 != test$Purchase)
TestErr
## [1] 0.2
TrainErr = mean(train.pred4 != train$Purchase)
TrainErr
## [1] 0.14625
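Collecting the tuned train/test error rates into a single table (a convenience sketch; the values are transcribed from the output above):
err.summary = data.frame(kernel = c("linear", "radial", "polynomial"),
                         train.error = c(0.16375, 0.14625, 0.13625),
                         test.error = c(0.1814815, 0.2, 0.2296296))
err.summary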
The radial-kernel SVM gives the best overall results here: it combines a low training error rate (0.14625) with a competitive test error rate (0.2), whereas the tuned polynomial model overfits more (train 0.13625, test 0.2296) and the untuned fits perform far worse. On this particular split the tuned linear kernel attains a slightly lower test error (0.1815), so the radial model's advantage is modest.