> x1 = runif(500) - 0.5
> x2 = runif(500) - 0.5
> y = 1*(x1^2 - x2^2 > 0)
set.seed(1)
x1 <- runif(500)-0.5
x2 <- runif(500)-0.5
y <- 1*(x1^2-x2^2 > 0)
plot(x1, x2, xlab = "X1", ylab = "X2", col = ifelse(y, "darkmagenta", "darkcyan"))
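The rule y = 1*(x1^2 - x2^2 > 0) puts the class boundary where \(x_1^2 = x_2^2\), i.e. along the two lines \(x_2 = \pm x_1\), so the true boundary is non-linear even though it comes from a simple quadratic. A quick sketch to check the class balance and overlay the true boundary on the plot above:
table(y)                  # the two classes should be roughly balanced
abline(0, 1, lty = 2)     # x2 =  x1
abline(0, -1, lty = 2)    # x2 = -x1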
glm.x <- glm(y ~ ., data = data.frame(x1, x2, y), family = "binomial")
summary(glm.x)
##
## Call:
## glm(formula = y ~ ., family = "binomial", data = data.frame(x1,
## x2, y))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.179 -1.139 -1.112 1.206 1.257
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.087260 0.089579 -0.974 0.330
## x1 0.196199 0.316864 0.619 0.536
## x2 -0.002854 0.305712 -0.009 0.993
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 692.18 on 499 degrees of freedom
## Residual deviance: 691.79 on 497 degrees of freedom
## AIC: 697.79
##
## Number of Fisher Scoring iterations: 3
pred.glm.x <- predict(glm.x, data.frame(x1, x2))
plot(x1, x2, xlab = "X1", ylab = "X2", col = ifelse(pred.glm.x > 0, "darkmagenta", "darkcyan"))
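Note that predict() on a glm object returns the linear predictor (log-odds) by default, so thresholding at 0 above is equivalent to thresholding the fitted probability at 0.5. A sketch of the same plot on the probability scale (prob.glm.x is a new name introduced here):
prob.glm.x <- predict(glm.x, data.frame(x1, x2), type = "response")   # fitted probabilities
plot(x1, x2, xlab = "X1", ylab = "X2", col = ifelse(prob.glm.x > 0.5, "darkmagenta", "darkcyan"))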
log.x <- glm(y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2), data = data.frame(x1, x2, y), family = "binomial")
summary(log.x)
##
## Call:
## glm(formula = y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2), family = "binomial",
## data = data.frame(x1, x2, y))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.240e-04 -2.000e-08 -2.000e-08 2.000e-08 1.163e-03
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -102.2 4302.0 -0.024 0.981
## poly(x1, 2)1 2715.3 141109.5 0.019 0.985
## poly(x1, 2)2 27218.5 842987.2 0.032 0.974
## poly(x2, 2)1 -279.7 97160.4 -0.003 0.998
## poly(x2, 2)2 -28693.0 875451.3 -0.033 0.974
## I(x1 * x2) -206.4 41802.8 -0.005 0.996
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6.9218e+02 on 499 degrees of freedom
## Residual deviance: 3.5810e-06 on 494 degrees of freedom
## AIC: 12
##
## Number of Fisher Scoring iterations: 25
train <- data.frame(x1 = x1, x2 = x2, y = as.factor(y))
pred.log.x <- predict(log.x, data.frame(x1, x2))
plot(x1, x2, xlab = "X1", ylab = "X2", col = ifelse(pred.log.x > 0, "darkmagenta", "darkcyan"))
library(e1071)   # svm() and tune() below come from the e1071 package
svm.x <- svm(y ~ ., data = train, kernel = "linear", cost = 0.01)
summary(svm.x)
##
## Call:
## svm(formula = y ~ ., data = train, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 480
##
## ( 239 241 )
##
##
## Number of Classes: 2
##
## Levels:
## 0 1
pred.svm.x <- predict(svm.x, data.frame(x1, x2))   # predict.svm returns the predicted class labels
plot(x1, x2, xlab = "X1", ylab = "X2", col = ifelse(pred.svm.x == 1, "darkmagenta", "darkcyan"))
svm.x <- svm(y ~ ., data = train, kernel = "radial", gamma = 1)
summary(svm.x)
##
## Call:
## svm(formula = y ~ ., data = train, kernel = "radial", gamma = 1)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 147
##
## ( 73 74 )
##
##
## Number of Classes: 2
##
## Levels:
## 0 1
pred.svm.x <- predict(svm.x, data.frame(x1, x2))
plot(x1, x2, xlab = "X1", ylab = "X2", col = ifelse(pred.svm.x == 1, "darkmagenta", "darkcyan"))
An SVM with a non-linear kernel can capture the non-linear class boundary in this data set: the radial-kernel SVM recovers the quadratic boundary well, whereas the linear-kernel support vector classifier (like the linear logistic regression) produces an essentially linear decision boundary.
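As a rough check on that claim, the training misclassification rate of each fit can be compared directly (a sketch; svm.lin is a new name for a re-fit of the linear SVM, since svm.x now holds the radial fit):
svm.lin <- svm(y ~ ., data = train, kernel = "linear", cost = 0.01)
mean((predict(glm.x, type = "response") > 0.5) != y)   # linear logistic regression
mean((predict(log.x, type = "response") > 0.5) != y)   # logistic regression with quadratic terms
mean(predict(svm.lin, train) != train$y)               # support vector classifier (linear kernel)
mean(predict(svm.x, train) != train$y)                 # SVM with radial kernel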
We now use support vector approaches on the Auto data set to predict whether a car gets high or low gas mileage.
First, create a binary variable that equals 1 for cars with gas mileage above the median and 0 for cars below it.
mpg.binary <- ifelse(Auto$mpg > median(Auto$mpg), 1, 0)
Auto$mpg.class <- as.factor(mpg.binary)
Next, fit a support vector classifier to the data with various values of cost, report the cross-validation errors associated with the different values of this parameter, and comment on the results.
set.seed(123)
tune.mpg.lin <- tune(svm, mpg.class ~ ., data = Auto, kernel = "linear", ranges = list(cost = c(0.01, 0.1, 1, 5, 10)))
summary(tune.mpg.lin)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.01025641
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.07634615 0.03928191
## 2 0.10 0.04333333 0.03191738
## 3 1.00 0.01025641 0.01792836
## 4 5.00 0.01538462 0.01792836
## 5 10.00 0.01788462 0.01727588
plot(tune.mpg.lin)
The best cross-validation performance (error of about 0.0103) is at cost = 1.
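The tune object also stores the winning parameters, the cross-validated error, and a model refit at those parameters, so they can be pulled out directly (best.mpg.lin below is simply a new name for the stored fit):
tune.mpg.lin$best.parameters    # cost = 1
tune.mpg.lin$best.performance   # CV error of about 0.0103
best.mpg.lin <- tune.mpg.lin$best.model   # an svm fit at the best cost, ready for predict()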
set.seed(123)
tune.mpg.rad <- tune(svm, mpg.class ~ ., data = Auto, kernel = "radial", ranges = list(cost = c(0.01, 0.1, 1, 5, 10, 20, 30, 40, 50)))
summary(tune.mpg.rad)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 40
##
## - best performance: 0.01788462
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.58173077 0.04740051
## 2 0.10 0.10692308 0.05900981
## 3 1.00 0.07891026 0.03828837
## 4 5.00 0.06608974 0.04785032
## 5 10.00 0.05602564 0.03551922
## 6 20.00 0.03820513 0.02463148
## 7 30.00 0.02288462 0.01427008
## 8 40.00 0.01788462 0.01234314
## 9 50.00 0.01788462 0.01234314
With a radial kernel, the best cross-validation error is 0.01788, reached at cost = 40 (and unchanged at cost = 50).
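Only cost was varied here; gamma stayed at its default. If desired, tune() can search a two-dimensional grid over both parameters at once. A sketch, not run here, with tune.mpg.rad2 and the grid values chosen arbitrarily:
tune.mpg.rad2 <- tune(svm, mpg.class ~ ., data = Auto, kernel = "radial",
                      ranges = list(cost = c(1, 10, 40), gamma = c(0.01, 0.1, 1)))
summary(tune.mpg.rad2)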
set.seed(123)
tune.mpg.poly <- tune(svm, mpg.class ~ ., data = Auto, kernel = "linear", ranges = list(cost = c(0.01, 0.1, 1, 5, 10)))
tune.mpg.poly
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.01025641
The best performance has an error of 0.01026, which is at cost = 1.
In the lab, the plot() function was used for svm objects only in cases with \(p = 2\). When \(p > 2\), you can still use the plot() function to create plots displaying pairs of variables at a time. Essentially, instead of typing plot(svmfit, dat), where svmfit contains your fitted model and dat is a data frame containing your data, you can type plot(svmfit, dat, x1 ~ x4) to plot just those two dimensions, replacing x1 and x4 with the correct variable names. To find out more, type ?plot.svm.
plot(tune.mpg.rad)
The plot confirms that the best cost is around 40.
plot(tune.mpg.poly)
The plot confirms that the best performance is at cost = 1.
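Following the hint above, plot() can draw the fitted decision region. With the full predictor set you would need to fix the remaining variables through the slice argument, so the simplest illustration is a model restricted to two predictors; a sketch, with svm.two, the predictor pair, and cost = 40 chosen only for illustration:
svm.two <- svm(mpg.class ~ horsepower + weight, data = Auto, kernel = "radial", cost = 40)
plot(svm.two, Auto, horsepower ~ weight)   # decision region over the two chosen predictors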
detach(Auto)
We now turn to the OJ data set, which is part of the ISLR package. First, create a training set containing a random sample of 800 observations and a test set containing the remaining observations.
set.seed(21)
set.training.oj <- sample(nrow(OJ), 800)
training.oj <- OJ[set.training.oj, ]
test.oj <- OJ[-set.training.oj, ]
Next, fit a support vector classifier to the training data using cost = 0.01, with Purchase as the response and the other variables as predictors, and use the summary() function to produce summary statistics and describe the results obtained.
svm.oj.lin <- svm(Purchase ~ ., data = training.oj, cost = 0.01, kernel = "linear")
summary(svm.oj.lin)
##
## Call:
## svm(formula = Purchase ~ ., data = training.oj, cost = 0.01, kernel = "linear")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 444
##
## ( 223 221 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
The fit uses 444 support vectors out of the 800 training observations: 223 belong to class CH and 221 to class MM. Such a large number of support vectors reflects the very small cost, which tolerates many margin violations and produces a wide margin.
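These counts can also be read off the fitted object directly (standard components of an e1071 svm object):
svm.oj.lin$tot.nSV       # total number of support vectors (444)
svm.oj.lin$nSV           # support vectors per class (223 and 221)
head(svm.oj.lin$index)   # row indices of the support vectors within the training data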
train.pred.oj.lin <- predict(svm.oj.lin, training.oj)
mean(train.pred.oj.lin != training.oj$Purchase)
## [1] 0.1675
test.pred.oj.lin <- predict(svm.oj.lin, test.oj)
mean(test.pred.oj.lin != test.oj$Purchase)
## [1] 0.1703704
Training Error Rate: 0.1675
Test Error Rate: 0.1703704
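A confusion table shows where those errors occur, using the predictions already computed above:
table(predicted = train.pred.oj.lin, actual = training.oj$Purchase)
table(predicted = test.pred.oj.lin, actual = test.oj$Purchase)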
Now use the tune() function to select an optimal cost, considering values in the range \(0.01\) to \(10\).
set.seed(123)
tune.oj.lin <- tune(svm, Purchase ~ ., data = OJ, kernel = "linear", ranges = list(cost = c(0.01, 0.1, 1, 5, 10)))
tune.oj.lin
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 5
##
## - best performance: 0.164486
The optimal cost is 5.
Next, compute the training and test error rates using this new value for cost.
train.cost.oj <- svm(Purchase ~ ., data = training.oj, kernel = "linear", cost = 5)
pred.train.cost <- predict(train.cost.oj, training.oj)
mean(pred.train.cost != training.oj$Purchase)
## [1] 0.165
test.cost.oj <- svm(Purchase ~ ., data = test.oj, kernel = "linear", cost = 5)
pred.test.cost <- predict(test.cost.oj, test.oj)
mean(pred.test.cost != test.oj$Purchase)
## [1] 0.1296296
Training Error: 0.165
Test Error: 0.1296
Now repeat the analysis using a support vector machine with a radial kernel, using the default value for gamma.
svm.oj.rad <- svm(Purchase ~ ., data = training.oj, cost = 0.01, kernel = "radial")
summary(svm.oj.rad)
##
## Call:
## svm(formula = Purchase ~ ., data = training.oj, cost = 0.01, kernel = "radial")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 0.01
##
## Number of Support Vectors: 627
##
## ( 316 311 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train.pred.oj.rad <- predict(svm.oj.rad, training.oj)
mean(train.pred.oj.rad != training.oj$Purchase)
## [1] 0.38875
test.pred.oj.rad <- predict(svm.oj.rad, test.oj)
mean(test.pred.oj.rad != test.oj$Purchase)
## [1] 0.3925926
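These error rates are close to the share of MM purchases in the data, which suggests that with cost = 0.01 the radial fit is doing little more than predicting the majority class. Tabulating the predictions makes this easy to check:
table(test.pred.oj.rad)          # how often each class is predicted on the test set
mean(test.oj$Purchase == "MM")   # error rate of always predicting CH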
set.seed(123)
tune.oj.rad<- tune(svm, Purchase ~ ., data = OJ, kernel = "radial", ranges = list(cost = c(0.01, 0.1, 1, 5, 10)))
tune.oj.rad
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.1691589
train.cost.oj.rad <- svm(Purchase ~ ., data = training.oj, kernel = "radial", cost = tune.oj.rad$best.parameters$cost)
pred.train.cost.rad <- predict(train.cost.oj.rad, training.oj)
mean(pred.train.cost.rad != training.oj$Purchase)
## [1] 0.15125
test.cost.oj.rad <- svm(Purchase ~ ., data = test.oj, kernel = "radial", cost = tune.oj.rad$best.parameters$cost)
pred.test.cost.rad <- predict(test.cost.oj.rad, test.oj)
mean(pred.test.cost.rad != test.oj$Purchase)
## [1] 0.1296296
Finally, repeat the analysis using a support vector machine with a polynomial kernel with degree = 2. (The fit below leaves degree at the svm() default of 3, as the summary output shows.)
svm.oj.poly <- svm(Purchase ~ ., data = training.oj, cost = 0.01, kernel = "polynomial")
summary(svm.oj.poly)
##
## Call:
## svm(formula = Purchase ~ ., data = training.oj, cost = 0.01, kernel = "polynomial")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 0.01
## degree: 3
## coef.0: 0
##
## Number of Support Vectors: 614
##
## ( 309 305 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train.pred.oj.poly <- predict(svm.oj.poly, training.oj)
mean(train.pred.oj.poly != training.oj$Purchase)
## [1] 0.3675
test.pred.oj.poly <- predict(svm.oj.poly, test.oj)
mean(test.pred.oj.poly != test.oj$Purchase)
## [1] 0.3740741
set.seed(123)
tune.oj.poly<- tune(svm, Purchase ~ ., data = OJ, kernel = "polynomial", ranges = list(cost = c(0.01, 0.1, 1, 5, 10)))
tune.oj.poly
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 5
##
## - best performance: 0.1841121
train.cost.oj.poly <- svm(Purchase ~ ., data = training.oj, kernel = "polynomial", cost = tune.oj.poly$best.parameters$cost)
pred.train.cost.poly <- predict(train.cost.oj.poly, training.oj)
mean(pred.train.cost.poly != training.oj$Purchase)
## [1] 0.15125
test.cost.oj.poly <- svm(Purchase ~ ., data = test.oj, kernel = "polynomial", cost = tune.oj.poly$best.parameters$cost)
pred.test.cost.poly <- predict(test.cost.oj.poly, test.oj)
mean(pred.test.cost.poly != test.oj$Purchase)
## [1] 0.1
Linear Test Error: 0.1703704
Linear Tuned Test Error: 0.1296296
Radial Test Error: 0.3925926
Radial Tuned Test Error: 0.1296296
Polynomial Test Error: 0.3740741
Polynomial Tuned Test Error: 0.1
The tuned polynomial-kernel SVM gives the lowest test error.
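For reference, the test error rates computed above can be gathered into one vector for a side-by-side comparison (a sketch using the prediction objects defined in this section):
test.errors <- c(
  linear       = mean(test.pred.oj.lin    != test.oj$Purchase),
  linear.tuned = mean(pred.test.cost      != test.oj$Purchase),
  radial       = mean(test.pred.oj.rad    != test.oj$Purchase),
  radial.tuned = mean(pred.test.cost.rad  != test.oj$Purchase),
  poly         = mean(test.pred.oj.poly   != test.oj$Purchase),
  poly.tuned   = mean(pred.test.cost.poly != test.oj$Purchase)
)
sort(test.errors)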
detach(OJ)