set.seed(1)
x1 <- runif(500) - 0.5
x2 <- runif(500) - 0.5
# Class label with a non-linear (quadratic) decision boundary
y <- 1 * (x1^2 - x2^2 > 0)
plot(x1, x2, xlab = "x1", ylab = "x2", col = (4 - y), pch = (3 - y))
df <- data.frame(x1 = x1, x2 = x2, y = as.factor(y))
# Logistic regression on the raw (linear) features only
logreg <- glm(y ~ ., data = df, family = "binomial")
summary(logreg)
##
## Call:
## glm(formula = y ~ ., family = "binomial", data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.179 -1.139 -1.112 1.206 1.257
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.087260 0.089579 -0.974 0.330
## x1 0.196199 0.316864 0.619 0.536
## x2 -0.002854 0.305712 -0.009 0.993
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 692.18 on 499 degrees of freedom
## Residual deviance: 691.79 on 497 degrees of freedom
## AIC: 697.79
##
## Number of Fisher Scoring iterations: 3
library(ggplot2)
# Predicted classes from the linear-term logistic regression (fitted boundary is linear)
probs <- predict(logreg, df, type = "response")
preds <- ifelse(probs > 0.5, 1, 0)
ggplot(data = df, mapping = aes(x1, x2)) +
  geom_point(mapping = aes(colour = preds))
# Logistic regression with quadratic terms and an interaction
logreg2 <- glm(y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2), data = df, family = "binomial")
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
The warnings arise because the quadratic terms can represent the true boundary x1^2 - x2^2 = 0 exactly, so the two classes are perfectly separable and the coefficient estimates diverge.
# Predicted classes from the quadratic model: the fitted boundary is now non-linear
probs <- predict(logreg2, df, type = "response")
preds <- ifelse(probs > 0.5, 1, 0)
ggplot(data = df, mapping = aes(x1, x2)) +
  geom_point(mapping = aes(colour = preds))
library(e1071)
# Support vector classifier (linear kernel) on the same data
svm.linear <- svm(y ~ ., data = df, kernel = "linear", cost = 0.1, scale = FALSE)
plot(svm.linear, df)
# SVM with a radial kernel, which can capture the non-linear boundary
svm.radial <- svm(y ~ ., data = df, kernel = "radial", gamma = 1)
plot(svm.radial, df)
library(ISLR)
# Binary response: 1 if a car's mpg is above the median, 0 otherwise
Auto$mpglevel <- as.factor(ifelse(Auto$mpg > median(Auto$mpg), 1, 0))
library(e1071)
set.seed(1)
# Cross-validate the cost parameter for a linear-kernel SVM
linear <- tune(svm, mpglevel ~ . - mpg, data = Auto, kernel = "linear",
               ranges = list(cost = c(0.001, 0.01, 0.1, 1, 5, 10, 100)))
summary(linear)
We see that cost=0.1 results in the lowest cross-validation error rate.
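As a quick check (a sketch that only assumes the tune object created above), the winning cost and its cross-validated error can be read straight off the object:
linear$best.parameters    # should report cost = 0.1
linear$best.performance   # the corresponding cross-validation error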
# Polynomial kernel
set.seed(2)
grid <- c(0.1, 1, 5, 10)
# Note: with mpg as the response, tune.svm fits SVM regressions (eps-regression in the
# summary below); the classification analogue on mpglevel is sketched after the radial fit.
form <- mpg ~ .
poly <- tune.svm(form, data = Auto, kernel = "polynomial", cost = grid, gamma = grid)
best.poly <- poly$best.model
summary(best.poly)
##
## Call:
## best.svm(x = form, data = Auto, gamma = grid, cost = grid, kernel = "polynomial")
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: polynomial
## cost: 1
## degree: 3
## gamma: 0.1
## coef.0: 0
## epsilon: 0.1
##
##
## Number of Support Vectors: 264
For the polynomial kernel, the best model has cost = 1, degree = 3, and gamma = 0.1.
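To see how the cross-validated error varies over the whole cost/gamma grid rather than just at the winner, the tune object's performances table can be inspected (a brief sketch using the object above):
poly$performances      # CV error for every (gamma, cost) combination tried
poly$best.parameters   # gamma = 0.1, cost = 1, matching the summary above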
# Radial kernel
set.seed(2)
grid <- c(0.1, 1, 5, 10)
form <- mpg ~ .
radial <- tune.svm(form, data = Auto, kernel = "radial", cost = grid, gamma = grid)
best.radial <- radial$best.model
summary(best.radial)
##
## Call:
## best.svm(x = form, data = Auto, gamma = grid, cost = grid, kernel = "radial")
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: radial
## cost: 5
## gamma: 0.1
## epsilon: 0.1
##
##
## Number of Support Vectors: 275
For the radial kernel, the best model has cost = 5, gamma = 0.1, and epsilon = 0.1.
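Because form uses mpg as the response, both fits above are eps-regression models, as their summaries show. For the classification question on mpglevel the same tuning could be done as follows (a sketch, not run here; poly.class and radial.class are hypothetical names):
set.seed(2)
poly.class   <- tune.svm(mpglevel ~ . - mpg, data = Auto, kernel = "polynomial",
                         cost = grid, gamma = grid)
radial.class <- tune.svm(mpglevel ~ . - mpg, data = Auto, kernel = "radial",
                         cost = grid, gamma = grid)
summary(poly.class)
summary(radial.class)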
library(ISLR)
set.seed(1)
# Split the OJ data into a training set of 800 observations and a test set
tr.idx <- sample(1:nrow(OJ), size = 800, replace = FALSE)
train <- OJ[tr.idx, ]
test <- OJ[-tr.idx, ]
library(e1071)
# Support vector classifier with a small cost (wide margin, many support vectors)
OJ.svm <- svm(Purchase ~ ., data = train, kernel = "linear", cost = 0.01)
summary(OJ.svm)
##
## Call:
## svm(formula = Purchase ~ ., data = train, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 435
##
## ( 219 216 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
The fit uses 435 support vectors: 219 from class CH and 216 from class MM.
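Those per-class counts can also be pulled from the fitted object itself (a sketch relying on the nSV and index components of an e1071 svm fit):
OJ.svm$nSV                            # support vectors per class (CH, MM)
table(train$Purchase[OJ.svm$index])   # the same counts via the support-vector row indices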
# Training error rate:
pred_train = predict(OJ.svm, train)
table(train$Purchase, pred_train)
## pred_train
## CH MM
## CH 420 65
## MM 75 240
(75+65)/800
## [1] 0.175
# Testing error rate:
pred_test = predict(OJ.svm, test)
table(test$Purchase, pred_test)
## pred_test
## CH MM
## CH 153 15
## MM 33 69
(33+15)/nrow(test)
## [1] 0.1777778
The training error rate is 17.5% and the test error rate is 17.78%.
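The error rates can also be computed directly from the predictions instead of summing confusion-matrix cells by hand; a small sketch with a hypothetical helper err_rate:
# Hypothetical helper: misclassification rate of a fitted classifier on a data set
err_rate <- function(fit, data) mean(predict(fit, data) != data$Purchase)
err_rate(OJ.svm, train)   # training error rate
err_rate(OJ.svm, test)    # test error rate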
set.seed(1)
cost.grid <- c(0.01,0.1,1,10)
form <- Purchase ~ .
OJ.tune <- tune.svm(form, data = train, kernel = "linear", cost = cost.grid)
summary(OJ.tune)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.1
##
## - best performance: 0.1725
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.17625 0.02853482
## 2 0.10 0.17250 0.03162278
## 3 1.00 0.17500 0.02946278
## 4 10.00 0.17375 0.03197764
A cost of 0.1 gives the lowest cross-validation error rate (0.1725), so 0.1 is the optimal value of the cost parameter.
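Instead of refitting by hand, the model fitted at the best cost can also be taken straight from the tune object (a sketch using OJ.tune from above; best.linear is a hypothetical name):
OJ.tune$best.parameters              # cost = 0.1
best.linear <- OJ.tune$best.model    # svm already refit on the full training set at cost = 0.1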
# Refit the linear-kernel SVM with the tuned cost
svm2 <- svm(Purchase ~ ., data = train, kernel = "linear", cost = 0.1)
# Training error rate:
pred_train = predict(svm2, train)
table(train$Purchase, pred_train)
## pred_train
## CH MM
## CH 422 63
## MM 69 246
(63+69)/nrow(train)
## [1] 0.165
# Testing error rate:
pred_test = predict(svm2, test)
table(test$Purchase, pred_test)
## pred_test
## CH MM
## CH 155 13
## MM 31 71
(31+13)/nrow(test)
## [1] 0.162963
Both the training and test error rates decrease relative to cost = 0.01: the new training error rate is 16.5% and the new test error rate is about 16.3%.
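Using the err_rate helper sketched earlier, the same comparison is a one-liner per data set:
err_rate(svm2, train)   # should match 0.165
err_rate(svm2, test)    # should match 0.163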
set.seed(1)
# Radial kernel with the same cost (default gamma)
svm_radial <- svm(Purchase ~ ., data = train, kernel = "radial", cost = 0.1)
# Training error rate:
pred_train_radial = predict(svm_radial, train)
table(train$Purchase, pred_train_radial)
## pred_train_radial
## CH MM
## CH 433 52
## MM 87 228
(87+52)/nrow(train)
## [1] 0.17375
# Testing error rate:
pred_test_radial = predict(svm_radial, test)
table(test$Purchase, pred_test_radial)
## pred_test_radial
## CH MM
## CH 150 18
## MM 37 65
(37+18)/nrow(test)
## [1] 0.2037037
For the radial kernel, the training error rate is 17.4% and the test error rate is 20.4%.
set.seed(1)
# Polynomial kernel of degree 2 with the same cost
svm_poly <- svm(Purchase ~ ., data = train, kernel = "polynomial", cost = 0.1, degree = 2)
# Training error rate:
pred_train_poly = predict(svm_poly, train)
table(train$Purchase, pred_train_poly)
## pred_train_poly
## CH MM
## CH 465 20
## MM 225 90
(225+20)/nrow(train)
## [1] 0.30625
# Testing error rate:
pred_test_poly = predict(svm_poly, test)
table(test$Purchase, pred_test_poly)
## pred_test_poly
## CH MM
## CH 161 7
## MM 73 29
(73+7)/nrow(test)
## [1] 0.2962963
For the polynomial kernel (degree 2), the training error rate is 30.6% and the test error rate is 29.6%.
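Collecting the error rates reported above (all at cost = 0.1) makes the comparison explicit; the values below are copied, rounded, from the results above:
data.frame(kernel      = c("linear", "radial", "polynomial (deg 2)"),
           train.error = c(0.165, 0.174, 0.306),
           test.error  = c(0.163, 0.204, 0.296))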
Overall, the linear kernel with cost = 0.1 gives the best performance on this data.
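For a more even-handed comparison, the cost parameter could also be cross-validated for the radial and polynomial kernels rather than fixed at 0.1; a sketch of that tuning (not run here, reusing cost.grid from above; radial.tune and poly.tune are hypothetical names):
set.seed(1)
radial.tune <- tune.svm(Purchase ~ ., data = train, kernel = "radial", cost = cost.grid)
poly.tune <- tune.svm(Purchase ~ ., data = train, kernel = "polynomial", degree = 2, cost = cost.grid)
summary(radial.tune)
summary(poly.tune)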