Sameer Mathur
Support Vector Classifier
# creating a matrix "x" of dimensions 20 x 2
x <- matrix(rnorm(20*2), ncol = 2)
# creating a variable "y": ten observations labeled -1 followed by ten labeled 1
y <- c(rep(-1,10), rep(1,10))
# shifting the mean of the y = 1 class so the two classes are separated
x[y==1,] <- x[y==1,] + 1
# plotting the observations, colored by class (col = 3 - y gives blue for -1, red for +1)
plot(x, col = (3 - y))
# creating the data frame with x and y (y encoded as a factor for classification)
dat <- data.frame(x = x, y = as.factor(y))
# dimensions of the data frame
dim(dat)
[1] 20 3
# first 12 rows of the data frame
head(dat, n = 12)
x.1 x.2 y
1 -0.22248386 0.88900370 -1
2 -0.35638707 -0.63819798 -1
3 0.68920944 -0.92926803 -1
4 0.41945989 0.53390835 -1
5 0.06655965 1.47992536 -1
6 -1.57645055 -1.76388240 -1
7 1.73027638 0.77519960 -1
8 -1.05621556 -1.04855863 -1
9 -1.39435952 0.13312775 -1
10 1.75391192 -0.09001491 -1
11 0.23542411 1.18415747 1
12 1.27633945 0.53220068 1
# loading the e1071 package, which provides svm()
library(e1071)
# fitting a support vector classifier (linear kernel) with cost = 10;
# scale = FALSE keeps the variables on their original scale
svmfit <- svm(y ~ ., data = dat, kernel = "linear", cost = 10, scale = FALSE)
summary(svmfit)
Call:
svm(formula = y ~ ., data = dat, kernel = "linear", cost = 10,
scale = FALSE)
Parameters:
SVM-Type: C-classification
SVM-Kernel: linear
cost: 10
gamma: 0.5
Number of Support Vectors: 15
( 7 8 )
Number of Classes: 2
Levels:
-1 1
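A note on the gamma line: e1071 reports a default gamma of 1 / (number of predictors) even for a linear kernel, where gamma plays no role; with two predictors that is 0.5, matching the summary. A quick check:
# default gamma is 1 / ncol(x) in e1071 (unused by the linear kernel)
1 / ncol(x)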
# indices of the support vectors
svmfit$index
[1] 1 2 3 4 5 7 10 11 12 13 14 15 16 19 20
# plotting the fitted classifier (shaded class regions; support vectors drawn as crosses)
plot(svmfit, dat)
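As a complementary view, the support vectors can be circled on the raw scatterplot; this is a minimal base-R sketch that reuses the svmfit$index vector shown above:
# replotting the data and marking the support vectors
plot(x, col = (3 - y), pch = 19)
points(x[svmfit$index, ], pch = 1, cex = 2)  # open circles = support vectors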
# refitting with a smaller cost, cost = 0.1
svmfit <- svm(y ~ ., data = dat, kernel = "linear", cost = 0.1, scale = FALSE)
# indices of the support vectors
svmfit$index
[1] 1 2 3 4 5 7 8 9 10 11 12 13 14 15 16 17 19 20
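The smaller cost widens the margin, so more observations become support vectors: 18 indices above versus 15 at cost = 10. A one-line check on the refitted object:
length(svmfit$index)  # 18 with cost = 0.1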
# plotting
plot(svmfit, dat)
# selecting the best cost parameter by 10-fold cross-validation
set.seed(1)
tune.out <- tune(svm, y ~ ., data = dat, kernel = "linear",
                 ranges = list(cost = c(0.001, 0.01, 0.1, 1, 5, 10, 100)))
# summary
summary(tune.out)
Parameter tuning of 'svm':
- sampling method: 10-fold cross validation
- best parameters:
cost
1
- best performance: 0.4
- Detailed performance results:
cost error dispersion
1 1e-03 0.75 0.3535534
2 1e-02 0.75 0.3535534
3 1e-01 0.45 0.3689324
4 1e+00 0.40 0.4594683
5 5e+00 0.45 0.4377975
6 1e+01 0.45 0.4377975
7 1e+02 0.45 0.4377975
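Besides summary(), the tuning results can be read off the object directly; these are standard fields of the tune object returned by e1071:
tune.out$best.parameters   # cost = 1
tune.out$best.performance  # 0.40, the lowest CV error above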
# extracting the best model found by cross-validation
bestmod <- tune.out$best.model
# summary
summary(bestmod)
Call:
best.tune(method = svm, train.x = y ~ ., data = dat, ranges = list(cost = c(0.001,
0.01, 0.1, 1, 5, 10, 100)), kernel = "linear")
Parameters:
SVM-Type: C-classification
SVM-Kernel: linear
cost: 1
gamma: 0.5
Number of Support Vectors: 15
( 7 8 )
Number of Classes: 2
Levels:
-1 1
# creating a test matrix of dimensions 20 x 2 from the normal distribution
xtest <- matrix(rnorm(20*2), ncol = 2)
# creating a variable ytest that randomly takes the values -1 and 1
ytest <- sample(c(-1,1), 20, replace = TRUE)
# applying the same shift to the y = 1 class as in the training data
xtest[ytest==1,] <- xtest[ytest==1,] + 1
# creating a data frame for the test set
testdat <- data.frame(x = xtest, y = as.factor(ytest))
# first rows of the test set
head(testdat)
x.1 x.2 y
1 -0.6264538 0.91897737 -1
2 1.1836433 1.78213630 1
3 -0.8356286 0.07456498 -1
4 1.5952808 -1.98935170 -1
5 1.3295078 1.61982575 1
6 -0.8204684 -0.05612874 -1
# predicting the test labels with the cross-validated best model
ypred <- predict(bestmod, testdat)
# confusion table: predictions vs. truth
table(predict = ypred, truth = testdat$y)
truth
predict -1 1
-1 8 1
1 3 8
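The table shows 8 + 8 = 16 of the 20 test observations classified correctly. The accuracy can also be computed directly from the objects above:
mean(ypred == testdat$y)  # 0.8 for the table above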
# refitting with cost = 0.01 for comparison
svmfit <- svm(y ~ ., data = dat, kernel = "linear", cost = 0.01, scale = FALSE)
# predicting the test labels with this model
ypred <- predict(svmfit, testdat)
# confusion table: predictions vs. truth
table(predict = ypred, truth = testdat$y)
truth
predict -1 1
-1 4 1
1 7 8
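Here only 4 + 8 = 12 of 20 test observations are classified correctly, so the cross-validated choice of cost = 1 clearly outperforms cost = 0.01 on this test set; the same mean(ypred == testdat$y) check applies.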
# shifting the y = 1 class further so the two classes are barely linearly separable
x[y==1,] <- x[y==1,] + 0.5
# plotting (col = (y + 5) / 2 gives red for -1, green for +1)
plot(x, col = (y + 5) / 2, pch = 19)
# creating a data frame with matrix x and the factor variable y
dat3 <- data.frame(x = x, y = as.factor(y))
# fitting with a very large cost so that (almost) no training observations are misclassified
svmfit2 <- svm(y ~ ., data = dat3, kernel = "linear", cost = 1e5)
# summary
summary(svmfit2)
Call:
svm(formula = y ~ ., data = dat3, kernel = "linear", cost = 1e+05)
Parameters:
SVM-Type: C-classification
SVM-Kernel: linear
cost: 1e+05
gamma: 0.5
Number of Support Vectors: 9
( 4 5 )
Number of Classes: 2
Levels:
-1 1
# plotting
plot(svmfit2, dat3)
# refitting with the more moderate cost = 1, which allows a wider margin
svmfit3 <- svm(y ~ ., data = dat3, kernel = "linear", cost = 1)
# summary
summary(svmfit3)
Call:
svm(formula = y ~ ., data = dat3, kernel = "linear", cost = 1)
Parameters:
SVM-Type: C-classification
SVM-Kernel: linear
cost: 1
gamma: 0.5
Number of Support Vectors: 11
( 5 6 )
Number of Classes: 2
Levels:
-1 1
# plotting
plot(svmfit3, dat3)
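To quantify the trade-off, one can compare the training error of the two fits; a short sketch (the exact numbers depend on the random draws above). The cost = 1e5 fit forces a very narrow margin and risks overfitting, while the cost = 1 fit may misclassify a training point or two but should generalize better:
# training error rates of the near-hard-margin fit vs. the softer fit
mean(predict(svmfit2, dat3) != dat3$y)
mean(predict(svmfit3, dat3) != dat3$y)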