ISLR: Chapter 9 Lab

Sameer Mathur

Support Vector Classifier

Creating the Dataset

# Simulate the training predictors: a 20 x 2 matrix "x" filled with
# standard-normal draws (rnorm() defaults), one row per observation.
# No seed is set before this draw in the lab, so the values differ between runs.
x <- matrix(rnorm(20 * 2), ncol = 2)
# Class labels "y": -1 for the first 10 observations, 1 for the last 10.
y <- c(rep(-1, 10), rep(1, 10))
# Shift both coordinates of the class-1 rows by +1 so the two classes differ
# in location.
x[y == 1, ] <- x[y == 1, ] + 1
# Scatter plot of the two predictors; col = 3 - y maps y = -1 to colour 4 and
# y = 1 to colour 2 of the active palette.
plot(x, col = (3 - y))

plot of chunk unnamed-chunk-2

# Bundle the predictors and the response into one data frame. The response is
# stored as a factor; the summary below reports the fit as C-classification.
dat <- data.frame(x = x, y = as.factor(y))
# Dimensions of the data frame (rows, columns)
dim(dat)
[1] 20  3
# First 12 rows of the data frame
head(dat, n = 12)
           x.1         x.2  y
1  -0.22248386  0.88900370 -1
2  -0.35638707 -0.63819798 -1
3   0.68920944 -0.92926803 -1
4   0.41945989  0.53390835 -1
5   0.06655965  1.47992536 -1
6  -1.57645055 -1.76388240 -1
7   1.73027638  0.77519960 -1
8  -1.05621556 -1.04855863 -1
9  -1.39435952  0.13312775 -1
10  1.75391192 -0.09001491 -1
11  0.23542411  1.18415747  1
12  1.27633945  0.53220068  1

Fitting Model for Support Vector Classifier

# Load e1071, the package that supplies svm() and tune()
library(e1071)
# Fit a support vector classifier: linear kernel, cost = 10. scale = FALSE
# leaves the predictors on their original scale instead of standardising them.
svmfit <- svm(
  y ~ .,
  data = dat,
  kernel = "linear",
  cost = 10,
  scale = FALSE
)
# Print the fit's parameters and the number of support vectors per class
summary(svmfit)

Call:
svm(formula = y ~ ., data = dat, kernel = "linear", cost = 10, 
    scale = FALSE)


Parameters:
   SVM-Type:  C-classification 
 SVM-Kernel:  linear 
       cost:  10 
      gamma:  0.5 

Number of Support Vectors:  15

 ( 7 8 )


Number of Classes:  2 

Levels: 
 -1 1
# Row indices (within dat) of the observations used as support vectors
svmfit$index
 [1]  1  2  3  4  5  7 10 11 12 13 14 15 16 19 20
# Plot the fitted classifier together with the training data (dispatches to
# e1071's plot method for svm objects)
plot(svmfit, dat)

plot of chunk unnamed-chunk-5

# Refit the same linear classifier with a much smaller cost (0.1); svmfit is
# overwritten by the new fit.
svmfit <- svm(y ~ ., data = dat, kernel = "linear", cost = 0.1, scale = FALSE)
# Support-vector indices of the new fit. The output shown lists 18 of them,
# against 15 for cost = 10.
svmfit$index
 [1]  1  2  3  4  5  7  8  9 10 11 12 13 14 15 16 17 19 20
# Plot the cost = 0.1 fit together with the training data
plot(svmfit, dat)

plot of chunk unnamed-chunk-6

# Best parameter selection: compare a grid of cost values by cross-validation.
# The seed is set first so that the fold assignment is repeatable.
# NOTE(review): no scale argument is passed here, unlike the scale = FALSE fits
# above, so svm()'s default scaling applies inside tune() - confirm intended.
set.seed(1)
tune.out <- tune(
  svm,
  y ~ .,
  data = dat,
  kernel = "linear",
  ranges = list(cost = c(0.001, 0.01, 0.1, 1, 5, 10, 100))
)
# Cross-validation error and dispersion for every cost value
summary(tune.out)

Parameter tuning of 'svm':

- sampling method: 10-fold cross validation 

- best parameters:
 cost
    1

- best performance: 0.4 

- Detailed performance results:
   cost error dispersion
1 1e-03  0.75  0.3535534
2 1e-02  0.75  0.3535534
3 1e-01  0.45  0.3689324
4 1e+00  0.40  0.4594683
5 5e+00  0.45  0.4377975
6 1e+01  0.45  0.4377975
7 1e+02  0.45  0.4377975
# Reset the RNG seed. Nothing in this chunk draws random numbers, so the seed
# takes effect at the next random draw further down: the rnorm()/sample()
# calls that simulate the test set.
set.seed(1)
# best model: the fit stored by tune() for the best-performing cost
# (cost = 1 in the output shown)
bestmod <- tune.out$best.model
# summary of the selected model
summary(bestmod)

Call:
best.tune(method = svm, train.x = y ~ ., data = dat, ranges = list(cost = c(0.001, 
    0.01, 0.1, 1, 5, 10, 100)), kernel = "linear")


Parameters:
   SVM-Type:  C-classification 
 SVM-Kernel:  linear 
       cost:  1 
      gamma:  0.5 

Number of Support Vectors:  15

 ( 7 8 )


Number of Classes:  2 

Levels: 
 -1 1

Creating Test Set

# Simulate a test set in the same way as the training data: a 20 x 2 matrix of
# standard-normal draws.
xtest <- matrix(rnorm(20 * 2), ncol = 2)
# Test labels: 20 values sampled with replacement from {-1, 1}. The argument
# name `replace` is written in full so the call does not depend on partial
# argument matching.
ytest <- sample(c(-1, 1), 20, replace = TRUE)
# Shift both coordinates of the class-1 rows by +1, as was done for the
# training predictors.
xtest[ytest == 1, ] <- xtest[ytest == 1, ] + 1
# Test data frame with the same column layout (x.1, x.2, y) as the training
# data frame
testdat <- data.frame(x = xtest, y = as.factor(ytest))
# First rows of the test data set
head(testdat)
         x.1         x.2  y
1 -0.6264538  0.91897737 -1
2  1.1836433  1.78213630  1
3 -0.8356286  0.07456498 -1
4  1.5952808 -1.98935170 -1
5  1.3295078  1.61982575  1
6 -0.8204684 -0.05612874 -1
# Predict the class label of every test observation with the tuned model
ypred <- predict(bestmod, newdata = testdat)
# Confusion matrix: predicted labels (rows) against true labels (columns)
table(predict = ypred, truth = testdat$y)
       truth
predict -1 1
     -1  8 1
     1   3 8
# Refit on the training data with a very small cost (0.01) for comparison;
# svmfit is overwritten by the new fit.
svmfit <- svm(y ~ ., data = dat, kernel = "linear", cost = 0.01, scale = FALSE)
# Predict the test labels with this fit
ypred <- predict(svmfit, newdata = testdat)
# Confusion matrix: predicted labels (rows) against true labels (columns)
table(predict = ypred, truth = testdat$y)
       truth
predict -1 1
     -1  4 1
     1   7 8
# Move the class-1 rows a further 0.5 in both coordinates (on top of the
# earlier +1 shift), pushing the two classes further apart.
x[y == 1, ] <- x[y == 1, ] + 0.5
# Scatter plot of the shifted data: col = (y + 5) / 2 maps y = -1 to colour 2
# and y = 1 to colour 3; pch = 19 draws solid circles.
plot(x, col = (y + 5) / 2, pch = 19)

plot of chunk unnamed-chunk-12

# Data frame built from the shifted matrix x and the factor response y
dat3 <- data.frame(x = x, y = as.factor(y))
# Linear support vector classifier with a very large cost (1e5). No scale
# argument is given here, so svm()'s default scaling is used.
svmfit2 <- svm(y ~ ., data = dat3, kernel = "linear", cost = 1e5)
# Parameters and support-vector counts of the fit
summary(svmfit2)

Call:
svm(formula = y ~ ., data = dat3, kernel = "linear", cost = 1e+05)


Parameters:
   SVM-Type:  C-classification 
 SVM-Kernel:  linear 
       cost:  1e+05 
      gamma:  0.5 

Number of Support Vectors:  9

 ( 4 5 )


Number of Classes:  2 

Levels: 
 -1 1
# Plot the cost = 1e5 fit together with the shifted data
plot(svmfit2, dat3)

plot of chunk unnamed-chunk-14

# Refit on the shifted data with a much smaller cost (1) for comparison with
# the cost = 1e5 fit
svmfit3 <- svm(y ~ ., data = dat3, kernel = "linear", cost = 1)
# Parameters and support-vector counts of the fit
summary(svmfit3)

Call:
svm(formula = y ~ ., data = dat3, kernel = "linear", cost = 1)


Parameters:
   SVM-Type:  C-classification 
 SVM-Kernel:  linear 
       cost:  1 
      gamma:  0.5 

Number of Support Vectors:  11

 ( 5 6 )


Number of Classes:  2 

Levels: 
 -1 1
# Plot the cost = 1 fit together with the shifted data
plot(svmfit3, dat3)

plot of chunk unnamed-chunk-16