Linear SVM

Generate the Linearly Separable Data Points, p=2, n=20

set.seed(10111)
x=matrix(rnorm(40),20,2)
y=rep(c(-1,1),c(10,10))
plot(x,col=y+2, pch=19)

Therefore let us separate the data points a bit.

x[y==1,]=x[y==1,]+1
plot(x,col=y+3,pch=19)

class(y)
## [1] "numeric"

The svm() function in e1071 package can do both classification and regression, as a result we need to specify our y variable as a factor, so that it is not recognised as a numeric variable anymore.

dat=data.frame(x,y=as.factor(y))
library(e1071)
## Warning: package 'e1071' was built under R version 3.3.3
svmfit = svm(y~.,data=dat, kernel="linear", cost=10,scale=FALSE)
svmfit #or
## 
## Call:
## svm(formula = y ~ ., data = dat, kernel = "linear", cost = 10, 
##     scale = FALSE)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  10 
##       gamma:  0.5 
## 
## Number of Support Vectors:  6
print(svmfit)
## 
## Call:
## svm(formula = y ~ ., data = dat, kernel = "linear", cost = 10, 
##     scale = FALSE)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  10 
##       gamma:  0.5 
## 
## Number of Support Vectors:  6
summary(svmfit)
## 
## Call:
## svm(formula = y ~ ., data = dat, kernel = "linear", cost = 10, 
##     scale = FALSE)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  10 
##       gamma:  0.5 
## 
## Number of Support Vectors:  6
## 
##  ( 3 3 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  -1 1
svmfit$index
## [1]  1  4 10 14 16 20
plot(svmfit,dat)

This plot can be pretty ugly to use, so let us make our own grid and plot the data points including the support vectors.

make.grid=function(x,n=75){
  grange=apply(x,2,range)
  x1=seq(from=grange[1,1],to=grange[2,1],length=n)
  x2=seq(from=grange[1,2],to=grange[2,2],length=n)
  expand.grid(X1=x1,X2=x2)
}
xgrid=make.grid(x)
ygrid=predict(svmfit,xgrid)
plot(xgrid,col=c("red","green")[as.numeric(ygrid)],pch=20,cex=.2)
points(x,col=y+5,pch=19)
points(x[svmfit$index,],pch=5,cex=2)

Let us see if we can obtain the separating line. To do this we will use the coefs attribute stored in svmfit object.

beta = drop(t(svmfit$coefs)%*%x[svmfit$index,])
beta0=svmfit$rho
plot(xgrid,col=c("red","green")[as.numeric(ygrid)],pch=20,cex=.2)
points(x,col=y+5,pch=19)
abline(beta0/beta[2],-beta[1]/beta[2])
abline((beta0-1)/beta[2],-beta[1]/beta[2],lty=2)
abline((beta0+1)/beta[2],-beta[1]/beta[2],lty=2)

Nonlinear SVM

Let us load the data from the url link:

rm(x,y)
load(url("http://www-stat.stanford.edu/~tibs/ElemStatLearn/datasets/ESL.mixture.rda"))
attach(ESL.mixture)
names(ESL.mixture)
## [1] "x"        "y"        "xnew"     "prob"     "marginal" "px1"     
## [7] "px2"      "means"
plot(x, col = y+2)

We again need to specify our y variable as a factor.

dat=data.frame(y=as.factor(y),x)
library(e1071)
svmfit=svm(factor(y)~.,data=dat,scale=FALSE,kernel="radial",cost=5)
svmfit #or
## 
## Call:
## svm(formula = factor(y) ~ ., data = dat, kernel = "radial", cost = 5, 
##     scale = FALSE)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  5 
##       gamma:  0.5 
## 
## Number of Support Vectors:  103
print(svmfit)
## 
## Call:
## svm(formula = factor(y) ~ ., data = dat, kernel = "radial", cost = 5, 
##     scale = FALSE)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  5 
##       gamma:  0.5 
## 
## Number of Support Vectors:  103
summary(svmfit)
## 
## Call:
## svm(formula = factor(y) ~ ., data = dat, kernel = "radial", cost = 5, 
##     scale = FALSE)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  5 
##       gamma:  0.5 
## 
## Number of Support Vectors:  103
## 
##  ( 49 54 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  0 1
svmfit$index
##   [1]   1   3   4   5   8  10  14  15  16  17  23  24  25  27  30  31  33
##  [18]  35  38  42  44  45  46  47  48  50  54  56  57  58  60  62  63  66
##  [35]  67  69  73  80  82  85  87  89  91  92  94  95  97  98  99 101 102
##  [52] 103 104 105 106 107 108 111 114 119 120 121 122 123 124 126 129 135
##  [69] 136 138 140 142 147 148 149 150 154 157 158 160 162 164 165 166 167
##  [86] 169 170 173 174 176 177 178 180 182 183 188 192 193 194 195 197 199
## [103] 200
plot(svmfit,dat)

The plot is not pretty enough. So let us do our own plot and create the grids.

grange=apply(x,2,range)
x1=seq(from=grange[1,1],to=grange[2,1],length=100)
x2=seq(from=grange[1,2],to=grange[2,2],length=100)
xgrid=expand.grid(X1=x1,X2=x2)
ygrid=predict(svmfit,xgrid)
plot(xgrid,col=c("red","black")[as.numeric(ygrid)],pch=20,cex=.2)

The grid shows the SVM decision boundary as usual. We can add our data points and support vectors onto this grid.

plot(xgrid,col=c("red","black")[as.numeric(ygrid)],pch=20,cex=.2)
points(x,col=y+1,pch=19)
points(x[svmfit$index,],pch=5,cex=2)

We would like to put the curve around the decision boundary. Use Contour.

func = predict(svmfit,xgrid,decision.values = TRUE)
func = attributes(func)$decision
plot(xgrid,col=c("red","black")[as.numeric(ygrid)],pch=20,cex=.2)
points(x,col=y+1,pch=19)
points(x[svmfit$index,],pch=5,cex=2)
contour(x1,x2, matrix(func,100,100),level=0, add=TRUE)