set.seed(1)
x=matrix(rnorm(20*2), ncol=2)   # 20 observations in two dimensions
y=c(rep(-1,10), rep(1,10))      # two classes of 10 observations each
x[y==1,]=x[y==1,]+1             # shift one class so the classes overlap
plot(x, col=(3-y))
The classes are not linearly separable, so we fit a support vector classifier.
library(e1071)
dat=data.frame(x=x, y=as.factor(y))
svmfit=svm(y~., data=dat, kernel="linear", cost=10, scale=FALSE)
plot(svmfit, dat)
svmfit$index
## [1]  1  2  5  7 14 16 17
summary(svmfit)
##
## Call:
## svm(formula = y ~ ., data = dat, kernel = "linear", cost = 10,
## scale = FALSE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 10
##
## Number of Support Vectors: 7
##
## ( 4 3 )
##
##
## Number of Classes: 2
##
## Levels:
## -1 1
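Because the kernel is linear and scale=FALSE, the fitted hyperplane can be recovered from the model object; a minimal sketch, assuming the svmfit, x, and y objects above and the e1071 conventions that coefs stores y_i*alpha_i for the support vectors and rho the intercept:
# Recover the linear decision boundary w'x - rho = 0 from the fitted model.
beta=drop(t(svmfit$coefs) %*% x[svmfit$index,])     # weight vector w
beta0=svmfit$rho                                    # intercept
plot(x, col=(3-y))
abline(beta0/beta[2], -beta[1]/beta[2])             # decision boundary
abline((beta0-1)/beta[2], -beta[1]/beta[2], lty=2)  # margin
abline((beta0+1)/beta[2], -beta[1]/beta[2], lty=2)  # margin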
A smaller cost gives a wider margin, and hence more support vectors:
svmfit=svm(y~., data=dat, kernel="linear", cost=0.1, scale=FALSE)
svmfit$index
## [1]  1  2  3  4  5  7  9 10 12 13 14 15 16 17 18 20
summary(svmfit)
##
## Call:
## svm(formula = y ~ ., data = dat, kernel = "linear", cost = 0.1,
## scale = FALSE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.1
##
## Number of Support Vectors: 16
##
## ( 8 8 )
##
##
## Number of Classes: 2
##
## Levels:
## -1 1
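The smaller cost yields 16 support vectors instead of 7, since the wider margin admits more violations. A quick sketch of this pattern, assuming dat from above and that the e1071 svm object stores the total support vector count in tot.nSV:
# Number of support vectors as cost shrinks (assumed field tot.nSV).
sapply(c(10, 1, 0.1, 0.01), function(co)
  svm(y~., data=dat, kernel="linear", cost=co, scale=FALSE)$tot.nSV)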
set.seed(1)
tune.out=tune(svm, y~., data=dat, kernel="linear",
ranges=list(cost=c(0.01, 0.1,1,5,10,100)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.1
##
## - best performance: 0.05
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-02 0.55 0.4377975
## 2 1e-01 0.05 0.1581139
## 3 1e+00 0.15 0.2415229
## 4 5e+00 0.15 0.2415229
## 5 1e+01 0.15 0.2415229
## 6 1e+02 0.15 0.2415229
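tune() also keeps the full cross-validation grid, which can be inspected or plotted directly; a minimal sketch, assuming the tune.out object above:
# Data frame of the CV results shown above.
tune.out$performances
# e1071 provides a plot method for tune objects (error versus cost).
plot(tune.out)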
The best model found by cross-validation can be extracted and inspected:
bestmod=tune.out$best.model
summary(bestmod)
##
## Call:
## best.tune(method = svm, train.x = y ~ ., data = dat, ranges = list(cost = c(0.01,
## 0.1, 1, 5, 10, 100)), kernel = "linear")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.1
##
## Number of Support Vectors: 16
##
## ( 8 8 )
##
##
## Number of Classes: 2
##
## Levels:
## -1 1
We now generate a test set to evaluate the tuned classifier:
xtest=matrix(rnorm(20*2), ncol=2)
ytest=sample(c(-1,1),20, rep=TRUE)
xtest[ytest==1,]=xtest[ytest==1,]+1
testdata=data.frame(x=xtest,y=as.factor(ytest))
ypred=predict(bestmod, testdata)
table(predict=ypred, truth=testdata$y)
##        truth
## predict -1 1
##      -1  9 1
##       1  2 8
With a much smaller value of cost, more test observations are misclassified:
svmfit=svm(y~., data=dat, kernel="linear", cost=0.01, scale=FALSE)
ypred=predict(svmfit,testdata)
table(predict=ypred, truth=testdata$y)
##        truth
## predict -1 1
##      -1 11 6
##       1  0 3
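To compare several costs on the same test set in one pass, a sketch assuming the dat and testdata objects from above:
# Test error rate for a few candidate costs.
costs=c(0.01, 0.1, 1, 10)
errs=sapply(costs, function(co){
  fit=svm(y~., data=dat, kernel="linear", cost=co, scale=FALSE)
  mean(predict(fit, testdata) != testdata$y)
})
data.frame(cost=costs, test.error=errs)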
dat1= data.frame(x=x, y=as.factor(y))
svmfit=svm(y~. ,data= dat1, kernel="linear", cost=1, scale=FALSE)
plot(svmfit, dat1)
summary(svmfit)
##
## Call:
## svm(formula = y ~ ., data = dat1, kernel = "linear", cost = 1,
## scale = FALSE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 1
##
## Number of Support Vectors: 7
##
## ( 3 4 )
##
##
## Number of Classes: 2
##
## Levels:
## -1 1
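With a much larger cost, almost no margin violations are tolerated, so the margin narrows and fewer support vectors are used; such a fit risks overfitting. A minimal sketch, assuming dat1 from above:
# Very large cost: narrow margin, fewer support vectors, possible overfitting.
svmfit.hi=svm(y~., data=dat1, kernel="linear", cost=1e5, scale=FALSE)
summary(svmfit.hi)
plot(svmfit.hi, dat1)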
Next we generate data with a nonlinear class boundary and fit an SVM with a radial kernel.
set.seed(1)
x=matrix(rnorm(200*2), ncol=2)
x[1:100,]=x[1:100,]+2
x[101:150,]=x[101:150,]-2
y=c(rep(1,150),rep(2,50))
datn=data.frame(x=x,y=as.factor(y))
plot(x, col=y)
train=sample(200,100)
svmfit=svm(y~. , data=datn[train,] , kernel="radial", gamma=1,
cost=0.0001)
plot(svmfit, datn[train,])
summary(svmfit)
##
## Call:
## svm(formula = y ~ ., data = datn[train, ], kernel = "radial",
## gamma = 1, cost = 1e-04)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1e-04
##
## Number of Support Vectors: 54
##
## ( 27 27 )
##
##
## Number of Classes: 2
##
## Levels:
## 1 2
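With cost=1e-04 more than half of the 100 training points end up as support vectors and the boundary is very smooth. Increasing the cost reduces training errors at the risk of a more irregular boundary; a sketch, assuming datn and train from above:
# Refit with a moderate cost to tighten the boundary.
svmfit1=svm(y~., data=datn[train,], kernel="radial", gamma=1, cost=1)
plot(svmfit1, datn[train,])
summary(svmfit1)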
set.seed(1)
tune.out=tune(svm, y~., data=datn[train,], kernel="radial",
ranges=list(cost=c(0.01, 0.1,1,5,10,100), gamma=c(0.5,1,2,3,4)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 1 0.5
##
## - best performance: 0.07
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 1e-02 0.5 0.27 0.15670212
## 2 1e-01 0.5 0.26 0.15776213
## 3 1e+00 0.5 0.07 0.08232726
## 4 5e+00 0.5 0.07 0.08232726
## 5 1e+01 0.5 0.07 0.08232726
## 6 1e+02 0.5 0.14 0.15055453
## 7 1e-02 1.0 0.27 0.15670212
## 8 1e-01 1.0 0.22 0.16193277
## 9 1e+00 1.0 0.07 0.08232726
## 10 5e+00 1.0 0.08 0.07888106
## 11 1e+01 1.0 0.09 0.07378648
## 12 1e+02 1.0 0.12 0.12292726
## 13 1e-02 2.0 0.27 0.15670212
## 14 1e-01 2.0 0.27 0.15670212
## 15 1e+00 2.0 0.07 0.08232726
## 16 5e+00 2.0 0.09 0.07378648
## 17 1e+01 2.0 0.11 0.07378648
## 18 1e+02 2.0 0.12 0.13165612
## 19 1e-02 3.0 0.27 0.15670212
## 20 1e-01 3.0 0.27 0.15670212
## 21 1e+00 3.0 0.07 0.08232726
## 22 5e+00 3.0 0.11 0.07378648
## 23 1e+01 3.0 0.08 0.07888106
## 24 1e+02 3.0 0.13 0.14181365
## 25 1e-02 4.0 0.27 0.15670212
## 26 1e-01 4.0 0.27 0.15670212
## 27 1e+00 4.0 0.07 0.08232726
## 28 5e+00 4.0 0.10 0.06666667
## 29 1e+01 4.0 0.09 0.07378648
## 30 1e+02 4.0 0.13 0.14181365
Test-set predictions for the best model chosen by cross-validation:
table(true=datn[-train,"y"],
      pred=predict(tune.out$best.model, newdata=datn[-train,]))
##     pred
## true  1  2
##    1 67 10
##    2  2 21
library(ROCR)
rocplot=function(pred, truth, ...){
predob=prediction(pred,truth)
perf=performance(predob, "tpr", "fpr")
plot(perf,...)
}
svmfit.opt=svm(y~., data=datn[train,], kernel="radial",
               gamma=2, cost=1, decision.values=TRUE)
fitted=attributes(predict(svmfit.opt, datn[train,],
                          decision.values=TRUE))$decision.values
par(mfrow=c(1,2))
rocplot(fitted, datn[train,"y"], main="Training Data")
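A more flexible fit (larger gamma) can be overlaid on the training ROC curve, and both fits compared on the held-out half; a sketch, assuming the datn, train, svmfit.opt, and rocplot objects above:
# A wigglier fit for comparison on the training ROC panel.
svmfit.flex=svm(y~., data=datn[train,], kernel="radial",
                gamma=50, cost=1, decision.values=TRUE)
fitted.flex=attributes(predict(svmfit.flex, datn[train,],
                               decision.values=TRUE))$decision.values
rocplot(fitted.flex, datn[train,"y"], add=TRUE, col="red")
# Test-set ROC curve for the tuned fit (fills the second mfrow panel).
fitted.test=attributes(predict(svmfit.opt, datn[-train,],
                               decision.values=TRUE))$decision.values
rocplot(fitted.test, datn[-train,"y"], main="Test Data")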
We now add a third class of observations:
set.seed(1)
x=rbind(x, matrix(rnorm(50*2), ncol=2))
y=c(y,rep(0,50))
x[y==0,2]=x[y==0,2]+2
datm=data.frame(x=x,y=as.factor(y))
par(mfrow=c(1,1))
plot(x, col=(y+1))
svmfit=svm(y~., data=datm, kernel="radial", cost=10, gamma=1)
summary(svmfit)
##
## Call:
## svm(formula = y ~ ., data = datm, kernel = "radial", cost = 10,
## gamma = 1)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 10
##
## Number of Support Vectors: 105
##
## ( 38 37 30 )
##
##
## Number of Classes: 3
##
## Levels:
## 0 1 2
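With more than two factor levels, svm() performs multi-class classification using the one-versus-one approach, fitting a classifier for each pair of classes. The resulting class regions can be plotted as before (assumes the svmfit and datm objects above):
# Class regions of the one-versus-one multiclass fit.
plot(svmfit, datm)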
## [1] "xtrain" "xtest" "ytrain" "ytest"
## [1] 63 2308
## [1] 20 2308
## [1] 63
## [1] 20
##
## 1 2 3 4
## 8 23 12 20
##
## 1 2 3 4
## 3 6 6 5
dat=data.frame(x=Khan$xtrain, y=as.factor(Khan$ytrain))
out=svm(y~., data=dat, kernel="linear", cost=10)
summary(out)
##
## Call:
## svm(formula = y ~ ., data = dat, kernel = "linear", cost = 10)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 10
##
## Number of Support Vectors: 58
##
## ( 20 20 11 7 )
##
##
## Number of Classes: 4
##
## Levels:
## 1 2 3 4
There are no training errors, which is not surprising given p = 2308 features for only 63 observations:
table(out$fitted, dat$y)
##
##      1  2  3  4
##   1  8  0  0  0
##   2  0 23  0  0
##   3  0  0 12  0
##   4  0  0  0 20
dat.te=data.frame(x=Khan$xtest, y=as.factor(Khan$ytest))
pred.te=predict(out, newdata=dat.te)
table(pred.te, dat.te$y)
##
## pred.te 1 2 3 4
##       1 3 0 0 0
##       2 0 6 2 0
##       3 0 0 4 0
##       4 0 0 0 5
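The fit makes two errors on the 20 test observations (two true class-3 cases predicted as class 2). A quick error-rate check, assuming the pred.te and dat.te objects above:
# Test misclassification rate: 2/20 = 0.1.
mean(pred.te != dat.te$y)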