library(rgl)
data(mtcars)
# Fit a linear model of mpg on cyl and hp
mylm <- lm(mpg ~ cyl + hp, data = mtcars)
# Plot the observations in 3D
plot3d(mtcars$cyl, mtcars$hp, mtcars$mpg, xlab="cyl", ylab="hp", zlab="mpg", size=10, col="blue")
# Add the fitted regression plane a*x + b*y + c*z + d = 0
planes3d(a=coef(mylm)[2], b=coef(mylm)[3], c=-1, d=coef(mylm)[1], alpha=0.3)
# Fitted values: each observation dropped vertically onto the plane
myproj <- data.frame(cyl=mtcars$cyl, hp=mtcars$hp, mpg=mylm$fitted.values)
plot3d(myproj, col="red", size=10, add=TRUE)
# Draw residual segments from each observation to its fitted value
mylist <- list(mtcars[,c("cyl", "hp", "mpg")], myproj)
segments3d(do.call(rbind, mylist)[order(sequence(sapply(mylist, nrow))),])
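The plane drawn by planes3d() has equation a*x + b*y + c*z + d = 0, so with a = coef(mylm)[2], b = coef(mylm)[3], c = -1 and d = coef(mylm)[1] it is exactly the fitted surface mpg = intercept + b1*cyl + b2*hp. A quick sanity check (a minimal sketch; cf and plane_mpg are just illustrative names):
# Solving the plane equation for mpg should reproduce the fitted values
cf <- unname(coef(mylm))
plane_mpg <- cf[1] + cf[2]*mtcars$cyl + cf[3]*mtcars$hp
all.equal(unname(fitted(mylm)), plane_mpg)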
The key idea of the SVM is to find a hyperplane that separates the two
classes with the widest possible margin.
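Formally, for linearly separable data the (hard-margin) linear SVM looks for the hyperplane w'x + b = 0 that solves
minimize (1/2)*||w||^2 subject to y_i*(w'x_i + b) >= 1 for all i,
which makes the margin 2/||w|| as large as possible. The soft-margin variant used below relaxes the constraints with slack variables whose total violation is penalized by the cost parameter C.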
Here we generate a toy dataset in 2D and learn how to train and test an SVM.
n <- 150 # number of data points
p <- 2 # dimension
sigma <- 1 # variance of the distribution
meanpos <- 0 # centre of the distribution of positive examples
meanneg <- 3 # centre of the distribution of negative examples
npos <- round(n/2) # number of positive examples
nneg <- n-npos # number of negative examples
# Generate the positive and negative examples
xpos <- matrix(rnorm(npos*p,mean=meanpos,sd=sigma),npos,p)
xneg <- matrix(rnorm(nneg*p,mean=meanneg,sd=sigma),nneg,p)
x <- rbind(xpos,xneg)
# Generate the labels
y <- matrix(c(rep(1,npos),rep(-1,nneg)))
# Visualize the data
plot(x,col=ifelse(y>0,1,2))
legend("topleft",c('Positive','Negative'),col=seq(2),pch=1,text.col=seq(2))
Now we split the data into a training set (80%) and a test set (20%):
## Prepare a training and a test set ##
ntrain <- round(n*0.8) # number of training examples
tindex <- sample(n,ntrain) # indices of training samples
xtrain <- x[tindex,]
xtest <- x[-tindex,]
ytrain <- y[tindex]
ytest <- y[-tindex]
istrain <- rep(0,n)   # indicator: 1 = training point, 0 = test point
istrain[tindex] <- 1
# Visualize
plot(x,col=ifelse(y>0,1,2),pch=ifelse(istrain==1,1,2))
legend("topleft",c('Positive Train','Positive Test','Negative Train','Negative Test'),
col=c(1,1,2,2),pch=c(1,2,1,2),text.col=c(1,1,2,2))
### Train an SVM
Now we train an SVM with cost parameter C=100 on the training set. Note that
ksvm() uses the Gaussian RBF kernel by default; pass kernel="vanilladot" to
train a truly linear SVM.
# load the kernlab package
library(kernlab)
# Train a C-SVM with cost C=100; scaled=c() disables scaling of the variables
svp <- ksvm(xtrain, ytrain, type="C-svc", C=100, scaled=c())
# General summary
svp
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 100
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 0.340996078863984
##
## Number of Support Vectors : 19
##
## Objective Function Value : -469.3637
## Training error : 0.016667
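The summary above reports only the training error. ksvm() can also estimate a k-fold cross-validation error on the training set via the cross argument, retrieved with the cross() accessor (a minimal sketch; svp.cv is just an illustrative name):
# Refit with 5-fold cross-validation to get a less optimistic error estimate
svp.cv <- ksvm(xtrain, ytrain, type="C-svc", C=100, scaled=c(), cross=5)
cross(svp.cv)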
# Attributes that you can access
attributes(svp)
## $param
## $param$C
## [1] 100
##
##
## $scaling
## NULL
##
## $coef
## $coef[[1]]
## [1] -16.09341907 -0.58890670 -4.02520413 -3.76268068 -60.48766396
## [6] 0.30189686 0.26887470 0.19591706 0.02541916 19.13220772
## [11] 42.23342125 100.00000000 0.30454048 0.44897632 -76.85148936
## [16] -100.00000000 100.00000000 -1.08702113 -0.01486853
##
##
## $alphaindex
## $alphaindex[[1]]
## [1] 1 4 5 7 12 16 23 29 31 38 42 44 65 82 85 99 102 111 114
##
##
## $b
## [1] -0.296334
##
## $obj
## [1] -469.3637
##
## $SVindex
## [1] 1 4 5 7 12 16 23 29 31 38 42 44 65 82 85 99 102 111 114
##
## $nSV
## [1] 19
##
## $prior
## $prior[[1]]
## $prior[[1]]$prior1
## [1] 62
##
## $prior[[1]]$prior0
## [1] 58
##
##
##
## $prob.model
## $prob.model[[1]]
## NULL
##
##
## $alpha
## $alpha[[1]]
## [1] 16.09341907 0.58890670 4.02520413 3.76268068 60.48766396
## [6] 0.30189686 0.26887470 0.19591706 0.02541916 19.13220772
## [11] 42.23342125 100.00000000 0.30454048 0.44897632 76.85148936
## [16] 100.00000000 100.00000000 1.08702113 0.01486853
##
##
## $type
## [1] "C-svc"
##
## $kernelf
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 0.340996078863984
##
## $kpar
## list()
##
## $xmatrix
## $xmatrix[[1]]
## X1 X2
## 1 4.1473184 0.9298683
## 4 2.0893264 3.4749399
## 5 1.3978777 2.7888444
## 7 2.4614029 3.1557118
## 12 1.5797594 2.0632175
## 16 -1.5250870 -1.5487910
## 23 1.1071430 -1.9028421
## 29 -1.7305234 -0.5864856
## 31 -1.3729612 -1.7798993
## 38 0.5281332 2.0374797
## 42 3.2863057 0.8051940
## 44 1.6821866 2.1375101
## 65 -0.3680327 -2.1465153
## 82 2.5472261 0.6675309
## 85 2.4945606 1.2333967
## 99 1.4813342 1.8761347
## 102 1.8426995 1.5137857
## 111 4.7132771 5.0511625
## 114 3.5354808 5.0494387
##
##
## $ymatrix
## [1] -1 -1 1 -1 -1 -1 -1 1 -1 -1 -1 -1 1 1 1 1 1 -1 -1 -1 -1 -1 1 1 -1
## [26] -1 1 -1 1 1 1 -1 1 1 1 -1 -1 1 1 -1 -1 1 1 1 -1 -1 1 -1 1 -1
## [51] -1 1 -1 1 1 -1 -1 1 1 -1 1 -1 -1 -1 1 -1 1 -1 1 1 -1 1 -1 -1 1
## [76] 1 -1 1 -1 -1 1 1 1 1 -1 -1 1 -1 1 1 1 1 -1 1 -1 1 -1 1 -1 1
## [101] 1 1 -1 -1 -1 1 1 1 1 -1 -1 1 1 -1 1 1 1 1 -1 -1
##
## $fitted
## [1] -1 -1 1 -1 -1 -1 -1 1 -1 -1 -1 -1 1 1 1 1 1 -1 -1 -1 -1 -1 1 1 -1
## [26] -1 1 -1 1 1 1 -1 1 1 1 -1 -1 1 1 -1 -1 1 1 -1 -1 -1 1 -1 1 -1
## [51] -1 1 -1 1 1 -1 -1 1 1 -1 1 -1 -1 -1 1 -1 1 -1 1 1 -1 1 -1 -1 1
## [76] 1 -1 1 -1 -1 1 1 1 1 -1 -1 1 -1 1 1 1 1 -1 1 -1 1 -1 1 -1 1
## [101] 1 -1 -1 -1 -1 1 1 1 1 -1 -1 1 1 -1 1 1 1 1 -1 -1
##
## $lev
## [1] -1 1
##
## $nclass
## [1] 2
##
## $error
## [1] 0.01666667
##
## $cross
## [1] -1
##
## $n.action
## function (object, ...)
## UseMethod("na.omit")
## <bytecode: 0x000002e31d9046f8>
## <environment: namespace:stats>
##
## $terms
## NULL
##
## $kcall
## .local(x = x, y = ..1, scaled = ..4, type = "C-svc", C = 100)
##
## $class
## [1] "ksvm"
## attr(,"package")
## [1] "kernlab"
# For example, the alpha coefficients and indices of the support vectors, and the intercept b
alpha(svp)
## [[1]]
## [1] 16.09341907 0.58890670 4.02520413 3.76268068 60.48766396
## [6] 0.30189686 0.26887470 0.19591706 0.02541916 19.13220772
## [11] 42.23342125 100.00000000 0.30454048 0.44897632 76.85148936
## [16] 100.00000000 100.00000000 1.08702113 0.01486853
alphaindex(svp)
## [[1]]
## [1] 1 4 5 7 12 16 23 29 31 38 42 44 65 82 85 99 102 111 114
b(svp)
## [1] -0.296334
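These pieces are enough to rebuild the decision function by hand: in kernlab the decision value of a point x is the sum over support vectors of coef_j * K(x, x_j) minus b(svp), since b() stores the negative intercept. A minimal sketch (sv, K and fmanual are illustrative names) that should reproduce the fitted labels on the training set:
# Recompute the decision values manually from the support vectors
sv <- xtrain[alphaindex(svp)[[1]], ]         # the support vectors
K <- kernelMatrix(kernelf(svp), xtrain, sv)  # kernel between training points and SVs
fmanual <- K %*% coef(svp)[[1]] - b(svp)     # decision values
table(sign(fmanual), fitted(svp))            # signs should match the fitted labels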
# Use the built-in function to pretty-plot the classifier
plot(svp,data=xtrain)
### Predict with an SVM
# Predict labels on the test set
ypred <- predict(svp, xtest)
table(ytest,ypred)
## ypred
## ytest -1 1
## -1 17 0
## 1 0 13
# Compute accuracy
sum(ypred==ytest)/length(ytest)
## [1] 1
# Compute the prediction (decision) scores
ypredscore <- predict(svp, xtest, type="decision")
# Check that the predicted labels are the signs of the scores
table(ypredscore > 0,ypred)
## ypred
## -1 1
## FALSE 17 0
## TRUE 0 13
# ROCR: package to compute ROC curves, precision-recall curves, etc.
library(ROCR)
pred <- prediction(ypredscore,ytest)
# Plot ROC curve
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
plot(perf)
# Plot precision/recall curve
perf <- performance(pred, measure = "prec", x.measure = "rec")
plot(perf)
# Plot accuracy as function of threshold
perf <- performance(pred, measure = "acc")
plot(perf)
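ROCR can also summarize the ROC curve with a single number, the area under the curve (AUC):
# Compute the area under the ROC curve
perf.auc <- performance(pred, measure = "auc")
perf.auc@y.values[[1]]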
## Decision Tree
Let's illustrate this with an example. Suppose you want to play pickleball on a particular day, say Saturday: how do you decide whether to play or not? You might check whether it is hot or cold, check the wind speed and the humidity, and look at the weather, i.e. whether it is sunny, cloudy, or rainy. You take all of these factors into account to decide whether or not to play.
library(rpart)
library(rpart.plot)
set.seed(1)
ntr <- 1000
ntst <- 100
pb <- data.frame(Weather=sample(c("Sunny","Cloudy","Rainy"),ntr,replace=TRUE),
                 Temperature=sample(c("Hot","Cold","Mild"),ntr,replace=TRUE),
                 Humidity=sample(c("High","Normal"),ntr,replace=TRUE),
                 Wind=sample(c("Weak","Strong"),ntr,replace=TRUE),
                 Play=sample(c("No","Yes"),ntr,replace=TRUE))
tree <- rpart(Play~.,data=pb,method="class")
rpart.plot(tree)
pb.test <- data.frame(Weather=sample(c("Sunny","Cloudy","Rainy"),ntst,replace=TRUE),
                      Temperature=sample(c("Hot","Cold","Mild"),ntst,replace=TRUE),
                      Humidity=sample(c("High","Normal"),ntst,replace=TRUE),
                      Wind=sample(c("Weak","Strong"),ntst,replace=TRUE),
                      Play=sample(c("No","Yes"),ntst,replace=TRUE))
# Predict on the test set and build the confusion matrix
pre <- predict(tree, pb.test, type="class")
table(pre, pb.test$Play)
##
## pre No Yes
## No 22 22
## Yes 25 31
Since Play was generated independently of the other variables, the tree cannot do much better than chance on this random data. On data with real structure, such as the built-in iris dataset, rpart recovers meaningful splits:
tree <- rpart(Species ~ ., data=iris, method="class")
rpart.plot(tree)
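As a quick check we can look at the in-sample confusion matrix of the iris tree (a minimal sketch; for an honest error estimate you would again hold out a test set):
# In-sample confusion matrix for the iris tree
iris.pred <- predict(tree, iris, type="class")
table(iris.pred, iris$Species)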