library(rgl)
data(mtcars)
# Fit a linear model of mpg on cyl and hp
mylm <- lm(mpg ~ cyl + hp, data = mtcars)
# Plot the observations in 3D
plot3d(mtcars$cyl, mtcars$hp, mtcars$mpg, xlab="cyl", ylab="hp", zlab="mpg", size=10, col="blue")
# Add the fitted regression plane a*x + b*y + c*z + d = 0
planes3d(a=coef(mylm)[2], b=coef(mylm)[3], c=-1, d=coef(mylm)[1], alpha=0.3)
# Fitted values: each observation dropped vertically onto the plane
myproj <- data.frame(cyl=mtcars$cyl, hp=mtcars$hp, mpg=mylm$fitted.values)
plot3d(myproj, col="red", size=10, add=TRUE)
# Draw residual segments from each observation to its fitted value
mylist <- list(mtcars[,c("cyl", "hp", "mpg")], myproj)
segments3d(do.call(rbind, mylist)[order(sequence(sapply(mylist, nrow))),])
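The plane drawn by planes3d() has equation a*x + b*y + c*z + d = 0, so with a = coef(mylm)[2], b = coef(mylm)[3], c = -1 and d = coef(mylm)[1] it is exactly the fitted surface mpg = intercept + b1*cyl + b2*hp. A quick sanity check (a minimal sketch; cf and plane_mpg are just illustrative names):
# Solving the plane equation for mpg should reproduce the fitted values
cf <- unname(coef(mylm))
plane_mpg <- cf[1] + cf[2]*mtcars$cyl + cf[3]*mtcars$hp
all.equal(unname(fitted(mylm)), plane_mpg)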
The key idea of the SVM is to find a hyperplane that separates the two
classes with the widest possible margin.
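Formally, for linearly separable data the (hard-margin) linear SVM looks for the hyperplane w'x + b = 0 that solves
minimize (1/2)*||w||^2 subject to y_i*(w'x_i + b) >= 1 for all i,
which makes the margin 2/||w|| as large as possible. The soft-margin variant used below relaxes the constraints with slack variables whose total violation is penalized by the cost parameter C.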
Here we generate a toy dataset in 2D and learn how to train and test an SVM.
n <- 150 # number of data points
p <- 2 # dimension
sigma <- 1 # variance of the distribution
meanpos <- 0 # centre of the distribution of positive examples
meanneg <- 3 # centre of the distribution of negative examples
npos <- round(n/2) # number of positive examples
nneg <- n-npos # number of negative examples
# Generate the positive and negative examples
xpos <- matrix(rnorm(npos*p,mean=meanpos,sd=sigma),npos,p)
xneg <- matrix(rnorm(nneg*p,mean=meanneg,sd=sigma),nneg,p)
x <- rbind(xpos,xneg)
# Generate the labels
y <- matrix(c(rep(1,npos),rep(-1,nneg)))
# Visualize the data
plot(x,col=ifelse(y>0,1,2))
legend("topleft",c('Positive','Negative'),col=seq(2),pch=1,text.col=seq(2))
Now we split the data into a training set (80%) and a test set (20%):
## Prepare a training and a test set ##
ntrain <- round(n*0.8) # number of training examples
tindex <- sample(n,ntrain) # indices of training samples
xtrain <- x[tindex,]
xtest <- x[-tindex,]
ytrain <- y[tindex]
ytest <- y[-tindex]
istrain <- rep(0,n)   # indicator: 1 = training point, 0 = test point
istrain[tindex] <- 1
# Visualize
plot(x,col=ifelse(y>0,1,2),pch=ifelse(istrain==1,1,2))
legend("topleft",c('Positive Train','Positive Test','Negative Train','Negative Test'),
col=c(1,1,2,2),pch=c(1,2,1,2),text.col=c(1,1,2,2))
### Train an SVM
Now we train an SVM with cost parameter C=100 on the training set. Note that
ksvm() uses the Gaussian RBF kernel by default; pass kernel="vanilladot" to
train a truly linear SVM.
# load the kernlab package
library(kernlab)
# Train a C-SVM with cost C=100; scaled=c() disables scaling of the variables
svp <- ksvm(xtrain, ytrain, type="C-svc", C=100, scaled=c())
# General summary
svp
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 100
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 0.340996078863984
##
## Number of Support Vectors : 19
##
## Objective Function Value : -469.3637
## Training error : 0.016667
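The summary above reports only the training error. ksvm() can also estimate a k-fold cross-validation error on the training set via the cross argument, retrieved with the cross() accessor (a minimal sketch; svp.cv is just an illustrative name):
# Refit with 5-fold cross-validation to get a less optimistic error estimate
svp.cv <- ksvm(xtrain, ytrain, type="C-svc", C=100, scaled=c(), cross=5)
cross(svp.cv)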
# Attributes that you can access
attributes(svp)
## $param
## $param$C
## [1] 100
##
##
## $scaling
## NULL
##
## $coef
## $coef[[1]]
## [1] -16.09341907 -0.58890670 -4.02520413 -3.76268068 -60.48766396
## [6] 0.30189686 0.26887470 0.19591706 0.02541916 19.13220772
## [11] 42.23342125 100.00000000 0.30454048 0.44897632 -76.85148936
## [16] -100.00000000 100.00000000 -1.08702113 -0.01486853
##
##
## $alphaindex
## $alphaindex[[1]]
## [1] 1 4 5 7 12 16 23 29 31 38 42 44 65 82 85 99 102 111 114
##
##
## $b
## [1] -0.296334
##
## $obj
## [1] -469.3637
##
## $SVindex
## [1] 1 4 5 7 12 16 23 29 31 38 42 44 65 82 85 99 102 111 114
##
## $nSV
## [1] 19
##
## $prior
## $prior[[1]]
## $prior[[1]]$prior1
## [1] 62
##
## $prior[[1]]$prior0
## [1] 58
##
##
##
## $prob.model
## $prob.model[[1]]
## NULL
##
##
## $alpha
## $alpha[[1]]
## [1] 16.09341907 0.58890670 4.02520413 3.76268068 60.48766396
## [6] 0.30189686 0.26887470 0.19591706 0.02541916 19.13220772
## [11] 42.23342125 100.00000000 0.30454048 0.44897632 76.85148936
## [16] 100.00000000 100.00000000 1.08702113 0.01486853
##
##
## $type
## [1] "C-svc"
##
## $kernelf
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 0.340996078863984
##
## $kpar
## list()
##
## $xmatrix
## $xmatrix[[1]]
## X1 X2
## 1 4.1473184 0.9298683
## 4 2.0893264 3.4749399
## 5 1.3978777 2.7888444
## 7 2.4614029 3.1557118
## 12 1.5797594 2.0632175
## 16 -1.5250870 -1.5487910
## 23 1.1071430 -1.9028421
## 29 -1.7305234 -0.5864856
## 31 -1.3729612 -1.7798993
## 38 0.5281332 2.0374797
## 42 3.2863057 0.8051940
## 44 1.6821866 2.1375101
## 65 -0.3680327 -2.1465153
## 82 2.5472261 0.6675309
## 85 2.4945606 1.2333967
## 99 1.4813342 1.8761347
## 102 1.8426995 1.5137857
## 111 4.7132771 5.0511625
## 114 3.5354808 5.0494387
##
##
## $ymatrix
## [1] -1 -1 1 -1 -1 -1 -1 1 -1 -1 -1 -1 1 1 1 1 1 -1 -1 -1 -1 -1 1 1 -1
## [26] -1 1 -1 1 1 1 -1 1 1 1 -1 -1 1 1 -1 -1 1 1 1 -1 -1 1 -1 1 -1
## [51] -1 1 -1 1 1 -1 -1 1 1 -1 1 -1 -1 -1 1 -1 1 -1 1 1 -1 1 -1 -1 1
## [76] 1 -1 1 -1 -1 1 1 1 1 -1 -1 1 -1 1 1 1 1 -1 1 -1 1 -1 1 -1 1
## [101] 1 1 -1 -1 -1 1 1 1 1 -1 -1 1 1 -1 1 1 1 1 -1 -1
##
## $fitted
## [1] -1 -1 1 -1 -1 -1 -1 1 -1 -1 -1 -1 1 1 1 1 1 -1 -1 -1 -1 -1 1 1 -1
## [26] -1 1 -1 1 1 1 -1 1 1 1 -1 -1 1 1 -1 -1 1 1 -1 -1 -1 1 -1 1 -1
## [51] -1 1 -1 1 1 -1 -1 1 1 -1 1 -1 -1 -1 1 -1 1 -1 1 1 -1 1 -1 -1 1
## [76] 1 -1 1 -1 -1 1 1 1 1 -1 -1 1 -1 1 1 1 1 -1 1 -1 1 -1 1 -1 1
## [101] 1 -1 -1 -1 -1 1 1 1 1 -1 -1 1 1 -1 1 1 1 1 -1 -1
##
## $lev
## [1] -1 1
##
## $nclass
## [1] 2
##
## $error
## [1] 0.01666667
##
## $cross
## [1] -1
##
## $n.action
## function (object, ...)
## UseMethod("na.omit")
## <bytecode: 0x000002e31d9046f8>
## <environment: namespace:stats>
##
## $terms
## NULL
##
## $kcall
## .local(x = x, y = ..1, scaled = ..4, type = "C-svc", C = 100)
##
## $class
## [1] "ksvm"
## attr(,"package")
## [1] "kernlab"
# For example, the alpha coefficients and indices of the support vectors, and the intercept b
alpha(svp)
## [[1]]
## [1] 16.09341907 0.58890670 4.02520413 3.76268068 60.48766396
## [6] 0.30189686 0.26887470 0.19591706 0.02541916 19.13220772
## [11] 42.23342125 100.00000000 0.30454048 0.44897632 76.85148936
## [16] 100.00000000 100.00000000 1.08702113 0.01486853
alphaindex(svp)
## [[1]]
## [1] 1 4 5 7 12 16 23 29 31 38 42 44 65 82 85 99 102 111 114
b(svp)
## [1] -0.296334
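These pieces are enough to rebuild the decision function by hand: in kernlab the decision value of a point x is the sum over support vectors of coef_j * K(x, x_j) minus b(svp), since b() stores the negative intercept. A minimal sketch (sv, K and fmanual are illustrative names) that should reproduce the fitted labels on the training set:
# Recompute the decision values manually from the support vectors
sv <- xtrain[alphaindex(svp)[[1]], ]         # the support vectors
K <- kernelMatrix(kernelf(svp), xtrain, sv)  # kernel between training points and SVs
fmanual <- K %*% coef(svp)[[1]] - b(svp)     # decision values
table(sign(fmanual), fitted(svp))            # signs should match the fitted labels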
# Use the built-in function to pretty-plot the classifier
plot(svp,data=xtrain)
### Predict with an SVM
# Predict labels on the test set
ypred <- predict(svp, xtest)
table(ytest,ypred)
## ypred
## ytest -1 1
## -1 17 0
## 1 0 13
# Compute accuracy
sum(ypred==ytest)/length(ytest)
## [1] 1
# Compute the prediction (decision) scores
ypredscore <- predict(svp, xtest, type="decision")
# Check that the predicted labels are the signs of the scores
table(ypredscore > 0,ypred)
## ypred
## -1 1
## FALSE 17 0
## TRUE 0 13
# ROCR: package to compute ROC curves, precision-recall curves, etc.
library(ROCR)
pred <- prediction(ypredscore,ytest)
# Plot ROC curve
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
plot(perf)
# Plot precision/recall curve
perf <- performance(pred, measure = "prec", x.measure = "rec")
plot(perf)
# Plot accuracy as function of threshold
perf <- performance(pred, measure = "acc")
plot(perf)
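ROCR can also summarize the ROC curve with a single number, the area under the curve (AUC):
# Compute the area under the ROC curve
perf.auc <- performance(pred, measure = "auc")
perf.auc@y.values[[1]]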
## Decision Tree
Let's illustrate this with an example. Suppose you want to play pickleball on a particular day, say Saturday: how do you decide whether to play or not? You might check whether it is hot or cold, check the wind speed and the humidity, and look at the weather, i.e. whether it is sunny, cloudy, or rainy. You take all of these factors into account to decide whether or not to play.
library(rpart)
library(rpart.plot)
set.seed(1)
ntr <- 1000
ntst <- 100
pb <- data.frame(Weather=sample(c("Sunny","Cloudy","Rainy"),ntr,replace=TRUE),
                 Temperature=sample(c("Hot","Cold","Mild"),ntr,replace=TRUE),
                 Humidity=sample(c("High","Normal"),ntr,replace=TRUE),
                 Wind=sample(c("Weak","Strong"),ntr,replace=TRUE),
                 Play=sample(c("No","Yes"),ntr,replace=TRUE))
tree <- rpart(Play~.,data=pb,method="class")
rpart.plot(tree)
pb.test <- data.frame(Weather=sample(c("Sunny","Cloudy","Rainy"),ntst,replace=TRUE),
                      Temperature=sample(c("Hot","Cold","Mild"),ntst,replace=TRUE),
                      Humidity=sample(c("High","Normal"),ntst,replace=TRUE),
                      Wind=sample(c("Weak","Strong"),ntst,replace=TRUE),
                      Play=sample(c("No","Yes"),ntst,replace=TRUE))
# Predict on the test set and build the confusion matrix
pre <- predict(tree, pb.test, type="class")
table(pre, pb.test$Play)
##
## pre No Yes
## No 22 22
## Yes 25 31
Since Play was generated independently of the other variables, the tree cannot do much better than chance on this random data. On data with real structure, such as the built-in iris dataset, rpart recovers meaningful splits:
tree <- rpart(Species ~ ., data=iris, method="class")
rpart.plot(tree)
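As a quick check we can look at the in-sample confusion matrix of the iris tree (a minimal sketch; for an honest error estimate you would again hold out a test set):
# In-sample confusion matrix for the iris tree
iris.pred <- predict(tree, iris, type="class")
table(iris.pred, iris$Species)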