Understanding support hyperplane

# ---- Simulate a two-class data set ----
# Fix the RNG seed so that the simulated data are reproducible.
set.seed(123)

# Sample size.
n <- 500

# Half-width of the margin band around the true boundary, measured along the
# x2 axis (any value works, but it changes how cleanly the classes separate).
delta <- 0.05

# Two predictors, each drawn independently from the uniform distribution on
# [0, 1]: runif() samples from a uniform distribution, not a normal one.
df <- data.frame(x1 = runif(n), x2 = runif(n))

# Class label y: -1 when the point (x1, x2) lies below the line through the
# origin with slope 1.5 (x2 = 1.5 * x1), and +1 when it lies on or above it.
df$y <- with(df, factor(ifelse(x2 - 1.5 * x1 < 0, -1, 1), levels = c(-1, 1)))

# Keep only the points whose vertical distance to the line exceeds delta,
# i.e. the points strictly outside the margin band. df itself is left intact.
df1 <- df[with(df, abs(1.5 * x1 - x2) > delta), ]

# ggplot2 supplies the plotting functions used below.
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3

# Scatter plot of the points kept in df1, coloured by class label y
# (the manual palette assigns red and blue to the two levels of y).
plot_margins <- ggplot(df1, aes(x1, x2, colour = y)) +
  geom_point() +
  scale_colour_manual(values = c("red", "blue"))

# Overlay the true boundary x2 = 1.5 * x1 (solid) and the two edges of the
# margin band, shifted by +delta and -delta along x2 (dashed).
plot_margins <- plot_margins +
  geom_abline(intercept = 0, slope = 1.5) +
  geom_abline(intercept = delta, slope = 1.5, linetype = "dashed") +
  geom_abline(intercept = -delta, slope = 1.5, linetype = "dashed")

# Printing the object at top level renders the figure.
plot_margins

Plot the SVM using the \(\textsf{svm()}\) output from the \(\textsf{e1071}\) package.

The svm() function

# Load the e1071 package, which provides svm()
library(e1071)

# Fit a linear SVM classifier of y on all other columns of the full data frame df
# (the unfiltered data, not df1).
#   type   = "C-classification": soft-margin classification controlled by `cost`;
#            `cost` is not passed here, so svm() uses its default value.
#   kernel = "linear": the decision boundary is a straight line in the x1-x2 plane.
#   scale  = FALSE: x1 and x2 are used as-is, so SV, coefs and rho are expressed in
#            the original coordinates and can be drawn directly on the scatter plot.
# NOTE(review): set.seed() presumably just mirrors the seed used for data generation;
# it is not obvious that this fit involves any randomness -- confirm.
set.seed(123)
svm.fit <- svm(y ~ ., 
                data = df, 
                type = "C-classification", 
                kernel = "linear", 
                scale = FALSE)

# Take a look at the elements inside the svm object
names(svm.fit)
##  [1] "call"            "type"            "kernel"          "cost"           
##  [5] "degree"          "gamma"           "coef0"           "nu"             
##  [9] "epsilon"         "sparse"          "scaled"          "x.scale"        
## [13] "y.scale"         "nclasses"        "levels"          "tot.nSV"        
## [17] "nSV"             "labels"          "SV"              "index"          
## [21] "rho"             "compprob"        "probA"           "probB"          
## [25] "sigma"           "coefs"           "na.action"       "xlevels"        
## [29] "fitted"          "decision.values" "terms"
# Inspect the pieces needed to reconstruct the decision boundary: the support-vector
# indices (index), their coordinates (SV), the negative intercept (rho) and the
# signed coefficients (coefs).
# Row indices in df of the observations selected as support vectors (145 here)
svm.fit$index
##   [1]   1  13  40  64  66  72  77  82 108 113 121 131 148 149 153 159 167 168
##  [19] 186 196 208 209 211 221 224 233 243 245 250 251 252 265 268 270 280 286
##  [37] 290 310 312 322 324 329 336 343 344 345 349 357 367 369 370 374 382 385
##  [55] 388 390 399 406 414 415 421 427 428 429 440 449 454 462 463 464 476 487
##  [73] 493   6   9  17  27  30  35  36  39  48  63  79  98  99 102 110 117 119
##  [91] 124 127 129 141 142 144 152 154 157 160 161 164 169 185 187 192 204 207
## [109] 210 212 226 227 239 241 287 293 298 309 319 326 328 335 351 364 365 368
## [127] 375 383 387 389 405 418 438 444 448 455 459 465 466 473 477 478 479 481
## [145] 492
# The coordinates of the support vectors on the x1-x2 plane (first six rows only);
# the row names are the same df row indices listed above
head(svm.fit$SV)
##           x1        x2
## 1  0.2875775 0.3536061
## 13 0.6775706 0.8601068
## 40 0.2316258 0.2745454
## 64 0.2743836 0.2010806
## 66 0.4485163 0.6537667
## 72 0.6292211 0.9060481
# rho is the NEGATIVE INTERCEPT of the fitted decision function (boundary: w . x - rho = 0).
# It is not the soft-margin cost C and not the nu parameter; those are the separate
# elements "cost" and "nu" of the fitted object.
svm.fit$rho
## [1] 0.04565241
# Signed coefficients of the support vectors (per the e1071 documentation: the dual
# coefficients multiplied by the training labels). The tabulation shows that most of
# them sit exactly at -1 or +1 (71 each) and only three lie strictly in between.
table(svm.fit$coefs)
## 
##                 -1 -0.825610720102249  0.400890280950286  0.424720439151963 
##                 71                  1                  1                  1 
##                  1 
##                 71

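Note in particular that \(\textsf{rho}\) is \(\textit{not}\) a regularization parameter: it is the negative intercept of the fitted decision function, \(f(x) = w^{T}x - \textsf{rho}\). The \(\textit{soft margin}\) penalty is set by the cost parameter \(C > 0\) (the \(\textsf{cost}\) argument of \(\textsf{svm()}\)). In the alternative \(\textit{ν}\)-parameterization (\(\textit{ν}\) is pronounced \(\textsf{nu}\); it is the \(\textsf{nu}\) argument used with \(\textsf{type = "nu-classification"}\)), \(C\) is replaced by \(\textit{ν}\) \(\in\) (0, 1], which is an upper bound on the fraction of examples lying on the wrong side of their margin and a lower bound on the fraction of examples that are support vectors (just a matter of preference … ). For the \(C\)-parameterization used here, the dual problem is given below, and \(\textsf{coefs}\) stores the signed multipliers \(\alpha_{i}y_{i}\) of the support vectors:

\[\begin{align*} \textrm{Max}\quad& \mathcal{L} = \sum_{i=1}^{l}\alpha_{i} - \frac{1}{2}\sum_{i,j}\alpha_{i}\alpha_{j}y_{i}y_{j}x^{T}_{i}x_{j}\\ \textrm{s.t.}\quad& \sum_{i}\alpha_{i}y_{i} = 0, \mbox{ 0 $\leq$ $\alpha_{i}$ $\leq$ $C$}. \end{align*}\]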

Plotting

# Quick look first: the plot method for the fitted svm object, applied to df.
plot(svm.fit, data = df)

# A more detailed figure built with ggplot2.
# Start from a scatter plot of every observation in df, coloured by class label y.
p <- ggplot(df, aes(x1, x2, colour = y)) +
  geom_point() +
  scale_colour_manual(values = c("red", "blue"))

# Highlight the support vectors: the rows of df listed in svm.fit$index are drawn
# again as larger, semi-transparent purple points. This layer gets its own data
# subset, and its fixed colour is set outside aes() so it is not mapped to y.
p <- p +
  geom_point(
    data = df[svm.fit$index, ],
    mapping = aes(x1, x2),
    colour = "purple", size = 4, alpha = 0.5
  )

# Render the figure.
p

# Recover the weight vector w of the linear decision function from the fit:
# w = t(coefs) %*% SV. The product is conformable because t(svm.fit$coefs) has
# one column per support vector and svm.fit$SV has one row per support vector
# (check: ncol(t(svm.fit$coefs)) == nrow(svm.fit$SV)). SV has two columns, one
# per predictor (x1 and x2), so w is a 1 x 2 matrix.
w <- t(svm.fit$coefs) %*% svm.fit$SV

# Inspect w
w
##            x1        x2
## [1,] 7.199356 -4.811942

# The fitted boundary is w[x1] * x1 + w[x2] * x2 - rho = 0. Solving for x2 gives
# a straight line with the slope and intercept below (rho is the negative
# intercept of the decision function, hence the plus sign once it moves across).
slope <- -w[1, "x1"] / w[1, "x2"]
intercept <- svm.fit$rho / w[1, "x2"]

# Draw the boundary (solid) and the two margin lines (dashed). The margins are
# where the decision function equals -1 and +1, which shifts the intercept by
# 1 / w[x2] in either direction.
p <- p +
  geom_abline(intercept = intercept, slope = slope) +
  geom_abline(intercept = intercept - 1 / w[1, "x2"], slope = slope, linetype = "dashed") +
  geom_abline(intercept = intercept + 1 / w[1, "x2"], slope = slope, linetype = "dashed")

# Render the updated figure.
p