# Inner cluster: 20 points drawn from N(0, 1.3^2) on each axis.
# The RNG is re-seeded before every draw so each coordinate vector is
# reproducible on its own (seed 1 for x, seed 2 for y).
set.seed(1)
small.x <- rnorm(20, mean = 0, sd = 1.3)
set.seed(2)
small.y <- rnorm(20, mean = 0, sd = 1.3)

# Two coordinate columns plus a constant third column holding the label 1.
small.coords <- cbind(data.frame(small.x, small.y), 1)

# Quick look at the raw cluster.
ggplot(data = small.coords, mapping = aes(small.x, small.y)) +
  geom_point()
# Wide cloud: 1000 points drawn from N(0, 8^2) on each axis.
# The same seeds as the inner cluster are used (1 for x, 2 for y), so the
# random streams restart here.
set.seed(1)
big.x <- rnorm(1000, mean = 0, sd = 8)
set.seed(2)
big.y <- rnorm(1000, mean = 0, sd = 8)
big.coords <- data.frame(big.x, big.y)

# Quick look at the full cloud before any filtering.
ggplot(data = big.coords, mapping = aes(big.x, big.y)) +
  geom_point()
# Carve a ring out of the wide cloud: keep only the points whose Euclidean
# distance from the origin lies in [5, 8].
#
# This is done with a vectorized mask instead of a row-by-row loop that
# overwrote rejected rows with (0, 0) and then filtered on `big.x != 0`.
# The sentinel approach compared floats with `!=` and would also have
# discarded a legitimate ring point whose x coordinate happened to be
# exactly 0. Original row names are preserved, as they were with subset().
ring.radius <- sqrt(big.coords[, 1]^2 + big.coords[, 2]^2)
big.coords <- big.coords[ring.radius >= 5 & ring.radius <= 8, ]

# Constant third column holding the label 2.
big.coords <- cbind(big.coords, 2)
# Stack the ring (label 2) on top of the inner cluster (label 1). Both are
# converted to matrices first so the rows bind positionally regardless of
# their differing column names; the columns are then given common names.
coords <- as.data.frame(
  rbind(as.matrix(big.coords), as.matrix(small.coords))
)
names(coords) <- c("x.coords", "y.coords", "Class")

# The label is categorical, so store it as a factor for svm() and ggplot().
coords$Class <- factor(coords$Class)

# Scatter plot of the two classes in the original (input) space.
a1 <- ggplot(coords, aes(x.coords, y.coords, colour = Class)) +
  geom_point() +
  labs(colour = "Class", title = "Input Space") +
  xlab(expression(X[1])) +
  ylab(expression(X[2]))
a1
# Fit a C-classification SVM with a degree-2 polynomial kernel on the
# ring-vs-cluster data. The cost is set very high, so margin violations
# are penalised heavily.
svmfit <- svm(
  Class ~ x.coords + y.coords,
  data = coords,
  kernel = "polynomial",
  cost = 1538065,
  gamma = 1,
  degree = 2
)

# True labels and in-sample predictions, used for the error summary below.
svm.y <- coords$Class
svm.predy <- predict(svmfit, newdata = coords)

# Model summary (recorded output follows).
svmfit
##
## Call:
## svm(formula = Class ~ x.coords + y.coords, data = coords, kernel = "polynomial",
## cost = 1538065, gamma = 1, degree = 2)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 1538065
## degree: 2
## gamma: 1
## coef.0: 0
##
## Number of Support Vectors: 4

# Training misclassification rate.
mean(svm.predy != svm.y)
## [1] 0

# Confusion matrix: rows are true labels, columns are predictions.
table(svm.y, svm.predy)
## svm.predy
## svm.y 1 2
## 1 20 0
## 2 0 206

# Built-in e1071 plot of the fitted classifier.
plot(svmfit, coords)
# Build a 150 x 150 lattice spanning the bounding box of the training data
# and classify every lattice point to visualise the decision regions.
xgrid <- expand.grid(
  X1 = seq(min(coords$x.coords), max(coords$x.coords), length.out = 150),
  X2 = seq(min(coords$y.coords), max(coords$y.coords), length.out = 150)
)
# predict() looks the predictors up by name, so match the training columns.
colnames(xgrid) <- c("x.coords", "y.coords")
group.train.set.pred <- predict(svmfit, newdata = xgrid)
xgrid <- cbind(xgrid, group.train.set.pred)

# Faint lattice points show the predicted region; training points are
# drawn on top, with support vectors marked by crosses (shape 4).
a3 <- ggplot(xgrid, aes(x.coords, y.coords)) +
  geom_point(aes(colour = group.train.set.pred), alpha = 0.1) +
  geom_point(
    data = coords[-svmfit$index, ],
    mapping = aes(x.coords, y.coords, colour = Class)
  ) +
  geom_point(
    data = coords[svmfit$index, ],
    mapping = aes(x.coords, y.coords, colour = Class),
    shape = 4
  ) +
  labs(colour = "Class", title = "Decision Surface SVM Polynomial Kernel") +
  xlab(expression(X[1])) +
  ylab(expression(X[2]))
a3
# Explicit feature map for the homogeneous degree-2 polynomial kernel:
#   (x1, x2) -> (x1^2, x2^2, sqrt(2) * x1 * x2)
# The dot product of two mapped points equals (x . y)^2, i.e. the kernel
# used in the polynomial fit (degree = 2, gamma = 1, coef.0 = 0).
#
# Class is stored as its numeric factor code. This was previously an
# implicit side effect of pushing the factor through cbind(); it is now
# stated explicitly.
squared.coords <- data.frame(
  X1 = coords$x.coords^2,
  X2 = coords$y.coords^2,
  X3 = sqrt(2) * coords$x.coords * coords$y.coords,
  Class = as.numeric(coords$Class)
)

# 3D scatter of the mapped points, coloured by class.
# In plot3D, `col` is a palette applied to `colvar` (which defaults to z),
# so passing the class codes as `col` did not colour points by class.
# Supply the class as `colvar` together with a two-colour palette instead.
scatter3D(
  x = squared.coords$X1, y = squared.coords$X2, z = squared.coords$X3,
  colvar = squared.coords$Class, col = c("#F8766D", "#00BFC4"),
  phi = 10, bty = "g", ticktype = "detailed",
  pch = 16, xlab = "X", ylab = "Y", zlab = "Z"
)

# Single-colour view with a reference plane added.
s3d <- scatterplot3d(squared.coords[, 1:3],
  color = c("#00BFC4"), angle = 40,
  xlab = "X1^2", ylab = "X2^2", zlab = "sqrt(2)*X1*X2", pch = 16
)
# plane3d(intercept, x.coef, y.coef): this draws the horizontal plane z = 6.
# NOTE(review): confirm this is the intended plane to display.
s3d$plane3d(6, 0, 0)

# Final feature-space figure, coloured by class with ggplot-like colours.
scatterplot3d(squared.coords[, 1:3],
  color = ifelse(squared.coords$Class == 1, "#F8766D", "#00BFC4"),
  angle = 30,
  xlab = expression(X[1]^2), ylab = expression(X[2]^2),
  zlab = expression(sqrt(2) * X[1] * X[2]),
  grid = TRUE, # spelled out: `T` is an ordinary, reassignable variable
  pch = 16,
  main = "Feature Space"
)
# Refit on the same ring-vs-cluster data with a linear kernel for
# comparison; the cost is unchanged from the polynomial fit.
svmfit <- svm(
  Class ~ x.coords + y.coords,
  data = coords,
  kernel = "linear",
  cost = 1538065
)

# True labels and in-sample predictions.
svm.y <- coords$Class
svm.predy <- predict(svmfit, newdata = coords)

# Model summary (recorded output follows).
svmfit
##
## Call:
## svm(formula = Class ~ x.coords + y.coords, data = coords, kernel = "linear",
## cost = 1538065)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 1538065
## gamma: 0.5
##
## Number of Support Vectors: 55

# Training misclassification rate.
mean(svm.predy != svm.y)
## [1] 0.3982301

# Confusion matrix: rows are true labels, columns are predictions.
table(svm.y, svm.predy)
## svm.predy
## svm.y 1 2
## 1 0 20
## 2 70 136

# Built-in e1071 plot of the fitted classifier.
plot(svmfit, coords)
# 150 x 150 lattice over the bounding box of the data, classified by the
# linear-kernel model to show its decision regions.
xgrid <- expand.grid(
  X1 = seq(min(coords$x.coords), max(coords$x.coords), length.out = 150),
  X2 = seq(min(coords$y.coords), max(coords$y.coords), length.out = 150)
)
# Rename so predict() finds the predictors used in the model formula.
colnames(xgrid) <- c("x.coords", "y.coords")
group.train.set.pred <- predict(svmfit, newdata = xgrid)
xgrid <- cbind(xgrid, group.train.set.pred)

# Background: predicted class per lattice point. Foreground: training
# points, with support vectors drawn as crosses (shape 4).
a2 <- ggplot(xgrid, aes(x.coords, y.coords)) +
  geom_point(aes(colour = group.train.set.pred), alpha = 0.1) +
  geom_point(
    data = coords[-svmfit$index, ],
    mapping = aes(x.coords, y.coords, colour = Class)
  ) +
  geom_point(
    data = coords[svmfit$index, ],
    mapping = aes(x.coords, y.coords, colour = Class),
    shape = 4
  ) +
  labs(colour = "Class", title = "Decision Surface SVM Linear Kernel") +
  xlab(expression(X[1])) +
  ylab(expression(X[2]))
a2
# Second example: two well-separated groups of 20 points each.

# Class 1: normal on both axes, centred at (1.2, 0.8).
set.seed(1)
Class.1.x <- rnorm(20, mean = 1.2, sd = 0.8)
set.seed(3)
Class.1.y <- rnorm(20, mean = 0.8, sd = 0.8)
Class.1 <- cbind(data.frame(Class.1.x, Class.1.y), Class = 1)
ggplot(data = Class.1, mapping = aes(Class.1.x, Class.1.y)) +
  geom_point()

# Class 2: normal x centred at -1, uniform y on [-3, -0.5].
# The seeds are reset to the same values used for class 1.
set.seed(1)
Class.2.x <- rnorm(20, mean = -1, sd = 0.8)
set.seed(3)
Class.2.y <- runif(20, min = -3, max = -0.5)
Class.2 <- cbind(data.frame(Class.2.x, Class.2.y), 2)
ggplot(data = Class.2, mapping = aes(Class.2.x, Class.2.y)) +
  geom_point()

# Stack both classes positionally (via matrices) and give the columns
# common names; the label becomes a factor.
Class.Coords <- as.data.frame(
  rbind(as.matrix(Class.1), as.matrix(Class.2))
)
names(Class.Coords) <- c("x.coords", "y.coords", "Class")
Class.Coords$Class <- factor(Class.Coords$Class)

# Scatter plot of the two classes in the input space.
b1 <- ggplot(Class.Coords, aes(x.coords, y.coords, colour = Class)) +
  geom_point() +
  labs(colour = "Class", title = "Input Space") +
  xlab(expression(X[1])) +
  ylab(expression(X[2]))
b1
# Linear-kernel SVM on the well-separated two-class data, with the same
# very large cost as before.
svmfit <- svm(
  Class ~ x.coords + y.coords,
  data = Class.Coords,
  kernel = "linear",
  cost = 1538065
)

# True labels and in-sample predictions.
svm.y <- Class.Coords$Class
svm.predy <- predict(svmfit, newdata = Class.Coords)

# Model summary (recorded output follows).
svmfit
##
## Call:
## svm(formula = Class ~ x.coords + y.coords, data = Class.Coords,
## kernel = "linear", cost = 1538065)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 1538065
## gamma: 0.5
##
## Number of Support Vectors: 3

# Training misclassification rate.
mean(svm.predy != svm.y)
## [1] 0

# Confusion matrix: rows are true labels, columns are predictions.
table(svm.y, svm.predy)
## svm.predy
## svm.y 1 2
## 1 20 0
## 2 0 20

# Built-in e1071 plot of the fitted classifier.
plot(svmfit, Class.Coords)
# 150 x 150 lattice over the bounding box of the separable data, classified
# by the current linear model to show its decision regions.
xgrid <- expand.grid(
  X1 = seq(min(Class.Coords$x.coords), max(Class.Coords$x.coords),
    length.out = 150
  ),
  X2 = seq(min(Class.Coords$y.coords), max(Class.Coords$y.coords),
    length.out = 150
  )
)
# Rename so predict() finds the predictors used in the model formula.
colnames(xgrid) <- c("x.coords", "y.coords")
group.train.set.pred <- predict(svmfit, newdata = xgrid)
xgrid <- cbind(xgrid, group.train.set.pred)

# Background: predicted class per lattice point. Foreground: training
# points, with support vectors drawn as crosses (shape 4).
b2 <- ggplot(xgrid, aes(x.coords, y.coords)) +
  geom_point(aes(colour = group.train.set.pred), alpha = 0.1) +
  geom_point(
    data = Class.Coords[-svmfit$index, ],
    mapping = aes(x.coords, y.coords, colour = Class)
  ) +
  geom_point(
    data = Class.Coords[svmfit$index, ],
    mapping = aes(x.coords, y.coords, colour = Class),
    shape = 4
  ) +
  labs(
    colour = "Class",
    title = "Decision Surface SVM Linear Kernel (Separable)"
  ) +
  xlab(expression(X[1])) +
  ylab(expression(X[2]))
b2
# Third example: the two groups are moved closer together. Class.1, Class.2
# and Class.Coords from the previous example are overwritten here.

# Class 1: normal on both axes, centred at (0.5, 0.8).
set.seed(1)
NS.Class.1.x <- rnorm(20, mean = 0.5, sd = 0.8)
set.seed(3)
NS.Class.1.y <- rnorm(20, mean = 0.8, sd = 0.8)
Class.1 <- cbind(data.frame(NS.Class.1.x, NS.Class.1.y), Class = 1)
ggplot(data = Class.1, mapping = aes(NS.Class.1.x, NS.Class.1.y)) +
  geom_point()

# Class 2: normal x centred at -1, uniform y on [-1.5, 1].
set.seed(1)
NS.Class.2.x <- rnorm(20, mean = -1, sd = 0.8)
set.seed(3)
NS.Class.2.y <- runif(20, min = -1.5, max = 1)
Class.2 <- cbind(data.frame(NS.Class.2.x, NS.Class.2.y), 2)
ggplot(data = Class.2, mapping = aes(NS.Class.2.x, NS.Class.2.y)) +
  geom_point()

# Stack both classes positionally (via matrices) and give the columns
# common names; the label becomes a factor.
Class.Coords <- as.data.frame(
  rbind(as.matrix(Class.1), as.matrix(Class.2))
)
names(Class.Coords) <- c("x.coords", "y.coords", "Class")
Class.Coords$Class <- factor(Class.Coords$Class)

# Scatter plot of the two classes in the input space.
b3 <- ggplot(Class.Coords, aes(x.coords, y.coords, colour = Class)) +
  geom_point() +
  labs(colour = "Class", title = "Input Space") +
  xlab(expression(X[1])) +
  ylab(expression(X[2]))
b3
# Linear-kernel SVM on the closer-together two-class data, again with the
# very large cost.
svmfit <- svm(
  Class ~ x.coords + y.coords,
  data = Class.Coords,
  kernel = "linear",
  cost = 1538065
)

# True labels and in-sample predictions.
svm.y <- Class.Coords$Class
svm.predy <- predict(svmfit, newdata = Class.Coords)

# Model summary (recorded output follows).
svmfit
##
## Call:
## svm(formula = Class ~ x.coords + y.coords, data = Class.Coords,
## kernel = "linear", cost = 1538065)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 1538065
## gamma: 0.5
##
## Number of Support Vectors: 11

# Training misclassification rate.
mean(svm.predy != svm.y)
## [1] 0.1

# Confusion matrix: rows are true labels, columns are predictions.
table(svm.y, svm.predy)
## svm.predy
## svm.y 1 2
## 1 18 2
## 2 2 18

# Built-in e1071 plot of the fitted classifier.
plot(svmfit, Class.Coords)
# 150 x 150 lattice over the bounding box of the non-separable data,
# classified by the current linear model to show its decision regions.
xgrid <- expand.grid(
  X1 = seq(min(Class.Coords$x.coords), max(Class.Coords$x.coords),
    length.out = 150
  ),
  X2 = seq(min(Class.Coords$y.coords), max(Class.Coords$y.coords),
    length.out = 150
  )
)
# Rename so predict() finds the predictors used in the model formula.
colnames(xgrid) <- c("x.coords", "y.coords")
group.train.set.pred <- predict(svmfit, newdata = xgrid)
xgrid <- cbind(xgrid, group.train.set.pred)

# Background: predicted class per lattice point. Foreground: training
# points, with support vectors drawn as crosses (shape 4).
b4 <- ggplot(xgrid, aes(x.coords, y.coords)) +
  geom_point(aes(colour = group.train.set.pred), alpha = 0.1) +
  geom_point(
    data = Class.Coords[-svmfit$index, ],
    mapping = aes(x.coords, y.coords, colour = Class)
  ) +
  geom_point(
    data = Class.Coords[svmfit$index, ],
    mapping = aes(x.coords, y.coords, colour = Class),
    shape = 4
  ) +
  labs(
    colour = "Class",
    title = "Decision Surface SVM Linear Kernel (Non-Separable)"
  ) +
  xlab(expression(X[1])) +
  ylab(expression(X[2]))
b4
# Find the support vector lines by calculating the equation of the line that passes through two points. Note that the lines are all parallel; use this fact to find the lines in the separable case, where class 2 has only one support vector.