library(e1071)
library(ggplot2)
library(ISLR2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# ---- Simulated two-class data with a non-linear boundary ----
# Two predictors, each 500 uniform draws shifted by -0.5. The class label is
# 1 when x1^2 exceeds x2^2 and 0 otherwise.
set.seed(99)
x1 <- runif(500) - 0.5
x2 <- runif(500) - 0.5
y <- as.numeric(x1^2 - x2^2 > 0)

# Scatter plot of the observations: class 0 in red, class 1 in purple.
plot(x1[y == 0], x2[y == 0], col = "red")
points(x1[y == 1], x2[y == 1], col = "purple")

# Gather predictors and response (as a factor) into one data frame.
data <- data.frame(x1 = x1, x2 = x2, y = factor(y))

# Logistic regression that is linear in x1 and x2; print the fitted model.
log.fit <- glm(y ~ ., data = data, family = "binomial")
log.fit
##
## Call: glm(formula = y ~ ., family = "binomial", data = data)
##
## Coefficients:
## (Intercept) x1 x2
## -0.04996 -0.57916 -0.21671
##
## Degrees of Freedom: 499 Total (i.e. Null); 497 Residual
## Null Deviance: 692.9
## Residual Deviance: 688.9 AIC: 694.9
# Class predictions from the linear logistic model on the training data.
# type = "response" yields fitted probabilities; 0.5 is the decision cut-off.
prob <- predict(log.fit, newdata = data, type = "response")
pred <- ifelse(prob > 0.5, 1, 0)
# pred is a numeric 0/1 vector. Mapping it to colour directly makes ggplot2
# draw a continuous gradient, so convert it to a factor to get one distinct
# colour per predicted class. The legend title is kept as "pred".
ggplot(data = data, mapping = aes(x1, x2)) +
  geom_point(mapping = aes(colour = factor(pred))) +
  labs(colour = "pred")
# Logistic regression with cubic polynomial terms in each predictor, so the
# fitted decision boundary can be non-linear in x1 and x2.
# y is defined as a deterministic function of x1^2 and x2^2, so this model
# can presumably separate the classes perfectly, which would explain the
# convergence warnings recorded below.
log.fit2 <- glm(
  y ~ poly(x1, 3) + poly(x2, 3),
  data = data,
  family = "binomial"
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Class predictions on the training data, again with a 0.5 cut-off.
prob2 <- predict(log.fit2, newdata = data, type = "response")
pred2 <- ifelse(prob2 > 0.5, 1, 0)
# pred2 is numeric 0/1; map it as a factor so the two predicted classes get
# distinct colours instead of a continuous gradient.
ggplot(data = data, mapping = aes(x1, x2)) +
  geom_point(mapping = aes(colour = factor(pred2))) +
  labs(colour = "pred2")
# Support vector classifier (linear kernel) with a small cost, followed by a
# plot of its classification regions over the simulated data.
svm1 <- svm(y ~ ., data = data, kernel = "linear", cost = 0.01)
plot(svm1, data = data)

# SVM with a radial kernel and gamma = 1 (cost is not specified), followed by
# the same kind of plot.
svm2 <- svm(y ~ ., data = data, kernel = "radial", gamma = 1)
plot(svm2, data = data)
The support vector machine with a radial kernel and the logistic regression with polynomial (non-linear) terms both did a better job of classifying the observations, because each of them can fit a non-linear decision boundary.
attach(Auto)
## The following object is masked from package:ggplot2:
##
## mpg
# Binary response: 1 for cars whose mpg exceeds the sample mean, 0 otherwise,
# stored on Auto as the factor column mpgfct.
avg_mpg <- mean(Auto$mpg)
mpg_fct <- ifelse(Auto$mpg > avg_mpg, 1, 0)
Auto$mpgfct <- factor(mpg_fct)

# Drop the raw mpg column so the response is not also used as a predictor.
Auto1 <- Auto %>% select(-mpg)

# Tune a linear-kernel SVM over a grid of cost values. The seed is set
# immediately before tuning so the resampling is reproducible.
set.seed(99)
linear <- tune(
  svm, mpgfct ~ .,
  data = Auto1,
  kernel = "linear",
  ranges = list(cost = c(0.001, 0.01, 0.1, 1, 10, 100))
)
summary(linear)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.1
##
## - best performance: 0.09974359
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-03 0.12012821 0.04698834
## 2 1e-02 0.10224359 0.03837508
## 3 1e-01 0.09974359 0.03933969
## 4 1e+00 0.10230769 0.04865343
## 5 1e+01 0.12275641 0.05806720
## 6 1e+02 0.10467949 0.04281835
For the data set without the mpg variable, the lowest cross-validation error (0.09974) is obtained with cost = 0.1.
# Tune a radial-kernel SVM on Auto1 over every combination of the listed
# cost and gamma values, then print the tuning summary.
radial <- tune(
  svm, mpgfct ~ .,
  data = Auto1,
  kernel = "radial",
  ranges = list(
    cost = c(0.1, 1, 10, 100),
    gamma = c(0.5, 1, 2, 3, 4)
  )
)
summary(radial)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 1 0.5
##
## - best performance: 0.07679487
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 0.1 0.5 0.11000000 0.05436949
## 2 1.0 0.5 0.07679487 0.05419167
## 3 10.0 0.5 0.08185897 0.05257109
## 4 100.0 0.5 0.08185897 0.05257109
## 5 0.1 1.0 0.47448718 0.08180429
## 6 1.0 1.0 0.07685897 0.05927741
## 7 10.0 1.0 0.07679487 0.05545833
## 8 100.0 1.0 0.07679487 0.05545833
## 9 0.1 2.0 0.47448718 0.08180429
## 10 1.0 2.0 0.19884615 0.08418738
## 11 10.0 2.0 0.17589744 0.07856928
## 12 100.0 2.0 0.17589744 0.07856928
## 13 0.1 3.0 0.47448718 0.08180429
## 14 1.0 3.0 0.42352564 0.06408550
## 15 10.0 3.0 0.41076923 0.07273863
## 16 100.0 3.0 0.41076923 0.07273863
## 17 0.1 4.0 0.47448718 0.08180429
## 18 1.0 4.0 0.42858974 0.05992300
## 19 10.0 4.0 0.42608974 0.06048786
## 20 100.0 4.0 0.42608974 0.06048786
Lowest error for SVM with radial kernel = 0.07679487; it is associated with cost = 1 and gamma = 0.5.
# Tune a polynomial-kernel SVM on Auto1 over every combination of the listed
# cost and degree values, then print the tuning summary.
poly <- tune(
  svm, mpgfct ~ .,
  data = Auto1,
  kernel = "polynomial",
  ranges = list(
    cost = c(0.1, 1, 10, 100),
    degree = c(2, 3, 4)
  )
)
summary(poly)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 100 2
##
## - best performance: 0.390641
##
## - Detailed performance results:
## cost degree error dispersion
## 1 0.1 2 0.4746795 0.08632651
## 2 1.0 2 0.4746795 0.08632651
## 3 10.0 2 0.4746795 0.08632651
## 4 100.0 2 0.3906410 0.10917453
## 5 0.1 3 0.4746795 0.08632651
## 6 1.0 3 0.4746795 0.08632651
## 7 10.0 3 0.4746795 0.08632651
## 8 100.0 3 0.4288462 0.16307913
## 9 0.1 4 0.4746795 0.08632651
## 10 1.0 4 0.4746795 0.08632651
## 11 10.0 4 0.4746795 0.08632651
## 12 100.0 4 0.4746795 0.08632651
Lowest error for SVM with polynomial kernel = 0.3906; it is associated with cost = 100 and degree = 2.
The SVM with radial kernel outperformed the other models, with a minimum cross-validation error of 0.07679487.
# Refit one SVM per kernel on Auto1, using the parameter values reported as
# best by the tuning runs above.
lin <- svm(mpgfct ~ ., data = Auto1, kernel = "linear", cost = 0.1)
rad <- svm(mpgfct ~ ., data = Auto1, kernel = "radial", cost = 1, gamma = 0.5)
pol <- svm(
  mpgfct ~ .,
  data = Auto1, kernel = "polynomial", cost = 100, degree = 2
)

par(mfrow = c(2, 2))
# Draw, for each pair of plotted variables, the classification plot of the
# linear, radial and polynomial fits (in that order). local() keeps the loop
# variables out of the global workspace.
# NOTE(review): the models were fit on Auto1, which has no mpg column, while
# the plots use mpg as one axis -- confirm these are the intended views.
local({
  for (axes in list(mpg ~ weight, mpg ~ horsepower)) {
    for (fit in list(lin, rad, pol)) {
      plot(fit, data = Auto, formula = axes)
    }
  }
})
detach(Auto)
attach(OJ)
# Reproducible split of OJ: 800 randomly sampled rows form the training set,
# and all remaining rows form the test set.
set.seed(98)
training <- sample(nrow(OJ), 800)
train <- OJ[training, ]
test <- OJ[-training, ]

# Linear-kernel SVM for Purchase on all other columns, with cost = 0.01.
lin1 <- svm(Purchase ~ ., data = train, kernel = "linear", cost = 0.01)
summary(lin1)
##
## Call:
## svm(formula = Purchase ~ ., data = train, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 437
##
## ( 219 218 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
There are 437 support vectors for the classifier, with two classes: CH and MM. Of those 437 vectors, 219 belong to the CH class and 218 to the MM class.
# Training-set predictions from lin1 and the resulting confusion table
# (rows: observed Purchase, columns: predicted class).
pred_train <- predict(lin1, newdata = train)
matrix <- table(train$Purchase, pred_train)
matrix
## pred_train
## CH MM
## CH 447 51
## MM 84 218
# Training error rate: one minus the accuracy, with the accuracy rounded to
# five decimals before subtracting.
err <- 1 - round(mean(train$Purchase == pred_train), 5)
err
## [1] 0.16875
The training error is 0.16875
# Test-set predictions from lin1 and the resulting confusion table
# (rows: observed Purchase, columns: predicted class).
pred_test <- predict(lin1, newdata = test)
matrix2 <- table(test$Purchase, pred_test)
matrix2
## pred_test
## CH MM
## CH 138 17
## MM 25 90
# Test error rate: one minus the accuracy, with the accuracy rounded to four
# decimals before subtracting.
err <- 1 - round(mean(test$Purchase == pred_test), 4)
err
## [1] 0.1556
The test error is 0.1556
# Tune the cost of the linear-kernel SVM on the training set. The seed is
# set first so the resampling is reproducible.
set.seed(99)
tuneOJ <- tune(
  svm, Purchase ~ .,
  data = train,
  kernel = "linear",
  ranges = list(cost = c(0.01, 0.1, 1, 10))
)
summary(tuneOJ)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.1
##
## - best performance: 0.1725
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.17875 0.03729108
## 2 0.10 0.17250 0.03374743
## 3 1.00 0.17750 0.03425801
## 4 10.00 0.18250 0.03184162
The smallest cross-validation error, 0.1725, occurs with a cost value of 0.10.
# Refit the linear-kernel SVM on the training set with cost = 0.10, then
# tabulate its training-set predictions against the observed Purchase.
lin2 <- svm(Purchase ~ ., kernel = "linear", data = train, cost = 0.10)
pred2 <- predict(lin2, newdata = train)
table(train$Purchase, pred2)
## pred2
## CH MM
## CH 445 53
## MM 79 223
# Training error rate: one minus the accuracy (rounded to four decimals).
err <- 1 - round(mean(train$Purchase == pred2), 4)
err
## [1] 0.165
# Test error of the linear SVM with cost = 0.10.
# The model must be the one trained on `train` (lin2). Refitting on `test` and
# then predicting on `test` would report a training error of a different
# model, not a held-out test error.
pred3 <- predict(lin2, newdata = test)
table(test$Purchase, pred3)
# NOTE: the output recorded below came from a model that had been fit on the
# test set itself; re-run this chunk to refresh it.
## pred3
## CH MM
## CH 136 19
## MM 17 98
# Test error rate: one minus the accuracy (rounded to three decimals).
err <- 1 - round(mean(test$Purchase == pred3), 3)
err
## [1] 0.133
# Radial-kernel SVM for Purchase on the OJ training set (cost and gamma are
# not specified).
set.seed(99)
rad1 <- svm(Purchase ~ ., data = train, kernel = "radial")
# Summarise rad1, the model just fitted. `rad` is a different object: the
# radial SVM fit on Auto1.
# NOTE: the summary recorded below is the Auto1 model's; re-run this chunk to
# refresh it.
summary(rad1)
##
## Call:
## svm(formula = mpgfct ~ ., data = Auto1, kernel = "radial", cost = 1,
## gamma = 0.5)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 278
##
## ( 137 141 )
##
##
## Number of Classes: 2
##
## Levels:
## 0 1
# Training-set predictions from rad1 and the resulting confusion table
# (rows: observed Purchase, columns: predicted class).
train.rad <- predict(rad1, newdata = train)
matrix <- table(train$Purchase, train.rad)
matrix
## train.rad
## CH MM
## CH 455 43
## MM 85 217
# Training error rate: one minus the accuracy (rounded to three decimals).
err <- 1 - round(mean(train$Purchase == train.rad), 3)
err
## [1] 0.16
# Test-set predictions from rad1 and the resulting confusion table
# (rows: observed Purchase, columns: predicted class).
test.rad <- predict(rad1, newdata = test)
matrix <- table(test$Purchase, test.rad)
matrix
## test.rad
## CH MM
## CH 140 15
## MM 26 89
# Test error rate: one minus the accuracy (rounded to three decimals).
err <- 1 - round(mean(test$Purchase == test.rad), 3)
err
## [1] 0.152
# Radial-kernel SVM on the training set with cost = 0.10, then its
# training-set predictions tabulated against the observed Purchase.
svm.rad <- svm(Purchase ~ ., data = train, kernel = "radial", cost = 0.10)
pred.train <- predict(svm.rad, newdata = train)
table(train$Purchase, pred.train)
## pred.train
## CH MM
## CH 455 43
## MM 96 206
# Training error rate: one minus the accuracy (rounded to three decimals).
err <- 1 - round(mean(train$Purchase == pred.train), 3)
err
## [1] 0.174
# Test error of the radial SVM with cost = 0.10.
# The model must be the one trained on `train` (svm.rad). Refitting on `test`
# and then predicting on `test` would not measure held-out performance.
pred.test <- predict(svm.rad, newdata = test)
table(test$Purchase, pred.test)
# NOTE: the output recorded below came from a model that had been fit on the
# test set itself; re-run this chunk to refresh it.
## pred.test
## CH MM
## CH 138 17
## MM 41 74
# Test error rate: one minus the accuracy (rounded to three decimals).
err <- 1 - round(mean(test$Purchase == pred.test), 3)
err
## [1] 0.215
# Polynomial-kernel SVM of degree 2 for Purchase on the OJ training set.
# The kernel must be "polynomial" for `degree` to take effect. With
# kernel = "radial" the recorded results were identical to those of rad1.
# NOTE: the outputs recorded below were produced with the radial kernel;
# re-run to refresh them.
set.seed(99)
poly1 <- svm(Purchase ~ ., data = train, kernel = "polynomial", degree = 2)
summary(poly1)
##
## Call:
## svm(formula = Purchase ~ ., data = train, kernel = "radial", degree = 2)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 374
##
## ( 184 190 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
# Training-set predictions from poly1 and the resulting confusion table
# (rows: observed Purchase, columns: predicted class).
train.poly <- predict(poly1, newdata = train)
matrix <- table(train$Purchase, train.poly)
matrix
## train.poly
## CH MM
## CH 455 43
## MM 85 217
# Training error rate: one minus the accuracy (rounded to three decimals).
err <- 1 - round(mean(train$Purchase == train.poly), 3)
err
## [1] 0.16
# Test-set predictions from poly1 and the resulting confusion table
# (rows: observed Purchase, columns: predicted class).
test.poly <- predict(poly1, newdata = test)
matrix <- table(test$Purchase, test.poly)
matrix
## test.poly
## CH MM
## CH 140 15
## MM 26 89
# Test error rate: one minus the accuracy (rounded to three decimals).
err <- 1 - round(mean(test$Purchase == test.poly), 3)
err
## [1] 0.152
# Tune the cost of a degree-2 polynomial-kernel SVM on the training set over
# a log-spaced grid from 10^-2 to 10^1 in steps of 0.25 in the exponent.
# The seed is set first so the resampling is reproducible.
set.seed(99)
tune2 <- tune(
  svm, Purchase ~ .,
  data = train,
  kernel = "poly",
  degree = 2,
  ranges = list(cost = 10^seq(-2, 1, by = 0.25))
)
summary(tune2)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 10
##
## - best performance: 0.18125
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01000000 0.37750 0.06286007
## 2 0.01778279 0.35625 0.06270799
## 3 0.03162278 0.35000 0.05921946
## 4 0.05623413 0.33250 0.05986095
## 5 0.10000000 0.31750 0.06157651
## 6 0.17782794 0.25750 0.05927806
## 7 0.31622777 0.21125 0.03839216
## 8 0.56234133 0.20000 0.03679900
## 9 1.00000000 0.20000 0.04409586
## 10 1.77827941 0.19750 0.04362084
## 11 3.16227766 0.18875 0.03793727
## 12 5.62341325 0.18500 0.03809710
## 13 10.00000000 0.18125 0.03830162
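With a test error of 0.1556, the SVM with linear kernel (cost = 0.01) performed about as well as the radial-kernel SVM with default cost, whose test error of 0.152 is the lowest recorded above for a model fitted on the training set.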