# Set the working directory to the author's course folder.
# The call is guarded because setwd() raises an error (and stops the script)
# when the directory does not exist, which is the case on any machine other
# than the author's. Nothing in the visible part of this script reads or
# writes files relative to the working directory.
course_dir <- "C:/Users/sharl/Desktop/USF/Fall 2021/LIS 4805 - Predictive Analytics/Week 10 - SVM"
if (dir.exists(course_dir)) {
  setwd(course_dir)
}
#support vector machine using iris data
library(e1071)
# Scatterplot matrix of every pair of columns in the data set.
plot(iris)

# Sepal width against sepal length, one colour per species.
plot(x = iris$Sepal.Length, y = iris$Sepal.Width, col = iris$Species)

# Petal width against petal length, one colour per species.
plot(x = iris$Petal.Length, y = iris$Petal.Width, col = iris$Species)

# Number of columns in the data frame.
ncol(iris)
## [1] 5
# Rows and columns.
dim(iris)
## [1] 150 5
# Split iris into a training set of 100 randomly chosen rows and a test set
# made of the remaining rows, keeping only the two petal measurements and the
# class label.
#
# The seed is fixed so the same rows are drawn on every run; without it the
# split, and every result computed from it, changes between runs. Output
# recorded in `##` comments elsewhere in this script predates the seed and
# may not match a fresh run exactly.
set.seed(123)
s <- sample(nrow(iris), 100)
col <- c("Petal.Length", "Petal.Width", "Species")
iris_train <- iris[s, col]
iris_test <- iris[-s, col]
# Fit a linear-kernel SVM classifier for Species on the training rows with a
# small cost (0.1). scale = FALSE turns off svm()'s default internal scaling
# of the predictors; it is spelled out as FALSE because `F` is an ordinary
# variable that can be reassigned.
svmfit <- svm(Species ~ ., data = iris_train, kernel = "linear", cost = 0.1,
              scale = FALSE)
print(svmfit)
## (output recorded from one run; the support-vector count depends on the
## random training split)
##
## Call:
## svm(formula = Species ~ ., data = iris_train, kernel = "linear",
## cost = 0.1, scale = FALSE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.1
##
## Number of Support Vectors: 46

# Decision regions over the two petal measurements. iris_train already holds
# exactly the columns named in `col`, so no further column subset is needed.
plot(svmfit, iris_train)

# Cross-validation: grid-search the cost parameter of a linear-kernel SVM.
# scale = FALSE is forwarded through tune()'s `...` to svm() so the models
# being compared use the same unscaled predictors as the fits made elsewhere
# in this script; without it tune() evaluates svm()'s default (scaled) models
# and the reported errors describe a different model.
tuned <- tune(
  svm, Species ~ .,
  data = iris_train,
  kernel = "linear",
  scale = FALSE,
  ranges = list(cost = c(0.001, 0.01, 0.1, 1, 10, 100))
)
summary(tuned)
# NOTE: the output below was recorded before scale = FALSE was passed to
# tune(), and the cross-validation folds are drawn at random, so a fresh run
# will not reproduce these numbers exactly.
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.1
##
## - best performance: 0.05
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-03 0.78 0.06324555
## 2 1e-02 0.37 0.11595018
## 3 1e-01 0.05 0.05270463
## 4 1e+00 0.05 0.05270463
## 5 1e+01 0.05 0.05270463
## 6 1e+02 0.06 0.05163978
# Refit the linear-kernel SVM with a large cost (100), again without internal
# scaling. FALSE is spelled out because `F` is an ordinary variable that can
# be reassigned.
# NOTE(review): the tuning output recorded in this script lists cost = 0.1 as
# the best parameter and shows a slightly higher CV error for cost = 100
# (0.06 vs 0.05) -- confirm that cost = 100 is the intended final choice.
svmfit <- svm(Species ~ ., data = iris_train, kernel = "linear", cost = 100,
              scale = FALSE)
print(svmfit)
## (output recorded from one run; the support-vector count depends on the
## random training split)
##
## Call:
## svm(formula = Species ~ ., data = iris_train, kernel = "linear",
## cost = 100, scale = FALSE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 100
##
## Number of Support Vectors: 11

# Decision regions over the two petal measurements. iris_train already holds
# exactly the columns named in `col`, so no further column subset is needed.
plot(svmfit, iris_train)

# Predict the species of the held-out test rows. For a classification SVM,
# predict() returns the class labels directly; it has no `type` argument, so
# the former type = "class" was silently absorbed by `...` and is dropped.
# iris_test already holds exactly the columns named in `col`.
p <- predict(svmfit, iris_test)

# Bar chart of how many test rows were assigned to each class.
plot(p)

# Confusion matrix: rows are predicted classes, columns are the true species.
# The label column is referenced by name rather than by position.
table(p, iris_test$Species)
##
## p setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 16 2
## virginica 0 0 17

# Overall accuracy: the share of test rows whose prediction matches the label.
mean(p == iris_test$Species)
## [1] 0.96