1. Read Data

# Print per-column summary statistics for the iris data frame.
summary(iris)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# NOTE: attach(iris) was dropped on purpose. The steps in this script reach the
# data through the data frame itself (iris[, 5], data = train, test$Species),
# so putting the columns on the search path only risks masking other objects.

2. Load packages: ‘e1071’ for SVM, ‘caret’ for the cross table (confusion matrix)

library(e1071)
## Warning: package 'e1071' was built under R version 3.4.4
library(caret)
## Warning: package 'caret' was built under R version 3.4.4
## Loading required package: lattice
## Loading required package: ggplot2
# Look up the documentation for svm(); the recorded output shows this call
# starting the httpd help server.
help(svm)
## starting httpd help server ...
##  done

3. Training & Test Data Sets

# Reproducible split of iris into 2/3 training rows and 1/3 test rows.
set.seed(1000)  # fix the RNG so the same rows are drawn on every run
N <- nrow(iris)
# Draw floor(2N/3) distinct row indices for training. seq_len() is used
# instead of 1:N so an empty data frame cannot produce the sequence c(1, 0).
tr.idx <- sample(seq_len(N), size = floor(N * 2 / 3), replace = FALSE)
y <- iris[, 5]  # target variable (Species, the fifth column)
train <- iris[tr.idx, ]  # training data
# Select the complement by positive index: iris[-tr.idx, ] would return zero
# rows (instead of all rows) if tr.idx were ever empty.
test <- iris[setdiff(seq_len(N), tr.idx), ]

4. Kernel type 별 SVM 실행

degree: parameter needed for kernel of type polynomial (default: 3)
gamma: parameter needed for all kernels except linear (default: 1/(data dimension))
coef0: parameter needed for kernels of type polynomial and sigmoid (default: 0)
# Fit with the svm() defaults; the default kernel is the radial basis
# function exp(-gamma * |u - v|^2).
m1 <- svm(Species ~ ., data = train)
summary(m1)
## 
## Call:
## svm(formula = Species ~ ., data = train)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
##       gamma:  0.25 
## 
## Number of Support Vectors:  38
## 
##  ( 5 16 17 )
## 
## 
## Number of Classes:  3 
## 
## Levels: 
##  setosa versicolor virginica
# Polynomial kernel: (gamma * u'v + coef0)^degree
m2 <- svm(Species ~ ., data = train, kernel = "polynomial")
summary(m2)
## 
## Call:
## svm(formula = Species ~ ., data = train, kernel = "polynomial")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  polynomial 
##        cost:  1 
##      degree:  3 
##       gamma:  0.25 
##      coef.0:  0 
## 
## Number of Support Vectors:  45
## 
##  ( 3 20 22 )
## 
## 
## Number of Classes:  3 
## 
## Levels: 
##  setosa versicolor virginica
# Sigmoid kernel: tanh(gamma * u'v + coef0)
m3 <- svm(Species ~ ., data = train, kernel = "sigmoid")
summary(m3)
## 
## Call:
## svm(formula = Species ~ ., data = train, kernel = "sigmoid")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  sigmoid 
##        cost:  1 
##       gamma:  0.25 
##      coef.0:  0 
## 
## Number of Support Vectors:  44
## 
##  ( 4 17 23 )
## 
## 
## Number of Classes:  3 
## 
## Levels: 
##  setosa versicolor virginica
# Same model formula as the fits above, this time with the linear kernel.
m4 <- svm(Species ~ ., data = train, kernel = "linear")
summary(m4)
## 
## Call:
## svm(formula = Species ~ ., data = train, kernel = "linear")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  1 
##       gamma:  0.25 
## 
## Number of Support Vectors:  23
## 
##  ( 2 10 11 )
## 
## 
## Number of Classes:  3 
## 
## Levels: 
##  setosa versicolor virginica

5. Prediction_Radial Basis Function

# Classify the held-out rows with the radial-kernel model, then compare the
# predictions against the true test species (recorded accuracy: 96%).
pred11 <- predict(m1, newdata = test)
confusionMatrix(data = pred11, reference = test$Species)
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         19          0         0
##   versicolor      0         18         1
##   virginica       0          1        11
## 
## Overall Statistics
##                                           
##                Accuracy : 0.96            
##                  95% CI : (0.8629, 0.9951)
##     No Information Rate : 0.38            
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9388          
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                   1.00            0.9474           0.9167
## Specificity                   1.00            0.9677           0.9737
## Pos Pred Value                1.00            0.9474           0.9167
## Neg Pred Value                1.00            0.9677           0.9737
## Prevalence                    0.38            0.3800           0.2400
## Detection Rate                0.38            0.3600           0.2200
## Detection Prevalence          0.38            0.3800           0.2400
## Balanced Accuracy             1.00            0.9576           0.9452

6. prediction_Polynomial Function

# Classify the held-out rows with the polynomial-kernel model and tabulate
# predictions against the true species (recorded accuracy: 90%).
pred12 <- predict(m2, newdata = test)
confusionMatrix(data = pred12, reference = test$Species)
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         19          0         0
##   versicolor      0         19         5
##   virginica       0          0         7
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9             
##                  95% CI : (0.7819, 0.9667)
##     No Information Rate : 0.38            
##     P-Value [Acc > NIR] : 2.557e-14       
##                                           
##                   Kappa : 0.8437          
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                   1.00            1.0000           0.5833
## Specificity                   1.00            0.8387           1.0000
## Pos Pred Value                1.00            0.7917           1.0000
## Neg Pred Value                1.00            1.0000           0.8837
## Prevalence                    0.38            0.3800           0.2400
## Detection Rate                0.38            0.3800           0.1400
## Detection Prevalence          0.38            0.4800           0.1400
## Balanced Accuracy             1.00            0.9194           0.7917

7. prediction_sigmoid

# Classify the held-out rows with the sigmoid-kernel model and tabulate
# predictions against the true species (recorded accuracy: 90%).
pred13 <- predict(m3, newdata = test)
confusionMatrix(data = pred13, reference = test$Species)
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         19          0         0
##   versicolor      0         15         1
##   virginica       0          4        11
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9             
##                  95% CI : (0.7819, 0.9667)
##     No Information Rate : 0.38            
##     P-Value [Acc > NIR] : 2.557e-14       
##                                           
##                   Kappa : 0.8489          
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                   1.00            0.7895           0.9167
## Specificity                   1.00            0.9677           0.8947
## Pos Pred Value                1.00            0.9375           0.7333
## Neg Pred Value                1.00            0.8824           0.9714
## Prevalence                    0.38            0.3800           0.2400
## Detection Rate                0.38            0.3000           0.2200
## Detection Prevalence          0.38            0.3200           0.3000
## Balanced Accuracy             1.00            0.8786           0.9057

8. prediction_linear

# Classify the held-out rows with the linear-kernel model and tabulate
# predictions against the true species (recorded accuracy: 96%).
pred14 <- predict(m4, newdata = test)
confusionMatrix(data = pred14, reference = test$Species)
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         19          0         0
##   versicolor      0         17         0
##   virginica       0          2        12
## 
## Overall Statistics
##                                           
##                Accuracy : 0.96            
##                  95% CI : (0.8629, 0.9951)
##     No Information Rate : 0.38            
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9393          
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                   1.00            0.8947           1.0000
## Specificity                   1.00            1.0000           0.9474
## Pos Pred Value                1.00            1.0000           0.8571
## Neg Pred Value                1.00            0.9394           1.0000
## Prevalence                    0.38            0.3800           0.2400
## Detection Rate                0.38            0.3400           0.2400
## Detection Prevalence          0.38            0.3400           0.2800
## Balanced Accuracy             1.00            0.9474           0.9737