1. Read Data
# Print per-column summary statistics for the built-in iris data set.
summary(object = iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# NOTE: iris is intentionally NOT attach()ed. Every step in this script reaches
# columns through a data frame (iris[, 5], data = train, test$Species), so
# attaching would only add a masking-prone copy to the search path.
2. Load packages: 'e1071' for SVM, 'caret' for the cross table (confusion matrix)
library(e1071)
## Warning: package 'e1071' was built under R version 3.4.4
library(caret)
## Warning: package 'caret' was built under R version 3.4.4
## Loading required package: lattice
## Loading required package: ggplot2
# Open the documentation page for svm (interactive reference only; it does not
# affect the analysis below).
help("svm")
## starting httpd help server ...
## done
3. Training & Test Data Set
# Reproducibly split iris into a training set (2/3 of the rows) and a test set
# (the remaining 1/3).
set.seed(1000) # fix the RNG state so the same rows are drawn on every run
N <- nrow(iris)
# Row indices of the training sample, drawn without replacement.
tr.idx <- sample(seq_len(N), size = N * 2 / 3, replace = FALSE)
y <- iris[, 5] # target variable (5th column of iris)
train <- iris[tr.idx, ] # training data
test <- iris[-tr.idx, ] # test data: every row not drawn for training
4. Running SVM for each kernel type
degree: parameter needed for kernel of type polynomial (default: 3)
gamma: parameter needed for all kernels except linear (default: 1/(data dimension))
coef0: parameter needed for kernels of type polynomial and sigmoid (default: 0)
# Fit an SVM on the training data with the default kernel, which is the
# radial basis function: exp(-gamma * |u - v|^2)
m1 <- svm(Species ~ ., data = train)
summary(m1)
##
## Call:
## svm(formula = Species ~ ., data = train)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
## gamma: 0.25
##
## Number of Support Vectors: 38
##
## ( 5 16 17 )
##
##
## Number of Classes: 3
##
## Levels:
## setosa versicolor virginica
# Fit an SVM with the polynomial kernel: (gamma * u' * v + coef0)^degree
m2 <- svm(Species ~ ., data = train, kernel = "polynomial")
summary(m2)
##
## Call:
## svm(formula = Species ~ ., data = train, kernel = "polynomial")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 1
## degree: 3
## gamma: 0.25
## coef.0: 0
##
## Number of Support Vectors: 45
##
## ( 3 20 22 )
##
##
## Number of Classes: 3
##
## Levels:
## setosa versicolor virginica
# Fit an SVM with the sigmoid kernel: tanh(gamma * u' * v + coef0)
m3 <- svm(Species ~ ., data = train, kernel = "sigmoid")
summary(m3)
##
## Call:
## svm(formula = Species ~ ., data = train, kernel = "sigmoid")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: sigmoid
## cost: 1
## gamma: 0.25
## coef.0: 0
##
## Number of Support Vectors: 44
##
## ( 4 17 23 )
##
##
## Number of Classes: 3
##
## Levels:
## setosa versicolor virginica
# Fit an SVM with the linear kernel.
m4 <- svm(Species ~ ., data = train, kernel = "linear")
summary(m4)
##
## Call:
## svm(formula = Species ~ ., data = train, kernel = "linear")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 1
## gamma: 0.25
##
## Number of Support Vectors: 23
##
## ( 2 10 11 )
##
##
## Number of Classes: 3
##
## Levels:
## setosa versicolor virginica
5. Prediction_Radial Basis Function
# Predict the test-set species with the radial-basis model, then compare the
# predictions against the true test Species (accuracy: 96%).
pred11 <- predict(m1, newdata = test)
confusionMatrix(data = pred11, reference = test$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 19 0 0
## versicolor 0 18 1
## virginica 0 1 11
##
## Overall Statistics
##
## Accuracy : 0.96
## 95% CI : (0.8629, 0.9951)
## No Information Rate : 0.38
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9388
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.00 0.9474 0.9167
## Specificity 1.00 0.9677 0.9737
## Pos Pred Value 1.00 0.9474 0.9167
## Neg Pred Value 1.00 0.9677 0.9737
## Prevalence 0.38 0.3800 0.2400
## Detection Rate 0.38 0.3600 0.2200
## Detection Prevalence 0.38 0.3800 0.2400
## Balanced Accuracy 1.00 0.9576 0.9452
6. prediction_Polynomial Function
# Predict the test-set species with the polynomial-kernel model and tabulate
# the predictions against the true test Species (accuracy: 90%).
pred12 <- predict(m2, newdata = test)
confusionMatrix(data = pred12, reference = test$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 19 0 0
## versicolor 0 19 5
## virginica 0 0 7
##
## Overall Statistics
##
## Accuracy : 0.9
## 95% CI : (0.7819, 0.9667)
## No Information Rate : 0.38
## P-Value [Acc > NIR] : 2.557e-14
##
## Kappa : 0.8437
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.00 1.0000 0.5833
## Specificity 1.00 0.8387 1.0000
## Pos Pred Value 1.00 0.7917 1.0000
## Neg Pred Value 1.00 1.0000 0.8837
## Prevalence 0.38 0.3800 0.2400
## Detection Rate 0.38 0.3800 0.1400
## Detection Prevalence 0.38 0.4800 0.1400
## Balanced Accuracy 1.00 0.9194 0.7917
7. prediction_sigmoid
# Predict the test-set species with the sigmoid-kernel model and tabulate the
# predictions against the true test Species (accuracy: 90%).
pred13 <- predict(m3, newdata = test)
confusionMatrix(data = pred13, reference = test$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 19 0 0
## versicolor 0 15 1
## virginica 0 4 11
##
## Overall Statistics
##
## Accuracy : 0.9
## 95% CI : (0.7819, 0.9667)
## No Information Rate : 0.38
## P-Value [Acc > NIR] : 2.557e-14
##
## Kappa : 0.8489
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.00 0.7895 0.9167
## Specificity 1.00 0.9677 0.8947
## Pos Pred Value 1.00 0.9375 0.7333
## Neg Pred Value 1.00 0.8824 0.9714
## Prevalence 0.38 0.3800 0.2400
## Detection Rate 0.38 0.3000 0.2200
## Detection Prevalence 0.38 0.3200 0.3000
## Balanced Accuracy 1.00 0.8786 0.9057
8. prediction_linear
# Predict the test-set species with the linear-kernel model and tabulate the
# predictions against the true test Species (accuracy: 96%).
pred14 <- predict(m4, newdata = test)
confusionMatrix(data = pred14, reference = test$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 19 0 0
## versicolor 0 17 0
## virginica 0 2 12
##
## Overall Statistics
##
## Accuracy : 0.96
## 95% CI : (0.8629, 0.9951)
## No Information Rate : 0.38
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9393
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.00 0.8947 1.0000
## Specificity 1.00 1.0000 0.9474
## Pos Pred Value 1.00 1.0000 0.8571
## Neg Pred Value 1.00 0.9394 1.0000
## Prevalence 0.38 0.3800 0.2400
## Detection Rate 0.38 0.3400 0.2400
## Detection Prevalence 0.38 0.3400 0.2800
## Balanced Accuracy 1.00 0.9474 0.9737