The Spam data set, together with its description, is available at https://archive.ics.uci.edu/ml/datasets/Spambase
A ready-formatted version of the same data can also be loaded from the R package "kernlab":
#install.packages("kernlab")
library(kernlab)
data("spam", package = "kernlab") # load the spam data frame that ships with kernlab
dim(spam) # dimensions: number of rows, then number of columns
## [1] 4601 58
names(spam) # to see the column names
## [1] "make" "address" "all"
## [4] "num3d" "our" "over"
## [7] "remove" "internet" "order"
## [10] "mail" "receive" "will"
## [13] "people" "report" "addresses"
## [16] "free" "business" "email"
## [19] "you" "credit" "your"
## [22] "font" "num000" "money"
## [25] "hp" "hpl" "george"
## [28] "num650" "lab" "labs"
## [31] "telnet" "num857" "data"
## [34] "num415" "num85" "technology"
## [37] "num1999" "parts" "pm"
## [40] "direct" "cs" "meeting"
## [43] "original" "project" "re"
## [46] "edu" "table" "conference"
## [49] "charSemicolon" "charRoundbracket" "charSquarebracket"
## [52] "charExclamation" "charDollar" "charHash"
## [55] "capitalAve" "capitalLong" "capitalTotal"
## [58] "type"
head(spam) # to see the first 6 rows/observations
## make address all num3d our over remove internet order mail receive will
## 1 0.00 0.64 0.64 0 0.32 0.00 0.00 0.00 0.00 0.00 0.00 0.64
## 2 0.21 0.28 0.50 0 0.14 0.28 0.21 0.07 0.00 0.94 0.21 0.79
## 3 0.06 0.00 0.71 0 1.23 0.19 0.19 0.12 0.64 0.25 0.38 0.45
## 4 0.00 0.00 0.00 0 0.63 0.00 0.31 0.63 0.31 0.63 0.31 0.31
## 5 0.00 0.00 0.00 0 0.63 0.00 0.31 0.63 0.31 0.63 0.31 0.31
## 6 0.00 0.00 0.00 0 1.85 0.00 0.00 1.85 0.00 0.00 0.00 0.00
## people report addresses free business email you credit your font num000
## 1 0.00 0.00 0.00 0.32 0.00 1.29 1.93 0.00 0.96 0 0.00
## 2 0.65 0.21 0.14 0.14 0.07 0.28 3.47 0.00 1.59 0 0.43
## 3 0.12 0.00 1.75 0.06 0.06 1.03 1.36 0.32 0.51 0 1.16
## 4 0.31 0.00 0.00 0.31 0.00 0.00 3.18 0.00 0.31 0 0.00
## 5 0.31 0.00 0.00 0.31 0.00 0.00 3.18 0.00 0.31 0 0.00
## 6 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0 0.00
## money hp hpl george num650 lab labs telnet num857 data num415 num85
## 1 0.00 0 0 0 0 0 0 0 0 0 0 0
## 2 0.43 0 0 0 0 0 0 0 0 0 0 0
## 3 0.06 0 0 0 0 0 0 0 0 0 0 0
## 4 0.00 0 0 0 0 0 0 0 0 0 0 0
## 5 0.00 0 0 0 0 0 0 0 0 0 0 0
## 6 0.00 0 0 0 0 0 0 0 0 0 0 0
## technology num1999 parts pm direct cs meeting original project re edu
## 1 0 0.00 0 0 0.00 0 0 0.00 0 0.00 0.00
## 2 0 0.07 0 0 0.00 0 0 0.00 0 0.00 0.00
## 3 0 0.00 0 0 0.06 0 0 0.12 0 0.06 0.06
## 4 0 0.00 0 0 0.00 0 0 0.00 0 0.00 0.00
## 5 0 0.00 0 0 0.00 0 0 0.00 0 0.00 0.00
## 6 0 0.00 0 0 0.00 0 0 0.00 0 0.00 0.00
## table conference charSemicolon charRoundbracket charSquarebracket
## 1 0 0 0.00 0.000 0
## 2 0 0 0.00 0.132 0
## 3 0 0 0.01 0.143 0
## 4 0 0 0.00 0.137 0
## 5 0 0 0.00 0.135 0
## 6 0 0 0.00 0.223 0
## charExclamation charDollar charHash capitalAve capitalLong capitalTotal type
## 1 0.778 0.000 0.000 3.756 61 278 spam
## 2 0.372 0.180 0.048 5.114 101 1028 spam
## 3 0.276 0.184 0.010 9.821 485 2259 spam
## 4 0.137 0.000 0.000 3.537 40 191 spam
## 5 0.135 0.000 0.000 3.537 40 191 spam
## 6 0.000 0.000 0.000 3.000 15 54 spam
attach(spam) # put the columns of spam on the search path so that `type` etc. can be used by bare name
table(type) # to see the number of spam and non-spam emails
## type
## nonspam spam
## 2788 1813
str(spam) # to see the structure of the entire data frame
## 'data.frame': 4601 obs. of 58 variables:
## $ make : num 0 0.21 0.06 0 0 0 0 0 0.15 0.06 ...
## $ address : num 0.64 0.28 0 0 0 0 0 0 0 0.12 ...
## $ all : num 0.64 0.5 0.71 0 0 0 0 0 0.46 0.77 ...
## $ num3d : num 0 0 0 0 0 0 0 0 0 0 ...
## $ our : num 0.32 0.14 1.23 0.63 0.63 1.85 1.92 1.88 0.61 0.19 ...
## $ over : num 0 0.28 0.19 0 0 0 0 0 0 0.32 ...
## $ remove : num 0 0.21 0.19 0.31 0.31 0 0 0 0.3 0.38 ...
## $ internet : num 0 0.07 0.12 0.63 0.63 1.85 0 1.88 0 0 ...
## $ order : num 0 0 0.64 0.31 0.31 0 0 0 0.92 0.06 ...
## $ mail : num 0 0.94 0.25 0.63 0.63 0 0.64 0 0.76 0 ...
## $ receive : num 0 0.21 0.38 0.31 0.31 0 0.96 0 0.76 0 ...
## $ will : num 0.64 0.79 0.45 0.31 0.31 0 1.28 0 0.92 0.64 ...
## $ people : num 0 0.65 0.12 0.31 0.31 0 0 0 0 0.25 ...
## $ report : num 0 0.21 0 0 0 0 0 0 0 0 ...
## $ addresses : num 0 0.14 1.75 0 0 0 0 0 0 0.12 ...
## $ free : num 0.32 0.14 0.06 0.31 0.31 0 0.96 0 0 0 ...
## $ business : num 0 0.07 0.06 0 0 0 0 0 0 0 ...
## $ email : num 1.29 0.28 1.03 0 0 0 0.32 0 0.15 0.12 ...
## $ you : num 1.93 3.47 1.36 3.18 3.18 0 3.85 0 1.23 1.67 ...
## $ credit : num 0 0 0.32 0 0 0 0 0 3.53 0.06 ...
## $ your : num 0.96 1.59 0.51 0.31 0.31 0 0.64 0 2 0.71 ...
## $ font : num 0 0 0 0 0 0 0 0 0 0 ...
## $ num000 : num 0 0.43 1.16 0 0 0 0 0 0 0.19 ...
## $ money : num 0 0.43 0.06 0 0 0 0 0 0.15 0 ...
## $ hp : num 0 0 0 0 0 0 0 0 0 0 ...
## $ hpl : num 0 0 0 0 0 0 0 0 0 0 ...
## $ george : num 0 0 0 0 0 0 0 0 0 0 ...
## $ num650 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ lab : num 0 0 0 0 0 0 0 0 0 0 ...
## $ labs : num 0 0 0 0 0 0 0 0 0 0 ...
## $ telnet : num 0 0 0 0 0 0 0 0 0 0 ...
## $ num857 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ data : num 0 0 0 0 0 0 0 0 0.15 0 ...
## $ num415 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ num85 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ technology : num 0 0 0 0 0 0 0 0 0 0 ...
## $ num1999 : num 0 0.07 0 0 0 0 0 0 0 0 ...
## $ parts : num 0 0 0 0 0 0 0 0 0 0 ...
## $ pm : num 0 0 0 0 0 0 0 0 0 0 ...
## $ direct : num 0 0 0.06 0 0 0 0 0 0 0 ...
## $ cs : num 0 0 0 0 0 0 0 0 0 0 ...
## $ meeting : num 0 0 0 0 0 0 0 0 0 0 ...
## $ original : num 0 0 0.12 0 0 0 0 0 0.3 0 ...
## $ project : num 0 0 0 0 0 0 0 0 0 0.06 ...
## $ re : num 0 0 0.06 0 0 0 0 0 0 0 ...
## $ edu : num 0 0 0.06 0 0 0 0 0 0 0 ...
## $ table : num 0 0 0 0 0 0 0 0 0 0 ...
## $ conference : num 0 0 0 0 0 0 0 0 0 0 ...
## $ charSemicolon : num 0 0 0.01 0 0 0 0 0 0 0.04 ...
## $ charRoundbracket : num 0 0.132 0.143 0.137 0.135 0.223 0.054 0.206 0.271 0.03 ...
## $ charSquarebracket: num 0 0 0 0 0 0 0 0 0 0 ...
## $ charExclamation : num 0.778 0.372 0.276 0.137 0.135 0 0.164 0 0.181 0.244 ...
## $ charDollar : num 0 0.18 0.184 0 0 0 0.054 0 0.203 0.081 ...
## $ charHash : num 0 0.048 0.01 0 0 0 0 0 0.022 0 ...
## $ capitalAve : num 3.76 5.11 9.82 3.54 3.54 ...
## $ capitalLong : num 61 101 485 40 40 15 4 11 445 43 ...
## $ capitalTotal : num 278 1028 2259 191 191 ...
## $ type : Factor w/ 2 levels "nonspam","spam": 2 2 2 2 2 2 2 2 2 2 ...
# Logistic regression of the email type on all other columns of spam
logreg <- glm(formula = type ~ ., family = "binomial", data = spam)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
names(logreg) # to see the names of the model object output
## [1] "coefficients" "residuals" "fitted.values"
## [4] "effects" "R" "rank"
## [7] "qr" "family" "linear.predictors"
## [10] "deviance" "aic" "null.deviance"
## [13] "iter" "weights" "prior.weights"
## [16] "df.residual" "df.null" "y"
## [19] "converged" "boundary" "model"
## [22] "call" "formula" "terms"
## [25] "data" "offset" "control"
## [28] "method" "contrasts" "xlevels"
prob.spam <- fitted(logreg) # fitted probabilities from the logistic model, one per email
head(prob.spam, n = 6) # the first 6 fitted probabilities
## 1 2 3 4 5 6
## 0.6189824 0.9880333 0.9999977 0.7786119 0.7785570 0.6686157
# Classify with a 0.5 cutoff: fitted probability above 0.5 -> "spam", otherwise "nonspam"
class.spam <- ifelse(prob.spam > 0.5, yes = "spam", no = "nonspam")
head(class.spam) # the first 6 predicted labels
## 1 2 3 4 5 6
## "spam" "spam" "spam" "spam" "spam" "spam"
table(class.spam, type)
## type
## class.spam nonspam spam
## nonspam 2666 194
## spam 122 1619
mean( type == class.spam ) # Correct classification rate = 93%
## [1] 0.9313193
The correct classification rate is 93%. However, it was computed on the same data that were used to fit the model, so it should be validated by cross-validation (CV).
## What are the error rates among spam and benign emails?
# Share of the benign (nonspam) emails that were classified as spam
mean( class.spam[type == "nonspam"] == "spam" )
## [1] 0.04375897
# Share of the spam emails that were classified as nonspam
mean( class.spam[type == "spam"] == "nonspam" )
## [1] 0.107005
# Stricter cutoff: call an email spam only if its fitted probability exceeds 10/11,
# i.e. only if the fitted odds p / (1 - p) exceed 10
class.spam <- ifelse(prob.spam > 10 / 11, yes = "spam", no = "nonspam")
table(class.spam, type) # rows: predicted label, columns: actual label
## type
## class.spam nonspam spam
## nonspam 2759 674
## spam 29 1139
mean( type == class.spam ) # The overall correct classification rate (accuracy); the overall error rate is 1 minus this value
## [1] 0.8472071
sum( type=="nonspam" & class.spam=="spam" ) / sum( type=="nonspam" ) # the error rate for benign emails (nonspam classified as spam)
## [1] 0.01040172
sum( type=="spam" & class.spam=="nonspam" ) / sum( type=="spam" ) # the error rate of classifying spam emails as nonspam
## [1] 0.3717595
The overall error rate increased (accuracy fell from about 93% to about 85%), and the error rate of misclassifying spam emails as nonspam increased substantially (from about 11% to about 37%), but the error rate for benign emails is only about 1% now.
#install.packages("tree")
library(tree)
# Fit a single classification tree for type on all other columns, draw it, then summarise it
tr <- tree(formula = type ~ ., data = spam)
plot(tr) # draw the tree
text(tr) # add the text labels to the plotted tree
summary(tr)
##
## Classification tree:
## tree(formula = type ~ ., data = spam)
## Variables actually used in tree construction:
## [1] "charDollar" "remove" "charExclamation" "hp"
## [5] "capitalLong" "our" "capitalAve" "free"
## [9] "george" "edu"
## Number of terminal nodes: 13
## Residual mean deviance: 0.4879 = 2238 / 4588
## Misclassification error rate: 0.08259 = 380 / 4601
The single tree's misclassification error rate on the training data is 0.08259 = 380 / 4601, i.e. about 8.3%.
#install.packages("randomForest")
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
# Random forest with the package defaults; no seed is set in this script, so results vary from run to run
rf <- randomForest(formula = type ~ ., data = spam)
names(rf) # components stored in the fitted forest object
## [1] "call" "type" "predicted" "err.rate"
## [5] "confusion" "votes" "oob.times" "classes"
## [9] "importance" "importanceSD" "localImportance" "proximity"
## [13] "ntree" "mtry" "forest" "y"
## [17] "test" "inbag" "terms"
importance(rf)
## MeanDecreaseGini
## make 7.8978626
## address 11.7285799
## all 27.6757990
## num3d 1.7299575
## our 60.0124292
## over 15.9645788
## remove 166.6937100
## internet 27.7286671
## order 9.7409332
## mail 16.9984869
## receive 20.7983030
## will 22.9667989
## people 7.2867589
## report 4.5789773
## addresses 2.5292543
## free 145.6777294
## business 27.3220002
## email 17.7410217
## you 59.2223406
## credit 10.3568390
## your 131.0754942
## font 5.7979131
## num000 49.9561970
## money 75.4034532
## hp 104.3969645
## hpl 40.5402989
## george 45.3540006
## num650 11.3869206
## lab 4.3991936
## labs 7.3919751
## telnet 4.4924007
## num857 1.5985785
## data 5.9151665
## num415 1.7707652
## num85 7.4793902
## technology 7.6448474
## num1999 28.1301628
## parts 1.0489031
## pm 6.9789365
## direct 2.5130790
## cs 2.2707389
## meeting 11.6676949
## original 3.1639730
## project 3.9801021
## re 20.7317135
## edu 40.9623212
## table 0.5347235
## conference 2.8664228
## charSemicolon 10.5399412
## charRoundbracket 24.6733040
## charSquarebracket 6.0698685
## charExclamation 258.2145015
## charDollar 198.6380084
## charHash 7.6491697
## capitalAve 147.6482807
## capitalLong 126.6579448
## capitalTotal 81.8965802
varImpPlot(rf) # To see which variables are most significant in a tree based classification
rf
##
## Call:
## randomForest(formula = type ~ ., data = spam)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 7
##
## OOB estimate of error rate: 4.54%
## Confusion matrix:
## nonspam spam class.error
## nonspam 2709 79 0.02833572
## spam 130 1683 0.07170436
This is the lowest misclassification (error) rate we have obtained so far: an OOB estimate of 4.54%.
# Searching for the optimal solution
dim(spam)
## [1] 4601 58
There are 57 predictors (58 columns minus type). Let’s try m = square root of 57 ≈ 7.55, rounded up to 8; the default forest above used 7.
# Same forest, but with 8 candidate variables tried at each split
rf3 <- randomForest(formula = type ~ ., data = spam, mtry = 8)
plot(rf3) # error rates against the number of trees grown
mtry is m, the number of X-variables available as split candidates at each node.
How many trees should we grow? The default is 500, but the error is rather flat after about 50 trees.
names(rf3)
## [1] "call" "type" "predicted" "err.rate"
## [5] "confusion" "votes" "oob.times" "classes"
## [9] "importance" "importanceSD" "localImportance" "proximity"
## [13] "ntree" "mtry" "forest" "y"
## [17] "test" "inbag" "terms"
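For a classification forest we would like to minimize the out-of-bag (OOB) misclassification error rate. (Mean squared error and R2, the percent of total variation explained by the forest, are the corresponding criteria for a regression forest.)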
# err.rate is a matrix with one row per tree and one column each for the OOB
# error and the per-class errors, so which.min() on the whole matrix returns a
# position in the flattened matrix rather than a number of trees. Restrict the
# search to the OOB column to get the forest size with the lowest OOB error.
which.min(rf3$err.rate[, "OOB"])
## (output must be regenerated; the earlier result, 770, exceeded the 500 trees grown because it indexed the whole matrix)
# Refit with 50 trees to look at the early part of the error curve
rf50 <- randomForest(formula = type ~ ., data = spam, mtry = 8, ntree = 50)
plot(rf50)
# err.rate has one row per tree and several columns (OOB plus one per class);
# search only the OOB column so the result is a number of trees, not a
# position in the flattened matrix.
which.min(rf50$err.rate[, "OOB"])
## (output must be regenerated; the earlier result, 89, exceeded the 50 trees grown because it indexed the whole matrix)
After about ntree = 30 the error curve becomes flat - it no longer decreases.
# Refit with only 30 trees and print the OOB error estimate and confusion matrix
rf30 <- randomForest(formula = type ~ ., data = spam, mtry = 8, ntree = 30)
plot(rf30)
rf30
##
## Call:
## randomForest(formula = type ~ ., data = spam, mtry = 8, ntree = 30)
## Type of random forest: classification
## Number of trees: 30
## No. of variables tried at each split: 8
##
## OOB estimate of error rate: 5.15%
## Confusion matrix:
## nonspam spam class.error
## nonspam 2691 97 0.03479197
## spam 140 1673 0.07722008
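OOB estimate of error rate: 5.15% - it went up compared with the 4.54% of the default 500-tree forest! (The exact figure changes from run to run because the forest is random.)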
library(MASS)
# Linear discriminant analysis; CV = TRUE returns leave-one-out cross-validated classes and posteriors
LDA <- lda(formula = type ~ ., data = spam, CV = TRUE)
names(LDA)
## [1] "class" "posterior" "terms" "call" "xlevels"
attach(spam) # NOTE: spam was already attached near the top of this script; attaching it again is redundant and produces the masking message below
## The following objects are masked from spam (pos = 6):
##
## address, addresses, all, business, capitalAve, capitalLong,
## capitalTotal, charDollar, charExclamation, charHash,
## charRoundbracket, charSemicolon, charSquarebracket, conference,
## credit, cs, data, direct, edu, email, font, free, george, hp, hpl,
## internet, lab, labs, mail, make, meeting, money, num000, num1999,
## num3d, num415, num650, num85, num857, order, original, our, over,
## parts, people, pm, project, re, receive, remove, report, table,
## technology, telnet, type, will, you, your
table( type, LDA$class ) #Confusion Matrix
##
## type nonspam spam
## nonspam 2657 131
## spam 391 1422
or
table(spam$type, LDA$class ) #Confusion Matrix
##
## nonspam spam
## nonspam 2657 131
## spam 391 1422
mean(spam$type == LDA$class) # correct classification rate / accuracy ~ 88.7%
## [1] 0.8865464
mean( type != LDA$class ) # This error rate is higher, ~11.3%
## [1] 0.1134536
# Quadratic discriminant analysis with leave-one-out cross-validated predictions (CV = TRUE)
QDA <- qda(formula = type ~ ., data = spam, CV = TRUE)
names(QDA)
## [1] "class" "posterior" "terms" "call" "xlevels"
table( type, QDA$class ) # Confusion Matrix
##
## type nonspam spam
## nonspam 2090 691
## spam 87 1722
mean( type != QDA$class ) # NA appeared - there may be NA values introduced
## [1] NA
summary(QDA$class) # let us see if NA is introduced
## nonspam spam NA's
## 2177 2413 11
Yes! 11 NA values are introduced: these are observations for which QDA did not compute a prediction. We’ll exclude these points in the following way:
mean( type != QDA$class, na.rm=TRUE ) # This error rate is even higher ~17%
## [1] 0.1694989
No improvement with QDA; linear models are ok.
##GEOMETRY of LDA and QDA
head(spam)
## make address all num3d our over remove internet order mail receive will
## 1 0.00 0.64 0.64 0 0.32 0.00 0.00 0.00 0.00 0.00 0.00 0.64
## 2 0.21 0.28 0.50 0 0.14 0.28 0.21 0.07 0.00 0.94 0.21 0.79
## 3 0.06 0.00 0.71 0 1.23 0.19 0.19 0.12 0.64 0.25 0.38 0.45
## 4 0.00 0.00 0.00 0 0.63 0.00 0.31 0.63 0.31 0.63 0.31 0.31
## 5 0.00 0.00 0.00 0 0.63 0.00 0.31 0.63 0.31 0.63 0.31 0.31
## 6 0.00 0.00 0.00 0 1.85 0.00 0.00 1.85 0.00 0.00 0.00 0.00
## people report addresses free business email you credit your font num000
## 1 0.00 0.00 0.00 0.32 0.00 1.29 1.93 0.00 0.96 0 0.00
## 2 0.65 0.21 0.14 0.14 0.07 0.28 3.47 0.00 1.59 0 0.43
## 3 0.12 0.00 1.75 0.06 0.06 1.03 1.36 0.32 0.51 0 1.16
## 4 0.31 0.00 0.00 0.31 0.00 0.00 3.18 0.00 0.31 0 0.00
## 5 0.31 0.00 0.00 0.31 0.00 0.00 3.18 0.00 0.31 0 0.00
## 6 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0 0.00
## money hp hpl george num650 lab labs telnet num857 data num415 num85
## 1 0.00 0 0 0 0 0 0 0 0 0 0 0
## 2 0.43 0 0 0 0 0 0 0 0 0 0 0
## 3 0.06 0 0 0 0 0 0 0 0 0 0 0
## 4 0.00 0 0 0 0 0 0 0 0 0 0 0
## 5 0.00 0 0 0 0 0 0 0 0 0 0 0
## 6 0.00 0 0 0 0 0 0 0 0 0 0 0
## technology num1999 parts pm direct cs meeting original project re edu
## 1 0 0.00 0 0 0.00 0 0 0.00 0 0.00 0.00
## 2 0 0.07 0 0 0.00 0 0 0.00 0 0.00 0.00
## 3 0 0.00 0 0 0.06 0 0 0.12 0 0.06 0.06
## 4 0 0.00 0 0 0.00 0 0 0.00 0 0.00 0.00
## 5 0 0.00 0 0 0.00 0 0 0.00 0 0.00 0.00
## 6 0 0.00 0 0 0.00 0 0 0.00 0 0.00 0.00
## table conference charSemicolon charRoundbracket charSquarebracket
## 1 0 0 0.00 0.000 0
## 2 0 0 0.00 0.132 0
## 3 0 0 0.01 0.143 0
## 4 0 0 0.00 0.137 0
## 5 0 0 0.00 0.135 0
## 6 0 0 0.00 0.223 0
## charExclamation charDollar charHash capitalAve capitalLong capitalTotal type
## 1 0.778 0.000 0.000 3.756 61 278 spam
## 2 0.372 0.180 0.048 5.114 101 1028 spam
## 3 0.276 0.184 0.010 9.821 485 2259 spam
## 4 0.137 0.000 0.000 3.537 40 191 spam
## 5 0.135 0.000 0.000 3.537 40 191 spam
## 6 0.000 0.000 0.000 3.000 15 54 spam
# Scatter plot of two of the predictors, with points coloured by email type
plot(x = your, y = address, col = type)
library(e1071)
# Support vector classifier with a linear kernel, fitted on all predictors
SVM <- svm(formula = type ~ ., data = spam, kernel = "linear")
summary(SVM)
##
## Call:
## svm(formula = type ~ ., data = spam, kernel = "linear")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 1
##
## Number of Support Vectors: 940
##
## ( 456 484 )
##
##
## Number of Classes: 2
##
## Levels:
## nonspam spam
940 support vectors (not cleanly classified) out of 4601 data points.
dim(spam)
## [1] 4601 58
class = predict(SVM) # predicted labels for the emails the model was fitted on (no new data supplied); note: this variable shares its name with base R's class() function
table(class,type) # rows: predicted label, columns: actual label
## type
## class nonspam spam
## nonspam 2663 185
## spam 125 1628
mean( class != type ) # Training error rate ~6.7%: close to logistic regression (~6.9%) but above the random forest OOB estimate (4.54%)
## [1] 0.06737666
# Thus a support vector machine can detect spam reasonably well.
# Let us check other SVM kernels
# Same classifier with a polynomial kernel
SVMPoly <- svm(formula = type ~ ., data = spam, kernel = "polynomial")
summary(SVMPoly)
##
## Call:
## svm(formula = type ~ ., data = spam, kernel = "polynomial")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 1
## degree: 3
## coef.0: 0
##
## Number of Support Vectors: 2561
##
## ( 1250 1311 )
##
##
## Number of Classes: 2
##
## Levels:
## nonspam spam
2561 support vectors (not cleanly classified) out of 4601 data points.
# Same classifier with a radial kernel
SVMradial <- svm(formula = type ~ ., data = spam, kernel = "radial")
summary(SVMradial)
##
## Call:
## svm(formula = type ~ ., data = spam, kernel = "radial")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 1275
##
## ( 595 680 )
##
##
## Number of Classes: 2
##
## Levels:
## nonspam spam
1275 support vectors (not cleanly classified) out of 4601 data points.
# Same classifier with a sigmoid kernel
SVMsigmoid <- svm(formula = type ~ ., data = spam, kernel = "sigmoid")
summary(SVMsigmoid)
##
## Call:
## svm(formula = type ~ ., data = spam, kernel = "sigmoid")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: sigmoid
## cost: 1
## coef.0: 0
##
## Number of Support Vectors: 1038
##
## ( 517 521 )
##
##
## Number of Classes: 2
##
## Levels:
## nonspam spam
1038 support vectors (not cleanly classified) out of 4601 data points.
The “cost” option specifies the cost of violating the margin. We can try costs 0.001, 0.01, 0.1, 1, 10, 100, 1000:
# Cross-validated search over cost = 10^-3, ..., 10^3 for the linear-kernel SVM
Stuned <- tune(svm, type ~ ., data = spam, kernel = "linear",
               ranges = list(cost = 10^(-3:3)))
summary(Stuned)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 10
##
## - best performance: 0.06846223
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-03 0.10758512 0.01588266
## 2 1e-02 0.08215599 0.01441233
## 3 1e-01 0.07498302 0.01284616
## 4 1e+00 0.07215599 0.01277532
## 5 1e+01 0.06846223 0.01035335
## 6 1e+02 0.07063614 0.01004829
## 7 1e+03 0.08346176 0.01298136