SVM for letterdata

getwd()
[1] "C:/Users/icy/Desktop/Learn R"
# Load the letter-recognition data from the project directory and inspect it.
# setwd() is kept so the session's working directory is the same as before.
setwd("C:/Users/icy/Desktop/Learn R")
# stringsAsFactors = TRUE is spelled out because R >= 4.0 reads text columns
# as character by default; the recorded str() output shows `letter` as a
# 26-level factor, and this keeps it that way on any R version.
ldata <- read.csv("letterdata.csv", stringsAsFactors = TRUE)
str(ldata)
'data.frame':   20000 obs. of  17 variables:
 $ letter: Factor w/ 26 levels "A","B","C","D",..: 20 9 4 14 7 19 2 1 10 13 ...
 $ xbox  : int  2 5 4 7 2 4 4 1 2 11 ...
 $ ybox  : int  8 12 11 11 1 11 2 1 2 15 ...
 $ width : int  3 3 6 6 3 5 5 3 4 13 ...
 $ height: int  5 7 8 6 1 8 4 2 4 9 ...
 $ onpix : int  1 2 6 3 1 3 4 1 2 7 ...
 $ xbar  : int  8 10 10 5 8 8 8 8 10 13 ...
 $ ybar  : int  13 5 6 9 6 8 7 2 6 2 ...
 $ x2bar : int  0 5 2 4 6 6 6 2 2 6 ...
 $ y2bar : int  6 4 6 6 6 9 6 2 6 2 ...
 $ xybar : int  6 13 10 4 6 5 7 8 12 12 ...
 $ x2ybar: int  10 3 3 4 5 6 6 2 4 1 ...
 $ xy2bar: int  8 9 7 10 9 6 6 8 8 9 ...
 $ xedge : int  0 2 3 6 1 0 2 1 1 8 ...
 $ xedgey: int  8 8 7 10 7 8 8 6 6 1 ...
 $ yedge : int  0 4 3 2 5 9 7 2 1 1 ...
 $ yedgex: int  8 10 9 8 10 7 10 7 7 8 ...
summary(ldata)
     letter           xbox             ybox            width            height           onpix       
 U      :  813   Min.   : 0.000   Min.   : 0.000   Min.   : 0.000   Min.   : 0.000   Min.   : 0.000  
 D      :  805   1st Qu.: 3.000   1st Qu.: 5.000   1st Qu.: 4.000   1st Qu.: 4.000   1st Qu.: 2.000  
 P      :  803   Median : 4.000   Median : 7.000   Median : 5.000   Median : 6.000   Median : 3.000  
 T      :  796   Mean   : 4.024   Mean   : 7.035   Mean   : 5.122   Mean   : 5.372   Mean   : 3.506  
 M      :  792   3rd Qu.: 5.000   3rd Qu.: 9.000   3rd Qu.: 6.000   3rd Qu.: 7.000   3rd Qu.: 5.000  
 A      :  789   Max.   :15.000   Max.   :15.000   Max.   :15.000   Max.   :15.000   Max.   :15.000  
 (Other):15202                                                                                       
      xbar             ybar          x2bar            y2bar            xybar            x2ybar      
 Min.   : 0.000   Min.   : 0.0   Min.   : 0.000   Min.   : 0.000   Min.   : 0.000   Min.   : 0.000  
 1st Qu.: 6.000   1st Qu.: 6.0   1st Qu.: 3.000   1st Qu.: 4.000   1st Qu.: 7.000   1st Qu.: 5.000  
 Median : 7.000   Median : 7.0   Median : 4.000   Median : 5.000   Median : 8.000   Median : 6.000  
 Mean   : 6.898   Mean   : 7.5   Mean   : 4.629   Mean   : 5.179   Mean   : 8.282   Mean   : 6.454  
 3rd Qu.: 8.000   3rd Qu.: 9.0   3rd Qu.: 6.000   3rd Qu.: 7.000   3rd Qu.:10.000   3rd Qu.: 8.000  
 Max.   :15.000   Max.   :15.0   Max.   :15.000   Max.   :15.000   Max.   :15.000   Max.   :15.000  
                                                                                                    
     xy2bar           xedge            xedgey           yedge            yedgex      
 Min.   : 0.000   Min.   : 0.000   Min.   : 0.000   Min.   : 0.000   Min.   : 0.000  
 1st Qu.: 7.000   1st Qu.: 1.000   1st Qu.: 8.000   1st Qu.: 2.000   1st Qu.: 7.000  
 Median : 8.000   Median : 3.000   Median : 8.000   Median : 3.000   Median : 8.000  
 Mean   : 7.929   Mean   : 3.046   Mean   : 8.339   Mean   : 3.692   Mean   : 7.801  
 3rd Qu.: 9.000   3rd Qu.: 4.000   3rd Qu.: 9.000   3rd Qu.: 5.000   3rd Qu.: 9.000  
 Max.   :15.000   Max.   :15.000   Max.   :15.000   Max.   :15.000   Max.   :15.000  
                                                                                     
library(tidyverse)
package 'tidyverse' was built under R version 3.2.5
Note: the specification for S3 class "difftime" in package 'hms' seems equivalent to one from package 'DBI': not turning on duplicate class definitions for this class.
Note: the specification for S3 class "AsIs" in package 'jsonlite' seems equivalent to one from package 'DBI': not turning on duplicate class definitions for this class.
Note: the specification for S3 class "difftime" in package 'lubridate' seems equivalent to one from package 'DBI': not turning on duplicate class definitions for this class.
Loading tidyverse: ggplot2
Loading tidyverse: tibble
Loading tidyverse: tidyr
Loading tidyverse: readr
Loading tidyverse: purrr
Loading tidyverse: dplyr
package 'ggplot2' was built under R version 3.2.5
package 'tibble' was built under R version 3.2.5
package 'tidyr' was built under R version 3.2.5
package 'readr' was built under R version 3.2.5
package 'purrr' was built under R version 3.2.5
package 'dplyr' was built under R version 3.2.5
Conflicts with tidy packages -----------------------------------------------------------------------
filter(): dplyr, stats
lag():    dplyr, stats
library(caret)
package 'caret' was built under R version 3.2.5
Loading required package: lattice
Note: the specification for S3 class "family" in package 'MatrixModels' seems equivalent to one from package 'lme4': not turning on duplicate class definitions for this class.

Attaching package: 'caret'

The following object is masked from 'package:purrr':

    lift
# Split the data 80/20 into training and test sets; createDataPartition()
# samples row indices based on the `letter` column.
# NOTE(review): no seed is set before sampling, so the split (and every
# accuracy figure derived from it) will differ between runs.
index <- createDataPartition(ldata$letter, p = 0.8, list = FALSE)
train_letter <- ldata[index, ]
test_letter <- ldata[-index, ]

use a package called kernlab

# Fit an SVM with a linear ("vanilladot") kernel on the training split and
# print the fitted model. The fit uses ksvm()'s default cost, which the
# printed summary reports as C = 1.
library(kernlab)
letter_svm <- ksvm(letter ~ ., data = train_letter, kernel = "vanilladot")
letter_svm
Support Vector Machine object of class "ksvm" 

SV type: C-svc  (classification) 
 parameter : cost C = 1 

Linear (vanilla) kernel function. 

Number of Support Vectors : 7111 

Objective Function Value : -18.6087 -20.8582 -26.668 -5.1723 -10.3891 -32.0694 -58.2625 -19.2452 -56.9296 -33.0767 -16.2863 -33.8415 -36.5354 -54.8804 -10.054 -36.8408 -34.4037 -17.8865 -13.3825 -34.9807 -31.508 -8.9759 -12.4229 -39.6089 -12.8078 -8.1237 -157.026 -45.64 -67.2166 -121.9172 -158.9839 -59.4497 -48.0067 -67.6814 -28.2933 -28.8665 -25.9533 -37.6157 -44.661 -118.3237 -186.6086 -210.3946 -20.1156 -9.3052 -62.5243 -11.7271 -55.4564 -10.0277 -20.2575 -7.8247 -115.3387 -25.9461 -230.4933 -72.0279 -8.0863 -4.5609 -137.2144 -79.6241 -17.6702 -13.1622 -74.6711 -12.1711 -28.9402 -17.3269 -20.0291 -22.9013 -49.1707 -9.2169 -5.0213 -14.2506 -4.6712 -4.6656 -7.2932 -37.5912 -47.5064 -181.4255 -49.6158 -44.6401 -39.7724 -16.8354 -19.5784 -85.5915 -99.2012 -36.9984 -32.7984 -118.8794 -29.8076 -26.0421 -35.0803 -17.4023 -5.0412 -42.308 -7.3711 -17.9997 -55.2773 -141.9599 -48.0394 -40.1792 -36.5803 -72.8561 -119.3986 -8.672 -5.6144 -12.6147 -29.4895 -129.8038 -43.9596 -174.1037 -89.717 -8.4932 -15.4301 -3.2427 -58.4606 -7.6616 -97.0397 -49.1021 -93.1863 -78.1754 -69.5294 -20.2119 -13.2502 -8.2241 -27.0198 -13.8202 -251.7534 -29.186 -26.6998 -134.9192 -140.2855 -9.2257 -37.6259 -6.6605 -55.3281 -70.0511 -32.4258 -213.6153 -28.8569 -16.8404 -129.7384 -175.3738 -42.2916 -24.0891 -146.8322 -87.0628 -337.1914 -145.6859 -145.3664 -33.6332 -30.2419 -56.0353 -25.8858 -46.267 -6.8555 -12.0445 -35.5726 -65.3854 -188.7138 -54.8911 -94.9918 -151.2312 -598.4124 -128.7064 -141.863 -319.1336 -34.2674 -66.1976 -155.0092 -125.405 -37.6525 -66.487 -52.6632 -7.8975 -229.4488 -15.2649 -42.8023 -1.9334 -7.3521 -15.501 -25.6151 -61.0721 -23.4658 -188.9469 -24.4591 -4.2232 -4.5194 -0.8287 -124.6983 -8.5829 -77.4323 -19.7557 -13.7904 -5.7424 -16.5662 -30.1795 -24.462 -86.4478 -25.9641 -98.4674 -15.0772 -10.3766 -7.9588 -1.7603 -82.9371 -7.8094 -108.7857 -103.3466 -44.9908 -27.5022 -60.6222 -25.9253 -52.2885 -248.2756 -36.022 -46.643 -33.0551 -19.1138 -11.2341 -130.6471 -5.829 -5.2044 -8.2181 
-13.902 -25.6998 -23.1363 -149.3111 -37.0495 -97.7243 -34.3134 -13.4788 -10.2387 -2.8857 -99.0203 -7.66 -15.1167 -69.8241 -104.1519 -13.6087 -14.6342 -56.5432 -2.7099 -7.7852 -81.761 -38.0888 -103.3055 -4.625 -7.9284 -1.1873 -97.3704 -18.9703 -10.2893 -53.0391 -3.4612 -18.4687 -67.2429 -50.4526 -56.9285 -5.7611 -19.5804 -2.1773 -76.3193 -116.8639 -113.6003 -26.7688 -19.6599 -53.1117 -37.3493 -58.7851 -25.7271 -6.4058 -4.2579 -57.4593 -34.3957 -59.537 -29.1546 -5.7413 -56.7806 -12.9309 -21.7818 -65.8739 -4.7988 -57.3379 -232.308 -15.7156 -11.1053 -15.9073 -8.2365 -74.0576 -16.6418 -41.8188 -46.1307 -25.3015 -14.1329 -48.4203 -16.9153 -66.6833 -5.8405 -5.6308 -92.9946 -3.6888 -7.0958 -1.125 -132.4024 -26.6408 -382.2582 -26.4193 -34.0209 -5.0488 -79.3392 -132.0235 -83.6394 -26.1943 -41.4468 -12.6686 -22.9893 -1.8363 -61.0521 -8.8424 -159.6874 -1.7799 -2.0123 -10.4057 -0.4891 -28.1062 -31.535 -6.3504 
Training error : 0.132713 

find the test error

# Predict the letter for every held-out row, then cross-tabulate the
# predictions (rows) against the true labels (columns).
letter_pred <- predict(letter_svm, test_letter)
table(letter_pred, test_letter$letter)
           
letter_pred   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O   P   Q   R   S   T   U   V
          A 147   0   0   0   0   0   0   0   0   8   0   1   3   2   2   0   3   0   0   0   2   0
          B   0 139   0   6   3   1   1   2   1   0   2   0   1   0   0   0   2   6   8   1   1   1
          C   0   0 128   0   3   1   5   2   0   0   4   0   0   1   1   0   1   0   0   0   0   0
          D   0   2   0 143   0   2   7  12   3   3   4   1   0   2   7   3   1   3   1   1   0   0
          E   0   0   2   0 118   1   2   1   1   0   0   6   0   0   0   1   4   0   7   1   0   0
          F   0   0   1   0   1 137   0   1   1   5   0   0   0   0   0  14   0   0   1   2   0   0
          G   0   2   1   0  12   2 121   3   1   0   1   2   1   0   2   3   6   2   4   1   0   2
          H   0   1   1   1   0   0   0  88   0   0   0   0   2   4  17   2   2   1   0   2   1   0
          I   0   0   0   0   0   0   0   0 133   8   0   0   0   0   0   0   0   0   3   0   0   0
          J   3   0   0   0   0   1   0   0   4 120   0   0   0   0   0   0   1   0   0   0   0   0
          K   4   1   9   0   0   1   3   5   0   0 121   1   0   0   0   0   0   9   0   0   2   0
          L   0   0   0   0   4   0   0   0   1   0   3 136   0   0   0   0   1   0   4   0   0   0
          M   0   0   0   0   0   0   1   1   0   0   0   0 143   0   2   0   0   0   0   0   5   0
          N   0   0   0   3   0   1   0   1   0   0   0   0   2 144   0   0   0   2   0   0   0   0
          O   0   0   4   1   0   0   2   8   0   0   0   0   0   1 112   1   3   1   2   0   1   0
          P   0   0   0   0   0   4   1   0   0   0   0   0   0   0   1 133   1   0   0   1   0   0
          Q   0   0   0   0   2   0   2   2   0   0   0   3   0   0   2   0 121   0   4   0   0   0
          R   0   7   0   6   4   0   0  15   0   1  11   0   0   1   0   0   0 126   2   1   0   1
          S   1   1   0   0   0   3   5   0   3   2   0   1   0   0   0   0   8   0 108   2   0   0
          T   1   0   1   0   3   0   0   1   0   0   0   0   0   0   0   0   0   1   0 141   0   2
          U   0   0   0   1   0   1   1   1   0   0   0   0   0   1   0   1   0   0   0   1 150   0
          V   1   0   0   0   0   0   2   0   0   0   0   0   0   0   0   0   0   0   0   0   0 140
          W   0   0   0   0   0   0   1   0   0   0   0   0   6   0   4   0   0   0   0   0   0   4
          X   0   0   0   0   2   0   0   2   1   1   1   1   0   0   0   0   1   0   0   0   0   0
          Y   0   0   0   0   0   0   0   1   1   0   0   0   0   0   0   2   0   0   0   4   0   2
          Z   0   0   0   0   1   0   0   0   1   1   0   0   0   0   0   0   1   0   5   1   0   0
           
letter_pred   W   X   Y   Z
          A   0   0   1   1
          B   0   0   0   0
          C   0   0   0   0
          D   0   2   2   1
          E   0   3   0   8
          F   0   0   1   0
          G   1   1   0   0
          H   0   0   0   0
          I   0   5   0   0
          J   0   1   0   6
          K   0   1   0   0
          L   0   1   0   0
          M  10   0   1   0
          N   1   0   0   0
          O   1   0   0   0
          P   0   0   1   0
          Q   0   0   1   1
          R   1   0   0   0
          S   0   0   0   9
          T   0   0   4   2
          U   1   1   0   0
          V   0   0   4   0
          W 135   0   0   0
          X   0 142   1   0
          Y   0   0 141   0
          Z   0   0   0 118

calculate total accuracy

# TRUE where the linear-kernel prediction matches the true test label.
agree <- letter_pred == test_letter$letter
# Proportion of incorrect (FALSE) and correct (TRUE) test predictions.
agree %>%
  table() %>%
  prop.table()
.
    FALSE      TRUE 
0.1512036 0.8487964 

could we improve the accuracy further?

# Refit the SVM with the "rbfdot" kernel and cost C = 10, predict on the
# test split, and report the proportion of incorrect (FALSE) and correct
# (TRUE) predictions.
letter_svm2 <- ksvm(
  letter ~ .,
  data = train_letter,
  kernel = "rbfdot",
  C = 10
)
letter_pred2 <- predict(letter_svm2, test_letter)
agree2 <- letter_pred2 == test_letter$letter
agree2 %>%
  table() %>%
  prop.table()
.
     FALSE       TRUE 
0.03284855 0.96715145 

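This is a substantial improvement: switching to the RBF kernel with C = 10 raises test accuracy from about 84.9% to about 96.7%.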

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQojIyBTVk0gZm9yIGxldHRlcmRhdGEgDQoNCmBgYHtyfQ0KZ2V0d2QoKQ0Kc2V0d2QoIkM6L1VzZXJzL2ljeS9EZXNrdG9wL0xlYXJuIFIiKQ0KDQpsZGF0YT1yZWFkLmNzdigibGV0dGVyZGF0YS5jc3YiKQ0KDQpzdHIobGRhdGEpDQpzdW1tYXJ5KGxkYXRhKQ0KDQpgYGANCg0KDQpgYGB7cn0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShjYXJldCkNCg0KDQppbmRleD1jcmVhdGVEYXRhUGFydGl0aW9uKGxkYXRhJGxldHRlcixwPTAuOCxsaXN0PUZBTFNFKQ0KDQp0cmFpbl9sZXR0ZXI9bGRhdGFbaW5kZXgsXQ0KDQp0ZXN0X2xldHRlcj1sZGF0YVstaW5kZXgsXQ0KDQpgYGANCg0KDQojIyB1c2UgYSBwYWNrYWdlIGNhbGxlZCBrZXJubGFiDQpgYGB7cn0NCiNpbnN0YWxsLnBhY2thZ2VzKCJrZXJubGFiIikNCmxpYnJhcnkoa2VybmxhYikNCg0KbGV0dGVyX3N2bT1rc3ZtKGxldHRlcn4uLGRhdGE9dHJhaW5fbGV0dGVyLGtlcm5lbD0idmFuaWxsYWRvdCIpDQoNCmxldHRlcl9zdm0NCg0KDQpgYGANCiMgZmluZCB0aGUgdGVzdCBlcnJvcg0KDQpgYGB7cn0NCg0KbGV0dGVyX3ByZWQ9cHJlZGljdChsZXR0ZXJfc3ZtLHRlc3RfbGV0dGVyKQ0KdGFibGUobGV0dGVyX3ByZWQsdGVzdF9sZXR0ZXIkbGV0dGVyKQ0KDQoNCg0KDQpgYGANCiMjIGNhbGN1bGF0ZSB0b3RhbCBhY2N1cmFjeQ0KYGBge3J9DQoNCmFncmVlPC1sZXR0ZXJfcHJlZD09dGVzdF9sZXR0ZXIkbGV0dGVyDQoNCiNpbnN0YWxsLnBhY2thZ2VzKCJtYWdyaXR0ciIpDQoNCmxpYnJhcnkobWFncml0dHIpDQphZ3JlZSU+JXRhYmxlJT4lcHJvcC50YWJsZQ0KDQpgYGANCg0KDQojIyBjb3VsZCB3ZSBpbXByb3ZlIHRoZSBhY2N1cmFjeSBmdXJ0aGVyPw0KDQpgYGB7cn0NCmxldHRlcl9zdm0yPWtzdm0obGV0dGVyfi4sZGF0YT10cmFpbl9sZXR0ZXIsa2VybmVsPSJyYmZkb3QiLEM9MTApDQoNCmxldHRlcl9wcmVkMj1wcmVkaWN0KGxldHRlcl9zdm0yLHRlc3RfbGV0dGVyKQ0KDQoNCmFncmVlMjwtIGxldHRlcl9wcmVkMj09dGVzdF9sZXR0ZXIkbGV0dGVyDQoNCmFncmVlMiU+JXRhYmxlJT4lcHJvcC50YWJsZSgpDQpgYGANCg0KDQojIyB0aGlzIGlzIGFtYXppbmcNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg==