library(e1071)
## Warning: package 'e1071' was built under R version 4.3.3
# Iris: SVM classification with svm()'s default settings ----
data("iris")
set.seed(1234)  # fixed seed so the random split below is reproducible

# Draw 100 row indices for training; every remaining row becomes test data.
rndSample <- sample(seq_len(nrow(iris)), size = 100)
tr <- iris[rndSample, ]
ts <- iris[-rndSample, ]

# Model Species from all other columns, then predict the held-out rows.
s <- svm(Species ~ ., data = tr)
ps <- predict(s, newdata = ts)

# Confusion matrix: predicted class in rows, true class in columns.
# The surrounding parentheses make the assignment print its value as well.
(cm <- table(ps, ts$Species))
##             
## ps           setosa versicolor virginica
##   setosa         18          0         0
##   versicolor      0         17         1
##   virginica       0          1        13
# Test error rate in percent: one minus the share of diagonal (correctly
# classified) counts in `cm`. Despite its name, `precision` holds the error
# rate, not the precision.
precision <- 100 * (1 - sum(diag(cm)) / sum(cm))

# Second model: polynomial kernel with hand-picked settings. Try other values
# for `cost` and `degree` and compare the resulting error rates.
# NOTE(review): choosing settings by their error on `ts` makes the reported
# test error optimistic; consider cross-validating on `tr` instead.
s2 <- svm(
  Species ~ .,
  data = tr,
  kernel = "polynomial",
  degree = 1,
  cost = 30
)
ps2 <- predict(s2, newdata = ts)
cm2 <- table(ps2, ts$Species)

# Error rate (percent) of the polynomial-kernel model on the test rows.
100 * (1 - sum(diag(cm2)) / sum(cm2))
## [1] 0
# Boston housing: SVM regression of medv on all other columns ----
# Note: `tr` and `ts` are reused here and replace the iris splits.
data("Boston", package = "MASS")
set.seed(1234)

# 354 randomly drawn rows train the model; the remaining rows are held out.
sp <- sample(seq_len(nrow(Boston)), size = 354)
tr <- Boston[sp, ]
ts <- Boston[-sp, ]

# Fit with svm()'s defaults and predict the held-out rows.
s1 <- svm(medv ~ ., data = tr)
ps1 <- predict(s1, newdata = ts)

# Mean absolute error of the predictions on the held-out rows.
mean(abs(ps1 - ts$medv))
## [1] 2.158999
# Refit the regression with explicit radial-kernel hyperparameters.
# Note: `s2` and `ps2` are reused here and replace the iris model objects.
s2 <- svm(
  medv ~ .,
  data = tr,
  kernel = "radial",
  gamma = 0.04,
  cost = 15,
  epsilon = 0.1
)
ps2 <- predict(s2, newdata = ts)

# Mean absolute error of the tuned model on the same held-out rows.
mean(abs(ps2 - ts$medv))
## [1] 1.904667
library(readr)
# Read the letter-recognition data: 20,000 rows with one character label
# column (`letter`) and 16 numeric feature columns, per the readr message
# printed below.
# NOTE(review): absolute, user-specific path; the script fails on any machine
# where this file does not exist at exactly this location.
letterdata <- read_csv(
  file = "C:/Users/dnred/Downloads/letterdata.csv"
)
## Rows: 20000 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): letter
## dbl (16): xbox, ybox, width, height, onpix, xbar, ybar, x2bar, y2bar, xybar,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens a GUI data viewer, which only makes sense (and is only reliably
# available) in an interactive session. Skip it when the script is run with
# Rscript or in another non-interactive context so the run does not fail or
# block on a viewer window.
if (interactive()) {
  View(letterdata)
}

library(kernlab)

# Print the column names and types of the imported data.
str(letterdata)
## spc_tbl_ [20,000 × 17] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ letter: chr [1:20000] "T" "I" "D" "N" ...
##  $ xbox  : num [1:20000] 2 5 4 7 2 4 4 1 2 11 ...
##  $ ybox  : num [1:20000] 8 12 11 11 1 11 2 1 2 15 ...
##  $ width : num [1:20000] 3 3 6 6 3 5 5 3 4 13 ...
##  $ height: num [1:20000] 5 7 8 6 1 8 4 2 4 9 ...
##  $ onpix : num [1:20000] 1 2 6 3 1 3 4 1 2 7 ...
##  $ xbar  : num [1:20000] 8 10 10 5 8 8 8 8 10 13 ...
##  $ ybar  : num [1:20000] 13 5 6 9 6 8 7 2 6 2 ...
##  $ x2bar : num [1:20000] 0 5 2 4 6 6 6 2 2 6 ...
##  $ y2bar : num [1:20000] 6 4 6 6 6 9 6 2 6 2 ...
##  $ xybar : num [1:20000] 6 13 10 4 6 5 7 8 12 12 ...
##  $ x2ybar: num [1:20000] 10 3 3 4 5 6 6 2 4 1 ...
##  $ xy2bar: num [1:20000] 8 9 7 10 9 6 6 8 8 9 ...
##  $ xedge : num [1:20000] 0 2 3 6 1 0 2 1 1 8 ...
##  $ xedgey: num [1:20000] 8 8 7 10 7 8 8 6 6 1 ...
##  $ yedge : num [1:20000] 0 4 3 2 5 9 7 2 1 1 ...
##  $ yedgex: num [1:20000] 8 10 9 8 10 7 10 7 7 8 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   letter = col_character(),
##   ..   xbox = col_double(),
##   ..   ybox = col_double(),
##   ..   width = col_double(),
##   ..   height = col_double(),
##   ..   onpix = col_double(),
##   ..   xbar = col_double(),
##   ..   ybar = col_double(),
##   ..   x2bar = col_double(),
##   ..   y2bar = col_double(),
##   ..   xybar = col_double(),
##   ..   x2ybar = col_double(),
##   ..   xy2bar = col_double(),
##   ..   xedge = col_double(),
##   ..   xedgey = col_double(),
##   ..   yedge = col_double(),
##   ..   yedgex = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Work on a copy so the raw import in `letterdata` stays untouched.
# NOTE(review): the name `letters` shadows base R's built-in `letters` vector
# for the rest of the script.
letters <- letterdata

# Turn the character label column into a factor (26 levels, per the str()
# output below) and confirm the new column type.
letters$letter <- factor(letters$letter)
str(letters)
## spc_tbl_ [20,000 × 17] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ letter: Factor w/ 26 levels "A","B","C","D",..: 20 9 4 14 7 19 2 1 10 13 ...
##  $ xbox  : num [1:20000] 2 5 4 7 2 4 4 1 2 11 ...
##  $ ybox  : num [1:20000] 8 12 11 11 1 11 2 1 2 15 ...
##  $ width : num [1:20000] 3 3 6 6 3 5 5 3 4 13 ...
##  $ height: num [1:20000] 5 7 8 6 1 8 4 2 4 9 ...
##  $ onpix : num [1:20000] 1 2 6 3 1 3 4 1 2 7 ...
##  $ xbar  : num [1:20000] 8 10 10 5 8 8 8 8 10 13 ...
##  $ ybar  : num [1:20000] 13 5 6 9 6 8 7 2 6 2 ...
##  $ x2bar : num [1:20000] 0 5 2 4 6 6 6 2 2 6 ...
##  $ y2bar : num [1:20000] 6 4 6 6 6 9 6 2 6 2 ...
##  $ xybar : num [1:20000] 6 13 10 4 6 5 7 8 12 12 ...
##  $ x2ybar: num [1:20000] 10 3 3 4 5 6 6 2 4 1 ...
##  $ xy2bar: num [1:20000] 8 9 7 10 9 6 6 8 8 9 ...
##  $ xedge : num [1:20000] 0 2 3 6 1 0 2 1 1 8 ...
##  $ xedgey: num [1:20000] 8 8 7 10 7 8 8 6 6 1 ...
##  $ yedge : num [1:20000] 0 4 3 2 5 9 7 2 1 1 ...
##  $ yedgex: num [1:20000] 8 10 9 8 10 7 10 7 7 8 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   letter = col_character(),
##   ..   xbox = col_double(),
##   ..   ybox = col_double(),
##   ..   width = col_double(),
##   ..   height = col_double(),
##   ..   onpix = col_double(),
##   ..   xbar = col_double(),
##   ..   ybar = col_double(),
##   ..   x2bar = col_double(),
##   ..   y2bar = col_double(),
##   ..   xybar = col_double(),
##   ..   x2ybar = col_double(),
##   ..   xy2bar = col_double(),
##   ..   xedge = col_double(),
##   ..   xedgey = col_double(),
##   ..   yedge = col_double(),
##   ..   yedgex = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Positional split: the first 16,000 rows train the model and rows
# 16001-20000 (4,000 rows) are held out for testing.
letters_train <- letters[1:16000, ]
letters_test <- letters[16001:20000, ]

# SVM with kernlab's "vanilladot" (linear) kernel; `letter ~ .` models the
# letter from every other column of the training data.
letter_classifier <- ksvm(
  letter ~ .,
  data = letters_train,
  kernel = "vanilladot"
)
##  Setting default kernel parameters

# Predict a letter for each held-out row and preview the first few.
letter_predictions <- predict(letter_classifier, newdata = letters_test)
head(letter_predictions)
## [1] U N V X N H
## Levels: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z

# Cross-tabulate predicted letters (rows) against true letters (columns).
table(letter_predictions, letters_test$letter)
##                   
## letter_predictions   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O
##                  A 144   0   0   0   0   0   0   0   0   1   0   0   1   2   2
##                  B   0 121   0   5   2   0   1   2   0   0   1   0   1   0   0
##                  C   0   0 120   0   4   0  10   2   2   0   1   3   0   0   2
##                  D   2   2   0 156   0   1   3  10   4   3   4   3   0   5   5
##                  E   0   0   5   0 127   3   1   1   0   0   3   4   0   0   0
##                  F   0   0   0   0   0 138   2   2   6   0   0   0   0   0   0
##                  G   1   1   2   1   9   2 123   2   0   0   1   2   1   0   1
##                  H   0   0   0   1   0   1   0 102   0   2   3   2   3   4  20
##                  I   0   1   0   0   0   1   0   0 141   8   0   0   0   0   0
##                  J   0   1   0   0   0   1   0   2   5 128   0   0   0   0   1
##                  K   1   1   9   0   0   0   2   5   0   0 118   0   0   2   0
##                  L   0   0   0   0   2   0   1   1   0   0   0 133   0   0   0
##                  M   0   0   1   1   0   0   1   1   0   0   0   0 135   4   0
##                  N   0   0   0   0   0   1   0   1   0   0   0   0   0 145   0
##                  O   1   0   2   1   0   0   1   2   0   1   0   0   0   1  99
##                  P   0   0   0   1   0   2   1   0   0   0   0   0   0   0   2
##                  Q   0   0   0   0   0   0   8   2   0   0   0   3   0   0   3
##                  R   0   7   0   0   1   0   3   8   0   0  13   0   0   1   1
##                  S   1   1   0   0   1   0   3   0   1   1   0   1   0   0   0
##                  T   0   0   0   0   3   2   0   0   0   0   1   0   0   0   0
##                  U   1   0   3   1   0   0   0   2   0   0   0   0   0   0   1
##                  V   0   0   0   0   0   1   3   4   0   0   0   0   1   2   1
##                  W   0   0   0   0   0   0   1   0   0   0   0   0   2   0   0
##                  X   0   1   0   0   2   0   0   1   3   0   1   6   0   0   1
##                  Y   3   0   0   0   0   0   0   1   0   0   0   0   0   0   0
##                  Z   2   0   0   0   1   0   0   0   3   4   0   0   0   0   0
##                   
## letter_predictions   P   Q   R   S   T   U   V   W   X   Y   Z
##                  A   0   5   0   1   1   1   0   1   0   0   1
##                  B   2   2   3   5   0   0   2   0   1   0   0
##                  C   0   0   0   0   0   0   0   0   0   0   0
##                  D   3   1   4   0   0   0   0   0   3   3   1
##                  E   0   2   0  10   0   0   0   0   2   0   3
##                  F  16   0   0   3   0   0   1   0   1   2   0
##                  G   2   8   2   4   3   0   0   0   1   0   0
##                  H   0   2   3   0   3   0   2   0   0   1   0
##                  I   1   0   0   3   0   0   0   0   5   1   1
##                  J   1   3   0   2   0   0   0   0   1   0   6
##                  K   1   0   7   0   1   3   0   0   5   0   0
##                  L   0   1   0   5   0   0   0   0   0   0   1
##                  M   0   0   0   0   0   3   0   8   0   0   0
##                  N   0   0   3   0   0   1   0   2   0   0   0
##                  O   3   3   0   0   0   3   0   0   0   0   0
##                  P 130   0   0   0   0   0   0   0   0   1   0
##                  Q   1 124   0   5   0   0   0   0   0   2   0
##                  R   1   0 138   0   1   0   1   0   0   0   0
##                  S   0  14   0 101   3   0   0   0   2   0  10
##                  T   0   0   0   3 133   1   0   0   0   2   2
##                  U   0   0   0   0   0 152   0   0   1   1   0
##                  V   0   3   1   0   0   0 126   1   0   4   0
##                  W   0   0   0   0   0   4   4 127   0   0   0
##                  X   0   0   0   1   0   0   0   0 137   1   1
##                  Y   7   0   0   0   3   0   0   0   0 127   0
##                  Z   0   0   0  18   3   0   0   0   0   0 132
# In the confusion table above, each diagonal cell counts the test records
# whose predicted letter equals the true letter.
# Element-wise comparison: TRUE where the prediction matches the true label.
agreement <- letter_predictions == letters_test$letter

# Counts of incorrect (FALSE) and correct (TRUE) predictions.
table(agreement)
## agreement
## FALSE  TRUE 
##   643  3357
# The same counts expressed as proportions of the test set.
prop.table(table(agreement))
## agreement
##   FALSE    TRUE 
## 0.16075 0.83925
# Repeat the fit on the same training rows with kernlab's "rbfdot" kernel
# (Gaussian radial basis function), then predict the same held-out rows.
letter_classifier_rbf <- ksvm(
  letter ~ .,
  data = letters_train,
  kernel = "rbfdot"
)
letter_predictions_rbf <- predict(
  letter_classifier_rbf,
  newdata = letters_test
)