library(e1071)
## Warning: package 'e1071' was built under R version 4.3.3
# --- iris: SVM classification with the package's default settings ----
# Hold out 50 of the 150 iris rows for testing and train on the other 100.
data(iris)
set.seed(1234)  # fixed seed so the random train/test split is reproducible
# seq_len() instead of 1:nrow(): identical here, but safe for a 0-row input.
rndSample <- sample(seq_len(nrow(iris)), 100)
tr <- iris[rndSample, ]   # training rows
ts <- iris[-rndSample, ]  # remaining rows form the test set
# Predict Species from all other columns; no tuning arguments are passed.
s <- svm(Species ~ ., data = tr)
ps <- predict(s, newdata = ts)
# Confusion matrix: rows = predicted class, columns = true class.
# The outer parentheses print the table as well as assigning it.
(cm <- table(ps, ts$Species))
##
## ps setosa versicolor virginica
## setosa 18 0 0
## versicolor 0 17 1
## virginica 0 1 13
# Misclassification (error) rate of the first iris model, in percent:
# 100 * (1 - share of test rows on the diagonal of the confusion matrix `cm`).
error_rate <- 100 * (1 - sum(diag(cm)) / sum(cm))
# Backward-compatible alias. Despite its name, this variable has always held
# the error rate, not precision; prefer `error_rate` in new code.
precision <- error_rate
# Experiment: vary `cost` and `degree` and keep whichever combination yields
# the lowest test error.
s2 <- svm(
  Species ~ .,
  data = tr,
  kernel = "polynomial",
  degree = 1,
  cost = 30
)
ps2 <- predict(s2, newdata = ts)
cm2 <- table(ps2, ts$Species)
# Error rate in percent: 100 * (1 - fraction of test rows on the diagonal).
# local() keeps the helper variable out of the global environment; its value
# is still auto-printed at top level.
local({
  hit_share <- sum(diag(cm2)) / sum(cm2)
  100 * (1 - hit_share)
})
## [1] 0
# --- Boston: SVM regression on medv with the package's default settings ----
data(Boston, package = "MASS")
set.seed(1234)  # fixed seed so the random train/test split is reproducible
# Draw 354 row indices for training; the remaining rows form the test set.
# seq_len() instead of 1:nrow(): identical here, but safe for a 0-row input.
sp <- sample(seq_len(nrow(Boston)), 354)
# NOTE: `tr` and `ts` are reused here and overwrite the iris split above.
tr <- Boston[sp, ]
ts <- Boston[-sp, ]
# Model medv from all other columns; no tuning arguments are passed.
s1 <- svm(medv ~ ., data = tr)
ps1 <- predict(s1, newdata = ts)
# Mean absolute error of the predictions on the test set.
mean(abs(ps1 - ts$medv))
## [1] 2.158999
# Second Boston model with hand-picked radial-kernel settings.
# NOTE: `s2` and `ps2` are reused and overwrite the iris polynomial model.
s2 <- svm(
  medv ~ .,
  data = tr,
  kernel = "radial",
  gamma = .04,
  cost = 15,
  epsilon = .1
)
ps2 <- predict(s2, newdata = ts)
# Mean absolute error on the test set (absolute difference is symmetric).
mean(abs(ts$medv - ps2))
## [1] 1.904667
library(readr)
# Location of the letter-recognition CSV. The default is the original
# hard-coded download path; set the LETTERDATA_CSV environment variable to
# point the script at a copy elsewhere without editing the code.
letterdata_path <- Sys.getenv(
  "LETTERDATA_CSV",
  unset = "C:/Users/dnred/Downloads/letterdata.csv"
)
letterdata <- read_csv(letterdata_path)
## Rows: 20000 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): letter
## dbl (16): xbox, ybox, width, height, onpix, xbar, ybar, x2bar, y2bar, xybar,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens a spreadsheet-style viewer window, so only call it when a user
# is actually at the console; under Rscript, knitting or a headless run the
# viewer is useless and may not be available at all.
if (interactive()) {
  View(letterdata)
}
library(kernlab)
# Compact overview of the column types and the first few values.
str(letterdata)
## spc_tbl_ [20,000 × 17] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ letter: chr [1:20000] "T" "I" "D" "N" ...
## $ xbox : num [1:20000] 2 5 4 7 2 4 4 1 2 11 ...
## $ ybox : num [1:20000] 8 12 11 11 1 11 2 1 2 15 ...
## $ width : num [1:20000] 3 3 6 6 3 5 5 3 4 13 ...
## $ height: num [1:20000] 5 7 8 6 1 8 4 2 4 9 ...
## $ onpix : num [1:20000] 1 2 6 3 1 3 4 1 2 7 ...
## $ xbar : num [1:20000] 8 10 10 5 8 8 8 8 10 13 ...
## $ ybar : num [1:20000] 13 5 6 9 6 8 7 2 6 2 ...
## $ x2bar : num [1:20000] 0 5 2 4 6 6 6 2 2 6 ...
## $ y2bar : num [1:20000] 6 4 6 6 6 9 6 2 6 2 ...
## $ xybar : num [1:20000] 6 13 10 4 6 5 7 8 12 12 ...
## $ x2ybar: num [1:20000] 10 3 3 4 5 6 6 2 4 1 ...
## $ xy2bar: num [1:20000] 8 9 7 10 9 6 6 8 8 9 ...
## $ xedge : num [1:20000] 0 2 3 6 1 0 2 1 1 8 ...
## $ xedgey: num [1:20000] 8 8 7 10 7 8 8 6 6 1 ...
## $ yedge : num [1:20000] 0 4 3 2 5 9 7 2 1 1 ...
## $ yedgex: num [1:20000] 8 10 9 8 10 7 10 7 7 8 ...
## - attr(*, "spec")=
## .. cols(
## .. letter = col_character(),
## .. xbox = col_double(),
## .. ybox = col_double(),
## .. width = col_double(),
## .. height = col_double(),
## .. onpix = col_double(),
## .. xbar = col_double(),
## .. ybar = col_double(),
## .. x2bar = col_double(),
## .. y2bar = col_double(),
## .. xybar = col_double(),
## .. x2ybar = col_double(),
## .. xy2bar = col_double(),
## .. xedge = col_double(),
## .. xedgey = col_double(),
## .. yedge = col_double(),
## .. yedgex = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Work on a copy of the raw data and turn the response column into a factor
# so it is treated as a class label.
# NOTE(review): the name `letters` masks the base R constant `letters` for the
# rest of the session; it is kept because later code refers to it.
letters <- letterdata
letters[["letter"]] <- factor(letters[["letter"]])
str(letters)
## spc_tbl_ [20,000 × 17] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ letter: Factor w/ 26 levels "A","B","C","D",..: 20 9 4 14 7 19 2 1 10 13 ...
## $ xbox : num [1:20000] 2 5 4 7 2 4 4 1 2 11 ...
## $ ybox : num [1:20000] 8 12 11 11 1 11 2 1 2 15 ...
## $ width : num [1:20000] 3 3 6 6 3 5 5 3 4 13 ...
## $ height: num [1:20000] 5 7 8 6 1 8 4 2 4 9 ...
## $ onpix : num [1:20000] 1 2 6 3 1 3 4 1 2 7 ...
## $ xbar : num [1:20000] 8 10 10 5 8 8 8 8 10 13 ...
## $ ybar : num [1:20000] 13 5 6 9 6 8 7 2 6 2 ...
## $ x2bar : num [1:20000] 0 5 2 4 6 6 6 2 2 6 ...
## $ y2bar : num [1:20000] 6 4 6 6 6 9 6 2 6 2 ...
## $ xybar : num [1:20000] 6 13 10 4 6 5 7 8 12 12 ...
## $ x2ybar: num [1:20000] 10 3 3 4 5 6 6 2 4 1 ...
## $ xy2bar: num [1:20000] 8 9 7 10 9 6 6 8 8 9 ...
## $ xedge : num [1:20000] 0 2 3 6 1 0 2 1 1 8 ...
## $ xedgey: num [1:20000] 8 8 7 10 7 8 8 6 6 1 ...
## $ yedge : num [1:20000] 0 4 3 2 5 9 7 2 1 1 ...
## $ yedgex: num [1:20000] 8 10 9 8 10 7 10 7 7 8 ...
## - attr(*, "spec")=
## .. cols(
## .. letter = col_character(),
## .. xbox = col_double(),
## .. ybox = col_double(),
## .. width = col_double(),
## .. height = col_double(),
## .. onpix = col_double(),
## .. xbar = col_double(),
## .. ybar = col_double(),
## .. x2bar = col_double(),
## .. y2bar = col_double(),
## .. xybar = col_double(),
## .. x2ybar = col_double(),
## .. xy2bar = col_double(),
## .. xedge = col_double(),
## .. xedgey = col_double(),
## .. yedge = col_double(),
## .. yedgex = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Sequential hold-out split: the first 16000 rows train the model and rows
# 16001 to 20000 are kept back for testing.
letters_train <- letters[seq_len(16000), ]
letters_test <- letters[16001:20000, ]
# Linear-kernel ("vanilladot") SVM; `letter ~ .` models the response `letter`
# against all remaining columns.
letter_classifier <- ksvm(
  letter ~ .,
  data = letters_train,
  kernel = "vanilladot"
)
## Setting default kernel parameters
letter_predictions <- predict(letter_classifier, newdata = letters_test)
head(letter_predictions, n = 6)
## [1] U N V X N H
## Levels: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
# Confusion matrix: rows = predicted letter, columns = true letter.
table(letter_predictions, letters_test$letter)
##
## letter_predictions A B C D E F G H I J K L M N O
## A 144 0 0 0 0 0 0 0 0 1 0 0 1 2 2
## B 0 121 0 5 2 0 1 2 0 0 1 0 1 0 0
## C 0 0 120 0 4 0 10 2 2 0 1 3 0 0 2
## D 2 2 0 156 0 1 3 10 4 3 4 3 0 5 5
## E 0 0 5 0 127 3 1 1 0 0 3 4 0 0 0
## F 0 0 0 0 0 138 2 2 6 0 0 0 0 0 0
## G 1 1 2 1 9 2 123 2 0 0 1 2 1 0 1
## H 0 0 0 1 0 1 0 102 0 2 3 2 3 4 20
## I 0 1 0 0 0 1 0 0 141 8 0 0 0 0 0
## J 0 1 0 0 0 1 0 2 5 128 0 0 0 0 1
## K 1 1 9 0 0 0 2 5 0 0 118 0 0 2 0
## L 0 0 0 0 2 0 1 1 0 0 0 133 0 0 0
## M 0 0 1 1 0 0 1 1 0 0 0 0 135 4 0
## N 0 0 0 0 0 1 0 1 0 0 0 0 0 145 0
## O 1 0 2 1 0 0 1 2 0 1 0 0 0 1 99
## P 0 0 0 1 0 2 1 0 0 0 0 0 0 0 2
## Q 0 0 0 0 0 0 8 2 0 0 0 3 0 0 3
## R 0 7 0 0 1 0 3 8 0 0 13 0 0 1 1
## S 1 1 0 0 1 0 3 0 1 1 0 1 0 0 0
## T 0 0 0 0 3 2 0 0 0 0 1 0 0 0 0
## U 1 0 3 1 0 0 0 2 0 0 0 0 0 0 1
## V 0 0 0 0 0 1 3 4 0 0 0 0 1 2 1
## W 0 0 0 0 0 0 1 0 0 0 0 0 2 0 0
## X 0 1 0 0 2 0 0 1 3 0 1 6 0 0 1
## Y 3 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## Z 2 0 0 0 1 0 0 0 3 4 0 0 0 0 0
##
## letter_predictions P Q R S T U V W X Y Z
## A 0 5 0 1 1 1 0 1 0 0 1
## B 2 2 3 5 0 0 2 0 1 0 0
## C 0 0 0 0 0 0 0 0 0 0 0
## D 3 1 4 0 0 0 0 0 3 3 1
## E 0 2 0 10 0 0 0 0 2 0 3
## F 16 0 0 3 0 0 1 0 1 2 0
## G 2 8 2 4 3 0 0 0 1 0 0
## H 0 2 3 0 3 0 2 0 0 1 0
## I 1 0 0 3 0 0 0 0 5 1 1
## J 1 3 0 2 0 0 0 0 1 0 6
## K 1 0 7 0 1 3 0 0 5 0 0
## L 0 1 0 5 0 0 0 0 0 0 1
## M 0 0 0 0 0 3 0 8 0 0 0
## N 0 0 3 0 0 1 0 2 0 0 0
## O 3 3 0 0 0 3 0 0 0 0 0
## P 130 0 0 0 0 0 0 0 0 1 0
## Q 1 124 0 5 0 0 0 0 0 2 0
## R 1 0 138 0 1 0 1 0 0 0 0
## S 0 14 0 101 3 0 0 0 2 0 10
## T 0 0 0 3 133 1 0 0 0 2 2
## U 0 0 0 0 0 152 0 0 1 1 0
## V 0 3 1 0 0 0 126 1 0 4 0
## W 0 0 0 0 0 4 4 127 0 0 0
## X 0 0 0 1 0 0 0 0 137 1 1
## Y 7 0 0 0 3 0 0 0 0 127 0
## Z 0 0 0 18 3 0 0 0 0 0 132
#diagonal values indicate the total number of records where the predicted letter matches the true value
# Logical vector: TRUE where the predicted letter equals the true letter.
agreement <- letters_test$letter == letter_predictions
# Counts of wrong (FALSE) and right (TRUE) predictions.
agreement |> table()
## agreement
## FALSE TRUE
## 643 3357
# The same counts expressed as proportions of the test set.
agreement |> table() |> prop.table()
## agreement
## FALSE TRUE
## 0.16075 0.83925
# Refit with a Gaussian RBF kernel. No kernel parameters are supplied, so
# kernlab picks sigma automatically; that estimate is based on a random sample
# of the data, so fix the seed to make the fitted model (and the predictions
# derived from it) reproducible between runs.
set.seed(1234)
letter_classifier_rbf <- ksvm(
  letter ~ .,
  data = letters_train,
  kernel = "rbfdot"
)
letter_predictions_rbf <- predict(letter_classifier_rbf, newdata = letters_test)