# Section 39 Grid Search
```{r}
df <- read.csv("G:\\RStudio\\udemy\\ml\\Machine Learning AZ\\Part 3 - Classification\\Section 17 - Kernel SVM\\Kernel_SVM\\Social_Network_Ads.csv")
head(df)
```
# Select the fields that we will be working with
```{r}
df <- df[,3:5]
head(df)
```
# Split the dataset into training and test sets (300 training, 100 test)
```{r}
library(caTools)
set.seed(1234)
split <- sample.split(df$Purchased, SplitRatio = 0.75)
training_set <- subset(df, split == TRUE)
test_set <- subset(df, split == FALSE)
```
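As an optional sanity check, the split sizes and class balance can be verified directly; with 400 rows and a 0.75 ratio this should come out to roughly 300 training and 100 test observations.

```{r}
# Optional sanity check: confirm the 300/100 split and the class balance
# that sample.split is designed to preserve.
nrow(training_set)
nrow(test_set)
prop.table(table(training_set$Purchased))
prop.table(table(test_set$Purchased))
```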
# For classification, it is better to apply feature scaling (standardization)
```{r}
# Feature scaling: column 1 is Age, column 2 is EstimatedSalary
training_set[,1:2] <- scale(training_set[,1:2])
test_set[,1:2] <- scale(test_set[,1:2])
```
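One caveat with the chunk above: the test set is standardized with its own mean and standard deviation. A common alternative, sketched below as a drop-in replacement for that chunk, is to reuse the centers and spreads computed on the training set so both sets are on exactly the same scale.

```{r}
# Alternative sketch (use instead of the chunk above, not in addition to it):
# standardize the test set with the training set's centers and standard deviations.
scaled_train <- scale(training_set[,1:2])
training_set[,1:2] <- scaled_train
test_set[,1:2] <- scale(test_set[,1:2],
                        center = attr(scaled_train, "scaled:center"),
                        scale  = attr(scaled_train, "scaled:scale"))
```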
# Applying Grid Search to find the best parameters
# install.packages("caret")
library(caret)
classifier = train(form = Purchased ~ . ,
data = training_set,
method = "svmRadial")
You are trying to do regression and your outcome only has two possible values Are you trying to do classification? If so, use a 2 level factor as your outcome column.
```{r}
classifier
```

```
Support Vector Machines with Radial Basis Function Kernel 

300 samples
  2 predictor

No pre-processing
Resampling: Bootstrapped (25 reps) 
Summary of sample sizes: 300, 300, 300, 300, 300, 300, ... 
Resampling results across tuning parameters:

  C     RMSE       Rsquared 
  0.25  0.2704022  0.6852358
  0.50  0.2741952  0.6796394
  1.00  0.2804945  0.6680970

Tuning parameter 'sigma' was held constant at a value of 2.247319
RMSE was used to select the optimal model using the smallest value.
The final values used for the model were sigma = 2.247319 and C = 0.25.
```
```{r}
classifier$bestTune
```
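Because `Purchased` is numeric, the tuning above was scored with RMSE. A minimal sketch of the classification variant, assuming only that the outcome is recoded as a two-level factor (the labels "No"/"Yes" are arbitrary), is shown below; caret then reports Accuracy and Kappa and selects C on accuracy.

```{r}
# Sketch: recode the outcome as a factor so caret treats the problem as classification.
training_cls <- training_set
training_cls$Purchased <- factor(training_cls$Purchased,
                                 levels = c(0, 1),
                                 labels = c("No", "Yes"))  # arbitrary labels for 0/1
classifier_cls <- train(form = Purchased ~ .,
                        data = training_cls,
                        method = "svmRadial")
classifier_cls$bestTune  # tuned sigma and C, now selected by Accuracy
```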
# Applying SVM without the best-tune parameters
```{r}
# Build the kernel SVM classifier with e1071's default cost and gamma
# install.packages("e1071")
# kernlab is an alternative SVM implementation
library(e1071)
classifier <- svm(formula = Purchased ~ Age + EstimatedSalary,
                  data = training_set,
                  type = "C-classification",
                  kernel = "radial")
y_pred <- predict(classifier, newdata = test_set[-3])
cm <- table(test_set[, 3], y_pred)
cm
```
```
   y_pred
     0  1
  0 55  9
  1  4 32
```
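The confusion matrix can be condensed into a single accuracy figure: correct predictions over all test observations, which for the matrix above works out to (55 + 32) / 100 = 0.87.

```{r}
# Accuracy = correct predictions / total test observations
accuracy <- sum(diag(cm)) / sum(cm)
accuracy
```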
# Applying SVM with the best-tune parameters
```{r}
# Build the kernel SVM classifier with the parameters suggested by the grid search
# install.packages("e1071")
# kernlab is an alternative SVM implementation
library(e1071)
# Note: e1071::svm() has no `sigma` argument; the RBF width that kernlab calls
# sigma is passed as `gamma` here, using the value reported by classifier$bestTune.
classifier <- svm(formula = Purchased ~ Age + EstimatedSalary,
                  data = training_set,
                  type = "C-classification",
                  kernel = "radial",
                  cost = 0.25,
                  cross = 10,
                  gamma = 2.247319)
y_pred <- predict(classifier, newdata = test_set[-3])
cm <- table(test_set[, 3], y_pred)
cm
```
```
   y_pred
     0  1
  0 55  9
  1  4 32
```
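caret is not the only option for the grid search: e1071 ships its own `tune()` helper, which cross-validates `svm()` over a grid of parameters. The sketch below uses an illustrative grid of `cost` and `gamma` values (the grid itself is an assumption, not taken from the lecture) and recodes the outcome as a factor so the search runs as classification.

```{r}
# Sketch: grid search with e1071::tune() over cost and gamma (illustrative grid),
# scored by 10-fold cross-validation on the training set.
training_fct <- training_set
training_fct$Purchased <- factor(training_fct$Purchased)
tuned <- tune(svm, Purchased ~ Age + EstimatedSalary,
              data = training_fct,
              kernel = "radial",
              ranges = list(cost = c(0.25, 0.5, 1, 2, 4),
                            gamma = c(0.5, 1, 2)),
              tunecontrol = tune.control(sampling = "cross", cross = 10))
tuned$best.parameters   # best cost/gamma found by the search
```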