Section 38 K Fold Validation
# Load the Social Network Ads CSV into a data frame and preview the first rows.
df <- read.csv(
  file = "G:\\RStudio\\udemy\\ml\\Machine Learning AZ\\Part 3 - Classification\\Section 17 - Kernel SVM\\Kernel_SVM\\Social_Network_Ads.csv"
)
head(df)
Select the fields that we will be working with
# Keep only columns 3 to 5 (the two predictors and the outcome used below).
df <- df[3:5]
head(df)
Split dataset into training and test set (300 training, 100 test)
library(caTools)

# Fixed seed so the random split is reproducible.
set.seed(1234)

# Logical mask over the rows of df: TRUE marks a training row (75%),
# FALSE marks a test row (25%).
split <- sample.split(df$Purchased, SplitRatio = 0.75)
training_set <- df[split, ]
test_set <- df[!split, ]
For classification, it is better to apply feature scaling (standardization)
# Feature scaling: column 1 is age, column 2 is salary.
# Standardise the training predictors, then apply the SAME centre and scale
# (the training means and standard deviations) to the test predictors.
# Scaling the test set with its own statistics would put the two sets on
# different scales and leak test-set information into preprocessing.
scaled_train <- scale(training_set[, 1:2])
train_center <- attr(scaled_train, "scaled:center")
train_scale <- attr(scaled_train, "scaled:scale")
training_set[, 1:2] <- scaled_train
test_set[, 1:2] <- scale(
  test_set[, 1:2],
  center = train_center,
  scale = train_scale
)
Fitting Classifier to the Training Set
# Fit a support vector machine on the training set.
# Requires the e1071 package (install once with install.packages("e1071"));
# kernlab is an alternative SVM package.
library(e1071)
# Model: Purchased explained by Age and EstimatedSalary, as a C-classification
# SVM with a radial kernel. cost and gamma are not passed, so svm() uses its
# defaults (the summary output reports the values actually used).
classifier <- svm(formula = Purchased ~ Age + EstimatedSalary,
data = training_set,
type = "C-classification",
kernel = "radial"
)
# Print the call, the SVM parameters and the number of support vectors.
summary(classifier)
Call:
svm(formula = Purchased ~ Age + EstimatedSalary, data = training_set, type = "C-classification",
kernel = "radial")
Parameters:
SVM-Type: C-classification
SVM-Kernel: radial
cost: 1
gamma: 0.5
Number of Support Vectors: 80
( 38 42 )
Number of Classes: 2
Levels:
0 1
Predicting the test set results
# Predict the class of every test row; column 3 (the outcome) is dropped so
# only the two predictors are passed to the model.
y_pred <- predict(classifier, newdata = test_set[, -3])
y_pred
5 15 19 25 29 40 42 43 49 51 54 55 66 74 76 77 78 90 92 100 104 106 110 112 120 133 136
0 0 1 1 0 0 0 1 1 0 0 0 0 1 1 0 0 0 1 0 1 0 0 0 0 0 0
137 141 142 143 144 145 153 157 159 164 166 172 174 179 181 183 184 195 197 202 203 216 218 221 223 228 229
0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 1 1 0 0 1 1 0
235 238 239 240 242 244 245 249 250 254 256 258 271 273 275 286 287 290 291 294 301 303 305 309 318 319 327
1 0 1 1 0 1 0 0 0 1 1 0 1 1 1 0 0 0 1 0 1 1 0 1 0 1 0
330 332 333 338 341 343 351 353 354 357 360 361 363 368 369 373 386 387 394
1 1 0 0 1 0 1 0 0 1 0 1 1 1 0 0 1 1 1
Levels: 0 1
Evaluate the predictions using a confusion matrix.
# Confusion matrix: rows are the actual outcomes (column 3 of the test set),
# columns are the predicted classes.
cm <- table(test_set[[3]], y_pred)
cm
y_pred
0 1
0 55 9
1 4 32
Applying K-fold cross validation
# Applying K-fold cross validation
# install.packages("caret")
library(caret)

# Partition the training rows into 10 folds. Each element of `folds` holds the
# row indices of one held-out fold. Fold assignment is random, so call
# set.seed() beforehand if this chunk must be reproducible when run on its own.
folds <- createFolds(training_set$Purchased, k = 10)

# For every fold: fit the SVM on the remaining rows, predict the held-out rows
# and return the fold accuracy (share of correctly classified rows).
cv <- lapply(folds, function(x) {
  training_fold <- training_set[-x, ]
  test_fold <- training_set[x, ]
  fold_classifier <- svm(
    formula = Purchased ~ Age + EstimatedSalary,
    data = training_fold,
    type = "C-classification",
    kernel = "radial"
  )
  fold_pred <- predict(fold_classifier, newdata = test_fold[-3])
  # Compare labels directly instead of indexing cells of a 2x2 confusion
  # matrix: cm[2, 2] raises "subscript out of bounds" whenever the held-out
  # fold happens to contain a single class. Column 3 is the outcome.
  mean(as.character(fold_pred) == as.character(test_fold[, 3]))
})

# Cross-validated accuracy: mean of the per-fold accuracies.
accuracy <- mean(as.numeric(cv))
accuracy
[1] 0.9133333
LS0tDQp0aXRsZTogIk1MIFVzaW5nIFIgU2VjdGlvbiAzOCB4IEZvbGQgVmFsaWRhdGlvbiINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCiMgU2VjdGlvbiAzOCBLIEZvbGQgVmFsaWRhdGlvbg0KDQoNCmBgYHtyfQ0KZGYgPC0gIHJlYWQuY3N2KCJHOlxcUlN0dWRpb1xcdWRlbXlcXG1sXFxNYWNoaW5lIExlYXJuaW5nIEFaXFxQYXJ0IDMgLSBDbGFzc2lmaWNhdGlvblxcU2VjdGlvbiAxNyAtIEtlcm5lbCBTVk1cXEtlcm5lbF9TVk1cXFNvY2lhbF9OZXR3b3JrX0Fkcy5jc3YiKQ0KaGVhZChkZikNCmBgYA0KDQojIFNlbGVjdCB0aGUgZmllbGRzIHRoYXQgd2Ugd2lsbCBiZSB3b3JraW5nIHdpdGgNCg0KYGBge3J9DQpkZiA8LSBkZlssMzo1XQ0KaGVhZChkZikNCmBgYA0KDQojIFNwbGl0IGRhdGFzZXQgaW50byB0cmFpbmluZyBhbmQgdGVzdCBzZXQgKDMwMCB0cmFpbmluZywgMTAwIHRlc3QpDQpgYGB7cn0NCmxpYnJhcnkoY2FUb29scykNCnNldC5zZWVkKDEyMzQpDQpzcGxpdCA8LSBzYW1wbGUuc3BsaXQoZGYkUHVyY2hhc2VkLCBTcGxpdFJhdGlvID0gMC43NSkNCnRyYWluaW5nX3NldCA8LSBzdWJzZXQoZGYsIHNwbGl0ID09IFRSVUUpDQp0ZXN0X3NldCA8LSBzdWJzZXQoZGYsIHNwbGl0ID09IEZBTFNFKQ0KDQpgYGANCg0KIyBGb3IgQ2xhc3NpZmljYXRpb24saXQgaXMgYmV0dGVyIHRvIGRvIGZlYXR1cmUgc2NhbGluZyAobm9ybWFsaXphdGlvbikNCg0KYGBge3J9DQojIEZlYXR1cmUgU2NhbGluZyAxIGFnZSwgMiBpcyBzYWxhcnkNCnRyYWluaW5nX3NldFssMToyXSA8LSAgc2NhbGUodHJhaW5pbmdfc2V0WywxOjJdKQ0KdGVzdF9zZXRbLDE6Ml0gPC0gIHNjYWxlKHRlc3Rfc2V0WywxOjJdKQ0KYGBgDQoNCiMgRml0dGluZyBDbGFzc2lmaWVyIHRvIHRoZSBUcmFpbmluZyBTZXQNCg0KYGBge3J9DQojIENyZWF0ZSB0aGUgY2xhc3NpZmllciBoZXJlDQojIGluc3RhbGwucGFja2FnZXMoImUxMDcxIikNCiMgeW91IGNhbiBhbHNvIHVzZSBrZXJubGFiDQpsaWJyYXJ5KGUxMDcxKQ0KY2xhc3NpZmllciA8LSBzdm0oZm9ybXVsYSA9IFB1cmNoYXNlZCB+IEFnZSArIEVzdGltYXRlZFNhbGFyeSwNCiAgICAgICAgICAgICAgICAgIGRhdGEgPSB0cmFpbmluZ19zZXQsDQogICAgICAgICAgICAgICAgICB0eXBlID0gIkMtY2xhc3NpZmljYXRpb24iLA0KICAgICAgICAgICAgICAgICAga2VybmVsID0gInJhZGlhbCINCiAgICAgICAgICAgICAgICAgICkgDQpzdW1tYXJ5KGNsYXNzaWZpZXIpDQoNCmBgYA0KDQojIFByZWRpY3RpbmcgdGhlIHRlc3Qgc2V0IHJlc3VsdHMNCg0KYGBge3J9DQoNCnlfcHJlZCA8LSAgcHJlZGljdChjbGFzc2lmaWVyLCBuZXdkYXRhPXRlc3Rfc2V0Wy0zXSkNCnlfcHJlZA0KDQpgYGANCg0KIyBFdmFsdWF0ZSB0aGUgcHJlZGljdGlvbiB1c2luZyBjb25mdXNpb24gTWF0cml4Lg0KDQpgYGB7cn0NCiMgTWFraW5nIHRoZSBjb25mdXNpb24gbWF0cml4DQojIFszXSByZWZlcnMgdG8gdGhlIG91dGNvbWUN
Cg0KY20gPC0gdGFibGUodGVzdF9zZXRbLDNdLCB5X3ByZWQpDQpjbQ0KYGBgDQoNCiMgQXBwbHlpbmcgSy1mb2xkIGNyb3NzIHZhbGlkYXRpb24NCg0KYGBge3J9DQoNCiMgaW5zdGFsbC5wYWNrYWdlcygiY2FyZXQiKQ0KbGlicmFyeShjYXJldCkNCmZvbGRzID0gY3JlYXRlRm9sZHModHJhaW5pbmdfc2V0JFB1cmNoYXNlZCwgayA9IDEwKQ0KY3YgPSBsYXBwbHkoZm9sZHMsIGZ1bmN0aW9uKHgpew0KICB0cmFpbmluZ19mb2xkIDwtICB0cmFpbmluZ19zZXRbLXgsIF0NCiAgdGVzdF9mb2xkIDwtICB0cmFpbmluZ19zZXRbeCwgXQ0KICBjbGFzc2lmaWVyIDwtIHN2bShmb3JtdWxhID0gUHVyY2hhc2VkIH4gQWdlICsgRXN0aW1hdGVkU2FsYXJ5LA0KICAgICAgICAgICAgICAgICAgZGF0YSA9IHRyYWluaW5nX2ZvbGQsDQogICAgICAgICAgICAgICAgICB0eXBlID0gIkMtY2xhc3NpZmljYXRpb24iLA0KICAgICAgICAgICAgICAgICAga2VybmVsID0gInJhZGlhbCINCiAgICAgICAgICAgICAgICAgICkgDQogIHlfcHJlZCA8LSAgcHJlZGljdChjbGFzc2lmaWVyLCBuZXdkYXRhPXRlc3RfZm9sZFstM10pDQogIGNtIDwtIHRhYmxlKHRlc3RfZm9sZFssM10sIHlfcHJlZCkNCiAgYWNjdXJhY3kgPSAoY21bMSwxXSsgY21bMiwyXSkgLyAoY21bMSwxXSsgY21bMiwyXSsgY21bMSwyXSsgY21bMiwxXSkNCiAgcmV0dXJuKGFjY3VyYWN5KQ0KICANCn0pDQphY2N1cmFjeSA8LSBtZWFuKGFzLm51bWVyaWMoY3YpKQ0KYWNjdXJhY3kNCmBgYA0KDQoNCg0K