Basic explanations:
Lecture 286 https://www.udemy.com/machinelearning/learn/lecture/6453728
https://medium.com/datadriveninvestor/an-introduction-to-grid-search-ff57adcc0998
A well-done description of the process on a different dataset:
https://towardsdatascience.com/grid-search-for-model-tuning-3319b259367e
Code for the above: https://gist.github.com/rohanjoseph93
# Import the dataset; keep Age, EstimatedSalary, and Purchased (columns 3-5)
dataset = read.csv('Social_Network_Ads.csv')
dataset = dataset[3:5]
# Encode the target variable as a factor
dataset$Purchased = factor(dataset$Purchased, levels = c(0, 1))
# Split into training (75%) and test (25%) sets
# install.packages('caTools')
library(caTools)
set.seed(123)
split = sample.split(dataset$Purchased, SplitRatio = 0.75)
training_set = subset(dataset, split == TRUE)
test_set = subset(dataset, split == FALSE)
# Scale the features (everything except the target in column 3)
training_set[-3] = scale(training_set[-3])
test_set[-3] = scale(test_set[-3])
# install.packages('caret')
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
# Here is where we set up the grid search mechanism: train() tunes the
# svmRadial hyperparameters by resampling and keeps the best combination
classifier = train(form = Purchased ~ ., data = training_set, method = 'svmRadial')
classifier
## Support Vector Machines with Radial Basis Function Kernel
##
## 300 samples
## 2 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 300, 300, 300, 300, 300, 300, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.9170686 0.8177473
## 0.50 0.9185892 0.8211741
## 1.00 0.9190782 0.8219777
##
## Tuning parameter 'sigma' was held constant at a value of 1.560428
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 1.560428 and C = 1.
Let's strip that down to just the best values for sigma and C.
classifier$bestTune
## sigma C
## 3 1.560428 1
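Note from the output above that train() only searched over C and held sigma constant at an estimated value. If we want the grid search to cover both parameters, we can hand train() an explicit tuneGrid. A minimal sketch, where the candidate values are illustrative rather than taken from the lecture:

# Hedged sketch: search both sigma and C, resampling with 10-fold CV
# instead of the default bootstrap; candidate values are illustrative only
tune_grid = expand.grid(sigma = c(0.5, 1, 1.5, 2),
                        C = c(0.25, 0.5, 1, 2))
classifierG = train(form = Purchased ~ .,
                    data = training_set,
                    method = 'svmRadial',
                    trControl = trainControl(method = 'cv', number = 10),
                    tuneGrid = tune_grid)
classifierG$bestTune

For the rest of this walkthrough we stick with the classifier from the default search.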
# Predict on the test set and build the confusion matrix
y_pred = predict(classifier, newdata = test_set[-3])
cm = table(test_set[, 3], y_pred)
cm
## y_pred
## 0 1
## 0 58 6
## 1 6 30
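From that matrix the test-set accuracy follows directly: (58 + 30) correct out of 100 test observations, i.e. 0.88. In code:

# Accuracy = correct predictions / all predictions
sum(diag(cm)) / sum(cm)
## [1] 0.88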
# ElemStatLearn is loaded per the course, though the plotting below uses
# only base graphics (the package has since been archived on CRAN)
library(ElemStatLearn)
set = training_set
# Build a fine grid over the two scaled features
X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
grid_set = expand.grid(X1, X2)
colnames(grid_set) = c('Age', 'EstimatedSalary')
# Classify every grid point to colour the decision regions
y_grid = predict(classifier, newdata = grid_set)
plot(set[, -3],
     main = 'Kernel SVM (Training set)',
     xlab = 'Age', ylab = 'Estimated Salary',
     xlim = range(X1), ylim = range(X2))
contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))
# Same visualization on the test set
library(ElemStatLearn)
set = test_set
X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
grid_set = expand.grid(X1, X2)
colnames(grid_set) = c('Age', 'EstimatedSalary')
y_grid = predict(classifier, newdata = grid_set)
plot(set[, -3], main = 'Kernel SVM (Test set)',
     xlab = 'Age', ylab = 'Estimated Salary',
     xlim = range(X1), ylim = range(X2))
contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))
===============================================
We could take the tuned sigma and C values above and work them into the old way of building the model, or we could simply keep using the classifier we trained with caret.
We're using SVM here, but this could be any algorithm that suits the data; more on that under Grid Search, which is next. When selecting an algorithm it's important to have some sense of both the algorithm and your data, and whether they complement one another.
# install.packages('e1071')
library(e1071)
# The "old way": e1071's svm() with its default cost and gamma,
# i.e. without the values tuned by caret above
classifierO = svm(formula = Purchased ~ .,
                  data = training_set,
                  type = 'C-classification',
                  kernel = 'radial')
y_predO = predict(classifierO, newdata = test_set[-3])
cmO = table(test_set[, 3], y_predO)
cmO
## y_predO
## 0 1
## 0 58 6
## 1 4 32
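If we do want to work the tuned values into the e1071 call, kernlab's sigma plays the same role as e1071's gamma (both scale the squared distance in the radial kernel), and C maps to cost. A minimal sketch, reusing the bestTune values from the caret classifier above; classifierT is just an illustrative name:

# Hedged sketch: carry caret's tuned hyperparameters into e1071::svm();
# kernlab's sigma corresponds to e1071's gamma for the radial kernel
classifierT = svm(formula = Purchased ~ .,
                  data = training_set,
                  type = 'C-classification',
                  kernel = 'radial',
                  cost = classifier$bestTune$C,
                  gamma = classifier$bestTune$sigma)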
The name comes from splitting the training data into K folds; each iteration holds one fold out as a test set and trains on the remaining K-1. K = 10 is the most common choice. Once the process is complete we can summarize the 10 iterations with statistics such as the mean and standard deviation of their accuracies.
knitr::include_graphics("k-fold_crossValidation.png")
What are we doing with K-fold?
# install.packages('caret')
library(caret)
# Create 10 folds, stratified on the target
folds = createFolds(training_set$Purchased, k = 10)
# For each fold: train on the other nine, predict the held-out fold,
# and return that fold's accuracy
cv = lapply(folds, function(x) {
  training_fold = training_set[-x, ]
  test_fold = training_set[x, ]
  classifierO = svm(formula = Purchased ~ .,
                    data = training_fold,
                    type = 'C-classification',
                    kernel = 'radial')
  y_predO = predict(classifierO, newdata = test_fold[-3])
  cmO = table(test_fold[, 3], y_predO)
  accuracy = (cmO[1,1] + cmO[2,2]) / (cmO[1,1] + cmO[2,2] + cmO[1,2] + cmO[2,1])
  return(accuracy)
})
# Average the accuracies across the 10 folds
accuracy = mean(as.numeric(cv))
accuracy
## [1] 0.9132814
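Above we said we could look at the mean and standard deviation of the folds; the mean is what we just computed, and the standard deviation is one more line (output not shown, since it depends on the random folds):

# Spread of the 10 fold accuracies around their mean
sd(as.numeric(cv))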
# Decision boundary of the old-way (e1071) classifier on the training set
library(ElemStatLearn)
set = training_set
X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
grid_set = expand.grid(X1, X2)
colnames(grid_set) = c('Age', 'EstimatedSalary')
y_gridO = predict(classifierO, newdata = grid_set)
plot(set[, -3],
     main = 'Kernel SVM old way (Training set)',
     xlab = 'Age', ylab = 'Estimated Salary',
     xlim = range(X1), ylim = range(X2))
contour(X1, X2, matrix(as.numeric(y_gridO), length(X1), length(X2)), add = TRUE)
points(grid_set, pch = '.', col = ifelse(y_gridO == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))
# Same visualization of the old-way classifier on the test set
library(ElemStatLearn)
set = test_set
X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
grid_set = expand.grid(X1, X2)
colnames(grid_set) = c('Age', 'EstimatedSalary')
y_gridO = predict(classifierO, newdata = grid_set)
plot(set[, -3], main = 'Kernel SVM old way (Test set)',
     xlab = 'Age', ylab = 'Estimated Salary',
     xlim = range(X1), ylim = range(X2))
contour(X1, X2, matrix(as.numeric(y_gridO), length(X1), length(X2)), add = TRUE)
points(grid_set, pch = '.', col = ifelse(y_gridO == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))