Section 13 KNN
setwd()
df <- read.csv("G:\\RStudio\\udemy\\ml\\Machine Learning AZ\\Part 3 - Classification\\Section 15 - K-Nearest Neighbors (K-NN)\\K_Nearest_Neighbors\\Social_Network_Ads.csv")
head(df)
Select the fields that we will be working with
df <- df[,3:5]
head(df)
Split dataset into training and test set (300 training, 100 test)
library(caTools)
set.seed(1234)
split <- sample.split(df$Purchased, SplitRatio = 0.75)
training_set <- subset(df, split == TRUE)
test_set <- subset(df, split == FALSE)
For Classification,it is better to do feature scaling (normalization)
# Feature Scaling 1 age, 2 is salary
training_set[,1:2] <- scale(training_set[,1:2])
test_set[,1:2] <- scale(test_set[,1:2])
Fitting Classifier to the Training Set
# Create the classifier here
# install.package("class")
library(class)
package <U+393C><U+3E31>class<U+393C><U+3E32> was built under R version 3.3.3
# for KNN, the step process of fitting and predicting are in one step.
y_pred <- knn(train = training_set[,-3],
test = test_set[,-3],
cl = training_set[,3],
k = 5)
y_pred
[1] 0 0 1 1 0 0 0 1 1 0 0 0 0 1 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
[39] 1 0 0 0 1 0 0 0 1 1 1 0 0 1 1 0 1 0 0 1 0 1 0 0 0 1 1 0 1 1 1 0 0 0 1 0 1 1
[77] 0 1 0 1 0 1 1 0 0 1 0 1 0 0 1 0 1 1 1 0 0 1 1 1
Levels: 0 1
Evaluate the prediction using confusion Matrix.
# Making the confusion matrix
# [3] refers to the outcome
cm <- table(test_set[,3], y_pred)
cm
y_pred
0 1
0 56 8
1 4 32
Plot
# install.packages("ElemStatLearn")
library(ElemStatLearn)
set <- training_set
X1 <- seq(min(set[,1]) - 1, max(set[,1]) + 1, by = 0.01)
X2 <- seq(min(set[,2]) - 1, max(set[,2]) + 1, by = 0.01)
grid_set <- expand.grid(X1, X2)
colnames(grid_set) = c('Age','EstimatedSalary')
y_grid = knn(train = training_set[,-3],
test = grid_set,
cl = training_set[,3],
k = 5)
plot(set[,-3],
main = 'KNN Classifier Model (Training Set)',
xlab = 'Age', ylab = 'Estimated Salary',
xlim = range(X1), ylim = range(X2))
contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg= ifelse(set[,3] == 1,'green4', 'red3'))

Plot description
The red region is predicted by the classifier as “Dont buy” The green region is predicted by the classifier as “Buy” The red dots are those people that actually did not buy The green dots are those people that actually bought. The line is the prediction boundary.
Now we see the result of the test set
set <- test_set
X1 <- seq(min(set[,1]) - 1, max(set[,1]) + 1, by = 0.01)
X2 <- seq(min(set[,2]) - 1, max(set[,2]) + 1, by = 0.01)
grid_set <- expand.grid(X1, X2)
colnames(grid_set) = c('Age','EstimatedSalary')
y_grid = knn(train = training_set[,-3],
test = grid_set,
cl = training_set[,3],
k = 5)
plot(set[,-3],
main = 'KNN Classifier Model (Test Set)',
xlab = 'Age', ylab = 'Estimated Salary',
xlim = range(X1), ylim = range(X2))
contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg= ifelse(set[,3] == 1,'green4', 'red3'))

LS0tDQp0aXRsZTogIk1MIFVzaW5nIFIgU2VjdGlvbiAxMyBLTk4gIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KIyBTZWN0aW9uIDEzIEtOTiANCiMgc2V0d2QoKQ0KDQpgYGB7cn0NCmRmIDwtICByZWFkLmNzdigiRzpcXFJTdHVkaW9cXHVkZW15XFxtbFxcTWFjaGluZSBMZWFybmluZyBBWlxcUGFydCAzIC0gQ2xhc3NpZmljYXRpb25cXFNlY3Rpb24gMTUgLSBLLU5lYXJlc3QgTmVpZ2hib3JzIChLLU5OKVxcS19OZWFyZXN0X05laWdoYm9yc1xcU29jaWFsX05ldHdvcmtfQWRzLmNzdiIpDQpoZWFkKGRmKQ0KYGBgDQoNCiMgU2VsZWN0IHRoZSBmaWVsZHMgdGhhdCB3ZSB3aWxsIGJlIHdvcmtpbmcgd2l0aA0KDQpgYGB7cn0NCmRmIDwtIGRmWywzOjVdDQpoZWFkKGRmKQ0KYGBgDQoNCiMgU3BsaXQgZGF0YXNldCBpbnRvIHRyYWluaW5nIGFuZCB0ZXN0IHNldCAoMzAwIHRyYWluaW5nLCAxMDAgdGVzdCkNCmBgYHtyfQ0KbGlicmFyeShjYVRvb2xzKQ0Kc2V0LnNlZWQoMTIzNCkNCnNwbGl0IDwtIHNhbXBsZS5zcGxpdChkZiRQdXJjaGFzZWQsIFNwbGl0UmF0aW8gPSAwLjc1KQ0KdHJhaW5pbmdfc2V0IDwtIHN1YnNldChkZiwgc3BsaXQgPT0gVFJVRSkNCnRlc3Rfc2V0IDwtIHN1YnNldChkZiwgc3BsaXQgPT0gRkFMU0UpDQoNCmBgYA0KDQojIEZvciBDbGFzc2lmaWNhdGlvbixpdCBpcyBiZXR0ZXIgdG8gZG8gZmVhdHVyZSBzY2FsaW5nIChub3JtYWxpemF0aW9uKQ0KDQpgYGB7cn0NCiMgRmVhdHVyZSBTY2FsaW5nIDEgYWdlLCAyIGlzIHNhbGFyeQ0KdHJhaW5pbmdfc2V0WywxOjJdIDwtICBzY2FsZSh0cmFpbmluZ19zZXRbLDE6Ml0pDQp0ZXN0X3NldFssMToyXSA8LSAgc2NhbGUodGVzdF9zZXRbLDE6Ml0pDQpgYGANCg0KIyBGaXR0aW5nIENsYXNzaWZpZXIgdG8gdGhlIFRyYWluaW5nIFNldA0KDQpgYGB7cn0NCiMgQ3JlYXRlIHRoZSBjbGFzc2lmaWVyIGhlcmUNCiMgaW5zdGFsbC5wYWNrYWdlKCJjbGFzcyIpDQpsaWJyYXJ5KGNsYXNzKQ0KDQojIGZvciBLTk4sIHRoZSBzdGVwIHByb2Nlc3Mgb2YgZml0dGluZyBhbmQgcHJlZGljdGluZyBhcmUgaW4gb25lIHN0ZXAuDQp5X3ByZWQgPC0ga25uKHRyYWluID0gdHJhaW5pbmdfc2V0WywtM10sDQogICAgICAgICAgICAgIHRlc3QgPSB0ZXN0X3NldFssLTNdLA0KICAgICAgICAgICAgICBjbCA9IHRyYWluaW5nX3NldFssM10sDQogICAgICAgICAgICAgIGsgPSA1KQ0KeV9wcmVkDQpgYGANCg0KDQojIEV2YWx1YXRlIHRoZSBwcmVkaWN0aW9uIHVzaW5nIGNvbmZ1c2lvbiBNYXRyaXguDQoNCmBgYHtyfQ0KIyBNYWtpbmcgdGhlIGNvbmZ1c2lvbiBtYXRyaXgNCiMgWzNdIHJlZmVycyB0byB0aGUgb3V0Y29tZQ0KDQpjbSA8LSB0YWJsZSh0ZXN0X3NldFssM10sIHlfcHJlZCkNCmNtDQpgYGANCg0KIyBQbG90DQoNCmBgYHtyfQ0KIyBpbnN0YWxsLnBhY2thZ2VzKCJFbGVtU3RhdExlYXJuIikNCmxpYnJhcnkoRWxlbVN0YXRMZWFybikNCnNldCA8LSB0cmFpbmluZ19zZXQNClgxIDwtIHNlcShtaW4oc2V0WywxXSkgLSAxLCBtYXgoc2V0WywxXSkgKyAxLCBieSA9IDAuMDEpDQpYMiA8LSBzZXEobWluKHNldFssMl0pIC0gMSwgbWF4KHNldFssMl0pICsgMSwgYnkgPSAwLjAxKQ0KZ3JpZF9zZXQgPC0gZXhwYW5kLmdyaWQoWDEsIFgyKQ0KY29sbmFtZXMoZ3JpZF9zZXQpID0gYygnQWdlJywnRXN0aW1hdGVkU2FsYXJ5JykNCg0KeV9ncmlkID0ga25uKHRyYWluID0gdHJhaW5pbmdfc2V0WywtM10sDQogICAgICAgICAgICAgIHRlc3QgPSBncmlkX3NldCwNCiAgICAgICAgICAgICAgY2wgPSB0cmFpbmluZ19zZXRbLDNdLA0KICAgICAgICAgICAgICBrID0gNSkNCnBsb3Qoc2V0WywtM10sDQogICAgIG1haW4gPSAnS05OIENsYXNzaWZpZXIgTW9kZWwgKFRyYWluaW5nIFNldCknLA0KICAgICB4bGFiID0gICdBZ2UnLCB5bGFiID0gJ0VzdGltYXRlZCBTYWxhcnknLA0KICAgICB4bGltID0gcmFuZ2UoWDEpLCB5bGltID0gcmFuZ2UoWDIpKQ0KY29udG91cihYMSwgWDIsIG1hdHJpeChhcy5udW1lcmljKHlfZ3JpZCksIGxlbmd0aChYMSksIGxlbmd0aChYMikpLCBhZGQgPSBUUlVFKQ0KcG9pbnRzKGdyaWRfc2V0LCBwY2ggPSAnLicsIGNvbCA9IGlmZWxzZSh5X2dyaWQgPT0gMSwgJ3NwcmluZ2dyZWVuMycsICd0b21hdG8nKSkNCnBvaW50cyhzZXQsIHBjaCA9IDIxLCBiZyA9IGlmZWxzZShzZXRbLDNdID09IDEsJ2dyZWVuNCcsICdyZWQzJykpDQpgYGANCg0KIyBQbG90IGRlc2NyaXB0aW9uDQpUaGUgcmVkIHJlZ2lvbiBpcyBwcmVkaWN0ZWQgYnkgdGhlIGNsYXNzaWZpZXIgYXMgIkRvbnQgYnV5IiA8L2JyPg0KVGhlIGdyZWVuIHJlZ2lvbiBpcyBwcmVkaWN0ZWQgYnkgdGhlIGNsYXNzaWZpZXIgYXMgIkJ1eSIgPC9icj4NClRoZSByZWQgZG90cyBhcmUgdGhvc2UgcGVvcGxlIHRoYXQgYWN0dWFsbHkgZGlkIG5vdCBidXkgPC9icj4NClRoZSBncmVlbiBkb3RzIGFyZSB0aG9zZSBwZW9wbGUgdGhhdCBhY3R1YWxseSBib3VnaHQuIDwvYnI+DQpUaGUgbGluZSBpcyB0aGUgcHJlZGljdGlvbiBib3VuZGFyeS4gDQoNCiMgTm93IHdlIHNlZSB0aGUgcmVzdWx0IG9mIHRoZSB0ZXN0IHNldA0KDQpgYGB7cn0NCnNldCA8LSB0ZXN0X3NldA0KWDEgPC0gc2VxKG1pbihzZXRbLDFdKSAtIDEsIG1heChzZXRbLDFdKSArIDEsIGJ5ID0gMC4wMSkNClgyIDwtIHNlcShtaW4oc2V0WywyXSkgLSAxLCBtYXgoc2V0WywyXSkgKyAxLCBieSA9IDAuMDEpDQpncmlkX3NldCA8LSBleHBhbmQuZ3JpZChYMSwgWDIpDQpjb2xuYW1lcyhncmlkX3NldCkgPSBjKCdBZ2UnLCdFc3RpbWF0ZWRTYWxhcnknKQ0KeV9ncmlkID0ga25uKHRyYWluID0gdHJhaW5pbmdfc2V0WywtM10sDQogICAgICAgICAgICAgIHRlc3QgPSBncmlkX3NldCwNCiAgICAgICAgICAgICAgY2wgPSB0cmFpbmluZ19zZXRbLDNdLA0KICAgICAgICAgICAgICBrID0gNSkNCnBsb3Qoc2V0WywtM10sDQogICAgIG1haW4gPSAnS05OIENsYXNzaWZpZXIgTW9kZWwgKFRlc3QgU2V0KScsDQogICAgIHhsYWIgPSAgJ0FnZScsIHlsYWIgPSAnRXN0aW1hdGVkIFNhbGFyeScsDQogICAgIHhsaW0gPSByYW5nZShYMSksIHlsaW0gPSByYW5nZShYMikpDQpjb250b3VyKFgxLCBYMiwgbWF0cml4KGFzLm51bWVyaWMoeV9ncmlkKSwgbGVuZ3RoKFgxKSwgbGVuZ3RoKFgyKSksIGFkZCA9IFRSVUUpDQpwb2ludHMoZ3JpZF9zZXQsIHBjaCA9ICcuJywgY29sID0gaWZlbHNlKHlfZ3JpZCA9PSAxLCAnc3ByaW5nZ3JlZW4zJywgJ3RvbWF0bycpKQ0KcG9pbnRzKHNldCwgcGNoID0gMjEsIGJnPSBpZmVsc2Uoc2V0WywzXSA9PSAxLCdncmVlbjQnLCAncmVkMycpKQ0KYGBgDQoNCg==