Section 36: Kernal PCA
df <- read.csv("G:\\RStudio\\udemy\\ml\\Machine Learning AZ\\Part 9 - Dimensionality Reduction\\Section 45 - Kernel PCA\\Kernel_PCA\\Social_Network_Ads.csv")
df <- df[,3:5]
head(df)
Split dataset into training and test set (300 training, 100 test)
library(caTools)
set.seed(123)
split <- sample.split(df$Purchased, SplitRatio = 0.75)
training_set <- subset(df, split == TRUE)
test_set <- subset(df, split == FALSE)
For KernelPCA ,it is better to do feature scaling (normalization)
# Feature Scaling 1 age, 2 is salary
training_set[,1:2] <- scale(training_set[,1:2])
test_set[,1:2] <- scale(test_set[,1:2])
Applying PCA to the data
# install.packages("kernlab")
library(kernlab)
kpca <- kpca(~., data = training_set[-3], kernel = 'rbfdot',
features = 2)
# add the depenent variable into the pca
training_set_pca <- as.data.frame(predict(kpca, training_set))
# add the dependent varaible Purchased to the training set
training_set_pca$Purchased <- training_set$Purchased
# do the same for the test dataset
test_set_pca <- as.data.frame(predict(kpca, test_set))
# add the dependent varaible Purchased to the test set
test_set_pca$Purchased <- test_set$Purchased
Building the classification model
classifier <- glm(formula = Purchased ~ .,
family = binomial,
data = training_set_pca ,
)
summary(classifier)
Call:
glm(formula = Purchased ~ ., family = binomial, data = training_set_pca)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.8679 -0.4607 -0.1230 0.3191 2.4451
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.27556 0.21592 -5.907 3.47e-09 ***
V1 0.47918 0.05873 8.159 3.39e-16 ***
V2 0.14365 0.03280 4.380 1.19e-05 ***
---
Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 390.89 on 299 degrees of freedom
Residual deviance: 191.09 on 297 degrees of freedom
AIC: 197.09
Number of Fisher Scoring iterations: 6
Predicting the test set results
prob_pred <- predict(classifier, newdata = test_set_pca[-3]
)
y_pred <- ifelse(prob_pred >0.5,1,0)
Evaluate the prediction using confusion Matrix.
# Making the confusion matrix
# [3] refers to the outcome
cm <- table(test_set_pca[,3], y_pred)
cm
y_pred
0 1
0 58 6
1 13 23
Plot
# install.packages("ElemStatLearn")
library(ElemStatLearn)
Warning message:
In scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
EOF within quoted string
set <- test_set_pca
X1 <- seq(min(set[,1]) - 1, max(set[,1]) + 1, by = 0.01)
X2 <- seq(min(set[,2]) - 1, max(set[,2]) + 1, by = 0.01)
grid_set <- expand.grid(X1, X2)
colnames(grid_set) = c('V1','V2')
prob_set = predict(classifier, type = 'response', newdata = grid_set)
y_grid = ifelse(prob_set > 0.5, 1, 0)
plot(set[,-3],
main = 'Logistic Regression(Test Set)',
xlab = 'Age', ylab = 'Estimated Salary',
xlim = range(X1), ylim = range(X2))
contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg= ifelse(set[,3] == 1,'green4', 'red3'))

LS0tDQp0aXRsZTogIk1MIFVzaW5nIFIgU2VjdGlvbjM2IEtlcm5lbCBQQ0EiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQojIFNlY3Rpb24gMzY6IEtlcm5hbCBQQ0ENCmBgYHtyfQ0KZGYgPC0gIHJlYWQuY3N2KCJHOlxcUlN0dWRpb1xcdWRlbXlcXG1sXFxNYWNoaW5lIExlYXJuaW5nIEFaXFxQYXJ0IDkgLSBEaW1lbnNpb25hbGl0eSBSZWR1Y3Rpb25cXFNlY3Rpb24gNDUgLSBLZXJuZWwgUENBXFxLZXJuZWxfUENBXFxTb2NpYWxfTmV0d29ya19BZHMuY3N2IikNCmRmIDwtICBkZlssMzo1XQ0KaGVhZChkZikNCmBgYA0KDQoNCiMgU3BsaXQgZGF0YXNldCBpbnRvIHRyYWluaW5nIGFuZCB0ZXN0IHNldCAoMzAwIHRyYWluaW5nLCAxMDAgdGVzdCkNCmBgYHtyfQ0KbGlicmFyeShjYVRvb2xzKQ0Kc2V0LnNlZWQoMTIzKQ0Kc3BsaXQgPC0gc2FtcGxlLnNwbGl0KGRmJFB1cmNoYXNlZCwgU3BsaXRSYXRpbyA9IDAuNzUpDQp0cmFpbmluZ19zZXQgPC0gc3Vic2V0KGRmLCBzcGxpdCA9PSBUUlVFKQ0KdGVzdF9zZXQgPC0gc3Vic2V0KGRmLCBzcGxpdCA9PSBGQUxTRSkNCg0KYGBgDQoNCiMgRm9yIEtlcm5lbFBDQSAsaXQgaXMgYmV0dGVyIHRvIGRvIGZlYXR1cmUgc2NhbGluZyAobm9ybWFsaXphdGlvbikNCg0KYGBge3J9DQojIEZlYXR1cmUgU2NhbGluZyAxIGFnZSwgMiBpcyBzYWxhcnkNCnRyYWluaW5nX3NldFssMToyXSA8LSAgc2NhbGUodHJhaW5pbmdfc2V0WywxOjJdKQ0KdGVzdF9zZXRbLDE6Ml0gPC0gIHNjYWxlKHRlc3Rfc2V0WywxOjJdKQ0KYGBgDQoNCiMgQXBwbHlpbmcgUENBIHRvIHRoZSBkYXRhIA0KDQpgYGB7cn0NCiMgaW5zdGFsbC5wYWNrYWdlcygia2VybmxhYiIpDQpsaWJyYXJ5KGtlcm5sYWIpDQprcGNhIDwtICBrcGNhKH4uLCBkYXRhID0gdHJhaW5pbmdfc2V0Wy0zXSwga2VybmVsID0gJ3JiZmRvdCcsIA0KICAgICAgICAgICAgICBmZWF0dXJlcyA9IDIpDQoNCg0KDQpgYGANCg0KYGBge3J9DQojIGFkZCB0aGUgZGVwZW5lbnQgdmFyaWFibGUgaW50byB0aGUgcGNhIA0KdHJhaW5pbmdfc2V0X3BjYSA8LSAgYXMuZGF0YS5mcmFtZShwcmVkaWN0KGtwY2EsIHRyYWluaW5nX3NldCkpDQojIGFkZCB0aGUgZGVwZW5kZW50IHZhcmFpYmxlIFB1cmNoYXNlZCB0byB0aGUgdHJhaW5pbmcgc2V0DQp0cmFpbmluZ19zZXRfcGNhJFB1cmNoYXNlZCA8LSAgdHJhaW5pbmdfc2V0JFB1cmNoYXNlZA0KDQojIGRvIHRoZSBzYW1lIGZvciB0aGUgdGVzdCBkYXRhc2V0DQp0ZXN0X3NldF9wY2EgPC0gIGFzLmRhdGEuZnJhbWUocHJlZGljdChrcGNhLCB0ZXN0X3NldCkpDQojIGFkZCB0aGUgZGVwZW5kZW50IHZhcmFpYmxlIFB1cmNoYXNlZCB0byB0aGUgdGVzdCBzZXQNCnRlc3Rfc2V0X3BjYSRQdXJjaGFzZWQgPC0gIHRlc3Rfc2V0JFB1cmNoYXNlZA0KDQpgYGANCg0KIyBCdWlsZGluZyB0aGUgY2xhc3NpZmljYXRpb24gbW9kZWwgDQoNCmBgYHtyfQ0KY2xhc3NpZmllciA8LSBnbG0oZm9ybXVsYSA9IFB1cmNoYXNlZCB+IC4sDQogICAgICAgICAgICAgICAgICBmYW1pbHkgPSBiaW5vbWlhbCwNCiAgICAgICAgICAgICAgICAgIGRhdGEgPSB0cmFpbmluZ19zZXRfcGNhICwNCiAgICAgICAgICAgICAgICAgICkNCg0Kc3VtbWFyeShjbGFzc2lmaWVyKQ0KDQpgYGANCg0KIyBQcmVkaWN0aW5nIHRoZSB0ZXN0IHNldCByZXN1bHRzDQoNCmBgYHtyfQ0KcHJvYl9wcmVkIDwtICBwcmVkaWN0KGNsYXNzaWZpZXIsIG5ld2RhdGEgPSB0ZXN0X3NldF9wY2FbLTNdDQogICAgICAgICAgICAgICAgICAgKQ0KeV9wcmVkIDwtICBpZmVsc2UocHJvYl9wcmVkID4wLjUsMSwwKQ0KDQpgYGANCg0KIyBFdmFsdWF0ZSB0aGUgcHJlZGljdGlvbiB1c2luZyBjb25mdXNpb24gTWF0cml4Lg0KDQpgYGB7cn0NCiMgTWFraW5nIHRoZSBjb25mdXNpb24gbWF0cml4DQojIFszXSByZWZlcnMgdG8gdGhlIG91dGNvbWUNCg0KY20gPC0gdGFibGUodGVzdF9zZXRfcGNhWywzXSwgeV9wcmVkKQ0KY20NCmBgYA0KDQojIFBsb3QNCg0KYGBge3J9DQojIGluc3RhbGwucGFja2FnZXMoIkVsZW1TdGF0TGVhcm4iKQ0KbGlicmFyeShFbGVtU3RhdExlYXJuKQ0Kc2V0IDwtIHRlc3Rfc2V0X3BjYQ0KWDEgPC0gc2VxKG1pbihzZXRbLDFdKSAtIDEsIG1heChzZXRbLDFdKSArIDEsIGJ5ID0gMC4wMSkNClgyIDwtIHNlcShtaW4oc2V0WywyXSkgLSAxLCBtYXgoc2V0WywyXSkgKyAxLCBieSA9IDAuMDEpDQpncmlkX3NldCA8LSBleHBhbmQuZ3JpZChYMSwgWDIpDQpjb2xuYW1lcyhncmlkX3NldCkgPSBjKCdWMScsJ1YyJykNCnByb2Jfc2V0ID0gcHJlZGljdChjbGFzc2lmaWVyLCB0eXBlID0gJ3Jlc3BvbnNlJywgbmV3ZGF0YSA9IGdyaWRfc2V0KQ0KeV9ncmlkID0gaWZlbHNlKHByb2Jfc2V0ID4gMC41LCAxLCAwKQ0KcGxvdChzZXRbLC0zXSwNCiAgICAgbWFpbiA9ICdMb2dpc3RpYyBSZWdyZXNzaW9uKFRlc3QgU2V0KScsDQogICAgIHhsYWIgPSAgJ0FnZScsIHlsYWIgPSAnRXN0aW1hdGVkIFNhbGFyeScsDQogICAgIHhsaW0gPSByYW5nZShYMSksIHlsaW0gPSByYW5nZShYMikpDQpjb250b3VyKFgxLCBYMiwgbWF0cml4KGFzLm51bWVyaWMoeV9ncmlkKSwgbGVuZ3RoKFgxKSwgbGVuZ3RoKFgyKSksIGFkZCA9IFRSVUUpDQpwb2ludHMoZ3JpZF9zZXQsIHBjaCA9ICcuJywgY29sID0gaWZlbHNlKHlfZ3JpZCA9PSAxLCAnc3ByaW5nZ3JlZW4zJywgJ3RvbWF0bycpKQ0KcG9pbnRzKHNldCwgcGNoID0gMjEsIGJnPSBpZmVsc2Uoc2V0WywzXSA9PSAxLCdncmVlbjQnLCAncmVkMycpKQ0KDQpgYGANCg0KDQoNCg==