IMPORT DATA, SCALE AND PARTITION INTO TRAIN AND TEST DATA
# Switch to the author's project directory only when it exists, so the script
# still runs on machines where this absolute path is absent instead of
# aborting with "cannot change working directory".
project_dir <- "/Users/anand/RProjects/KNN"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}
# Preview the first six rows of the built-in iris data set.
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Show the structure of iris: its dimensions and the type of every column.
utils::str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
# Fix the RNG seed so the random partition drawn below (and every later step
# that depends on it) is the same on each run.
set.seed(123)
# Row numbers of the training observations: 70% of the rows, drawn by caret
# with Species as the outcome; list = FALSE returns them as a matrix.
index <- createDataPartition(iris$Species, p = 0.7, list = FALSE)
# Keep only the four numeric measurements; Species is the label, not a feature.
iris_ <- subset(iris, select = -Species)
# Centre every column to mean 0 and rescale it to unit standard deviation so
# all measurements contribute on a comparable scale.
# NOTE(review): the centring/scaling statistics are computed on all rows, i.e.
# before the train/test split, so test rows influence them.
iris_1 <- scale(iris_)
head(iris_1)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 -0.8976739 1.01560199 -1.335752 -1.311052
## 2 -1.1392005 -0.13153881 -1.335752 -1.311052
## 3 -1.3807271 0.32731751 -1.392399 -1.311052
## 4 -1.5014904 0.09788935 -1.279104 -1.311052
## 5 -1.0184372 1.24503015 -1.335752 -1.311052
## 6 -0.5353840 1.93331463 -1.165809 -1.048667
# Split the standardised feature matrix by the partition indices: every row
# not listed in `index` goes to the test set, the listed rows form the
# training set.
testdata <- iris_1[-index, ]
traindata <- iris_1[index, ]
CREATE CLASSIFICATION VECTORS
# Species labels aligned with the test and training rows selected by `index`.
Ytest <- iris[["Species"]][-index]
Ytrain <- iris[["Species"]][index]
LOAD LIBRARY
CREATE MODEL
# knn() lives in the 'class' package, which is not attached anywhere in the
# visible script; call it through its namespace so the fit does not fail with
# "could not find function". k is one less than the rounded square root of the
# number of training rows.
knnmodel <- class::knn(
  train = traindata,
  test = testdata,
  cl = Ytrain,
  k = round(sqrt(nrow(traindata)), 0) - 1
)
# The result holds one predicted class per test row, so summary() reports the
# number of predictions per species.
summary(knnmodel)
## setosa versicolor virginica
## 15 18 12
GENERATE CONFUSION MATRIX AND CHECK SENSITIVITY AND SPECIFICITY
# Cross-tabulate the predicted classes (rows) against the true test labels
# (columns), then display the resulting table.
cMatrix <- table(knnmodel, Ytest)
cMatrix
## Ytest
## knnmodel setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 15 3
## virginica 0 0 12
# Derive accuracy, kappa and the per-class statistics (sensitivity,
# specificity, ...) from the prediction-by-truth table.
caret::confusionMatrix(cMatrix)
## Confusion Matrix and Statistics
##
## Ytest
## knnmodel setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 15 3
## virginica 0 0 12
##
## Overall Statistics
##
## Accuracy : 0.9333
## 95% CI : (0.8173, 0.986)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 1.0000 0.8000
## Specificity 1.0000 0.9000 1.0000
## Pos Pred Value 1.0000 0.8333 1.0000
## Neg Pred Value 1.0000 1.0000 0.9091
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3333 0.2667
## Detection Prevalence 0.3333 0.4000 0.2667
## Balanced Accuracy 1.0000 0.9500 0.9000
FORMULATE PREDICTED VECTOR
# knnmodel is already a factor of predicted classes, so convert it with one
# vectorised call instead of applying as.factor() element by element through
# the type-unstable sapply(). The name `class` is kept because later code
# refers to it, even though it shadows the name of base::class.
class <- as.factor(knnmodel)
class
## [1] setosa setosa setosa setosa setosa setosa
## [7] setosa setosa setosa setosa setosa setosa
## [13] setosa setosa setosa versicolor versicolor versicolor
## [19] versicolor versicolor versicolor versicolor versicolor versicolor
## [25] versicolor versicolor versicolor versicolor versicolor versicolor
## [31] virginica virginica virginica versicolor virginica virginica
## [37] virginica virginica virginica virginica virginica versicolor
## [43] virginica versicolor virginica
## Levels: setosa versicolor virginica
# Inspect the structure of the predicted-class vector.
utils::str(class)
## Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Report the internal storage type of the predictions (a factor is stored as
# integer codes).
typeof(x = class)
## [1] "integer"
CREATE A NEW DATA FRAME WITH CLASSIFICATION VECTOR APPENDED TO TESTDATA
# Attach each test row's predicted species to its standardised measurements;
# the prediction column is named "class".
newdF <- data.frame(testdata, class = class)
newdF
## Sepal.Length Sepal.Width Petal.Length Petal.Width class
## 2 -1.13920048 -0.13153881 -1.33575163 -1.3110521482 setosa
## 11 -0.53538397 1.47445831 -1.27910398 -1.3110521482 setosa
## 13 -1.25996379 -0.13153881 -1.33575163 -1.4422448248 setosa
## 14 -1.86378030 -0.13153881 -1.50569459 -1.4422448248 setosa
## 20 -0.89767388 1.70388647 -1.27910398 -1.1798594716 setosa
## 29 -0.77691058 0.78617383 -1.33575163 -1.3110521482 setosa
## 31 -1.25996379 0.09788935 -1.22245633 -1.3110521482 setosa
## 33 -0.77691058 2.39217095 -1.27910398 -1.4422448248 setosa
## 38 -1.13920048 1.24503015 -1.33575163 -1.4422448248 setosa
## 40 -0.89767388 0.78617383 -1.27910398 -1.3110521482 setosa
## 43 -1.74301699 0.32731751 -1.39239929 -1.3110521482 setosa
## 44 -1.01843718 1.01560199 -1.22245633 -0.7862814418 setosa
## 47 -0.89767388 1.70388647 -1.22245633 -1.3110521482 setosa
## 48 -1.50149039 0.32731751 -1.33575163 -1.3110521482 setosa
## 50 -1.01843718 0.55674567 -1.33575163 -1.3110521482 setosa
## 52 0.67224905 0.32731751 0.42032558 0.3944526477 versicolor
## 54 -0.41462067 -1.73753594 0.13708732 0.1320672944 versicolor
## 57 0.55148575 0.55674567 0.53362088 0.5256453243 versicolor
## 62 0.06843254 -0.13153881 0.25038262 0.3944526477 versicolor
## 63 0.18919584 -1.96696410 0.13708732 -0.2615107354 versicolor
## 65 -0.29385737 -0.36096697 -0.08950329 0.1320672944 versicolor
## 70 -0.29385737 -1.27867961 0.08043967 -0.1303180588 versicolor
## 73 0.55148575 -1.27867961 0.64691619 0.3944526477 versicolor
## 74 0.30995914 -0.59039513 0.53362088 0.0008746178 versicolor
## 76 0.91377565 -0.13153881 0.36367793 0.2632599711 versicolor
## 80 -0.17309407 -1.04925145 -0.14615094 -0.2615107354 versicolor
## 81 -0.41462067 -1.50810778 0.02379201 -0.1303180588 versicolor
## 82 -0.41462067 -1.50810778 -0.03285564 -0.2615107354 versicolor
## 85 -0.53538397 -0.13153881 0.42032558 0.3944526477 versicolor
## 99 -0.89767388 -1.27867961 -0.42938920 -0.1303180588 versicolor
## 101 0.55148575 0.55674567 1.27004036 1.7063794137 virginica
## 103 1.51759216 -0.13153881 1.21339271 1.1816087073 virginica
## 104 0.55148575 -0.36096697 1.04344975 0.7880306775 virginica
## 107 -1.13920048 -1.27867961 0.42032558 0.6568380009 versicolor
## 109 1.03453895 -1.27867961 1.15674505 0.7880306775 virginica
## 111 0.79301235 0.32731751 0.76021149 1.0504160307 virginica
## 112 0.67224905 -0.81982329 0.87350679 0.9192233541 virginica
## 117 0.79301235 -0.13153881 0.98680210 0.7880306775 virginica
## 119 2.24217198 -1.04925145 1.77986923 1.4439940605 virginica
## 126 1.63835547 0.32731751 1.27004036 0.7880306775 virginica
## 128 0.30995914 -0.13153881 0.64691619 0.7880306775 virginica
## 134 0.55148575 -0.59039513 0.76021149 0.3944526477 versicolor
## 137 0.55148575 0.78617383 1.04344975 1.5751867371 virginica
## 139 0.18919584 -0.13153881 0.59026853 0.7880306775 versicolor
## 147 0.55148575 -1.27867961 0.70356384 0.9192233541 virginica