IMPORT DATA, SCALE AND PARTITION INTO TRAIN AND TEST DATA

# Project directory on the original author's machine. Only switch into it when
# it actually exists: an unconditional setwd() aborts the script on any other
# machine, and the iris data used below ships with R, so no file is read from
# this directory here.
project_dir <- "/Users/anand/RProjects/KNN"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}
# Preview the first six rows of the built-in iris data set.
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Inspect the structure of iris: four numeric measurement columns plus the
# Species factor that serves as the class label.
str(iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
# Fix the RNG state so the 70/30 split (and every result derived from it) is
# reproducible from run to run.
set.seed(123)
# createDataPartition() samples within each Species level, keeping the class
# balance in both partitions; list = FALSE returns the selected training row
# numbers as a one-column matrix instead of a list.
index <- createDataPartition(iris$Species, p = 0.7, list = FALSE)
# Drop the label column so only the four numeric measurements remain.
iris_ <- iris[, names(iris) != "Species"]
# Standardise every feature to mean 0 / sd 1 so that no single measurement
# dominates the distance computation used by k-nearest neighbours.
iris_1 <- scale(iris_, center = TRUE, scale = TRUE)
head(iris_1)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1   -0.8976739  1.01560199    -1.335752   -1.311052
## 2   -1.1392005 -0.13153881    -1.335752   -1.311052
## 3   -1.3807271  0.32731751    -1.392399   -1.311052
## 4   -1.5014904  0.09788935    -1.279104   -1.311052
## 5   -1.0184372  1.24503015    -1.335752   -1.311052
## 6   -0.5353840  1.93331463    -1.165809   -1.048667
# Standardise using statistics from the training rows only. Slicing the
# already-scaled full data set (iris_1) lets the held-out rows influence the
# centring/scaling applied to the training data, which leaks test information
# into the model and flatters the evaluation.
traindata <- scale(iris_[index, ])
# Apply the training means and standard deviations to the held-out rows so
# both partitions are expressed on the same scale.
testdata <- scale(
  iris_[-index, ],
  center = attr(traindata, "scaled:center"),
  scale = attr(traindata, "scaled:scale")
)

CREATE CLASSIFICATION VECTORS

# Species labels aligned with the training rows (index) and with the
# remaining held-out rows (-index).
Ytrain <- iris[["Species"]][index]
Ytest <- iris[["Species"]][-index]

LOAD LIBRARY

CREATE MODEL

# knn() is provided by the 'class' package, which this script never attaches
# (only caret is loaded), so call it through its namespace to keep the script
# runnable as written.
# k is taken as the rounded square root of the training-set size, minus one.
knnmodel <- class::knn(
  train = traindata,
  test = testdata,
  cl = Ytrain,
  k = round(sqrt(nrow(traindata)), 0) - 1
)
# Count how many test observations were assigned to each species.
summary(knnmodel)
##     setosa versicolor  virginica 
##         15         18         12

GENERATE CONFUSION MATRIX AND CHECK SENSITIVITY AND SPECIFICITY

# Cross-tabulate predicted species (rows) against the true test labels
# (columns), then display the table.
cMatrix <- table(knnmodel, Ytest)
cMatrix
##             Ytest
## knnmodel     setosa versicolor virginica
##   setosa         15          0         0
##   versicolor      0         15         3
##   virginica       0          0        12
# Derive overall accuracy, kappa and the per-class sensitivity/specificity
# figures from the prediction-versus-truth table.
confusionMatrix(cMatrix)
## Confusion Matrix and Statistics
## 
##             Ytest
## knnmodel     setosa versicolor virginica
##   setosa         15          0         0
##   versicolor      0         15         3
##   virginica       0          0        12
## 
## Overall Statistics
##                                          
##                Accuracy : 0.9333         
##                  95% CI : (0.8173, 0.986)
##     No Information Rate : 0.3333         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.9            
##                                          
##  Mcnemar's Test P-Value : NA             
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.8000
## Specificity                 1.0000            0.9000           1.0000
## Pos Pred Value              1.0000            0.8333           1.0000
## Neg Pred Value              1.0000            1.0000           0.9091
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.2667
## Detection Prevalence        0.3333            0.4000           0.2667
## Balanced Accuracy           1.0000            0.9500           0.9000

FORMULATE PREDICTED VECTOR

# The predictions are already a factor, so as.factor() hands them back
# unchanged. Mapping as.factor over every element with sapply() only rebuilt
# the same vector element by element, and would yield an empty list rather
# than a factor when there are no predictions.
# NOTE: this variable shares its name with base::class(); the name is kept
# because later statements read it and use it as the data-frame column name.
class <- as.factor(knnmodel)
class
##  [1] setosa     setosa     setosa     setosa     setosa     setosa    
##  [7] setosa     setosa     setosa     setosa     setosa     setosa    
## [13] setosa     setosa     setosa     versicolor versicolor versicolor
## [19] versicolor versicolor versicolor versicolor versicolor versicolor
## [25] versicolor versicolor versicolor versicolor versicolor versicolor
## [31] virginica  virginica  virginica  versicolor virginica  virginica 
## [37] virginica  virginica  virginica  virginica  virginica  versicolor
## [43] virginica  versicolor virginica 
## Levels: setosa versicolor virginica
# Confirm the prediction vector is a three-level factor.
str(class)
##  Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# typeof() reports the underlying storage type: a factor is stored as integer
# codes that index into its levels.
typeof(class)
## [1] "integer"

CREATE A NEW DATA FRAME WITH CLASSIFICATION VECTOR APPENDED TO TESTDATA

# Attach the predicted species to the scaled test observations, one row per
# held-out flower, and display the combined table.
newdF <- data.frame(testdata, class = class)
newdF
##     Sepal.Length Sepal.Width Petal.Length   Petal.Width      class
## 2    -1.13920048 -0.13153881  -1.33575163 -1.3110521482     setosa
## 11   -0.53538397  1.47445831  -1.27910398 -1.3110521482     setosa
## 13   -1.25996379 -0.13153881  -1.33575163 -1.4422448248     setosa
## 14   -1.86378030 -0.13153881  -1.50569459 -1.4422448248     setosa
## 20   -0.89767388  1.70388647  -1.27910398 -1.1798594716     setosa
## 29   -0.77691058  0.78617383  -1.33575163 -1.3110521482     setosa
## 31   -1.25996379  0.09788935  -1.22245633 -1.3110521482     setosa
## 33   -0.77691058  2.39217095  -1.27910398 -1.4422448248     setosa
## 38   -1.13920048  1.24503015  -1.33575163 -1.4422448248     setosa
## 40   -0.89767388  0.78617383  -1.27910398 -1.3110521482     setosa
## 43   -1.74301699  0.32731751  -1.39239929 -1.3110521482     setosa
## 44   -1.01843718  1.01560199  -1.22245633 -0.7862814418     setosa
## 47   -0.89767388  1.70388647  -1.22245633 -1.3110521482     setosa
## 48   -1.50149039  0.32731751  -1.33575163 -1.3110521482     setosa
## 50   -1.01843718  0.55674567  -1.33575163 -1.3110521482     setosa
## 52    0.67224905  0.32731751   0.42032558  0.3944526477 versicolor
## 54   -0.41462067 -1.73753594   0.13708732  0.1320672944 versicolor
## 57    0.55148575  0.55674567   0.53362088  0.5256453243 versicolor
## 62    0.06843254 -0.13153881   0.25038262  0.3944526477 versicolor
## 63    0.18919584 -1.96696410   0.13708732 -0.2615107354 versicolor
## 65   -0.29385737 -0.36096697  -0.08950329  0.1320672944 versicolor
## 70   -0.29385737 -1.27867961   0.08043967 -0.1303180588 versicolor
## 73    0.55148575 -1.27867961   0.64691619  0.3944526477 versicolor
## 74    0.30995914 -0.59039513   0.53362088  0.0008746178 versicolor
## 76    0.91377565 -0.13153881   0.36367793  0.2632599711 versicolor
## 80   -0.17309407 -1.04925145  -0.14615094 -0.2615107354 versicolor
## 81   -0.41462067 -1.50810778   0.02379201 -0.1303180588 versicolor
## 82   -0.41462067 -1.50810778  -0.03285564 -0.2615107354 versicolor
## 85   -0.53538397 -0.13153881   0.42032558  0.3944526477 versicolor
## 99   -0.89767388 -1.27867961  -0.42938920 -0.1303180588 versicolor
## 101   0.55148575  0.55674567   1.27004036  1.7063794137  virginica
## 103   1.51759216 -0.13153881   1.21339271  1.1816087073  virginica
## 104   0.55148575 -0.36096697   1.04344975  0.7880306775  virginica
## 107  -1.13920048 -1.27867961   0.42032558  0.6568380009 versicolor
## 109   1.03453895 -1.27867961   1.15674505  0.7880306775  virginica
## 111   0.79301235  0.32731751   0.76021149  1.0504160307  virginica
## 112   0.67224905 -0.81982329   0.87350679  0.9192233541  virginica
## 117   0.79301235 -0.13153881   0.98680210  0.7880306775  virginica
## 119   2.24217198 -1.04925145   1.77986923  1.4439940605  virginica
## 126   1.63835547  0.32731751   1.27004036  0.7880306775  virginica
## 128   0.30995914 -0.13153881   0.64691619  0.7880306775  virginica
## 134   0.55148575 -0.59039513   0.76021149  0.3944526477 versicolor
## 137   0.55148575  0.78617383   1.04344975  1.5751867371  virginica
## 139   0.18919584 -0.13153881   0.59026853  0.7880306775 versicolor
## 147   0.55148575 -1.27867961   0.70356384  0.9192233541  virginica