Introduction

This is a famous dataset that contains measurements (in centimeters) of sepal length and width and petal length and width for 50 flowers from each of 3 species of iris. The species are Iris setosa, versicolor, and virginica. It has four numeric measurement variables (plus the species label) and one hundred fifty rows.

Libraries used

library(tidyverse)
library(GGally) # for GGPairs 
library(gridExtra)  # for grid arrange 
library(class) #for KNN 
library(gmodels) #for Cross Table 

Exploratory Data Analysis

About the Dataset

# Preview the first six rows of the dataset.
head(iris, n = 6L)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Per-column summary: quartiles and mean for the numeric columns,
# level counts for the Species factor.
summary(object = iris)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Pairwise plot matrix of every column against every other column.
ggpairs(data = iris)

Plots of Sepal Length vs. Sepal Width and Petal Length vs. Petal Width

# Sepal dimensions, one point per flower, coloured by species.
g1 <- ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, y = Sepal.Width, colour = Species)
) +
  geom_point()

# Petal dimensions, one point per flower, coloured by species.
g2 <- ggplot(
  data = iris,
  mapping = aes(x = Petal.Length, y = Petal.Width, colour = Species)
) +
  geom_point()

# Draw both scatter plots on a single page.
grid.arrange(g1, g2)

Analysis

Step I: Scaling the data.

# Work on a copy so the built-in `iris` data frame stays untouched.
final_iris <- iris
# Standardise the four measurement columns (columns 1-4) to mean 0 and
# standard deviation 1; the Species column (column 5) is left as is.
# NOTE(review): the scaling parameters are computed from all 150 rows, i.e.
# before the train/test split below, so test rows influence the scaling.
# Confirm this is acceptable for the analysis.
final_iris[, 1:4] <- scale(iris[, 1:4], center = TRUE, scale = TRUE)

Step II: Splitting the data into training and testing sets

# Fix the RNG seed so the random split is reproducible.
set.seed(1234)
# Draw 80% of the row numbers without replacement for the training set.
sample_index <- sample(
  seq_len(nrow(final_iris)),
  size = nrow(final_iris) * 0.80
)
# Selected rows form the training set; every remaining row is the test set.
iris_train <- final_iris[sample_index, ]
iris_test <- final_iris[-sample_index, ]

Step III: Building the model for k = 10,15,20

# Classify the held-out rows with k-nearest neighbours, using the four
# measurement columns as predictors and the training Species as labels.
# The three fits differ only in the neighbourhood size k and are kept in the
# original order (k = 10, 15, 20).

# k = 10
iris_knn_test_1 <- knn(
  train = iris_train[, 1:4],
  test = iris_test[, 1:4],
  cl = iris_train$Species,
  k = 10
)

# k = 15
iris_knn_test_2 <- knn(
  train = iris_train[, 1:4],
  test = iris_test[, 1:4],
  cl = iris_train$Species,
  k = 15
)

# k = 20
iris_knn_test_3 <- knn(
  train = iris_train[, 1:4],
  test = iris_test[, 1:4],
  cl = iris_train$Species,
  k = 20
)

Step IV: Checking the results

# Cross-tabulate the true species (rows) against the k = 10 predictions
# (columns); the chi-square contribution of each cell is omitted.
test_1 <- CrossTable(
  x = iris_test$Species,
  y = iris_knn_test_1,
  prop.chisq = FALSE
)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  30 
## 
##  
##                   | iris_knn_test_1 
## iris_test$Species |     setosa | versicolor |  virginica |  Row Total | 
## ------------------|------------|------------|------------|------------|
##            setosa |         10 |          0 |          0 |         10 | 
##                   |      1.000 |      0.000 |      0.000 |      0.333 | 
##                   |      1.000 |      0.000 |      0.000 |            | 
##                   |      0.333 |      0.000 |      0.000 |            | 
## ------------------|------------|------------|------------|------------|
##        versicolor |          0 |          8 |          2 |         10 | 
##                   |      0.000 |      0.800 |      0.200 |      0.333 | 
##                   |      0.000 |      0.889 |      0.182 |            | 
##                   |      0.000 |      0.267 |      0.067 |            | 
## ------------------|------------|------------|------------|------------|
##         virginica |          0 |          1 |          9 |         10 | 
##                   |      0.000 |      0.100 |      0.900 |      0.333 | 
##                   |      0.000 |      0.111 |      0.818 |            | 
##                   |      0.000 |      0.033 |      0.300 |            | 
## ------------------|------------|------------|------------|------------|
##      Column Total |         10 |          9 |         11 |         30 | 
##                   |      0.333 |      0.300 |      0.367 |            | 
## ------------------|------------|------------|------------|------------|
## 
## 
# Cross-tabulate the true species (rows) against the k = 15 predictions
# (columns); the chi-square contribution of each cell is omitted.
test_2 <- CrossTable(
  x = iris_test$Species,
  y = iris_knn_test_2,
  prop.chisq = FALSE
)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  30 
## 
##  
##                   | iris_knn_test_2 
## iris_test$Species |     setosa | versicolor |  virginica |  Row Total | 
## ------------------|------------|------------|------------|------------|
##            setosa |         10 |          0 |          0 |         10 | 
##                   |      1.000 |      0.000 |      0.000 |      0.333 | 
##                   |      1.000 |      0.000 |      0.000 |            | 
##                   |      0.333 |      0.000 |      0.000 |            | 
## ------------------|------------|------------|------------|------------|
##        versicolor |          0 |          9 |          1 |         10 | 
##                   |      0.000 |      0.900 |      0.100 |      0.333 | 
##                   |      0.000 |      1.000 |      0.091 |            | 
##                   |      0.000 |      0.300 |      0.033 |            | 
## ------------------|------------|------------|------------|------------|
##         virginica |          0 |          0 |         10 |         10 | 
##                   |      0.000 |      0.000 |      1.000 |      0.333 | 
##                   |      0.000 |      0.000 |      0.909 |            | 
##                   |      0.000 |      0.000 |      0.333 |            | 
## ------------------|------------|------------|------------|------------|
##      Column Total |         10 |          9 |         11 |         30 | 
##                   |      0.333 |      0.300 |      0.367 |            | 
## ------------------|------------|------------|------------|------------|
## 
## 
# Cross-tabulate the true species (rows) against the k = 20 predictions
# (columns); the chi-square contribution of each cell is omitted.
test_3 <- CrossTable(
  x = iris_test$Species,
  y = iris_knn_test_3,
  prop.chisq = FALSE
)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  30 
## 
##  
##                   | iris_knn_test_3 
## iris_test$Species |     setosa | versicolor |  virginica |  Row Total | 
## ------------------|------------|------------|------------|------------|
##            setosa |         10 |          0 |          0 |         10 | 
##                   |      1.000 |      0.000 |      0.000 |      0.333 | 
##                   |      1.000 |      0.000 |      0.000 |            | 
##                   |      0.333 |      0.000 |      0.000 |            | 
## ------------------|------------|------------|------------|------------|
##        versicolor |          0 |          9 |          1 |         10 | 
##                   |      0.000 |      0.900 |      0.100 |      0.333 | 
##                   |      0.000 |      0.900 |      0.100 |            | 
##                   |      0.000 |      0.300 |      0.033 |            | 
## ------------------|------------|------------|------------|------------|
##         virginica |          0 |          1 |          9 |         10 | 
##                   |      0.000 |      0.100 |      0.900 |      0.333 | 
##                   |      0.000 |      0.100 |      0.900 |            | 
##                   |      0.000 |      0.033 |      0.300 |            | 
## ------------------|------------|------------|------------|------------|
##      Column Total |         10 |         10 |         10 |         30 | 
##                   |      0.333 |      0.333 |      0.333 |            | 
## ------------------|------------|------------|------------|------------|
## 
##