# KNN

# Zoo Animals

# Assignment 8

# Read the Zoo animals CSV into a data frame, then print a compact overview
# of its columns (the recorded output follows below).
# NOTE(review): the path is absolute and machine-specific.
dataset <- read.csv(
  file = "C:\\Users\\RISHI RAHUL\\Desktop\\DS\\11 KNN\\Assignment\\Zoo.csv"
)
str(object = dataset)
## 'data.frame':    101 obs. of  18 variables:
##  $ animal.name: Factor w/ 100 levels "aardvark","antelope",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ hair       : int  1 1 0 1 1 1 1 0 0 1 ...
##  $ feathers   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ eggs       : int  0 0 1 0 0 0 0 1 1 0 ...
##  $ milk       : int  1 1 0 1 1 1 1 0 0 1 ...
##  $ airborne   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ aquatic    : int  0 0 1 0 0 0 0 1 1 0 ...
##  $ predator   : int  1 0 1 1 1 0 0 0 1 0 ...
##  $ toothed    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ backbone   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ breathes   : int  1 1 0 1 1 1 1 0 0 1 ...
##  $ venomous   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ fins       : int  0 0 1 0 0 0 0 1 1 0 ...
##  $ legs       : int  4 4 0 4 4 4 4 0 0 4 ...
##  $ tail       : int  0 1 1 0 1 1 1 1 1 0 ...
##  $ domestic   : int  0 0 0 0 0 0 1 1 0 1 ...
##  $ catsize    : int  1 1 0 1 1 1 1 0 0 0 ...
##  $ type       : int  1 1 4 1 1 1 1 4 4 1 ...
# Shape of the raw data frame (rows, columns).
print(dim(dataset))
## [1] 101  18
# Remove the first column (animal.name) so that only the 16 attribute
# columns and the class column `type` remain.
dataset <- dataset[, -1]
print(dim(dataset))
## [1] 101  17
# Number of animals in each class.
table(dataset[["type"]])
## 
##  1  2  3  4  5  6  7 
## 41 20  5 13  4  8 10
# Class distribution as percentages, rounded to one decimal place.
round(100 * prop.table(table(dataset[["type"]])), digits = 1)
## 
##    1    2    3    4    5    6    7 
## 40.6 19.8  5.0 12.9  4.0  7.9  9.9
# Summary statistics for three raw features; `legs` ranges over 0-8 while
# the other two are 0/1 flags, which motivates the rescaling further down.
summary(dataset[, c("feathers", "aquatic", "legs")])
##     feathers        aquatic            legs      
##  Min.   :0.000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:2.000  
##  Median :0.000   Median :0.0000   Median :4.000  
##  Mean   :0.198   Mean   :0.3564   Mean   :2.842  
##  3rd Qu.:0.000   3rd Qu.:1.0000   3rd Qu.:4.000  
##  Max.   :1.000   Max.   :1.0000   Max.   :8.000
# Rescale a numeric vector to the [0, 1] interval (min-max normalization).
#
# Args:
#   x: A numeric vector.
#
# Returns:
#   A numeric vector of the same length as `x`, computed as
#   (x - min(x)) / (max(x) - min(x)). A constant vector (max equal to min)
#   is mapped to all zeros instead of NaN.
normalize_data <- function(x) {
  lowest <- min(x)
  span <- max(x) - lowest
  # A constant column has zero range, so the plain formula would evaluate
  # 0 / 0 and fill the column with NaN, which knn() refuses as missing data.
  # The is.na() guard keeps the original NA propagation untouched.
  if (!is.na(span) && span == 0) {
    return(rep(0, length(x)))
  }
  (x - lowest) / span
}

# Apply min-max scaling to the first 16 columns (column 17, the class label,
# is left out) and rebuild a data frame from the scaled columns.
dataset_n <- as.data.frame(
  lapply(X = dataset[seq_len(16)], FUN = normalize_data)
)
# Re-check the same three features after scaling.
summary(dataset_n[, c("feathers", "aquatic", "legs")])
##     feathers        aquatic            legs       
##  Min.   :0.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:0.2500  
##  Median :0.000   Median :0.0000   Median :0.5000  
##  Mean   :0.198   Mean   :0.3564   Mean   :0.3552  
##  3rd Qu.:0.000   3rd Qu.:1.0000   3rd Qu.:0.5000  
##  Max.   :1.000   Max.   :1.0000   Max.   :1.0000
# Hold-out split in file order: rows 1-80 for training, rows 81-101 for
# testing.
# NOTE(review): the rows are not shuffled before splitting.
dataset_train <- dataset_n[seq(1, 80), ]
dataset_test <- dataset_n[seq(81, 101), ]

# Class labels for the same rows, taken from column 17 of the unscaled data.
dataset_train_labels <- dataset[seq(1, 80), 17]

dataset_test_labels <- dataset[seq(81, 101), 17]
# EDA part ends here

library(class)

# Classify the test rows with k-nearest neighbours for two values of k.
#
# knn() breaks voting ties at random. With an even k such as 2, a tie occurs
# whenever the two neighbours disagree, so the RNG is seeded before each call
# to make the predictions (and the cross table built from them) reproducible.
# NOTE(review): the recorded output further down comes from an unseeded run
# and may differ slightly from a seeded one.

# k = 5, kept under its own name so it is not overwritten by the k = 2 run.
set.seed(123)
dataset_pred_k5 <- knn(train = dataset_train, test = dataset_test,
                       cl = dataset_train_labels, k = 5)
# for k = 5, the original unseeded run predicted only 6 types: 1, 2, 4, 5, 6, 7

# k = 2, the predictions evaluated in the rest of the script.
set.seed(123)
dataset_pred <- knn(train = dataset_train, test = dataset_test,
                    cl = dataset_train_labels, k = 2)
# for k = 2, the original unseeded run predicted all 7 types: 1, 2, 3, 4, 5, 6, 7

library(gmodels)
## Warning: package 'gmodels' was built under R version 3.5.1
# Cross-tabulate the actual classes (rows) against the predicted classes
# (columns) for the test set; diagonal cells count the correct predictions.
CrossTable(x = dataset_test_labels, y = dataset_pred)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  21 
## 
##  
##                     | dataset_pred 
## dataset_test_labels |         1 |         2 |         3 |         4 |         5 |         6 |         7 | Row Total | 
## --------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
##                   1 |         5 |         0 |         0 |         0 |         0 |         0 |         0 |         5 | 
##                     |    12.190 |     1.190 |     0.238 |     0.714 |     0.476 |     0.952 |     0.238 |           | 
##                     |     1.000 |     0.000 |     0.000 |     0.000 |     0.000 |     0.000 |     0.000 |     0.238 | 
##                     |     1.000 |     0.000 |     0.000 |     0.000 |     0.000 |     0.000 |     0.000 |           | 
##                     |     0.238 |     0.000 |     0.000 |     0.000 |     0.000 |     0.000 |     0.000 |           | 
## --------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
##                   2 |         0 |         4 |         0 |         0 |         0 |         0 |         0 |         4 | 
##                     |     0.952 |     9.752 |     0.190 |     0.571 |     0.381 |     0.762 |     0.190 |           | 
##                     |     0.000 |     1.000 |     0.000 |     0.000 |     0.000 |     0.000 |     0.000 |     0.190 | 
##                     |     0.000 |     0.800 |     0.000 |     0.000 |     0.000 |     0.000 |     0.000 |           | 
##                     |     0.000 |     0.190 |     0.000 |     0.000 |     0.000 |     0.000 |     0.000 |           | 
## --------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
##                   3 |         0 |         1 |         1 |         0 |         1 |         0 |         0 |         3 | 
##                     |     0.714 |     0.114 |     5.143 |     0.429 |     1.786 |     0.571 |     0.143 |           | 
##                     |     0.000 |     0.333 |     0.333 |     0.000 |     0.333 |     0.000 |     0.000 |     0.143 | 
##                     |     0.000 |     0.200 |     1.000 |     0.000 |     0.500 |     0.000 |     0.000 |           | 
##                     |     0.000 |     0.048 |     0.048 |     0.000 |     0.048 |     0.000 |     0.000 |           | 
## --------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
##                   4 |         0 |         0 |         0 |         3 |         0 |         0 |         0 |         3 | 
##                     |     0.714 |     0.714 |     0.143 |    15.429 |     0.286 |     0.571 |     0.143 |           | 
##                     |     0.000 |     0.000 |     0.000 |     1.000 |     0.000 |     0.000 |     0.000 |     0.143 | 
##                     |     0.000 |     0.000 |     0.000 |     1.000 |     0.000 |     0.000 |     0.000 |           | 
##                     |     0.000 |     0.000 |     0.000 |     0.143 |     0.000 |     0.000 |     0.000 |           | 
## --------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
##                   5 |         0 |         0 |         0 |         0 |         1 |         0 |         0 |         1 | 
##                     |     0.238 |     0.238 |     0.048 |     0.143 |     8.595 |     0.190 |     0.048 |           | 
##                     |     0.000 |     0.000 |     0.000 |     0.000 |     1.000 |     0.000 |     0.000 |     0.048 | 
##                     |     0.000 |     0.000 |     0.000 |     0.000 |     0.500 |     0.000 |     0.000 |           | 
##                     |     0.000 |     0.000 |     0.000 |     0.000 |     0.048 |     0.000 |     0.000 |           | 
## --------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
##                   6 |         0 |         0 |         0 |         0 |         0 |         2 |         0 |         2 | 
##                     |     0.476 |     0.476 |     0.095 |     0.286 |     0.190 |     6.881 |     0.095 |           | 
##                     |     0.000 |     0.000 |     0.000 |     0.000 |     0.000 |     1.000 |     0.000 |     0.095 | 
##                     |     0.000 |     0.000 |     0.000 |     0.000 |     0.000 |     0.500 |     0.000 |           | 
##                     |     0.000 |     0.000 |     0.000 |     0.000 |     0.000 |     0.095 |     0.000 |           | 
## --------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
##                   7 |         0 |         0 |         0 |         0 |         0 |         2 |         1 |         3 | 
##                     |     0.714 |     0.714 |     0.143 |     0.429 |     0.286 |     3.571 |     5.143 |           | 
##                     |     0.000 |     0.000 |     0.000 |     0.000 |     0.000 |     0.667 |     0.333 |     0.143 | 
##                     |     0.000 |     0.000 |     0.000 |     0.000 |     0.000 |     0.500 |     1.000 |           | 
##                     |     0.000 |     0.000 |     0.000 |     0.000 |     0.000 |     0.095 |     0.048 |           | 
## --------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
##        Column Total |         5 |         5 |         1 |         3 |         2 |         4 |         1 |        21 | 
##                     |     0.238 |     0.238 |     0.048 |     0.143 |     0.095 |     0.190 |     0.048 |           | 
## --------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
## 
## 
# Out of 21 records, the model made 17 correct predictions and 4 wrong predictions.
# The model is 81% accurate (17 / 21).