# Read the zoo data and discard its first column, which is not used below.
dataset <- read.csv("D:\\DataScience\\Assignments\\KNN\\Zoo.csv")[-1]

# Turn the class codes 1..7 into a labelled factor. The labels are generated
# rather than typed out and come out as "categori-1" ... "categori-7".
dataset$type <- factor(
  dataset$type,
  levels = as.character(1:7),
  labels = paste0("categori-", 1:7)
)

# Number of rows in each class.
table(dataset$type)
##
## categori-1 categori-2 categori-3 categori-4 categori-5 categori-6
## 41 20 5 13 4 8
## categori-7
## 10
# Share of each class as a percentage of all rows, to one decimal place.
round(100 * prop.table(table(dataset[["type"]])), digits = 1)
##
## categori-1 categori-2 categori-3 categori-4 categori-5 categori-6
## 40.6 19.8 5.0 12.9 4.0 7.9
## categori-7
## 9.9
# Five-number summary (plus mean) of three raw features; the recorded output
# shows `legs` on a wider scale (0-8) than the 0/1 columns.
summary(dataset[, c("feathers", "aquatic", "legs")])
## feathers aquatic legs
## Min. :0.000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:2.000
## Median :0.000 Median :0.0000 Median :4.000
## Mean :0.198 Mean :0.3564 Mean :2.842
## 3rd Qu.:0.000 3rd Qu.:1.0000 3rd Qu.:4.000
## Max. :1.000 Max. :1.0000 Max. :8.000
# Min-max rescale a vector onto [0, 1].
#
# Args:
#   x:     numeric (or logical) vector to rescale.
#   na.rm: if TRUE (default), missing values are ignored when finding the
#          minimum and maximum and stay NA in the result. If FALSE, any NA
#          in `x` makes the whole result NA.
#
# Returns:
#   A numeric vector the same length as `x`. A vector whose non-missing
#   values are all equal has zero range; it is mapped to 0 rather than to
#   NaN (0 / 0), so a constant column cannot poison distance calculations.
normalize_data <- function(x, na.rm = TRUE) {
  if (all(is.na(x))) {
    # Nothing to rescale. Also covers zero-length input, where all() is TRUE,
    # and avoids the "no non-missing arguments" warnings from min()/max().
    return(as.numeric(x))
  }

  rng <- range(x, na.rm = na.rm)
  span <- rng[2] - rng[1]

  if (!is.na(span) && span == 0) {
    # Constant input: every non-missing value equals the minimum.
    return(x - rng[1])
  }

  (x - rng[1]) / span
}
# Rescale the first 16 columns (the predictors) column by column, keeping the
# data-frame shape and column names.
dataset_n <- dataset[seq_len(16)]
dataset_n[] <- lapply(dataset_n, normalize_data)

# Same three features as before, now after rescaling.
summary(dataset_n[, c("feathers", "aquatic", "legs")])
## feathers aquatic legs
## Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.2500
## Median :0.000 Median :0.0000 Median :0.5000
## Mean :0.198 Mean :0.3564 Mean :0.3552
## 3rd Qu.:0.000 3rd Qu.:1.0000 3rd Qu.:0.5000
## Max. :1.000 Max. :1.0000 Max. :1.0000
# Train on rows 1-80 of the normalised features; hold out rows 81-101.
# NOTE(review): the split follows file order with no shuffling - confirm the
# rows are not sorted by class.
dataset_train <- dataset_n[seq_len(80), ]
dataset_test <- dataset_n[81:101, ]

# Matching class labels, read from column 17 of the un-normalised data.
dataset_train_labels <- dataset[[17]][seq_len(80)]
dataset_test_labels <- dataset[[17]][81:101]
library(class)
## Warning: package 'class' was built under R version 3.5.1
# Classify each test row by a vote among its k = 2 nearest training rows.
# class::knn() breaks voting ties at random, and with an even k a 1-1 split
# between two classes is a tie, so the predictions depend on the RNG state.
# Fix the seed so that re-running the script yields the same predictions.
# NOTE(review): the output recorded below came from an unseeded run and may
# not match a seeded run exactly.
set.seed(123)
dataset_pred <- knn(
  train = dataset_train,
  test = dataset_test,
  cl = dataset_train_labels,
  k = 2
)
dataset_pred
## [1] categori-5 categori-6 categori-4 categori-2 categori-1 categori-7
## [7] categori-4 categori-2 categori-6 categori-5 categori-2 categori-3
## [13] categori-4 categori-1 categori-1 categori-2 categori-1 categori-6
## [19] categori-1 categori-6 categori-2
## 7 Levels: categori-1 categori-2 categori-3 categori-4 ... categori-7
library(gmodels)
## Warning: package 'gmodels' was built under R version 3.5.1
# Confusion matrix: true test labels in rows, predicted labels in columns.
CrossTable(x = dataset_test_labels, y = dataset_pred)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 21
##
##
## | dataset_pred
## dataset_test_labels | categori-1 | categori-2 | categori-3 | categori-4 | categori-5 | categori-6 | categori-7 | Row Total |
## --------------------|------------|------------|------------|------------|------------|------------|------------|------------|
## categori-1 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 5 |
## | 12.190 | 1.190 | 0.238 | 0.714 | 0.476 | 0.952 | 0.238 | |
## | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.238 |
## | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | |
## | 0.238 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | |
## --------------------|------------|------------|------------|------------|------------|------------|------------|------------|
## categori-2 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 4 |
## | 0.952 | 9.752 | 0.190 | 0.571 | 0.381 | 0.762 | 0.190 | |
## | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.190 |
## | 0.000 | 0.800 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | |
## | 0.000 | 0.190 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | |
## --------------------|------------|------------|------------|------------|------------|------------|------------|------------|
## categori-3 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 3 |
## | 0.714 | 0.114 | 5.143 | 0.429 | 1.786 | 0.571 | 0.143 | |
## | 0.000 | 0.333 | 0.333 | 0.000 | 0.333 | 0.000 | 0.000 | 0.143 |
## | 0.000 | 0.200 | 1.000 | 0.000 | 0.500 | 0.000 | 0.000 | |
## | 0.000 | 0.048 | 0.048 | 0.000 | 0.048 | 0.000 | 0.000 | |
## --------------------|------------|------------|------------|------------|------------|------------|------------|------------|
## categori-4 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 3 |
## | 0.714 | 0.714 | 0.143 | 15.429 | 0.286 | 0.571 | 0.143 | |
## | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | 0.143 |
## | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.000 | |
## | 0.000 | 0.000 | 0.000 | 0.143 | 0.000 | 0.000 | 0.000 | |
## --------------------|------------|------------|------------|------------|------------|------------|------------|------------|
## categori-5 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |
## | 0.238 | 0.238 | 0.048 | 0.143 | 8.595 | 0.190 | 0.048 | |
## | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.048 |
## | 0.000 | 0.000 | 0.000 | 0.000 | 0.500 | 0.000 | 0.000 | |
## | 0.000 | 0.000 | 0.000 | 0.000 | 0.048 | 0.000 | 0.000 | |
## --------------------|------------|------------|------------|------------|------------|------------|------------|------------|
## categori-6 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 2 |
## | 0.476 | 0.476 | 0.095 | 0.286 | 0.190 | 6.881 | 0.095 | |
## | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.095 |
## | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.500 | 0.000 | |
## | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.095 | 0.000 | |
## --------------------|------------|------------|------------|------------|------------|------------|------------|------------|
## categori-7 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | 3 |
## | 0.714 | 0.714 | 0.143 | 0.429 | 0.286 | 3.571 | 5.143 | |
## | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.667 | 0.333 | 0.143 |
## | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.500 | 1.000 | |
## | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.095 | 0.048 | |
## --------------------|------------|------------|------------|------------|------------|------------|------------|------------|
## Column Total | 5 | 5 | 1 | 3 | 2 | 4 | 1 | 21 |
## | 0.238 | 0.238 | 0.048 | 0.143 | 0.095 | 0.190 | 0.048 | |
## --------------------|------------|------------|------------|------------|------------|------------|------------|------------|
##
##
# Accuracy = share of test rows whose predicted class equals the true class.
# Computed from the vectors rather than typed in by hand: the confusion
# matrix recorded above has 17 (not 18) of its 21 observations on the
# diagonal (5 + 4 + 1 + 3 + 1 + 2 + 1).
mean(as.character(dataset_pred) == as.character(dataset_test_labels))
## [1] 0.8095238
# ~81% accuracy (17/21) for the run recorded above