# Load the Zoo data set from its CSV file.
Data <- read.csv("D:\\DataScience\\Assignments\\KNN\\Zoo.csv")
# View() opens a GUI data viewer; only call it in an interactive session so
# the script can also be run unattended (e.g. via Rscript or when knitting).
if (interactive()) {
  View(Data)
}
# Number of rows per value of the `domestic` column.
table(Data$domestic)
##
## 0 1
## 88 13
# Recode `domestic` as a labelled factor:
# 0 -> "Wild Animal/Game Animal", 1 -> "Domestic Animal".
Data$domestic <- factor(
  Data$domestic,
  levels = 0:1,
  labels = c("Wild Animal/Game Animal", "Domestic Animal")
)
# Percentage of entries in each class (wild/game vs. domestic), rounded to
# one decimal place.
round(100 * prop.table(table(Data$domestic)), digits = 1)
##
## Wild Animal/Game Animal Domestic Animal
## 87.1 12.9
# Feature table: every column except 1 and 16. Column 16 is the class label
# (the train/test label vectors are read from it later); column 1 is
# presumably an identifier such as the animal name -- TODO confirm.
Data1 <- Data[, -c(1L, 16L)]
# Min-max rescale a vector so its smallest value maps to 0 and its largest
# value maps to 1.
#
# x: a numeric vector.
# Returns a numeric vector of the same length as `x`. A vector whose values
# are all equal (zero range) is returned as all zeros.
#
# Note: this definition masks base::norm() for the rest of the script.
norm <- function(x) {
  lo <- min(x)
  span <- max(x) - lo
  # A constant column has zero range, so the division below would yield
  # NaN (0 / 0). class::knn() does not accept missing values, so map such a
  # column to zeros instead. An NA range is left to propagate unchanged.
  if (!is.na(span) && span == 0) {
    return(rep(0, length(x)))
  }
  (x - lo) / span
}
# Rescale each of the first 16 columns of Data1 with norm() and reassemble
# the results into a data frame.
Data_n <- as.data.frame(lapply(Data1[1:16], norm))
# View() opens a GUI data viewer; skip it when the script runs unattended.
if (interactive()) {
  View(Data_n)
}
# Create training and test datasets.
# Rows are drawn at random within each `domestic` class (about 80% of each
# class goes to training). The previous fixed cut (rows 1-81 vs. 82-101) left
# the test set with a single class, so the evaluation said nothing about how
# well the other class is recognised.
# NOTE: the `##` console output recorded further down was captured with that
# fixed cut and will differ once the script is re-run.
set.seed(123)  # make the random split reproducible
Train_Rows <- sort(unlist(
  lapply(
    split(seq_len(nrow(Data)), Data$domestic),
    # Index via sample.int() so a one-row class is handled correctly.
    function(rows) {
      rows[sample.int(length(rows), size = round(0.8 * length(rows)))]
    }
  ),
  use.names = FALSE
))
Test_Rows <- setdiff(seq_len(nrow(Data)), Train_Rows)
# Data_n has the same row order as Data, so the same row indices select the
# matching features and labels.
Data_Train <- Data_n[Train_Rows, ]
Data_Test <- Data_n[Test_Rows, ]
Data_Train_Lables <- Data[Train_Rows, 16]
Data_Test_Lables <- Data[Test_Rows, 16]
# Build a KNN model on the training dataset
library("class")
library("caret")
## Warning: package 'caret' was built under R version 3.5.1
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.5.1
## Loading required package: ggplot2
# Classify the test rows with k-NN (k = 5) using the training rows as the
# reference set; the third argument (`cl`) supplies the training labels.
# knn() fits and predicts in a single call, returning the predicted classes.
Data_pred <- knn(Data_Train, Data_Test, Data_Train_Lables, k = 5)
class(Data_Train)
## [1] "data.frame"
class(Data_Test)
## [1] "data.frame"
# Collect the predictions in a one-column data frame and print it.
x <- data.frame(Data_pred = Data_pred)
x
## Data_pred
## 1 Wild Animal/Game Animal
## 2 Wild Animal/Game Animal
## 3 Domestic Animal
## 4 Wild Animal/Game Animal
## 5 Wild Animal/Game Animal
## 6 Wild Animal/Game Animal
## 7 Wild Animal/Game Animal
## 8 Wild Animal/Game Animal
## 9 Wild Animal/Game Animal
## 10 Wild Animal/Game Animal
## 11 Wild Animal/Game Animal
## 12 Wild Animal/Game Animal
## 13 Wild Animal/Game Animal
## 14 Wild Animal/Game Animal
## 15 Wild Animal/Game Animal
## 16 Wild Animal/Game Animal
## 17 Wild Animal/Game Animal
## 18 Wild Animal/Game Animal
## 19 Wild Animal/Game Animal
## 20 Domestic Animal
# Put the true test labels next to the predictions for a side-by-side look.
x[["Data_Test_Lables"]] <- Data_Test_Lables
x
## Data_pred Data_Test_Lables
## 1 Wild Animal/Game Animal Wild Animal/Game Animal
## 2 Wild Animal/Game Animal Wild Animal/Game Animal
## 3 Domestic Animal Wild Animal/Game Animal
## 4 Wild Animal/Game Animal Wild Animal/Game Animal
## 5 Wild Animal/Game Animal Wild Animal/Game Animal
## 6 Wild Animal/Game Animal Wild Animal/Game Animal
## 7 Wild Animal/Game Animal Wild Animal/Game Animal
## 8 Wild Animal/Game Animal Wild Animal/Game Animal
## 9 Wild Animal/Game Animal Wild Animal/Game Animal
## 10 Wild Animal/Game Animal Wild Animal/Game Animal
## 11 Wild Animal/Game Animal Wild Animal/Game Animal
## 12 Wild Animal/Game Animal Wild Animal/Game Animal
## 13 Wild Animal/Game Animal Wild Animal/Game Animal
## 14 Wild Animal/Game Animal Wild Animal/Game Animal
## 15 Wild Animal/Game Animal Wild Animal/Game Animal
## 16 Wild Animal/Game Animal Wild Animal/Game Animal
## 17 Wild Animal/Game Animal Wild Animal/Game Animal
## 18 Wild Animal/Game Animal Wild Animal/Game Animal
## 19 Wild Animal/Game Animal Wild Animal/Game Animal
## 20 Domestic Animal Wild Animal/Game Animal
## Now evaluate the model performance
#install.packages("gmodels")
library("gmodels")
## Warning: package 'gmodels' was built under R version 3.5.1
# Cross-tabulate the actual test labels (rows) against the predicted
# labels (columns).
CrossTable(Data_Test_Lables, Data_pred)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 20
##
##
## | Data_pred
## Data_Test_Lables | Wild Animal/Game Animal | Domestic Animal | Row Total |
## ------------------------|-------------------------|-------------------------|-------------------------|
## Wild Animal/Game Animal | 18 | 2 | 20 |
## | 0.900 | 0.100 | |
## ------------------------|-------------------------|-------------------------|-------------------------|
## Column Total | 18 | 2 | 20 |
## ------------------------|-------------------------|-------------------------|-------------------------|
##
##