# Read the dataset

# Load the glass dataset from its CSV file.
Data <- read.csv(file = "D:\\DataScience\\Assignments\\KNN\\glass.csv")

# Collapse the six glass types found in the Type column into two classes:
# types 1 and 2 become "Model-1"; types 3, 5, 6 and 7 become "Model-2".
Data$Type <- factor(
  Data$Type,
  levels = c("1", "2", "3", "5", "6", "7"),
  labels = rep(c("Model-1", "Model-2"), times = c(2, 4))
)




# Separate the observations by class.
Data_Model1 <- Data[Data[["Type"]] == "Model-1", ]
Data_Model2 <- Data[Data[["Type"]] == "Model-2", ]

# Create training and test datasets from fixed row positions within each
# class subset: Model-1 contributes rows 1-106 to training and rows 107-146
# to testing; Model-2 contributes rows 1-48 and rows 49-68 respectively.
# NOTE(review): the split follows the row order of the file rather than a
# random shuffle - confirm this is intended.
Data_train1 <- rbind(
  Data_Model1[seq_len(106), ],
  Data_Model2[seq_len(48), ]
)
Data_test1 <- rbind(
  Data_Model1[seq(107, 146), ],
  Data_Model2[seq(49, 68), ]
)
 

# Min-max normalize a numeric vector onto the [0, 1] range.
#
# x: a numeric vector.
# Returns a vector of the same length in which the smallest value of x maps
# to 0 and the largest to 1. A constant vector maps to all zeros instead of
# NaN (its range is zero, so the plain formula would divide by zero). An empty
# vector returns numeric(0) without min()/max() warnings. If x contains NA the
# result is all NA, because min() and max() propagate it.
#
# Note: this definition masks base::norm() (the matrix norm) in this script.
norm <- function(x) {
  if (length(x) == 0L) {
    return(numeric(0))
  }
  lowest <- min(x)
  span <- max(x) - lowest
  # isTRUE() keeps an NA span from raising an error in the condition.
  if (isTRUE(span == 0)) {
    return(x - lowest)
  }
  (x - lowest) / span
}


# Count the observations in each class.
table(Data[["Type"]])
## 
## Model-1 Model-2 
##     146      68
# Apply min-max normalization to the nine feature columns (columns 1-9).
# The training set is rescaled with its own per-column minimum and maximum.
Data_train <- as.data.frame(lapply(Data_train1[1:9], norm))

# The test set is rescaled with the TRAINING minimum and maximum of each
# column so that both sets share one scale. Rescaling the test set with its
# own range would put the same raw value at different positions in the two
# sets and distort the distances KNN relies on. Test values may therefore
# fall slightly outside [0, 1].
Data_test <- as.data.frame(Map(
  function(test_col, train_col) {
    train_min <- min(train_col)
    train_span <- max(train_col) - train_min
    if (isTRUE(train_span == 0)) {
      # A feature that is constant in training cannot be rescaled; map to 0.
      return(numeric(length(test_col)))
    }
    (test_col - train_min) / train_span
  },
  Data_test1[1:9],
  Data_train1[1:9]
))
# No column is dropped afterwards: only the nine feature columns were
# selected above, so there is no tenth column to remove.
# NOTE: the CrossTable and accuracy output recorded further down in this file
# was produced with per-set scaling and must be regenerated after this change.
# Get the class labels for the training and test datasets.
# The whole Type column is taken by name, so the labels always line up with
# the rows of each set even if the split sizes change (no hard-coded row
# counts or column positions).
Data_train_labels <- Data_train1[["Type"]]
Data_test_labels <- Data_test1[["Type"]]
# Build a KNN model on the training dataset
library("class")
## Warning: package 'class' was built under R version 3.5.1
library("caret")
## Warning: package 'caret' was built under R version 3.5.1
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.5.1
## Loading required package: ggplot2
# Build the KNN model on the training dataset; the training labels are passed in through the `cl` argument.
# The model's predictions are produced for the test dataset so that they can be checked against the true test labels.

# knn() breaks voting ties at random, and an even k with two classes makes
# ties possible; fix the RNG seed so the predictions are reproducible.
set.seed(123)
# Classify every test row by a vote among its 16 nearest training rows.
Data_pred <- knn(
  train = Data_train,
  test = Data_test,
  cl = Data_train_labels,
  k = 16
)

library(gmodels)
## Warning: package 'gmodels' was built under R version 3.5.1
# Cross-tabulate the true test labels (rows) against the predictions (columns).
CrossTable(x = Data_test_labels, y = Data_pred)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  60 
## 
##  
##                  | Data_pred 
## Data_test_labels |   Model-1 |   Model-2 | Row Total | 
## -----------------|-----------|-----------|-----------|
##          Model-1 |        31 |         9 |        40 | 
##                  |     5.167 |     5.523 |           | 
##                  |     0.775 |     0.225 |     0.667 | 
##                  |     1.000 |     0.310 |           | 
##                  |     0.517 |     0.150 |           | 
## -----------------|-----------|-----------|-----------|
##          Model-2 |         0 |        20 |        20 | 
##                  |    10.333 |    11.046 |           | 
##                  |     0.000 |     1.000 |     0.333 | 
##                  |     0.000 |     0.690 |           | 
##                  |     0.000 |     0.333 |           | 
## -----------------|-----------|-----------|-----------|
##     Column Total |        31 |        29 |        60 | 
##                  |     0.517 |     0.483 |           | 
## -----------------|-----------|-----------|-----------|
## 
## 
# Accuracy: share of test rows whose predicted class equals the true label,
# computed from the predictions instead of hand-copying counts from the table.
# Comparing as character avoids an error if the two factors ever differ in
# their level sets.
mean(as.character(Data_pred) == as.character(Data_test_labels))
## [1] 0.85
# Accuracy of the model: 51 of the 60 test rows are classified correctly, i.e. 85%