# Load the glass data set from its CSV file
Data <- read.csv("D:\\DataScience\\Assignments\\KNN\\glass.csv")
# Collapse the six glass types into two groups: types 1 and 2 become
# "Model-1"; types 3, 5, 6 and 7 become "Model-2"
Data$Type <- factor(
  Data$Type,
  levels = c("1", "2", "3", "5", "6", "7"),
  labels = rep(c("Model-1", "Model-2"), times = c(2, 4))
)
# Separate the rows belonging to each group
Data_Model1 <- Data[Data[["Type"]] == "Model-1", ]
Data_Model2 <- Data[Data[["Type"]] == "Model-2", ]
# Training set: the first 106 Model-1 rows stacked on the first 48 Model-2 rows.
# Test set: Model-1 rows 107-146 stacked on Model-2 rows 49-68.
Data_train1 <- rbind(Data_Model1[seq_len(106), ], Data_Model2[seq_len(48), ])
Data_test1 <- rbind(Data_Model1[seq(107, 146), ], Data_Model2[seq(49, 68), ])
# Min-max normalisation: linearly rescale a numeric vector so that its
# smallest value maps to 0 and its largest to 1.
#
# Arguments:
#   x      Numeric vector to rescale.
#   na.rm  Should NA values be ignored when finding the minimum and maximum?
#          Defaults to FALSE, in which case any NA in `x` makes the whole
#          result NA. With TRUE, only the NA positions stay NA.
#
# Returns a numeric vector of the same length as `x`. When every value of
# `x` is identical the range is zero; all zeros are returned in that case
# instead of the NaN values that 0 / 0 would produce.
norm <- function(x, na.rm = FALSE) {
  lo <- min(x, na.rm = na.rm)
  span <- max(x, na.rm = na.rm) - lo
  # isTRUE() keeps an NA span from breaking the `if`; that case falls through
  # and yields NA exactly as plain arithmetic would.
  if (isTRUE(span == 0)) {
    return(x - lo)
  }
  (x - lo) / span
}
table(Data$Type)
##
## Model-1 Model-2
## 146 68
# Rescale `x` using the minimum and range of a reference vector `ref`.
# A zero-range reference yields zeros rather than a division by zero.
scale_to_reference <- function(x, ref) {
  lo <- min(ref)
  span <- max(ref) - lo
  if (isTRUE(span == 0)) {
    return(x - lo)
  }
  (x - lo) / span
}
# Normalise the nine predictor columns (column 10 holds the class label).
# The training set is scaled to [0, 1] by its own minimum and maximum.
Data_train <- as.data.frame(lapply(Data_train1[1:9], norm))
# The test set is scaled with the TRAINING minimum and maximum so that both
# sets live on the same scale; scaling it by its own range would make the
# distances computed by KNN meaningless. Test values may therefore fall
# slightly outside [0, 1].
# NOTE: any results recorded further down in this file from a run that scaled
# the test set by its own range need to be regenerated.
Data_test <- as.data.frame(
  Map(scale_to_reference, Data_test1[1:9], Data_train1[1:9])
)
# Class labels for the training and test rows, taken from column 10 for every
# row instead of relying on hard-coded row counts
Data_train_labels <- Data_train1[[10]]
Data_test_labels <- Data_test1[[10]]
# Build a KNN model on the training dataset
library("class")
## Warning: package 'class' was built under R version 3.5.1
library("caret")
## Warning: package 'caret' was built under R version 3.5.1
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.5.1
## Loading required package: ggplot2
# Fit the KNN classifier: every test row is assigned the majority class of
# its k = 16 nearest training rows, with the training labels supplied via
# `cl`. The predictions are evaluated against the test labels afterwards.
# knn() breaks voting ties at random, and an even k with two classes can tie,
# so the seed is fixed to make the predictions reproducible.
set.seed(123)
Data_pred <- knn(
  train = Data_train,
  test = Data_test,
  cl = Data_train_labels,
  k = 16
)
library(gmodels)
## Warning: package 'gmodels' was built under R version 3.5.1
# Cross-tabulate the true test labels (rows) against the KNN predictions
# (columns)
CrossTable(x = Data_test_labels, y = Data_pred)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 60
##
##
## | Data_pred
## Data_test_labels | Model-1 | Model-2 | Row Total |
## -----------------|-----------|-----------|-----------|
## Model-1 | 31 | 9 | 40 |
## | 5.167 | 5.523 | |
## | 0.775 | 0.225 | 0.667 |
## | 1.000 | 0.310 | |
## | 0.517 | 0.150 | |
## -----------------|-----------|-----------|-----------|
## Model-2 | 0 | 20 | 20 |
## | 10.333 | 11.046 | |
## | 0.000 | 1.000 | 0.333 |
## | 0.000 | 0.690 | |
## | 0.000 | 0.333 | |
## -----------------|-----------|-----------|-----------|
## Column Total | 31 | 29 | 60 |
## | 0.517 | 0.483 | |
## -----------------|-----------|-----------|-----------|
##
##
# Accuracy: the share of test rows whose predicted class equals the true
# label, computed from the predictions instead of typed in by hand.
# Comparing as character avoids an error if the two factors ever carry
# different level sets.
mean(as.character(Data_pred) == as.character(Data_test_labels))
## [1] 0.85
#85% Accuracy Model