# Load the glass data set from its CSV file and preview the first rows.
data <- read.csv(file = "D:\\excelR\\Data Sets\\KNN\\glass.csv")
head(x = data)
##        RI    Na   Mg   Al    Si    K   Ca Ba   Fe Type
## 1 1.52101 13.64 4.49 1.10 71.78 0.06 8.75  0 0.00    1
## 2 1.51761 13.89 3.60 1.36 72.73 0.48 7.83  0 0.00    1
## 3 1.51618 13.53 3.55 1.54 72.99 0.39 7.78  0 0.00    1
## 4 1.51766 13.21 3.69 1.29 72.61 0.57 8.22  0 0.00    1
## 5 1.51742 13.27 3.62 1.24 73.08 0.55 8.07  0 0.00    1
## 6 1.51596 12.79 3.61 1.62 72.97 0.64 8.07  0 0.26    1
# Show the structure (dimensions and column types) of the loaded data.
str(object = data)
## 'data.frame':    214 obs. of  10 variables:
##  $ RI  : num  1.52 1.52 1.52 1.52 1.52 ...
##  $ Na  : num  13.6 13.9 13.5 13.2 13.3 ...
##  $ Mg  : num  4.49 3.6 3.55 3.69 3.62 3.61 3.6 3.61 3.58 3.6 ...
##  $ Al  : num  1.1 1.36 1.54 1.29 1.24 1.62 1.14 1.05 1.37 1.36 ...
##  $ Si  : num  71.8 72.7 73 72.6 73.1 ...
##  $ K   : num  0.06 0.48 0.39 0.57 0.55 0.64 0.58 0.57 0.56 0.57 ...
##  $ Ca  : num  8.75 7.83 7.78 8.22 8.07 8.07 8.17 8.24 8.3 8.4 ...
##  $ Ba  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Fe  : num  0 0 0 0 0 0.26 0 0 0 0.11 ...
##  $ Type: int  1 1 1 1 1 1 1 1 1 1 ...
#' Min-max normalize a numeric vector to the range [0, 1]
#'
#' Computes `(x - min(x)) / (max(x) - min(x))`. The denominator must be
#' parenthesized: without the parentheses R evaluates
#' `((x - min(x)) / max(x)) - min(x)`, which is not a normalization.
#'
#' @param x A numeric vector.
#' @return A numeric vector the same length as `x`. A vector whose values are
#'   all equal is mapped to zeros (instead of `NaN` from 0 / 0), and an empty
#'   vector yields an empty numeric vector. `NA` values in `x` propagate to
#'   the whole result because `min()` / `max()` are called without `na.rm`.
data_norm <- function(x) {
  if (length(x) == 0L) {
    return(numeric(0))
  }
  lo <- min(x)
  span <- max(x) - lo
  # isTRUE() keeps an NA span from breaking the `if`; NA simply propagates.
  if (isTRUE(span == 0)) {
    return(rep(0, length(x)))
  }
  (x - lo) / span
}

# Normalize every column except the 10th one (Type, the class label).
glass_norm <- as.data.frame(lapply(X = data[, -10], FUN = data_norm))

# Summary of the raw data, i.e. before normalization.
summary(object = data)
##        RI              Na              Mg              Al       
##  Min.   :1.511   Min.   :10.73   Min.   :0.000   Min.   :0.290  
##  1st Qu.:1.517   1st Qu.:12.91   1st Qu.:2.115   1st Qu.:1.190  
##  Median :1.518   Median :13.30   Median :3.480   Median :1.360  
##  Mean   :1.518   Mean   :13.41   Mean   :2.685   Mean   :1.445  
##  3rd Qu.:1.519   3rd Qu.:13.82   3rd Qu.:3.600   3rd Qu.:1.630  
##  Max.   :1.534   Max.   :17.38   Max.   :4.490   Max.   :3.500  
##        Si              K                Ca               Ba       
##  Min.   :69.81   Min.   :0.0000   Min.   : 5.430   Min.   :0.000  
##  1st Qu.:72.28   1st Qu.:0.1225   1st Qu.: 8.240   1st Qu.:0.000  
##  Median :72.79   Median :0.5550   Median : 8.600   Median :0.000  
##  Mean   :72.65   Mean   :0.4971   Mean   : 8.957   Mean   :0.175  
##  3rd Qu.:73.09   3rd Qu.:0.6100   3rd Qu.: 9.172   3rd Qu.:0.000  
##  Max.   :75.41   Max.   :6.2100   Max.   :16.190   Max.   :3.150  
##        Fe               Type     
##  Min.   :0.00000   Min.   :1.00  
##  1st Qu.:0.00000   1st Qu.:1.00  
##  Median :0.00000   Median :2.00  
##  Mean   :0.05701   Mean   :2.78  
##  3rd Qu.:0.10000   3rd Qu.:3.00  
##  Max.   :0.51000   Max.   :7.00
# Summary of the feature columns after normalization.
summary(object = glass_norm)
##        RI               Na               Mg               Al          
##  Min.   :-1.511   Min.   :-10.73   Min.   :0.0000   Min.   :-0.29000  
##  1st Qu.:-1.508   1st Qu.:-10.60   1st Qu.:0.4710   1st Qu.:-0.03286  
##  Median :-1.507   Median :-10.58   Median :0.7751   Median : 0.01571  
##  Mean   :-1.506   Mean   :-10.58   Mean   :0.5979   Mean   : 0.03997  
##  3rd Qu.:-1.506   3rd Qu.:-10.55   3rd Qu.:0.8018   3rd Qu.: 0.09286  
##  Max.   :-1.496   Max.   :-10.35   Max.   :1.0000   Max.   : 0.62714  
##        Si               K                 Ca               Ba         
##  Min.   :-69.81   Min.   :0.00000   Min.   :-5.430   Min.   :0.00000  
##  1st Qu.:-69.78   1st Qu.:0.01973   1st Qu.:-5.256   1st Qu.:0.00000  
##  Median :-69.77   Median :0.08937   Median :-5.234   Median :0.00000  
##  Mean   :-69.77   Mean   :0.08004   Mean   :-5.212   Mean   :0.05557  
##  3rd Qu.:-69.77   3rd Qu.:0.09823   3rd Qu.:-5.199   3rd Qu.:0.00000  
##  Max.   :-69.74   Max.   :1.00000   Max.   :-4.765   Max.   :1.00000  
##        Fe        
##  Min.   :0.0000  
##  1st Qu.:0.0000  
##  Median :0.0000  
##  Mean   :0.1118  
##  3rd Qu.:0.1961  
##  Max.   :1.0000
# Put the original "Type" column back after the normalized feature columns.
# c() on two data frames yields a plain list of columns, exactly like
# append() with its default insertion point.
glass <- c(glass_norm, data[10])
View(glass)

# Turn the combined list of columns into the data frame used for modelling.
model_data <- as.data.frame(x = glass)
View(model_data)

# NOTE(review): attach() puts the columns on the search path; nothing in the
# visible code reads them by bare name -- confirm before removing it.
attach(what = model_data)

# Randomly label each row 1 (training, probability 0.7) or 2 (testing,
# probability 0.3); the resulting set sizes vary from run to run.
split_data <- sample(
  x = 2,
  size = nrow(model_data),
  replace = TRUE,
  prob = c(0.7, 0.3)
)

# Rows labelled 1 form the training set.
train_glass <- model_data[split_data == 1, ]
nrow(train_glass)
## [1] 157
View(train_glass)

# Rows labelled 2 form the testing set.
test_glass <- model_data[split_data == 2, ]
nrow(test_glass)
## [1] 57
library(class)

# k-nearest-neighbours classifier with k = 20.
# The class label ("Type") must not be part of the predictor matrices:
# leaving it in lets the distance computation see the answer and inflates
# the measured accuracy. Any output recorded after this block was produced
# with the label included as a predictor and will differ once re-run.
feature_cols <- setdiff(names(train_glass), "Type")
knn_model <- knn(
  train = train_glass[, feature_cols, drop = FALSE],
  test = test_glass[, feature_cols, drop = FALSE],
  cl = factor(train_glass[["Type"]]),
  k = 20
)

# Cross-tabulate predicted classes (rows) against true test labels (columns).
confusion <- table(knn_model, test_glass[["Type"]])
confusion
##          
## knn_model  1  2  3  5  6  7
##         1 24  0  0  0  0  0
##         2  0 18  0  0  0  0
##         3  0  0  5  0  0  0
##         5  0  0  0  3  0  0
##         6  0  0  0  0  0  0
##         7  0  0  0  0  2  5
# Overall accuracy = correctly classified test rows / all test rows.
# Predicted and true classes are matched by label rather than by position:
# diag() pairs the i-th row with the i-th column, which is wrong whenever a
# class is missing from the test labels and the table is not square.
confusion_counts <- unclass(confusion)
shared_classes <- intersect(
  rownames(confusion_counts),
  colnames(confusion_counts)
)
# A two-column character matrix indexes the cells (class, class) by dimnames.
accuracy <- sum(confusion_counts[cbind(shared_classes, shared_classes)]) /
  sum(confusion_counts)
accuracy
## [1] 0.9649123