KNN

Glass dataset

Assignment 22

# Load the glass data set.
# NOTE(review): the path is absolute and specific to one Windows machine;
# it has to be adjusted before running this anywhere else.
dataset <- read.csv(
  file = "C:\\Users\\RISHI RAHUL\\Desktop\\DS\\9 KNN\\Assignment\\glass.csv"
)

# Column-by-column structure: nine numeric measurements plus the integer
# class label `Type` in the last column.
str(dataset)
## 'data.frame':    214 obs. of  10 variables:
##  $ RI  : num  1.52 1.52 1.52 1.52 1.52 ...
##  $ Na  : num  13.6 13.9 13.5 13.2 13.3 ...
##  $ Mg  : num  4.49 3.6 3.55 3.69 3.62 3.61 3.6 3.61 3.58 3.6 ...
##  $ Al  : num  1.1 1.36 1.54 1.29 1.24 1.62 1.14 1.05 1.37 1.36 ...
##  $ Si  : num  71.8 72.7 73 72.6 73.1 ...
##  $ K   : num  0.06 0.48 0.39 0.57 0.55 0.64 0.58 0.57 0.56 0.57 ...
##  $ Ca  : num  8.75 7.83 7.78 8.22 8.07 8.07 8.17 8.24 8.3 8.4 ...
##  $ Ba  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Fe  : num  0 0 0 0 0 0.26 0 0 0 0.11 ...
##  $ Type: int  1 1 1 1 1 1 1 1 1 1 ...
# Number of rows and columns.
c(nrow(dataset), ncol(dataset))
## [1] 214  10
# How many observations fall into each glass type.
table(dataset[["Type"]])
## 
##  1  2  3  5  6  7 
## 70 76 17 13  9 29
# The same class distribution expressed in percent (one decimal place).
round(100 * prop.table(table(dataset[["Type"]])), digits = 1)
## 
##    1    2    3    5    6    7 
## 32.7 35.5  7.9  6.1  4.2 13.6
# Three predictors with clearly different ranges, which is why the features
# are rescaled before computing distances.
summary(dataset[, c("Na", "Si", "Ca")])
##        Na              Si              Ca        
##  Min.   :10.73   Min.   :69.81   Min.   : 5.430  
##  1st Qu.:12.91   1st Qu.:72.28   1st Qu.: 8.240  
##  Median :13.30   Median :72.79   Median : 8.600  
##  Mean   :13.41   Mean   :72.65   Mean   : 8.957  
##  3rd Qu.:13.82   3rd Qu.:73.09   3rd Qu.: 9.172  
##  Max.   :17.38   Max.   :75.41   Max.   :16.190
#' Min-max rescale a numeric vector to the interval [0, 1]
#'
#' Each value is mapped to `(x - min) / (max - min)`.
#'
#' @param x A numeric vector.
#' @param na.rm If `TRUE` (default), missing values are ignored when the
#'   minimum and maximum are computed and stay `NA` in the result. If
#'   `FALSE`, a single `NA` makes the whole result `NA`.
#' @return A numeric vector of the same length as `x`. A constant vector
#'   (max == min) is mapped to all zeros instead of `NaN`.
normalize_data <- function(x, na.rm = TRUE) {
  # Nothing to scale: avoid the warnings min()/max() raise on empty input.
  if (length(x) == 0L || all(is.na(x))) {
    return(as.numeric(x))
  }

  bounds <- range(x, na.rm = na.rm)
  span <- bounds[2] - bounds[1]

  # A constant column would otherwise produce 0 / 0 = NaN for every value;
  # dividing by 1 instead yields all zeros.
  if (!is.na(span) && span == 0) {
    span <- 1
  }

  (x - bounds[1]) / span
}

# Rescale the nine predictor columns (positions 1-9, RI through Fe) with
# normalize_data(); the class label in column 10 is deliberately left out.
dataset_n <- as.data.frame(
  lapply(X = dataset[, 1:9], FUN = normalize_data)
)

# Spot-check three of the rescaled columns.
summary(dataset_n[, c("Na", "Si", "Ca")])
##        Na               Si               Ca        
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.3274   1st Qu.:0.4411   1st Qu.:0.2612  
##  Median :0.3865   Median :0.5321   Median :0.2946  
##  Mean   :0.4027   Mean   :0.5073   Mean   :0.3278  
##  3rd Qu.:0.4654   3rd Qu.:0.5853   3rd Qu.:0.3478  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000
# Split the 214 rows into 128 training and 86 test observations.
#
# The file is ordered by class (Type): the first 146 rows are types 1 and 2,
# so slicing rows 1:128 / 129:214 directly trains on two classes only and
# tests mostly on classes the model has never seen. Shuffle the row indices
# first (fixed seed for reproducibility) and use the same indices for the
# features and the labels so the two stay aligned.
# NOTE: any knitted output recorded further down was produced with the old
# sequential split and must be regenerated after this change.
set.seed(123)
n_train <- 128
shuffled_rows <- sample(nrow(dataset_n))
train_rows <- shuffled_rows[seq_len(n_train)]
test_rows <- shuffled_rows[-seq_len(n_train)]

dataset_train <- dataset_n[train_rows, ]
dataset_test <- dataset_n[test_rows, ]

# Column 10 of the original (unscaled) data frame holds the class label.
dataset_train_labels <- dataset[train_rows, 10]
dataset_test_labels <- dataset[test_rows, 10]
# EDA and data preparation end here; model fitting starts below

library(class)

# Classify every test row by its single nearest training neighbour (k = 1).
dataset_pred <- knn(
  dataset_train,
  dataset_test,
  cl = dataset_train_labels,
  k = 1
)
# Notes kept from earlier experiments.
# NOTE(review): they do not match the call above, which uses k = 1, and they
# mention a type 4 that table(dataset$Type) does not list -- confirm or remove.
#   k = 5: got only 6 types: 1, 2, 4, 5, 6, 7
#   k = 2: got all 7 types: 1, 2, 3, 4, 5, 6, 7
# Alternative call that was left commented out:
# dataset_pred <- knn(train = dataset_train, test = dataset_test, cl = dataset_train_labels, k = 1)

library(gmodels)
## Warning: package 'gmodels' was built under R version 3.5.1
# Cross-tabulate the true test labels (rows) against the kNN predictions
# (columns).
CrossTable(x = dataset_test_labels, y = dataset_pred)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  86 
## 
##  
##                     | dataset_pred 
## dataset_test_labels |         1 |         2 | Row Total | 
## --------------------|-----------|-----------|-----------|
##                   2 |         6 |        12 |        18 | 
##                     |     0.423 |     0.145 |           | 
##                     |     0.333 |     0.667 |     0.209 | 
##                     |     0.273 |     0.188 |           | 
##                     |     0.070 |     0.140 |           | 
## --------------------|-----------|-----------|-----------|
##                   3 |        10 |         7 |        17 | 
##                     |     7.343 |     2.524 |           | 
##                     |     0.588 |     0.412 |     0.198 | 
##                     |     0.455 |     0.109 |           | 
##                     |     0.116 |     0.081 |           | 
## --------------------|-----------|-----------|-----------|
##                   5 |         0 |        13 |        13 | 
##                     |     3.326 |     1.143 |           | 
##                     |     0.000 |     1.000 |     0.151 | 
##                     |     0.000 |     0.203 |           | 
##                     |     0.000 |     0.151 |           | 
## --------------------|-----------|-----------|-----------|
##                   6 |         4 |         5 |         9 | 
##                     |     1.252 |     0.430 |           | 
##                     |     0.444 |     0.556 |     0.105 | 
##                     |     0.182 |     0.078 |           | 
##                     |     0.047 |     0.058 |           | 
## --------------------|-----------|-----------|-----------|
##                   7 |         2 |        27 |        29 | 
##                     |     3.958 |     1.360 |           | 
##                     |     0.069 |     0.931 |     0.337 | 
##                     |     0.091 |     0.422 |           | 
##                     |     0.023 |     0.314 |           | 
## --------------------|-----------|-----------|-----------|
##        Column Total |        22 |        64 |        86 | 
##                     |     0.256 |     0.744 |           | 
## --------------------|-----------|-----------|-----------|
## 
##