Read the dataset

Description of the variables in the glass dataset:

RI refractive index, Na Sodium, Mg Magnesium, Al Aluminum, Si Silicon, K Potassium,

Ca Calcium, Ba Barium, Fe Iron

# install.packages("caret")
# install.packages("pROC")
# install.packages("mlbench")
# install.packages("lattice")
# install.packages("gmodels")
# install.packages("class")

library(caret)
## Warning: package 'caret' was built under R version 3.5.1
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.1
library(pROC)
## Warning: package 'pROC' was built under R version 3.5.1
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
library(mlbench)
## Warning: package 'mlbench' was built under R version 3.5.1
library(class)
library(lattice)
library(gmodels) 
## Warning: package 'gmodels' was built under R version 3.5.1
## 
## Attaching package: 'gmodels'
## The following object is masked from 'package:pROC':
## 
##     ci
# Pick the glass CSV interactively and load it into a data frame
glass <- read.csv(file = file.choose())
View(glass)
# If the first column of a dataset is an id it is not required and can be dropped.
# NOTE(review): the removal below is commented out and refers to `wbcd`, not
# `glass`; it looks like a leftover from another script - confirm before enabling.
# wbcd <- wbcd[-1]
# View(wbcd)
# Number of observations for each glass type
table(glass[["Type"]])
## 
##  1  2  3  5  6  7 
## 70 76 17 13  9 29
# Keep the integer Type column and add a factor copy of it named `type`
glass[["type"]] <- as.factor(glass[["Type"]])
str(glass)
## 'data.frame':    214 obs. of  11 variables:
##  $ RI  : num  1.52 1.52 1.52 1.52 1.52 ...
##  $ Na  : num  13.6 13.9 13.5 13.2 13.3 ...
##  $ Mg  : num  4.49 3.6 3.55 3.69 3.62 3.61 3.6 3.61 3.58 3.6 ...
##  $ Al  : num  1.1 1.36 1.54 1.29 1.24 1.62 1.14 1.05 1.37 1.36 ...
##  $ Si  : num  71.8 72.7 73 72.6 73.1 ...
##  $ K   : num  0.06 0.48 0.39 0.57 0.55 0.64 0.58 0.57 0.56 0.57 ...
##  $ Ca  : num  8.75 7.83 7.78 8.22 8.07 8.07 8.17 8.24 8.3 8.4 ...
##  $ Ba  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Fe  : num  0 0 0 0 0 0.26 0 0 0 0.11 ...
##  $ Type: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ type: Factor w/ 6 levels "1","2","3","5",..: 1 1 1 1 1 1 1 1 1 1 ...
# Type1- 70, Type2-76,Type3-17,Type5-13,Type6-9,Type7-29


# Proportion of entries per glass type, as a percentage rounded to one decimal
# (e.g. what % of the glass is Type 1 and what % is Type 2)
round(100 * prop.table(table(glass$Type)), digits = 1)
## 
##    1    2    3    5    6    7 
## 32.7 35.5  7.9  6.1  4.2 13.6
# Five-number summary and mean of three of the raw (un-normalized) predictors
summary(glass[, c("RI", "Na", "Mg")])
##        RI              Na              Mg       
##  Min.   :1.511   Min.   :10.73   Min.   :0.000  
##  1st Qu.:1.517   1st Qu.:12.91   1st Qu.:2.115  
##  Median :1.518   Median :13.30   Median :3.480  
##  Mean   :1.518   Mean   :13.41   Mean   :2.685  
##  3rd Qu.:1.519   3rd Qu.:13.82   3rd Qu.:3.600  
##  Max.   :1.534   Max.   :17.38   Max.   :4.490
# Min-max normalization: rescale a numeric vector linearly onto [0, 1].
#
# Arguments:
#   x      numeric vector to rescale.
#   na.rm  if TRUE (default), missing values are ignored when computing the
#          minimum and maximum and stay NA in the result; if FALSE, any NA in
#          x makes the whole result NA (the behaviour before this parameter
#          was added).
#
# Returns a double vector of the same length as x. A vector whose observed
# values are all equal has zero range; it is mapped to 0 instead of the NaN
# that 0 / 0 would produce.
#
# Note: this definition masks base::norm() (matrix norms) for the rest of the
# session.
norm <- function(x, na.rm = TRUE) {
  if (length(x) == 0L) {
    return(numeric(0))
  }
  if (na.rm && all(is.na(x))) {
    # Nothing observed to scale against; avoid min()/max() warnings.
    return(rep(NA_real_, length(x)))
  }
  lo <- min(x, na.rm = na.rm)
  span <- max(x, na.rm = na.rm) - lo
  if (!is.na(span) && span == 0) {
    # Constant input: x - lo is already 0 (NA where x is NA); multiplying by a
    # double 0 just guarantees a double result.
    return(0 * (x - lo))
  }
  (x - lo) / span
}
# Sanity check: inputs with the same relative spacing normalize to the same values
norm(c(1, 2, 3, 4, 5))
## [1] 0.00 0.25 0.50 0.75 1.00
norm(c(10, 20, 30, 40, 50))
## [1] 0.00 0.25 0.50 0.75 1.00
# Normalize the first nine columns (the numeric predictors) of the glass dataset
glass_n <- as.data.frame(lapply(glass[, 1:9], norm))
View(glass_n)
summary(glass_n[, c("RI", "Na", "Mg")])
##        RI               Na               Mg        
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.2358   1st Qu.:0.3274   1st Qu.:0.4710  
##  Median :0.2867   Median :0.3865   Median :0.7751  
##  Mean   :0.3167   Mean   :0.4027   Mean   :0.5979  
##  3rd Qu.:0.3515   3rd Qu.:0.4654   3rd Qu.:0.8018  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000
# Disabled step: would prepend the Type column to the normalized predictors.
# It is left commented out, so glass_n keeps only the nine normalized columns.
# glass_n <- cbind(glass$Type,glass_n[1:9])
View(glass_n)

# Create training and test datasets.
# Each row is assigned to group 1 (train, probability 0.7) or group 2 (test,
# probability 0.3); the seed makes the assignment reproducible.
set.seed(123)
ind <- sample(2, nrow(glass_n), replace = TRUE, prob = c(0.7, 0.3))
glass_train <- glass_n[ind == 1, ]
glass_test <- glass_n[ind == 2, ]


# Get labels for training and test datasets.
# The labels must come from exactly the same rows as the features, so the same
# assignment vector is reused rather than re-seeding and re-drawing it (which
# would silently desynchronize if either sample() call were ever edited).
# `ind1` is kept as an alias in case later code refers to it.
ind1 <- ind
# Select the label column by name instead of by position (it is column 10).
glass_train_labels <- glass[ind1 == 1, "Type"]
glass_test_labels <- glass[ind1 == 2, "Type"]


# Build a KNN model on the training dataset

# knn() takes the training features, the test features and the training labels
# (argument `cl`), and returns the predicted label for every test row using the
# k = 3 nearest neighbours. The predictions are then tabulated against the true
# test labels.

glass_test_pred <- knn(
  train = glass_train,
  test = glass_test,
  cl = glass_train_labels,
  k = 3
)
table(glass_test_pred, glass_test_labels)
##                glass_test_labels
## glass_test_pred  1  2  3  5  6  7
##               1 18  6  1  0  0  0
##               2  4 12  1  0  0  0
##               3  0  1  0  0  0  0
##               5  0  1  0  3  1  1
##               6  0  0  0  0  2  1
##               7  0  1  0  0  0  4
# Share of test observations whose predicted type equals the true type
mean(glass_test_pred==glass_test_labels) #68.42
## [1] 0.6842105
# Evaluating Model Performance ----

# gmodels is already attached at the top of the file.
# Cross-tabulate the true labels (rows) against the predictions (columns);
# prop.chisq = FALSE leaves the chi-square contribution out of each cell.
CrossTable(x=glass_test_labels,y=glass_test_pred,prop.chisq = FALSE) 
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  57 
## 
##  
##                   | glass_test_pred 
## glass_test_labels |         1 |         2 |         3 |         5 |         6 |         7 | Row Total | 
## ------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
##                 1 |        18 |         4 |         0 |         0 |         0 |         0 |        22 | 
##                   |     0.818 |     0.182 |     0.000 |     0.000 |     0.000 |     0.000 |     0.386 | 
##                   |     0.720 |     0.235 |     0.000 |     0.000 |     0.000 |     0.000 |           | 
##                   |     0.316 |     0.070 |     0.000 |     0.000 |     0.000 |     0.000 |           | 
## ------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
##                 2 |         6 |        12 |         1 |         1 |         0 |         1 |        21 | 
##                   |     0.286 |     0.571 |     0.048 |     0.048 |     0.000 |     0.048 |     0.368 | 
##                   |     0.240 |     0.706 |     1.000 |     0.167 |     0.000 |     0.200 |           | 
##                   |     0.105 |     0.211 |     0.018 |     0.018 |     0.000 |     0.018 |           | 
## ------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
##                 3 |         1 |         1 |         0 |         0 |         0 |         0 |         2 | 
##                   |     0.500 |     0.500 |     0.000 |     0.000 |     0.000 |     0.000 |     0.035 | 
##                   |     0.040 |     0.059 |     0.000 |     0.000 |     0.000 |     0.000 |           | 
##                   |     0.018 |     0.018 |     0.000 |     0.000 |     0.000 |     0.000 |           | 
## ------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
##                 5 |         0 |         0 |         0 |         3 |         0 |         0 |         3 | 
##                   |     0.000 |     0.000 |     0.000 |     1.000 |     0.000 |     0.000 |     0.053 | 
##                   |     0.000 |     0.000 |     0.000 |     0.500 |     0.000 |     0.000 |           | 
##                   |     0.000 |     0.000 |     0.000 |     0.053 |     0.000 |     0.000 |           | 
## ------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
##                 6 |         0 |         0 |         0 |         1 |         2 |         0 |         3 | 
##                   |     0.000 |     0.000 |     0.000 |     0.333 |     0.667 |     0.000 |     0.053 | 
##                   |     0.000 |     0.000 |     0.000 |     0.167 |     0.667 |     0.000 |           | 
##                   |     0.000 |     0.000 |     0.000 |     0.018 |     0.035 |     0.000 |           | 
## ------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
##                 7 |         0 |         0 |         0 |         1 |         1 |         4 |         6 | 
##                   |     0.000 |     0.000 |     0.000 |     0.167 |     0.167 |     0.667 |     0.105 | 
##                   |     0.000 |     0.000 |     0.000 |     0.167 |     0.333 |     0.800 |           | 
##                   |     0.000 |     0.000 |     0.000 |     0.018 |     0.018 |     0.070 |           | 
## ------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
##      Column Total |        25 |        17 |         1 |         6 |         3 |         5 |        57 | 
##                   |     0.439 |     0.298 |     0.018 |     0.105 |     0.053 |     0.088 |           | 
## ------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
## 
## 
# Accuracy is 68.42 % (39 of the 57 test observations are classified correctly).