# Load the glass data set from CSV and preview its first rows.
# NOTE(review): the path is an absolute, machine-specific Windows path.
data <- read.csv("D:\\excelR\\Data Sets\\KNN\\glass.csv")
head(data)
## RI Na Mg Al Si K Ca Ba Fe Type
## 1 1.52101 13.64 4.49 1.10 71.78 0.06 8.75 0 0.00 1
## 2 1.51761 13.89 3.60 1.36 72.73 0.48 7.83 0 0.00 1
## 3 1.51618 13.53 3.55 1.54 72.99 0.39 7.78 0 0.00 1
## 4 1.51766 13.21 3.69 1.29 72.61 0.57 8.22 0 0.00 1
## 5 1.51742 13.27 3.62 1.24 73.08 0.55 8.07 0 0.00 1
## 6 1.51596 12.79 3.61 1.62 72.97 0.64 8.07 0 0.26 1
# Inspect the column types and a preview of the values in each column
str(data)
## 'data.frame': 214 obs. of 10 variables:
## $ RI : num 1.52 1.52 1.52 1.52 1.52 ...
## $ Na : num 13.6 13.9 13.5 13.2 13.3 ...
## $ Mg : num 4.49 3.6 3.55 3.69 3.62 3.61 3.6 3.61 3.58 3.6 ...
## $ Al : num 1.1 1.36 1.54 1.29 1.24 1.62 1.14 1.05 1.37 1.36 ...
## $ Si : num 71.8 72.7 73 72.6 73.1 ...
## $ K : num 0.06 0.48 0.39 0.57 0.55 0.64 0.58 0.57 0.56 0.57 ...
## $ Ca : num 8.75 7.83 7.78 8.22 8.07 8.07 8.17 8.24 8.3 8.4 ...
## $ Ba : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Fe : num 0 0 0 0 0 0.26 0 0 0 0.11 ...
## $ Type: int 1 1 1 1 1 1 1 1 1 1 ...
# Min-max normalization: rescale a numeric vector linearly onto [0, 1].
#
# x: numeric vector (one feature column).
# Returns a numeric vector of the same length in which min(x) maps to 0 and
# max(x) maps to 1. A constant vector (zero range) maps to all zeros instead
# of the NaN values a 0 / 0 division would produce.
data_norm <- function(x) {
  lo <- min(x)
  span <- max(x) - lo
  # Guard against division by zero when every value is identical.
  if (isTRUE(span == 0)) {
    return(rep(0, length(x)))
  }
  # The denominator is the full range (max - min). Without the parentheses,
  # operator precedence evaluates (x - min) / max - min, which is not a
  # min-max scaling and yields values outside [0, 1] whenever min(x) != 0.
  (x - lo) / span
}
# Rescale every predictor column with data_norm(); column 10 (Type) is the
# class label and is left out of the normalization.
glass_norm <- as.data.frame(lapply(data[-10], data_norm))
# Summary statistics of the raw columns, before normalization
summary(data)
## RI Na Mg Al
## Min. :1.511 Min. :10.73 Min. :0.000 Min. :0.290
## 1st Qu.:1.517 1st Qu.:12.91 1st Qu.:2.115 1st Qu.:1.190
## Median :1.518 Median :13.30 Median :3.480 Median :1.360
## Mean :1.518 Mean :13.41 Mean :2.685 Mean :1.445
## 3rd Qu.:1.519 3rd Qu.:13.82 3rd Qu.:3.600 3rd Qu.:1.630
## Max. :1.534 Max. :17.38 Max. :4.490 Max. :3.500
## Si K Ca Ba
## Min. :69.81 Min. :0.0000 Min. : 5.430 Min. :0.000
## 1st Qu.:72.28 1st Qu.:0.1225 1st Qu.: 8.240 1st Qu.:0.000
## Median :72.79 Median :0.5550 Median : 8.600 Median :0.000
## Mean :72.65 Mean :0.4971 Mean : 8.957 Mean :0.175
## 3rd Qu.:73.09 3rd Qu.:0.6100 3rd Qu.: 9.172 3rd Qu.:0.000
## Max. :75.41 Max. :6.2100 Max. :16.190 Max. :3.150
## Fe Type
## Min. :0.00000 Min. :1.00
## 1st Qu.:0.00000 1st Qu.:1.00
## Median :0.00000 Median :2.00
## Mean :0.05701 Mean :2.78
## 3rd Qu.:0.10000 3rd Qu.:3.00
## Max. :0.51000 Max. :7.00
# Summary statistics of the predictor columns after normalization.
# NOTE(review): the recorded output below has several columns (e.g. RI, Na, Si)
# outside the [0, 1] range, so it does not show a correct min-max scaling;
# re-run this call to refresh it.
summary(glass_norm)
## RI Na Mg Al
## Min. :-1.511 Min. :-10.73 Min. :0.0000 Min. :-0.29000
## 1st Qu.:-1.508 1st Qu.:-10.60 1st Qu.:0.4710 1st Qu.:-0.03286
## Median :-1.507 Median :-10.58 Median :0.7751 Median : 0.01571
## Mean :-1.506 Mean :-10.58 Mean :0.5979 Mean : 0.03997
## 3rd Qu.:-1.506 3rd Qu.:-10.55 3rd Qu.:0.8018 3rd Qu.: 0.09286
## Max. :-1.496 Max. :-10.35 Max. :1.0000 Max. : 0.62714
## Si K Ca Ba
## Min. :-69.81 Min. :0.00000 Min. :-5.430 Min. :0.00000
## 1st Qu.:-69.78 1st Qu.:0.01973 1st Qu.:-5.256 1st Qu.:0.00000
## Median :-69.77 Median :0.08937 Median :-5.234 Median :0.00000
## Mean :-69.77 Mean :0.08004 Mean :-5.212 Mean :0.05557
## 3rd Qu.:-69.77 3rd Qu.:0.09823 3rd Qu.:-5.199 3rd Qu.:0.00000
## Max. :-69.74 Max. :1.00000 Max. :-4.765 Max. :1.00000
## Fe
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.1118
## 3rd Qu.:0.1961
## Max. :1.0000
# Combine the normalized predictors with the class label column "Type".
# cbind() keeps the result a data frame; append() on data frames degrades to a
# plain list that then has to be converted back.
glass <- cbind(glass_norm, data[10])
model_data <- glass
# View() opens an interactive data viewer, so only call it in interactive
# sessions; batch runs (e.g. Rscript) skip it.
if (interactive()) {
  View(model_data)
}
# attach(model_data) was dropped: the code below always refers to columns
# through their data frame, and attach() silently masks other objects on the
# search path.
# Randomly assign each row to the training set (1, about 70%) or the testing
# set (2, about 30%).
# The RNG seed is fixed so that the split, and every result derived from it,
# can be reproduced from run to run.
set.seed(123)
split_data <- sample(2, nrow(model_data), replace = TRUE, prob = c(0.7, 0.3))
# training data
train_glass <- model_data[split_data == 1, ]
nrow(train_glass)
## [1] 157
View(train_glass)
# Testing data: the rows that the random split assigned to group 2
test_glass <- model_data[which(split_data == 2), ]
nrow(test_glass)
## [1] 57
library(class)
# kNN model with k = 20.
# Only the predictor columns are passed to knn(). The Type column is the class
# label; leaving it in the train/test matrices makes it part of the distance
# computation, which leaks the answer into the features and inflates accuracy.
knn_model <- knn(
  train = train_glass[, names(train_glass) != "Type", drop = FALSE],
  test = test_glass[, names(test_glass) != "Type", drop = FALSE],
  cl = factor(train_glass$Type),
  k = 20
)
# Rows: predicted class; columns: actual class of the held-out test rows.
confusion <- table(knn_model, test_glass$Type)
confusion
##
## knn_model 1 2 3 5 6 7
## 1 24 0 0 0 0 0
## 2 0 18 0 0 0 0
## 3 0 0 5 0 0 0
## 5 0 0 0 3 0 0
## 6 0 0 0 0 0 0
## 7 0 0 0 0 2 5
# Overall accuracy: correctly classified test rows / all test rows.
# Predicted and actual classes are matched by label rather than by position:
# the table is not square when a class is absent from the test rows, and a
# plain diag() would then pair up counts that belong to different classes.
shared <- intersect(rownames(confusion), colnames(confusion))
correct <- sum(diag(confusion[shared, shared, drop = FALSE]))
accuracy <- correct / sum(confusion)
accuracy
## [1] 0.9649123