# Element abbreviations used as column names: Ca = Calcium, Ba = Barium, Fe = Iron
# install.packages("caret")
# install.packages("pROC")
# install.packages("mlbench")
# install.packages("lattice")
# install.packages("gmodels")
# install.packages("class")
library(caret)
## Warning: package 'caret' was built under R version 3.5.1
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.1
library(pROC)
## Warning: package 'pROC' was built under R version 3.5.1
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(mlbench)
## Warning: package 'mlbench' was built under R version 3.5.1
library(class)
library(lattice)
library(gmodels)
## Warning: package 'gmodels' was built under R version 3.5.1
##
## Attaching package: 'gmodels'
## The following object is masked from 'package:pROC':
##
## ci
# Load the glass data set from a CSV file picked interactively.
glass <- read.csv(file.choose())
# Fail fast when the chosen file is not the glass data: every later step
# relies on a `Type` column holding the class of each sample.
if (!"Type" %in% names(glass)) {
  stop("The selected CSV has no 'Type' column.", call. = FALSE)
}
View(glass)
# No id column has to be dropped here: the file holds nine numeric
# measurements (RI .. Fe) followed by the integer class label `Type`.
# Number of samples for each type of glass
table(glass$Type)
##
## 1 2 3 5 6 7
## 70 76 17 13 9 29
# Keep a factor copy of the class label in a separate lower-case `type`
# column; the original integer `Type` column is left as it is.
glass$type <- as.factor(glass$Type)
str(glass)
## 'data.frame': 214 obs. of 11 variables:
## $ RI : num 1.52 1.52 1.52 1.52 1.52 ...
## $ Na : num 13.6 13.9 13.5 13.2 13.3 ...
## $ Mg : num 4.49 3.6 3.55 3.69 3.62 3.61 3.6 3.61 3.58 3.6 ...
## $ Al : num 1.1 1.36 1.54 1.29 1.24 1.62 1.14 1.05 1.37 1.36 ...
## $ Si : num 71.8 72.7 73 72.6 73.1 ...
## $ K : num 0.06 0.48 0.39 0.57 0.55 0.64 0.58 0.57 0.56 0.57 ...
## $ Ca : num 8.75 7.83 7.78 8.22 8.07 8.07 8.17 8.24 8.3 8.4 ...
## $ Ba : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Fe : num 0 0 0 0 0 0.26 0 0 0 0.11 ...
## $ Type: int 1 1 1 1 1 1 1 1 1 1 ...
## $ type: Factor w/ 6 levels "1","2","3","5",..: 1 1 1 1 1 1 1 1 1 1 ...
# Class counts: Type 1 = 70, Type 2 = 76, Type 3 = 17, Type 5 = 13,
# Type 6 = 9, Type 7 = 29
# Share of each glass type in the data set, as a percentage rounded to
# one decimal place (e.g. what % is Type 1, what % is Type 2)
round(100 * prop.table(table(glass$Type)), digits = 1)
##
## 1 2 3 5 6 7
## 32.7 35.5 7.9 6.1 4.2 13.6
# Five-number summary (plus mean) of three of the raw predictors
summary(glass[, c("RI", "Na", "Mg")])
## RI Na Mg
## Min. :1.511 Min. :10.73 Min. :0.000
## 1st Qu.:1.517 1st Qu.:12.91 1st Qu.:2.115
## Median :1.518 Median :13.30 Median :3.480
## Mean :1.518 Mean :13.41 Mean :2.685
## 3rd Qu.:1.519 3rd Qu.:13.82 3rd Qu.:3.600
## Max. :1.534 Max. :17.38 Max. :4.490
# Min-max normalization: linearly rescale a vector onto [0, 1].
#
# Arguments:
#   x      numeric vector to rescale.
#   na.rm  if TRUE, NA values are ignored when the minimum and maximum are
#          computed (the NA entries themselves stay NA in the result).
#          The default FALSE keeps the earlier behaviour, where any NA in
#          `x` turns the whole result into NA.
#
# Returns a double vector of the same length as `x`. A vector whose values
# are all equal is mapped to zeros instead of the NaN that 0 / 0 would
# produce, and an empty vector is returned as numeric(0) without the
# min()/max() warnings.
norm <- function(x, na.rm = FALSE) {
  if (length(x) == 0L) {
    return(numeric(0))
  }
  lo <- min(x, na.rm = na.rm)
  hi <- max(x, na.rm = na.rm)
  span <- hi - lo
  # A zero range means every value equals `lo`; dividing by 1 instead of 0
  # yields 0 for those values rather than NaN.
  if (!is.na(span) && span == 0) {
    span <- 1
  }
  (x - lo) / span
}
# Sanity check: two vectors on different scales must normalize to the
# same values
norm(1:5)
## [1] 0.00 0.25 0.50 0.75 1.00
norm(seq(10, 50, by = 10))
## [1] 0.00 0.25 0.50 0.75 1.00
# Rescale the first nine columns of glass (the numeric measurements)
# column by column and collect the results in a new data frame
glass_n <- as.data.frame(lapply(glass[seq_len(9)], norm))
View(glass_n)
summary(glass_n[, c("RI", "Na", "Mg")])
## RI Na Mg
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.2358 1st Qu.:0.3274 1st Qu.:0.4710
## Median :0.2867 Median :0.3865 Median :0.7751
## Mean :0.3167 Mean :0.4027 Mean :0.5979
## 3rd Qu.:0.3515 3rd Qu.:0.4654 3rd Qu.:0.8018
## Max. :1.0000 Max. :1.0000 Max. :1.0000
# glass_n <- cbind(glass$Type,glass_n[1:9])
View(glass_n)
# Create training and test datasets.
# Each row is assigned to group 1 (training, probability 0.7) or group 2
# (test, probability 0.3). The assignment is drawn once and reused for the
# features and the labels, so the two cannot drift apart.
set.seed(123)
ind <- sample(2, nrow(glass_n), replace = TRUE, prob = c(0.7, 0.3))
glass_train <- glass_n[ind == 1, ]
glass_test <- glass_n[ind == 2, ]
# Get labels for training and test datasets from the same assignment.
# `ind1` used to be a second draw made after resetting the seed; it is kept
# as an alias of `ind` so that any code still referring to it sees the same
# values. The random number generator is left in the same state as before.
ind1 <- ind
# Labels are selected by column name rather than by position (column 10).
glass_train_labels <- glass[["Type"]][ind1 == 1]
glass_test_labels <- glass[["Type"]][ind1 == 2]
# Build a KNN model on the training dataset.
# knn() takes the training features, the test features and the training
# labels (argument `cl`), and returns the predicted class of every test row
# using the k = 3 nearest neighbours. The predictions are then compared
# with the true test labels.
glass_test_pred <- knn(
  train = glass_train,
  test = glass_test,
  cl = glass_train_labels,
  k = 3
)
# Confusion matrix: rows are predicted types, columns are true types
table(glass_test_pred, glass_test_labels)
## glass_test_labels
## glass_test_pred 1 2 3 5 6 7
## 1 18 6 1 0 0 0
## 2 4 12 1 0 0 0
## 3 0 1 0 0 0 0
## 5 0 1 0 3 1 1
## 6 0 0 0 0 2 1
## 7 0 1 0 0 0 4
# Share of test rows whose predicted type equals the true type
mean(glass_test_pred == glass_test_labels) # 68.42
## [1] 0.6842105
# Evaluating Model Performance ----
# CrossTable() comes from gmodels, which is attached at the top of the
# script. Rows are the true labels, columns the predictions; the
# chi-square contribution of each cell is switched off.
CrossTable(
  x = glass_test_labels,
  y = glass_test_pred,
  prop.chisq = FALSE
)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 57
##
##
## | glass_test_pred
## glass_test_labels | 1 | 2 | 3 | 5 | 6 | 7 | Row Total |
## ------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
## 1 | 18 | 4 | 0 | 0 | 0 | 0 | 22 |
## | 0.818 | 0.182 | 0.000 | 0.000 | 0.000 | 0.000 | 0.386 |
## | 0.720 | 0.235 | 0.000 | 0.000 | 0.000 | 0.000 | |
## | 0.316 | 0.070 | 0.000 | 0.000 | 0.000 | 0.000 | |
## ------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
## 2 | 6 | 12 | 1 | 1 | 0 | 1 | 21 |
## | 0.286 | 0.571 | 0.048 | 0.048 | 0.000 | 0.048 | 0.368 |
## | 0.240 | 0.706 | 1.000 | 0.167 | 0.000 | 0.200 | |
## | 0.105 | 0.211 | 0.018 | 0.018 | 0.000 | 0.018 | |
## ------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
## 3 | 1 | 1 | 0 | 0 | 0 | 0 | 2 |
## | 0.500 | 0.500 | 0.000 | 0.000 | 0.000 | 0.000 | 0.035 |
## | 0.040 | 0.059 | 0.000 | 0.000 | 0.000 | 0.000 | |
## | 0.018 | 0.018 | 0.000 | 0.000 | 0.000 | 0.000 | |
## ------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
## 5 | 0 | 0 | 0 | 3 | 0 | 0 | 3 |
## | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 | 0.053 |
## | 0.000 | 0.000 | 0.000 | 0.500 | 0.000 | 0.000 | |
## | 0.000 | 0.000 | 0.000 | 0.053 | 0.000 | 0.000 | |
## ------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
## 6 | 0 | 0 | 0 | 1 | 2 | 0 | 3 |
## | 0.000 | 0.000 | 0.000 | 0.333 | 0.667 | 0.000 | 0.053 |
## | 0.000 | 0.000 | 0.000 | 0.167 | 0.667 | 0.000 | |
## | 0.000 | 0.000 | 0.000 | 0.018 | 0.035 | 0.000 | |
## ------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
## 7 | 0 | 0 | 0 | 1 | 1 | 4 | 6 |
## | 0.000 | 0.000 | 0.000 | 0.167 | 0.167 | 0.667 | 0.105 |
## | 0.000 | 0.000 | 0.000 | 0.167 | 0.333 | 0.800 | |
## | 0.000 | 0.000 | 0.000 | 0.018 | 0.018 | 0.070 | |
## ------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
## Column Total | 25 | 17 | 1 | 6 | 3 | 5 | 57 |
## | 0.439 | 0.298 | 0.018 | 0.105 | 0.053 | 0.088 | |
## ------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
##
##
# Accuracy is 68.42 % (39 of the 57 test samples are classified correctly).