# Load the built-in iris data set and fix the RNG seed so that the shuffle
# below is reproducible.
data("iris")
set.seed(9850)
# Shuffle the rows: runif() draws one Uniform(0, 1) value per row, and the
# rows are then reordered by the rank of those random values.
gp <- runif(n = nrow(iris), min = 0, max = 1)
iris1 <- iris[order(gp), , drop = FALSE]
# Normalize each column of variables. Either min-max (0-1) scaling or z-scores could be used; min-max scaling is used here.
# Rescale a numeric vector to the [0, 1] interval (min-max normalization).
#
# Args:
#   x:     Numeric vector to rescale.
#   na.rm: If TRUE, missing values are ignored when computing the minimum and
#          maximum (they remain NA in the result). Defaults to FALSE, in which
#          case any NA in `x` makes every element of the result NA.
#
# Returns:
#   A numeric vector the same length as `x`, computed as
#   (x - min(x)) / (max(x) - min(x)). A zero-length input gives numeric(0);
#   an input whose values are all equal gives zeros instead of NaN.
normalize <- function(x, na.rm = FALSE) {
  if (length(x) == 0L) {
    # min()/max() of an empty vector warn and return Inf/-Inf; short-circuit.
    return(numeric(0))
  }
  bounds <- range(x, na.rm = na.rm)
  lo <- bounds[1]
  span <- bounds[2] - lo
  if (!is.na(span) && span == 0) {
    # Constant input: dividing by a zero range would give 0 / 0 = NaN.
    # 0 * x yields a double 0 for every value and keeps NA entries as NA.
    return(0 * x)
  }
  (x - lo) / span
}
# Quick check of normalize() on a small, evenly spaced vector.
normalize(1:5)
## [1] 0.00 0.25 0.50 0.75 1.00
# Apply normalize() to each of columns 1-4 of the shuffled data and collect
# the rescaled columns in a new data frame.
iris_n <- as.data.frame(lapply(X = iris1[1:4], FUN = normalize))
# Split the shuffled data into training and testing sets: rows 1-129 are used
# for training and rows 130-150 for testing. The features come from the
# normalized data frame; the labels come from column 5 of the shuffled data.
iris_train <- iris_n[seq_len(129), ]
iris_test <- iris_n[seq(130, 150), ]
iris_train_target <- iris1[[5]][seq_len(129)]
iris_test_target <- iris1[[5]][seq(130, 150)]
# Classify the test rows with k-nearest neighbours from the class package.
# library() stops with an error if the package is not installed, whereas
# require() only returns FALSE and lets the script fail later at knn().
library(class)
# k is taken as the square root of the number of rows. sqrt(150) is about
# 12.25, and the number of neighbours must be a whole number, so floor() makes
# the effective value (12) explicit instead of relying on implicit truncation.
k <- floor(sqrt(nrow(iris1)))
m1 <- knn(train = iris_train, test = iris_test, cl = iris_train_target, k = k)
# Predicted class for each of the 21 test rows.
m1
## [1] versicolor setosa virginica virginica virginica setosa
## [7] virginica versicolor virginica setosa setosa virginica
## [13] setosa virginica virginica virginica setosa virginica
## [19] virginica versicolor setosa
## Levels: setosa versicolor virginica
# Confusion table: rows are the true classes of the test set, columns are the
# predicted classes. In the recorded output below, 2 of the 21 test rows are
# misclassified (two versicolor predicted as virginica).
table(iris_test_target, m1, dnn = c("iris_test_target", "m1"))
##                 m1
## iris_test_target setosa versicolor virginica
##       setosa          7          0         0
##       versicolor      0          3         2
##       virginica       0          0         9