Import Data

library(class)
library(caTools)

# Fix the RNG seed first so that later random steps (such as the train/test
# split) are reproducible, then take a working copy of the built-in iris data.
set.seed(123)
df_iris <- iris

View Data

# Quick look at the working copy: first rows, column types, and class labels.
# Lines starting with "##" are console output recorded from a previous run.
head(df_iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Inspect df_iris (the object the rest of the analysis works on) rather than
# the built-in iris it was copied from, so the summary cannot drift from the
# data actually being modelled.
str(df_iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
levels(df_iris$Species)
## [1] "setosa"     "versicolor" "virginica"

Split the dataset into training and test data

# 80/20 train/test split, stratified by species.
#
# sample.split() expects the vector of labels, not the whole data frame. Given
# a data frame it returns only one flag per column (5 values), and assigning
# that to a 150-row data frame silently recycles it, so the same row position
# in every group of five lands in the test set instead of a random,
# class-balanced sample. Passing the Species column yields one flag per row
# and preserves the class proportions in both subsets.
# NOTE(review): outputs recorded further down in this file (e.g. the accuracy
# value) were produced with the previous split and should be regenerated.
df_iris$status <- sample.split(df_iris$Species, SplitRatio = 0.80)

df_train <- df_iris[df_iris$status, ]   # status TRUE  = training rows
df_test <- df_iris[!df_iris$status, ]   # status FALSE = test rows

# Predictor columns and the target column.
kolom_X <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
kolom_y <- c("Species")

# Single-bracket indexing keeps each of these a data frame; the labels are
# later read as y_train$Species / y_test$Species.
X_train <- df_train[kolom_X]
y_train <- df_train[kolom_y]
X_test <- df_test[kolom_X]
y_test <- df_test[kolom_y]

Modelling using k-NN

Pemodelan ini menggunakan 3 tetangga terdekat (k = 3).

# Classify every test row by majority vote among its k = 3 nearest training
# rows. prob = TRUE additionally attaches the winning class's share of the
# votes to the result as the "prob" attribute.
knn_result <- knn(
  train = X_train,
  test = X_test,
  cl = y_train$Species,
  k = 3,
  prob = TRUE
)

Evaluasi Model

Akurasi model k-NN pada data uji sebesar 96,67% (lihat keluaran `accuracy` di bawah), yang menandakan bahwa model memiliki akurasi yang tinggi.

# First six test rows: actual species next to the k-NN prediction.
real_prediksi <- head(cbind(y_test, knn_result))

# Confusion matrix: rows are the actual species, columns the predicted ones.
conf_matrix <- table(y_test[["Species"]], knn_result)

# Accuracy = correctly classified cases (the diagonal) over all test cases.
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)

accuracy
## [1] 0.9666667