library(class)
library(caTools)
# import data
df_iris = iris
set.seed(123)
head(df_iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
str(df_iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
levels(df_iris$Species)
## [1] "setosa" "versicolor" "virginica"
df_iris$status = sample.split(df_iris$Species, SplitRatio = 0.80) # stratified 80/20 split on the class label
df_train = subset(df_iris, status == TRUE)  # TRUE  = training set
df_test  = subset(df_iris, status == FALSE) # FALSE = test set
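To confirm that the split behaves as intended, the status flag can be tabulated; with a 0.80 ratio over 150 rows one would expect roughly 120 training and 30 test rows.
table(df_iris$status)          # expect about FALSE = 30 (test) and TRUE = 120 (train)
nrow(df_train); nrow(df_test)  # row counts of the two subsets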
kolom_X = c('Sepal.Length','Sepal.Width','Petal.Length','Petal.Width') # feature columns
kolom_y = c('Species')                                                 # target column
X_train = df_train[kolom_X]
y_train = df_train[kolom_y]
X_test = df_test[kolom_X]
y_test = df_test[kolom_y]
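Because k-NN is distance-based, features on very different scales can dominate the distance calculation. The four iris measurements are on comparable scales, so the model below uses them as-is, but a minimal standardization sketch (not part of the original workflow; X_train_sc and X_test_sc are illustrative names) would look like this:
X_train_sc = scale(X_train)  # standardize training features to mean 0, sd 1
X_test_sc  = scale(X_test,
                   center = attr(X_train_sc, "scaled:center"),  # reuse training means
                   scale  = attr(X_train_sc, "scaled:scale"))   # reuse training sds
# these scaled matrices could then be passed to knn() in place of X_train / X_test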
The three nearest neighbours (k = 3) are used in this model.
knn_result <- knn(train = X_train,test = X_test,cl=y_train$Species,k = 3,prob = TRUE)
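Because prob = TRUE is passed, class::knn attaches the proportion of neighbour votes for the winning class as an attribute of the result. A quick way to inspect it next to the predictions (vote_prop is just an illustrative name):
vote_prop = attr(knn_result, "prob")  # share of the k = 3 neighbours that voted for the predicted class
head(data.frame(predicted = knn_result, vote_prop))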
The accuracy obtained with k-NN is about 96.7%, indicating that the model classifies the test set well.
real_prediksi = head(cbind(y_test, knn_result)) # actual vs predicted labels
conf_matrix <- table(y_test$Species, knn_result) # confusion matrix
accuracy <- sum(diag(conf_matrix))/sum(conf_matrix) # model accuracy
accuracy
## [1] 0.9666667
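The value k = 3 was fixed up front. As a rough check (not part of the original analysis, and ideally done with cross-validation rather than a single split), the same train/test split can be reused to compare a few odd values of k:
for (k in c(1, 3, 5, 7, 9)) {
  pred_k = knn(train = X_train, test = X_test, cl = y_train$Species, k = k)  # refit with this k
  cat("k =", k, "test accuracy =", round(mean(pred_k == y_test$Species), 4), "\n")
}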