Contoh 8.2 Implementasi KNN
library("kknn")
library(dplyr)
Dataset
# Preview the leading rows of every species slice of the built-in iris3 array.
head(iris3, n = 6L)
## , , Setosa
##
## Sepal L. Sepal W. Petal L. Petal W.
## [1,] 5.1 3.5 1.4 0.2
## [2,] 4.9 3.0 1.4 0.2
## [3,] 4.7 3.2 1.3 0.2
## [4,] 4.6 3.1 1.5 0.2
## [5,] 5.0 3.6 1.4 0.2
## [6,] 5.4 3.9 1.7 0.4
##
## , , Versicolor
##
## Sepal L. Sepal W. Petal L. Petal W.
## [1,] 7.0 3.2 4.7 1.4
## [2,] 6.4 3.2 4.5 1.5
## [3,] 6.9 3.1 4.9 1.5
## [4,] 5.5 2.3 4.0 1.3
## [5,] 6.5 2.8 4.6 1.5
## [6,] 5.7 2.8 4.5 1.3
##
## , , Virginica
##
## Sepal L. Sepal W. Petal L. Petal W.
## [1,] 6.3 3.3 6.0 2.5
## [2,] 5.8 2.7 5.1 1.9
## [3,] 7.1 3.0 5.9 2.1
## [4,] 6.3 2.9 5.6 1.8
## [5,] 6.5 3.0 5.8 2.2
## [6,] 7.6 3.0 6.6 2.1
Splitting
# Positional split of iris3: rows 1-25 of each species slice are stacked into
# the training matrix, rows 26-50 into the test matrix (slice order 1, 2, 3).
dt.train <- do.call(rbind, lapply(1:3, function(s) iris3[1:25, , s]))
dt.test <- do.call(rbind, lapply(1:3, function(s) iris3[26:50, , s]))
# One label vector in the same stacking order; it is reused for both splits
# because each split holds 25 rows per species.
class <- factor(rep(c("s", "c", "v"), each = 25))
head(data.frame(dt.train, class))
## Sepal.L. Sepal.W. Petal.L. Petal.W. class
## 1 5.1 3.5 1.4 0.2 s
## 2 4.9 3.0 1.4 0.2 s
## 3 4.7 3.2 1.3 0.2 s
## 4 4.6 3.1 1.5 0.2 s
## 5 5.0 3.6 1.4 0.2 s
## 6 5.4 3.9 1.7 0.4 s
LOOCV
library(class)
KnnRes
# Cross-validated 3-nearest-neighbour predictions for the training rows,
# cross-tabulated against their labels (rows: predicted, columns: `class`).
KnnRes <- knn.cv(train = dt.train, cl = class, k = 3)
table(KnnRes, class)
## class
## KnnRes c s v
## c 24 0 3
## s 0 25 0
## v 1 0 22
KnnRes2
# Classify the test rows with 3 nearest neighbours, using dt.train and its
# labels as the reference set, then show the predicted labels.
KnnRes2 <- knn(dt.train, dt.test, class, k = 3)
KnnRes2
## [1] s s s s s s s s s s s s s s s s s s s s s s s s s c c v c c c c c v c c c c
## [39] c c c c c c c c c c c c v c c v v v v v c v v v v c v v v v v v v v v v v
## Levels: c s v
Confusion Matrix
# Confusion matrix for k = 3: predicted labels in rows, labels from `class`
# in columns.
tab <- table(KnnRes2 = KnnRes2, class = class)
tab
## class
## KnnRes2 c s v
## c 23 0 4
## s 0 25 0
## v 2 0 21
# Misclassification rate: the share of counts in `tab` that fall off the
# diagonal (total minus diagonal, divided by total).
n <- sum(tab)
correct <- sum(diag(tab))
MisErr <- (n - correct) / n
MisErr
## [1] 0.08
Pada saat K = 3, nilai error yang diperoleh adalah 0.08
KnnRes3 | K = 2
# Repeat the test-set evaluation with k = 2 and report the error rate.
# NOTE(review): class::knn documents that voting ties are broken at random,
# so with an even k this figure may presumably vary between runs.
KnnRes3 <- knn(dt.train, dt.test, class, k = 2)
tab <- table(KnnRes3 = KnnRes3, class = class)
n <- sum(tab)
correct <- sum(diag(tab))
MisErr <- (n - correct) / n
MisErr
## [1] 0.05333333
Pada saat K = 2, nilai error yang diperoleh adalah 0.0533
Find Optimal K
library(e1071)
Boxplot
# Distribution of the cross-validated misclassification rate for k = 1..10.
# `kfoldcv` is expected to be a matrix with one row per repeat and one column
# per k. Nothing earlier in the visible script creates it, so it is built here
# when missing; an already existing `kfoldcv` is used unchanged.
# NOTE(review): the reconstruction (10-fold CV via e1071::tune.knn, repeated
# 100 times) is inferred from the plot title and the e1071 import; confirm it
# matches the procedure originally used.
if (!exists("kfoldcv")) {
  kfoldcv <- t(vapply(
    seq_len(100),
    function(i) {
      tune.knn(
        dt.train, class,
        k = 1:10,
        tunecontrol = tune.control(sampling = "cross", cross = 10)
      )$performances$error
    },
    numeric(10)
  ))
}
colnames(kfoldcv) <- 1:10
boxplot(
  kfoldcv,
  ylab = "misclassification",
  xlab = "k",
  main = "10-folds CV, 100 times"
)

Dari boxplot di atas didapatkan nilai K terbaik adalah pada K = 5
# Final evaluation with k = 5: dt.train is the reference set and dt.test is
# the set being classified, consistent with the k = 3 and k = 2 calls.
# Arguments are named because knn() expects train before test and both
# matrices have 75 rows, so a swapped positional call runs without error.
# NOTE(review): the table and error value printed after this block were
# produced with the two matrices swapped; re-run to refresh them.
KnnRes3 <- knn(
  train = dt.train, test = dt.test, cl = class,
  k = 5, prob = TRUE
)
# Rows are predicted labels, columns are the labels in `class`.
tab <- table(KnnRes3, class)
tab
## class
## KnnRes3 c s v
## c 23 0 1
## s 0 25 0
## v 2 0 24
# Misclassification rate for the current `tab`: off-diagonal count over total.
n <- sum(tab)
correct <- sum(diag(tab))
MisErr <- (n - correct) / n
MisErr
## [1] 0.04
Pada saat K = 5, nilai error yang diperoleh adalah 0.04
Contoh 8.2 Implementasi fungsi KNN
library(kknn)
Dataset
# Hold out 50 randomly chosen rows of iris for testing; the remaining rows
# form the training set.
Data <- iris
# Seed the RNG so the same rows are drawn on every run.
set.seed(123)
# Draw from the actual row count instead of a hard-coded 150.
Sample <- sample(seq_len(nrow(Data)), 50)
testing <- Data[Sample, ]
training <- Data[-Sample, ]
dim(Data)
## [1] 150 5
dim(testing)
## [1] 50 5
dim(training)
## [1] 100 5
Modelling
# Fit kknn on the training rows, letting train.kknn compare neighbour counts
# up to kmax = 9, then print the fitted object's summary.
model <- train.kknn(formula = Species ~ ., data = training, kmax = 9)
model
##
## Call:
## train.kknn(formula = Species ~ ., data = training, kmax = 9)
##
## Type of response variable: nominal
## Minimal misclassification: 0.05
## Best kernel: optimal
## Best k: 4
# Show the MISCLASS component of the fitted object (one row per candidate k).
model[["MISCLASS"]]
## optimal
## 1 0.06
## 2 0.06
## 3 0.06
## 4 0.05
## 5 0.05
## 6 0.05
## 7 0.05
## 8 0.05
## 9 0.05
Prediksi
# Predict species for the held-out rows from the predictor columns only, then
# cross-tabulate actual species (rows) against predictions (columns).
prediction <- predict(model, testing[, names(testing) != "Species"])
CM <- table(testing[["Species"]], prediction)
CM
## prediction
## setosa versicolor virginica
## setosa 16 0 0
## versicolor 0 17 2
## virginica 0 3 12
# Accuracy: diagonal (correctly classified) counts of CM over all counts.
accuracy <- sum(diag(CM)) / sum(CM)
accuracy
## [1] 0.9
Hasil klasifikasi KNN menghasilkan nilai accuracy 90%, artinya model KNN sudah baik dalam mengklasifikasikan data Iris.
Contoh 8.6 Random Forest
library(party)
library(randomForest)
library(readxl)
library(caret)
Import Data
# Read the "Fix" sheet of german_credit.xlsx (path relative to the working
# directory) and preview the first rows.
german <- read_xlsx(path = "german_credit.xlsx", sheet = "Fix")
head(german)
## # A tibble: 6 x 10
## Duration Credit_Amount Installment Residence Age Number_Existing~
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 6 1169 4 4 67 2
## 2 48 5951 2 2 22 1
## 3 12 2096 2 3 49 1
## 4 42 7882 2 4 45 1
## 5 24 4870 3 4 53 2
## 6 36 9055 2 4 35 1
## # ... with 4 more variables: Maintenance <dbl>, Telephone <dbl>,
## # Foreign_Worker <dbl>, Class <chr>
Preprocessing
# Convert Telephone, Foreign_Worker and the target Class to factors in place.
german[["Telephone"]] <- as.factor(german[["Telephone"]])
german[["Foreign_Worker"]] <- as.factor(german[["Foreign_Worker"]])
german[["Class"]] <- as.factor(german[["Class"]])
Splitting Data
# Split german into roughly 75% training and 25% test rows, partitioning on
# Class. createDataPartition() samples at random, so the RNG is seeded first
# to make the split reproducible from run to run.
set.seed(123)
inTrain <- createDataPartition(y = german$Class, p = 0.75, list = FALSE)
train <- german[inTrain, ]
test <- german[-inTrain, ]
Modelling
# Fit a random forest predicting Class from all other columns of the training
# split, using randomForest's default settings.
output.forest <- randomForest(Class ~. , data = train)
Plotting
# Plot the variable-importance measures of the fitted forest.
varImpPlot(output.forest)

Prediction
# Predict Class for the held-out rows with the fitted forest.
Prediksi <- predict(output.forest, newdata = test)
## Confusion Matrix
# Actual Class (rows) against the forest's predictions (columns).
CM <- table(test[["Class"]], Prediksi)
CM
## Prediksi
## Bad Good
## Bad 20 55
## Good 15 160
## Accuracy
# Accuracy: diagonal (correctly classified) counts of CM over all counts.
accuracy <- sum(diag(CM)) / sum(CM)
accuracy
## [1] 0.72
Hasil klasifikasi Random Forest menghasilkan nilai accuracy 72%, artinya model Random Forest cukup baik dalam mengklasifikasikan Class (Good/Bad) pada data German Credit.