IMPLEMENTASI DBSCAN

Memanggil package yang dibutuhkan

require(dbscan)
require(factoextra)
library(dbscan)
library(factoextra)
library(readxl)

Mengentri data

# Fetch the comma-separated file behind the short link; the first line is
# read as the header row.
url <- "https://goo.gl/MFPUvT"
data <- read.table(
  file = url,
  header = TRUE,
  sep = ",",
  fileEncoding = "UTF-8"
)

# Overwrite the column labels with short names.
names <- c(
  "preg", "plas", "pres", "skin", "test",
  "mass", "pedi", "age", "class"
)
colnames(data) <- names

# Drop every row that contains a missing value, then keep columns 2 to 5
# ("plas", "pres", "skin", "test") as the clustering input.
data <- data[complete.cases(data), ]
data2 <- data[, c("plas", "pres", "skin", "test")]

Melakukan standardisasi data

# Standardise every column (centre on its mean, divide by its standard
# deviation); both options are the defaults of scale(), spelled out here.
data2 <- scale(data2, center = TRUE, scale = TRUE)

Melakukan Plotting data

Untuk minPts = 5, nilai eps ditentukan dari plot jarak 5 tetangga terdekat berikut:

# Sorted 5-nearest-neighbour distances of the standardised data; the red
# horizontal line marks the candidate eps at 1.2.
# NOTE(review): the dbscan documentation suggests k = minPts - 1 when the plot
# is used to pick eps for a given minPts -- confirm that k = 5 is intended.
kNNdistplot(data2, k = 5)
abline(h = 1.2, col = "red")

Dipilih eps = 1.2 (garis merah pada plot di atas)

# Run DBSCAN on the standardised data with eps = 1.2 and minPts = 5 (arguments
# named explicitly), then show the fitted object.
db1 <- dbscan(data2, eps = 1.2, minPts = 5)
db1
## DBSCAN clustering for 768 objects.
## Parameters: eps = 1.2, minPts = 5
## The clustering contains 2 cluster(s) and 18 noise points.
## 
##   0   1   2 
##  18 717  33 
## 
## Available fields: cluster, eps, minPts
# Convex-hull plot of the DBSCAN clusters. Plot the standardised matrix that
# was actually clustered (data2): the full `data` frame also holds unscaled
# columns that DBSCAN never saw (including the class label), so hulls drawn
# on it would not correspond to the space the clustering was computed in.
hullplot(data2, db1$cluster)

# Scatter plot of the same clustering, drawn as points only, without ellipses.
fviz_cluster(db1, data = data2, geom = "point", ellipse = FALSE)

### Menyiapkan data

# Load the wholesale customers workbook, keep the Fresh and Milk columns,
# standardise them, and store the result as a data frame.
customers <- read_xlsx("Wholesale-customers-data.xlsx")
customers <- as.data.frame(scale(customers[, c("Fresh", "Milk")]))

Melakukan plotting distribusi dari jarak dengan 5 tetangga terdekat

# Sorted 5-nearest-neighbour distances for the customer data; the red line
# marks eps = 0.4.
kNNdistplot(customers, k = 5)
abline(h = 0.4, col = "red")

# Cluster with the eps marked above and print the summary.
db_clusters_customers <- dbscan(
  customers,
  eps = 0.4,
  minPts = 5
)
print(db_clusters_customers)
## DBSCAN clustering for 440 objects.
## Parameters: eps = 0.4, minPts = 5
## The clustering contains 1 cluster(s) and 22 noise points.
## 
##   0   1 
##  22 418 
## 
## Available fields: cluster, eps, minPts
# Plot the customer clustering as points only, without ellipses.
fviz_cluster(
  object = db_clusters_customers,
  data = customers,
  geom = "point",
  ellipse = FALSE
)

# Take the first two iris columns (selected by name) as the OPTICS input.
iris3 <- iris[, c("Sepal.Length", "Sepal.Width")]
x <- iris3

# Scatter plot of the two selected columns.
plot(x)

Menjalankan OPTICS

Catatan: eps tidak ditentukan, sehingga digunakan nilai eps bawaan yang dihitung oleh optics()

# Compute the OPTICS ordering with minPts = 10 and no explicit eps, then show
# the resulting object.
res <- optics(x = x, minPts = 10)
res
## OPTICS ordering/clustering for 150 objects.
## Parameters: minPts = 10, eps = 1.11803398874989, eps_cl = NA, xi = NA
## Available fields: order, reachdist, coredist, predecessor, minPts, eps,
##                   eps_cl, xi

Mendapatkan order

# Show the `order` field of the OPTICS result.
res[["order"]]
##   [1]   1  44  41  40  50  29  27  24  18   8  28   5  36  38  32  25  22  21
##  [19]  12  47  45  35  20  10  49  26   2  46  31  48  30  13   7   4   3  43
##  [37]  39   9  37  23  17  11   6  19  14  33  34  60  85 122 115 143 102 100
##  [55]  97  95  83  68  56  65  80 114  93  70  96  91  84  89  67 135  79 150
##  [73] 139 128  92  74  72  64  62 134 133 129 127 124 112 104  98  75 148 117
##  [91] 105  59  55 146 141 145 144 142 140 125 121  87  66  53 113  78  76 138
## [109] 111  77  51 103 147  73 116 137 101  90  71  57  52  82  81  54 149 130
## [127] 126 108  88  69 109  86 131 106 110  99  15 120  63 107  58 136 123  94
## [145] 119  61  42  16 118 132

Membuat reachability plot

# Plot the OPTICS result object.
plot(x = res)

Menggambarkan urutan titik pada reachability plot di atas sebaran data

# Draw the data in grey, then connect the points in the order stored in
# res$order so the OPTICS ordering is visible on the scatter plot.
plot(x, col = "grey")
polygon(x[res$order, ])

Mengekstraksi DBSCAN clustering dengan memotong reachability plot pada eps_cl=0.6

# Cut the reachability plot at eps_cl = 0.6 to obtain a DBSCAN-style
# clustering, then show the updated object.
res <- extractDBSCAN(object = res, eps_cl = 0.6)
res
## OPTICS ordering/clustering for 150 objects.
## Parameters: minPts = 10, eps = 1.11803398874989, eps_cl = 0.6, xi = NA
## The clustering contains 1 cluster(s) and 1 noise points.
## 
##   0   1 
##   1 149 
## 
## Available fields: order, reachdist, coredist, predecessor, minPts, eps,
##                   eps_cl, xi, cluster