DBSCAN Clustering

Examples

## Example 1
# ---
# Question: Perform DBSCAN Clustering on the given IRIS Dataset.
# Reference: https://rpubs.com/kalipradeep/dbscan
# ---
# OUR CODE GOES BELOW
# 
# Importing the required package
# ---
# 
#install.packages("dbscan")
# Loading the required library
# ---
# 
library("dbscan")
## Warning: package 'dbscan' was built under R version 4.1.3
# Load the Iris dataset from the course URL
# ---
# Preview the first rows to confirm the columns were read as expected.
#
m <- read.csv(file = "http://bit.ly/IrisDataset")
head(m, n = 6L)
##   sepal_length sepal_width petal_length petal_width     species
## 1          5.1         3.5          1.4         0.2 Iris-setosa
## 2          4.9         3.0          1.4         0.2 Iris-setosa
## 3          4.7         3.2          1.3         0.2 Iris-setosa
## 4          4.6         3.1          1.5         0.2 Iris-setosa
## 5          5.0         3.6          1.4         0.2 Iris-setosa
## 6          5.4         3.9          1.7         0.4 Iris-setosa
# Dropping the class label
# ---
# Keep only the four numeric measurement columns (columns 1 to 4);
# the fifth column, species, is the label and is left out.
#
m1 <- m[, 1:4]
head(m1, n = 6L)
##   sepal_length sepal_width petal_length petal_width
## 1          5.1         3.5          1.4         0.2
## 2          4.9         3.0          1.4         0.2
## 3          4.7         3.2          1.3         0.2
## 4          4.6         3.1          1.5         0.2
## 5          5.0         3.6          1.4         0.2
## 6          5.4         3.9          1.7         0.4
# Applying our DBSCAN algorithm
# ---
# We want a minimum of 4 points within a distance of eps (0.4).
# Note: dbscan::dbscan() names this argument `minPts`. The fpc-style spelling
# `MinPts` is only converted with a warning, so the native name is used here.
#
db <- dbscan(m1, eps = 0.4, minPts = 4)
# Printing out the clustering results
# ---
# Cluster 0 in the summary table holds the noise points.
#
print(db)
## DBSCAN clustering for 150 objects.
## Parameters: eps = 0.4, minPts = 4
## The clustering contains 4 cluster(s) and 25 noise points.
## 
##  0  1  2  3  4 
## 25 47 38 36  4 
## 
## Available fields: cluster, eps, minPts
# Plotting the clusters
# ---
# hullplot() takes the data and a clustering; the clustering object is
# passed directly and its `cluster` element supplies the assignments.
#
hullplot(m1, db)

##checking my own practice

## Example 1: use dbscan on the built-in iris data set
## Keep the four numeric measurement columns as a matrix.
## NOTE: the result is stored under the name `iris`, so the matrix masks the
## built-in data set for the rest of the script.
data("iris")
iris <- as.matrix(
  iris[, c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")]
)
head(iris)
##      Sepal.Length Sepal.Width Petal.Length Petal.Width
## [1,]          5.1         3.5          1.4         0.2
## [2,]          4.9         3.0          1.4         0.2
## [3,]          4.7         3.2          1.3         0.2
## [4,]          4.6         3.1          1.5         0.2
## [5,]          5.0         3.6          1.4         0.2
## [6,]          5.4         3.9          1.7         0.4
## Find a suitable eps from the k-NN distance plot, with k = dim + 1 = 5.
## Look for the knee of the curve; the dashed red line marks eps = 0.5.
kNNdistplot(iris, k = 5)
abline(h = 0.5, col = "red", lty = "dashed")

## Cluster using the eps of 0.5 read off the plot
res <- dbscan(x = iris, eps = 0.5, minPts = 5)
res
## DBSCAN clustering for 150 objects.
## Parameters: eps = 0.5, minPts = 5
## The clustering contains 2 cluster(s) and 17 noise points.
## 
##  0  1  2 
## 17 49 84 
## 
## Available fields: cluster, eps, minPts
## Scatterplot matrix coloured by cluster; adding 1 shifts noise (cluster 0)
## to colour index 1 and cluster k to colour index k + 1.
pairs(x = iris, col = 1L + res$cluster)

## Run DBSCAN on a precomputed fixed-radius nearest-neighbour object:
## frNN() is computed once for eps = 0.5 and then handed to dbscan().
fr <- frNN(x = iris, eps = 0.5)
dbscan(x = fr, minPts = 5)
## DBSCAN clustering for 150 objects.
## Parameters: eps = 0.5, minPts = 5
## The clustering contains 2 cluster(s) and 17 noise points.
## 
##  0  1  2 
## 17 49 84 
## 
## Available fields: cluster, eps, minPts
## Example 2: use data from fpc
## Ten uniform random centres per coordinate are recycled across n = 100
## points, each with Gaussian jitter (sd = 0.2). The fixed seed makes the
## data reproducible.
set.seed(665544)
n <- 100
x <- cbind(
  x = runif(n = 10, min = 0, max = 10) + rnorm(n = n, mean = 0, sd = 0.2),
  y = runif(n = 10, min = 0, max = 10) + rnorm(n = n, mean = 0, sd = 0.2)
)

res <- dbscan(x = x, eps = 0.3, minPts = 3)
res
## DBSCAN clustering for 100 objects.
## Parameters: eps = 0.3, minPts = 3
## The clustering contains 9 cluster(s) and 4 noise points.
## 
##  0  1  2  3  4  5  6  7  8  9 
##  4  9 10 11 10 10  8  9 20  9 
## 
## Available fields: cluster, eps, minPts
## Plot the clusters and overlay the noise points (cluster 0) as grey crosses.
plot(x, col = res$cluster)
## drop = FALSE keeps the selection a two-column matrix even when exactly one
## point is noise; without it the row collapses to a plain vector and
## points() would no longer treat it as a single (x, y) coordinate.
points(x[res$cluster == 0, , drop = FALSE], pch = 3, col = "grey")

hullplot(x, res)
## Warning in hullplot(x, res): Not enough colors. Some colors will be reused.

## Predict cluster membership for new data points: the first five
## observations of x with extra Gaussian noise (sd = 0.2) added.
## (Note: 0 means the point is predicted as noise)
newdata <- x[1:5, ] + rnorm(10, mean = 0, sd = 0.2)
predict(res, newdata = newdata, data = x)
## [1] 1 2 3 0 9

Challenges

## Challenge 1
# ---
# Question: For the given dataset, perform DBSCAN clustering.
# ---
# Hint: Remove the label class
# ---
# Dataset url = http://bit.ly/MSDBSCANClusteringDataset
# ---
# OUR CODE GOES BELOW
# 
## Challenge 2
# ---
# Question: Perform DBSCAN clustering on the following toy dataset.
# ---
# Dataset url = http://bit.ly/MSDBSCANClusteringDataset2
# ---
# OUR CODE GOES BELOW
#
## Challenge 3
# ---
# Question: Apply and visualize DBSCAN clustering on the following dataset.
# ---
# Dataset url = http://bit.ly/MSDBSCANClusteringDataset3
# ---
# OUR CODE GOES BELOW
#