## Example 1
# ---
# Question: Perform DBSCAN Clustering on the given IRIS Dataset.
# Reference: https://rpubs.com/kalipradeep/dbscan
# ---
# OUR CODE GOES BELOW
#
# Importing the required package
# ---
#
#install.packages("dbscan")
# Loading the required library
# ---
#
library("dbscan")
## Warning: package 'dbscan' was built under R version 4.1.3
# Lets load our Iris dataset
# ---
#
# Read the Iris data (four measurements plus a species label) from the course URL.
m <- read.csv("http://bit.ly/IrisDataset")
head(m)
## sepal_length sepal_width petal_length petal_width species
## 1 5.1 3.5 1.4 0.2 Iris-setosa
## 2 4.9 3.0 1.4 0.2 Iris-setosa
## 3 4.7 3.2 1.3 0.2 Iris-setosa
## 4 4.6 3.1 1.5 0.2 Iris-setosa
## 5 5.0 3.6 1.4 0.2 Iris-setosa
## 6 5.4 3.9 1.7 0.4 Iris-setosa
# Removing the class label
# ---
# Keep only the four numeric measurement columns (columns 1 to 4);
# the fifth column is the species label and is not used for clustering.
#
m1 <- m[, 1:4]
head(m1)
## sepal_length sepal_width petal_length petal_width
## 1 5.1 3.5 1.4 0.2
## 2 4.9 3.0 1.4 0.2
## 3 4.7 3.2 1.3 0.2
## 4 4.6 3.1 1.5 0.2
## 5 5.0 3.6 1.4 0.2
## 6 5.4 3.9 1.7 0.4
# Applying our DBSCAN algorithm
# ---
# We want minimum 4 points within a distance of eps(0.4)
# Note: the dbscan package spells the argument `minPts`; the fpc-style
# `MinPts` is only accepted with a conversion warning, so use the native name.
#
db <- dbscan(m1, eps = 0.4, minPts = 4)
# Printing out the clustering results
# ---
# Cluster 0 in the table below holds the noise points.
#
print(db)
## DBSCAN clustering for 150 objects.
## Parameters: eps = 0.4, minPts = 4
## The clustering contains 4 cluster(s) and 25 noise points.
##
## 0 1 2 3 4
## 25 47 38 36 4
##
## Available fields: cluster, eps, minPts
# We also plot our clusters as shown
# ---
# The dataset and the cluster assignment vector from dbscan are used to plot
# the clusters with their convex hulls.
#
hullplot(m1, db$cluster)
##checking my own practice
## Example 1: use dbscan on the iris data set
# Load the built-in iris data and keep only the numeric measurements
# (everything except the Species label) as a matrix.
data(iris)
iris <- as.matrix(iris[setdiff(names(iris), "Species")])
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## [1,] 5.1 3.5 1.4 0.2
## [2,] 4.9 3.0 1.4 0.2
## [3,] 4.7 3.2 1.3 0.2
## [4,] 4.6 3.1 1.5 0.2
## [5,] 5.0 3.6 1.4 0.2
## [6,] 5.4 3.9 1.7 0.4
## Choose eps from a k-NN distance plot with k = number of dimensions + 1.
## The "knee" of the curve marks a suitable eps; the dashed red line shows
## the value picked here (0.5).
kNNdistplot(iris, k = 5)
abline(h = 0.5, col = "red", lty = 2)
## Run DBSCAN with the eps read off the plot (0.5).
res <- dbscan(x = iris, eps = 0.5, minPts = 5)
res
## DBSCAN clustering for 150 objects.
## Parameters: eps = 0.5, minPts = 5
## The clustering contains 2 cluster(s) and 17 noise points.
##
## 0 1 2
## 17 49 84
##
## Available fields: cluster, eps, minPts
## Scatterplot matrix coloured by cluster; the +1 shifts noise (cluster 0)
## onto colour 1 so that it is drawn as well.
cluster_cols <- res$cluster + 1L
pairs(iris, col = cluster_cols)
## Same clustering, this time from a precomputed fixed-radius
## nearest-neighbour object.
fr <- frNN(x = iris, eps = 0.5)
dbscan(fr, minPts = 5)
## DBSCAN clustering for 150 objects.
## Parameters: eps = 0.5, minPts = 5
## The clustering contains 2 cluster(s) and 17 noise points.
##
## 0 1 2
## 17 49 84
##
## Available fields: cluster, eps, minPts
## Example 2: use data from fpc
# Fix the RNG seed so the synthetic data (and the outputs recorded below)
# are reproducible.
set.seed(665544)
n <- 100
# Build n 2-D points: the 10 uniform values per coordinate are recycled
# across the n rows, and each point is jittered with Gaussian noise (sd = 0.2).
x <- cbind(
  x = runif(10, 0, 10) + rnorm(n, sd = 0.2),
  y = runif(10, 0, 10) + rnorm(n, sd = 0.2)
)
res <- dbscan(x, eps = 0.3, minPts = 3)
res
## DBSCAN clustering for 100 objects.
## Parameters: eps = 0.3, minPts = 3
## The clustering contains 9 cluster(s) and 4 noise points.
##
## 0 1 2 3 4 5 6 7 8 9
## 4 9 10 11 10 10 8 9 20 9
##
## Available fields: cluster, eps, minPts
## plot clusters and add noise (cluster 0) as crosses.
# Colour index 0 is the background colour in base graphics, so noise points
# are effectively invisible here and are re-drawn as grey crosses below.
plot(x, col = res$cluster)
# drop = FALSE keeps the selection a two-column matrix even when exactly one
# point is noise; otherwise it collapses to a vector and points() would plot
# the two coordinates against their index instead of as one (x, y) point.
points(x[res$cluster == 0, , drop = FALSE], pch = 3, col = "grey")
hullplot(x, res)
## Warning in hullplot(x, res): Not enough colors. Some colors will be reused.
## predict cluster membership for new data points
## (Note: 0 means it is predicted as noise)
# New points are the first five observations perturbed with Gaussian noise.
newdata <- x[1:5, ] + rnorm(10, 0, 0.2)
predict(res, newdata, data = x)
## [1] 1 2 3 0 9
## [1] 1 2 3 0 9
## Challenge 1
# ---
# Question: For the given dataset, perform DBSCAN clustering.
# ---
# Hint: Remove the label class
# ---
# Dataset url = http://bit.ly/MSDBSCANClusteringDataset
# ---
# OUR CODE GOES BELOW
#
## Challenge 2
# ---
# Question: Perform DBSCAN clustering on the following toy dataset.
# ---
# Dataset url = http://bit.ly/MSDBSCANClusteringDataset2
# ---
# OUR CODE GOES BELOW
#
## Challenge 3
# ---
# Question: Apply and visualize DBSCAN clustering on the following dataset.
# ---
# Dataset url = http://bit.ly/MSDBSCANClusteringDataset3
# ---
# OUR CODE GOES BELOW
#