library(pixmap) # performing image processing
library(dendextend) # performing clustering techniques
library(clValid) # Calculating Dunn indexBelow I will provide some image processing techniques.
Imagine we have a set of images of different people, plus a blurred picture that we cannot recognize simply by looking at it. Below is a trivial case of this problem, with a function that recognizes who is depicted in the blurred picture.
Below is a set of pictures, each showing a person's face clearly, together with the blurred one that we want to recognize.
# Show every picture of the first image folder on a 2 x 2 grid.
par(mfrow = c(2, 2), bg = 'gainsboro')
directory <- file.path(getwd(), "images 1")
images <- list.files(directory) # file names of all pictures in the folder
for (image_name in images) {
  # the file name doubles as the sub-title of each plot
  plot(read.pnm(file.path(directory, image_name)), sub = image_name)
}
# The goal is to determine who MisterX (the black image) is.
# MisterX is the picture being identified, so remove him from the candidates.
images <- images[images != "MrX.pnm"]
# Find the candidate picture that is closest to a query picture.
#
# Arguments:
#   names     - file names of the candidate pictures inside `directory`
#   directory - folder that holds the candidates and the query picture
#   compare   - file name of the query picture (default "MrX.pnm")
#   br        - constant added to the query's grey values before it is
#               plotted, to brighten it for display only
#
# Side effect: plots the best matching candidate next to the brightened
# query picture on a 1 x 2 grid; the previous par() settings are restored
# when the function exits.
#
# Returns: a list, named by candidate file name, holding the Frobenius norm
# of (query - normalised candidate) for every candidate.
sim <- function(names, directory, compare = "MrX.pnm", br = 0.01) {
  stopifnot(length(names) > 0) # nothing to compare against otherwise

  x <- read.pnm(file.path(directory, compare)) # the query picture
  dist <- list()
  for (n in names) {
    y <- read.pnm(file.path(directory, n))
    y@grey <- y@grey / norm(y@grey, type = "F") # scale candidate to unit norm
    # NOTE(review): the query picture itself is not normalised before the
    # subtraction, unlike the candidates - confirm that this is intended.
    dist[[n]] <- norm(x@grey - y@grey, type = "F")
  }

  # File name of the candidate with the smallest distance: who is MisterX?
  best_match <- names(which.min(unlist(dist)))

  old_par <- par(mfrow = c(1, 2), bg = 'gainsboro')
  on.exit(par(old_par), add = TRUE)

  x@grey <- x@grey + br # brighten the query so it is visible in the plot
  plot(read.pnm(file.path(directory, best_match)))
  plot(x)
  dist
}
# Identify MrX among the candidate pictures; the query picture is brightened
# by 0.3 so that it is visible next to the best match.
sim(
  names = images,
  directory = paste0(getwd(), "/", "images 1"),
  br = 0.3
)
## $bob.pgm
## [1] 2.218785
##
## $chris.pgm
## [1] 2.244208
##
## $don.pgm
## [1] 2.127421
##
## $fred.pgm
## [1] 2.204757
##
## $greg.pgm
## [1] 2.2059
##
## $jim.pgm
## [1] 2.236136
##
## $ted.pgm
## [1] 2.209724
Above is the list of Euclidean distances between MrX and each image in the comparison set. The smallest distance belongs to don.pgm, so the blurred image is a picture of Don.
Imagine that you have many pictures of different people's faces and you want to group the pictures of the same person together. This can be solved using the clustering technique below.
# Load the second image set, showing every picture on a 2 x 4 grid and
# keeping the pixmap objects in `images`, named by file name.
directory <- file.path(getwd(), "images 2")
names <- list.files(directory)
par(mfrow = c(2, 4), bg = 'gainsboro')
images <- list()
for (i in names) {
  picture <- read.pnm(file.path(directory, i)) # read each file only once
  plot(picture, sub = i) # plotting images
  # `[[<-` stores the pixmap object itself as a single list element;
  # `[<-` expects a list/vector on the right-hand side.
  images[[i]] <- picture
}
# So, as you can see, we have a set of pictures that we want to group based on the person depicted in each picture.
# Creating a list with normalized matrices of images: every grey-value
# matrix is divided by its own Frobenius norm. lapply() keeps the names of
# `images` and also copes with an empty list; using a helper function avoids
# creating a global variable called `norm` that would shadow base::norm().
norm_m <- lapply(images, function(picture) {
  grey <- as.matrix(picture@grey)
  grey / norm(grey, type = "F")
})
# Creating a distance matrix: entry [i, j] is the Frobenius norm of the
# difference between normalized images i and j.
n_images <- length(norm_m)
dist <- matrix(0, nrow = n_images, ncol = n_images)
for (i in seq_len(n_images)) {
  for (j in seq_len(i - 1)) {
    # The distance is symmetric and the diagonal is zero, so each pair is
    # computed once and mirrored.
    pair_dist <- norm(norm_m[[i]] - norm_m[[j]], type = "F")
    dist[i, j] <- pair_dist
    dist[j, i] <- pair_dist
  }
}
# Naming the rows and columns with the first two characters of each file name
rownames(dist) <- substr(names, 1, 2)
colnames(dist) <- substr(names, 1, 2)
# Converting a distance matrix to distance format
dist_cl <- as.dist(dist)
# Method "Single"
# Hierarchical clustering of the images with three linkage methods. For each
# method: draw the dendrogram, mark 5 clusters, show the pictures of every
# cluster and compute the Dunn index of the 5-cluster cut.

# Plot the pictures of each cluster, one row of pictures per cluster.
#   membership  - one-column data frame of cluster ids whose row names are
#                 the picture labels (file names without ".pgm")
#   directory   - folder the pictures are read from
#   backgrounds - background colour per cluster id; its length determines
#                 how many clusters are shown
plot_cluster_members <- function(membership, directory, backgrounds) {
  for (cluster_id in seq_along(backgrounds)) {
    members <- rownames(membership)[membership[[1]] == cluster_id]
    if (length(members) == 0) {
      next # nothing to draw for an empty cluster
    }
    par(mfrow = c(1, length(members)), bg = backgrounds[cluster_id])
    for (label in members) {
      plot(read.pnm(file.path(directory, paste0(label, ".pgm"))), sub = label)
    }
  }
  invisible(membership)
}

# Background colour used for the pictures of cluster 1 to 5
col <- c(
  'gainsboro', "floralwhite", "deepskyblue1",
  "darkolivegreen1", "darkslategray2"
)

hcSingle <- hclust(dist_cl, method = "single")
hcSingled <- as.dendrogram(hcSingle)
# Plot hcd
plot(hcSingled, main = "Method Single")
# Add cluster rectangles
rect.dendrogram(hcSingled, k = 5, border = "green")
clsingle <- as.data.frame(cutree(hcSingled, 5))
# Plotting pictures from the same cluster
plot_cluster_members(clsingle, directory, col)
# Calculating Dunn Index. `dist_cl` already holds distances, so it is passed
# as `distance`; passing it as `Data` makes dunn() compute new distances
# between the rows of the distance matrix.
memb_single <- cutree(hcSingle, 5)
dunn_single <- dunn(distance = dist_cl, clusters = memb_single)

# Method "Ward"
hcWard <- hclust(dist_cl, method = "ward.D")
hcWardd <- as.dendrogram(hcWard)
# Plot hcd
plot(hcWardd, main = "Method Ward")
clward <- as.data.frame(cutree(hcWardd, 5))
# Add cluster rectangles
rect.dendrogram(hcWardd, k = 5, border = "red")
# Plotting pictures from the same cluster
plot_cluster_members(clward, directory, col)
# Calculating Dunn Index
memb_ward <- cutree(hcWard, 5)
dunn_ward <- dunn(distance = dist_cl, clusters = memb_ward)

# Method "Average"
hcAverage <- hclust(dist_cl, method = "average")
hcAveraged <- as.dendrogram(hcAverage)
# Plot hcd
plot(hcAveraged, main = "Method Average")
claverage <- as.data.frame(cutree(hcAveraged, 5))
# Add cluster rectangles
rect.dendrogram(hcAveraged, k = 5, border = "orange")
# Plotting pictures from the same cluster
plot_cluster_members(claverage, directory, col)
# Calculating Dunn Index
memb_average <- cutree(hcAverage, 5)
dunn_average <- dunn(distance = dist_cl, clusters = memb_average)

# Summary table of the three Dunn indices (the variable keeps its original
# name `sum`, which shadows base::sum() as a variable only).
sum <- rbind(Single = dunn_single, Average = dunn_average, Ward = dunn_ward)
colnames(sum) <- "Dunn Index"
sum
# NOTE: the figures below were printed when dunn() was still called with
# `Data = dist_cl`; re-run the script to refresh them.
## Dunn Index
## Single 0.8843090
## Average 0.8843090
## Ward 0.7727617
Based on the above analysis, using both visual inspection and the Dunn index (the ratio of the minimum inter-cluster distance to the maximum intra-cluster diameter), Ward.D gives us the best split.