In image segmentation, we divide an image into clusters according to some feature of the image, such as color or pixel intensity. Here we restrict ourselves to gray-scale images.
# Read the pixel intensities; header = FALSE because the file has no header
# row (every line is image data).
flower <- read.csv("flower.csv", header = FALSE)
# Inspect the structure of the loaded data frame.
str(flower)
## 'data.frame': 50 obs. of 50 variables:
## $ V1 : num 0.0991 0.0991 0.1034 0.1034 0.1034 ...
## $ V2 : num 0.112 0.108 0.112 0.116 0.108 ...
## $ V3 : num 0.134 0.116 0.121 0.116 0.112 ...
## $ V4 : num 0.138 0.138 0.121 0.121 0.112 ...
## $ V5 : num 0.138 0.134 0.125 0.116 0.112 ...
## $ V6 : num 0.138 0.129 0.121 0.108 0.112 ...
## $ V7 : num 0.129 0.116 0.103 0.108 0.112 ...
## $ V8 : num 0.116 0.103 0.103 0.103 0.116 ...
## $ V9 : num 0.1121 0.0991 0.1078 0.1121 0.1164 ...
## $ V10: num 0.121 0.108 0.112 0.116 0.125 ...
## $ V11: num 0.134 0.125 0.129 0.134 0.129 ...
## $ V12: num 0.147 0.134 0.138 0.129 0.138 ...
## $ V13: num 0.000862 0.146552 0.142241 0.142241 0.133621 ...
## $ V14: num 0.000862 0.000862 0.142241 0.133621 0.12931 ...
## $ V15: num 0.142 0.142 0.134 0.121 0.116 ...
## $ V16: num 0.125 0.125 0.116 0.108 0.108 ...
## $ V17: num 0.1121 0.1164 0.1078 0.0991 0.0991 ...
## $ V18: num 0.108 0.112 0.108 0.108 0.108 ...
## $ V19: num 0.121 0.129 0.125 0.116 0.116 ...
## $ V20: num 0.138 0.129 0.125 0.116 0.116 ...
## $ V21: num 0.138 0.134 0.121 0.125 0.125 ...
## $ V22: num 0.134 0.129 0.125 0.121 0.103 ...
## $ V23: num 0.125 0.1207 0.1164 0.1164 0.0819 ...
## $ V24: num 0.1034 0.1034 0.0991 0.0991 0.1034 ...
## $ V25: num 0.0948 0.0905 0.0905 0.1034 0.125 ...
## $ V26: num 0.0862 0.0862 0.0991 0.125 0.1422 ...
## $ V27: num 0.086207 0.086207 0.103448 0.12931 0.000862 ...
## $ V28: num 0.0991 0.1078 0.1164 0.1293 0.1466 ...
## $ V29: num 0.116 0.134 0.134 0.121 0.142 ...
## $ V30: num 0.121 0.138 0.142 0.129 0.138 ...
## $ V31: num 0.121 0.134 0.142 0.134 0.129 ...
## $ V32: num 0.116 0.134 0.129 0.116 0.112 ...
## $ V33: num 0.108 0.112 0.116 0.108 0.108 ...
## $ V34: num 0.1078 0.1078 0.1034 0.0991 0.1034 ...
## $ V35: num 0.1078 0.1034 0.0991 0.0991 0.0991 ...
## $ V36: num 0.1078 0.1034 0.1034 0.0905 0.0862 ...
## $ V37: num 0.1078 0.1078 0.1034 0.0819 0.0733 ...
## $ V38: num 0.0948 0.0991 0.0776 0.069 0.0733 ...
## $ V39: num 0.0733 0.056 0.0474 0.0474 0.056 ...
## $ V40: num 0.0474 0.0388 0.0431 0.0474 0.0603 ...
## $ V41: num 0.0345 0.0345 0.0388 0.0474 0.0647 ...
## $ V42: num 0.0259 0.0259 0.0345 0.0431 0.056 ...
## $ V43: num 0.0259 0.0259 0.0388 0.0517 0.0603 ...
## $ V44: num 0.0302 0.0302 0.0345 0.0517 0.0603 ...
## $ V45: num 0.0259 0.0259 0.0259 0.0388 0.0474 ...
## $ V46: num 0.0259 0.0172 0.0172 0.0259 0.0345 ...
## $ V47: num 0.01724 0.01724 0.00862 0.02155 0.02586 ...
## $ V48: num 0.0216 0.0129 0.0129 0.0172 0.0302 ...
## $ V49: num 0.0216 0.0216 0.0216 0.0345 0.0603 ...
## $ V50: num 0.0302 0.0345 0.0388 0.0603 0.0776 ...
A gray-scale image is represented as a matrix of pixel intensity values ranging from 0 (black) to 1 (white), so let's convert our data frame into a matrix.
# Convert the data frame of intensities into a numeric matrix.
flowerMatrix <- as.matrix(flower)
str(flowerMatrix)
## num [1:50, 1:50] 0.0991 0.0991 0.1034 0.1034 0.1034 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:50] "V1" "V2" "V3" "V4" ...
# Flatten the matrix (column by column) into a plain numeric vector.
flowerVector <- as.vector(flowerMatrix)
str(flowerVector)
## num [1:2500] 0.0991 0.0991 0.1034 0.1034 0.1034 ...
To cluster an image we need to convert its matrix of pixel intensities into a vector; here we build the vector from the matrix.
# Second flattened copy of the intensity matrix; this is the vector that the
# distance computation below operates on.
flowerVector2 <- as.vector(flowerMatrix)
str(flowerVector2)
## num [1:2500] 0.0991 0.0991 0.1034 0.1034 0.1034 ...
Let's find the pairwise distances between all the intensity values in the vector.
# Pairwise Euclidean distances between all pixel intensities.
distance <- dist(flowerVector2, method = "euclidean")
Now we can cluster the intensities; for that we use the hclust function.
# Hierarchical clustering of the intensities using the "ward.D" linkage.
clusterIntensity <- hclust(distance, method = "ward.D")
Here the Ward method is a minimum-variance method, which finds compact clusters.
# Draw the dendrogram of the hierarchical clustering result.
plot(clusterIntensity)
We can visualize the clusters by drawing rectangles around them on the dendrogram.
# rect.hclust() only adds rectangles to a dendrogram that is already drawn on
# the active graphics device. When this statement runs on a fresh device
# (e.g. in its own R Markdown chunk) it fails with
# "plot.new has not been called yet", so redraw the dendrogram first.
plot(clusterIntensity)
# Outline the three clusters obtained by cutting the tree at k = 3.
rect.hclust(clusterIntensity, k = 3, border = "red")
# Cut the dendrogram into three groups; the result holds one cluster label
# per pixel.
flowerClusters <- cutree(clusterIntensity, k = 3)
# flowerClusters
Let's split the data into these three clusters; this is done using cutree.
# rect.hclust() needs an existing dendrogram on the active graphics device;
# without one it stops with "plot.new has not been called yet". Redraw the
# tree here so the rectangles have something to be drawn onto.
plot(clusterIntensity)
# Outline the three clusters obtained by cutting the tree at k = 3.
rect.hclust(clusterIntensity, k = 3, border = "red")
# Assign every pixel to one of three clusters.
flowerClusters <- cutree(clusterIntensity, k = 3)
# flowerClusters
# Mean pixel intensity within each cluster.
tapply(flowerVector, flowerClusters, FUN = mean)
## 1 2 3
## 0.08574315 0.50826255 0.93147713
Let's view flowerClusters as an image; to do so we need to convert it into a matrix.
# Reshape the per-pixel cluster labels back into the image layout. The
# dimensions are taken from the source matrix instead of being hard-coded, so
# the reshape stays correct if the input image size changes.
dim(flowerClusters) <- dim(flowerMatrix)
# Segmented image: one colour per cluster label.
image(flowerClusters, axes = FALSE)
# Original image on a 256-level grey scale (0 = black, 1 = white);
# length.out is spelled out rather than relying on partial argument matching.
image(flowerMatrix, axes = FALSE, col = grey(seq(0, 1, length.out = 256)))
# Load healthy.csv (no header row) and convert it to a numeric matrix.
healthy <- read.csv("healthy.csv", header = FALSE)
healthyMatrix <- as.matrix(healthy)
str(healthyMatrix)
## num [1:566, 1:646] 0.00427 0.00855 0.01282 0.01282 0.01282 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:646] "V1" "V2" "V3" "V4" ...
# Show the image on a 256-level grey scale.
image(healthyMatrix, axes = FALSE, col = grey(seq(0, 1, length.out = 256)))
# Flatten the image into a vector of pixel intensities.
healthyVector <- as.vector(healthyMatrix)
# Attempting the full pairwise distance computation on this many pixels fails
# for lack of memory (see the error echoed below), which is why hierarchical
# clustering is not used for this image.
distance <- dist(healthyVector, method = "euclidean")
## Error: cannot allocate vector of size 498.0 Gb
str(healthyVector)
## num [1:365636] 0.00427 0.00855 0.01282 0.01282 0.01282 ...
So let's use the k-means clustering algorithm.
# Number of clusters to fit.
k <- 5
# Fix the random seed so the k-means run is reproducible.
set.seed(1)
# Cluster the pixel intensities, allowing up to 1000 iterations.
KMC <- kmeans(healthyVector, centers = k, iter.max = 1000)
str(KMC)
## List of 9
## $ cluster : int [1:365636] 3 3 3 3 3 3 3 3 3 3 ...
## $ centers : num [1:5, 1] 0.4818 0.1062 0.0196 0.3094 0.1842
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:5] "1" "2" "3" "4" ...
## .. ..$ : NULL
## $ totss : num 5775
## $ withinss : num [1:5] 96.6 47.2 39.2 57.5 62.3
## $ tot.withinss: num 303
## $ betweenss : num 5472
## $ size : int [1:5] 20556 101085 133162 31555 79278
## $ iter : int 2
## $ ifault : int 0
## - attr(*, "class")= chr "kmeans"
# Per-pixel cluster labels from the k-means fit.
healthyClusters <- KMC$cluster
# Mean intensity (centre) of cluster 2.
KMC$centers[2]
## [1] 0.1061945
# Reshape the labels to the image layout and draw one colour per cluster.
dim(healthyClusters) <- dim(healthyMatrix)
image(healthyClusters, axes = FALSE, col = rainbow(k))
# Load tumor.csv (no header row) and flatten it to a vector of intensities.
tumor <- read.csv("tumor.csv", header = FALSE)
tumorMatrix <- as.matrix(tumor)
tumorVector <- as.vector(tumorMatrix)
# install.packages("flexclust")
library(flexclust)
# Wrap the k-means fit (with its training data) as a kcca object so that
# predict() can assign new pixels to the already-fitted clusters.
KMC.kcca <- as.kcca(KMC, healthyVector)
tumorClusters <- predict(KMC.kcca, newdata = tumorVector)
# Reshape the predicted labels to the image layout and draw them with the
# same k-colour palette used for the healthy image.
dim(tumorClusters) <- dim(tumorMatrix)
image(tumorClusters, axes = FALSE, col = rainbow(k))