Image to Analyze in This Script

1. Loading raw data and getting it ready

library(readr)
larva2 <- read_csv("~/GENER/SOMs/1. Raw Data/PSOMML_2Larvas.csv", 
    col_names = FALSE)
colnames(larva2) <- c("FILA", "COLUMNA", "RED", "GREEN", "BLUE")

larva2Esp <- larva2
larva2 <- larva2[,-1:-2]

The dataframe obtained is made of integer values:

str(larva2)

This will be a problem down the line when feeding this data to the algorithm, values have to be changed to numeric:

larva2[,'RED'] <- as.numeric(as.factor(larva2$RED))
larva2[,'GREEN'] <- as.numeric(as.factor(larva2$GREEN))
larva2[,'BLUE'] <- as.numeric(as.factor(larva2$BLUE))
###larva2Esp
larva2Esp[,'FILA'] <- as.numeric(as.factor(larva2Esp$FILA))
larva2Esp[,'COLUMNA'] <- as.numeric(as.factor(larva2Esp$COLUMNA))
larva2Esp[,'RED'] <- as.numeric(as.factor(larva2Esp$RED))
larva2Esp[,'GREEN'] <- as.numeric(as.factor(larva2Esp$GREEN))
larva2Esp[,'BLUE'] <- as.numeric(as.factor(larva2Esp$BLUE))

Whit this taken care of we have a database ready to be fed to the algorithmL

str(larva2)
Classes ‘tbl_df’, ‘tbl’ and 'data.frame':   262144 obs. of  3 variables:
 $ RED  : num  166 166 165 164 164 164 164 164 164 164 ...
 $ GREEN: num  166 166 167 167 167 167 167 167 167 167 ...
 $ BLUE : num  170 170 170 170 170 170 170 170 170 170 ...

2 Initial exploration of data

A brief summary of the values of the variable can help with the initial understanding of things:

summary(larva2)
      RED            GREEN            BLUE      
 Min.   :  1.0   Min.   :  1.0   Min.   :  1.0  
 1st Qu.:164.0   1st Qu.:166.0   1st Qu.:169.0  
 Median :166.0   Median :167.0   Median :170.0  
 Mean   :163.3   Mean   :164.6   Mean   :165.5  
 3rd Qu.:167.0   3rd Qu.:168.0   3rd Qu.:171.0  
 Max.   :171.0   Max.   :172.0   Max.   :175.0  

3 Self Organizing Map

Loading required package

require(kohonen)

Changing dataframe to a matrix

#I removed the function scale() because it does not need to be scaled.
larva2.matrix <- as.matrix(larva2)
larva2Esp.matrix <- as.matrix(larva2Esp)
#TEST
#larva2.matrixTest <- larva2.matrix[1:20000,]

Creating the SOM grid

som_grid <- somgrid(xdim = 30, ydim = 30, topo = "rectangular")

Training the SOM

With no spatial references

som_model <- som(larva2.matrix,
                 grid = som_grid,
                 rlen = 1500,
                 alpha = c(0.05,0.01),
                 keep.data = TRUE)

With spatial references

som_modelEsp <- som(larva2Esp.matrix,
                 grid = som_grid,
                 rlen = 1500,
                 alpha = c(0.05,0.01),
                 keep.data = TRUE)

Training progress evaluation

plot(som_modelEsp, type = "changes")

Node counts

source('coolBlueHotRed.R')
plot(som_modelEsp, type = "counts", palette.name=coolBlueHotRed)

SOM by RGB value

par(mfrow = c(2,2))
models <- as.data.frame(som_modelEsp$codes)
plot(som_modelEsp,
     type = "property",
     property = models[,3],
     main=names(models)[3],
     palette.name=coolBlueHotRed)
plot(som_modelEsp,
     type = "property",
     property = models[,4],
     main=names(models)[4],
     palette.name=coolBlueHotRed)
plot(som_modelEsp,
     type = "property",
     property = models[,5],
     main=names(models)[5],
     palette.name=coolBlueHotRed)

Neighbor Distance

plot(som_modelEsp, type = "dist.neighbours", palette.name = grey.colors)

Code Spread

plot(som_modelEsp, type = "codes", palette.name = coolBlueHotRed)

Clustering

mydata <- som_modelEsp$codes 
wss <- (nrow(mydata)-1)*sum(apply(data.frame(mydata),2,var)) 
for (i in 2:20) {
  wss[i] <- sum(kmeans(data.frame(mydata), centers=i)$withinss)
}
plot(1:20, wss, type="b", xlab="Number of Clusters",
     ylab="Within groups sum of squares", main="Within cluster sum of squares (WCSS)")

som_cluster <- cutree(hclust(dist(data.frame(som_modelEsp$codes))), 6)

In this graphic we can see that the ideal amount of clusters is not more than 5. Starting with 5 and going down we will choose the smallest number that gives us a smooth distribution in quantity of it’s members:

# 7 Clusters -- the first cluster has considerably less members than the other four ones.
table(cutree(hclust(dist(data.frame(som_modelEsp$codes))), 3))

  1   2   3 
489 172 239 
# 5 Clusters -- the first cluster has considerably less members than the other four ones.
table(cutree(hclust(dist(data.frame(som_modelEsp$codes))), 5))

  1   2   3   4   5 
132 172 134 239 223 
#4 clusters -- we have founde the good one.
table(cutree(hclust(dist(data.frame(som_modelEsp$codes))), 4))

  1   2   3   4 
266 172 239 223 
pretty_palette <- c("#1f77b4", '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2')
plot(som_modelEsp, type="mapping", bgcol = pretty_palette[som_cluster], main = "Clusters", keepMargins = TRUE) 
add.cluster.boundaries(som_modelEsp, som_cluster, lwd = 7)

LS0tDQp0aXRsZTogIlVzaW5nIGEgU2VsZiBPcmdhbml6aW5nIE1hcCBmb3IgSW1hZ2UgQW5hbHlzaXMiDQpvdXRwdXQ6IA0KICBodG1sX25vdGVib29rOiANCiAgICB0aGVtZTogc3BhY2VsYWINCiAgICB0b2M6IHllcw0KICAgIHRvY19mbG9hdDogdHJ1ZQ0KYXV0aG9yOiBHZW5lciBBdmlsw6lzIFINCmRhdGU6IE1hcnpvLCAyMDE3DQotLS0NCg0KPGNlbnRlcj4hW0ltYWdlIHRvIEFuYWx5emUgaW4gVGhpcyBTY3JpcHRdKEM6L1VzZXJzL1RlbGVtYXRpY2EgSklOSC9Eb2N1bWVudHMvR0VORVIvU09Ncy8xLiBSYXcgRGF0YS8yX2xhcnZhcy5wbmcpPC9jZW50ZXI+DQoNCiMxLiBMb2FkaW5nIHJhdyBkYXRhIGFuZCBnZXR0aW5nIGl0IHJlYWR5DQoNCmBgYHtyLCBlY2hvPVRSVUUsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9DQpsaWJyYXJ5KHJlYWRyKQ0KbGFydmEyIDwtIHJlYWRfY3N2KCJ+L0dFTkVSL1NPTXMvMS4gUmF3IERhdGEvUFNPTU1MXzJMYXJ2YXMuY3N2IiwgDQogICAgY29sX25hbWVzID0gRkFMU0UpDQpjb2xuYW1lcyhsYXJ2YTIpIDwtIGMoIkZJTEEiLCAiQ09MVU1OQSIsICJSRUQiLCAiR1JFRU4iLCAiQkxVRSIpDQoNCmxhcnZhMkVzcCA8LSBsYXJ2YTINCmxhcnZhMiA8LSBsYXJ2YTJbLC0xOi0yXQ0KDQoNCg0KYGBgDQoNClRoZSBkYXRhZnJhbWUgb2J0YWluZWQgaXMgbWFkZSBvZiBgaW50ZWdlcmAgdmFsdWVzOg0KDQpgYGB7cn0NCnN0cihsYXJ2YTIpDQpgYGANCg0KDQpUaGlzIHdpbGwgYmUgYSBwcm9ibGVtIGRvd24gdGhlIGxpbmUgd2hlbiBmZWVkaW5nIHRoaXMgZGF0YSB0byB0aGUgYWxnb3JpdGhtLCB2YWx1ZXMgaGF2ZSB0byBiZSBjaGFuZ2VkIHRvICBgbnVtZXJpY2A6DQoNCmBgYHtyfQ0KbGFydmEyWywnUkVEJ10gPC0gYXMubnVtZXJpYyhhcy5mYWN0b3IobGFydmEyJFJFRCkpDQpsYXJ2YTJbLCdHUkVFTiddIDwtIGFzLm51bWVyaWMoYXMuZmFjdG9yKGxhcnZhMiRHUkVFTikpDQpsYXJ2YTJbLCdCTFVFJ10gPC0gYXMubnVtZXJpYyhhcy5mYWN0b3IobGFydmEyJEJMVUUpKQ0KDQojIyNsYXJ2YTJFc3ANCmxhcnZhMkVzcFssJ0ZJTEEnXSA8LSBhcy5udW1lcmljKGFzLmZhY3RvcihsYXJ2YTJFc3AkRklMQSkpDQpsYXJ2YTJFc3BbLCdDT0xVTU5BJ10gPC0gYXMubnVtZXJpYyhhcy5mYWN0b3IobGFydmEyRXNwJENPTFVNTkEpKQ0KbGFydmEyRXNwWywnUkVEJ10gPC0gYXMubnVtZXJpYyhhcy5mYWN0b3IobGFydmEyRXNwJFJFRCkpDQpsYXJ2YTJFc3BbLCdHUkVFTiddIDwtIGFzLm51bWVyaWMoYXMuZmFjdG9yKGxhcnZhMkVzcCRHUkVFTikpDQpsYXJ2YTJFc3BbLCdCTFVFJ10gPC0gYXMubnVtZXJpYyhhcy5mYWN0b3IobGFydmEyRXNwJEJMVUUpKQ0KYGBgDQoNCldoaXQgdGhpcyB0YWtlbiBjYXJlIG9mIHdlIGhhdmUgYSBkYXRhYmFzZSByZWFkeSB0byBiZSBmZWQgdG8gdGhlIGFsZ29yaXRobUwNCmBgYHtyfQ0Kc3RyKGxhcnZhMikNCmBgYA0KIzIgSW5pdGlhbCBleHBsb3JhdGlvbiBvZiBkYXRhDQoNCkEgYnJpZWYgc3VtbWFyeSBvZiB0aGUgdmFsdWVzIG9mIHRoZSB2YXJpYWJsZSBjYW4gaGVscCB3aXRoIHRoZSBpbml0aWFsIHVuZGVyc3RhbmRpbmcgb2YgdGhpbmdzOg0KYGBge3J9DQpzdW1tYXJ5KGxhcnZhMikNCmBgYA0KDQoNCiMzIFNlbGYgT3JnYW5pemluZyBNYXANCg0KIyNMb2FkaW5nIHJlcXVpcmVkIHBhY2thZ2UNCg0KYGBge3IsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9DQpyZXF1aXJlKGtvaG9uZW4pDQpgYGANCg0KIyNDaGFuZ2luZyBkYXRhZnJhbWUgdG8gYSBgbWF0cml4YA0KYGBge3J9DQojSSByZW1vdmVkIHRoZSBmdW5jdGlvbiBzY2FsZSgpIGJlY2F1c2UgaXQgZG9lcyBub3QgbmVlZCB0byBiZSBzY2FsZWQuDQpsYXJ2YTIubWF0cml4IDwtIGFzLm1hdHJpeChsYXJ2YTIpDQpsYXJ2YTJFc3AubWF0cml4IDwtIGFzLm1hdHJpeChsYXJ2YTJFc3ApDQoNCiNURVNUDQojbGFydmEyLm1hdHJpeFRlc3QgPC0gbGFydmEyLm1hdHJpeFsxOjIwMDAwLF0NCmBgYA0KDQojI0NyZWF0aW5nIHRoZSBTT00gZ3JpZA0KYGBge3J9DQpzb21fZ3JpZCA8LSBzb21ncmlkKHhkaW0gPSAzMCwgeWRpbSA9IDMwLCB0b3BvID0gInJlY3Rhbmd1bGFyIikNCmBgYA0KDQoNCiMjVHJhaW5pbmcgdGhlIFNPTQ0KDQojIyNXaXRoIG5vIHNwYXRpYWwgcmVmZXJlbmNlcw0KYGBge3IsIGVjaG89VFJVRSwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCnNvbV9tb2RlbCA8LSBzb20obGFydmEyLm1hdHJpeCwNCiAgICAgICAgICAgICAgICAgZ3JpZCA9IHNvbV9ncmlkLA0KICAgICAgICAgICAgICAgICBybGVuID0gMTUwMCwNCiAgICAgICAgICAgICAgICAgYWxwaGEgPSBjKDAuMDUsMC4wMSksDQogICAgICAgICAgICAgICAgIGtlZXAuZGF0YSA9IFRSVUUpDQpgYGANCg0KIyMjV2l0aCBzcGF0aWFsIHJlZmVyZW5jZXMNCg0KYGBge3IsIGVjaG89VFJVRSwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCnNvbV9tb2RlbEVzcCA8LSBzb20obGFydmEyRXNwLm1hdHJpeCwNCiAgICAgICAgICAgICAgICAgZ3JpZCA9IHNvbV9ncmlkLA0KICAgICAgICAgICAgICAgICBybGVuID0gMTUwMCwNCiAgICAgICAgICAgICAgICAgYWxwaGEgPSBjKDAuMDUsMC4wMSksDQogICAgICAgICAgICAgICAgIGtlZXAuZGF0YSA9IFRSVUUpDQpgYGANCg0KIyNUcmFpbmluZyBwcm9ncmVzcyBldmFsdWF0aW9uDQpgYGB7ciwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCnBsb3Qoc29tX21vZGVsRXNwLCB0eXBlID0gImNoYW5nZXMiKQ0KYGBgDQoNCiMjTm9kZSBjb3VudHMNCmBgYHtyLCBmaWcuaGVpZ2h0PSA4fQ0Kc291cmNlKCdjb29sQmx1ZUhvdFJlZC5SJykNCnBsb3Qoc29tX21vZGVsRXNwLCB0eXBlID0gImNvdW50cyIsIHBhbGV0dGUubmFtZT1jb29sQmx1ZUhvdFJlZCkNCmBgYA0KDQojI1NPTSBieSBSR0IgdmFsdWUNCg0KYGBge3IsIGZpZy5oZWlnaHQ9MTB9DQpwYXIobWZyb3cgPSBjKDIsMikpDQptb2RlbHMgPC0gYXMuZGF0YS5mcmFtZShzb21fbW9kZWxFc3AkY29kZXMpDQpwbG90KHNvbV9tb2RlbEVzcCwNCiAgICAgdHlwZSA9ICJwcm9wZXJ0eSIsDQogICAgIHByb3BlcnR5ID0gbW9kZWxzWywzXSwNCiAgICAgbWFpbj1uYW1lcyhtb2RlbHMpWzNdLA0KICAgICBwYWxldHRlLm5hbWU9Y29vbEJsdWVIb3RSZWQpDQpwbG90KHNvbV9tb2RlbEVzcCwNCiAgICAgdHlwZSA9ICJwcm9wZXJ0eSIsDQogICAgIHByb3BlcnR5ID0gbW9kZWxzWyw0XSwNCiAgICAgbWFpbj1uYW1lcyhtb2RlbHMpWzRdLA0KICAgICBwYWxldHRlLm5hbWU9Y29vbEJsdWVIb3RSZWQpDQpwbG90KHNvbV9tb2RlbEVzcCwNCiAgICAgdHlwZSA9ICJwcm9wZXJ0eSIsDQogICAgIHByb3BlcnR5ID0gbW9kZWxzWyw1XSwNCiAgICAgbWFpbj1uYW1lcyhtb2RlbHMpWzVdLA0KICAgICBwYWxldHRlLm5hbWU9Y29vbEJsdWVIb3RSZWQpDQoNCmBgYA0KDQoNCiMjTmVpZ2hib3IgRGlzdGFuY2UNCmBgYHtyLCBmaWcuYWxpZ249J2NlbnRlcid9DQpwbG90KHNvbV9tb2RlbEVzcCwgdHlwZSA9ICJkaXN0Lm5laWdoYm91cnMiLCBwYWxldHRlLm5hbWUgPSBncmV5LmNvbG9ycykNCmBgYA0KDQojI0NvZGUgU3ByZWFkDQpgYGB7ciwgZmlnLmhlaWdodD0gMTB9DQpwbG90KHNvbV9tb2RlbEVzcCwgdHlwZSA9ICJjb2RlcyIsIHBhbGV0dGUubmFtZSA9IGNvb2xCbHVlSG90UmVkKQ0KYGBgDQoNCiMjQ2x1c3RlcmluZw0KYGBge3J9DQpteWRhdGEgPC0gc29tX21vZGVsRXNwJGNvZGVzIA0Kd3NzIDwtIChucm93KG15ZGF0YSktMSkqc3VtKGFwcGx5KGRhdGEuZnJhbWUobXlkYXRhKSwyLHZhcikpIA0KZm9yIChpIGluIDI6MjApIHsNCiAgd3NzW2ldIDwtIHN1bShrbWVhbnMoZGF0YS5mcmFtZShteWRhdGEpLCBjZW50ZXJzPWkpJHdpdGhpbnNzKQ0KfQ0KcGxvdCgxOjIwLCB3c3MsIHR5cGU9ImIiLCB4bGFiPSJOdW1iZXIgb2YgQ2x1c3RlcnMiLA0KICAgICB5bGFiPSJXaXRoaW4gZ3JvdXBzIHN1bSBvZiBzcXVhcmVzIiwgbWFpbj0iV2l0aGluIGNsdXN0ZXIgc3VtIG9mIHNxdWFyZXMgKFdDU1MpIikNCg0Kc29tX2NsdXN0ZXIgPC0gY3V0cmVlKGhjbHVzdChkaXN0KGRhdGEuZnJhbWUoc29tX21vZGVsRXNwJGNvZGVzKSkpLCA2KQ0KDQoNCmBgYA0KDQpJbiB0aGlzIGdyYXBoaWMgd2UgY2FuIHNlZSB0aGF0IHRoZSBpZGVhbCBhbW91bnQgb2YgY2x1c3RlcnMgaXMgbm90IG1vcmUgdGhhbiA1LiBTdGFydGluZyB3aXRoIDUgYW5kIGdvaW5nIGRvd24gd2Ugd2lsbCBjaG9vc2UgdGhlIHNtYWxsZXN0IG51bWJlciB0aGF0IGdpdmVzIHVzIGEgc21vb3RoIGRpc3RyaWJ1dGlvbiBpbiBxdWFudGl0eSBvZiBpdCdzIG1lbWJlcnM6DQoNCmBgYHtyfQ0KIyA3IENsdXN0ZXJzIC0tIHRoZSBmaXJzdCBjbHVzdGVyIGhhcyBjb25zaWRlcmFibHkgbGVzcyBtZW1iZXJzIHRoYW4gdGhlIG90aGVyIGZvdXIgb25lcy4NCnRhYmxlKGN1dHJlZShoY2x1c3QoZGlzdChkYXRhLmZyYW1lKHNvbV9tb2RlbEVzcCRjb2RlcykpKSwgMykpDQoNCg0KIyA1IENsdXN0ZXJzIC0tIHRoZSBmaXJzdCBjbHVzdGVyIGhhcyBjb25zaWRlcmFibHkgbGVzcyBtZW1iZXJzIHRoYW4gdGhlIG90aGVyIGZvdXIgb25lcy4NCnRhYmxlKGN1dHJlZShoY2x1c3QoZGlzdChkYXRhLmZyYW1lKHNvbV9tb2RlbEVzcCRjb2RlcykpKSwgNSkpDQoNCiM0IGNsdXN0ZXJzIC0tIHdlIGhhdmUgZm91bmRlIHRoZSBnb29kIG9uZS4NCnRhYmxlKGN1dHJlZShoY2x1c3QoZGlzdChkYXRhLmZyYW1lKHNvbV9tb2RlbEVzcCRjb2RlcykpKSwgNCkpDQpgYGANCg0KDQpgYGB7ciwgZmlnLmhlaWdodD0gMTB9DQpwcmV0dHlfcGFsZXR0ZSA8LSBjKCIjMWY3N2I0IiwgJyNmZjdmMGUnLCAnIzJjYTAyYycsICcjZDYyNzI4JywgJyM5NDY3YmQnLCAnIzhjNTY0YicsICcjZTM3N2MyJykNCg0KcGxvdChzb21fbW9kZWxFc3AsIHR5cGU9Im1hcHBpbmciLCBiZ2NvbCA9IHByZXR0eV9wYWxldHRlW3NvbV9jbHVzdGVyXSwgbWFpbiA9ICJDbHVzdGVycyIsIGtlZXBNYXJnaW5zID0gVFJVRSkgDQphZGQuY2x1c3Rlci5ib3VuZGFyaWVzKHNvbV9tb2RlbEVzcCwgc29tX2NsdXN0ZXIsIGx3ZCA9IDcpDQpgYGANCg0K