library(readr)
bin500 <- read_csv("~/GENER/SOMs/Dra Anabel/1 Raw Data/Representacion_Binaria_500.csv")
bin500 <- bin[,c(-1,-3,-14:-20)]
bin500 <- as.data.frame(bin)
La ‘dataframe’ que se obtuvo esta compuesta por valores numericos:
str(bin500)
'data.frame': 500 obs. of 11 variables:
$ Aptitud: num 0.394 0.386 0.364 0.382 0.35 ...
$ UX1 : num 9.024 0.766 5.418 4.499 0.678 ...
$ UX2 : num 4.98 8.4 5.48 2.02 1.75 ...
$ UX3 : num 8.64 5.66 8.81 8.3 6.66 ...
$ UX4 : num 1.85 4.8 7.13 4.92 4.67 ...
$ UX5 : num 4.95 6.95 1.05 6.63 1.84 ...
$ UY1 : num 1.68 3.7 5.31 7.93 1.89 ...
$ UY2 : num 1.89 8.72 2.66 4.02 3.58 ...
$ UY3 : num 6.36 3.3 9.14 8.98 1.32 ...
$ UY4 : num 3.51 8.25 4 3.49 4.64 ...
$ UY5 : num 8.172 0.348 5.015 6.615 8.091 ...
Estadistica descriptiva de los datos:
summary(bin500)
Aptitud UX1 UX2 UX3 UX4 UX5
Min. :0.2820 Min. :0.0822 Min. :0.1533 Min. :0.0042 Min. :0.1101 Min. :0.1495
1st Qu.:0.3500 1st Qu.:2.5692 1st Qu.:3.0093 1st Qu.:3.0239 1st Qu.:2.9253 1st Qu.:3.2197
Median :0.3640 Median :5.2568 Median :5.6768 Median :5.5091 Median :5.2350 Median :5.6193
Mean :0.3647 Mean :5.0724 Mean :5.4305 Mean :5.2850 Mean :5.1499 Mean :5.4737
3rd Qu.:0.3800 3rd Qu.:7.7322 3rd Qu.:7.8102 3rd Qu.:7.6499 3rd Qu.:7.4742 3rd Qu.:7.9033
Max. :0.4380 Max. :9.8986 Max. :9.9236 Max. :9.7131 Max. :9.6506 Max. :9.9887
UY1 UY2 UY3 UY4 UY5
Min. :0.1708 Min. :0.3563 Min. :0.0001 Min. :0.0281 Min. :0.2262
1st Qu.:3.2290 1st Qu.:2.9097 1st Qu.:2.9924 1st Qu.:3.1246 1st Qu.:3.3243
Median :5.1716 Median :4.5949 Median :4.7825 Median :5.0133 Median :5.2421
Mean :5.2519 Mean :4.8649 Mean :5.0182 Mean :5.0376 Mean :5.2753
3rd Qu.:7.6833 3rd Qu.:7.2608 3rd Qu.:7.2951 3rd Qu.:7.3563 3rd Qu.:7.5692
Max. :9.8777 Max. :9.6523 Max. :9.5358 Max. :9.6957 Max. :9.7919
require(kohonen)
bin.matrix <- as.matrix(bin500)
som_grid <- somgrid(xdim =10 , ydim = 10, topo = "rectangular")
plot(som_model, type = "changes")
source('coolBlueHotRed.R')
plot(som_model, type = "counts", palette.name=coolBlueHotRed)
source("coolBlueHotRed.R")
models <- as.data.frame(som_model$codes)
plot(som_model,
type = "property",
property = models[,1],
main=names(models)[1],
palette.name=coolBlueHotRed)
plot(som_model, type = "dist.neighbours", palette.name = grey.colors)
plot(som_model, type = "codes", palette.name = coolBlueHotRed)
mydata <- som_model$codes
wss <- (nrow(mydata)-1)*sum(apply(data.frame(mydata),2,var))
for (i in 2:20) {
wss[i] <- sum(kmeans(data.frame(mydata), centers=i)$withinss)
}
plot(1:20, wss, type="b", xlab="Number of Clusters",
ylab="Within groups sum of squares", main="Within cluster sum of squares (WCSS)")
La grafica anterior nos ayuda a decidir la cantidad ideal de clusters para agrupar los datos.
# 7 Clusters -- the first cluster has considerably less members than the other four ones.
#table(cutree(hclust(dist(data.frame(som_model$codes))), 3))
# 5 Clusters -- the first cluster has considerably less members than the other four ones.
#table(cutree(hclust(dist(data.frame(som_model$codes))), 5))
#4 clusters -- we have founde the good one.
table(cutree(hclust(dist(data.frame(som_model$codes))), 10))
1 2 3 4 5 6 7 8 9 10
9 10 7 7 5 15 12 5 18 12
pretty_palette <- c("#1f77b4", '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#89f442', '#f44183', '#4143f4', '#41e8f4', '#f4f141', '#849947', '#773737', '#1a7046')
som_cluster <- cutree(hclust(dist(data.frame(som_model$codes))), 15)
plot(som_model, type="mapping", bgcol = pretty_palette[som_cluster], main = "Clusters", keepMargins = TRUE)
add.cluster.boundaries(som_model, som_cluster, lwd = 5)
codes <- as.data.frame(som_model$codes)
codesOrdered <- codes[order(codes$Aptitud, decreasing = TRUE),]
plot(codes)