algas <- read.table("algas.txt", T, "\t")
## Scy Pcol Iri Ulv Cer Gel Gra Cor Pumb Lit
## 1 2 9 4 4 0 0 0 0 0 0
## 2 6 6 4 2 0 0 0 0 0 0
## 3 2 6 4 2 0 0 0 0 0 0
## 4 0 2 7 4 0 0 0 0 0 0
## 5 0 0 8 2 0 2 2 2 2 0
## 6 0 2 9 6 2 0 0 0 0 2
## 7 0 0 9 4 0 0 2 5 2 6
## 8 0 2 8 4 2 2 2 5 2 5
## 9 0 2 8 4 2 2 6 5 2 6
## 10 0 0 5 5 2 2 2 8 2 0
## 11 0 0 0 0 0 2 2 4 0 9
## 12 0 0 0 0 2 2 6 5 2 8
algas.manh <- dist(algas, "manhattan", diag = T)
## 1 2 3 4 5 6 7 8 9 10 11 12
## 1 0
## 2 9 0
## 3 5 4 0
## 4 12 15 11 0
## 5 25 24 20 13 0
## 6 20 23 19 8 19 0
## 7 31 34 30 19 14 19 0
## 8 31 34 30 19 14 17 8 0
## 9 36 39 35 24 19 22 11 5 0
## 10 29 32 28 21 14 23 18 14 19 0
## 11 36 35 31 30 23 34 21 23 26 27 0
## 12 44 43 39 38 27 38 23 21 16 25 10 0
algas.c <- hclust(algas.manh, "average")
plot(algas.c, hang = -1)
cor(cophenetic(algas.c), algas.manh)
## [1] 0.7858
ieva <- read.table("ieva.txt", T, "\t", row.names = 1)
## antenas cabeza ojos boca idioma cuello cuerpo corazon brazo rodillas
## Af 1 0 0 1 1 0 1 1 0 0
## Ef 1 1 0 1 0 0 1 1 0 0
## If 0 1 1 1 0 1 1 1 0 1
## Of 0 1 1 0 0 1 1 0 1 1
## Uf 0 1 1 0 0 1 0 0 1 1
library(vegan)
ieva.SM <- designdist(ieva, "(a+d)/P", abcd = T)
## Af Ef If Of
## Ef 0.8
## If 0.4 0.6
## Of 0.1 0.3 0.7
## Uf 0.0 0.2 0.6 0.9
ieva.c <- hclust(1 - ieva.SM, method = "single")
NOTA: como estamos trabajando con matriz de asociación, utilizamos \( 1 - ieva.SM \) para tomar la matriz de distancia.
plot(ieva.c)
plot(ieva.c, hang = -1, axes = F, main = "Indice de Asociación de Simple Matching")
axis(2, seq(0, 0.4, 0.1), seq(1, 0.6, -0.1))
cor(cophenetic(ieva.c), 1 - ieva.SM)
## [1] 0.8152
proteinas <- read.table("proteinas.txt", T, "\t", row.names = 1)
## C_Roja C_Blanca HUEVOS LECHE PESCADO CEREAL EMBUTIDOS
## Albania 10.1 1.4 0.5 8.9 0.2 42.3 0.6
## Austria 8.9 14.0 4.3 19.9 2.1 28.0 3.6
## Belgica 13.5 9.3 4.1 17.5 4.5 26.6 5.7
## Bulgaria 7.8 6.0 1.6 8.3 1.2 56.7 1.1
## Checosl 9.7 11.4 2.8 12.5 2.0 34.3 5.0
## Dinamarca 10.6 10.8 3.7 25.0 9.9 21.9 4.8
kmeans(proteinas, centers = 5)$cluster
## Bulgaria Rumania Yugoslavia Austria Belgica Francia
## 1 1 1 2 2 2
## Irlanda P_Bajos Suiza Inglaterra AlemaniaO AlemaniaE
## 2 2 2 2 2 3
## Portugal Espania Dinamarca Finlandia Noruega Suecia
## 3 3 4 4 4 4
## Albania Checosl Grecia Hungria Italia Polonia
## 5 5 5 5 5 5
## Rusia
## 5
SCDG <- sapply(lapply(1:6, function(g) {kmeans(proteinas, centers = g)$withinss}), sum)
plot(SCDG, type = "b")