El dataset task9data2.txt que se ha utilizado se puede descargar de task9data2.txt.
# Read the raw table from disk and show it as loaded.
clust <- read.table(
  file = "task9data2.txt",
  quote = "\"",
  comment.char = ""
)
clust
# Column names; the country column is also used for the row names
cnames <- c("Pais", "wine", "heart")
names(clust) <- cnames
row.names(clust) <- clust[["Pais"]]
clust
# Hierarchical clustering
# Build the Euclidean distance matrix from the numeric columns only. Passing
# the whole data frame would also hand dist() the character column "Pais",
# which cannot be converted to numbers and is coerced to NA with a warning.
# Row names are kept by the subset, so the distances stay labelled by country.
d <- dist(clust[, c("wine", "heart")], method = "euclidean")
# Hierarchical clustering on the distance matrix (hclust() default linkage).
fit <- hclust(d)
# Number of clusters, defined once so that cutree() and rect.hclust() below
# always agree on where the tree is cut.
n_groups <- 5
# Draw the dendrogram (once; rect.hclust() draws on top of this plot)
plot(fit)
# Cut the tree into n_groups clusters; one cluster id per row of the data
groups <- cutree(fit, k = n_groups)
# Outline those same n_groups clusters with red borders on the dendrogram
rect.hclust(fit, k = n_groups, border = "red")
groups
## Australia Austria Belgium/Lux Canada Denmark
## 1 2 3 2 1
## Finland France Iceland Ireland Italy
## 4 5 1 4 3
## Netherlands New_Zealand Norway Spain Sweden
## 2 4 1 5 1
## Switzerland England United_States Germany
## 3 4 2 2
# Count how many rows fall into each hierarchical cluster and render the
# counts as a table.
library(knitr)
group_sizes <- table(groups)
kable(group_sizes)
groups | Freq |
---|---|
1 | 5 |
2 | 5 |
3 | 3 |
4 | 4 |
5 | 2 |
# Scatter plot of the wine and heart columns, with each point labelled by
# country and colored by its hierarchical cluster.
plot(clust$wine, clust$heart)
# The country names live in the "Pais" column; there is no "country" column,
# so clust$country would be NULL and no labels could be drawn.
text(x = clust$wine, y = clust$heart, labels = clust$Pais, col = groups)
# Store the cluster assignment as an extra variable for each country. The
# first column ("Pais") is dropped; the row names already carry the country.
Data_Hierarchical <- cbind(clust[, -1], groups)
print(Data_Hierarchical)
## wine heart groups
## Australia 2.5 211 1
## Austria 3.9 167 2
## Belgium/Lux 2.9 131 3
## Canada 2.4 191 2
## Denmark 2.9 220 1
## Finland 0.8 297 4
## France 9.1 71 5
## Iceland 0.8 211 1
## Ireland 0.7 300 4
## Italy 7.9 107 3
## Netherlands 1.8 167 2
## New_Zealand 1.9 266 4
## Norway 0.8 227 1
## Spain 6.5 86 5
## Sweden 1.6 207 1
## Switzerland 5.8 115 3
## England 1.3 285 4
## United_States 1.2 199 2
## Germany 2.7 172 2
# K-means
# Fix the RNG seed first: kmeans() chooses its starting centers at random, so
# without a seed the cluster numbering (and the colors derived from it in
# later plots) can differ from one run to the next.
set.seed(123)
# Three clusters on the two numeric columns; nstart = 10 tries ten random
# starts and keeps the best solution.
clustk <- kmeans(clust[, c("wine", "heart")], centers = 3, nstart = 10)
clustk
## K-means clustering with 3 clusters of sizes 4, 5, 10
##
## Cluster means:
## wine heart
## 1 1.175 287.0
## 2 6.440 102.0
## 3 2.060 197.2
##
## Clustering vector:
## Australia Austria Belgium/Lux Canada Denmark
## 3 3 2 3 3
## Finland France Iceland Ireland Italy
## 1 2 3 1 2
## Netherlands New_Zealand Norway Spain Sweden
## 3 1 3 2 3
## Switzerland England United_States Germany
## 2 1 3 3
##
## Within cluster sum of squares by cluster:
## [1] 714.9075 2274.1520 4394.6040
## (between_SS / total_SS = 91.2 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Scatter plot of the wine and heart columns, with each point labelled by
# country and colored by its k-means cluster.
plot(clust$wine, clust$heart, xlab = "wine", ylab = "heart")
# The country names live in the "Pais" column; there is no "country" column,
# so clust$country would be NULL and no labels could be drawn.
# The +1 shifts the cluster ids 1..3 to color indices 2..4.
text(x = clust$wine, y = clust$heart, labels = clust$Pais,
     col = clustk$cluster + 1)
# Add the k-means cluster as a new variable next to the original data. The
# argument is left unnamed, so the new column is called "clustk$cluster".
Data_k <- cbind(clust[, -1], clustk$cluster)
print(Data_k)
## wine heart clustk$cluster
## Australia 2.5 211 3
## Austria 3.9 167 3
## Belgium/Lux 2.9 131 2
## Canada 2.4 191 3
## Denmark 2.9 220 3
## Finland 0.8 297 1
## France 9.1 71 2
## Iceland 0.8 211 3
## Ireland 0.7 300 1
## Italy 7.9 107 2
## Netherlands 1.8 167 3
## New_Zealand 1.9 266 1
## Norway 0.8 227 3
## Spain 6.5 86 2
## Sweden 1.6 207 3
## Switzerland 5.8 115 2
## England 1.3 285 1
## United_States 1.2 199 3
## Germany 2.7 172 3