Cluster jerárquico.

El conjunto de datos utilizado, task9data2.txt, se puede descargar desde el enlace task9data2.txt.

# Load the raw data set. Only the double quote is treated as a quoting
# character and comment parsing is switched off.
clust <- read.table(
  file = "task9data2.txt",
  quote = "\"",
  comment.char = ""
)
# Show the table exactly as it was read.
clust

# Label the three columns and use the country column as row names
cnames <- c("Pais", "wine", "heart")
names(clust) <- cnames
row.names(clust) <- clust[["Pais"]]
clust

# Hierarchical clustering
# Build the Euclidean distance matrix from the numeric variables only.
# Passing the whole data frame also hands the character column "Pais" to
# dist(), which coerces it to NA (with a warning) and rescales every distance
# to compensate for the column it had to drop.
# NOTE(review): wine and heart are on very different scales, so heart
# dominates the distances; consider scale() if that is not intended.
d <- dist(clust[, c("wine", "heart")], method = "euclidean")

## Warning in dist(clust, method = "euclidean"): NAs introducidos por coerción

# Agglomerative clustering on the distance matrix, using complete linkage
# (the hclust() default, spelled out here).
fit <- hclust(d, method = "complete")
# Draw the dendrogram
plot(fit)

# Number of clusters to extract from the tree; defined once so that the cut
# and the rectangles drawn on the dendrogram always agree.
n_clusters <- 5

plot(fit)
# Cut the tree into n_clusters groups
groups <- cutree(fit, k = n_clusters)

# Draw red borders around those same n_clusters clusters on the dendrogram
rect.hclust(fit, k = n_clusters, border = "red")

# Show the cluster assigned to each country
groups

##     Australia       Austria   Belgium/Lux        Canada       Denmark 
##             1             2             3             2             1 
##       Finland        France       Iceland       Ireland         Italy 
##             4             5             1             4             3 
##   Netherlands   New_Zealand        Norway         Spain        Sweden 
##             2             4             1             5             1 
##   Switzerland       England United_States       Germany 
##             3             4             2             2

library(knitr)
# Count how many observations fall in each hierarchical cluster and render
# the counts as a table.
group_sizes <- table(groups)
kable(group_sizes)

groups	Freq
1	5
2	5
3	3
4	4
5	2

# Scatter plot of wine vs. heart with every point labelled by country and
# coloured by its hierarchical cluster. The country names live in the "Pais"
# column; clust$country does not exist (it is NULL), so it cannot supply the
# labels.
plot(clust$wine, clust$heart)
text(x = clust$wine, y = clust$heart, labels = clust$Pais, col = groups)

# Attach each country's hierarchical cluster as an extra column next to the
# two numeric variables (the country column is left out; it is already the
# row names).
Data_Hierarchical <- cbind(clust[, c("wine", "heart")], groups = groups)
print(Data_Hierarchical)

##               wine heart groups
## Australia      2.5   211      1
## Austria        3.9   167      2
## Belgium/Lux    2.9   131      3
## Canada         2.4   191      2
## Denmark        2.9   220      1
## Finland        0.8   297      4
## France         9.1    71      5
## Iceland        0.8   211      1
## Ireland        0.7   300      4
## Italy          7.9   107      3
## Netherlands    1.8   167      2
## New_Zealand    1.9   266      4
## Norway         0.8   227      1
## Spain          6.5    86      5
## Sweden         1.6   207      1
## Switzerland    5.8   115      3
## England        1.3   285      4
## United_States  1.2   199      2
## Germany        2.7   172      2

k means en R de forma corta

# K-means
# Fix the RNG seed first: kmeans() chooses its starting centres at random, so
# without a seed the cluster numbering (and potentially the partition) can
# change from one run to the next.
set.seed(123)
# Three clusters on the two numeric variables, keeping the best of 10 random
# starts.
clustk <- kmeans(clust[, c("wine", "heart")], centers = 3, nstart = 10)
clustk

## K-means clustering with 3 clusters of sizes 4, 5, 10
## 
## Cluster means:
##    wine heart
## 1 1.175 287.0
## 2 6.440 102.0
## 3 2.060 197.2
## 
## Clustering vector:
##     Australia       Austria   Belgium/Lux        Canada       Denmark 
##             3             3             2             3             3 
##       Finland        France       Iceland       Ireland         Italy 
##             1             2             3             1             2 
##   Netherlands   New_Zealand        Norway         Spain        Sweden 
##             3             1             3             2             3 
##   Switzerland       England United_States       Germany 
##             2             1             3             3 
## 
## Within cluster sum of squares by cluster:
## [1]  714.9075 2274.1520 4394.6040
##  (between_SS / total_SS =  91.2 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

# Scatter plot of wine vs. heart with every point labelled by country and
# coloured by its k-means cluster (offset by 1 so cluster 1 is not drawn in
# the default black). The country names live in the "Pais" column;
# clust$country does not exist (it is NULL), so it cannot supply the labels.
plot(clust$wine, clust$heart, xlab = "wine", ylab = "heart")
text(x = clust$wine, y = clust$heart, labels = clust$Pais,
     col = clustk$cluster + 1)

# Append the k-means cluster assignment as a new column next to the two
# numeric variables. The second argument is left unnamed so the resulting
# column keeps its deparsed name.
Data_k <- cbind(clust[, c("wine", "heart")], clustk$cluster)
print(Data_k)

##               wine heart clustk$cluster
## Australia      2.5   211              3
## Austria        3.9   167              3
## Belgium/Lux    2.9   131              2
## Canada         2.4   191              3
## Denmark        2.9   220              3
## Finland        0.8   297              1
## France         9.1    71              2
## Iceland        0.8   211              3
## Ireland        0.7   300              1
## Italy          7.9   107              2
## Netherlands    1.8   167              3
## New_Zealand    1.9   266              1
## Norway         0.8   227              3
## Spain          6.5    86              2
## Sweden         1.6   207              3
## Switzerland    5.8   115              2
## England        1.3   285              1
## United_States  1.2   199              3
## Germany        2.7   172              3

cluster jerarquico y kmedias 2

Jaime Isaac Peña Mejía

23/4/2021

Cluster jerárquico.

k means en R de forma corta