Datos

Datos_3_3_Caso <- data.frame(
CCAA = c("Espana","Andalucia","Aragon","Asturias","Baleares","Canarias",
"Cantabria","Castilla y Leon","Cast.-La Mancha","Cataluna",
"Com. Valenciana","Extremadura","Galicia","Madrid","Murcia",
"Navarra","Pais Vasco","La Rioja"),
automovi = c(69.0,66.7,67.2,63.7,71.9,72.7,63.4,65.8,61.5,70.4,72.7,60.5,65.5,74.9,69.0,72.4,71.3,64.9),
tvcolor = c(97.6,98.0,97.5,95.2,98.8,96.8,99.0,97.4,97.3,98.1,98.4,97.7,91.3,98.7,98.7,99.2,99.3,98.6),
video = c(62.4,82.7,56.8,50.4,62.4,39.8,49.9,47.7,39.7,71.1,68.2,42.0,42.7,68.7,59.3,69.8,61.6,54.4),
microond = c(32.3,24.1,43.4,29.4,29.8,23.9,35.6,28.1,27.7,36.8,29.7,33.2,13.5,40.3,19.1,35.0,45.7,44.4),
lavavaji = c(17.0,12.7,20.2,13.3,10.1,5.2,11.0,14.0,7.0,19.8,11.5,13.2,14.6,32.4,10.7,22.5,23.7,17.6),
telefono = c(85.2,74.7,84.7,88.1,88.8,73.4,80.5,85.0,82.9,92.9,84.4,81.5,82.4,90.8,81.4,89.7,97.4,83.4)
)

Distancia de Mahalanobis (Cuadro 3.23)

D2 <- mahalanobis(Datos_3_3_Caso[,-1],
colMeans(Datos_3_3_Caso[,-1]),
cov(Datos_3_3_Caso[,-1]))

pvalue <- 1 - pchisq(D2, df = 6)

kable(
  cbind(CCAA = Datos_3_3_Caso$CCAA, D2 = D2, p_value = pvalue),
  caption = "Distancia de Mahalanobis y p-values (Cuadro 3.23)",
  align = "c",
  digits = 4
)
Distancia de Mahalanobis y p-values (Cuadro 3.23)
CCAA D2 p_value
Espana 0.285529490979512 0.999564109397034
Andalucia 12.3338438163011 0.0549223418067925
Aragon 4.61868651189262 0.593562660689252
Asturias 4.33233501792167 0.631798960633162
Baleares 5.10542129585225 0.530364999009056
Canarias 12.8895499303021 0.0448238194989306
Cantabria 3.00514896261354 0.808200303820504
Castilla y Leon 1.91284635250955 0.927536958538267
Cast.-La Mancha 4.22487747665658 0.646273721887997
Cataluna 3.44806178221917 0.750866539029342
Com. Valenciana 3.72731781742071 0.713520824303198
Extremadura 4.71304077699256 0.58110856374489
Galicia 12.3926037393498 0.0537619862311076
Madrid 9.89784163546484 0.129020578990423
Murcia 7.03284077124648 0.317820784778911
Navarra 2.54966157831187 0.8628688667223
Pais Vasco 5.17364623666483 0.521742568303784
La Rioja 4.35674680730077 0.62851830068773

Métodos jerárquicos

matriz.dis.euclid.caso3 <- dist(Datos_3_3_Caso[,-1])

hclust.single.caso3 <- hclust(matriz.dis.euclid.caso3, method="single")
hclust.complete.caso3 <- hclust(matriz.dis.euclid.caso3, method="complete")
hclust.average.caso3 <- hclust(matriz.dis.euclid.caso3, method="average")
hclust.centroid.caso3 <- hclust(matriz.dis.euclid.caso3, method="centroid")
hclust.ward.caso3 <- hclust(matriz.dis.euclid.caso3, method="ward.D2")

NbClust (código del libro)

Datos.NbClust <- Datos_3_3_Caso[,2:7]

res.wardD2 <- NbClust(
Datos.NbClust,
distance = "euclidean",
min.nc = 2,
max.nc = 15,
method = "ward.D2",
index = "alllong"
)

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
## 

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##  
## ******************************************************************* 
## * Among all indices:                                                
## * 7 proposed 2 as the best number of clusters 
## * 7 proposed 3 as the best number of clusters 
## * 2 proposed 4 as the best number of clusters 
## * 2 proposed 7 as the best number of clusters 
## * 1 proposed 12 as the best number of clusters 
## * 3 proposed 14 as the best number of clusters 
## * 5 proposed 15 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  2 
##  
##  
## *******************************************************************

Grupos.

grupo.ward <- cutree(hclust.ward.caso3, k = 2)
datos.caso3.grupos <- cbind(Datos_3_3_Caso, grupo.ward)

Centroides.

kable(
round(aggregate(datos.caso3.grupos[,2:7], list(grupo.ward), mean), 2),
caption = "Centroides por grupo (Cuadro 3.24)",
align = "c"
)
Centroides por grupo (Cuadro 3.24)
Group.1 automovi tvcolor video microond lavavaji telefono
1 70.04 98.45 65.22 34.60 18.02 86.67
2 64.73 96.39 44.60 27.34 11.19 81.97

K-Means.

c1 <- c(66.87,96.82,56.01,25.43,11.81,80.71)
c2 <- c(70.78,98.53,63.47,44.70,22.43,90.23)

solucion <- kmeans(datos.caso3.grupos[,2:7], centers = rbind(c1, c2))

kable(
solucion$centers,
caption = "Centroides resultantes del metodo no jerarquico (Cuadro 3.25)",
align = "c",
digits = 2
)
Centroides resultantes del metodo no jerarquico (Cuadro 3.25)
automovi tvcolor video microond lavavaji telefono
65.26 96.67 46.44 26.31 11.12 81.9
70.14 98.42 65.81 36.15 18.75 87.2

Pruebas t.

kable(
broom::tidy(t.test(automovi ~ solucion$cluster, data = datos.caso3.grupos)),
caption = "Resultados de la prueba t para automovi (Cuadro 3.26)",
align = "c",
digits = 4
)
Resultados de la prueba t para automovi (Cuadro 3.26)
estimate estimate1 estimate2 statistic p.value parameter conf.low conf.high method alternative
-4.8775 65.2625 70.14 -2.823 0.0143 13.0731 -8.608 -1.147 Welch Two Sample t-test two.sided