Datos

Datos_3_3_Caso <- data.frame(
CCAA = c("Espana","Andalucia","Aragon","Asturias","Baleares","Canarias",
"Cantabria","Castilla y Leon","Cast.-La Mancha","Cataluna",
"Com. Valenciana","Extremadura","Galicia","Madrid","Murcia",
"Navarra","Pais Vasco","La Rioja"),
automovi = c(69.0,66.7,67.2,63.7,71.9,72.7,63.4,65.8,61.5,70.4,72.7,60.5,65.5,74.9,69.0,72.4,71.3,64.9),
tvcolor = c(97.6,98.0,97.5,95.2,98.8,96.8,99.0,97.4,97.3,98.1,98.4,97.7,91.3,98.7,98.7,99.2,99.3,98.6),
video = c(62.4,82.7,56.8,50.4,62.4,39.8,49.9,47.7,39.7,71.1,68.2,42.0,42.7,68.7,59.3,69.8,61.6,54.4),
microond = c(32.3,24.1,43.4,29.4,29.8,23.9,35.6,28.1,27.7,36.8,29.7,33.2,13.5,40.3,19.1,35.0,45.7,44.4),
lavavaji = c(17.0,12.7,20.2,13.3,10.1,5.2,11.0,14.0,7.0,19.8,11.5,13.2,14.6,32.4,10.7,22.5,23.7,17.6),
telefono = c(85.2,74.7,84.7,88.1,88.8,73.4,80.5,85.0,82.9,92.9,84.4,81.5,82.4,90.8,81.4,89.7,97.4,83.4)
)

Distancia de Mahalanobis (Cuadro 3.23)

D2 <- mahalanobis(Datos_3_3_Caso[,-1],
colMeans(Datos_3_3_Caso[,-1]),
cov(Datos_3_3_Caso[,-1]))

pvalue <- 1 - pchisq(D2, df = 6)

kable(
  cbind(CCAA = Datos_3_3_Caso$CCAA, D2 = D2, p_value = pvalue),
  caption = "Distancia de Mahalanobis y p-values (Cuadro 3.23)",
  align = "c",
  digits = 4
)

Distancia de Mahalanobis y p-values (Cuadro 3.23)
CCAA	D2	p_value
Espana	0.285529490979512	0.999564109397034
Andalucia	12.3338438163011	0.0549223418067925
Aragon	4.61868651189262	0.593562660689252
Asturias	4.33233501792167	0.631798960633162
Baleares	5.10542129585225	0.530364999009056
Canarias	12.8895499303021	0.0448238194989306
Cantabria	3.00514896261354	0.808200303820504
Castilla y Leon	1.91284635250955	0.927536958538267
Cast.-La Mancha	4.22487747665658	0.646273721887997
Cataluna	3.44806178221917	0.750866539029342
Com. Valenciana	3.72731781742071	0.713520824303198
Extremadura	4.71304077699256	0.58110856374489
Galicia	12.3926037393498	0.0537619862311076
Madrid	9.89784163546484	0.129020578990423
Murcia	7.03284077124648	0.317820784778911
Navarra	2.54966157831187	0.8628688667223
Pais Vasco	5.17364623666483	0.521742568303784
La Rioja	4.35674680730077	0.62851830068773

Métodos jerárquicos

matriz.dis.euclid.caso3 <- dist(Datos_3_3_Caso[,-1])

hclust.single.caso3 <- hclust(matriz.dis.euclid.caso3, method="single")
hclust.complete.caso3 <- hclust(matriz.dis.euclid.caso3, method="complete")
hclust.average.caso3 <- hclust(matriz.dis.euclid.caso3, method="average")
hclust.centroid.caso3 <- hclust(matriz.dis.euclid.caso3, method="centroid")
hclust.ward.caso3 <- hclust(matriz.dis.euclid.caso3, method="ward.D2")

NbClust (código del libro)

Datos.NbClust <- Datos_3_3_Caso[,2:7]

res.wardD2 <- NbClust(
Datos.NbClust,
distance = "euclidean",
min.nc = 2,
max.nc = 15,
method = "ward.D2",
index = "alllong"
)

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
##

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##  
## ******************************************************************* 
## * Among all indices:                                                
## * 7 proposed 2 as the best number of clusters 
## * 7 proposed 3 as the best number of clusters 
## * 2 proposed 4 as the best number of clusters 
## * 2 proposed 7 as the best number of clusters 
## * 1 proposed 12 as the best number of clusters 
## * 3 proposed 14 as the best number of clusters 
## * 5 proposed 15 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  2 
##  
##  
## *******************************************************************

Grupos.

grupo.ward <- cutree(hclust.ward.caso3, k = 2)
datos.caso3.grupos <- cbind(Datos_3_3_Caso, grupo.ward)

Centroides.

kable(
round(aggregate(datos.caso3.grupos[,2:7], list(grupo.ward), mean), 2),
caption = "Centroides por grupo (Cuadro 3.24)",
align = "c"
)

Centroides por grupo (Cuadro 3.24)
Group.1	automovi	tvcolor	video	microond	lavavaji	telefono
1	70.04	98.45	65.22	34.60	18.02	86.67
2	64.73	96.39	44.60	27.34	11.19	81.97

K-Means.

c1 <- c(66.87,96.82,56.01,25.43,11.81,80.71)
c2 <- c(70.78,98.53,63.47,44.70,22.43,90.23)

solucion <- kmeans(datos.caso3.grupos[,2:7], centers = rbind(c1, c2))

kable(
solucion$centers,
caption = "Centroides resultantes del metodo no jerarquico (Cuadro 3.25)",
align = "c",
digits = 2
)

Centroides resultantes del metodo no jerarquico (Cuadro 3.25)
automovi	tvcolor	video	microond	lavavaji	telefono
65.26	96.67	46.44	26.31	11.12	81.9
70.14	98.42	65.81	36.15	18.75	87.2

Pruebas t.

kable(
broom::tidy(t.test(automovi ~ solucion$cluster, data = datos.caso3.grupos)),
caption = "Resultados de la prueba t para automovi (Cuadro 3.26)",
align = "c",
digits = 4
)

Resultados de la prueba t para automovi (Cuadro 3.26)
estimate	estimate1	estimate2	statistic	p.value	parameter	conf.low	conf.high	method	alternative
-4.8775	65.2625	70.14	-2.823	0.0143	13.0731	-8.608	-1.147	Welch Two Sample t-test	two.sided

A32 - Analisis de Conglomerados

Fatima Maria Barrera Orellana - Carne BO21003

2025-11-22

Datos

Distancia de Mahalanobis (Cuadro 3.23)

Métodos jerárquicos

NbClust (código del libro)

Grupos.

Centroides.

K-Means.

Pruebas t.