Datos
Datos_3_3_Caso <- data.frame(
CCAA = c("Espana","Andalucia","Aragon","Asturias","Baleares","Canarias",
"Cantabria","Castilla y Leon","Cast.-La Mancha","Cataluna",
"Com. Valenciana","Extremadura","Galicia","Madrid","Murcia",
"Navarra","Pais Vasco","La Rioja"),
automovi = c(69.0,66.7,67.2,63.7,71.9,72.7,63.4,65.8,61.5,70.4,72.7,60.5,65.5,74.9,69.0,72.4,71.3,64.9),
tvcolor = c(97.6,98.0,97.5,95.2,98.8,96.8,99.0,97.4,97.3,98.1,98.4,97.7,91.3,98.7,98.7,99.2,99.3,98.6),
video = c(62.4,82.7,56.8,50.4,62.4,39.8,49.9,47.7,39.7,71.1,68.2,42.0,42.7,68.7,59.3,69.8,61.6,54.4),
microond = c(32.3,24.1,43.4,29.4,29.8,23.9,35.6,28.1,27.7,36.8,29.7,33.2,13.5,40.3,19.1,35.0,45.7,44.4),
lavavaji = c(17.0,12.7,20.2,13.3,10.1,5.2,11.0,14.0,7.0,19.8,11.5,13.2,14.6,32.4,10.7,22.5,23.7,17.6),
telefono = c(85.2,74.7,84.7,88.1,88.8,73.4,80.5,85.0,82.9,92.9,84.4,81.5,82.4,90.8,81.4,89.7,97.4,83.4)
)
Distancia de Mahalanobis (Cuadro 3.23)
D2 <- mahalanobis(Datos_3_3_Caso[,-1],
colMeans(Datos_3_3_Caso[,-1]),
cov(Datos_3_3_Caso[,-1]))
pvalue <- 1 - pchisq(D2, df = 6)
kable(
cbind(CCAA = Datos_3_3_Caso$CCAA, D2 = D2, p_value = pvalue),
caption = "Distancia de Mahalanobis y p-values (Cuadro 3.23)",
align = "c",
digits = 4
)
Distancia de Mahalanobis y p-values (Cuadro 3.23)
| Espana |
0.285529490979512 |
0.999564109397034 |
| Andalucia |
12.3338438163011 |
0.0549223418067925 |
| Aragon |
4.61868651189262 |
0.593562660689252 |
| Asturias |
4.33233501792167 |
0.631798960633162 |
| Baleares |
5.10542129585225 |
0.530364999009056 |
| Canarias |
12.8895499303021 |
0.0448238194989306 |
| Cantabria |
3.00514896261354 |
0.808200303820504 |
| Castilla y Leon |
1.91284635250955 |
0.927536958538267 |
| Cast.-La Mancha |
4.22487747665658 |
0.646273721887997 |
| Cataluna |
3.44806178221917 |
0.750866539029342 |
| Com. Valenciana |
3.72731781742071 |
0.713520824303198 |
| Extremadura |
4.71304077699256 |
0.58110856374489 |
| Galicia |
12.3926037393498 |
0.0537619862311076 |
| Madrid |
9.89784163546484 |
0.129020578990423 |
| Murcia |
7.03284077124648 |
0.317820784778911 |
| Navarra |
2.54966157831187 |
0.8628688667223 |
| Pais Vasco |
5.17364623666483 |
0.521742568303784 |
| La Rioja |
4.35674680730077 |
0.62851830068773 |
Métodos jerárquicos
matriz.dis.euclid.caso3 <- dist(Datos_3_3_Caso[,-1])
hclust.single.caso3 <- hclust(matriz.dis.euclid.caso3, method="single")
hclust.complete.caso3 <- hclust(matriz.dis.euclid.caso3, method="complete")
hclust.average.caso3 <- hclust(matriz.dis.euclid.caso3, method="average")
hclust.centroid.caso3 <- hclust(matriz.dis.euclid.caso3, method="centroid")
hclust.ward.caso3 <- hclust(matriz.dis.euclid.caso3, method="ward.D2")
NbClust (código del libro)
Datos.NbClust <- Datos_3_3_Caso[,2:7]
res.wardD2 <- NbClust(
Datos.NbClust,
distance = "euclidean",
min.nc = 2,
max.nc = 15,
method = "ward.D2",
index = "alllong"
)

## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##

## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 7 proposed 2 as the best number of clusters
## * 7 proposed 3 as the best number of clusters
## * 2 proposed 4 as the best number of clusters
## * 2 proposed 7 as the best number of clusters
## * 1 proposed 12 as the best number of clusters
## * 3 proposed 14 as the best number of clusters
## * 5 proposed 15 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 2
##
##
## *******************************************************************
Grupos.
grupo.ward <- cutree(hclust.ward.caso3, k = 2)
datos.caso3.grupos <- cbind(Datos_3_3_Caso, grupo.ward)
Centroides.
kable(
round(aggregate(datos.caso3.grupos[,2:7], list(grupo.ward), mean), 2),
caption = "Centroides por grupo (Cuadro 3.24)",
align = "c"
)
Centroides por grupo (Cuadro 3.24)
| 1 |
70.04 |
98.45 |
65.22 |
34.60 |
18.02 |
86.67 |
| 2 |
64.73 |
96.39 |
44.60 |
27.34 |
11.19 |
81.97 |
K-Means.
c1 <- c(66.87,96.82,56.01,25.43,11.81,80.71)
c2 <- c(70.78,98.53,63.47,44.70,22.43,90.23)
solucion <- kmeans(datos.caso3.grupos[,2:7], centers = rbind(c1, c2))
kable(
solucion$centers,
caption = "Centroides resultantes del metodo no jerarquico (Cuadro 3.25)",
align = "c",
digits = 2
)
Centroides resultantes del metodo no jerarquico (Cuadro
3.25)
| 65.26 |
96.67 |
46.44 |
26.31 |
11.12 |
81.9 |
| 70.14 |
98.42 |
65.81 |
36.15 |
18.75 |
87.2 |
Pruebas t.
kable(
broom::tidy(t.test(automovi ~ solucion$cluster, data = datos.caso3.grupos)),
caption = "Resultados de la prueba t para automovi (Cuadro 3.26)",
align = "c",
digits = 4
)
Resultados de la prueba t para automovi (Cuadro 3.26)
| -4.8775 |
65.2625 |
70.14 |
-2.823 |
0.0143 |
13.0731 |
-8.608 |
-1.147 |
Welch Two Sample t-test |
two.sided |