A33: Aplicación: Análisis de Clúster (Conglomerados)
UNIVERSIDAD DE EL SALVADOR
FACULTAD DE CIENCIAS ECONÓMICAS
ESCUELA DE ECONOMÍA
MÉTODOS PARA EL ÁNALISIS ECONÓMICO
TEMA:
“Aplicación: Análisis de Clúster (Conglomerados)”
DOCENTE:
MSF. Carlos Ademir Pérez Alas.
Grupo de Trabajo
Grupo 06
Integrantes | Carnet | Participación |
---|---|---|
Martinez Alfaro Kelly Jeannette | MP21084 | 100% |
Méndez Pacheco Darleen Ivette | MP21084 | 100% |
Zarpate Crissia Margareth Villalta | MB22006 | 100% |
CIUDAD UNIVERSITARIA, VIERNES 10 DE ENERO DE 2025
3.7. Un ejemplo de la aplicación del análisis de conglomerados
Datos_3_3_Caso <- data.frame(
CC.AA. = c("España", "Andalucía", "Aragón", "Asturias", "Baleares", "Canarias", "Cantabria", "Castilla y León", "Castilla-La Mancha", "Cataluña", "Com. Valenciana", "Extremadura", "Galicia", "Madrid", "Murcia", "Navarra", "País Vasco", "La Rioja"),
automovil = c(69.0, 66.7, 67.2, 63.7, 71.6, 72.7, 63.4, 65.8, 61.5, 70.4, 72.7, 60.0, 65.5, 74.0, 69.0, 76.4, 71.3, 64.9),
tvcolor = c(97.6, 98.0, 97.5, 95.2, 98.8, 96.8, 94.9, 97.1, 97.3, 98.1, 98.4, 91.8, 91.3, 99.4, 98.7, 99.3, 98.3, 98.6),
video = c(62.4, 82.7, 56.8, 52.5, 62.4, 68.4, 48.9, 47.7, 53.6, 71.1, 68.2, 43.7, 42.7, 76.3, 59.3, 60.6, 61.6, 54.4),
microond = c(32.3, 24.1, 43.4, 21.4, 29.8, 47.9, 36.5, 28.1, 21.7, 36.8, 26.6, 20.7, 13.5, 53.9, 19.5, 44.0, 45.7, 44.4),
lavavaji = c(17.0, 12.7, 20.6, 13.3, 14.4, 5.80, 11.2, 14.0, 7.10, 19.8, 12.1, 11.1, 4.6, 32.3, 12.1, 20.6, 23.7, 17.6),
telefono = c(85.2, 74.7, 88.4, 86.4, 87.9, 75.4, 80.5, 85.0, 72.9, 92.2, 84.4, 67.1, 85.9, 95.7, 81.4, 87.4, 94.3, 83.4)
)
matriz_datos <- as.matrix(Datos_3_3_Caso[, c("automovil", "tvcolor", "video", "microond", "lavavaji", "telefono")])
matriz.dis.euclid.caso3 <- dist(matriz_datos, method = "euclidean")
datos.caso3.grupos.kmeans <- kmeans(Datos_3_3_Caso[, c("automovil", "tvcolor", "video", "microond", "lavavaji", "telefono")], 2)
Datos_3_3_Caso$kmeans.caso3.cluster <- datos.caso3.grupos.kmeans$cluster
fit.automovil <- aov(automovil ~ kmeans.caso3.cluster, data = Datos_3_3_Caso)
fit.tvcolor <- aov(tvcolor ~ kmeans.caso3.cluster, data = Datos_3_3_Caso)
fit.video <- aov(video ~ kmeans.caso3.cluster,data = Datos_3_3_Caso)
fit.microond <- aov(microond ~ kmeans.caso3.cluster, data = Datos_3_3_Caso)
fit.lavavaji <- aov(lavavaji ~ kmeans.caso3.cluster, data = Datos_3_3_Caso)
fit.telefono <- aov(telefono ~ kmeans.caso3.cluster, data = Datos_3_3_Caso)
# Mostrar resultados del ANOVA
summary(fit.automovil)
## Df Sum Sq Mean Sq F value Pr(>F)
## kmeans.caso3.cluster 1 180.7 180.66 16.96 0.000805 ***
## Residuals 16 170.4 10.65
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Df Sum Sq Mean Sq F value Pr(>F)
## kmeans.caso3.cluster 1 40.29 40.29 12.12 0.00308 **
## Residuals 16 53.18 3.32
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Df Sum Sq Mean Sq F value Pr(>F)
## kmeans.caso3.cluster 1 1112.8 1112.8 19.35 0.000448 ***
## Residuals 16 919.9 57.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Df Sum Sq Mean Sq F value Pr(>F)
## kmeans.caso3.cluster 1 1086 1086.1 13.9 0.00183 **
## Residuals 16 1250 78.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Df Sum Sq Mean Sq F value Pr(>F)
## kmeans.caso3.cluster 1 233.4 233.43 6.799 0.019 *
## Residuals 16 549.3 34.33
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Df Sum Sq Mean Sq F value Pr(>F)
## kmeans.caso3.cluster 1 174.5 174.51 3.572 0.077 .
## Residuals 16 781.7 48.85
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Datos.NbClust <- as.matrix(Datos_3_3_Caso[, c("automovil", "tvcolor", "video", "microond", "lavavaji", "telefono")])
hclust.average.caso3 <- hclust(matriz.dis.euclid.caso3,
method = "average")
data.frame(hclust.average.caso3[2:1])
## height merge.1 merge.2
## 1 5.338539 -1 -5
## 2 6.874591 -3 -18
## 3 8.854377 -4 -8
## 4 9.088165 -11 1
## 5 9.387225 -16 -17
## 6 12.380548 2 5
## 7 13.111610 -15 3
## 8 13.459198 -9 -12
## 9 13.692195 -10 4
## 10 16.116750 -7 7
## 11 18.315426 6 9
## 12 19.176262 8 10
## 13 20.682296 -13 12
## 14 23.782645 -6 11
## 15 27.887869 13 14
## 16 32.206064 -2 15
## 17 38.876263 -14 16
Datos.NbClust <- Datos_3_3_Caso[,c ("automovil", "tvcolor", "video", "microond", "lavavaji", "telefono")]
library(NbClust)
res.wardD2 <- NbClust(Datos.NbClust,
distance = "euclidean",
min.nc = 2,
max.nc = 15,
method = "ward.D2",
index = "alllong")
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 10 proposed 2 as the best number of clusters
## * 3 proposed 3 as the best number of clusters
## * 3 proposed 4 as the best number of clusters
## * 1 proposed 5 as the best number of clusters
## * 1 proposed 6 as the best number of clusters
## * 1 proposed 12 as the best number of clusters
## * 1 proposed 13 as the best number of clusters
## * 7 proposed 15 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 2
##
##
## *******************************************************************
Datos_3_3_Caso<- data.frame(matrix(rnorm(18*20), ncol=20))
matriz.dis.euclid <- dist(Datos_3_3_Caso, method="euclidean")
hclust.ward.caso3 <- hclust(matriz.dis.euclid, method="ward.D2")
grupo.ward <- cutree(hclust.ward.caso3, k = 2)
grupo.ward <- cutree(hclust.ward.caso3, k = 2, h = NULL)
datos.caso3.grupos <- cbind(Datos_3_3_Caso, grupo.ward)
datos.caso3.grupos$id <- NULL
## Group.1 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12
## 1 1 0.08 0.02 0.13 -0.28 0.48 0.01 -0.13 -0.62 -0.24 0.18 0.29 0.05
## 2 2 0.31 -0.11 0.98 0.57 -0.84 0.41 0.15 0.05 1.27 0.47 -0.56 -0.87
## X13 X14 X15 X16 X17 X18 X19 X20 grupo.ward
## 1 0.05 -0.20 0.34 0.08 0.42 -0.11 0.05 0.07 1
## 2 -0.50 0.57 -1.26 0.49 -0.20 0.32 0.50 -0.66 2