Domingo, 30 de noviembre de 2025Caso: Diseño de un plan de incentivos de vendedores
El ejercicio aborda una decisión estratégica para el director de ventas de una cadena de tiendas de electrodomésticos con cobertura nacional. La necesidad surge de la dificultad de establecer un plan de incentivos equitativo; se requiere que los incentivos sean más altos en aquellas zonas geográficas donde las condiciones de vida de los habitantes hacen más difícil las ventas.
El propósito es determinar si las Comunidades Autónomas (CC.AA.) pueden segmentarse en grupos homogéneos con base en seis indicadores de equipamiento de los hogares (Automóvil, TV color, Vídeo, Microondas, Lavavajillas y Teléfono).
Datos_3_3_Caso <- data.frame(
CCAA = c("España", "Andalucía", "Aragón", "Asturias", "Baleares", "Canarias",
"Cantabria", "Castilla y León", "Castilla-La Mancha", "Cataluña",
"Comv. Valenciana", "Extremadura", "Galicia", "Madrid", "Murcia",
"Navarra", "País Vasco", "La Rioja"),
Automovil = c(69.0, 66.7, 67.2, 63.7, 71.9, 72.7, 63.4, 65.8, 65.1,
70.4, 72.7, 60.5, 65.5, 74.0, 69.0, 76.4, 71.3, 64.9),
TV_color = c(97.6, 98.0, 97.5, 95.2, 98.8, 96.8, 94.9, 97.1, 97.3,
98.1, 98.4, 97.7, 91.3, 99.4, 98.7, 99.3, 98.3, 98.6),
Video = c(62.4, 82.7, 56.8, 52.1, 62.4, 68.4, 48.9, 47.7, 53.6,
71.1, 68.2, 43.7, 42.7, 76.3, 59.3, 60.6, 61.6, 54.4),
Microondas = c(32.3, 24.1, 43.4, 24.4, 29.8, 27.9, 36.5, 28.1, 21.7,
36.8, 26.6, 20.7, 13.5, 53.9, 19.5, 44.0, 45.7, 44.4),
Lavavajillas = c(17.0, 12.7, 20.6, 13.3, 10.1, 5.80, 11.2, 14.0, 7.10,
19.8, 12.1, 11.7, 14.6, 32.3, 12.1, 20.6, 23.7, 17.6),
Telefono = c(85.2, 74.7, 88.4, 88.1, 87.9, 75.4, 80.5, 85.0, 72.9,
92.2, 84.4, 67.1, 85.9, 95.7, 81.4, 87.4, 94.3, 83.4)
)
print(Datos_3_3_Caso )## CCAA Automovil TV_color Video Microondas Lavavajillas Telefono
## 1 España 69.0 97.6 62.4 32.3 17.0 85.2
## 2 Andalucía 66.7 98.0 82.7 24.1 12.7 74.7
## 3 Aragón 67.2 97.5 56.8 43.4 20.6 88.4
## 4 Asturias 63.7 95.2 52.1 24.4 13.3 88.1
## 5 Baleares 71.9 98.8 62.4 29.8 10.1 87.9
## 6 Canarias 72.7 96.8 68.4 27.9 5.8 75.4
## 7 Cantabria 63.4 94.9 48.9 36.5 11.2 80.5
## 8 Castilla y León 65.8 97.1 47.7 28.1 14.0 85.0
## 9 Castilla-La Mancha 65.1 97.3 53.6 21.7 7.1 72.9
## 10 Cataluña 70.4 98.1 71.1 36.8 19.8 92.2
## 11 Comv. Valenciana 72.7 98.4 68.2 26.6 12.1 84.4
## 12 Extremadura 60.5 97.7 43.7 20.7 11.7 67.1
## 13 Galicia 65.5 91.3 42.7 13.5 14.6 85.9
## 14 Madrid 74.0 99.4 76.3 53.9 32.3 95.7
## 15 Murcia 69.0 98.7 59.3 19.5 12.1 81.4
## 16 Navarra 76.4 99.3 60.6 44.0 20.6 87.4
## 17 País Vasco 71.3 98.3 61.6 45.7 23.7 94.3
## 18 La Rioja 64.9 98.6 54.4 44.4 17.6 83.4
Datos_num <- Datos_3_3_Caso[, -1] # quita CCAA
# Cálculo de Mahalanobis
mean_vec <- colMeans(Datos_num)
Sx <- cov(Datos_num)
D2 <- mahalanobis(Datos_num, mean_vec, Sx)
# p-values
p_vals <- pchisq(D2, df = 6, lower.tail = FALSE)
# Valor crítico al 99%
critico <- qchisq(.99, df = 6)
Resultados <- data.frame(
CCAA = Datos_3_3_Caso$CCAA,
D2_Mahalanobis = round(D2, 3),
p_value = round(p_vals, 5)
)
print(Resultados)## CCAA D2_Mahalanobis p_value
## 1 España 0.195 0.99986
## 2 Andalucía 10.912 0.09115
## 3 Aragón 1.938 0.92528
## 4 Asturias 4.707 0.58184
## 5 Baleares 5.702 0.45738
## 6 Canarias 9.151 0.16529
## 7 Cantabria 7.448 0.28143
## 8 Castilla y León 2.281 0.89210
## 9 Castilla-La Mancha 2.407 0.87873
## 10 Cataluña 2.898 0.82153
## 11 Comv. Valenciana 2.572 0.86035
## 12 Extremadura 10.398 0.10888
## 13 Galicia 13.396 0.03715
## 14 Madrid 8.385 0.21121
## 15 Murcia 4.820 0.56714
## 16 Navarra 7.980 0.23959
## 17 País Vasco 2.313 0.88874
## 18 La Rioja 4.497 0.60976
## [1] "Valor crítico 99% = 16.812"
Matriz de Distancias
matriz_datos <- as.matrix(Datos_3_3_Caso[, c("Automovil", "TV_color", "Video", "Microondas", "Lavavajillas", "Telefono")])
matriz.dis.euclid.caso3 <- dist(matriz_datos, method = "euclidean")
#print(matriz.dis.euclid.caso3)# Métodos de aglomeración
hc_single <- hclust(matriz.dis.euclid.caso3, method = "single")
hc_complete <- hclust(matriz.dis.euclid.caso3, method = "complete")
hc_average <- hclust(matriz.dis.euclid.caso3, method = "average")
hc_centroid <- hclust(matriz.dis.euclid.caso3, method = "centroid")
hc_ward <- hclust(matriz.dis.euclid.caso3, method = "ward.D2")
par(mfrow = c(5, 1))
par(mai = c(0.7, 0.7, 1.0, 0.4))
par(oma = c(2, 3, 2, 2))
# 1) Single
plot(hc_single,
main = "Método Single (Enlace Simple)",
xlab = "", ylab = "Height")
# 2) Complete
plot(hc_complete,
main = "Método Complete (Enlace Completo)",
xlab = "", ylab = "Height")
# 3) Average
plot(hc_average,
main = "Método Average (UPGMA)",
xlab = "", ylab = "Height")
# 4) Centroid
plot(hc_centroid,
main = "Método Centroid (UPGMC)",
xlab = "", ylab = "Height")
# 5) Ward.D2
plot(hc_ward,
main = "Método Ward.D2 (Min. Varianza)",
xlab = "", ylab = "Height")
mtext("Figuras - Dendrogramas mediante distintos métodos de conglomeración",
side = 2, line = -2, outer = TRUE, cex = 1.3)hclust.average.caso3 <- hclust(matriz.dis.euclid.caso3, method = "average")
data.frame(hclust.average.caso3[2:1])library(NbClust)
Datos.NbClust <- Datos_3_3_Caso[, c("Automovil", "TV_color", "Video", "Microondas", "Lavavajillas", "Telefono")]# Aplicado el procedimiento NbClust (para el método Ward.D2)
res.wardD2 <- NbClust(Datos.NbClust,
distance = "euclidean",
min.nc = 2,
max.nc = 15,
method = "ward.D2",
index = "alllong")## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 6 proposed 2 as the best number of clusters
## * 8 proposed 3 as the best number of clusters
## * 3 proposed 4 as the best number of clusters
## * 1 proposed 5 as the best number of clusters
## * 1 proposed 13 as the best number of clusters
## * 8 proposed 15 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 3
##
##
## *******************************************************************
## KL CH Hartigan CCC Scott Marriot TrCovW
## Number_clusters 3.0000 15.0000 3.0000 2.0000 13.0000 4.000000e+00 3.0
## Value_Index 2.8855 16.2678 4.7124 13.7087 444.6677 1.651845e+14 409368.2
## TraceW Friedman Rubin Cindex DB Silhouette Duda
## Number_clusters 3.000 1.500000e+01 3.0000 2.0000 15.0000 15.0000 2.0000
## Value_Index 779.731 4.687273e+16 -19.9402 0.3675 0.2906 0.7667 0.5526
## PseudoT2 Beale Ratkowsky Ball PtBiserial Gap Frey
## Number_clusters 2.0000 3.0000 3.0000 3.000 4.0000 2.0000 1
## Value_Index 8.0971 -2.3568 0.4182 1020.568 0.5129 -0.6365 NA
## McClain Gamma Gplus Tau Dunn Hubert SDindex Dindex
## Number_clusters 2.000 15 15 4.0000 15.0000 0 5.0000 0
## Value_Index 0.606 1 0 21.8431 1.0795 0 0.1634 0
## SDbw
## Number_clusters 15.0000
## Value_Index 0.0087
c1 <- c(66.87,96.82,56.01,25.43,11.81,80.71)
c2 <- c(70.70,98.53,63.47,44.70,22.43,90.23)
centros_iniciales <- rbind(c1, c2)
solucion <- kmeans(Datos_num, centers = centros_iniciales)
solucion## K-means clustering with 2 clusters of sizes 12, 6
##
## Cluster means:
## Automovil TV_color Video Microondas Lavavajillas Telefono
## 1 67.16667 96.81667 57.67500 25.425 11.80833 80.70833
## 2 70.70000 98.53333 63.46667 44.700 22.43333 90.23333
##
## Clustering vector:
## [1] 1 1 2 1 1 1 1 1 1 2 1 1 1 2 1 2 2 2
##
## Within cluster sum of squares by cluster:
## [1] 2783.8867 848.3533
## (between_SS / total_SS = 40.7 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
hclust.ward.caso3 <- hclust(matriz.dis.euclid.caso3, method = "ward.D2")
plot(hclust.ward.caso3,
labels = Datos_3_3_Caso$CCAA,
main = "Dendrograma - Método Ward.D2",
xlab = "", ylab = "Distancia")
grupos <- cutree(hclust.ward.caso3, k = 2)
rect.hclust(hclust.ward.caso3, k = 2, border = "red")hclust.complete.caso3<-hclust(matriz.dis.euclid.caso3,method="complete")
plot(hclust.complete.caso3,labels=Datos_3_3_Caso$CC.AA.)
grupos <- cutree(hclust.complete.caso3, k = 2)
rect.hclust(hclust.complete.caso3, k = 2, border = "red")hclust.average.caso3<-hclust(matriz.dis.euclid.caso3,method="average")
plot(hclust.average.caso3,labels=Datos_3_3_Caso$CC.AA.)
grupos <- cutree(hclust.average.caso3, k = 2)
rect.hclust(hclust.average.caso3, k = 2, border = "red")Código de las Pruebas \(t\) de Significatividad
library(dplyr)
library(kableExtra)
solucion.cluster <- solucion$cluster
Datos_3_3_Caso$Cluster <- solucion.cluster
# PRUEBAS t
t1 <- t.test(Automovil ~ Cluster, data = Datos_3_3_Caso)
t2 <- t.test(TV_color ~ Cluster, data = Datos_3_3_Caso)
t3 <- t.test(Video ~ Cluster, data = Datos_3_3_Caso)
t4 <- t.test(Microondas ~ Cluster, data = Datos_3_3_Caso)
t5 <- t.test(Lavavajillas ~ Cluster, data = Datos_3_3_Caso)
t6 <- t.test(Telefono ~ Cluster, data = Datos_3_3_Caso)
resultados.ttest <- data.frame(
Variable = c("Automovil", "TV_color", "Video", "Microondas", "Lavavajillas", "Telefono"),
Prueba_t = c(t1$statistic, t2$statistic, t3$statistic, t4$statistic, t5$statistic, t6$statistic),
Grupo1 = c(t1$estimate[1], t2$estimate[1], t3$estimate[1], t4$estimate[1], t5$estimate[1], t6$estimate[1]),
Grupo2 = c(t1$estimate[2], t2$estimate[2], t3$estimate[2], t4$estimate[2], t5$estimate[2], t6$estimate[2])
)
resultados.ttest %>%
kable(caption = "Significatividad de las diferencias entre los perfiles de los conglomerados",
align = "c", digits = 2) %>%
kable_classic(html_font = "Times New Roman", font_size = 14) %>%
kable_styling()| Variable | Prueba_t | Grupo1 | Grupo2 |
|---|---|---|---|
| Automovil | -1.71 | 67.17 | 70.70 |
| TV_color | -2.52 | 96.82 | 98.53 |
| Video | -1.19 | 57.68 | 63.47 |
| Microondas | -6.73 | 25.42 | 44.70 |
| Lavavajillas | -4.61 | 11.81 | 22.43 |
| Telefono | -3.51 | 80.71 | 90.23 |