Klastrovanie konečných výsledkov

Information sources: the fclust package documentation and the help page for the hclust() function.

fclust: An R Package for Fuzzy Clustering

Libraries

library("fclust")
library("cluster")
library(kableExtra)
library(graphics)

Data import

# Read the input table from a comma-separated file with "." as the decimal mark.
udaje <- read.csv("adot/fclust2.csv", sep = ",", dec = ".")

# The first column is used as the row identifier: copy it into the row names
# and then drop it from the data columns.
rownames(udaje) <- udaje[, 1]
# Plain `<-`: this is top-level code, so the super-assignment `<<-` is
# unnecessary here.
udaje <- udaje[, -1]

Traditional cluster analysis (Gower distance)

# Gower dissimilarities between observations, computed on columns 3-5 of
# `udaje` only (count_providers, count_accepted.offers and mean_profit in the
# data printed in this report); the remaining columns are left out.
dist_matrix <- daisy(udaje[, c(3, 4, 5)], metric = "gower")

# Hierarchical clustering on that dissimilarity matrix using the "ward.D2"
# agglomeration method. Plain `<-`: this is top-level code, so the
# super-assignment `<<-` is unnecessary here.
hc <- hclust(dist_matrix, method = "ward.D2")

# Draw the dendrogram
plot(hc)

Ďalšie veci z hclust()

# Uses the `hc` clustering object built above.

# Number of clusters to extract from the dendrogram; change this value to cut
# the tree at a different level.
n_clusters <- 5

# Cut the dendrogram into `n_clusters` groups; `clusters` holds one cluster
# label per observation.
clusters <- cutree(hc, k = n_clusters)

# Label for the cross-tabulation of clusters against `constant` printed next
print("Distribution of the constant variable over clusters")
[1] "Distribution of the constant variable over clusters"
# Cross-tabulate cluster labels against the values of `constant`
# (converted to a factor so each distinct value becomes one column).
table(clusters, as.factor(udaje[["constant"]]))
        
clusters  0 0.5
       1  8   0
       2  0  20
       3 19   0
       4 12   0
       5  0  20
# Label for the cross-tabulation of clusters against `demand` printed next
print("Distribution of the demand variable over clusters")
[1] "Distribution of the demand variable over clusters"
# Cross-tabulate cluster labels against the `demand` column
table(clusters, udaje[["demand"]])
        
clusters linear uniform
       1      0       8
       2      0      20
       3     19       0
       4      0      12
       5     20       0
# Append the cluster labels to the original data as a new `Cluster` column.
# Plain `<-`: this is top-level code, so the super-assignment `<<-` is
# unnecessary here.
udaje_with_clusters <- cbind(udaje, Cluster = clusters)

# Print the complete data frame together with the cluster assignments
# (all rows are shown, not just a preview).
udaje_with_clusters
   constant  demand count_providers count_accepted.offers mean_profit Cluster
18      0.0 uniform               1                   200  796.574145       1
8       0.0 uniform               1                   200  796.089368       1
15      0.0 uniform               1                   200  796.523595       1
7       0.0 uniform               1                   200  796.530775       1
12      0.0 uniform               1                   200  796.309030       1
13      0.0 uniform               1                   200  796.587413       1
5       0.0 uniform               1                   200  796.519947       1
1       0.0 uniform               1                   200  796.544441       1
41      0.5 uniform               9                    62    9.180797       2
42      0.5 uniform               9                    59    9.223583       2
43      0.5 uniform               7                    71   17.394073       2
44      0.5 uniform               8                    48    8.066726       2
24      0.0  linear               1                   121  185.326497       3
10      0.0 uniform               2                   200  171.315904       4
29      0.0  linear               1                   131  201.205393       3
45      0.5 uniform               6                    44    9.243612       2
46      0.5 uniform               5                    53   16.198871       2
47      0.5 uniform               8                    71   13.841523       2
50      0.5 uniform               6                    49   13.329762       2
25      0.0  linear               1                    98  197.631074       3
48      0.5 uniform               9                    71   11.231729       2
32      0.0  linear               1                    95  183.610697       3
28      0.0  linear               1                   109  209.409515       3
31      0.0  linear               1                    80  185.431412       3
49      0.5 uniform               8                    51    7.751983       2
27      0.0  linear               1                   123  188.842485       3
51      0.5 uniform               8                    42    9.066001       2
53      0.5 uniform               8                    48    6.872999       2
52      0.5 uniform               7                    58   13.607098       2
30      0.0  linear               1                   104  227.535476       3
54      0.5 uniform               8                    50    9.776875       2
26      0.0  linear               1                    89  188.880233       3
55      0.5 uniform               7                    51    9.709894       2
23      0.0  linear               1                    87  209.107141       3
57      0.5 uniform               8                    53    8.787902       2
59      0.5 uniform               7                    46    8.957566       2
62      0.5  linear               1                     6    8.299087       5
33      0.0  linear               1                    85  189.180887       3
56      0.5 uniform               8                    55   11.388808       2
60      0.5 uniform               8                    73   13.563680       2
63      0.5  linear               1                     5    7.341410       5
58      0.5 uniform               8                    55   10.700445       2
68      0.5  linear               1                     5    9.102165       5
64      0.5  linear               2                    14    9.029785       5
34      0.0  linear               1                   127  194.798283       3
38      0.0  linear               1                    85  204.138981       3
66      0.5  linear               2                    16    9.589896       5
67      0.5  linear               1                     5    9.326693       5
6       0.0 uniform               2                   200  170.137709       4
61      0.5  linear               1                    20    7.191841       5
22      0.0  linear               1                   126  193.503928       3
69      0.5  linear               1                     8   13.448409       5
70      0.5  linear               1                    12   23.371362       5
78      0.5  linear               1                     6   12.677431       5
79      0.5  linear               1                     8    5.724451       5
72      0.5  linear               1                     9   17.736802       5
4       0.0 uniform               2                   200  138.998580       4
80      0.5  linear               1                    10   11.758018       5
65      0.5  linear               2                    28   10.474913       5
37      0.0  linear               1                    90  189.133835       3
17      0.0 uniform               2                   200  167.803193       4
9       0.0 uniform               2                   200  174.376063       4
35      0.0  linear               1                    85  192.996452       3
75      0.5  linear               1                    14   16.567186       5
14      0.0 uniform               2                   200  174.553658       4
71      0.5  linear               2                    31   10.902416       5
77      0.5  linear               2                    19   15.615800       5
16      0.0 uniform               2                   200  174.320577       4
40      0.0  linear               1                   127  215.812091       3
2       0.0 uniform               2                   200  139.049358       4
36      0.0  linear               1                    72  172.536267       3
11      0.0 uniform               2                   200  174.220205       4
3       0.0 uniform               2                   200  167.723489       4
74      0.5  linear               2                    17   17.414007       5
19      0.0 uniform               2                   200  119.003857       4
20      0.0 uniform               2                   200  149.608687       4
76      0.5  linear               2                    29   12.157709       5
39      0.0  linear               1                    72  172.523132       3
73      0.5  linear               2                    27   16.333304       5

Checking the previous results, we see the expected classification. That is, the data on the final number of providers, the satisfied demand, and the achieved profit are clustered according to the parameter settings (i.e. the demand type and the constant, 0 or 0.5). The only surprising result is the division of the uniform-demand, constant = 0 group into two subclusters: the first contains the cases with just one surviving provider, and the second contains the cases in which more than one provider survived. One simulation was excluded because no providers survived.

# Column-wise summary statistics for the observations assigned to cluster 1
summary(subset(udaje_with_clusters, Cluster == 1))
    constant    demand          count_providers count_accepted.offers
 Min.   :0   Length:8           Min.   :1       Min.   :200          
 1st Qu.:0   Class :character   1st Qu.:1       1st Qu.:200          
 Median :0   Mode  :character   Median :1       Median :200          
 Mean   :0                      Mean   :1       Mean   :200          
 3rd Qu.:0                      3rd Qu.:1       3rd Qu.:200          
 Max.   :0                      Max.   :1       Max.   :200          
  mean_profit       Cluster 
 Min.   :796.1   Min.   :1  
 1st Qu.:796.5   1st Qu.:1  
 Median :796.5   Median :1  
 Mean   :796.5   Mean   :1  
 3rd Qu.:796.6   3rd Qu.:1  
 Max.   :796.6   Max.   :1  
# Counts of each `demand` value among the observations in cluster 1
table(udaje_with_clusters$demand[udaje_with_clusters$Cluster == 1])

uniform 
      8 
# Column-wise summary statistics for the observations assigned to cluster 2
summary(subset(udaje_with_clusters, Cluster == 2))
    constant      demand          count_providers count_accepted.offers
 Min.   :0.5   Length:20          Min.   :5.0     Min.   :42.00        
 1st Qu.:0.5   Class :character   1st Qu.:7.0     1st Qu.:48.75        
 Median :0.5   Mode  :character   Median :8.0     Median :53.00        
 Mean   :0.5                      Mean   :7.6     Mean   :55.50        
 3rd Qu.:0.5                      3rd Qu.:8.0     3rd Qu.:59.75        
 Max.   :0.5                      Max.   :9.0     Max.   :73.00        
  mean_profit        Cluster 
 Min.   : 6.873   Min.   :2  
 1st Qu.: 9.039   1st Qu.:2  
 Median : 9.743   Median :2  
 Mean   :10.895   Mean   :2  
 3rd Qu.:13.388   3rd Qu.:2  
 Max.   :17.394   Max.   :2  
# Counts of each `demand` value among the observations in cluster 2
table(udaje_with_clusters$demand[udaje_with_clusters$Cluster == 2])

uniform 
     20 
# Column-wise summary statistics for the observations assigned to cluster 3
summary(subset(udaje_with_clusters, Cluster == 3))
    constant    demand          count_providers count_accepted.offers
 Min.   :0   Length:19          Min.   :1       Min.   : 72.0        
 1st Qu.:0   Class :character   1st Qu.:1       1st Qu.: 85.0        
 Median :0   Mode  :character   Median :1       Median : 95.0        
 Mean   :0                      Mean   :1       Mean   :100.3        
 3rd Qu.:0                      3rd Qu.:1       3rd Qu.:122.0        
 Max.   :0                      Max.   :1       Max.   :131.0        
  mean_profit       Cluster 
 Min.   :172.5   Min.   :3  
 1st Qu.:187.1   1st Qu.:3  
 Median :193.0   Median :3  
 Mean   :194.8   Mean   :3  
 3rd Qu.:202.7   3rd Qu.:3  
 Max.   :227.5   Max.   :3  
# Counts of each `demand` value among the observations in cluster 3
table(udaje_with_clusters$demand[udaje_with_clusters$Cluster == 3])

linear 
    19 
# Column-wise summary statistics for the observations assigned to cluster 4
summary(subset(udaje_with_clusters, Cluster == 4))
    constant    demand          count_providers count_accepted.offers
 Min.   :0   Length:12          Min.   :2       Min.   :200          
 1st Qu.:0   Class :character   1st Qu.:2       1st Qu.:200          
 Median :0   Mode  :character   Median :2       Median :200          
 Mean   :0                      Mean   :2       Mean   :200          
 3rd Qu.:0                      3rd Qu.:2       3rd Qu.:200          
 Max.   :0                      Max.   :2       Max.   :200          
  mean_profit       Cluster 
 Min.   :119.0   Min.   :4  
 1st Qu.:147.0   1st Qu.:4  
 Median :169.0   Median :4  
 Mean   :160.1   Mean   :4  
 3rd Qu.:174.2   3rd Qu.:4  
 Max.   :174.6   Max.   :4  
# Counts of each `demand` value among the observations in cluster 4
table(udaje_with_clusters$demand[udaje_with_clusters$Cluster == 4])

uniform 
     12 
# Column-wise summary statistics for the observations assigned to cluster 5
summary(subset(udaje_with_clusters, Cluster == 5))
    constant      demand          count_providers count_accepted.offers
 Min.   :0.5   Length:20          Min.   :1.0     Min.   : 5.00        
 1st Qu.:0.5   Class :character   1st Qu.:1.0     1st Qu.: 7.50        
 Median :0.5   Mode  :character   Median :1.0     Median :13.00        
 Mean   :0.5                      Mean   :1.4     Mean   :14.45        
 3rd Qu.:0.5                      3rd Qu.:2.0     3rd Qu.:19.25        
 Max.   :0.5                      Max.   :2.0     Max.   :31.00        
  mean_profit        Cluster 
 Min.   : 5.724   Min.   :5  
 1st Qu.: 9.084   1st Qu.:5  
 Median :11.330   Median :5  
 Mean   :12.203   Mean   :5  
 3rd Qu.:15.795   3rd Qu.:5  
 Max.   :23.371   Max.   :5  
# Counts of each `demand` value among the observations in cluster 5
table(udaje_with_clusters$demand[udaje_with_clusters$Cluster == 5])

linear 
    20 
# Cross-tabulate the stored `Cluster` column against the values of `constant`
table(
  udaje_with_clusters[["Cluster"]],
  as.factor(udaje_with_clusters[["constant"]])
)
   
     0 0.5
  1  8   0
  2  0  20
  3 19   0
  4 12   0
  5  0  20
# Cross-tabulate the stored `Cluster` column against the values of `demand`
table(
  udaje_with_clusters[["Cluster"]],
  as.factor(udaje_with_clusters[["demand"]])
)
   
    linear uniform
  1      0       8
  2      0      20
  3     19       0
  4      0      12
  5     20       0
#kbl(tabulka) %>% 
#  kable_styling() %>% 
#  scroll_box(height = "550px")

Typickí reprezentanti

# Uses the `udaje_with_clusters` data frame (data plus `Cluster` column)
# built earlier in this document.

# Select every column except `demand`, `constant` and the cluster label
# itself, so that only the remaining columns are averaged.
is_outcome <- !(colnames(udaje_with_clusters) %in%
                  c("demand", "constant", "Cluster"))

# Cluster centroids: one row per cluster, holding the mean of each selected
# column. `drop = FALSE` keeps the selection a data frame even if only a
# single column remains.
centroids <- aggregate(udaje_with_clusters[, is_outcome, drop = FALSE],
                       by = list(Cluster = udaje_with_clusters$Cluster),
                       FUN = mean)

# Print the centroids table
centroids
  Cluster count_providers count_accepted.offers mean_profit
1       1             1.0              200.0000   796.45984
2       2             7.6               55.5000    10.89470
3       3             1.0              100.3158   194.82125
4       4             2.0              200.0000   160.09261
5       5             1.4               14.4500    12.20313