library("fclust")
library("cluster")
library(kableExtra)
library(graphics)
Klastrovanie Konecnych vysledkov
Information sources: the fclust package documentation and the help pages for the hclust() command
Libraries
Data import
# Read the input data from CSV (comma-separated, "." as decimal mark)
udaje <- read.csv("adot/fclust2.csv", sep = ",", dec = ".")
# Use the first column as row names, then drop it from the data columns
rownames(udaje) <- udaje[, 1]
udaje <- udaje[, -1]
Traditional cluster analysis (Gower distance)
# Calculate the Gower distance matrix from the three result columns.
# The columns are selected by name rather than by position (3, 4, 5) so that
# a reordering of the input columns cannot silently change the clustering input.
dist_matrix <- daisy(
  udaje[, c("count_providers", "count_accepted.offers", "mean_profit")],
  metric = "gower"
)
# Perform hierarchical clustering (Ward's method, "ward.D2") on the
# Gower distance matrix
hc <- hclust(dist_matrix, method = "ward.D2")
# Plot dendrogram
plot(hc)
Ďalšie veci z hclust()
# Reuse the dist_matrix and hc objects computed in the previous step.
# Cut the dendrogram to obtain one cluster label per observation;
# change k to the desired number of clusters.
clusters <- cutree(hc, k = 5)
# Heading for the cross-tabulation of clusters against the constant variable
print("Distribution of the constant variable over clusters")
[1] "Distribution of the constant variable over clusters"
# Cross-tabulate cluster labels against the values of the constant parameter
table(clusters, as.factor(udaje[["constant"]]))
clusters 0 0.5
1 8 0
2 0 20
3 19 0
4 12 0
5 0 20
# Heading for the cross-tabulation of clusters against the demand variable
print("Distribution of the demand variable over clusters")
[1] "Distribution of the demand variable over clusters"
# Cross-tabulate cluster labels against the demand type
table(clusters, udaje[["demand"]])
clusters linear uniform
1 0 8
2 0 20
3 19 0
4 0 12
5 20 0
# Add cluster assignments to the original data frame as a new Cluster column
udaje_with_clusters <- cbind(udaje, Cluster = clusters)
# Print the full data frame with cluster assignments (all rows, not a preview)
udaje_with_clusters
constant demand count_providers count_accepted.offers mean_profit Cluster
18 0.0 uniform 1 200 796.574145 1
8 0.0 uniform 1 200 796.089368 1
15 0.0 uniform 1 200 796.523595 1
7 0.0 uniform 1 200 796.530775 1
12 0.0 uniform 1 200 796.309030 1
13 0.0 uniform 1 200 796.587413 1
5 0.0 uniform 1 200 796.519947 1
1 0.0 uniform 1 200 796.544441 1
41 0.5 uniform 9 62 9.180797 2
42 0.5 uniform 9 59 9.223583 2
43 0.5 uniform 7 71 17.394073 2
44 0.5 uniform 8 48 8.066726 2
24 0.0 linear 1 121 185.326497 3
10 0.0 uniform 2 200 171.315904 4
29 0.0 linear 1 131 201.205393 3
45 0.5 uniform 6 44 9.243612 2
46 0.5 uniform 5 53 16.198871 2
47 0.5 uniform 8 71 13.841523 2
50 0.5 uniform 6 49 13.329762 2
25 0.0 linear 1 98 197.631074 3
48 0.5 uniform 9 71 11.231729 2
32 0.0 linear 1 95 183.610697 3
28 0.0 linear 1 109 209.409515 3
31 0.0 linear 1 80 185.431412 3
49 0.5 uniform 8 51 7.751983 2
27 0.0 linear 1 123 188.842485 3
51 0.5 uniform 8 42 9.066001 2
53 0.5 uniform 8 48 6.872999 2
52 0.5 uniform 7 58 13.607098 2
30 0.0 linear 1 104 227.535476 3
54 0.5 uniform 8 50 9.776875 2
26 0.0 linear 1 89 188.880233 3
55 0.5 uniform 7 51 9.709894 2
23 0.0 linear 1 87 209.107141 3
57 0.5 uniform 8 53 8.787902 2
59 0.5 uniform 7 46 8.957566 2
62 0.5 linear 1 6 8.299087 5
33 0.0 linear 1 85 189.180887 3
56 0.5 uniform 8 55 11.388808 2
60 0.5 uniform 8 73 13.563680 2
63 0.5 linear 1 5 7.341410 5
58 0.5 uniform 8 55 10.700445 2
68 0.5 linear 1 5 9.102165 5
64 0.5 linear 2 14 9.029785 5
34 0.0 linear 1 127 194.798283 3
38 0.0 linear 1 85 204.138981 3
66 0.5 linear 2 16 9.589896 5
67 0.5 linear 1 5 9.326693 5
6 0.0 uniform 2 200 170.137709 4
61 0.5 linear 1 20 7.191841 5
22 0.0 linear 1 126 193.503928 3
69 0.5 linear 1 8 13.448409 5
70 0.5 linear 1 12 23.371362 5
78 0.5 linear 1 6 12.677431 5
79 0.5 linear 1 8 5.724451 5
72 0.5 linear 1 9 17.736802 5
4 0.0 uniform 2 200 138.998580 4
80 0.5 linear 1 10 11.758018 5
65 0.5 linear 2 28 10.474913 5
37 0.0 linear 1 90 189.133835 3
17 0.0 uniform 2 200 167.803193 4
9 0.0 uniform 2 200 174.376063 4
35 0.0 linear 1 85 192.996452 3
75 0.5 linear 1 14 16.567186 5
14 0.0 uniform 2 200 174.553658 4
71 0.5 linear 2 31 10.902416 5
77 0.5 linear 2 19 15.615800 5
16 0.0 uniform 2 200 174.320577 4
40 0.0 linear 1 127 215.812091 3
2 0.0 uniform 2 200 139.049358 4
36 0.0 linear 1 72 172.536267 3
11 0.0 uniform 2 200 174.220205 4
3 0.0 uniform 2 200 167.723489 4
74 0.5 linear 2 17 17.414007 5
19 0.0 uniform 2 200 119.003857 4
20 0.0 uniform 2 200 149.608687 4
76 0.5 linear 2 29 12.157709 5
39 0.0 linear 1 72 172.523132 3
73 0.5 linear 2 27 16.333304 5
Checking the previous results, we see the expected classification. That is, the final number of providers, the satisfied demand and the achieved profits cluster according to the parameter settings (i.e. the demand type and the constant, 0 or 0.5). The only surprising result is the division of the uniform-demand, constant = 0 group into two subclusters: the first contains the cases with just one surviving provider, and the second the cases in which more than one provider survived. One simulation was excluded because no providers survived.
# Per-column summary statistics of the rows assigned to cluster 1
summary(udaje_with_clusters[udaje_with_clusters[["Cluster"]] == 1, ])
constant demand count_providers count_accepted.offers
Min. :0 Length:8 Min. :1 Min. :200
1st Qu.:0 Class :character 1st Qu.:1 1st Qu.:200
Median :0 Mode :character Median :1 Median :200
Mean :0 Mean :1 Mean :200
3rd Qu.:0 3rd Qu.:1 3rd Qu.:200
Max. :0 Max. :1 Max. :200
mean_profit Cluster
Min. :796.1 Min. :1
1st Qu.:796.5 1st Qu.:1
Median :796.5 Median :1
Mean :796.5 Mean :1
3rd Qu.:796.6 3rd Qu.:1
Max. :796.6 Max. :1
# Counts of each demand type among the rows assigned to cluster 1
table(udaje_with_clusters$demand[udaje_with_clusters$Cluster == 1])
uniform
8
# Per-column summary statistics of the rows assigned to cluster 2
summary(udaje_with_clusters[udaje_with_clusters[["Cluster"]] == 2, ])
constant demand count_providers count_accepted.offers
Min. :0.5 Length:20 Min. :5.0 Min. :42.00
1st Qu.:0.5 Class :character 1st Qu.:7.0 1st Qu.:48.75
Median :0.5 Mode :character Median :8.0 Median :53.00
Mean :0.5 Mean :7.6 Mean :55.50
3rd Qu.:0.5 3rd Qu.:8.0 3rd Qu.:59.75
Max. :0.5 Max. :9.0 Max. :73.00
mean_profit Cluster
Min. : 6.873 Min. :2
1st Qu.: 9.039 1st Qu.:2
Median : 9.743 Median :2
Mean :10.895 Mean :2
3rd Qu.:13.388 3rd Qu.:2
Max. :17.394 Max. :2
# Counts of each demand type among the rows assigned to cluster 2
table(udaje_with_clusters$demand[udaje_with_clusters$Cluster == 2])
uniform
20
# Per-column summary statistics of the rows assigned to cluster 3
summary(udaje_with_clusters[udaje_with_clusters[["Cluster"]] == 3, ])
constant demand count_providers count_accepted.offers
Min. :0 Length:19 Min. :1 Min. : 72.0
1st Qu.:0 Class :character 1st Qu.:1 1st Qu.: 85.0
Median :0 Mode :character Median :1 Median : 95.0
Mean :0 Mean :1 Mean :100.3
3rd Qu.:0 3rd Qu.:1 3rd Qu.:122.0
Max. :0 Max. :1 Max. :131.0
mean_profit Cluster
Min. :172.5 Min. :3
1st Qu.:187.1 1st Qu.:3
Median :193.0 Median :3
Mean :194.8 Mean :3
3rd Qu.:202.7 3rd Qu.:3
Max. :227.5 Max. :3
# Counts of each demand type among the rows assigned to cluster 3
table(udaje_with_clusters$demand[udaje_with_clusters$Cluster == 3])
linear
19
# Per-column summary statistics of the rows assigned to cluster 4
summary(udaje_with_clusters[udaje_with_clusters[["Cluster"]] == 4, ])
constant demand count_providers count_accepted.offers
Min. :0 Length:12 Min. :2 Min. :200
1st Qu.:0 Class :character 1st Qu.:2 1st Qu.:200
Median :0 Mode :character Median :2 Median :200
Mean :0 Mean :2 Mean :200
3rd Qu.:0 3rd Qu.:2 3rd Qu.:200
Max. :0 Max. :2 Max. :200
mean_profit Cluster
Min. :119.0 Min. :4
1st Qu.:147.0 1st Qu.:4
Median :169.0 Median :4
Mean :160.1 Mean :4
3rd Qu.:174.2 3rd Qu.:4
Max. :174.6 Max. :4
# Counts of each demand type among the rows assigned to cluster 4
table(udaje_with_clusters$demand[udaje_with_clusters$Cluster == 4])
uniform
12
# Per-column summary statistics of the rows assigned to cluster 5
summary(udaje_with_clusters[udaje_with_clusters[["Cluster"]] == 5, ])
constant demand count_providers count_accepted.offers
Min. :0.5 Length:20 Min. :1.0 Min. : 5.00
1st Qu.:0.5 Class :character 1st Qu.:1.0 1st Qu.: 7.50
Median :0.5 Mode :character Median :1.0 Median :13.00
Mean :0.5 Mean :1.4 Mean :14.45
3rd Qu.:0.5 3rd Qu.:2.0 3rd Qu.:19.25
Max. :0.5 Max. :2.0 Max. :31.00
mean_profit Cluster
Min. : 5.724 Min. :5
1st Qu.: 9.084 1st Qu.:5
Median :11.330 Median :5
Mean :12.203 Mean :5
3rd Qu.:15.795 3rd Qu.:5
Max. :23.371 Max. :5
# Counts of each demand type among the rows assigned to cluster 5
table(udaje_with_clusters$demand[udaje_with_clusters$Cluster == 5])
linear
20
# Cross-tabulate the stored Cluster column against the constant parameter
table(
  udaje_with_clusters[["Cluster"]],
  as.factor(udaje_with_clusters[["constant"]])
)
0 0.5
1 8 0
2 0 20
3 19 0
4 12 0
5 0 20
# Cross-tabulate the stored Cluster column against the demand type
table(
  udaje_with_clusters[["Cluster"]],
  as.factor(udaje_with_clusters[["demand"]])
)
linear uniform
1 0 8
2 0 20
3 19 0
4 0 12
5 20 0
#kbl(tabulka) %>%
# kable_styling() %>%
# scroll_box(height = "550px")
Typickí reprezentanti
# Use the udaje_with_clusters data frame built earlier in this script.
# Calculate cluster centroids: the per-cluster mean of every column except
# the parameter columns (demand, constant) and the Cluster label itself.
centroids <- aggregate(
  udaje_with_clusters[
    ,
    !colnames(udaje_with_clusters) %in% c("demand", "constant", "Cluster"),
    drop = FALSE  # stay a data frame even if only one column remains
  ],
  by = list(Cluster = udaje_with_clusters$Cluster),
  FUN = mean
)
# Preview the centroids table
centroids
Cluster count_providers count_accepted.offers mean_profit
1 1 1.0 200.0000 796.45984
2 2 7.6 55.5000 10.89470
3 3 1.0 100.3158 194.82125
4 4 2.0 200.0000 160.09261
5 5 1.4 14.4500 12.20313