# Read the first 50 rows of the menu data set and keep only the two calorie
# columns that the clustering steps work with.
# NOTE(review): the absolute, user-specific path only resolves on the original
# machine; consider a project-relative path.
food <- read.csv(
  file = "C:/Users/User/Desktop/study/menu.csv",
  nrows = 50
)
food <- food[, c("Calories", "Calories.from.Fat"), drop = FALSE]
# Preview the first rows of the reduced data frame.
head(food)
Calories Calories.from.Fat
1 300 120
2 250 70
3 370 200
4 450 250
5 400 210
6 430 210
Warning: package 'factoextra' was built under R version 4.2.2
Loading required package: ggplot2
Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# get_dist(), fviz_*() and eclust() are factoextra functions; attach the
# package explicitly so these commands also run in a fresh R session.
library(factoextra)

# Pairwise distances between the menu items; get_dist() defaults to the
# Euclidean metric.
food.dist <- get_dist(food)
# Peek at the first few pairwise distances.
head(food.dist)
[1] 70.71068 106.30146 198.49433 134.53624 158.11388 194.16488
# Dendrogram of the centroid-linkage hierarchy, coloured for a 3-cluster cut.
# NOTE(review): ?hclust describes centroid linkage in terms of squared
# Euclidean distances and warns that it can produce inversions; confirm that
# the unsquared `food.dist` is intended here.
fviz_dend(
  x = hclust(d = food.dist, method = "centroid"),
  k = 3
)
Warning in get_col(col, k): Length of color vector was shorter than the number
of clusters - color vector was recycled
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
"none")` instead.
# Cluster membership (3 groups) of every menu item under centroid linkage.
cutree(
  tree = hclust(d = food.dist, method = "centroid"),
  k = 3
)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
1 2 2 1 2 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1
# Dendrogram of the "ward.D" hierarchy, coloured for a 2-cluster cut.
# NOTE(review): per ?hclust, "ward.D" matches Ward's criterion only when fed
# squared distances, while "ward.D2" is the variant for unsquared ones;
# confirm which is wanted for `food.dist`.
fviz_dend(
  x = hclust(d = food.dist, method = "ward.D"),
  k = 2
)
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
"none")` instead.
# Cluster membership (2 groups) of every menu item under "ward.D" linkage.
cutree(
  tree = hclust(d = food.dist, method = "ward.D"),
  k = 2
)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 2 2 1 2 1 1
library(NbClust)
# Diagnostics for choosing the number of k-means clusters on the raw data:
# first the within-cluster sum of squares ("wss", elbow plot), then the
# average silhouette width. The subtitles are the Ukrainian method names.
fviz_nbclust(x = food, FUNcluster = kmeans, method = "wss") +
  labs(subtitle = "Метод ліктя")
fviz_nbclust(x = food, FUNcluster = kmeans, method = "silhouette") +
  labs(subtitle = "метод середнього силуету")
# K-means with 3 clusters, fitted on the observations themselves.
# kmeans() coerces its input with as.matrix(); given the `dist` object it
# clustered the rows of the 50 x 50 distance matrix rather than the two
# calorie variables that fviz_nbclust() and fviz_cluster() are called with.
set.seed(123)  # k-means uses random starts; fix the seed for reproducibility
food.means3 <- kmeans(food, centers = 3, nstart = 10)
# Percentage of the total sum of squares explained by the partition.
100 * food.means3$betweenss / food.means3$totss
# Cluster assignment of every menu item (mirrors the 4-cluster run).
food.means3$cluster
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
2 3 3 2 2 3 3 3 3 1 2 1 1 2 1 1 2 2 2 2 2 3 1 1
# K-means with 4 clusters, fitted on the observations themselves.
# kmeans() coerces its input with as.matrix(); given the `dist` object it
# clustered the rows of the 50 x 50 distance matrix rather than the two
# calorie variables that fviz_nbclust() and fviz_cluster() are called with.
set.seed(123)  # k-means uses random starts; fix the seed for reproducibility
food.means4 <- kmeans(food, centers = 4, nstart = 10)
# Cluster assignment of every menu item.
food.means4$cluster
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
1 1 1 1 1 4 4 4 4 3 2 3 3 2 3 3 2 2 2 2 2 1 3 3
# Scatter plots of the 3- and 4-cluster k-means partitions drawn over the
# original data, with normal-theory ellipses around each cluster.
fviz_cluster(object = food.means3, data = food, ellipse.type = "norm")
fviz_cluster(object = food.means4, data = food, ellipse.type = "norm")
# Partitioning around medoids (PAM) on the raw data for k = 2, 3 and 4.
food.pam2 <- eclust(x = food, FUNcluster = "pam", k = 2)
food.pam3 <- eclust(x = food, FUNcluster = "pam", k = 3)
food.pam4 <- eclust(x = food, FUNcluster = "pam", k = 4)
# Silhouette plot and per-cluster summary for the 2-cluster solution.
fviz_silhouette(sil.obj = food.pam2)
cluster size ave.sil.width
1 1 40 0.65
2 2 10 0.40
# Silhouette plot and per-cluster summary for the 3-cluster PAM solution.
fviz_silhouette(sil.obj = food.pam3)
cluster size ave.sil.width
1 1 11 0.59
2 2 34 0.49
3 3 5 0.63
# Silhouette plot and per-cluster summary for the 4-cluster PAM solution.
fviz_silhouette(sil.obj = food.pam4)
cluster size ave.sil.width
1 1 9 0.66
2 2 27 0.54
3 3 10 0.38
4 4 4 0.72
Warning: package 'fpc' was built under R version 4.2.2
# cluster.stats() is provided by fpc; attach the package explicitly so this
# command also runs in a fresh R session.
library(fpc)

# Compare the 4-cluster PAM partition with the 4-cluster k-means partition,
# using Euclidean distances between the observations.
clust_stats <- cluster.stats(
  d = dist(food),
  clustering = food.pam4$cluster,
  alt.clustering = food.means4$cluster
)
# Corrected Rand index: agreement between the two partitions.
clust_stats$corrected.rand