library(tidymodels)
library(tidyverse)
set.seed(27)
centers <- tibble(
cluster = factor(1:3),
num_points = c(100, 150, 50), # number points in each cluster
x1 = c(5, 0, -3), # x1 coordinate of cluster center
x2 = c(-1, 1, -2) # x2 coordinate of cluster center
)
labelled_points <-
centers %>%
mutate(
x1 = map2(num_points, x1, rnorm),
x2 = map2(num_points, x2, rnorm)
) %>%
select(-num_points) %>%
unnest(cols = c(x1, x2))
ggplot(labelled_points, aes(x1, x2, color = cluster)) +
geom_point(alpha = 0.3) + theme_bw()

points <- labelled_points %>% select(-cluster)
kclust <- kmeans(points, centers = 3)
kclust
K-means clustering with 3 clusters of sizes 148, 51, 101
Cluster means:
x1 x2
1 0.08853475 1.045461
2 -3.14292460 -2.000043
3 5.00401249 -1.045811
Clustering vector:
[1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
[56] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 1 1 1
[111] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[166] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[221] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[276] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
Within cluster sum of squares by cluster:
[1] 298.9415 108.8112 243.2092
(between_SS / total_SS = 82.5 %)
Available components:
[1] "cluster" "centers" "totss" "withinss" "tot.withinss" "betweenss" "size"
[8] "iter" "ifault"
summary(kclust)
Length Class Mode
cluster 300 -none- numeric
centers 6 -none- numeric
totss 1 -none- numeric
withinss 3 -none- numeric
tot.withinss 1 -none- numeric
betweenss 1 -none- numeric
size 3 -none- numeric
iter 1 -none- numeric
ifault 1 -none- numeric
augment(kclust,points)
tidy(kclust)
glance(kclust)
kclusts <-
tibble(k = 1:9) %>%
mutate(
kclust = map(k, ~kmeans(points, .x)),
tidied = map(kclust, tidy),
glanced = map(kclust, glance),
augmented = map(kclust, augment, points)
)
kclusts
clusters <-
kclusts %>%
unnest(cols = c(tidied))
assignments <-
kclusts %>%
unnest(cols = c(augmented))
clusterings <-
kclusts %>%
unnest(cols = c(glanced))
p1 <-
ggplot(assignments, aes(x = x1, y = x2)) +
geom_point(aes(color = .cluster), alpha = 0.8) +
facet_wrap(~ k) +theme_bw()
p1

p2 <- p1 + geom_point(data = clusters, size = 10, shape = "x")
p2

ggplot(clusterings, aes(k, tot.withinss)) +
geom_line() +
geom_point() + theme_bw()

LS0tCnRpdGxlOiAiSy1NZWFucyBDbHVzdGVyaW5nIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7cn0KbGlicmFyeSh0aWR5bW9kZWxzKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKYGBgCgpgYGB7cn0Kc2V0LnNlZWQoMjcpCgpjZW50ZXJzIDwtIHRpYmJsZSgKICBjbHVzdGVyID0gZmFjdG9yKDE6MyksIAogIG51bV9wb2ludHMgPSBjKDEwMCwgMTUwLCA1MCksICAjIG51bWJlciBwb2ludHMgaW4gZWFjaCBjbHVzdGVyCiAgeDEgPSBjKDUsIDAsIC0zKSwgICAgICAgICAgICAgICMgeDEgY29vcmRpbmF0ZSBvZiBjbHVzdGVyIGNlbnRlcgogIHgyID0gYygtMSwgMSwgLTIpICAgICAgICAgICAgICAjIHgyIGNvb3JkaW5hdGUgb2YgY2x1c3RlciBjZW50ZXIKKQoKbGFiZWxsZWRfcG9pbnRzIDwtIAogIGNlbnRlcnMgJT4lCiAgbXV0YXRlKAogICAgeDEgPSBtYXAyKG51bV9wb2ludHMsIHgxLCBybm9ybSksCiAgICB4MiA9IG1hcDIobnVtX3BvaW50cywgeDIsIHJub3JtKQogICkgJT4lIAogIHNlbGVjdCgtbnVtX3BvaW50cykgJT4lIAogIHVubmVzdChjb2xzID0gYyh4MSwgeDIpKQoKZ2dwbG90KGxhYmVsbGVkX3BvaW50cywgYWVzKHgxLCB4MiwgY29sb3IgPSBjbHVzdGVyKSkgKwogIGdlb21fcG9pbnQoYWxwaGEgPSAwLjMpICsgdGhlbWVfYncoKQpgYGAKYGBge3J9CnBvaW50cyA8LSBsYWJlbGxlZF9wb2ludHMgJT4lIHNlbGVjdCgtY2x1c3RlcikKa2NsdXN0IDwtIGttZWFucyhwb2ludHMsIGNlbnRlcnMgPSAzKQprY2x1c3QKYGBgCmBgYHtyfQpzdW1tYXJ5KGtjbHVzdCkKYGBgCgpgYGB7cn0KYXVnbWVudChrY2x1c3QscG9pbnRzKQpgYGAKYGBge3J9CnRpZHkoa2NsdXN0KQpgYGAKYGBge3J9CmdsYW5jZShrY2x1c3QpCmBgYApgYGB7cn0Ka2NsdXN0cyA8LSAKICB0aWJibGUoayA9IDE6OSkgJT4lCiAgbXV0YXRlKAogICAga2NsdXN0ID0gbWFwKGssIH5rbWVhbnMocG9pbnRzLCAueCkpLAogICAgdGlkaWVkID0gbWFwKGtjbHVzdCwgdGlkeSksCiAgICBnbGFuY2VkID0gbWFwKGtjbHVzdCwgZ2xhbmNlKSwKICAgIGF1Z21lbnRlZCA9IG1hcChrY2x1c3QsIGF1Z21lbnQsIHBvaW50cykKICApCgprY2x1c3RzCmBgYApgYGB7cn0KY2x1c3RlcnMgPC0gCiAga2NsdXN0cyAlPiUKICB1bm5lc3QoY29scyA9IGModGlkaWVkKSkKCmFzc2lnbm1lbnRzIDwtIAogIGtjbHVzdHMgJT4lIAogIHVubmVzdChjb2xzID0gYyhhdWdtZW50ZWQpKQoKY2x1c3RlcmluZ3MgPC0gCiAga2NsdXN0cyAlPiUKICB1bm5lc3QoY29scyA9IGMoZ2xhbmNlZCkpCgpgYGAKCmBgYHtyfQpwMSA8LSAKICBnZ3Bsb3QoYXNzaWdubWVudHMsIGFlcyh4ID0geDEsIHkgPSB4MikpICsKICBnZW9tX3BvaW50KGFlcyhjb2xvciA9IC5jbHVzdGVyKSwgYWxwaGEgPSAwLjgpICsgCiAgZmFjZXRfd3JhcCh+IGspICt0aGVtZV9idygpCnAxCmBgYAoKYGBge3J9CnAyIDwtIHAxICsgZ2VvbV9wb2ludChkYXRhID0gY2x1c3RlcnMsIHNpemUgPSAxMCwgc2hhcGUgPSAieCIpCnAyCmBgYApgYGB7cn0KZ2dwbG90KGNsdXN0ZXJpbmdzLCBhZXMoaywgdG90LndpdGhpbnNzKSkgKwogIGdlb21fbGluZSgpICsKICBnZW9tX3BvaW50KCkgKyB0aGVtZV9idygpCmBgYAoK