rm(list = ls())
udaje <- read.csv("world_population.csv", stringsAsFactors = FALSE)
# výber kontinentov podľa tvojich údajov
kontinenty <- c("Europe")
udaje2020 <- subset(udaje, Continent %in% kontinenty)
# výber stĺpcov, ktoré v databáze existujú
udaje2020 <- udaje2020[, c("Country.Territory",
"X2020.Population",
"Area..km..",
"Density..per.km..",
"Growth.Rate")]
# zmena názvu riadkov
rownames(udaje2020) <- udaje2020$Country.Territory
udaje2020$Country.Territory <- NULL
udaje2020
Z našej svetovej databázy sme si na skúmanie vytiahli iba krajiny
európskeho kontinetnu, ktorých celkový počet je 50.
udaje_complete <- na.omit(udaje2020)
udaje_scaled <- scale(udaje_complete)
num_vars <- as.data.frame(udaje_scaled)
num_plots <- ncol(num_vars)
par(mfrow = c(ceiling(sqrt(num_plots)), ceiling(num_plots / ceiling(sqrt(num_plots)))))
par(mar = c(4, 4, 2, 1))
for (col in names(num_vars)) {
boxplot(num_vars[[col]],
main = col,
col = "lightpink",
horizontal = TRUE)
}
mtext("Boxploty numerických premenných (rok 2020)", outer = TRUE, cex = 1.3, font = 2)

cor_mat <- cor(udaje_scaled, use="pairwise.complete.obs")
cor_mat <- round(cor_mat,2)
print(cor_mat)
X2020.Population Area..km.. Density..per.km.. Growth.Rate
X2020.Population 1.00 0.72 -0.11 -0.13
Area..km.. 0.72 1.00 -0.05 -0.06
Density..per.km.. -0.11 -0.05 1.00 -0.05
Growth.Rate -0.13 -0.06 -0.05 1.00
dist_mat <- dist(udaje2020, method = "euclidean")
hc <- hclust(dist_mat, method = "ward.D2")
plot(hc, labels = rownames(udaje_scaled),
main = "Hierarchical klastering of countries (Ward.D2)",
xlab = "", sub = "")
k <- 3
h_cut <- hc$height[length(hc$height) - (k - 1)]
abline(h = h_cut, col = "pink", lwd = 2, lty = 2)

klaster_membership <- cutree(hc, k = k)
udaje_klasters <- data.frame(
Country = rownames(udaje_complete),
udaje_complete,
klaster = factor(klaster_membership)
)
data_prac <- data.frame(cbind(udaje_klasters$Country, udaje_klasters$klaster))
colnames(data_prac) <- c("Country","klaster")
data_prac
ssq <- function(x, m) sum((x - m)^2)
var_names <- colnames(udaje_scaled)
TSS <- sapply(var_names, function(v) ssq(udaje_scaled[, v], mean(udaje_scaled[, v])))
WSS <- sapply(var_names, function(v) {
x <- udaje_scaled[, v]
tapply(x, klaster_membership, function(z) ssq(z, mean(z))) |> sum()
})
BSS <- TSS - WSS
ss_table <- data.frame(
Variable = var_names,
TSS = TSS,
WSS = WSS,
BSS = BSS,
Prop_Between = BSS / TSS
)
ss_table
udaje2020 <- data.frame(
udaje2020,
klaster = udaje_klasters$klaster
)
library(dplyr)
descriptives <- udaje2020 %>%
group_by(klaster) %>%
summarise(
across(
.cols = where(is.numeric),
.fns = list(
mean = ~mean(.x, na.rm = TRUE)
),
.names = "{.col}_{.fn}"
)
)
descriptives
LS0tCnRpdGxlOiAiUHLDoWNhIHMgZGF0YWLDoXpvdSAtIFdvcmxkIHBvcHVsYXRpb24iCmF1dGhvcjogIkJhcmJvcmEgS3VjaMOhcmlrb3bDoSAgPGJyPiIKZGF0ZTogIk5vdmVtYmVyIDIwMjUiCm91dHB1dDoKICBodG1sX25vdGVib29rOgogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIHRoZW1lOiB1bml0ZWQKICAgIGhpZ2hsaWdodDogdGFuZ28KICAgIGNzczogY3VzdG9tLmNzcwogIGh0bWxfZG9jdW1lbnQ6CiAgICB0b2M6IHRydWUKICAgIGRmX3ByaW50OiBwYWdlZAogIHBkZl9kb2N1bWVudDoKICAgIHRvYzogdHJ1ZQplZGl0b3Jfb3B0aW9uczoKICBtYXJrZG93bjoKICAgIHdyYXA6IDcyCi0tLQoKYGBge3Igc2V0dXAxLCBpbmNsdWRlPUZBTFNFfQprbml0cjo6b3B0c19jaHVuayRzZXQoCiAgZWNobyAgICA9IFRSVUUsICAgIyBkbyBub3Qgc2hvdyBjb2RlCiAgbWVzc2FnZSA9IFRSVUUsICAgIyBzdXBwcmVzcyBwYWNrYWdlL3N5c3RlbSBtZXNzYWdlcwogIHdhcm5pbmcgPSBGQUxTRSwgICAjIHN1cHByZXNzIHdhcm5pbmdzCiAgZXJyb3IgICA9IEZBTFNFICAgICMgc3VwcHJlc3MgZXJyb3Igb3V0cHV0CikKYGBgCgpgYGB7ciBzZXR1cDIsIG1lc3NhZ2U9RkFMU0V9CmxpYnJhcnkoa25pdHIpCmBgYAoKCmBgYHtyfQpybShsaXN0ID0gbHMoKSkKdWRhamUgPC0gcmVhZC5jc3YoIndvcmxkX3BvcHVsYXRpb24uY3N2Iiwgc3RyaW5nc0FzRmFjdG9ycyA9IEZBTFNFKQoKIyB2w71iZXIga29udGluZW50b3YgcG9kxL5hIHR2b2ppY2ggw7pkYWpvdgprb250aW5lbnR5IDwtIGMoIkV1cm9wZSIpCgp1ZGFqZTIwMjAgPC0gc3Vic2V0KHVkYWplLCBDb250aW5lbnQgJWluJSBrb250aW5lbnR5KQoKIyB2w71iZXIgc3TEunBjb3YsIGt0b3LDqSB2IGRhdGFiw6F6ZSBleGlzdHVqw7oKdWRhamUyMDIwIDwtIHVkYWplMjAyMFssIGMoIkNvdW50cnkuVGVycml0b3J5IiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgIlgyMDIwLlBvcHVsYXRpb24iLAogICAgICAgICAgICAgICAgICAgICAgICAgICAiQXJlYS4ua20uLiIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICJEZW5zaXR5Li5wZXIua20uLiIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICJHcm93dGguUmF0ZSIpXQoKIyB6bWVuYSBuw6F6dnUgcmlhZGtvdgpyb3duYW1lcyh1ZGFqZTIwMjApIDwtIHVkYWplMjAyMCRDb3VudHJ5LlRlcnJpdG9yeQp1ZGFqZTIwMjAkQ291bnRyeS5UZXJyaXRvcnkgPC0gTlVMTAoKdWRhamUyMDIwCgpgYGAKWiBuYcWhZWogc3ZldG92ZWogZGF0YWLDoXp5IHNtZSBzaSBuYSBza8O6bWFuaWUgdnl0aWFobGkgaWJhIGtyYWppbnkgZXVyw7Nwc2tlaG8ga29udGluZXRudSwga3RvcsO9Y2ggY2Vsa292w70gcG/EjWV0IGplIDUwLgoKYGBge3J9CnVkYWplX2NvbXBsZXRlIDwtIG5hLm9taXQodWRhamUyMDIwKQp1ZGFqZV9zY2FsZWQgPC0gc2NhbGUodWRhamVfY29tcGxldGUpCmBgYAoKYGBge3IgYm94cGxvdHMsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0UsIGZpZy5zaG93PSdob2xkJ30KbnVtX3ZhcnMgPC0gYXMuZGF0YS5mcmFtZSh1ZGFqZV9zY2FsZWQpCm51bV9wbG90cyA8LSBuY29sKG51bV92YXJzKQoKcGFyKG1mcm93ID0gYyhjZWlsaW5nKHNxcnQobnVtX3Bsb3RzKSksIGNlaWxpbmcobnVtX3Bsb3RzIC8gY2VpbGluZyhzcXJ0KG51bV9wbG90cykpKSkpCnBhcihtYXIgPSBjKDQsIDQsIDIsIDEpKQoKZm9yIChjb2wgaW4gbmFtZXMobnVtX3ZhcnMpKSB7CiAgYm94cGxvdChudW1fdmFyc1tbY29sXV0sCiAgICAgICAgICBtYWluID0gY29sLAogICAgICAgICAgY29sID0gImxpZ2h0cGluayIsCiAgICAgICAgICBob3Jpem9udGFsID0gVFJVRSkKfQoKbXRleHQoIkJveHBsb3R5IG51bWVyaWNrw71jaCBwcmVtZW5uw71jaCAocm9rIDIwMjApIiwgb3V0ZXIgPSBUUlVFLCBjZXggPSAxLjMsIGZvbnQgPSAyKQoKYGBgCmBgYHtyfQpjb3JfbWF0IDwtIGNvcih1ZGFqZV9zY2FsZWQsIHVzZT0icGFpcndpc2UuY29tcGxldGUub2JzIikKY29yX21hdCA8LSByb3VuZChjb3JfbWF0LDIpCnByaW50KGNvcl9tYXQpCmBgYAoKYGBge3J9CmRpc3RfbWF0IDwtIGRpc3QodWRhamUyMDIwLCBtZXRob2QgPSAiZXVjbGlkZWFuIikKaGMgPC0gaGNsdXN0KGRpc3RfbWF0LCBtZXRob2QgPSAid2FyZC5EMiIpCgpwbG90KGhjLCBsYWJlbHMgPSByb3duYW1lcyh1ZGFqZV9zY2FsZWQpLAogICAgIG1haW4gPSAiSGllcmFyY2hpY2FsIGtsYXN0ZXJpbmcgb2YgY291bnRyaWVzIChXYXJkLkQyKSIsCiAgICAgeGxhYiA9ICIiLCBzdWIgPSAiIikKCmsgPC0gMwpoX2N1dCA8LSBoYyRoZWlnaHRbbGVuZ3RoKGhjJGhlaWdodCkgLSAoayAtIDEpXQphYmxpbmUoaCA9IGhfY3V0LCBjb2wgPSAicGluayIsIGx3ZCA9IDIsIGx0eSA9IDIpCgprbGFzdGVyX21lbWJlcnNoaXAgPC0gY3V0cmVlKGhjLCBrID0gaykKCnVkYWplX2tsYXN0ZXJzIDwtIGRhdGEuZnJhbWUoCiAgQ291bnRyeSA9IHJvd25hbWVzKHVkYWplX2NvbXBsZXRlKSwKICB1ZGFqZV9jb21wbGV0ZSwKICBrbGFzdGVyID0gZmFjdG9yKGtsYXN0ZXJfbWVtYmVyc2hpcCkKKQpgYGAKYGBge3J9CmRhdGFfcHJhYyA8LSBkYXRhLmZyYW1lKGNiaW5kKHVkYWplX2tsYXN0ZXJzJENvdW50cnksIHVkYWplX2tsYXN0ZXJzJGtsYXN0ZXIpKQpjb2xuYW1lcyhkYXRhX3ByYWMpIDwtIGMoIkNvdW50cnkiLCJrbGFzdGVyIikKZGF0YV9wcmFjCmBgYApgYGB7cn0Kc3NxIDwtIGZ1bmN0aW9uKHgsIG0pIHN1bSgoeCAtIG0pXjIpCgp2YXJfbmFtZXMgPC0gY29sbmFtZXModWRhamVfc2NhbGVkKQoKVFNTIDwtIHNhcHBseSh2YXJfbmFtZXMsIGZ1bmN0aW9uKHYpIHNzcSh1ZGFqZV9zY2FsZWRbLCB2XSwgbWVhbih1ZGFqZV9zY2FsZWRbLCB2XSkpKQoKV1NTIDwtIHNhcHBseSh2YXJfbmFtZXMsIGZ1bmN0aW9uKHYpIHsKICB4IDwtIHVkYWplX3NjYWxlZFssIHZdCiAgdGFwcGx5KHgsIGtsYXN0ZXJfbWVtYmVyc2hpcCwgZnVuY3Rpb24oeikgc3NxKHosIG1lYW4oeikpKSB8PiBzdW0oKQp9KQoKQlNTIDwtIFRTUyAtIFdTUwoKc3NfdGFibGUgPC0gZGF0YS5mcmFtZSgKICBWYXJpYWJsZSA9IHZhcl9uYW1lcywKICBUU1MgPSBUU1MsCiAgV1NTID0gV1NTLAogIEJTUyA9IEJTUywKICBQcm9wX0JldHdlZW4gPSBCU1MgLyBUU1MKKQoKc3NfdGFibGUKYGBgCgpgYGB7cn0KdWRhamUyMDIwIDwtIGRhdGEuZnJhbWUoCiAgdWRhamUyMDIwLAogIGtsYXN0ZXIgPSB1ZGFqZV9rbGFzdGVycyRrbGFzdGVyCikKbGlicmFyeShkcGx5cikKCmRlc2NyaXB0aXZlcyA8LSB1ZGFqZTIwMjAgJT4lCiAgZ3JvdXBfYnkoa2xhc3RlcikgJT4lCiAgc3VtbWFyaXNlKAogICAgYWNyb3NzKAogICAgICAuY29scyA9IHdoZXJlKGlzLm51bWVyaWMpLAogICAgICAuZm5zID0gbGlzdCgKICAgICAgICBtZWFuID0gfm1lYW4oLngsIG5hLnJtID0gVFJVRSkKICAgICAgKSwKICAgICAgLm5hbWVzID0gInsuY29sfV97LmZufSIKICAgICkKICApCmRlc2NyaXB0aXZlcwpgYGAKCgoKCgoKCgoKCg==