library(knitr)
rm(list = ls())
udaje <- read.csv("world_population.csv", stringsAsFactors = FALSE)

# výber kontinentov podľa tvojich údajov
kontinenty <- c("Europe")

udaje2020 <- subset(udaje, Continent %in% kontinenty)

# výber stĺpcov, ktoré v databáze existujú
udaje2020 <- udaje2020[, c("Country.Territory",
                           "X2020.Population",
                           "Area..km..",
                           "Density..per.km..",
                           "Growth.Rate")]

# zmena názvu riadkov
rownames(udaje2020) <- udaje2020$Country.Territory
udaje2020$Country.Territory <- NULL

udaje2020
NA

Z našej svetovej databázy sme si na skúmanie vytiahli iba krajiny európskeho kontinetnu, ktorých celkový počet je 50.

udaje_complete <- na.omit(udaje2020)
udaje_scaled <- scale(udaje_complete)
num_vars <- as.data.frame(udaje_scaled)
num_plots <- ncol(num_vars)

par(mfrow = c(ceiling(sqrt(num_plots)), ceiling(num_plots / ceiling(sqrt(num_plots)))))
par(mar = c(4, 4, 2, 1))

for (col in names(num_vars)) {
  boxplot(num_vars[[col]],
          main = col,
          col = "lightpink",
          horizontal = TRUE)
}

mtext("Boxploty numerických premenných (rok 2020)", outer = TRUE, cex = 1.3, font = 2)

cor_mat <- cor(udaje_scaled, use="pairwise.complete.obs")
cor_mat <- round(cor_mat,2)
print(cor_mat)
                  X2020.Population Area..km.. Density..per.km.. Growth.Rate
X2020.Population              1.00       0.72             -0.11       -0.13
Area..km..                    0.72       1.00             -0.05       -0.06
Density..per.km..            -0.11      -0.05              1.00       -0.05
Growth.Rate                  -0.13      -0.06             -0.05        1.00
dist_mat <- dist(udaje2020, method = "euclidean")
hc <- hclust(dist_mat, method = "ward.D2")

plot(hc, labels = rownames(udaje_scaled),
     main = "Hierarchical klastering of countries (Ward.D2)",
     xlab = "", sub = "")

k <- 3
h_cut <- hc$height[length(hc$height) - (k - 1)]
abline(h = h_cut, col = "pink", lwd = 2, lty = 2)


klaster_membership <- cutree(hc, k = k)

udaje_klasters <- data.frame(
  Country = rownames(udaje_complete),
  udaje_complete,
  klaster = factor(klaster_membership)
)
data_prac <- data.frame(cbind(udaje_klasters$Country, udaje_klasters$klaster))
colnames(data_prac) <- c("Country","klaster")
data_prac
ssq <- function(x, m) sum((x - m)^2)

var_names <- colnames(udaje_scaled)

TSS <- sapply(var_names, function(v) ssq(udaje_scaled[, v], mean(udaje_scaled[, v])))

WSS <- sapply(var_names, function(v) {
  x <- udaje_scaled[, v]
  tapply(x, klaster_membership, function(z) ssq(z, mean(z))) |> sum()
})

BSS <- TSS - WSS

ss_table <- data.frame(
  Variable = var_names,
  TSS = TSS,
  WSS = WSS,
  BSS = BSS,
  Prop_Between = BSS / TSS
)

ss_table
udaje2020 <- data.frame(
  udaje2020,
  klaster = udaje_klasters$klaster
)
library(dplyr)

descriptives <- udaje2020 %>%
  group_by(klaster) %>%
  summarise(
    across(
      .cols = where(is.numeric),
      .fns = list(
        mean = ~mean(.x, na.rm = TRUE)
      ),
      .names = "{.col}_{.fn}"
    )
  )
descriptives
LS0tCnRpdGxlOiAiUHLDoWNhIHMgZGF0YWLDoXpvdSAtIFdvcmxkIHBvcHVsYXRpb24iCmF1dGhvcjogIkJhcmJvcmEgS3VjaMOhcmlrb3bDoSAgPGJyPiIKZGF0ZTogIk5vdmVtYmVyIDIwMjUiCm91dHB1dDoKICBodG1sX25vdGVib29rOgogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIHRoZW1lOiB1bml0ZWQKICAgIGhpZ2hsaWdodDogdGFuZ28KICAgIGNzczogY3VzdG9tLmNzcwogIGh0bWxfZG9jdW1lbnQ6CiAgICB0b2M6IHRydWUKICAgIGRmX3ByaW50OiBwYWdlZAogIHBkZl9kb2N1bWVudDoKICAgIHRvYzogdHJ1ZQplZGl0b3Jfb3B0aW9uczoKICBtYXJrZG93bjoKICAgIHdyYXA6IDcyCi0tLQoKYGBge3Igc2V0dXAxLCBpbmNsdWRlPUZBTFNFfQprbml0cjo6b3B0c19jaHVuayRzZXQoCiAgZWNobyAgICA9IFRSVUUsICAgIyBkbyBub3Qgc2hvdyBjb2RlCiAgbWVzc2FnZSA9IFRSVUUsICAgIyBzdXBwcmVzcyBwYWNrYWdlL3N5c3RlbSBtZXNzYWdlcwogIHdhcm5pbmcgPSBGQUxTRSwgICAjIHN1cHByZXNzIHdhcm5pbmdzCiAgZXJyb3IgICA9IEZBTFNFICAgICMgc3VwcHJlc3MgZXJyb3Igb3V0cHV0CikKYGBgCgpgYGB7ciBzZXR1cDIsIG1lc3NhZ2U9RkFMU0V9CmxpYnJhcnkoa25pdHIpCmBgYAoKCmBgYHtyfQpybShsaXN0ID0gbHMoKSkKdWRhamUgPC0gcmVhZC5jc3YoIndvcmxkX3BvcHVsYXRpb24uY3N2Iiwgc3RyaW5nc0FzRmFjdG9ycyA9IEZBTFNFKQoKIyB2w71iZXIga29udGluZW50b3YgcG9kxL5hIHR2b2ppY2ggw7pkYWpvdgprb250aW5lbnR5IDwtIGMoIkV1cm9wZSIpCgp1ZGFqZTIwMjAgPC0gc3Vic2V0KHVkYWplLCBDb250aW5lbnQgJWluJSBrb250aW5lbnR5KQoKIyB2w71iZXIgc3TEunBjb3YsIGt0b3LDqSB2IGRhdGFiw6F6ZSBleGlzdHVqw7oKdWRhamUyMDIwIDwtIHVkYWplMjAyMFssIGMoIkNvdW50cnkuVGVycml0b3J5IiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgIlgyMDIwLlBvcHVsYXRpb24iLAogICAgICAgICAgICAgICAgICAgICAgICAgICAiQXJlYS4ua20uLiIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICJEZW5zaXR5Li5wZXIua20uLiIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICJHcm93dGguUmF0ZSIpXQoKIyB6bWVuYSBuw6F6dnUgcmlhZGtvdgpyb3duYW1lcyh1ZGFqZTIwMjApIDwtIHVkYWplMjAyMCRDb3VudHJ5LlRlcnJpdG9yeQp1ZGFqZTIwMjAkQ291bnRyeS5UZXJyaXRvcnkgPC0gTlVMTAoKdWRhamUyMDIwCgpgYGAKWiBuYcWhZWogc3ZldG92ZWogZGF0YWLDoXp5IHNtZSBzaSBuYSBza8O6bWFuaWUgdnl0aWFobGkgaWJhIGtyYWppbnkgZXVyw7Nwc2tlaG8ga29udGluZXRudSwga3RvcsO9Y2ggY2Vsa292w70gcG/EjWV0IGplIDUwLgoKYGBge3J9CnVkYWplX2NvbXBsZXRlIDwtIG5hLm9taXQodWRhamUyMDIwKQp1ZGFqZV9zY2FsZWQgPC0gc2NhbGUodWRhamVfY29tcGxldGUpCmBgYAoKYGBge3IgYm94cGxvdHMsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0UsIGZpZy5zaG93PSdob2xkJ30KbnVtX3ZhcnMgPC0gYXMuZGF0YS5mcmFtZSh1ZGFqZV9zY2FsZWQpCm51bV9wbG90cyA8LSBuY29sKG51bV92YXJzKQoKcGFyKG1mcm93ID0gYyhjZWlsaW5nKHNxcnQobnVtX3Bsb3RzKSksIGNlaWxpbmcobnVtX3Bsb3RzIC8gY2VpbGluZyhzcXJ0KG51bV9wbG90cykpKSkpCnBhcihtYXIgPSBjKDQsIDQsIDIsIDEpKQoKZm9yIChjb2wgaW4gbmFtZXMobnVtX3ZhcnMpKSB7CiAgYm94cGxvdChudW1fdmFyc1tbY29sXV0sCiAgICAgICAgICBtYWluID0gY29sLAogICAgICAgICAgY29sID0gImxpZ2h0cGluayIsCiAgICAgICAgICBob3Jpem9udGFsID0gVFJVRSkKfQoKbXRleHQoIkJveHBsb3R5IG51bWVyaWNrw71jaCBwcmVtZW5uw71jaCAocm9rIDIwMjApIiwgb3V0ZXIgPSBUUlVFLCBjZXggPSAxLjMsIGZvbnQgPSAyKQoKYGBgCmBgYHtyfQpjb3JfbWF0IDwtIGNvcih1ZGFqZV9zY2FsZWQsIHVzZT0icGFpcndpc2UuY29tcGxldGUub2JzIikKY29yX21hdCA8LSByb3VuZChjb3JfbWF0LDIpCnByaW50KGNvcl9tYXQpCmBgYAoKYGBge3J9CmRpc3RfbWF0IDwtIGRpc3QodWRhamUyMDIwLCBtZXRob2QgPSAiZXVjbGlkZWFuIikKaGMgPC0gaGNsdXN0KGRpc3RfbWF0LCBtZXRob2QgPSAid2FyZC5EMiIpCgpwbG90KGhjLCBsYWJlbHMgPSByb3duYW1lcyh1ZGFqZV9zY2FsZWQpLAogICAgIG1haW4gPSAiSGllcmFyY2hpY2FsIGtsYXN0ZXJpbmcgb2YgY291bnRyaWVzIChXYXJkLkQyKSIsCiAgICAgeGxhYiA9ICIiLCBzdWIgPSAiIikKCmsgPC0gMwpoX2N1dCA8LSBoYyRoZWlnaHRbbGVuZ3RoKGhjJGhlaWdodCkgLSAoayAtIDEpXQphYmxpbmUoaCA9IGhfY3V0LCBjb2wgPSAicGluayIsIGx3ZCA9IDIsIGx0eSA9IDIpCgprbGFzdGVyX21lbWJlcnNoaXAgPC0gY3V0cmVlKGhjLCBrID0gaykKCnVkYWplX2tsYXN0ZXJzIDwtIGRhdGEuZnJhbWUoCiAgQ291bnRyeSA9IHJvd25hbWVzKHVkYWplX2NvbXBsZXRlKSwKICB1ZGFqZV9jb21wbGV0ZSwKICBrbGFzdGVyID0gZmFjdG9yKGtsYXN0ZXJfbWVtYmVyc2hpcCkKKQpgYGAKYGBge3J9CmRhdGFfcHJhYyA8LSBkYXRhLmZyYW1lKGNiaW5kKHVkYWplX2tsYXN0ZXJzJENvdW50cnksIHVkYWplX2tsYXN0ZXJzJGtsYXN0ZXIpKQpjb2xuYW1lcyhkYXRhX3ByYWMpIDwtIGMoIkNvdW50cnkiLCJrbGFzdGVyIikKZGF0YV9wcmFjCmBgYApgYGB7cn0Kc3NxIDwtIGZ1bmN0aW9uKHgsIG0pIHN1bSgoeCAtIG0pXjIpCgp2YXJfbmFtZXMgPC0gY29sbmFtZXModWRhamVfc2NhbGVkKQoKVFNTIDwtIHNhcHBseSh2YXJfbmFtZXMsIGZ1bmN0aW9uKHYpIHNzcSh1ZGFqZV9zY2FsZWRbLCB2XSwgbWVhbih1ZGFqZV9zY2FsZWRbLCB2XSkpKQoKV1NTIDwtIHNhcHBseSh2YXJfbmFtZXMsIGZ1bmN0aW9uKHYpIHsKICB4IDwtIHVkYWplX3NjYWxlZFssIHZdCiAgdGFwcGx5KHgsIGtsYXN0ZXJfbWVtYmVyc2hpcCwgZnVuY3Rpb24oeikgc3NxKHosIG1lYW4oeikpKSB8PiBzdW0oKQp9KQoKQlNTIDwtIFRTUyAtIFdTUwoKc3NfdGFibGUgPC0gZGF0YS5mcmFtZSgKICBWYXJpYWJsZSA9IHZhcl9uYW1lcywKICBUU1MgPSBUU1MsCiAgV1NTID0gV1NTLAogIEJTUyA9IEJTUywKICBQcm9wX0JldHdlZW4gPSBCU1MgLyBUU1MKKQoKc3NfdGFibGUKYGBgCgpgYGB7cn0KdWRhamUyMDIwIDwtIGRhdGEuZnJhbWUoCiAgdWRhamUyMDIwLAogIGtsYXN0ZXIgPSB1ZGFqZV9rbGFzdGVycyRrbGFzdGVyCikKbGlicmFyeShkcGx5cikKCmRlc2NyaXB0aXZlcyA8LSB1ZGFqZTIwMjAgJT4lCiAgZ3JvdXBfYnkoa2xhc3RlcikgJT4lCiAgc3VtbWFyaXNlKAogICAgYWNyb3NzKAogICAgICAuY29scyA9IHdoZXJlKGlzLm51bWVyaWMpLAogICAgICAuZm5zID0gbGlzdCgKICAgICAgICBtZWFuID0gfm1lYW4oLngsIG5hLnJtID0gVFJVRSkKICAgICAgKSwKICAgICAgLm5hbWVzID0gInsuY29sfV97LmZufSIKICAgICkKICApCmRlc2NyaXB0aXZlcwpgYGAKCgoKCgoKCgoKCg==