First we compile the data on percentage of families with different number of kids from eurostat
Children_stat <- read_csv("~/Documents/DataQuestions/Family_stat/Data/Compiled/Children_stat.csv")
df <- Children_stat
c <- df$Country
df.pred <- df [,2:12]
row.names(df.pred) <- c
df
## # A tibble: 33 x 12
## Country single couple `couple 1` `single 1` `couple 2` `single 2` `couple 3+`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Belgium 0.31 0.69 0.588 0.412 0.764 0.236 0.759
## 2 Bulgar… 0.45 0.55 0.501 0.499 0.637 0.363 0.548
## 3 Czechia 0.28 0.72 0.614 0.386 0.813 0.187 0.8
## 4 Denmark 0.359 0.641 0.521 0.479 0.714 0.286 0.776
## 5 Germany 0.292 0.708 0.614 0.386 0.803 0.197 0.808
## 6 Estonia 0.37 0.63 0.542 0.458 0.712 0.288 0.729
## 7 Ireland 0.339 0.661 0.508 0.492 0.716 0.284 0.787
## 8 Greece 0.217 0.783 0.691 0.309 0.86 0.14 0.859
## 9 Spain 0.319 0.681 0.568 0.432 0.807 0.193 0.77
## 10 France 0.325 0.675 0.566 0.434 0.742 0.258 0.781
## # … with 23 more rows, and 4 more variables: `single 3+` <dbl>, `1
## # child` <dbl>, `2 children` <dbl>, `3+ children` <dbl>
Then I run hierarchical clustering.
# Compute the dissimilarity matrix
res.dist <- dist(df.pred, method = "euclidean")
#create the hierarchical tree-----
res.hc <- hclust(d = res.dist, method = "average")
# Cut in 4 groups and color by groups fviz_dend(res.hc, k = 4, # Cut in four groups---------
fviz_dend(res.hc, k = 8, # Cut in four groups
cex = .8, # label size
k_colors = c("green", "red", "blue", "deeppink4","orange3","purple","brown3","black"),
color_labels_by_k = TRUE, # color labels by groups
rect = FALSE,# Add rectangle around groups
horiz = TRUE
)
Then save cluster ID and plot with leaflet map.
#save cluster number to country name-----
df.8cl <- cutree(res.hc, k=8)
df.cluster <- cbind(df, clusterID=df.8cl)
world_shapefiles <- read_sf(dsn = "../Data/RAW/world-shape-files/")
clusters <- rownames_to_column(df.cluster, var = "NAME")
clusters_calc <- world_shapefiles %>%
left_join(clusters)
color_clusters <- colorFactor("Set1",unique(clusters_calc$clusterID))
clusters_calc%>%
leaflet() %>%
setView(18,48,zoom = 3.5) %>%
addPolygons(weight = 1,
label = ~NAME,
popup = ~paste("Country", NAME, "<br/>","ID:", `clusterID`),
color = ~color_clusters(clusterID)) %>%
addLegend(pal = color_clusters, values = ~clusterID)