library(cluster)
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
df <- scale(USArrests)
# Subset containing 10 rows
set.seed(123)
ss <- sample(1:50, 10)
df <- df[ss,]

library(dendextend)
## 
## ---------------------
## Welcome to dendextend version 1.7.0
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## Attaching package: 'dendextend'
## The following object is masked from 'package:stats':
## 
##     cutree
# Compute distance matrix
res.dist <- dist(df, method = "euclidean")
# Compute 2 hierarchical clusterings
hc1 <- hclust(res.dist, method = "average")
hc2 <- hclust(res.dist, method = "ward.D2")
# Create two dendrograms
dend1 <- as.dendrogram (hc1)
dend2 <- as.dendrogram (hc2)
# Create a list to hold dendrograms
dend_list <- dendlist(dend1, dend2)

tanglegram(dend1, dend2)

tanglegram(dend1, dend2,
highlight_distinct_edges = FALSE, # Turn-off dashed lines
common_subtrees_color_lines = FALSE, # Turn-off line colors
common_subtrees_color_branches = TRUE, # Color common branches
main = paste("entanglement =", round(entanglement(dend_list), 2))
)

# Cophenetic correlation matrix
cor.dendlist(dend_list, method = "cophenetic")
##           [,1]      [,2]
## [1,] 1.0000000 0.9646883
## [2,] 0.9646883 1.0000000
# Baker correlation matrix
cor.dendlist(dend_list, method = "baker")
##           [,1]      [,2]
## [1,] 1.0000000 0.9622885
## [2,] 0.9622885 1.0000000
# Cophenetic correlation coefficient
cor_cophenetic(dend1, dend2)
## [1] 0.9646883
# Baker correlation coefficient
cor_bakers_gamma(dend1, dend2)
## [1] 0.9622885
# Create multiple dendrograms by chaining
dend1 <- df %>% dist %>% hclust("complete") %>% as.dendrogram
dend2 <- df %>% dist %>% hclust("single") %>% as.dendrogram
dend3 <- df %>% dist %>% hclust("average") %>% as.dendrogram
dend4 <- df %>% dist %>% hclust("centroid") %>% as.dendrogram
# Compute correlation matrix
dend_list <- dendlist("Complete" = dend1, "Single" = dend2,
"Average" = dend3, "Centroid" = dend4)
cors <- cor.dendlist(dend_list)
# Print correlation matrix
round(cors, 2)
##          Complete Single Average Centroid
## Complete     1.00   0.76    0.99     0.75
## Single       0.76   1.00    0.80     0.84
## Average      0.99   0.80    1.00     0.74
## Centroid     0.75   0.84    0.74     1.00
# Visualize the correlation matrix using corrplot package
library(corrplot)
## corrplot 0.84 loaded
corrplot(cors, "pie", "lower")

data(USArrests)
# Compute distances and hierarchical clustering
dd <- dist(scale(USArrests), method = "euclidean")
hc <- hclust(dd, method = "ward.D2")
fviz_dend(hc, cex = 0.5)

fviz_dend(hc, cex = 0.5,
main = "Dendrogram - ward.D2",
xlab = "Objects", ylab = "Distance", sub = "")

fviz_dend(hc, cex = 0.5, horiz = TRUE)

fviz_dend(hc, k = 4, # Cut in four groups
cex = 0.5, # label size
k_colors = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"),
color_labels_by_k = TRUE, # color labels by groups
rect = TRUE, # Add rectangle around groups
rect_border = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"),
rect_fill = TRUE)
## Warning in if (color == "cluster") color <- "default": the condition has
## length > 1 and only the first element will be used

fviz_dend(hc, k = 4, # Cut in four groups
cex = 0.5, # label size
k_colors = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"),
color_labels_by_k = TRUE, # color labels by groups
ggtheme = theme_gray() # Change theme
)

fviz_dend(hc, cex = 0.5, k = 4, # Cut in four groups
k_colors = "jco")

fviz_dend(hc, k = 4, cex = 0.4, horiz = TRUE, k_colors = "jco",
rect = TRUE, rect_border = "jco", rect_fill = TRUE)

fviz_dend(hc, cex = 0.5, k = 4,
k_colors = "jco", type = "circular")

require("igraph")
## Loading required package: igraph
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
fviz_dend(hc, k = 4, k_colors = "jco",
type = "phylogenic", repel = TRUE)

require("igraph")
fviz_dend(hc, k = 4, # Cut in four groups
k_colors = "jco",
type = "phylogenic", repel = TRUE,
phylo_layout = "layout.gem")

fviz_dend(hc, xlim = c(1, 20), ylim = c(1, 8))

# Create a plot of the whole dendrogram,
# and extract the dendrogram data
dend_plot <- fviz_dend(hc, k = 4, # Cut in four groups
cex = 0.5, # label size
k_colors = "jco"
)
dend_data <- attr(dend_plot, "dendrogram") # Extract dendrogram data
# Cut the dendrogram at height h = 10
dend_cuts <- cut(dend_data, h = 10)
# Visualize the truncated version containing
# two branches
fviz_dend(dend_cuts$upper)
## Warning in min(-diff(our_dend_heights)): no non-missing arguments to min;
## returning Inf

# Plot the whole dendrogram
print(dend_plot)

# Plot subtree 1
fviz_dend(dend_cuts$lower[[1]], main = "Subtree 1")

# Plot subtree 2
fviz_dend(dend_cuts$lower[[2]], main = "Subtree 2")

fviz_dend(dend_cuts$lower[[2]], type = "circular")