library(datasets)
# Standardize every column of USArrests (scale() centers and scales by
# default), so no single variable dominates the Euclidean distances.
df <- scale(USArrests)
# Draw a reproducible random subset of 10 rows. The sampling range is taken
# from the data itself rather than hard-coding the row count.
set.seed(123)
ss <- sample(seq_len(nrow(df)), 10)
print(ss)
## [1] 31 15 14 3 42 43 37 48 25 26
# drop = FALSE guarantees the subset remains a matrix
df <- df[ss, , drop = FALSE]
library(dendextend)
##
## ---------------------
## Welcome to dendextend version 1.17.1
## Type citation('dendextend') for how to cite the package.
##
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
##
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags:
## https://stackoverflow.com/questions/tagged/dendextend
##
## To suppress this message use: suppressPackageStartupMessages(library(dendextend))
## ---------------------
##
## Attaching package: 'dendextend'
## The following object is masked from 'package:stats':
##
## cutree
# Pairwise Euclidean distances between the rows of df
res.dist <- dist(x = df, method = "euclidean")
# First hierarchical clustering: average linkage
hc1 <- hclust(d = res.dist, method = "average")
hc1
##
## Call:
## hclust(d = res.dist, method = "average")
##
## Cluster method : average
## Distance : euclidean
## Number of objects: 10
# Second hierarchical clustering on the same distances: Ward (ward.D2) linkage
hc2 <- hclust(d = res.dist, method = "ward.D2")
hc2
##
## Call:
## hclust(d = res.dist, method = "ward.D2")
##
## Cluster method : ward.D2
## Distance : euclidean
## Number of objects: 10
# Convert both clusterings to dendrogram objects
dend1 <- as.dendrogram(hc1)
dend2 <- as.dendrogram(hc2)
# Keep the two dendrograms together in a dendlist
dend_list <- dendlist(dend1, dend2)
# Draw the two trees facing each other with default settings
tanglegram(dend1, dend2)
# Redraw without edge/subtree highlighting and report the entanglement
# (alignment quality; a lower value means a better alignment) in the title
tanglegram(
  dend1, dend2,
  highlight_distinct_edges = FALSE,
  common_subtrees_color_lines = FALSE,
  common_subtrees_color_branches = FALSE,
  main = paste("entanglement =", round(entanglement(dend_list), digits = 2))
)
# Correlation matrix between the trees in dend_list, cophenetic method
dend_list %>% cor.dendlist(method = "cophenetic")
## [,1] [,2]
## [1,] 1.0000000 0.9925544
## [2,] 0.9925544 1.0000000
# Same matrix, computed with the Baker method
dend_list %>% cor.dendlist(method = "baker")
## [,1] [,2]
## [1,] 1.0000000 0.9895528
## [2,] 0.9895528 1.0000000
# The two correlation matrices give similar values, which suggests the correlations are consistent.
# Next, we compute the correlation between the two trees directly.
# Cophenetic correlation between the two trees, returned as a single value
cor_cophenetic(dend1 = dend1, dend2 = dend2)
## [1] 0.9925544
# The two trees appear to be highly correlated; to confirm, we also compute Baker's gamma.
# Baker's gamma correlation between the same two trees
cor_bakers_gamma(dend1 = dend1, dend2 = dend2)
## [1] 0.9895528
# Compare multiple dendrograms simultaneously. The result resembles a heat map of all the pairwise correlations.
# Build one dendrogram per linkage method from the distances of df
# (dist() defaults to Euclidean). dend1 and dend2 are assigned anew here.
# NOTE(review): the ?hclust examples pair "centroid" linkage with squared
# Euclidean distances; dist(df) is unsquared - confirm this is intended.
dend1 <- as.dendrogram(hclust(dist(df), method = "complete"))
dend2 <- as.dendrogram(hclust(dist(df), method = "single"))
dend3 <- as.dendrogram(hclust(dist(df), method = "average"))
dend4 <- as.dendrogram(hclust(dist(df), method = "centroid"))
# Gather the trees in a named dendlist and correlate every pair
dend_list <- dendlist(
  Complete = dend1, Single = dend2,
  Average = dend3, Centroid = dend4
)
cors <- cor.dendlist(dend_list)
# Show the correlation matrix rounded to two decimals
round(cors, digits = 2)
## Complete Single Average Centroid
## Complete 1.00 0.46 0.45 0.30
## Single 0.46 1.00 0.23 0.17
## Average 0.45 0.23 1.00 0.31
## Centroid 0.30 0.17 0.31 1.00
# Visualize the correlation matrix with the corrplot package:
# pie glyphs, lower triangle only
library(corrplot)
## corrplot 0.92 loaded
corrplot(cors, method = "pie", type = "lower")