rm(list = ls())
#ggdendro package : ggplot2 and dendrogram
#loading PACKAGE
library(ggdendro)
library(ggplot2)
# Load data
data(USArrests)
dim(USArrests)
## [1] 50 4
head(USArrests)
## Murder Assault UrbanPop Rape
## Alabama 13.2 236 58 21.2
## Alaska 10.0 263 48 44.5
## Arizona 8.1 294 80 31.0
## Arkansas 8.8 190 50 19.5
## California 9.0 276 91 40.6
## Colorado 7.9 204 78 38.7
#1.1 Compute distances and hierarchical clustering
dd <- dist(scale(USArrests), method = "euclidean")
hc <- hclust(dd, method = "ward.D2")
##Visualize dendrogram using ggdendrogram() function
ggdendrogram(hc)

# Rotate the plot and remove default theme
ggdendrogram(hc, rotate = TRUE, theme_dendro = FALSE)

#1.2 Extract dendrogram plot data
#The function dendro_data() can be used for extracting the data.
# Build dendrogram object from hclust results
hc
##
## Call:
## hclust(d = dd, method = "ward.D2")
##
## Cluster method : ward.D2
## Distance : euclidean
## Number of objects: 50
dend <- as.dendrogram(hc)
dend
## 'dendrogram' with 2 branches and 50 members total, at height 13.51624
# Extract the data (for rectangular lines)
# Type can be "rectangle" or "triangle"
dend_data <- dendro_data(dend, type = "rectangle")
names(dend_data)
## [1] "segments" "labels" "leaf_labels" "class"
head(dend_data$segments)
## x y xend yend
## 1 19.771484 13.516242 8.867188 13.516242
## 2 8.867188 13.516242 8.867188 6.461866
## 3 8.867188 6.461866 4.125000 6.461866
## 4 4.125000 6.461866 4.125000 2.714554
## 5 4.125000 2.714554 2.500000 2.714554
## 6 2.500000 2.714554 2.500000 1.091092
#?dendro_data
# Plot line segments and add labels
ggplot(dend_data$segments) +
geom_segment(aes(x = x, y = y, xend = xend, yend = yend))+
geom_text(data = dend_data$labels, aes(x, y, label = label),
hjust = 1, angle = 90, size = 3)+
ylim(-3, 15)

###########################
#triangular line segments (instead of rectangular segments).
ddata <- dendro_data(dend, type = "triangle")
ggplot(segment(ddata)) +
geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
coord_flip() +
scale_y_reverse(expand = c(0.2, 0)) +
theme_dendro() +
geom_text(data = dend_data$labels, aes(x, y, label = label),
hjust = 0, angle = 0, size = 3)

################################
source("C:/Users/liyix/OneDrive/Desktop/Tweaking ggdendro.R")
################################
#Basic dendrogram
hcdata <- dendro_data_k(hc, 3)
plot_ggdendro(hcdata,
direction = "lr",
expand.y = 0.2)

#Customized dendrograms
cols <- c("#a9a9a9", "#1f77b4", "#ff7f0e", "#2ca02c")
#Customized dendrograms
p <- plot_ggdendro(hcdata,
direction = "tb",
scale.color = cols,
label.size = 2.5,
branch.size = 0.5,
expand.y = 0.2) + theme_void() + expand_limits(x = c(-1, 32))
#Finally, let’s do a fan dendrogram.
p <- plot_ggdendro(hcdata,
fan = TRUE,
scale.color = cols,
label.size = 4,
nudge.label = 0.02,
expand.y = 0.4) + theme_void()
p

#Further customization
#Besides the graphical properties, it is also possible to add other geom elements, making the possibilities endless.
p + geom_point(data = USArrests,
aes(x = match(rownames(USArrests), hcdata$labels$label),
y = -0.7,
fill = as.factor(UrbanPop)),
size = 5,
shape = 21,
show.legend = FALSE) +
geom_text(data = USArrests,
aes(x = match(rownames(USArrests), hcdata$labels$label),
y = -0.7,
label = UrbanPop),
size = 3)

#scale_fill_manual(values = c("white", "yellow", "red")) +
#Combining multiple plots, with gridExtra for example, we can easily make tanglegrams.
library(gridExtra)
hc1 <- hclust(dd , "average")
hc2 <- hclust(dd, "ward.D2")
hcdata1 <- dendro_data_k(hc1, 5)
hcdata2 <- dendro_data_k(hc2, 5)
cols <- c("#a9a9a9", "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd")
p1 <- plot_ggdendro(hcdata1,
direction = "lr",
scale.color = cols,
expand.y = 0.2) +
theme_void()
p2 <- plot_ggdendro(hcdata2,
direction = "rl",
scale.color = cols,
expand.y = 0.2) +
theme_void()
idx <- data.frame(y1 = 1:nrow(hcdata1$labels),
y2 = match(hcdata1$labels$label, hcdata2$labels$label))
p3 <- ggplot() +
geom_segment(data = idx,
aes(x = 0,
y = y1,
xend = 1,
yend = y2),
color = "grey") +
theme_void()
g1 <- grid.arrange(p1, p3, p2, ncol = 3, widths = c(2, 1, 2))

ggsave(filename = paste0(Sys.Date(),"-","dendrogram.tif"),
plot = g1, device = "tiff",
width =15, height = 15, units = "cm",
dpi = 300, limitsize = TRUE, compression = "lzw")
#REF http://www.sthda.com/english/wiki/beautiful-dendrogram-visualizations-in-r-5-must-known-methods-unsupervised-machine-learning
#https://atrebas.github.io/post/2019-06-08-lightweight-dendrograms/
#Tweaking ggdendro.R
dendro_data_k <- function(hc, k) {
hcdata <- ggdendro::dendro_data(hc, type = "rectangle")
seg <- hcdata$segments
labclust <- cutree(hc, k)[hc$order]
segclust <- rep(0L, nrow(seg))
heights <- sort(hc$height, decreasing = TRUE)
height <- mean(c(heights[k], heights[k - 1L]), na.rm = TRUE)
for (i in 1:k) {
xi <- hcdata$labels$x[labclust == i]
idx1 <- seg$x >= min(xi) & seg$x <= max(xi)
idx2 <- seg$xend >= min(xi) & seg$xend <= max(xi)
idx3 <- seg$yend < height
idx <- idx1 & idx2 & idx3
segclust[idx] <- i
}
idx <- which(segclust == 0L)
segclust[idx] <- segclust[idx + 1L]
hcdata$segments$clust <- segclust
hcdata$segments$line <- as.integer(segclust < 1L)
hcdata$labels$clust <- labclust
hcdata
}
set_labels_params <- function(nbLabels,
direction = c("tb", "bt", "lr", "rl"),
fan = FALSE) {
if (fan) {
angle <- 360 / nbLabels * 1:nbLabels + 90
idx <- angle >= 90 & angle <= 270
angle[idx] <- angle[idx] + 180
hjust <- rep(0, nbLabels)
hjust[idx] <- 1
} else {
angle <- rep(0, nbLabels)
hjust <- 0
if (direction %in% c("tb", "bt")) { angle <- angle + 45 }
if (direction %in% c("tb", "rl")) { hjust <- 1 }
}
list(angle = angle, hjust = hjust, vjust = 0.5)
}
plot_ggdendro <- function(hcdata,
direction = c("lr", "rl", "tb", "bt"),
fan = FALSE,
scale.color = NULL,
branch.size = 1,
label.size = 3,
nudge.label = 0.01,
expand.y = 0.1) {
direction <- match.arg(direction) # if fan = FALSE
ybreaks <- pretty(segment(hcdata)$y, n = 5)
ymax <- max(segment(hcdata)$y)
## branches
p <- ggplot() +
geom_segment(data = segment(hcdata),
aes(x = x,
y = y,
xend = xend,
yend = yend,
linetype = factor(line),
colour = factor(clust)),
lineend = "round",
show.legend = FALSE,
size = branch.size)
## orientation
if (fan) {
p <- p +
coord_polar(direction = -1) +
scale_x_continuous(breaks = NULL,
limits = c(0, nrow(label(hcdata)))) +
scale_y_reverse(breaks = ybreaks)
} else {
p <- p + scale_x_continuous(breaks = NULL)
if (direction %in% c("rl", "lr")) {
p <- p + coord_flip()
}
if (direction %in% c("bt", "lr")) {
p <- p + scale_y_reverse(breaks = ybreaks)
} else {
p <- p + scale_y_continuous(breaks = ybreaks)
nudge.label <- -(nudge.label)
}
}
# labels
labelParams <- set_labels_params(nrow(hcdata$labels), direction, fan)
hcdata$labels$angle <- labelParams$angle
p <- p +
geom_text(data = label(hcdata),
aes(x = x,
y = y,
label = label,
colour = factor(clust),
angle = angle),
vjust = labelParams$vjust,
hjust = labelParams$hjust,
nudge_y = ymax * nudge.label,
size = label.size,
show.legend = FALSE)
# colors and limits
if (!is.null(scale.color)) {
p <- p + scale_color_manual(values = scale.color)
}
ylim <- -round(ymax * expand.y, 1)
p <- p + expand_limits(y = ylim)
p
}