rm(list = ls())
#ggdendro package : ggplot2 and dendrogram
#loading PACKAGE
library(ggdendro)
library(ggplot2)
# Load data
data(USArrests)
dim(USArrests)
## [1] 50  4
head(USArrests)
##            Murder Assault UrbanPop Rape
## Alabama      13.2     236       58 21.2
## Alaska       10.0     263       48 44.5
## Arizona       8.1     294       80 31.0
## Arkansas      8.8     190       50 19.5
## California    9.0     276       91 40.6
## Colorado      7.9     204       78 38.7
#1.1 Compute distances and hierarchical clustering
dd <- dist(scale(USArrests), method = "euclidean")
hc <- hclust(dd, method = "ward.D2")
##Visualize dendrogram using ggdendrogram() function
ggdendrogram(hc)

# Rotate the plot and remove default theme
ggdendrogram(hc, rotate = TRUE, theme_dendro = FALSE)

#1.2 Extract dendrogram plot data
#The function dendro_data() can be used for extracting the data.
# Build dendrogram object from hclust results
hc
## 
## Call:
## hclust(d = dd, method = "ward.D2")
## 
## Cluster method   : ward.D2 
## Distance         : euclidean 
## Number of objects: 50
dend <- as.dendrogram(hc)
dend
## 'dendrogram' with 2 branches and 50 members total, at height 13.51624
# Extract the data (for rectangular lines)
# Type can be "rectangle" or "triangle"
dend_data <- dendro_data(dend, type = "rectangle")
names(dend_data)
## [1] "segments"    "labels"      "leaf_labels" "class"
head(dend_data$segments)
##           x         y     xend      yend
## 1 19.771484 13.516242 8.867188 13.516242
## 2  8.867188 13.516242 8.867188  6.461866
## 3  8.867188  6.461866 4.125000  6.461866
## 4  4.125000  6.461866 4.125000  2.714554
## 5  4.125000  2.714554 2.500000  2.714554
## 6  2.500000  2.714554 2.500000  1.091092
#?dendro_data
# Plot line segments and add labels
ggplot(dend_data$segments) + 
  geom_segment(aes(x = x, y = y, xend = xend, yend = yend))+
  geom_text(data = dend_data$labels, aes(x, y, label = label),
            hjust = 1, angle = 90, size = 3)+
  ylim(-3, 15)

###########################
#triangular line segments (instead of rectangular segments).
ddata <- dendro_data(dend, type = "triangle")
ggplot(segment(ddata)) + 
  geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) + 
  coord_flip() + 
  scale_y_reverse(expand = c(0.2, 0)) +
  theme_dendro() +
  geom_text(data = dend_data$labels, aes(x, y, label = label),
            hjust = 0, angle = 0, size = 3)

################################
source("C:/Users/liyix/OneDrive/Desktop/Tweaking ggdendro.R")
################################
#Basic dendrogram
hcdata <- dendro_data_k(hc, 3)
plot_ggdendro(hcdata,
                   direction   = "lr",
                   expand.y    = 0.2)

#Customized dendrograms
cols <- c("#a9a9a9", "#1f77b4", "#ff7f0e", "#2ca02c")
#Customized dendrograms
p <- plot_ggdendro(hcdata,
                   direction   = "tb",
                   scale.color = cols,
                   label.size  = 2.5,
                   branch.size = 0.5,
                   expand.y    = 0.2) + theme_void() + expand_limits(x = c(-1, 32))
#Finally, let’s do a fan dendrogram.
p <- plot_ggdendro(hcdata,
                   fan         = TRUE,
                   scale.color = cols,
                   label.size  = 4,
                   nudge.label = 0.02,
                   expand.y    = 0.4) + theme_void()
p

#Further customization
#Besides the graphical properties, it is also possible to add other geom elements, making the possibilities endless.
p + geom_point(data     = USArrests, 
               aes(x    = match(rownames(USArrests), hcdata$labels$label),
                   y    = -0.7,
                   fill = as.factor(UrbanPop)),
               size     = 5,
               shape    = 21,
               show.legend = FALSE) +
   geom_text(data      = USArrests, 
            aes(x     = match(rownames(USArrests), hcdata$labels$label),
                y     = -0.7,
                label = UrbanPop),
            size = 3)

#scale_fill_manual(values = c("white", "yellow", "red")) +
#Combining multiple plots, with gridExtra for example, we can easily make tanglegrams.
library(gridExtra)
hc1     <- hclust(dd , "average")
hc2     <- hclust(dd, "ward.D2")
hcdata1 <- dendro_data_k(hc1, 5)
hcdata2 <- dendro_data_k(hc2, 5)
cols    <- c("#a9a9a9", "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd")

p1 <- plot_ggdendro(hcdata1,
                    direction   = "lr",
                    scale.color = cols,
                    expand.y    = 0.2) +
  theme_void()

p2 <- plot_ggdendro(hcdata2,
                    direction   = "rl",
                    scale.color = cols,
                    expand.y    = 0.2) +
  theme_void()

idx <- data.frame(y1 = 1:nrow(hcdata1$labels),
                  y2 = match(hcdata1$labels$label, hcdata2$labels$label))

p3 <- ggplot() +
  geom_segment(data     = idx, 
               aes(x    = 0,
                   y    = y1,
                   xend = 1,
                   yend = y2),
               color    = "grey") +
  theme_void()

g1 <- grid.arrange(p1, p3, p2, ncol = 3, widths = c(2, 1, 2))

ggsave(filename = paste0(Sys.Date(),"-","dendrogram.tif"), 
       plot = g1, device = "tiff",
       width =15, height = 15, units = "cm",
       dpi = 300, limitsize = TRUE, compression = "lzw")
#REF http://www.sthda.com/english/wiki/beautiful-dendrogram-visualizations-in-r-5-must-known-methods-unsupervised-machine-learning
#https://atrebas.github.io/post/2019-06-08-lightweight-dendrograms/
#Tweaking ggdendro.R
dendro_data_k <- function(hc, k) {
  
  hcdata    <-  ggdendro::dendro_data(hc, type = "rectangle")
  seg       <-  hcdata$segments
  labclust  <-  cutree(hc, k)[hc$order]
  segclust  <-  rep(0L, nrow(seg))
  heights   <-  sort(hc$height, decreasing = TRUE)
  height    <-  mean(c(heights[k], heights[k - 1L]), na.rm = TRUE)
  
  for (i in 1:k) {
    xi      <-  hcdata$labels$x[labclust == i]
    idx1    <-  seg$x    >= min(xi) & seg$x    <= max(xi)
    idx2    <-  seg$xend >= min(xi) & seg$xend <= max(xi)
    idx3    <-  seg$yend < height
    idx     <-  idx1 & idx2 & idx3
    segclust[idx] <- i
  }
  
  idx                    <-  which(segclust == 0L)
  segclust[idx]          <-  segclust[idx + 1L]
  hcdata$segments$clust  <-  segclust
  hcdata$segments$line   <-  as.integer(segclust < 1L)
  hcdata$labels$clust    <-  labclust
  
  hcdata
}
set_labels_params <- function(nbLabels,
                              direction = c("tb", "bt", "lr", "rl"),
                              fan       = FALSE) {
  if (fan) {
    angle       <-  360 / nbLabels * 1:nbLabels + 90
    idx         <-  angle >= 90 & angle <= 270
    angle[idx]  <-  angle[idx] + 180
    hjust       <-  rep(0, nbLabels)
    hjust[idx]  <-  1
  } else {
    angle       <-  rep(0, nbLabels)
    hjust       <-  0
    if (direction %in% c("tb", "bt")) { angle <- angle + 45 }
    if (direction %in% c("tb", "rl")) { hjust <- 1 }
  }
  list(angle = angle, hjust = hjust, vjust = 0.5)
}
plot_ggdendro <- function(hcdata,
                          direction   = c("lr", "rl", "tb", "bt"),
                          fan         = FALSE,
                          scale.color = NULL,
                          branch.size = 1,
                          label.size  = 3,
                          nudge.label = 0.01,
                          expand.y    = 0.1) {
  
  direction <- match.arg(direction) # if fan = FALSE
  ybreaks   <- pretty(segment(hcdata)$y, n = 5)
  ymax      <- max(segment(hcdata)$y)
  
  ## branches
  p <- ggplot() +
    geom_segment(data         =  segment(hcdata),
                 aes(x        =  x,
                     y        =  y,
                     xend     =  xend,
                     yend     =  yend,
                     linetype =  factor(line),
                     colour   =  factor(clust)),
                 lineend      =  "round",
                 show.legend  =  FALSE,
                 size         =  branch.size)
  
  ## orientation
  if (fan) {
    p <- p +
      coord_polar(direction = -1) +
      scale_x_continuous(breaks = NULL,
                         limits = c(0, nrow(label(hcdata)))) +
      scale_y_reverse(breaks = ybreaks)
  } else {
    p <- p + scale_x_continuous(breaks = NULL)
    if (direction %in% c("rl", "lr")) {
      p <- p + coord_flip()
    }
    if (direction %in% c("bt", "lr")) {
      p <- p + scale_y_reverse(breaks = ybreaks)
    } else {
      p <- p + scale_y_continuous(breaks = ybreaks)
      nudge.label <- -(nudge.label)
    }
  }
  
  # labels
  labelParams <- set_labels_params(nrow(hcdata$labels), direction, fan)
  hcdata$labels$angle <- labelParams$angle
  
  p <- p +
    geom_text(data        =  label(hcdata),
              aes(x       =  x,
                  y       =  y,
                  label   =  label,
                  colour  =  factor(clust),
                  angle   =  angle),
              vjust       =  labelParams$vjust,
              hjust       =  labelParams$hjust,
              nudge_y     =  ymax * nudge.label,
              size        =  label.size,
              show.legend =  FALSE)
  
  # colors and limits
  if (!is.null(scale.color)) {
    p <- p + scale_color_manual(values = scale.color)
  }
  
  ylim <- -round(ymax * expand.y, 1)
  p    <- p + expand_limits(y = ylim)
  
  p
}