dendro_data_k <- function(hc, k) {
hcdata <- ggdendro::dendro_data(hc, type = "rectangle")
seg <- hcdata$segments
labclust <- cutree(hc, k)[hc$order]
segclust <- rep(0L, nrow(seg))
heights <- sort(hc$height, decreasing = TRUE)
height <- mean(c(heights[k], heights[k - 1L]), na.rm = TRUE)
for (i in 1:k) {
xi <- hcdata$labels$x[labclust == i]
idx1 <- seg$x >= min(xi) & seg$x <= max(xi)
idx2 <- seg$xend >= min(xi) & seg$xend <= max(xi)
idx3 <- seg$yend < height
idx <- idx1 & idx2 & idx3
segclust[idx] <- i
}
idx <- which(segclust == 0L)
segclust[idx] <- segclust[idx + 1L]
hcdata$segments$clust <- segclust
hcdata$segments$line <- as.integer(segclust < 1L)
hcdata$labels$clust <- labclust
hcdata
}
###################################cluster
set_labels_params <- function(nbLabels,
direction = c("tb", "bt", "lr", "rl"),
fan = FALSE) {
if (fan) {
angle <- 360 / nbLabels * 1:nbLabels + 90
idx <- angle >= 90 & angle <= 270
angle[idx] <- angle[idx] + 180
hjust <- rep(0, nbLabels)
hjust[idx] <- 1
} else {
angle <- rep(0, nbLabels)
hjust <- 0
if (direction %in% c("tb", "bt")) { angle <- angle + 45 }
if (direction %in% c("tb", "rl")) { hjust <- 1 }
}
list(angle = angle, hjust = hjust, vjust = 0.5)
}
plot_ggdendro_multi <- function(hcdata,
direction = c("lr", "rl", "tb", "bt"),
fan = FALSE,
scale.color = NULL,
branch.size = 1,
label.size = 3,
nudge.label = 0.01,
expand.y = 0.1) {
direction <- match.arg(direction) # if fan = FALSE
ybreaks <- pretty(segment(hcdata)$y, n = 5)
ymax <- max(segment(hcdata)$y)
## branches
p <- ggplot() +
geom_segment(data = segment(hcdata),
aes(x = x,
y = y,
xend = xend,
yend = yend,
linetype = "solid",
colour = factor(clust)),
lineend = "round",
show.legend = FALSE,
size = branch.size)
## orientation
if (fan) {
p <- p +
coord_polar(direction = -1) +
scale_x_continuous(breaks = NULL,
limits = c(0, nrow(label(hcdata)))) +
scale_y_reverse(breaks = ybreaks)
} else {
p <- p + scale_x_continuous(breaks = NULL)
if (direction %in% c("rl", "lr")) {
p <- p + coord_flip()
}
if (direction %in% c("bt", "lr")) {
p <- p + scale_y_reverse(breaks = ybreaks)
} else {
p <- p + scale_y_continuous(breaks = ybreaks)
nudge.label <- -(nudge.label)
}
}
# labels
labelParams <- set_labels_params(nrow(hcdata$labels), direction, fan)
hcdata$labels$angle <- labelParams$angle
p <- p +
geom_text(data = label(hcdata),
aes(x = x,
y = y,
label = label,
angle = angle),
colour = "red",
vjust = labelParams$vjust,
hjust = labelParams$hjust,
nudge_y = ymax * nudge.label,
size = label.size,
show.legend = FALSE)
# colors and limits
if (!is.null(scale.color)) {
p <- p + scale_color_manual(values = scale.color)
}
ylim <- -round(ymax * expand.y, 1)
p <- p + expand_limits(y = ylim)
p
}
library(ggdendro)
library(ggplot2)
################################################################
data(USArrests)
dim(USArrests)
## [1] 50 4
#1.1 Compute distances and hierarchical clustering
dd <- dist(scale(USArrests), method = "euclidean")
hc <- hclust(dd, method = "ward.D2")
#Basic dendrogram
hcdata <- dendro_data_k(hc, 4)
##############################################
head(hcdata$segments)
## x y xend yend clust line
## 1 19.771484 13.516242 8.867188 13.516242 0 1
## 2 8.867188 13.516242 8.867188 6.461866 0 1
## 3 8.867188 6.461866 4.125000 6.461866 1 0
## 4 4.125000 6.461866 4.125000 2.714554 1 0
## 5 4.125000 2.714554 2.500000 2.714554 1 0
## 6 2.500000 2.714554 2.500000 1.091092 1 0
#View(hcdata$segments)
hcdata_1 <- hcdata$segments
hcdata_1$clust <- factor(hcdata_1$clust)
hcdata_2 <- data.frame(clust = unique(hcdata_1$clust), level = levels(hcdata_1$clust))
hcdata_3 <- merge(hcdata_1, hcdata_2, by = "clust")
hcdata_3$clust <- hcdata_3$level
hcdata_3$level <- NULL
hcdata$segments <- hcdata_3
##############################################
df1 <- label(hcdata)
df2 <- df1
df2$clust <- factor(df2$clust)
df3 <- data.frame(clust = unique(df1$clust), level = levels(df2$clust))
df4 <- merge(df2, df3, by = "clust")
df4$clust <- df4$level
df4$level <- NULL
str(df4)
## 'data.frame': 50 obs. of 4 variables:
## $ clust: chr "1" "1" "1" "1" ...
## $ x : num 1 2 3 4 5 6 7 8 9 10 ...
## $ y : num 0 0 0 0 0 0 0 0 0 0 ...
## $ label: chr "Alabama" "Louisiana" "Georgia" "Tennessee" ...
df4$clust <- as.numeric(df4$clust)
#View(df4)
write.csv(df4, paste0(Sys.Date(),"-","cluster_info.csv"),row.names = FALSE)
#####################################
d <- df4[,c("x","clust")]
d1 <- aggregate(d[1], list(clust = d$clust), FUN = max)
dim(d)
## [1] 50 2
d2 <- d[!d$x %in% d1$x,]
dim(d2)
## [1] 46 2
str(d2)
## 'data.frame': 46 obs. of 2 variables:
## $ x : num 1 2 3 4 5 6 8 9 10 11 ...
## $ clust: num 1 1 1 1 1 1 2 2 2 2 ...
d2 <- d2[order(d2$clust),]
d3 <- aggregate(d[1],
list(clust = d$clust), FUN = mean)
d3
## clust x
## 1 1 4.0
## 2 2 13.5
## 3 3 25.5
## 4 4 41.0
######################################PLOT
cols <- rainbow(length(unique(d$clust))+1)
p1 <- plot_ggdendro_multi(hcdata,
direction = "lr",
branch.size = 0.5,
scale.color = cols,
expand.y = 0)
p1 + scale_x_continuous(breaks = NULL,
limits = c(0, nrow(label(hcdata)))) +
geom_segment(data = d2,
mapping = aes(x = x, y = -0.5,
xend = x+1, yend = -0.5,color = factor(clust)),
size = 0.5) +
geom_text(data = d3, aes(x = x, y = -2,label = clust, angle = 0)) +
coord_polar(direction = -1) +
theme_void() +
theme(legend.position = "none")
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.

###########################original_2
plot_ggdendro_multi(dendro_data_k(hc, 4),
fan = TRUE,
scale.color = cols,
label.size = 4,
nudge.label = 0.02,
expand.y = 0.4) + theme_void()

######################original_1
ggdendrogram(hc, rotate = F, theme_dendro = F)

ggsave(filename = paste0(Sys.Date(),"-","-cluter-original.tif"),
plot = last_plot(), device = "tiff", path = NULL,
width =13, height = 13, units = "cm",
dpi = 300, limitsize = TRUE, compression = "lzw")
ggsave(filename = paste0(Sys.Date(),"-","-cluter-original.pdf"),
plot = last_plot(), device = "pdf", path = NULL,
width = 15, height = 25, units = "cm")
##ref https://rpubs.com/TX-YXL/662586