Se hace una preparación de los datos por medio de un dataframe, además, se hace un subset que contenga 10 columnas para posteriormente plantear una comparación entre dendrogramas con su respectiva gráfica.
#install.packages("dendextend")
#install.packages("corrplot")
library(dendextend)
## Warning: package 'dendextend' was built under R version 4.2.3
##
## ---------------------
## Welcome to dendextend version 1.17.1
## Type citation('dendextend') for how to cite the package.
##
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
##
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags:
## https://stackoverflow.com/questions/tagged/dendextend
##
## To suppress this message use: suppressPackageStartupMessages(library(dendextend))
## ---------------------
##
## Attaching package: 'dendextend'
## The following object is masked from 'package:stats':
##
## cutree
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.2.3
## corrplot 0.92 loaded
#Data preparation
df <- scale(USArrests)
# Subset containing 10 rows
set.seed(123)
ss <- sample(1:50, 10)
df <- df[ss,]
#Comparing dendrograms
# Compute distance matrix
res.dist <- dist(df, method = "euclidean")
# Compute 2 hierarchical clusterings
hc1 <- hclust(res.dist, method = "average")
hc2 <- hclust(res.dist, method = "ward.D2")
# Create two dendrograms
dend1 <- as.dendrogram (hc1)
dend2 <- as.dendrogram (hc2)
# Create a list to hold dendrograms
dend_list <- dendlist(dend1, dend2)
#Visual comparison of two dendrograms
#Draw a tanglegram:
tanglegram(dend1, dend2)
Al haber realizado ahora se genera el mismo gráfico pero cambiando sus caracterĂsticas estĂ©ticas de la siguiente forma.
#Customized the tanglegram using many other options as follow:
tanglegram(dend1, dend2,
highlight_distinct_edges = FALSE, # Turn-off dashed lines
common_subtrees_color_lines = FALSE, # Turn-off line colors
common_subtrees_color_branches = TRUE, # Color common branches
main = paste("Entanglement =", round(entanglement(dend_list), 2))
)
Se consigue la matriz de correlaciĂłn.
#Correlation matrix between a list of dendrograms
# Cophenetic correlation matrix
cor.dendlist(dend_list, method = "cophenetic")
## [,1] [,2]
## [1,] 1.0000000 0.9925544
## [2,] 0.9925544 1.0000000
También se crea una matriz de correlación con el método de Baker
# Baker correlation matrix
cor.dendlist(dend_list, method = "baker")
## [,1] [,2]
## [1,] 1.0000000 0.9895528
## [2,] 0.9895528 1.0000000
Se halla el coeficiente de correlaciĂłn de Cophenetic
# Cophenetic correlation coefficient
cor_cophenetic(dend1, dend2)
## [1] 0.9925544
También se halla el coeficiente de correlación de Baker
# Baker correlation coefficient
cor_bakers_gamma(dend1, dend2)
## [1] 0.9895528
Entonces, con estos datos se comparan los dendogramas
#COMPARING DENDROGRAMS
# Create multiple dendrograms by chaining
dend1 <- df %>% dist %>% hclust("complete") %>% as.dendrogram
dend2 <- df %>% dist %>% hclust("single") %>% as.dendrogram
dend3 <- df %>% dist %>% hclust("average") %>% as.dendrogram
dend4 <- df %>% dist %>% hclust("centroid") %>% as.dendrogram
# Compute correlation matrix
dend_list <- dendlist("Complete" = dend1, "Single" = dend2,
"Average" = dend3, "Centroid" = dend4)
cors <- cor.dendlist(dend_list)
# Print correlation matrix
round(cors, 2)
## Complete Single Average Centroid
## Complete 1.00 0.46 0.45 0.30
## Single 0.46 1.00 0.23 0.17
## Average 0.45 0.23 1.00 0.31
## Centroid 0.30 0.17 0.31 1.00
Se visualiza la matriz de correlaciĂłn usando una funciĂłn del paquete corrplot
# Visualize the correlation matrix using corrplot package
corrplot(cors, "pie", "lower")
En este capĂtulo se explican y presentan mĂ©todos para gráficar o presentar dendogramas de distintas clases.
#install.packages("igraph")
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.2.3
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(dendextend)
library(igraph)
## Warning: package 'igraph' was built under R version 4.2.3
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
# Load data
data(USArrests)
# Compute distances and hierarchical clustering
dd <- dist(scale(USArrests), method = "euclidean")
hc <- hclust(dd, method = "ward.D2")
#Visualizing dendrograms
fviz_dend(hc, cex = 0.5)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <]8;;https://github.com/kassambara/factoextra/issueshttps://github.com/kassambara/factoextra/issues]8;;>.
Es posible usar los argumentos de main, sub, xlab y ylab para cambiar las caracterĂsticas como se muestra a continuaciĂłn.
fviz_dend(hc, cex = 0.5,
main = "Dendrogram - ward.D2",
xlab = "Objects", ylab = "Distance", sub = "")
También se puede visualizar de manera horizontal.
#To draw a horizontal dendrogram, type this:
fviz_dend(hc, cex = 0.5, horiz = TRUE)
TambiĂ©n es posible cortar el árbol a una altura determinada para particionar los datos en varios grupos, como se describe en el capĂtulo anterior: AgrupaciĂłn jerárquica (CapĂtulo 7). En este caso, es posible colorear ramas alrededor de cada grupo.
#To change the plot theme
fviz_dend(hc, k = 4, # Cut in four groups
cex = 0.5, # label size
k_colors = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"),
color_labels_by_k = TRUE, # color labels by groups
ggtheme = theme_gray() # Change theme
)
Para un cambio estético, en el código a continuación, se cambiaran los colores del grupo usando la paleta de colores “jco” (journal of clinical oncology):
#
fviz_dend(hc, cex = 0.5, k = 4, # Cut in four groups
k_colors = "jco")
También se puede representar con cuadros y de manera horizontal.
fviz_dend(hc, k = 4, cex = 0.4, horiz = TRUE, k_colors = "jco",
rect = TRUE, rect_border = "jco", rect_fill = TRUE)
Además, se puece crear un dendograma horizontal añdiendo la opción _circular” al código.
fviz_dend(hc, cex = 0.5, k = 4,
k_colors = "jco", type = "circular")
Otras formas de visualizaciĂłn como la siguiente son posibles.
fviz_dend(hc, k = 4, k_colors = "jco",
type = "phylogenic", repel = TRUE)
# Case of dendrogram with large data sets
#Zooming in the dendrogram
fviz_dend(hc, xlim = c(1, 20), ylim = c(1, 8))
#Plotting a sub-tree of dendrograms
# Create a plot of the whole dendrogram,
# and extract the dendrogram data
dend_plot <- fviz_dend(hc, k = 4, # Cut in four groups
cex = 0.5, # label size
k_colors = "jco"
)
dend_data <- attr(dend_plot, "dendrogram") # Extract dendrogram data
# Cut the dendrogram at height h = 10
dend_cuts <- cut(dend_data, h = 10)
# Visualize the truncated version containing
# two branches
fviz_dend(dend_cuts$upper)
## Warning in min(-diff(our_dend_heights)): ningĂşn argumento finito para min;
## retornando Inf
A pesar de lo realizado anteriormente ahora se presenta el digarama completo.
# Plot the whole dendrogram
print(dend_plot)
Ahora se realiza el primer sub-árbol.
# Plot subtree 1
fviz_dend(dend_cuts$lower[[1]], main = "Subtree 1")
A continuación, de la misma manera, se presenta el segundo sub-árbol.
# Plot subtree 2
fviz_dend(dend_cuts$lower[[2]], main = "Subtree 2")
Esto se puede hacer de manera circular también.
#You can also plot circular trees as follow:
fviz_dend(dend_cuts$lower[[2]], type = "circular")
Con el paquete dendextend también es posible manipular y crear dendogramas. A continuación se presenta un ejemplo de como realizar esto.
#Manipulating dendrograms using dendextend
#Standard R code for creating a dendrogram:
data <- scale(USArrests)
dist.res <- dist(data)
hc <- hclust(dist.res, method = "ward.D2")
dend <- as.dendrogram(hc)
plot(dend)
#R code for creating a dendrogram using chaining operator
library(dendextend)
dend <- USArrests[1:5,] %>% # data
scale %>% # Scale the data
dist %>% # calculate a distance matrix,
hclust(method = "ward.D2") %>% # Hierarchical clustering
as.dendrogram # Turn the object into a dendrogram.
plot(dend)
Otro ejemplo del uso de dendextend se presenta aquĂ.
#Example
library(dendextend)
# 1. Create a customized dendrogram
mycols <- c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07")
dend <- as.dendrogram(hc) %>%
set("branches_lwd", 1) %>% # Branches line width
set("branches_k_color", mycols, k = 4) %>% # Color branches by groups
set("labels_colors", mycols, k = 4) %>% # Color labels by groups
set("labels_cex", 0.5) # Change label size
# 2. Create plot
fviz_dend(dend)
Como en los casos anteriores se preparan los datos y se descargan los paquetes correspondientes.
df <- scale(mtcars)
# Default plot
heatmap(df, scale = "none")
Se pueden añadir distintos colores de la siguiente forma
col<- colorRampPalette(c("red", "white", "blue"))(256)
#install.packages("RColorBrewer")
library("RColorBrewer")
col <- colorRampPalette(brewer.pal(10, "RdYlBu"))(256)
# Use RColorBrewer color palette names
library("RColorBrewer")
col <- colorRampPalette(brewer.pal(10, "RdYlBu"))(256)
heatmap(df, scale = "none", col = col,
RowSideColors = rep(c("blue", "pink"), each = 16),
ColSideColors = c(rep("purple", 5), rep("orange", 6)))
También es posible utilizar la función del paquete ggplot llamada heatmap.2()
# install.packages("gplots")
library("gplots")
## Warning: package 'gplots' was built under R version 4.2.3
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
heatmap.2(df, scale = "none", col = bluered(100),
trace = "none", density.info = "none")
Al igual que en los casos anteriores es posible mejorar la imagen de estos mapas por medio de la funciĂłn pheatmap() como se muestra a continuaciĂłn.
#install.packages("pheatmap")
library("pheatmap")
## Warning: package 'pheatmap' was built under R version 4.2.3
pheatmap(df, cutree_rows = 4)
También es posible usar el paquete dentextend para mejorar los heatmaps
library(dendextend)
# order for rows
Rowv <- mtcars %>% scale %>% dist %>% hclust %>% as.dendrogram %>%
set("branches_k_color", k = 3) %>% set("branches_lwd", 1.2) %>%
ladderize
# Order for columns: We must transpose the data
Colv <- mtcars %>% scale %>% t %>% dist %>% hclust %>% as.dendrogram %>%
set("branches_k_color", k = 2, value = c("orange", "blue")) %>%
set("branches_lwd", 1.2) %>%
ladderize
En la anteriores lĂneas de cĂłdigo se organizĂł la informaciĂłn, a continuaciĂłn se realizará el gráfico.
library(gplots)
heatmap.2(scale(mtcars), scale = "none", col = bluered(100),
Rowv = Rowv, Colv = Colv,
trace = "none", density.info = "none")
Para realizar heatmaps complejos es necesario empleear la siguiente lĂnea de cĂłdigo.
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("ComplexHeatmap")
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.14.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
## ! pheatmap() has been masked by ComplexHeatmap::pheatmap(). Most of the arguments
## in the original pheatmap() are identically supported in the new function. You
## can still use the original function by explicitly calling pheatmap::pheatmap().
##
## Attaching package: 'ComplexHeatmap'
## The following object is masked from 'package:pheatmap':
##
## pheatmap
Heatmap(df,
name = "mtcars", #title of legend
column_title = "Variables", row_title = "Samples",
row_names_gp = gpar(fontsize = 7) # Text size for row names
)
library(circlize)
## Warning: package 'circlize' was built under R version 4.2.3
## ========================================
## circlize version 0.4.15
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
##
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
## in R. Bioinformatics 2014.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(circlize))
## ========================================
##
## Attaching package: 'circlize'
## The following object is masked from 'package:igraph':
##
## degree
mycols <- colorRamp2(breaks = c(-2, 0, 2),
colors = c("green", "white", "red"))
Heatmap(df, name = "mtcars", col = mycols)
library("circlize")
library("RColorBrewer")
Heatmap(df, name = "mtcars",
col = colorRamp2(c(-2, 0, 2), brewer.pal(n=3, name="RdBu")))
library(dendextend)
row_dend = hclust(dist(df)) # row clustering
col_dend = hclust(dist(t(df))) # column clustering
Heatmap(df, name = "mtcars",
row_names_gp = gpar(fontsize = 6.5),
cluster_rows = color_branches(row_dend, k = 4),
cluster_columns = color_branches(col_dend, k = 2))
Para dividir en dos o más parte un heatmap el procedimiento a seguir es el siguiente.
# Divide into 2 groups
set.seed(2)
Heatmap(df, name = "mtcars", k = 2)
# split by a vector specifying rowgroups
Heatmap(df, name = "mtcars", split = mtcars$cyl,
row_names_gp = gpar(fontsize = 7))
También es posible hacer particiones en una serie definida de la siguiente forma.
# Split by combining multiple variables
Heatmap(df, name ="mtcars",
split = data.frame(cyl = mtcars$cyl, am = mtcars$am),
row_names_gp = gpar(fontsize = 7))
Es posible realizar una o una serie de anotaciones dentro del “heatmap”.
# Define some graphics to display the distribution of columns
.hist = anno_histogram(df, gp = gpar(fill = "lightblue"))
.density = anno_density(df, type = "line", gp = gpar(col = "blue"))
ha_mix_top = HeatmapAnnotation(
hist = .hist, density = .density,
height = unit(3.8, "cm")
)
# Define some graphics to display the distribution of rows
.violin = anno_density(df, type = "violin",
gp = gpar(fill = "lightblue"), which = "row")
.boxplot = anno_boxplot(df, which = "row")
ha_mix_right = HeatmapAnnotation(violin = .violin, bxplt = .boxplot,
which = "row", width = unit(4, "cm"))
# Combine annotation with heatmap
Heatmap(df, name = "mtcars",
column_names_gp = gpar(fontsize = 8),
top_annotation = ha_mix_top) + ha_mix_right
Es posible combinar varios heatmaps por medio del siguiente cĂłdigo.
# Heatmap 1
ht1 = Heatmap(df, name = "ht1", km = 2,
column_names_gp = gpar(fontsize = 9))
# Heatmap 2
ht2 = Heatmap(df, name = "ht2",
col = circlize::colorRamp2(c(-2, 0, 2), c("green", "white", "red")),
column_names_gp = gpar(fontsize = 9))
# Combine the two heatmaps
ht1 + ht2
Además de lo visto anteriormente también es factible realizar una matriz de expresión génica. Esto significa que en los datos de expresión génica las filas son genes y las columans son muestras. Esto implica que se puede adjuntar más información sobre los genes después del mapa de calor de expresiones, como la longitud del gent y el tipo de genes.
expr <- readRDS(paste0(system.file(package = "ComplexHeatmap"),
"/extdata/gene_expression.rds"))
mat <- as.matrix(expr[, grep("cell", colnames(expr))])
type <- gsub("s\\d+_", "", colnames(mat))
ha = HeatmapAnnotation(
df = data.frame(type = type),
annotation_height = unit(4, "mm")
)
Heatmap(mat, name = "expression", km = 5, top_annotation = ha,
show_row_names = FALSE, show_column_names = FALSE) +
Heatmap(expr$length, name = "length", width = unit(5, "mm"),
col = circlize::colorRamp2(c(0, 100000), c("white", "orange"))) +
Heatmap(expr$type, name = "type", width = unit(5, "mm")) +
Heatmap(expr$chr, name = "chr", width = unit(5, "mm"),
col = circlize::rand_color(length(unique(expr$chr))))