https://cran.r-project.org/web/packages/superheat/index.html

A new package for creating heatmaps was recently published by Rebecca Barter. As its name suggests, the superheat package may be the most comprehensive tool for building and customizing heatmaps from your data. The package extends the traditional heatmap with multiple visual elements, such as dendrograms, scatter plots, smoothing curves, correlation bars, text labels and more.

In this document, we will explore the capabilities of the superheat package. The “golub” dataset will be used for our demonstration. This package has been used by Prof. V. Tuan Nguyen for his Machine Learning and Quantitative Analysis Workshop in Vietnam. The dataset contains the expression of 3571 gene markers measured in 72 patients with acute lymphoblastic or acute myeloid leukemia (Science, 1999). The target variable Y represents the prognosis of these patients.

First, we will use the feature filtering function of the mlr package to identify the 100 most important genes for predicting the prognosis (Y). We will then build the heatmaps on this filtered dataset using the superheat package.

library(tidyverse)

# Load the "golub.csv" file from the working directory as a tibble, then
# store the outcome column Y as a factor so it can serve as a class label.
data0 <- as_tibble(read.csv("golub.csv"))
data0$Y <- as.factor(data0$Y)

library(mlr)
# Build a classification task on data0 without its first five columns
# (presumably the non-marker metadata columns -- confirm against golub.csv).
# Y is the target and "1" is treated as the positive class.
task.markers <- makeClassifTask(
  id = "Markers",
  data = data0[, -c(1:5)],
  target = "Y",
  positive = "1"
)

# Random-forest importance depends on the RNG, so fix the seed to get the
# same 100 selected features on every run.
set.seed(2017)

# Keep the 100 features ranked highest by random-forest importance.
filtered.task <- filterFeatures(
  task.markers,
  method = "rf.importance",
  abs = 100
)

# Pull the filtered data (features plus target) through the public accessor
# instead of reaching into the task's internal environment.
df100 <- as_tibble(getTaskData(filtered.task))

# Re-attach the metadata columns from the full dataset; rows are assumed to
# be in the same order in data0 and df100.
df100$PS <- data0$PS
df100$gender <- data0$Gender
df100$TBCell <- data0$T.B.cell
df100$id <- data0$id

The Basic heatmap

library(superheat)

# Plain heatmap of the first 50 rows and first 50 columns of df100,
# with the bottom labels rotated to vertical.
superheat(
  df100[seq_len(50), seq_len(50)],
  bottom.label.text.angle = 90,
  bottom.label.size = 0.05,
  left.label.size = 0.05
)

Reordering the rows and columns

# Same 50 x 50 subset, now with superheat's "pretty" ordering switched on
# for both rows and columns.
superheat(
  df100[seq_len(50), seq_len(50)],
  pretty.order.cols = TRUE,
  pretty.order.rows = TRUE,
  bottom.label.text.angle = 90,
  bottom.label.size = 0.03,
  left.label.size = 0.03
)

Customizing the color palette

library(viridis)

# Reordered 50 x 50 heatmap drawn with a 100-step magma palette and an
# explicit legend height.
superheat(
  df100[seq_len(50), seq_len(50)],
  heat.pal = viridis::magma(100),
  legend.height = 0.08,
  pretty.order.cols = TRUE,
  pretty.order.rows = TRUE,
  bottom.label.text.angle = 90,
  bottom.label.size = 0.03,
  left.label.size = 0.03
)

Adding a dendrogram

# Reordered 50 x 50 heatmap (inferno palette) with dendrograms requested
# for both the rows and the columns.
superheat(
  df100[seq_len(50), seq_len(50)],
  col.dendrogram = TRUE,
  row.dendrogram = TRUE,
  heat.pal = viridis::inferno(100),
  pretty.order.cols = TRUE,
  pretty.order.rows = TRUE,
  bottom.label.text.angle = 90,
  bottom.label.size = 0.03,
  left.label.size = 0.03
)

Clustering by column

# Reordered 50 x 50 heatmap whose columns are split into four clusters;
# bottom labels are tilted by 45 degrees.
superheat(
  df100[seq_len(50), seq_len(50)],
  n.clusters.cols = 4,
  heat.pal = viridis::inferno(100),
  pretty.order.cols = TRUE,
  pretty.order.rows = TRUE,
  bottom.label.text.angle = 45,
  bottom.label.size = 0.03,
  left.label.size = 0.03
)

Clustering by both column and row

# 50 x 50 heatmap clustered in both directions: four column clusters and
# two row clusters.
superheat(
  df100[seq_len(50), seq_len(50)],
  n.clusters.rows = 2,
  n.clusters.cols = 4,
  heat.pal = viridis::inferno(100),
  bottom.label.text.angle = 45,
  bottom.label.size = 0.03,
  left.label.size = 0.03
)

Clustering by a factor variable

# All rows, first 50 columns. Rows are grouped by the factor df100$Y
# rather than by a computed clustering; columns get four clusters.
superheat(
  df100[, seq_len(50)],
  membership.rows = df100$Y,
  n.clusters.cols = 4,
  pretty.order.cols = TRUE,
  heat.pal = viridis::magma(100),
  left.label.text.angle = 90,
  left.label.size = 0.1,
  bottom.label.size = 0.03
)

Adding a supplementary scatter dot plot

# Clustered heatmap (4 column clusters, 2 row clusters) with df100$PS
# shown in the side plot next to the rows, on an axis titled "PS".
superheat(
  df100[, seq_len(50)],
  yr = df100$PS,
  yr.axis.name = "PS",
  n.clusters.rows = 2,
  n.clusters.cols = 4,
  heat.pal = viridis::viridis(100),
  bottom.label.text.angle = 90,
  bottom.label.size = 0.04,
  left.label.size = 0.1
)

Clustering the scatter dot plot

# Same PS side plot, now with three row clusters and one colour supplied
# per row cluster for the side-plot points.
superheat(
  df100[, seq_len(50)],
  yr = df100$PS,
  yr.axis.name = "PS",
  yr.cluster.col = c("#d82222", "#831aad", "#a50859"),
  n.clusters.rows = 3,
  n.clusters.cols = 4,
  heat.pal = viridis::magma(100),
  bottom.label.text.angle = 90,
  bottom.label.size = 0.04,
  left.label.size = 0.1
)

Adding a line graph

# PS side plot drawn as a line graph, coloured per row cluster
# (three row clusters, four column clusters).
superheat(
  df100[, seq_len(50)],
  yr = df100$PS,
  yr.axis.name = "PS",
  yr.plot.type = "line",
  yr.cluster.col = c("#d82222", "#831aad", "#a50859"),
  n.clusters.rows = 3,
  n.clusters.cols = 4,
  heat.pal = viridis::magma(100),
  bottom.label.text.angle = 90,
  bottom.label.size = 0.04,
  left.label.size = 0.1
)

Adding a smooth curve

# Unclustered heatmap with PS shown as a smoothed curve in "blue4".
# The label sizes are set to a tiny value (0.001).
superheat(
  df100[, seq_len(50)],
  yr = df100$PS,
  yr.axis.name = "PS",
  yr.plot.type = "smooth",
  yr.line.col = "blue4",
  heat.pal = viridis::viridis(100),
  bottom.label.text.angle = 90,
  bottom.label.size = 0.001,
  left.label.size = 0.001
)

Adding a scatter line plot

# PS side plot of type "scatterline": a "red4" line with one "gold"
# observation colour supplied per row of df100.
superheat(
  df100[, seq_len(50)],
  yr = df100$PS,
  yr.axis.name = "PS",
  yr.plot.type = "scatterline",
  yr.line.col = "red4",
  yr.obs.col = rep("gold", times = nrow(df100)),
  heat.pal = viridis::plasma(100),
  bottom.label.text.angle = 90,
  bottom.label.size = 0.001,
  left.label.size = 0.001
)

Adding a Bar plot

# PS side plot drawn as bars: black bar colour setting plus one
# "#d82222" observation colour per row of df100.
superheat(
  df100[, seq_len(50)],
  yr = df100$PS,
  yr.axis.name = "PS",
  yr.plot.type = "bar",
  yr.obs.col = rep("#d82222", times = nrow(df100)),
  yr.bar.col = "black",
  heat.pal = viridis::magma(100),
  bottom.label.text.angle = 90,
  bottom.label.size = 0.001,
  left.label.size = 0.001
)

Adding a Box-plot

# Rows are grouped by a label built from df100$TBCell (value followed by
# " TBCell"); PS is summarised per group as box plots in the side panel.
superheat(
  df100[, seq_len(50)],
  membership.rows = paste(df100$TBCell, "TBCell", sep = " "),
  yr = df100$PS,
  yr.plot.type = "boxplot",
  yr.axis.name = "PS",
  yr.cluster.col = c("#d82222", "#831aad", "#a50859"),
  heat.pal = viridis::magma(100),
  bottom.label.text.angle = 90,
  bottom.label.size = 0.001,
  left.label.size = 0.001
)

Adding a correlation bar plot

# Clustered heatmap with two add-on plots:
#   - beside the rows: PS, coloured per row cluster (3 clusters);
#   - above the columns: bars of each column's correlation with PS,
#     coloured per column cluster (4 clusters).
superheat(
  df100[, seq_len(50)],
  yr = df100$PS,
  yr.axis.name = "PS",
  yr.cluster.col = c("#d82222", "#831aad", "#a50859"),
  yt = cor(x = df100[, seq_len(50)], y = df100$PS),
  yt.plot.type = "bar",
  yt.axis.name = "Correlation\nwith PS",
  yt.axis.size = 10,
  yt.cluster.col = c("gold", "orange", "red3", "purple"),
  n.clusters.rows = 3,
  n.clusters.cols = 4,
  heat.pal = viridis::magma(100),
  bottom.label.text.angle = 90,
  bottom.label.size = 0.04,
  left.label.size = 0.1
)

Adding text labels

# Small 15 x 10 heatmap with each cell's value, rounded to one decimal,
# printed on top of it in white.
superheat(
  df100[seq_len(15), seq_len(10)],
  X.text = round(as.matrix(df100[seq_len(15), seq_len(10)]), digits = 1),
  X.text.size = 3,
  X.text.col = "white",
  heat.pal = viridis::magma(100),
  bottom.label.text.angle = 90,
  bottom.label.size = 0.2,
  left.label.size = 0.08
)

Now that you have seen what superheat can do, it’s your turn to apply this useful package to your own work.

Thank you and have a nice day !