需求与目的

      在做基因表达或各类定量分析时,我们常常会使用热图来展示多组数据的差异。
      热图可以直观地呈现行与列之间的聚类关系,帮助我们快速发现潜在模式或分组特征。然而,当行名是一长串难以辨识的基因 ID 或代码编号时,热图读起来并不友好。为了增强可读性,我们通常需要把这些原始 ID 替换为更容易理解、更有意义的标签,例如基因符号(symbol)或其他关键信息。
      本篇内容将结合 R 语言,分享如何在热图绘制完毕后,直接对图中已有的标签进行替换,让你在不修改原始矩阵的前提下,快速“矫正”热图行名(或列名),并进一步体会 R 语言绘图背后的灵活与强大。

# Load the required packages.
# NOTE: the former `rm(list = ls())` call was removed on purpose. It deletes
# every object in the caller's workspace when this script is sourced, yet it
# does not detach packages or reset options, so it never produced a truly
# clean session. Restart R if a clean session is needed.
library(pheatmap)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Fix the RNG seed so the simulated data below are reproducible.
set.seed(seed = 123)
  1. 生成示例数据
# Mock gene identifiers in an Ensembl-like format (ENSG followed by 11 digits).
gene_ids <- sprintf("ENSG000000%05d", 1:20)
# Mock sample labels: Sample_1 ... Sample_6.
sample_names <- sprintf("Sample_%d", 1:6)
# Simulated expression matrix (genes in rows, samples in columns), drawn from
# a normal distribution with mean 10 and sd 2. Row and column names are
# attached at construction time.
expression_data <- matrix(
  rnorm(length(gene_ids) * length(sample_names), mean = 10, sd = 2),
  nrow = length(gene_ids),
  ncol = length(sample_names),
  dimnames = list(gene_ids, sample_names)
)
  1. 创建模拟的基因ID到symbol的映射
# Mock gene symbols, one per simulated gene ID (same order as `gene_ids`).
# "NFKB1" is spelled in upper case to match the official HGNC symbol.
gene_symbols <- c(
  "TP53", "BRCA1", "EGFR", "KRAS", "PTEN",
  "MYC", "VEGFA", "IL6", "TNF", "MAPK1",
  "AKT1", "MTOR", "STAT3", "NFKB1", "CDKN2A",
  "BCL2", "CASP3", "SOX2", "NOTCH1", "HIF1A"
)
# data.frame() silently recycles a shorter column when lengths divide evenly,
# which would pair IDs with the wrong symbols; fail loudly instead.
stopifnot(length(gene_symbols) == length(gene_ids))
# Lookup table from gene ID to gene symbol. The column names follow the
# attribute names used by Ensembl BioMart exports.
id_symbol_mapping <- data.frame(
  ensembl_gene_id = gene_ids,
  external_gene_name = gene_symbols
)
  1. 首先使用基因ID绘制热图
# Baseline heatmap: rows are labelled with the raw gene IDs.
# Uncomment the pdf()/dev.off() pair to write the figure to a file instead.
# pdf("original_heatmap.pdf", width = 10, height = 12)
original_heatmap <- pheatmap(
  expression_data,
  main = "Expression Heatmap (Gene IDs)",
  scale = "row", # z-score each gene (row) across samples
  cluster_rows = TRUE, cluster_cols = TRUE,
  show_rownames = TRUE, show_colnames = TRUE,
  fontsize_row = 10, fontsize_col = 10
)
print(original_heatmap)

# dev.off()
  1. 创建ID到symbol的映射向量
# Named character vector used as a lookup: names are gene IDs, values are the
# corresponding gene symbols.
symbol_vector <- setNames(
  id_symbol_mapping$external_gene_name,
  id_symbol_mapping$ensembl_gene_id
)
  1. 使用gene symbols重新绘制热图
# Copy the expression matrix and relabel its rows with gene symbols; the
# numeric values are left untouched.
expression_data_symbols <- expression_data
row_ids <- rownames(expression_data)
row_symbols <- unname(symbol_vector[row_ids])
# An ID that is missing from the lookup (or maps to an empty string) would
# otherwise become an NA/blank row label; keep the original ID for those rows.
unmapped <- is.na(row_symbols) | !nzchar(row_symbols)
row_symbols[unmapped] <- row_ids[unmapped]
rownames(expression_data_symbols) <- row_symbols
# Uncomment the pdf()/dev.off() pair to write the figure to a file instead.
# pdf("symbol_heatmap.pdf", width = 10, height = 12)
symbol_heatmap <- pheatmap(
  expression_data_symbols,
  main = "Expression Heatmap (Gene Symbols)",
  scale = "row", # z-score each gene (row) across samples
  cluster_rows = TRUE, cluster_cols = TRUE,
  show_rownames = TRUE, show_colnames = TRUE,
  fontsize_row = 10, fontsize_col = 10
)
print(symbol_heatmap)

# dev.off()
  1. 保存原始数据供后续使用
# Persist the ID-labelled matrix and the ID-to-symbol table for later use.
save(
  list = c("expression_data", "id_symbol_mapping"),
  file = "heatmap_data.RData"
)
  1. 为热图添加行注释(基因类型)
# Row annotation: a randomly assigned, purely illustrative gene type per gene.
# The annotation is keyed by the row names of the matrix that is actually
# plotted (the gene symbols). pheatmap matches annotation rows to matrix rows
# by name, so keying by the Ensembl-style IDs of `expression_data` would leave
# every annotation NA and the GeneType strip blank.
annotation_row <- data.frame(
  GeneType = factor(sample(
    c("Oncogene", "Tumor Suppressor"),
    nrow(expression_data_symbols),
    replace = TRUE
  )),
  row.names = rownames(expression_data_symbols)
)
# Colour for each annotation level.
ann_colors <- list(GeneType = c(Oncogene = "red", `Tumor Suppressor` = "blue"))
# Heatmap with the row annotation strip.
# Uncomment the pdf()/dev.off() pair to write the figure to a file instead.
# pdf("annotated_heatmap.pdf", width = 12, height = 12)
annotated_heatmap <- pheatmap(
  expression_data_symbols,
  main = "Annotated Expression Heatmap (Gene Symbols)",
  scale = "row", # z-score each gene (row) across samples
  annotation_row = annotation_row,
  annotation_colors = ann_colors,
  cluster_rows = TRUE, cluster_cols = TRUE,
  show_rownames = TRUE, show_colnames = TRUE,
  fontsize_row = 10, fontsize_col = 10
)
print(annotated_heatmap)

# dev.off()