在做基因表达或各类定量分析时，我们常常会使用热图来展示多组数据的差异。
热图可以直观地呈现行与列之间的聚类关系，帮助我们快速发现潜在模式或分组特征。然而，面对一长串难以辨识的基因 ID 或代码编号时，阅读与解读体验往往并不友好。为增强数据可读性，我们常常需要将这些原始 ID 替换为更容易理解或更有意义的标签，如基因符号（symbol）或其他关键信息。
本篇内容将结合 R 语言，分享如何在热图绘制完毕后，直接对图中已有的标签进行替换，让你在不修改原始矩阵的前提下，快速“矫正”热图行名（或列名），并进一步体会 R 语言绘图背后的灵活与强大。
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Simulate 20 Ensembl-style gene IDs ("ENSG" followed by 11 digits).
gene_ids <- paste0("ENSG000000", sprintf("%05d", 1:20))

# Simulate an expression matrix: 20 genes (rows) x 6 samples (columns),
# values drawn from a normal distribution with mean 10 and sd 2.
sample_names <- paste0("Sample_", 1:6)
# Fix the RNG seed so the simulated values (and the resulting heatmaps) are
# reproducible between runs.
set.seed(123)
expression_data <- matrix(
  rnorm(length(gene_ids) * length(sample_names), mean = 10, sd = 2),
  nrow = length(gene_ids),
  ncol = length(sample_names)
)
rownames(expression_data) <- gene_ids
colnames(expression_data) <- sample_names

# Gene symbols paired position-by-position with gene_ids.
# This must be its own statement: fused onto the colnames() line it becomes a
# chained assignment and gene_symbols is never defined.
gene_symbols <- c(
  "TP53", "BRCA1", "EGFR", "KRAS", "PTEN",
  "MYC", "VEGFA", "IL6", "TNF", "MAPK1",
  "AKT1", "MTOR", "STAT3", "NFkB1", "CDKN2A",
  "BCL2", "CASP3", "SOX2", "NOTCH1", "HIF1A"
)

# ID -> symbol mapping table; the column names follow the biomaRt convention.
id_symbol_mapping <- data.frame(
  ensembl_gene_id = gene_ids,
  external_gene_name = gene_symbols
)

# pdf("original_heatmap.pdf", width = 10, height = 12)
# Baseline heatmap: rows are labelled with the raw Ensembl gene IDs.
# NOTE(review): pheatmap() comes from the pheatmap package; no library(pheatmap)
# call is visible in this section, so make sure the package is attached.
original_heatmap <- pheatmap(
  expression_data,
  scale = "row", # z-score standardisation within each row (gene)
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  show_rownames = TRUE,
  show_colnames = TRUE,
  main = "Expression Heatmap (Gene IDs)",
  fontsize_row = 10,
  fontsize_col = 10
)
# Render the stored heatmap object.
print(original_heatmap)

# Gene symbols in mapping-table order; used below as the ID -> symbol lookup.
# This has to be a separate statement from print(): on one line the two
# expressions are a syntax error.
symbol_vector <- id_symbol_mapping$external_gene_name
# ID -> symbol lookup: a character vector of gene symbols named by Ensembl ID.
symbol_vector <- setNames(
  as.character(id_symbol_mapping$external_gene_name),
  id_symbol_mapping$ensembl_gene_id
)

# Copy the expression matrix and change only its row names; the values and the
# original matrix are left untouched.
expression_data_symbols <- expression_data
row_ids <- rownames(expression_data)
row_labels <- unname(symbol_vector[row_ids])
# An ID with no (or an empty) symbol keeps its original ID, so that no row ends
# up labelled NA in the heatmap.
unmapped <- is.na(row_labels) | !nzchar(row_labels)
row_labels[unmapped] <- row_ids[unmapped]
rownames(expression_data_symbols) <- row_labels
# Same heatmap as before, but drawn from the symbol-labelled copy of the matrix,
# so the row labels are gene symbols instead of Ensembl IDs.
# pdf("symbol_heatmap.pdf", width = 10, height = 12)
symbol_heatmap <- pheatmap(
  expression_data_symbols,
  main = "Expression Heatmap (Gene Symbols)",
  scale = "row",
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  show_rownames = TRUE,
  show_colnames = TRUE,
  fontsize_row = 10,
  fontsize_col = 10
)
print(symbol_heatmap)

# Add row annotations
# One annotation row per gene: a randomly assigned (simulated) gene type.
# Both levels are declared explicitly so the factor always matches ann_colors,
# even if the random draw happens to produce only one type.
gene_type_levels <- c("Oncogene", "Tumor Suppressor")
annotation_row <- data.frame(
  GeneType = factor(
    sample(gene_type_levels, nrow(expression_data_symbols), replace = TRUE),
    levels = gene_type_levels
  )
)
# pheatmap pairs annotation rows with matrix rows by row name, so the names must
# come from the matrix that is actually plotted (the symbol-labelled copy), not
# from the Ensembl-ID matrix; otherwise no annotation is matched.
rownames(annotation_row) <- rownames(expression_data_symbols)
# Annotation colours, keyed by factor level.
ann_colors <- list(GeneType = c(Oncogene = "red", `Tumor Suppressor` = "blue"))
# Heatmap of the symbol-labelled matrix with a row annotation track (GeneType)
# coloured according to ann_colors.
# pdf("annotated_heatmap.pdf", width = 12, height = 12)
annotated_heatmap <- pheatmap(
  expression_data_symbols,
  main = "Annotated Expression Heatmap (Gene Symbols)",
  scale = "row",
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  annotation_row = annotation_row,
  annotation_colors = ann_colors,
  show_rownames = TRUE,
  show_colnames = TRUE,
  fontsize_row = 10,
  fontsize_col = 10
)
print(annotated_heatmap)