library(pheatmap)
Step-by-step heatmap tutorial with pheatmap()
|
|
original tutorial I followed: https://biostatsquid.com/step-by-step-heatmap-tutorial-with-pheatmap/
pheatmap package
synthesise gene expression test data
<- 50
N_GENES <- 10
N_OBSERVATIONS
<- matrix(rnorm(500), N_GENES, N_OBSERVATIONS)
geData colnames(geData) <- paste0("Sample_", 1:N_OBSERVATIONS)
rownames(geData) <- paste0("Gene_", 1:N_GENES)
head(geData)
Sample_1 Sample_2 Sample_3 Sample_4 Sample_5 Sample_6
Gene_1 -0.6991055 1.14613447 -1.1802587 -0.6959333 -0.78744135 -0.20791640
Gene_2 0.2742142 0.05545421 -0.8683816 0.3068968 0.86633898 0.78012622
Gene_3 -0.3837213 0.67378293 1.5813930 0.7630419 1.74542758 -0.47304076
Gene_4 1.9310654 2.38825352 0.2484976 -2.0303287 -1.16369394 -1.33636199
Gene_5 1.4499932 -0.07189191 -0.8252876 -0.8925559 0.92687998 0.79835607
Gene_6 -0.6259183 -1.15188830 -0.8685327 -0.5698344 -0.09392029 -0.03969982
Sample_7 Sample_8 Sample_9 Sample_10
Gene_1 -0.75566072 -0.26539055 0.3108790 -0.7443402
Gene_2 -0.61100052 0.85495888 0.1907298 -0.2972436
Gene_3 0.03476893 -0.94139247 -1.0419336 -0.4293556
Gene_4 0.14741243 0.03587988 0.8580009 -0.4563711
Gene_5 0.13428051 0.03744672 0.6608050 0.8179740
Gene_6 -1.50026414 1.83770970 0.1826200 1.1988738
annotations
create a data.table
for sample/column annotation
<- data.table(
ann_dt Group = rep(
c("Disease", "Control"),
c(N_OBSERVATIONS/2, N_OBSERVATIONS/2)
),Lymphocyte_count = rnorm(N_OBSERVATIONS)
)row.names(ann_dt) <- colnames(geData)
print(ann_dt)
Group Lymphocyte_count
<char> <num>
1: Disease -1.08979285
2: Disease -0.26269777
3: Disease 0.01970315
4: Disease -0.43202829
5: Disease -2.85344727
6: Control -0.72119582
7: Control 0.19682254
8: Control 0.93658749
9: Control 0.04796364
10: Control -0.23197666
create a data.table
for feature/row annotation
<- data.table(
gene_functions_dt gene_functions = rep(c('Oxidative_phosphorylation',
'Cell_cycle',
'Immune_regulation',
'Signal_transduction',
'Transcription'), rep(N_GENES/5, 5))
)row.names(gene_functions_dt) <- rownames(geData)
print(gene_functions_dt)
gene_functions
<char>
1: Oxidative_phosphorylation
2: Oxidative_phosphorylation
3: Oxidative_phosphorylation
4: Oxidative_phosphorylation
5: Oxidative_phosphorylation
6: Oxidative_phosphorylation
7: Oxidative_phosphorylation
8: Oxidative_phosphorylation
9: Oxidative_phosphorylation
10: Oxidative_phosphorylation
11: Cell_cycle
12: Cell_cycle
13: Cell_cycle
14: Cell_cycle
15: Cell_cycle
16: Cell_cycle
17: Cell_cycle
18: Cell_cycle
19: Cell_cycle
20: Cell_cycle
21: Immune_regulation
22: Immune_regulation
23: Immune_regulation
24: Immune_regulation
25: Immune_regulation
26: Immune_regulation
27: Immune_regulation
28: Immune_regulation
29: Immune_regulation
30: Immune_regulation
31: Signal_transduction
32: Signal_transduction
33: Signal_transduction
34: Signal_transduction
35: Signal_transduction
36: Signal_transduction
37: Signal_transduction
38: Signal_transduction
39: Signal_transduction
40: Signal_transduction
41: Transcription
42: Transcription
43: Transcription
44: Transcription
45: Transcription
46: Transcription
47: Transcription
48: Transcription
49: Transcription
50: Transcription
gene_functions
create a list of classification colour schemes for various annotations
Reminder: named character vectors (or named vectors) allow you to associate names with values in a vector. This feature is useful when you want to access or refer to elements by name rather than by their position (index) in the vector.
<- list(
ann_colors
gene_functions = c(
"Oxidative_phosphorylation" = "#F46D43",
"Cell_cycle" = "#708238",
"Immune_regulation" = "#9E0142",
"Signal_transduction" = "beige",
"Transcription" = "violet"),
DCGroup = c("Disease" = "darkgreen",
"Control" = "blueviolet"),
Lymphocyte_count = brewer.pal(5, 'PuBu')
)
Basic Heatmap
<- pheatmap(geData,
heat_plot col = brewer.pal(10, 'RdYlGn'), # choose a colour scale for your data
cluster_rows = T, cluster_cols = T, # set to FALSE if you want to remove the dendograms
clustering_distance_cols = 'euclidean',
clustering_distance_rows = 'euclidean',
clustering_method = 'ward.D',
annotation_row = gene_functions_dt, # row (gene) annotations
annotation_col = ann_dt, # column (sample) annotations
annotation_colors = ann_colors, # colours for your annotations
annotation_names_row = F,
annotation_names_col = F,
fontsize_row = 10, # row label font size
fontsize_col = 7, # column label font size
angle_col = 45, # sample names at an angle
legend_breaks = c(-2, 0, 2), # legend customisation
legend_labels = c("Low", "Medium", "High"), # legend customisation
show_colnames = T, show_rownames = F, # displaying column and row names
main = "simulated gene expressions")