This R tutorial describes how to compute and visualize a correlation matrix using R software and ggplot2 package.

Prepare the data

The columns of arms_data includes the GISTIC2 Z-scores of amp or deletion chromosomal arms for 6 cancer types.

We first get the correlation tables for amp and deletion separately.

library(Jerry)
## Loading required package: tidyverse
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.1.1     v dplyr   1.0.6
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'readr' was built under R version 4.0.3
## Warning: package 'forcats' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## Warning: replacing previous import 'ggplot2::annotate' by 'ggpmisc::annotate'
## when loading 'Jerry'
library(tidyverse)

data(arms_data)

#For AMP, Get the correlation tables.
arms_data_dist_amp = arms_data %>% 
  select(starts_with("Amp")) %>%
  #t() %>%
  cor() %>%
  as.matrix() 

#For Del, Get the correlation tables.

arms_data_dist_del = arms_data %>% 
  select(starts_with("Del")) %>%
  #t() %>%
  cor() %>%
  as.matrix() 

Get the lower and upper triangles of the correlation matrix

Note that, a correlation matrix has redundant information. We’ll use the functions below to set half of it to NA.

Plot the semi correlation martix heatmap

#set upper.triangle to be NA
arms_data_dist_amp %>%
  get_tri(diag = TRUE, upper.tri = TRUE) 
## set the upper tri to be NA
##           Amp_ACA   Amp_NEC   Amp_CIN   Amp_EBV    Amp_GS Amp_MSI
## Amp_ACA        NA        NA        NA        NA        NA      NA
## Amp_NEC 0.8971477        NA        NA        NA        NA      NA
## Amp_CIN 0.7313157 0.6677643        NA        NA        NA      NA
## Amp_EBV 0.6380635 0.6225627 0.7994932        NA        NA      NA
## Amp_GS  0.6683191 0.6172034 0.7019668 0.6668315        NA      NA
## Amp_MSI 0.4838802 0.5112640 0.5480767 0.5583757 0.8330379      NA
arms_data_long_amp = arms_data_dist_amp %>%
  get_tri(diag = TRUE, upper.tri = TRUE) %>%
  reshape2::melt(na.rm = TRUE) %>%
  mutate(
    Var1 = mapply(function(x) x[2], str_split(Var1, "_") ),
    Var2 = mapply(function(x) x[2], str_split(Var2, "_") )
  ) 
## set the upper tri to be NA
head(arms_data_long_amp)
##   Var1 Var2     value
## 2  NEC  ACA 0.8971477
## 3  CIN  ACA 0.7313157
## 4  EBV  ACA 0.6380635
## 5   GS  ACA 0.6683191
## 6  MSI  ACA 0.4838802
## 9  CIN  NEC 0.6677643
arms_data_long_amp = arms_data_long_amp %>%
  mutate(Var1 = factor(Var1, levels = c("ACA","NEC","CIN","EBV","GS","MSI"),
                       labels = c("MANEC-Aca","MANEC-Nec","STAD-CIN","STAD-EBV","STAD-GS","STAD-MSI")
                       ),
         Var2 = factor(Var2, levels = c("ACA","NEC","CIN","EBV","GS","MSI"),
                       labels = c("MANEC-Aca","MANEC-Nec","STAD-CIN","STAD-EBV","STAD-GS","STAD-MSI"))
         )


# Create a ggheatmap
ggheatmap <- ggplot(arms_data_long_amp, aes(Var2, Var1, fill = value))+
  
  geom_tile(color = "white")+
  scale_fill_gradient2(low = "blue", high = "red", mid = "grey100", 
                       midpoint = 0.5, limit = c(0, 0.9), space = "Lab", 
                       name="Pearson\nCorrelation") +
  theme_minimal()+ # minimal theme
  theme(axis.text.x = element_text(angle = 45, vjust = 1, 
                                   size = 10, hjust = 1),
        axis.text.y = element_text(
                                   size = 10)
        )+
  coord_fixed() +
  geom_rect(xmin = 0.5, xmax = 3.5, ymin = 0.5, ymax = 3.5, col = "black", alpha = 0) 

ggheatmap

ggheatmap +
  geom_text(aes(Var2, Var1, label = round(value, 2) ), color = "black", size = 4) +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.ticks = element_blank(),
    #legend.justification = c(1, 0),
    #legend.position = "none"
   )

Plot the full correlation martix heatmap

Here, we plot the amp and del in the upper and lower tri-angele, thus presenting the data into one full heatmap figure.

#set upper.triangle to be NA
arms_data_dist_amp %>%
  get_tri(diag = TRUE, upper.tri = TRUE) 
## set the upper tri to be NA
##           Amp_ACA   Amp_NEC   Amp_CIN   Amp_EBV    Amp_GS Amp_MSI
## Amp_ACA        NA        NA        NA        NA        NA      NA
## Amp_NEC 0.8971477        NA        NA        NA        NA      NA
## Amp_CIN 0.7313157 0.6677643        NA        NA        NA      NA
## Amp_EBV 0.6380635 0.6225627 0.7994932        NA        NA      NA
## Amp_GS  0.6683191 0.6172034 0.7019668 0.6668315        NA      NA
## Amp_MSI 0.4838802 0.5112640 0.5480767 0.5583757 0.8330379      NA
arms_data_long_amp = arms_data_dist_amp %>%
  get_tri(diag = TRUE, upper.tri = TRUE) %>%
  reshape2::melt(na.rm = TRUE) %>%
  mutate(
    Var1 = mapply(function(x) x[2], str_split(Var1, "_") ),
    Var2 = mapply(function(x) x[2], str_split(Var2, "_") )
  ) 
## set the upper tri to be NA
head(arms_data_long_amp)
##   Var1 Var2     value
## 2  NEC  ACA 0.8971477
## 3  CIN  ACA 0.7313157
## 4  EBV  ACA 0.6380635
## 5   GS  ACA 0.6683191
## 6  MSI  ACA 0.4838802
## 9  CIN  NEC 0.6677643
#same for del.


#set upper.triangle to be NA
arms_data_dist_del %>%
  get_tri(diag = TRUE, lower.tri = TRUE) 
## set the lower tri to be NA
##         Del_ACA   Del_NEC   Del_CIN   Del_EBV    Del_GS    Del_MSI
## Del_ACA      NA 0.7399628 0.5359301 0.1106895 0.3296669 0.29815390
## Del_NEC      NA        NA 0.5443698 0.1922782 0.2971650 0.14946408
## Del_CIN      NA        NA        NA 0.4490654 0.5694798 0.51634555
## Del_EBV      NA        NA        NA        NA 0.5038875 0.06097154
## Del_GS       NA        NA        NA        NA        NA 0.30906615
## Del_MSI      NA        NA        NA        NA        NA         NA
arms_data_long_del = arms_data_dist_del %>%
  get_tri(diag = TRUE, lower.tri = TRUE) %>%
  reshape2::melt(na.rm = TRUE) %>%
  mutate(
    Var1 = mapply(function(x) x[2], str_split(Var1, "_") ),
    Var2 = mapply(function(x) x[2], str_split(Var2, "_") )
  ) 
## set the lower tri to be NA
head(arms_data_long_del)
##    Var1 Var2     value
## 7   ACA  NEC 0.7399628
## 13  ACA  CIN 0.5359301
## 14  NEC  CIN 0.5443698
## 19  ACA  EBV 0.1106895
## 20  NEC  EBV 0.1922782
## 21  CIN  EBV 0.4490654
arms_data_long = rbind( arms_data_long_amp, arms_data_long_del) %>%
  mutate(Var1 = factor(Var1, levels = c("ACA","NEC","CIN","EBV","GS","MSI"),
                       labels = c("MANEC-Aca","MANEC-Nec","STAD-CIN","STAD-EBV","STAD-GS","STAD-MSI")
                       ),
         Var2 = factor(Var2, levels = c("ACA","NEC","CIN","EBV","GS","MSI"),
                       labels = c("MANEC-Aca","MANEC-Nec","STAD-CIN","STAD-EBV","STAD-GS","STAD-MSI"))
         )


# Create a ggheatmap
ggheatmap <- ggplot(arms_data_long, aes(Var2, Var1, fill = value))+
  
  geom_tile(color = "white")+
  scale_fill_gradient2(low = "blue", high = "red", mid = "grey100", 
                       midpoint = 0.5, limit = c(0, 0.9), space = "Lab", 
                       name="Pearson\nCorrelation") +
  theme_minimal()+ # minimal theme
  theme(axis.text.x = element_text(angle = 45, vjust = 1, 
                                   size = 10, hjust = 1),
        axis.text.y = element_text(
                                   size = 10)
        )+
  coord_fixed() +
  geom_rect(xmin = 0.5, xmax = 3.5, ymin = 0.5, ymax = 3.5, col = "black", alpha = 0) 

ggheatmap

ggheatmap +
  geom_text(aes(Var2, Var1, label = round(value, 2) ), color = "black", size = 4) +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.ticks = element_blank(),
    #legend.justification = c(1, 0),
    #legend.position = "none"
   )