Let’s say we have a 25-column matrix and want to visualize the correlations between its columns. One sophisticated way is to use the corrplot package. Another common way is to use heatmaps. However, when the number of objects is low, a further option is a heatmap table: a heatmap in which every cell is also labelled with its value. Let’s first have a look at the data.

# Check the object's class and dimensions, then preview the first five rows
# and first five columns. Lines starting with "##" are the printed output.
class(mat)
## [1] "matrix"
dim(mat)
## [1] 100  25
mat[1:5,1:5]
##            x_1       x_2         x_3        x_4       x_5
## [1,] -23.39526 -17.27375 -22.6422327 -21.357298 -15.27931
## [2,] -15.63459 -22.06306  -4.8194209 -12.870096 -15.41976
## [3,] -12.33534 -25.08086   0.8416463  -1.141505 -13.15749
## [4,] -26.63951 -12.21723 -11.6534670 -31.252370 -21.06154
## [5,] -16.47164 -13.18288  -4.1775534 -20.571408 -30.13036

Now, calculate the correlations between columns. I will use Spearman’s Rank Correlation Coefficient as I am not sure about the normality of my simulated dataset.

# Pairwise Spearman rank correlations between the columns of `mat`.
# The method name is spelled out in full instead of relying on the partial
# match of "s" to "spearman".
corr_mat <- cor(mat, method = "spearman")
# Preview the top-left 5 x 5 corner of the correlation matrix.
corr_mat[1:5, 1:5]
##            x_1          x_2       x_3       x_4          x_5
## x_1 1.00000000  0.022454245 0.4223702 0.6083528  0.516675668
## x_2 0.02245425  1.000000000 0.1013621 0.0529613 -0.008916892
## x_3 0.42237024  0.101362136 1.0000000 0.2925533  0.370057006
## x_4 0.60835284  0.052961296 0.2925533 1.0000000  0.379597960
## x_5 0.51667567 -0.008916892 0.3700570 0.3795980  1.000000000

Now, we need to transform this object into a data frame in which two columns specify the names of the objects between which each correlation is calculated and a third column holds the correlation value. There is an easy way to do that using the melt function from the reshape2 package.

library(reshape2)
# Reshape the square correlation matrix into long format: one row per pair of
# columns, identified by Var1 and Var2, with the coefficient stored in `value`.
# The column names are written out explicitly; they equal melt()'s defaults
# for a matrix whose dimnames are unnamed.
co <- melt(corr_mat, varnames = c("Var1", "Var2"), value.name = "value")
head(co)
##   Var1 Var2       value
## 1  x_1  x_1  1.00000000
## 2  x_2  x_1  0.02245425
## 3  x_3  x_1  0.42237024
## 4  x_4  x_1  0.60835284
## 5  x_5  x_1  0.51667567
## 6  x_6  x_1 -0.84846085

Now, we can proceed with visualization

library(ggplot2)
library(scales) # for muted function

# Heatmap table: one tile per (Var1, Var2) pair, coloured by the correlation
# and labelled with the coefficient rounded to two decimals.
ggplot(co, aes(x = Var1, y = Var2)) + # x and y axes => Var1 and Var2
  geom_tile(aes(fill = value)) + # tile colour follows the value column
  # geom_text() has no fill aesthetic, so only the label is mapped; the
  # column is referenced by its bare name instead of co$value so the mapping
  # always uses the data given to ggplot().
  geom_text(aes(label = round(value, 2))) + # write the values
  scale_fill_gradient2( # diverging palette centred on zero
    low = muted("darkred"),
    mid = "white",
    high = muted("midnightblue"),
    midpoint = 0
  ) +
  scale_x_discrete(name = "") + # empty axis titles
  scale_y_discrete(name = "") +
  labs(fill = "Corr. Coef.") + # legend title
  ggtitle("Correlation Plot") +
  theme(
    panel.grid.major.x = element_blank(), # no gridlines
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.background = element_rect(fill = "white"), # background = white
    axis.text.x = element_text(
      angle = 90, hjust = 1, vjust = 1, size = 12, face = "bold"
    ),
    axis.text.y = element_text(size = 12, face = "bold"),
    plot.title = element_text(size = 20, face = "bold"),
    legend.title = element_text(face = "bold", size = 14)
  )

If we want to order the rows and columns according to hierarchical clustering, here is a quick trick we can use:

# Order the variables by hierarchical clustering, using 1 - correlation as
# the dissimilarity, and melt the reordered matrix so that the factor levels
# (and therefore both axes) follow the clustering order.
ord <- hclust(as.dist(1 - corr_mat))$order
co <- melt(corr_mat[ord, ord])

# Same heatmap table as before, drawn from the reordered data.
ggplot(co, aes(x = Var1, y = Var2)) +
  geom_tile(aes(fill = value)) +
  # geom_text() has no fill aesthetic, so only the label is mapped; the
  # column is referenced by its bare name instead of co$value so the mapping
  # always uses the data given to ggplot().
  geom_text(aes(label = round(value, 2))) +
  scale_fill_gradient2( # diverging palette centred on zero
    low = muted("darkred"),
    mid = "white",
    high = muted("midnightblue"),
    midpoint = 0
  ) +
  scale_x_discrete(name = "") + # empty axis titles
  scale_y_discrete(name = "") +
  labs(fill = "Corr. Coef.") + # legend title
  ggtitle("Correlation Plot") +
  theme(
    panel.grid.major.x = element_blank(), # no gridlines
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.background = element_rect(fill = "white"),
    axis.text.x = element_text(
      angle = 90, hjust = 1, vjust = 1, size = 12, face = "bold"
    ),
    axis.text.y = element_text(size = 12, face = "bold"),
    plot.title = element_text(size = 20, face = "bold"),
    legend.title = element_text(face = "bold", size = 14)
  )

One might think that it is also possible to obtain this graph using corrplot. What advantage does a heatmap table offer? This approach is especially useful when you are plotting correlations that are very close to each other, or values that are not correlation coefficients (i.e., values that do not range between -1 and 1). Take this example:

Let’s say we have a matrix with 6 columns whose values are very close to each other and highly correlated:

# Check the object's class and dimensions, then print its first rows with
# head(). Lines starting with "##" are the printed output.
class(mat)
## [1] "matrix"
dim(mat)
## [1] 100   6
head(mat)
##           x_1      x_2      x_3      x_4      x_5      x_6
## [1,] 16.59543 20.08277 18.45166 21.10061 20.48326 20.52644
## [2,] 29.42994 30.30499 33.27433 30.28646 33.18005 31.47009
## [3,] 18.91210 21.37321 21.50674 23.55603 22.83811 23.43764
## [4,] 33.10621 28.03724 29.24482 20.65060 29.31664 28.38730
## [5,] 30.68355 29.09333 32.01873 25.60054 25.73128 29.41890
## [6,] 23.50873 22.06678 26.21683 25.79510 21.74956 20.29958

Let’s try to visualize the correlations using corrplot package:

library(corrplot)
# Correlations between the columns of `mat`, using cor()'s default method.
corr_mat <- cor(mat)
# Draw the correlation plot with variables ordered by hierarchical clustering.
corrplot(
  corr_mat,
  outline = TRUE, # TRUE rather than T, which is an ordinary, reassignable name
  addgrid.col = "darkgray",
  order = "hclust",
  cl.pos = "b",
  tl.col = "indianred4"
)

Useless, right? It is of course possible to hack the function with various parameters, change the scale, etc. But I think that is a waste of time, since we can use a heatmap table instead. Again, we order the rows and columns according to hierarchical clustering.

# Order the variables by hierarchical clustering, using 1 - correlation as
# the dissimilarity, and melt the reordered matrix so that the factor levels
# (and therefore both axes) follow the clustering order.
ord <- hclust(as.dist(1 - corr_mat))$order
co <- melt(corr_mat[ord, ord])

# Heatmap table with a sequential two-colour gradient whose limits are taken
# from the data, so closely spaced coefficients stay distinguishable.
ggplot(co, aes(x = Var1, y = Var2)) +
  geom_tile(aes(fill = value)) +
  # geom_text() has no fill aesthetic, so only the label is mapped; the
  # column is referenced by its bare name instead of co$value so the mapping
  # always uses the data given to ggplot().
  geom_text(aes(label = round(value, 2))) +
  scale_fill_gradient(low = "lightblue", high = "darkslategray") +
  scale_x_discrete(name = "") + # empty axis titles
  scale_y_discrete(name = "") +
  labs(fill = "Corr. Coef.") + # legend title
  ggtitle("Correlation Plot") +
  theme(
    panel.grid.major.x = element_blank(), # no gridlines
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.background = element_rect(fill = "white"),
    axis.text.x = element_text(
      angle = 90, hjust = 1, vjust = 1, size = 12, face = "bold"
    ),
    axis.text.y = element_text(size = 12, face = "bold"),
    plot.title = element_text(size = 20, face = "bold"),
    legend.title = element_text(face = "bold", size = 14)
  )

As you can see, the heatmap table is much more informative than the corrplot output.