Sometimes it is needed to vizualise a matrix to see sparsity or compare different kind of ordering. In this post I will show you a solution with geom_raster() in ggplot2. It is similar that matlab spy() function. geom_raster() is a special case of geom_tile() where tiles are the same size.
This chunk will create a 10×6 weighted matrix of x1 to x10 and y1 to y6.
A <- matrix(c(2 , 0 , 0 , 0 , 0 , 2 ,
0 , 0 , 0 , 0 , 2 , 2 ,
0 , 0 , 0 , 0 , 3 , 2 ,
0 , 0 , 0 , 0 , 0 , 6 ,
2 , 2 , 3 , 5 , 4 , 2 ,
6 , 4 , 3 , 0 , 0 , 0 ,
0 , 0 , 3 , 1 , 0 , 0 ,
0 , 0 , 0 , 2 , 2 , 1 ,
0 , 0 , 2 , 3 , 0 , 0 ,
0 , 2 , 2 , 0 , 0 , 0), nrow=10, ncol=6, byrow = T)
rownames(A) <- paste("x", 1:10, sep="")
colnames(A) <- paste("y", 1:6, sep="")ggplot2 works with longdata instead of wide data format like a matrix, so you need to convert it first.
library(reshape2)
longData <- melt(A)If you want to control the orderings of rows and colums you need to specify the ordering with factors of rows and columns. Levels are vectors of rownames and colnames of original matrix.
library(dplyr)
longData$Var1 <- factor(longData$Var1, levels=names(sort(rowSums(A), decreasing = T)))
longData$Var2 <- factor(longData$Var2, levels=names(sort(colSums(A), decreasing = T)))
longData<-longData[longData$value!=0,]library(ggplot2)
ggplot(longData, aes(x = Var2, y = Var1)) +
geom_raster(aes(fill=value)) +
scale_fill_gradient(low="grey60", high="red") +
labs(x="", y="") +
theme_bw()There is an excellent implementation of traveling sales person (TSP) ordering in seriation package. It randomly select the first row or column in process of ordering therefore every result of sorting is different.
library(seriation)
o <- seriate(A, method="BEA_TSP")
longData$Var1 <- factor(longData$Var1, levels=names(unlist(o[[1]][])))
longData$Var2 <- factor(longData$Var2, levels=names(unlist(o[[2]][])))longData<-longData[longData$value!=0,]ggplot(longData, aes(x = Var2, y = Var1)) +
geom_raster(aes(fill=value)) +
scale_fill_gradient(low="grey60", high="red") +
labs(x="", y="") +
theme_bw()In this example I use hierarchical clustering where distance matrix is calcualted with generalized Jaccard similarity for row vectors and column vectors.
A <- A
sim.jac <- matrix(0, nrow=nrow(A), ncol=nrow(A))
rownames(sim.jac) <- rownames(A)
colnames(sim.jac) <- rownames(A)
#weighted jaccard for rows
pairs <- t(combn(1:nrow(A), 2))
for (i in 1:nrow(pairs)){
num <- sum(sapply(1:ncol(A), function(x)(min(A[pairs[i,1],x],A[pairs[i,2],x]))))
den <- sum(sapply(1:ncol(A), function(x)(max(A[pairs[i,1],x],A[pairs[i,2],x]))))
sim.jac[pairs[i,1],pairs[i,2]] <- num/den
sim.jac[pairs[i,2],pairs[i,1]] <- num/den
}
sim.jac[which(is.na(sim.jac))] <- 0
diag(sim.jac) <- 1
sim.jac.rows <- sim.jacA <- t(A)
sim.jac <- matrix(0, nrow=nrow(A), ncol=nrow(A))
rownames(sim.jac) <- rownames(A)
colnames(sim.jac) <- rownames(A)
#weighted jaccard for rows
pairs <- t(combn(1:nrow(A), 2))
for (i in 1:nrow(pairs)){
num <- sum(sapply(1:ncol(A), function(x)(min(A[pairs[i,1],x],A[pairs[i,2],x]))))
den <- sum(sapply(1:ncol(A), function(x)(max(A[pairs[i,1],x],A[pairs[i,2],x]))))
sim.jac[pairs[i,1],pairs[i,2]] <- num/den
sim.jac[pairs[i,2],pairs[i,1]] <- num/den
}
sim.jac[which(is.na(sim.jac))] <- 0
diag(sim.jac) <- 1
sim.jac.cols <- sim.jacQ <- A
longData<-melt(Q)
longData<-longData[longData$value!=0,]
longData <- left_join(longData, cut.rows, by=c("Var2"="names"))
colnames(longData)[4] <- "Var2_clust"
longData$Var2 <- as.factor(longData$Var2)
longData <- left_join(longData, cut.cols, by=c("Var1"="names"))
colnames(longData)[5] <- "Var1_clust"
longData$Var1 <- factor(longData$Var1, levels=unique(arrange(longData, Var1_clust)[,1]))
longData$Var2 <- factor(longData$Var2, levels=unique(arrange(longData, Var2_clust)[,2]))
#3 cluster
library(ggsci)
library(scales)
mypal <- pal_simpsons("springfield", alpha = 1)(5)[c(1,2,5)]
show_col(mypal)axis.x.colour <- (longData %>% select(Var1, Var1_clust) %>% unique %>% arrange(Var1_clust) %>% select(Var1_clust))[,1] %>% plyr::mapvalues(from=c(1:3), to=mypal)
axis.y.colour <- (longData %>% select(Var2, Var2_clust) %>% unique %>% arrange(Var2_clust) %>% select(Var2_clust))[,1] %>% plyr::mapvalues(from=c(1:3), to=mypal)ggplot(longData, aes(x = Var1, y = Var2)) +
geom_raster(aes(fill=value)) +
scale_fill_gradient(low="grey60", high="red") +
labs(x="", y="") +
theme_bw() + theme(axis.text.x=element_text(size=11, face="bold"),
axis.text.y=element_text(size=11, face="bold"),
legend.text=element_text(size=8),
legend.title=element_text(size=8)) +
theme(axis.text.y=element_text(colour=axis.y.colour),
axis.text.x=element_text(colour=axis.x.colour)) +
annotate("segment", x = 3.5, xend = 3.5, y = 0, yend = 10.5, colour = "blue", size = 0.5) +
annotate("segment", x = 0, xend = 6.5, y = 2.5, yend = 2.5, colour = "blue", size = 0.5) +
annotate("segment", x = 0, xend = 6.5, y = 5.5, yend = 5.5, colour = "blue", size = 0.5)Be happyR! :)