visualizing derivatives of a matrix multiplication

library(reshape2)
library(ggplot2)
library(RColorBrewer)
cvec <- c("#FFFFFF",RColorBrewer::brewer.pal(10,"Spectral"))
theme_set(theme_classic())
theme_update(legend.position="none")

For obscure reasons, I wanted to know, given a positive-definite matrix \(M\), what the derivatives of its elements were with respect to the derivatives of its Cholesky decomposition (i.e. a lower-triangular \(C\) such that \(C C^{\top} = M\)). For example, given

\[ \left( \begin{array}{ccc} \sigma^2_x & \sigma_{xy} & \sigma_{xz} \\ \sigma_{xy} & \sigma^2_y & \sigma_{yz} \\ \sigma_{xz} & \sigma_{yz} & \sigma^2_z \end{array} \right) = \left( \begin{array}{ccc} \theta_1 & 0 & 0 \\ \theta_2 & \theta_4 & 0 \\ \theta_3 & \theta_5 & \theta_6 \end{array} \right) \left( \begin{array}{ccc} \theta_1 & \theta_2 & \theta_3 \\ 0 & \theta_4 & \theta_5 \\ 0 & 0 & \theta_6 \end{array} \right) \qquad , \]

what are the derivatives of \(\{\sigma^2_x, \sigma_{xy}, \sigma_{xz}, \ldots\}\) with respect to \(\{\theta_i\}\)?

It turns out this has a nice sparsity structure, made up of lower-triangular blocks based on each of the original parameters. For the \(3 \times 3\) case (with rows=\(\theta\) values, columns=\(\sigma_{\cdot \cdot}\) values (lower triangle, column-wise order), this is \[ \left( \begin{array}{cccccc} 2 \theta_1 & 0 & 0 & 0 & 0 & 0 \\ \theta_1 & \theta_1 & 0 & 0 & 0 & 0 \\ \theta_1 & 0 & \theta_1 & 0 & 0 & 0 \\ 0 & 2 \theta_2 & 0 & 2 \theta_4 & 0 & 0 \\ 0 & \theta_2 & \theta_2 & \theta_4 & \theta_4 & 0 \\ 0 & 0 & 2 \theta_3 & 0 & 2 \theta_5 & 2 \theta_6 \end{array} \right) \]

bmat <- function(t,n,dbl=TRUE) {
    m <- matrix(0,n,n)
    diag(m) <- t
    m[,1] <- t
    if (dbl) m[1,1] <- 2*t
    return(m)
}
bigmat <- function(t,...) {
    n <- (sqrt(8*length(t)+1)-1)/2
    N <- n*(n+1)/2
    M <- matrix(0,N,N)
    svec <- n:1
    posvec <- cumsum(c(1,n:2))
    tc <- 1
    for (i in 1:n) {  ## horizontal blocks
        ipos <- posvec[i]
        for (j in i:n) { ## vertical blocks
            jpos <- posvec[j]
            hadj <- (j-i)
            yy <- (ipos+hadj):(ipos+hadj+svec[j]-1)
            xx <- jpos:(jpos+svec[j]-1)
            M[xx,yy] <- bmat(t[tc],svec[j],...)
            tc <- tc+1
        }
    }
    return(M)
}

Suppose we start with this matrix:

n <- 4*5/2 ## (20*19)/2 ## 10 ## pick a value = n(n+1)/2
tm <- matrix(0,4,4)
tm[lower.tri(tm,diag=TRUE)] <- 1:10
ttm <- melt(tm)
ttm <- data.frame(ttm,value2=2*sign(ttm$value))
g0 <- ggplot(ttm,
       aes(y=-Var1,x=Var2,fill=factor(value),alpha=factor(value2,levels=0:2)))+
    geom_tile()+
    scale_alpha_manual(values=c(0,0.5,1))+
    ## scale_fill_manual(values=cvec)+
    labs(x="",y="")+
    scale_x_continuous(breaks=NULL,expand=c(0,0))+
    scale_y_continuous(breaks=NULL,expand=c(0,0))
print(g0)

Then we end up with this derivative matrix:

m1 <- melt(bigmat(1:n,dbl=FALSE))
m2 <- melt(bigmat(rep(1,n),dbl=TRUE))
colnames(m2)[3] <- "value2"
dd <- merge(m1,m2)
print(g0 %+% dd)

A big version:

n2 <- 20*21/2
m1 <- melt(bigmat(1:n2,dbl=FALSE))
m2 <- melt(bigmat(rep(1,n2),dbl=TRUE))
colnames(m2)[3] <- "value2"
dd2 <- merge(m1,m2)
print(g0 %+% dd2)

visualizing derivatives of a matrix multiplication

Ben Bolker