A tractable, scalable, expression for the Kullback Leibler divergence between a multivariate t and a multivariate normal distributions

The \(d\)-variate \(t\) probability density function with \(\nu>0\) degrees of freedom (see and for an extensive review of this model) is given by \[ f_d({\bf x}\vert {\mu},{\Sigma},\nu) = \dfrac{\Gamma\left(\dfrac{\nu+d}{2}\right)}{\Gamma\left(\dfrac{\nu}{2}\right)\sqrt{(\pi\nu)^d\vert{\Sigma}\vert}} \left(1+\dfrac{({\bf x}-{\mu})^{T}{\Sigma}^{-1}({\bf x}-{\mu})}{\nu}\right)^{-\frac{\nu+d}{2}}, \] On the other hand, the \(d\)-variate normal probability density function is given by \[ N_d({\bf x}\vert {\mu},{\Sigma}) = \dfrac{1}{\sqrt{(2\pi)^d\vert{\Sigma}\vert}} \exp \left(-\dfrac{({\bf x}-{\mu})^{T}{\Sigma}^{-1}({\bf x}-{\mu})}{2}\right), \]

[1] found that the Kullback Leibler divergence between a \(d\)-variate \(t\) distribution with \(\nu>0\) degrees of freedom and a \(d\)-variate normal distribution can be written as \[ D_{KL}\Big(N_d({\bf x}|{\bf {\bf 0}},{\bf I})\|f_d({\bf x}\vert {\bf 0},{\bf I},\nu)\Big) = \int_{{\mathbb R}^n} N_d({\bf x}|{\bf {\bf 0}},{\bf I})\log\left\{\frac{N_d({\bf x}|{\bf {\bf 0}},{\bf I})}{f_d({\bf x}\vert {\bf 0},{\bf I},\nu)}\right\}\,d{\bf x} \notag \\ = \log\left\{\frac{1}{(2\pi)^{d/2}K(\nu,d)}\right\}-\frac{d}{2} + \frac{\nu+d}{2}\mathbb{E}_{d}\left\{\log\left(1+\frac{{\bf x}^{\top}{\bf x}}{\nu}\right)\right\}, \] where \[K(d,\nu)=\dfrac{\Gamma\left(\dfrac{\nu+d}{2}\right)}{\Gamma\left(\dfrac{\nu}{2}\right)\sqrt{(\pi\nu)^d}}.\] The expectation in this equation can be reduced to one-dimensional integration using again a change of variable in terms of spherical coordinates: \[ \mathbb{E}_{d}\left\{\log\left(1+\frac{{\bf x}^{\top}{\bf x}}{\nu}\right)\right\} = \dfrac{1}{2^{\frac{d}{2}}\Gamma\left(\frac{d}{2}\right)} \int_0^{\infty} e^{-\frac{t}{2}} t^{\frac{d}{2}-1}\log\left(1+\dfrac{t}{\nu}\right) dt. \]

Thus, this expression only involves a 1-dimensional integral, making the calculation of the Kullback Leibler divergence scalable and tractable for any dimension \(d=1,2,3,\dots\). The following R code shows the implementation of this divergence for several dimensions.

References

  1. Objective priors for the number of degrees of freedom of a multivariate t distribution and the t-copula
rm(list=ls())
# Package required for a nice format of the tables 
library(knitr) 
## Warning: package 'knitr' was built under R version 3.3.3
# Normalising constant
K <- function(d,nu) (gamma(0.5*(nu+d))/( gamma(0.5*nu)*sqrt((pi*nu)^d) ))

# Kullback Liebler divergence
DKLn <- function(nu){
  val1 <- -0.5*d*log(2*pi)  -0.5*d
  tempf <- Vectorize(function(t) exp(-0.5*t)*t^(0.5*d-1)*log(1+t/nu))
  int<- integrate(tempf,0,Inf,rel.tol = 1e-9)$value
  val2 <-  log(K(d,nu)) - 0.5*(nu+d)*(1/(gamma(0.5*d)*2^(0.5*d)))*int
  return(val1-val2)
}

# Kullback Liebler divergence: numerical integration 1-d
DKLn2 <- function(nu){
  tempf <- Vectorize(function(t) dnorm(t)*(dnorm(t,log=T) - dt(t,df=nu,log=T)))
  int<- integrate(tempf,-Inf,Inf,rel.tol = 1e-9)$value
  return(int)
}

# Kullback Leibler in one dimension
d=1 # dimension

D <- matrix(0,ncol=2,nrow=30)
D[,1] <- 1:30
for(i in 1:30) D[i,2] = DKLn(i)
colnames(D) <- c("nu","DKLn(nu)")
rownames(D) <- 1:30
print(kable(D,digits=12))
## 
## 
##  nu      DKLn(nu)
## ---  ------------
##   1   0.259244532
##   2   0.117228517
##   3   0.069151598
##   4   0.046270531
##   5   0.033376682
##   6   0.025320299
##   7   0.019919675
##   8   0.016108929
##   9   0.013312651
##  10   0.011196266
##  11   0.009553694
##  12   0.008252013
##  13   0.007202185
##  14   0.006342651
##  15   0.005629697
##  16   0.005031551
##  17   0.004524658
##  18   0.004091239
##  19   0.003717667
##  20   0.003393347
##  21   0.003109942
##  22   0.002860814
##  23   0.002640624
##  24   0.002445037
##  25   0.002270506
##  26   0.002114100
##  27   0.001973384
##  28   0.001846321
##  29   0.001731192
##  30   0.001626545
# Double check using numerical integration
D2 <- matrix(0,ncol=2,nrow=30)
D2[,1] <- 1:30
for(i in 1:30) D2[i,2] = DKLn2(i)
colnames(D2) <- c("nu","DKLn2(nu)")
rownames(D2) <- 1:30
print(kable(D2,digits=12))
## 
## 
##  nu     DKLn2(nu)
## ---  ------------
##   1   0.259244532
##   2   0.117228517
##   3   0.069151598
##   4   0.046270531
##   5   0.033376682
##   6   0.025320299
##   7   0.019919675
##   8   0.016108929
##   9   0.013312651
##  10   0.011196266
##  11   0.009553694
##  12   0.008252013
##  13   0.007202185
##  14   0.006342651
##  15   0.005629697
##  16   0.005031551
##  17   0.004524658
##  18   0.004091239
##  19   0.003717667
##  20   0.003393347
##  21   0.003109942
##  22   0.002860814
##  23   0.002640624
##  24   0.002445037
##  25   0.002270506
##  26   0.002114100
##  27   0.001973384
##  28   0.001846321
##  29   0.001731192
##  30   0.001626545
# Kullback Leibler in dimesion 2
rm(d)
d=2 # dimension

D <- matrix(0,ncol=2,nrow=30)
D[,1] <- 1:30
for(i in 1:30) D[i,2] = DKLn(i)
colnames(D) <- c("nu","DKLn(nu)")
rownames(D) <- 1:30
print(kable(D,digits=12))
## 
## 
##  nu      DKLn(nu)
## ---  ------------
##   1   0.384365949
##   2   0.192694725
##   3   0.120641673
##   4   0.083985851
##   5   0.062340428
##   6   0.048334961
##   7   0.038686992
##   8   0.031728250
##   9   0.026528734
##  10   0.022533058
##  11   0.019391318
##  12   0.016873405
##  13   0.014822554
##  14   0.013128769
##  15   0.011712901
##  16   0.010516753
##  17   0.009496714
##  18   0.008619556
##  19   0.007859586
##  20   0.007196673
##  21   0.006614857
##  22   0.006101346
##  23   0.005645788
##  24   0.005239731
##  25   0.004876215
##  26   0.004549469
##  27   0.004254673
##  28   0.003987773
##  29   0.003745342
##  30   0.003524466
# Kullback Leibler in dimesion 3
rm(d)
d=3 # dimension

D <- matrix(0,ncol=2,nrow=30)
D[,1] <- 1:30
for(i in 1:30) D[i,2] = DKLn(i)
colnames(D) <- c("nu","DKLn(nu)")
rownames(D) <- 1:30
print(kable(D,digits=12))
## 
## 
##  nu      DKLn(nu)
## ---  ------------
##   1   0.476832362
##   2   0.253366815
##   3   0.164653466
##   4   0.117709694
##   5   0.089146309
##   6   0.070222460
##   7   0.056933686
##   8   0.047194982
##   9   0.039819624
##  10   0.034086017
##  11   0.029532315
##  12   0.025850535
##  13   0.022828252
##  14   0.020314755
##  15   0.018200514
##  16   0.016404261
##  17   0.014864598
##  18   0.013534392
##  19   0.012376947
##  20   0.011363327
##  21   0.010470457
##  22   0.009679743
##  23   0.008976059
##  24   0.008346997
##  25   0.007782299
##  26   0.007273419
##  27   0.006813192
##  28   0.006395573
##  29   0.006015431
##  30   0.005668390
# Kullback Leibler in dimesion 50
rm(d)
d=50 # dimension

D <- matrix(0,ncol=2,nrow=30)
D[,1] <- 1:30
for(i in 1:30) D[i,2] = DKLn(i)
colnames(D) <- c("nu","DKLn(nu)")
rownames(D) <- 1:30
print(kable(D,digits=12))
## 
## 
##  nu    DKLn(nu)
## ---  ----------
##   1   1.6232665
##   2   1.2241719
##   3   1.0144233
##   4   0.8757622
##   5   0.7741199
##   6   0.6950649
##   7   0.6311589
##   8   0.5780742
##   9   0.5330756
##  10   0.4943268
##  11   0.4605374
##  12   0.4307670
##  13   0.4043103
##  14   0.3806252
##  15   0.3592868
##  16   0.3399563
##  17   0.3223594
##  18   0.3062712
##  19   0.2915054
##  20   0.2779058
##  21   0.2653407
##  22   0.2536977
##  23   0.2428807
##  24   0.2328064
##  25   0.2234027
##  26   0.2146066
##  27   0.2063629
##  28   0.1986227
##  29   0.1913430
##  30   0.1844855