Tweening to demonstrate kernel density bandwidth

First off set a few things up: humpcol is the colour of the KDE - bgcol is the colour of the true distribution. closeup closes the distribution curve to a polygon with a base line on the x axis; this is useful for shading in its area. tweens contains the landmark curves at regularly spaced bandwidths, and the tweened curves in between them. The do.call and rbind combination turn a list of tweened vectors into a matrix.

N <- 8
nn <- 8
library(tweenr)
set.seed(250162)
x <- rnorm(400)
bw <- seq(0.02,0.7,l=N)
results <- vector(N,mode='list')
for(i in 1:N) {
  kd <- density(x,bw=bw[i],from=-3,to=3)
  xx <- kd$x
  results[[i]] <- kd$y
}

tweens <- do.call(rbind,tween_numeric(results,n=nn))
humpcol <- adjustcolor('indianred',alpha.f=0.5)
bgcol <- adjustcolor('dodgerblue',alpha.f=0.5)
nd <- dnorm(xx)
closeup <- function(x,y) {
  xy <- cbind(x,y)
  top <- xy[1,]
  btm <- xy[nrow(xy),]
  top[2] <- btm[2] <- 0
  return(rbind(top,xy,btm))
}

Now the preparation is done (much of it in the tweening) just create an animation by drawing each kde in turn.

for (i in 1:ncol(tweens)) {
  plot(closeup(xx,tweens[,i]),type='n',ylim=c(0,0.6))
  polygon(closeup(xx,nd),border=bgcol,col=bgcol)
  polygon(closeup(xx,tweens[,i]),border=humpcol,col=humpcol)
  
}

Another approach is to tween the KDE bandwidths rather than the KDEs themselves

bwvec <- tween_numeric(c(0.02,0.7),n=25)[[1]]
for (bwi in bwvec) {
  kd <- density(x,bw=bwi,from=-3,to=3)
  plot(closeup(kd$x,kd$y),type='n',ylim=c(0,0.6))
  polygon(closeup(xx,nd),border=bgcol,col=bgcol)
  polygon(closeup(kd$x,kd$y),border=humpcol,col=humpcol)
}

The above isn’t that interesting, not much different from using seq(0.02,0.7,l=25) to generate the bandwidths. It gets more interesting with other easing effects:

bwvec <- tween_numeric(c(0.02,0.7),n=65,ease='bounce-out')[[1]]
for (bwi in bwvec) {
  kd <- density(x,bw=bwi,from=-3,to=3)
  plot(closeup(kd$x,kd$y),type='n',ylim=c(0,0.6))
  polygon(closeup(xx,nd),border=bgcol,col=bgcol)
  polygon(closeup(kd$x,kd$y),border=humpcol,col=humpcol)
}

Here is another one:

bwvec <- tween_numeric(c(0.2,0.7),n=65,ease='elastic-in-out')[[1]]
for (bwi in bwvec) {
  kd <- density(x,bw=abs(bwi),from=-3,to=3)
  plot(closeup(kd$x,kd$y),type='n',ylim=c(0,0.6))
  polygon(closeup(xx,nd),border=bgcol,col=bgcol)
  polygon(closeup(kd$x,kd$y),border=humpcol,col=humpcol)
}

Demonstrating Optimal Bandwidth Choice

For a given sample the best choice is one providing a KDE that is in some way ‘nearest’ to the true density. Here we know the true density is Gaussian (because its a simulation) and so some sort of difference between that and the KDE is a good measure. One possibility is to measure the absolute difference between the KDE and the Gaussian distribution function, integrating over the range of possible \(x\) values:

\[ \Delta_h = \int_{-\infty}^{\infty} | \hat{f}_h(x) - \phi(x) | \, dx \] where \(\phi\) is the Gaussian probability density function. However, a better option might be the average absolute deviation - this allows for the fact that if most of the observations are close to the median of the distribution, then more weighting should be given to \(x\) and \(|\hat{f}_h(x) - \phi(x)|\) near to this point. This can be achieved by modifying the measure to be: \[ \Delta_h = \int_{-\infty}^{\infty} \phi(x) | \hat{f}_h(x) - \phi(x) | \, dx \] which can be approximated discretely as the weighted mean of \(|\hat{f}_h(x) - \phi(x)|\) over a range of \(x\) values:

bwvec <- tween_numeric(c(0.1,0.7),n=50)[[1]]
score <- bwvec*0.0
for (i in 1:length(bwvec)) {
  kd <- density(x,bw=bwvec[i],from=-3,to=3)
  score[i] <- weighted.mean(abs(kd$y-nd),nd)
}
plot(bwvec,score,type='l')

From this, a score of around 0.3938776 gives the best estimate for this sample. It is then possible to tween from a low value to a high one, then settle on the optimum to create a simulated search.

min_ind <- which.min(score)
minval <- bwvec[min_ind]
bwvec <- c(tween_numeric(c(0.1,0.7),n=20)[[1]],tween_numeric(c(0.7,minval),n=60,ease='elastic-out')[[1]])

for (bwi in bwvec) {
  kd <- density(x,bw=abs(bwi),from=-3,to=3)
  plot(closeup(kd$x,kd$y),type='n',ylim=c(0,0.6))
  polygon(closeup(xx,nd),border=bgcol,col=bgcol)
  polygon(closeup(kd$x,kd$y),border=humpcol,col=humpcol)
}

Finally - a novelty, but another way to visually imply the optimisation process:

score <- bwvec*0.0
for (i in 1:length(bwvec)) {
  kd <- density(x,bw=bwvec[i],from=-3,to=3)
  score[i] <- weighted.mean(abs(kd$y-nd),nd)
}

bwvec2 <- tween_numeric(c(0.1,0.7),n=60)[[1]]
score2 <- bwvec2*0.0
for (i in 1:length(bwvec2)) {
  kd <- density(x,bw=bwvec2[i],from=-3,to=3)
  score2[i] <- weighted.mean(abs(kd$y-nd),nd)
}

for (i in 1:length(bwvec)) {
  plot(bwvec2,score2,type='l')
  if (i > 1)  points(bwvec[i-1],score[i-1],pch=16,cex=3,col=adjustcolor('indianred',alpha.f=0.2))
  points(bwvec[i],score[i],pch=16,cex=3,col=adjustcolor('indianred',alpha.f=0.4))
}