Rejection Sampling

Marie Mørch

Aim

The aim of this assignment is to sample from the probability distribution on \([0,\infty)\) with density \[ f(y) \propto \exp(-y^3+y), y\geq 0 \]

Find a Gaussian envelope of f and implement rejection sampling from the distribution with density f using this envelope. Then implement an adaptive rejection sampling algorithm and compare it with the one based on the Gaussian envelope.

Gaussian Envelope

Idea: Sample candidates from a simple simple distribution from which we know how to sample and then correcting the sampling probability by rejecting some of the candidates.

We let g(x) be a gaussian distribution \(\mathcal{N}(\mu, \sigma)\).

Find envelope e(x) with the property \[e(x) = g(x)/\alpha \geq f(x)\] for all x for which \(f(x)>0\).

Gaussian Envelope

tfun <- function(y) (y>=0)*(exp(-y^3+y))
alpha <- 1/2.1 
mu <- 0.5
sigma <- 0.5
efun <- function(y) 1/alpha*dnorm(y, mu,sigma)

Rejection Sampling

Algorithm:

  1. Sample \(Y\sim g\)

  2. Sample \(U\sim Unif(0,1)\)

  3. Reject Y if \(U > f(Y)/e(Y)\)

  4. Otherwise, keep the value of Y.

Code

sim <- function(n) {
  y <- numeric(n)
  for(i in 1:n) {
    ratio <- 0 
    u <- 1
    while(u > ratio) {
      y0 <- rnorm(1, 0.5,0.5)
      ratio <- tfun(y0)/efun(y0)
      u <- runif(1)
    }
    y[i] <- y0
  }
  y
}

Test

Profiling

http://rpubs.com/mamo3007/325820

Bottlenecks:

We spend most time on sampling.

Optimization:

Vectorized version

simvec<- function(n){
  X <- rnorm(n*(1+alpha),0.5,0.5)
  U <- runif(n*(1+alpha))
  X <- X[U*efun(X)<tfun(X)]
  m <- length(X)
  if(m>=n){
    X[0:n]
  }else{
    simvec(n)
  }
}

C++ version

#include <Rcpp.h> 
#include <Rmath.h>
#include <R.h>
using namespace Rcpp;
// [[Rcpp::export]]
double ctfun( double x){
  if(x>=0){
    return exp(-pow(x,3)+x);
    }
  }
// [[Rcpp::export]]
double cefun(double x){
  return 2.1*R::dnorm(x,0.5,0.5, false);
}
// [[Rcpp::export]]
NumericVector simFast(int n) {
  NumericVector y(n);
  double ratio, u, y0;
  for(int i = 0; i < n; i++) {
    ratio = 0;
    u = 1; 
    while(u > ratio) {
      y0 = rnorm(1,0.5,0.5)[0];
      ratio = ctfun(y0)/cefun(y0);
      u = runif(1)[0];
    }
    y[i] = y0;
  }
  return y;
}

Benchmarking

library(microbenchmark)
microbenchmark(sim(1000), simvec(1000), simFast(1000))
## Unit: microseconds
##           expr      min        lq       mean    median         uq
##      sim(1000) 7828.702 11129.231 15091.8875 13588.323 17758.7800
##   simvec(1000)  384.817   400.565   685.7169   463.396   614.3345
##  simFast(1000)  385.799   408.837   560.9392   461.493   689.6955
##        max neval
##  82900.507   100
##   9962.008   100
##   2650.465   100

Adaptive rejection sampling

Can be used when the density is log-concave. We let \[ l(x) =\log f(x)\] and \[ T_k =\lbrace x_1, x_2, ..., x_k \rbrace , x_1 < x_2< ...< x_k \] We define the rejection envelope to be the exponential of the piecewise linear upper hull of \(l\) formed by the tangents to \(l\) at each point in \(T_k\):

\[ e_k(x) = \exp(l(x_i) + (x − x_i)l′(x_i) ) \text{ for } x \in [z_{i−1}, z_i] , i=1,..,k \]

where

\[ z_i=\frac{l(x_{i+1})-l(x_i)-x_{i+1}l'(x_{i+1})+x_i l'(x_i)}{l'(x_i)-l'(x_{i+1})} \]

is the intersectionpoints of the tangents. We define the squeezing function to be the exponential of the piecewise linear lower hull of \(l\) formed by the chords between adjacent points in \(T_k\):

\[ s_k(x)=\exp(\frac{(x_{i+1}-x)l(x_i)+(x-x_i)l(x_{i+1})}{x_{i+1}-x_i}), \text{ for } x \in [x_i, x_{i+1}], i=1,..,k \]

Adaptive rejection sampling

Like squeezed sampling. Choose k and suitable grid \(T_k\)

  1. Sample \(Y \sim g\)
  2. Sample \(U\sim Unif(0,1)\)
  3. If \(U ≤s_k(Y)/e_k(Y)\), keep the value of Y.
  4. Otherwise, if \(U ≤ f(Y)/e_k(Y)\) keep the value of Y and add point to \(T_k\). Update \(T_k, e_k, s_k\) to \(T_{k+1}, e_{k+1}, s_{k+1}\)
  5. Else reject Y

Candidate draws are taken from density obtained by scaling the piecewise exponential envelope ek so that it integrates to 1

Envelope and squeezing function

zfun<- function(d,l,x,k){
  z<-c()
  for( i in 1:(k-1)){
    z[i] <- (l[i+1]-l[i]-x[i+1]*d[i+1]+x[i]*d[i])/(d[i]-d[i+1])
  }  
  c(0,z,Inf)
}

ek <- function(k,lfun,dlfun, x){
  Tk <-seq(0, 50, length.out = k)
  lval <- lfun(Tk)
  dlval <- dlfun(Tk)
  z<- zfun(dlval, lval, Tk, k)
  i <- findInterval(x,z)
  exp(lval[i]+(x-Tk[i])*dlval[i])
}

sk <- function(k,lfun,dlfun,x){
  Tk <-seq(0, 50, length.out = k)
  lval <- lfun(Tk)
  dlval <- dlfun(Tk)
  if(x<Tk[1] || x > Tk[k]){ -Inf}
  else{
     i<- findInterval(x, Tk)
     exp(((Tk[i+1]-x)*lval[i]+(x-Tk[i])*lval[i+1])/(Tk[i+1]-Tk[i]))
  }
}

Envelope and squeezing function

logfun <- function(y) (y>=0)*(-y^3+y)
Dlogfun <- function(y) (y>=0)*(-3*y^2+1)

Adaptive rejection algorithm

samplefun <- function(Tk,lval,dlval,c){
  e <- function(x){
    1/c*env(Tk=Tk,lval=lval,dlval=dlval,x)
  }
  cdf <- function(x) integrate(e, 0, x)$value
  cdf.inv <- function(y){uniroot(function(x){cdf(x)-y},interval=c(0,10))$root}
  cdf.inv(runif(1))
}

Adaptive rejection algorithm

ARS<- function(f,l,dl,k, n, update=TRUE){
  Tk <- seq(0,50, length.out =k)
  lval <- l(Tk)
  dlval <- dl(Tk)
  y <- numeric(n)
  i<- 0
  c <- integrate(env, 0, Inf,Tk=Tk, lval=lval, dlval=dlval)$value 
    while(i <= n){
      y0<- samplefun(Tk,lval,dlval, c)
      u0<- runif(1)
      ratio1 <- squeez(Tk,lval,dlval,y0)/env(Tk,lval,dlval,y0)
      if(u0 <= ratio1){
        y[i] <- y0
        i <- i+1
        }else{
          ratio2 <- f(y0)/env(Tk,lval,dlval,y0)
          if(u0<=ratio2){
            y[i]<- y0
            i <- i+1
            if(update){
              if(!is.element(y0,Tk)){
                Tk<-sort(c(Tk,y0))
                lval <- l(Tk)
                dlval <- dl(Tk)
                c <- integrate(env, 0, Inf,Tk=Tk, lval=lval, dlval=dlval)$value
              }
            }
          }
        }
      }
  y
}

Benchmarking

library(microbenchmark)
microbenchmark(ARS(tfun,l=logfun, dl =Dlogfun,k=50, n=100),simFast(100) )
## Unit: microseconds
##                                                  expr         min
##  ARS(tfun, l = logfun, dl = Dlogfun, k = 50, n = 100) 1285719.360
##                                          simFast(100)      39.777
##            lq         mean       median           uq         max neval
##  1517312.5755 1.702742e+06 1674827.2010 1816994.3575 2641644.246   100
##       45.0705 6.927965e+01      60.0745      69.5055     482.551   100

Test and updating step

set.seed(1234)
sims<-ARS(tfun,l=logfun, dl =Dlogfun,k=50, n=1000)
set.seed(1234)
sims2<-ARS(tfun,l=logfun, dl =Dlogfun,k=50, n=1000, update=FALSE)

Purple is udpdate and blue is no update of the envelope.

microbenchmark(ARS(tfun,l=logfun, dl =Dlogfun,k=50, n=100),ARS(tfun,l=logfun, dl = Dlogfun,k=50, n=100,update=FALSE))

Profiling

http://rpubs.com/mamo3007/325826

Bottleneck:

Implementation of the sampling function by hand:

The rejection envelope is a piecewise exponential function.

Fk <- function(lval, dlval, z, Tk, i,c){
  1/(c*dlval[i])*(exp(lval[i]+(z[i+1]-Tk[i])*dlval[i])-exp(lval[i]+(z[i]-Tk[i])*dlval[i]))
}

Finv <- function(u,lval, dlval, Tk,z, c){
    zz <- c(0,cumsum(sapply(1:(k-1), Fk, lval=lval, dlval=dlval,Tk=Tk,z=z, c=c)))
    i <- findInterval(u, zz)
    M<- zz[i]
    Tk[i]-lval[i]/dlval[i]+1/dlval[i]*log(exp(lval[i]+dlval[i]*(z[i]-Tk[i]))+(u-M)*dlval[i]*c)
}

sam <- function(Tk,lval,dlval,n, c){
  k <- length(Tk)
  z <- zfun(dlval, lval, Tk, k)
  Finv(runif(n), lval, dlval, Tk,z, c)
}

ARS:

ARSnew<- function(f,l,dl,k, n){
  Tk <- seq(0,50, length.out =k)
  lval <- l(Tk)
  dlval <- dl(Tk)
  c<-integrate(env, 0, Inf,Tk=Tk, lval=lval, dlval=dlval)$value
  y <- numeric(n)
  i<- 0
    while(i <= n){
      y0<- sam(Tk,lval,dlval,1,c)
      u0<- runif(1)
      ratio1 <- squeez(Tk,lval,dlval,y0)/env(Tk,lval,dlval,y0)
      if(u0 <= ratio1){
        y[i] <- y0
        i <- i+1
        }else{
          ratio2 <- f(y0)/env(Tk,lval,dlval,y0)
          if(u0<=ratio2){
            y[i]<- y0
            i <- i+1
          }
        }
      }
  y
}

Vectorized version

ARSvec<- function(f,l,dl,k, n){
  Tk <- seq(0,50, length.out =k)
  lval <- l(Tk)
  dlval <- dl(Tk)
  c<-integrate(env, 0, Inf,Tk=Tk, lval=lval, dlval=dlval)$value
  X <- sam(Tk,lval,dlval,2*n, c)
  U <- runif(2*n)
  i <- U<=squeez(Tk,lval,dlval,X)/env(Tk,lval,dlval,X)
  j <- U[!i]<=f(X[!i])/env(Tk,lval,dlval,X[!i])
  X<-c(X[i], X[!i][j])
  if(length(X)>=n){
    sample(X,n)
  }else{
    ARSvec(f,l,dl,k,n)
  }
}

Test

sim1<-ARSnew(tfun,l=logfun, Dlogfun, 50,5000)
sim2<-ARSvec(tfun,logfun, Dlogfun, 50,5000)

Benchmarking

microbenchmark(ARS(tfun,l=logfun, dl =Dlogfun,k=50, n=100), ARSnew(tfun,l=logfun, dl =Dlogfun,k=50, n=100),ARSvec(tfun,logfun, Dlogfun, 50,100))
## Unit: milliseconds
##                                                     expr        min
##     ARS(tfun, l = logfun, dl = Dlogfun, k = 50, n = 100) 991.396578
##  ARSnew(tfun, l = logfun, dl = Dlogfun, k = 50, n = 100)  18.233381
##                   ARSvec(tfun, logfun, Dlogfun, 50, 100)   1.540567
##          lq        mean     median          uq        max neval
##  1254.49590 1357.879116 1335.88904 1432.798924 2077.58027   100
##    20.08115   24.684441   22.15801   24.762230  225.19246   100
##     1.67581    2.575222    1.78348    1.973044   50.11228   100
microbenchmark(ARSnew(tfun,l=logfun, dl =Dlogfun,k=50, n=1000),ARSvec(tfun,logfun, Dlogfun, 50,1000), simFast(1000))
## Unit: microseconds
##                                                      expr        min
##  ARSnew(tfun, l = logfun, dl = Dlogfun, k = 50, n = 1000) 192106.300
##                   ARSvec(tfun, logfun, Dlogfun, 50, 1000)   2323.866
##                                             simFast(1000)    385.814
##          lq       mean      median          uq        max neval
##  202178.815 216746.416 207281.2435 223364.7265 328239.850   100
##    2510.243   3557.445   2618.7005   4076.6360  11587.202   100
##     405.205    617.033    422.0155    462.0325   5238.495   100