Marie Mørch
The aim of this assignment is to sample from the probability distribution on \([0,\infty)\) with density \[ f(y) \propto \exp(-y^3+y), y\geq 0 \]
Find a Gaussian envelope of f and implement rejection sampling from the distribution with density f using this envelope. Then implement an adaptive rejection sampling algorithm and compare it with the one based on the Gaussian envelope.
Idea: Sample candidates from a simple simple distribution from which we know how to sample and then correcting the sampling probability by rejecting some of the candidates.
We let g(x) be a gaussian distribution \(\mathcal{N}(\mu, \sigma)\).
Find envelope e(x) with the property \[e(x) = g(x)/\alpha \geq f(x)\] for all x for which \(f(x)>0\).
tfun <- function(y) (y>=0)*(exp(-y^3+y))
alpha <- 1/2.1
mu <- 0.5
sigma <- 0.5
efun <- function(y) 1/alpha*dnorm(y, mu,sigma)Algorithm:
Sample \(Y\sim g\)
Sample \(U\sim Unif(0,1)\)
Reject Y if \(U > f(Y)/e(Y)\)
Otherwise, keep the value of Y.
sim <- function(n) {
y <- numeric(n)
for(i in 1:n) {
ratio <- 0
u <- 1
while(u > ratio) {
y0 <- rnorm(1, 0.5,0.5)
ratio <- tfun(y0)/efun(y0)
u <- runif(1)
}
y[i] <- y0
}
y
}http://rpubs.com/mamo3007/325820
We spend most time on sampling.
simvec<- function(n){
X <- rnorm(n*(1+alpha),0.5,0.5)
U <- runif(n*(1+alpha))
X <- X[U*efun(X)<tfun(X)]
m <- length(X)
if(m>=n){
X[0:n]
}else{
simvec(n)
}
}#include <Rcpp.h>
#include <Rmath.h>
#include <R.h>
using namespace Rcpp;
// [[Rcpp::export]]
double ctfun( double x){
if(x>=0){
return exp(-pow(x,3)+x);
}
}
// [[Rcpp::export]]
double cefun(double x){
return 2.1*R::dnorm(x,0.5,0.5, false);
}
// [[Rcpp::export]]
NumericVector simFast(int n) {
NumericVector y(n);
double ratio, u, y0;
for(int i = 0; i < n; i++) {
ratio = 0;
u = 1;
while(u > ratio) {
y0 = rnorm(1,0.5,0.5)[0];
ratio = ctfun(y0)/cefun(y0);
u = runif(1)[0];
}
y[i] = y0;
}
return y;
}library(microbenchmark)
microbenchmark(sim(1000), simvec(1000), simFast(1000))## Unit: microseconds
## expr min lq mean median uq
## sim(1000) 7828.702 11129.231 15091.8875 13588.323 17758.7800
## simvec(1000) 384.817 400.565 685.7169 463.396 614.3345
## simFast(1000) 385.799 408.837 560.9392 461.493 689.6955
## max neval
## 82900.507 100
## 9962.008 100
## 2650.465 100
Can be used when the density is log-concave. We let \[ l(x) =\log f(x)\] and \[ T_k =\lbrace x_1, x_2, ..., x_k \rbrace , x_1 < x_2< ...< x_k \] We define the rejection envelope to be the exponential of the piecewise linear upper hull of \(l\) formed by the tangents to \(l\) at each point in \(T_k\):
\[ e_k(x) = \exp(l(x_i) + (x − x_i)l′(x_i) ) \text{ for } x \in [z_{i−1}, z_i] , i=1,..,k \]
where
\[ z_i=\frac{l(x_{i+1})-l(x_i)-x_{i+1}l'(x_{i+1})+x_i l'(x_i)}{l'(x_i)-l'(x_{i+1})} \]
is the intersectionpoints of the tangents. We define the squeezing function to be the exponential of the piecewise linear lower hull of \(l\) formed by the chords between adjacent points in \(T_k\):
\[ s_k(x)=\exp(\frac{(x_{i+1}-x)l(x_i)+(x-x_i)l(x_{i+1})}{x_{i+1}-x_i}), \text{ for } x \in [x_i, x_{i+1}], i=1,..,k \]
Like squeezed sampling. Choose k and suitable grid \(T_k\)
Candidate draws are taken from density obtained by scaling the piecewise exponential envelope ek so that it integrates to 1
zfun<- function(d,l,x,k){
z<-c()
for( i in 1:(k-1)){
z[i] <- (l[i+1]-l[i]-x[i+1]*d[i+1]+x[i]*d[i])/(d[i]-d[i+1])
}
c(0,z,Inf)
}
ek <- function(k,lfun,dlfun, x){
Tk <-seq(0, 50, length.out = k)
lval <- lfun(Tk)
dlval <- dlfun(Tk)
z<- zfun(dlval, lval, Tk, k)
i <- findInterval(x,z)
exp(lval[i]+(x-Tk[i])*dlval[i])
}
sk <- function(k,lfun,dlfun,x){
Tk <-seq(0, 50, length.out = k)
lval <- lfun(Tk)
dlval <- dlfun(Tk)
if(x<Tk[1] || x > Tk[k]){ -Inf}
else{
i<- findInterval(x, Tk)
exp(((Tk[i+1]-x)*lval[i]+(x-Tk[i])*lval[i+1])/(Tk[i+1]-Tk[i]))
}
}logfun <- function(y) (y>=0)*(-y^3+y)
Dlogfun <- function(y) (y>=0)*(-3*y^2+1)samplefun <- function(Tk,lval,dlval,c){
e <- function(x){
1/c*env(Tk=Tk,lval=lval,dlval=dlval,x)
}
cdf <- function(x) integrate(e, 0, x)$value
cdf.inv <- function(y){uniroot(function(x){cdf(x)-y},interval=c(0,10))$root}
cdf.inv(runif(1))
}ARS<- function(f,l,dl,k, n, update=TRUE){
Tk <- seq(0,50, length.out =k)
lval <- l(Tk)
dlval <- dl(Tk)
y <- numeric(n)
i<- 0
c <- integrate(env, 0, Inf,Tk=Tk, lval=lval, dlval=dlval)$value
while(i <= n){
y0<- samplefun(Tk,lval,dlval, c)
u0<- runif(1)
ratio1 <- squeez(Tk,lval,dlval,y0)/env(Tk,lval,dlval,y0)
if(u0 <= ratio1){
y[i] <- y0
i <- i+1
}else{
ratio2 <- f(y0)/env(Tk,lval,dlval,y0)
if(u0<=ratio2){
y[i]<- y0
i <- i+1
if(update){
if(!is.element(y0,Tk)){
Tk<-sort(c(Tk,y0))
lval <- l(Tk)
dlval <- dl(Tk)
c <- integrate(env, 0, Inf,Tk=Tk, lval=lval, dlval=dlval)$value
}
}
}
}
}
y
}library(microbenchmark)
microbenchmark(ARS(tfun,l=logfun, dl =Dlogfun,k=50, n=100),simFast(100) )## Unit: microseconds
## expr min
## ARS(tfun, l = logfun, dl = Dlogfun, k = 50, n = 100) 1285719.360
## simFast(100) 39.777
## lq mean median uq max neval
## 1517312.5755 1.702742e+06 1674827.2010 1816994.3575 2641644.246 100
## 45.0705 6.927965e+01 60.0745 69.5055 482.551 100
set.seed(1234)
sims<-ARS(tfun,l=logfun, dl =Dlogfun,k=50, n=1000)
set.seed(1234)
sims2<-ARS(tfun,l=logfun, dl =Dlogfun,k=50, n=1000, update=FALSE)Purple is udpdate and blue is no update of the envelope.
microbenchmark(ARS(tfun,l=logfun, dl =Dlogfun,k=50, n=100),ARS(tfun,l=logfun, dl = Dlogfun,k=50, n=100,update=FALSE))The rejection envelope is a piecewise exponential function.
Fk <- function(lval, dlval, z, Tk, i,c){
1/(c*dlval[i])*(exp(lval[i]+(z[i+1]-Tk[i])*dlval[i])-exp(lval[i]+(z[i]-Tk[i])*dlval[i]))
}
Finv <- function(u,lval, dlval, Tk,z, c){
zz <- c(0,cumsum(sapply(1:(k-1), Fk, lval=lval, dlval=dlval,Tk=Tk,z=z, c=c)))
i <- findInterval(u, zz)
M<- zz[i]
Tk[i]-lval[i]/dlval[i]+1/dlval[i]*log(exp(lval[i]+dlval[i]*(z[i]-Tk[i]))+(u-M)*dlval[i]*c)
}
sam <- function(Tk,lval,dlval,n, c){
k <- length(Tk)
z <- zfun(dlval, lval, Tk, k)
Finv(runif(n), lval, dlval, Tk,z, c)
}ARSnew<- function(f,l,dl,k, n){
Tk <- seq(0,50, length.out =k)
lval <- l(Tk)
dlval <- dl(Tk)
c<-integrate(env, 0, Inf,Tk=Tk, lval=lval, dlval=dlval)$value
y <- numeric(n)
i<- 0
while(i <= n){
y0<- sam(Tk,lval,dlval,1,c)
u0<- runif(1)
ratio1 <- squeez(Tk,lval,dlval,y0)/env(Tk,lval,dlval,y0)
if(u0 <= ratio1){
y[i] <- y0
i <- i+1
}else{
ratio2 <- f(y0)/env(Tk,lval,dlval,y0)
if(u0<=ratio2){
y[i]<- y0
i <- i+1
}
}
}
y
}ARSvec<- function(f,l,dl,k, n){
Tk <- seq(0,50, length.out =k)
lval <- l(Tk)
dlval <- dl(Tk)
c<-integrate(env, 0, Inf,Tk=Tk, lval=lval, dlval=dlval)$value
X <- sam(Tk,lval,dlval,2*n, c)
U <- runif(2*n)
i <- U<=squeez(Tk,lval,dlval,X)/env(Tk,lval,dlval,X)
j <- U[!i]<=f(X[!i])/env(Tk,lval,dlval,X[!i])
X<-c(X[i], X[!i][j])
if(length(X)>=n){
sample(X,n)
}else{
ARSvec(f,l,dl,k,n)
}
}sim1<-ARSnew(tfun,l=logfun, Dlogfun, 50,5000)
sim2<-ARSvec(tfun,logfun, Dlogfun, 50,5000)microbenchmark(ARS(tfun,l=logfun, dl =Dlogfun,k=50, n=100), ARSnew(tfun,l=logfun, dl =Dlogfun,k=50, n=100),ARSvec(tfun,logfun, Dlogfun, 50,100))## Unit: milliseconds
## expr min
## ARS(tfun, l = logfun, dl = Dlogfun, k = 50, n = 100) 991.396578
## ARSnew(tfun, l = logfun, dl = Dlogfun, k = 50, n = 100) 18.233381
## ARSvec(tfun, logfun, Dlogfun, 50, 100) 1.540567
## lq mean median uq max neval
## 1254.49590 1357.879116 1335.88904 1432.798924 2077.58027 100
## 20.08115 24.684441 22.15801 24.762230 225.19246 100
## 1.67581 2.575222 1.78348 1.973044 50.11228 100
microbenchmark(ARSnew(tfun,l=logfun, dl =Dlogfun,k=50, n=1000),ARSvec(tfun,logfun, Dlogfun, 50,1000), simFast(1000))## Unit: microseconds
## expr min
## ARSnew(tfun, l = logfun, dl = Dlogfun, k = 50, n = 1000) 192106.300
## ARSvec(tfun, logfun, Dlogfun, 50, 1000) 2323.866
## simFast(1000) 385.814
## lq mean median uq max neval
## 202178.815 216746.416 207281.2435 223364.7265 328239.850 100
## 2510.243 3557.445 2618.7005 4076.6360 11587.202 100
## 405.205 617.033 422.0155 462.0325 5238.495 100