Problem: Simulate from Truncated Normal Distribution

\(f(x|\mu,c,\sigma^2) =\frac{exp(-(x-\mu)^2/(2\sigma^2))}{\sqrt(2\pi)\sigma[1-\Phi((c-\mu)/\sigma)]}I(x\geq c)\)

1. Inverse Method to draw samples from the truncated normal distribution, sample size N=30,000

#Parameters
mu = 1
c  = 3
var= 5
sig= sqrt(var)
N  = 30000

f <- function(x,mu,sig,c){ifelse(x>=c,1,0)*exp(-(x-mu)^2/(2*sig^2))/(sqrt(2*pi)*sig*(1-pnorm(c,mu,sig)))}

inversion <- function(mu,c,sig,N){
  u = runif(N)
  x = qnorm( (u+(1-u)*pnorm((c-mu)/sig)))*sig+mu
}
result <- inversion(mu,c,sig,N)

# Graphing
hist(result,col='lightblue',axes=FALSE,ann=FALSE,breaks = 50,xlim=c(mu,10),ylim=c(0,6000))
x=seq(1,9,0.01); lines(x,y=f(x,mu,sig,c)*N/50*9,lty = 1, lwd = 3, col='gray')
abline(v=1, col='orange',lwd=3,lty=2);     text(mu+0.2,5000,"mu",lwd=5,col = 'orange')
abline(v=c, col='red',lwd=3,lty=2);        text(c+0.2,5000,"c",lwd=5,col = 'red')
axis(1, las=1,at = seq(mu,10,2))
axis(2, las=1, at = seq(0,10000,1000))
box()
title(xlab= "Simulated x", col.lab=rgb(0,0,0))
title(ylab= "Frequency", col.lab=rgb(0,0,0))
title(main = "Histogram of Simulation by Inversion Transform")
legend(8,5000, c("c","mu","Scaled pdf"), cex=0.8, 
       col=c("red","orange","gray"), pch=c(22,22,22), lty=c(2,2,2),lwd=c(2,2,2))

2. (b) Apply acceptance-rejection method with exponential function \(g\) to simulate samples from the truncated normal distribution, sample size N = 30,000

\(g(x|\alpha,c) = \alpha\, exp(\alpha(x-c))I(x\geq c)\)
# Parameters
mu = 1
c  = 3            # c for both truncate normal and exponential
var= 5
sig = sqrt(var)
a  = 1            # alpha
N  = 30000

exp_sim <- function(c,a){
  u1 = runif(1)
  y = -log(1-u1)/a+c
  return(y)
}


f <- function(x,mu,sig,c){ifelse(x>=c,1,0)*exp(-(x-mu)^2/(2*sig^2))/(sqrt(2*pi)*sig*(1-pnorm(c,mu,sig)))}
g <- function(x,a,c)     {ifelse(x>=c,1,0)*a*exp(-a*(x-c))}

# Accept-Reject function
accept_reject<- function(mu,c,sig,a,N){
  M = exp(-a*c+a^2*sig^2/2+mu*a)/ (sqrt(2*pi)*sig*(1-pnorm(c,mu,sig))*a)
  result = rep(0,N)
  run = 0
  n = 1
  while (n <= N){
    y = exp_sim(c,a)
    u2 = runif(1)
    if (u2 < (  f(y,mu,sig,c)/( M*g(y,a,c)) )  ){
      result[n] = y
      n = n + 1
    }
    run = run + 1
  }
  return(list(result=result, accept_sim = N / run , accept_theo = 1 / M))
}
result = accept_reject(mu,c,sig,a,N)$result

# Graphing
hist(result,col='lightblue',axes=FALSE,ann=FALSE,breaks = 50,xlim=c(mu,10),ylim=c(0,6000))
lines(x=seq(1,9,0.01),y=f(x,mu,sig,c)*N/50*9,lty = 1, lwd = 3, col='gray')
abline(v=1, col='orange',lwd=3,lty=2);     text(mu+0.2,5000,"mu",lwd=5,col = 'orange')
abline(v=c, col='red',lwd=3,lty=2);        text(c+0.2,5000,"c",lwd=5,col = 'red')
axis(1, las=1,at = seq(mu,10,2))
axis(2, las=1, at = seq(0,10000,1000))
box()
title(xlab= "Simulated x", col.lab=rgb(0,0,0))
title(ylab= "Frequency", col.lab=rgb(0,0,0))
title(main = "Histogram of Simulation by Accepte-rejection")
legend(8,5000, c("c","mu","Scaled pdf"), cex=0.8, 
       col=c("red","orange","gray"), pch=c(22,22,22), lty=c(2,2,1),lwd=c(2,2,2))

2. (c) Provide numerical justification of the acceptance rate

N = 100000
test <- accept_reject(mu,c,sig,a,N)
accept_sim  = test$accept_sim
accept_theo = test$accept_theo
diff = accept_theo- accept_sim
# cat("Theoretical Acceptance rate is 1/M : ", accept_theo )
# cat("Estimated Acceptance rate by ", N, "times of simulation: ", accept_sim)

The theoretical acceptance rate is 1/M : 0.6307843, simulated acceptance rate is 0.6303739 with 10^{5} times of simulation. Difference between theoretical and simulated rate is 4.103898910^{-4}.

3. Compare computational time and density estimating accuracy.

3.(a) Computational Time

# Parameters and Functions
mu = 1
c  = 3             # c for both truncate normal and exponential
var= 5 
sig = sqrt(var)
a  = 0.5           # alpha to maximize the acceptance rate
N  = 100000
f <- function(x,mu,sig,c){ifelse(x>=c,1,0)*exp(-(x-mu)^2/(2*sig^2))/(sqrt(2*pi)*sig*(1-pnorm(c,mu,sig)))}
g <- function(x,a,c)     {ifelse(x>=c,1,0)*a*exp(-a*(x-c))}


# Inversion Transform
inversion <- function(mu,c,sig,N){
  u = runif(N)
  x = qnorm( (u+(1-u)*pnorm((c-mu)/sig)))*sig+mu
}

t1<-system.time(test1 <- inversion(mu,c,sig,N))


# Accept-Reject 
exp_sim <- function(c,a){
  u1 = runif(1)
  y = -log(1-u1)/a+c
  return(y)
}

accept_reject<- function(mu,c,sig,a,N){
  M = exp(-a*c+a^2*sig^2/2+mu*a)/ (sqrt(2*pi)*sig*(1-pnorm(c,mu,sig))*a)
  result = rep(0,N)
  run = 0
  n = 1
  while (n <= N){
    y = exp_sim(c,a)
    u2 = runif(1)
    if (u2 < (  f(y,mu,sig,c)/( M*g(y,a,c)) )  ){
      result[n] = y
      n = n + 1
    }
    run = run + 1
  }
  return(list(result=result, acceptance = N / run))
}

t2<-system.time(test2 <- accept_reject(mu,c,sig,a,N))

Let # of simulation be 10^{5}. Inverse Transform takes 0.02 seconds, while Acceptance-Rejection Algorithm takes 2.03 seconds.Inverse transform, therefore, is much faster.

3.(b) Density Estimation Accuracy

From the CDF graph, we can see that the density estimation performance are very similar between inverse transform and acceptance-rejection algorithm

mu = 1
c  = 3             # c for both truncate normal and exponential
var= 5 
sig = sqrt(var)
cdf     <- function(x) {ifelse(x>=c,1,0)*(pnorm(x,mu,sig)-pnorm(c,mu,sig))/(1-pnorm(c,mu,sig))}
cdf_inv <- ecdf(test1)
cdf_ar  <- ecdf(test2$result)
x <- seq(1,10,0.01)
plot(x,cdf(x), lwd = 3, lty=1, col = "red", type = "l",main = "Comparison of the true distribution and simulated empirical distributions")
lines(x,cdf_inv(x),lwd = 2, lty=3, col = "orange")
lines(x,cdf_ar(x),lwd = 2, lty=2, col = "gray")
legend(6,0.2, c("True CDF","Inverse Transform CDF","Acceptance-Rejection CDF"), cex=0.8, 
       col=c("red","orange","gray"), pch=c(22,22,22), lty=c(2,2,1),lwd=c(2,2,2))