Variance Reduction Using Importance Sampling

Importance sampling is a variance reduction technique that can improve efficiency relative to simple Monte Carlo integration and, unlike simple Monte Carlo, applies to integrals over unbounded intervals. The idea is that some values of the input random variable contribute more to the estimate of the parameter of interest than others; if we sample from a distribution that overweights these important values, we can reduce the variance of the estimator. Because the draws come from a non-uniform distribution with a given density, or importance function, the unweighted sample mean is a biased estimator; weighting each draw by the ratio of the target density to the importance density restores unbiasedness. A good importance function is easy to sample from, yields an estimator with finite variance, and is close in shape to the integrand: we try to choose \(\phi(x)\) such that \(\phi(x) \simeq |g(x)|f(x)\) on the set \(A\) where \(f(x)\) is supported, so that the ratio \(g(x)f(x)/\phi(x)\) is nearly constant.
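
As a minimal sketch (illustrative names, not from the text), the estimator weights each draw from the importance density \(\phi\) by \(f/\phi\):

# Generic importance sampling estimate of theta = E_f[g(X)] (a sketch; the
# function and argument names here are assumptions, not from the source).
is_estimate <- function(g, f, phi, x) {
  w <- f(x) / phi(x)     # importance weights
  mean(g(x) * w)         # weighted average; unbiased for theta
}

In the examples below, \(f\) is the \(U(0, 1)\) density, so the weight reduces to \(g(x)/\phi(x)\).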

Example 5.10

\(\theta = \int_{0}^{1} \frac{e^{-x}}{1 + x^2} dx\)

Estimation by simple Monte Carlo integration

m <- 10000
x <- runif(m)                             # draws from U(0, 1)
g <- function(x) {exp(-x) / (1 + x^2)}
(mci_est <- mean(g(x)))                   # simple Monte Carlo estimate of theta
## [1] 0.5293148
(mci_var <- var(g(x)))                    # variance of a single draw of g(X)
## [1] 0.06051204
(exact <- integrate(g, 0, 1))
## 0.5247971 with absolute error < 5.8e-15
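
The variance reported by var(g(x)) is the per-draw variance; the Monte Carlo estimator itself has variance \(\mathrm{Var}(g(X))/m\). A quick check (illustrative, not in the original):

# Standard error of the simple Monte Carlo estimate (sketch).
(mci_se <- sqrt(mci_var / m))   # ~ 0.0025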

Estimation and variance across importance functions

\(f_0(x) = 1, 0 < x < 1\)

\(f_1(x) = e^{-x}, 0 < x < \infty\)

\(f_2(x) = (1 + x^2)^{-1} / \pi, -\infty < x < \infty\)

\(f_3(x) = e^{-x} / (1 - e^{-1}), 0 < x < 1\)

\(f_4(x) = 4(1 + x^2)^{-1} / \pi, 0 < x < 1\)

estimate <- data.frame(importance_function = character(5), 
                       theta_hat = numeric(5), 
                       variance = numeric(5), 
                       stringsAsFactors = FALSE)

g <- function(x) {exp(-x) / (1 + x^2) * (x > 0) * (x < 1)}
f0 <- function(x) 1
f1 <- function(x) exp(-x)
f2 <- function(x) (1 / pi) / (1 + x^2)
f3 <- function(x) exp(-x) / (1 - exp(-1))
f4 <- function(x) 4 / ((1 + x^2) * pi)
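
As a quick sanity check (not in the original), each candidate should integrate to 1 over its stated support:

integrate(f1, 0, Inf)$value       # ~ 1
integrate(f2, -Inf, Inf)$value    # ~ 1
integrate(f3, 0, 1)$value         # ~ 1
integrate(f4, 0, 1)$value         # ~ 1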

#f0: uniform on (0, 1)
x <- runif(m)
fg <- g(x) / f0(x)
estimate$importance_function[1] <- "f0"
estimate$theta_hat[1] <- mean(fg)
estimate$variance[1] <- var(fg)

#f1: exponential with rate 1 on (0, Inf)
x <- rexp(m, 1)
fg <- g(x) / f1(x)
estimate$importance_function[2] <- "f1"
estimate$theta_hat[2] <- mean(fg)
estimate$variance[2] <- var(fg)

#f2: standard Cauchy; map draws outside (0, 1) to 2 so that g() assigns them zero weight
x <- rcauchy(m)
i <- c(which(x > 1), which(x < 0))
x[i] <- 2
fg <- g(x) / f2(x)
estimate$importance_function[3] <- "f2"
estimate$theta_hat[3] <- mean(fg)
estimate$variance[3] <- var(fg)

#f3
u <- runif(m)
x <- - log(1 - u * (1 - exp(-1)))
fg <- g(x) / f3(x)
estimate$importance_function[4] <- "f3"
estimate$theta_hat[4] <- mean(fg)
estimate$variance[4] <- var(fg)

#f4: inverse transform; F(x) = (4 / pi) * atan(x) on (0, 1)
u <- runif(m)
x <- tan(pi * u / 4)
fg <- g(x) / f4(x)
estimate$importance_function[5] <- "f4"
estimate$theta_hat[5] <- mean(fg)
estimate$variance[5] <- var(fg)

knitr::kable(estimate)
| importance_function | theta_hat | variance  |
|---------------------|-----------|-----------|
| f0                  | 0.5223684 | 0.0598330 |
| f1                  | 0.5270698 | 0.1757460 |
| f2                  | 0.5207145 | 0.9008786 |
| f3                  | 0.5228689 | 0.0095242 |
| f4                  | 0.5223731 | 0.0197161 |

f3 produces the smallest variance: of the five candidates it is closest in shape to g on (0, 1), so the ratio g(x)/f3(x) is nearly constant. f2 performs worst because most Cauchy draws fall outside (0, 1) and receive zero weight.

library(ggplot2)

ggplot(data.frame(x = seq(0.01, 0.99, 0.01)), aes(x)) +
  stat_function(fun = g, aes(linetype = "g", color = "g")) +
  stat_function(fun = f0, aes(linetype = "f0", color = "f0")) +
  stat_function(fun = f1, aes(linetype = "f1", color = "f1")) +
  stat_function(fun = f2, aes(linetype = "f2", color = "f2")) +
  stat_function(fun = f3, aes(linetype = "f3", color = "f3")) +
  stat_function(fun = f4, aes(linetype = "f4", color = "f4")) +
  scale_linetype_manual(name = "", 
                        values = c("g" = "solid",
                                   "f0" = "dashed",
                                   "f1" = "1F",
                                   "f2" = "dotted",
                                   "f3" = "dotdash",
                                   "f4" = "longdash"),
                        breaks = c("g", "f0", "f1", "f2", "f3", "f4")) +
  scale_color_manual(name = "", 
                     values = c("g" = "red",
                                "f0" = "black",
                                "f1" = "black",
                                "f2" = "black",
                                "f3" = "black",
                                "f4" = "black"),
                     breaks = c("g", "f0", "f1", "f2", "f3", "f4")) +
  ylab("f(x)")

ggplot(data.frame(x = seq(0.01, 0.99, 0.01)), aes(x)) +
  stat_function(fun = function(x) g(x) / f0(x), aes(linetype = "f0", color = "f0")) +
  stat_function(fun = function(x) g(x) / f1(x), aes(linetype = "f1", color = "f1")) +
  stat_function(fun = function(x) g(x) / f2(x), aes(linetype = "f2", color = "f2")) +
  stat_function(fun = function(x) g(x) / f3(x), aes(linetype = "f3", color = "f3")) +
  stat_function(fun = function(x) g(x) / f4(x), aes(linetype = "f4", color = "f4")) +
  scale_linetype_manual(name = "", 
                        values = c("f0" = "dashed",
                                   "f1" = "1F",
                                   "f2" = "dotted",
                                   "f3" = "dotdash",
                                   "f4" = "longdash"),
                        breaks = c("f0", "f1", "f2", "f3", "f4")) +
  scale_color_manual(name = "", 
                     values = c("f0" = "black",
                                "f1" = "black",
                                "f2" = "black",
                                "f3" = "black",
                                "f4" = "black"),
                     breaks = c("f0", "f1", "f2", "f3", "f4")) +
  ylab("g(x) / f(x)")

Example 5.3: the standard normal CDF

\(\Phi(x) = \int_{-\infty}^{x} \frac{1}{\sqrt{2 \pi}} e^{-t^2 / 2} dt\)

For \(x > 0\), split the integral at zero, \(\Phi(x) = \frac{1}{2} + \int_{0}^{x} \frac{1}{\sqrt{2 \pi}} e^{-t^2 / 2} dt\), and substitute \(y = t/x\), \(dt = x \, dy\) in the second term:

\(\theta = \int_{0}^{1} xe^{-(xy)^2 / 2} dy\)

\(\theta = E_Y[xe^{-(xY)^2/2}]\), for \(Y \sim U(0, 1)\)

so that \(\Phi(x) = \frac{1}{2} + \theta / \sqrt{2 \pi}\), which is the form computed below.

x <- seq(0.1, 2.5, length = 10)
m <- 10000
u <- runif(m)
cdf <- numeric(length(x))

for (i in 1:length(x)) {
  g <- x[i] * exp(-(u * x[i])^2 / 2)        # integrand evaluated at the U(0, 1) draws
  cdf[i] <- mean(g) / sqrt(2 * pi) + 0.5    # Phi(x) = 1/2 + theta / sqrt(2 pi)
}
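
A vectorized equivalent of the loop (illustrative; it also avoids masking the function g defined earlier):

cdf_v <- vapply(x, function(xi) {
  mean(xi * exp(-(u * xi)^2 / 2)) / sqrt(2 * pi) + 0.5
}, numeric(1))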

knitr::kable(cbind(x, cdf, Phi = pnorm(x)))
| x         | cdf       | Phi       |
|-----------|-----------|-----------|
| 0.1000000 | 0.5398280 | 0.5398278 |
| 0.3666667 | 0.6430736 | 0.6430662 |
| 0.6333333 | 0.7367804 | 0.7367420 |
| 0.9000000 | 0.8160509 | 0.8159399 |
| 1.1666667 | 0.8785699 | 0.8783275 |
| 1.4333333 | 0.9245655 | 0.9241187 |
| 1.7000000 | 0.9561676 | 0.9554345 |
| 1.9666667 | 0.9764927 | 0.9753892 |
| 2.2333333 | 0.9887901 | 0.9872365 |
| 2.5000000 | 0.9958645 | 0.9937903 |

The Monte Carlo estimates agree with pnorm to about four decimal places for small x, with the error growing as x increases.

\(f_0(x) = 1/1.2, \; 0 < x < 1.2\)

\(f_1(x) = - 0.08 x + 0.4, 0 \leq x < 5\)

\(f_2(x) = (1 + x^2)^{-1} / \pi, -\infty < x < \infty\)

estimate <- data.frame(importance_function = character(3), 
                       theta_hat = numeric(3), 
                       variance = numeric(3), 
                       stringsAsFactors = FALSE)

g <- function(x) {
  (exp(-x^2 / 2) / sqrt(2 * pi)) * (x > 0) * (x < 1.2)
}

pnorm(1.2) - 0.5
## [1] 0.3849303
integrate(g, 0, 1.2)
## 0.3849303 with absolute error < 4.3e-15
#f0: uniform on (0, 1.2)
f0 <- function(x) 1
x <- runif(m, 0, 1.2)
fg <- 1.2 * g(x) / f0(x)   # the factor 1.2 supplies the U(0, 1.2) density 1/1.2
estimate$importance_function[1] <- "f0"
estimate$theta_hat[1] <- mean(fg)
estimate$variance[1] <- var(fg)

#f1: use rejection sampling to get the x_i's
f1 <- function(x) (- 0.08 * x + 0.4)
u <- runif(m * 10, 0, 5)    # proposals from U(0, 5), the support of f1
y <- runif(m * 10, 0, 1)
p <- f1(u)                  # f1(u) <= 0.4 on [0, 5), so y < p accepts with probability f1(u)

x <- vector("numeric", m)
j <- 1
for (i in 1:length(u)) {
  if (y[i] < p[i]) {        # accept proposal u[i]
    x[j] <- u[i]
    j <- j + 1
  }
  if (j == m + 1) {         # stop once m draws have been accepted
    break
  }
}

i <- c(which(x > 1.2), which(x < 0))   # map draws outside (0, 1.2) to 2 so g() zeroes them
x[i] <- 2
fg <- g(x) / f1(x)
estimate$importance_function[2] <- "f1"
estimate$theta_hat[2] <- mean(fg)
estimate$variance[2] <- var(fg)
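
As an aside (a sketch, not in the original), f1 also admits exact inverse-transform sampling, since its CDF \(F(x) = 0.4x - 0.04x^2\) inverts in closed form, making the rejection step unnecessary:

# Solving F(x) = 0.4x - 0.04x^2 = u on [0, 5] gives x = 5 - 5 * sqrt(1 - u).
u <- runif(m)
x_inv <- 5 - 5 * sqrt(1 - u)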

#f2: reuse the standard Cauchy importance function from Example 5.10
x <- rcauchy(m)
i <- c(which(x > 1.2), which(x < 0))
x[i] <- 2
fg <- g(x) / f2(x)
estimate$importance_function[3] <- "f2"
estimate$theta_hat[3] <- mean(fg)
estimate$variance[3] <- var(fg)
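
As with f4 in Example 5.10, truncating the Cauchy density to the integration range avoids the wasted draws; a sketch under that assumption (f2t is not in the original):

# Hypothetical variant: Cauchy density truncated to (0, 1.2).
f2t <- function(x) (1 / atan(1.2)) / (1 + x^2)
u <- runif(m)
x_t <- tan(u * atan(1.2))     # inverse CDF: F(x) = atan(x) / atan(1.2)
fg_t <- g(x_t) / f2t(x_t)
mean(fg_t)                    # close to the true value 0.3849
var(fg_t)                     # much smaller than under the untruncated f2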

knitr::kable(estimate)
| importance_function | theta_hat | variance  |
|---------------------|-----------|-----------|
| f0                  | 0.3856881 | 0.0058824 |
| f1                  | 0.3922011 | 0.2087144 |
| f2                  | 0.3828775 | 0.3843178 |

Here the uniform f0 performs best: g is relatively flat on (0, 1.2), while f1 and f2 place most of their mass outside the integration range, so the bulk of their draws receive zero weight and the variance is inflated.

ggplot(data.frame(x = seq(0.01, 1.19, 0.01)), aes(x)) +
  coord_cartesian(ylim = c(0, 1)) +
  stat_function(fun = g, aes(linetype = "g", color = "g")) +
  stat_function(fun = f0, aes(linetype = "f0", color = "f0")) +
  stat_function(fun = f1, aes(linetype = "f1", color = "f1")) +
  stat_function(fun = f2, aes(linetype = "f2", color = "f2")) +
  scale_linetype_manual(name = "", 
                        values = c("g" = "solid",
                                   "f0" = "dashed",
                                   "f1" = "dotted",
                                   "f2" = "dotdash"),
                        breaks = c("g", "f0", "f1", "f2")) +
  scale_color_manual(name = "", 
                     values = c("g" = "red",
                                "f0" = "black",
                                "f1" = "black",
                                "f2" = "black"),
                     breaks = c("g", "f0", "f1", "f2")) +
  ylab("f(x)")

ggplot(data.frame(x = seq(0.01, 1.19, 0.01)), aes(x)) +
  # coord_cartesian(ylim = c(0, 1)) +
  stat_function(fun = function(x) g(x) / f0(x), aes(linetype = "f0", color = "f0")) +
  stat_function(fun = function(x) g(x) / f1(x), aes(linetype = "f1", color = "f1")) +
  stat_function(fun = function(x) g(x) / f2(x), aes(linetype = "f2", color = "f2")) +
  scale_linetype_manual(name = "", 
                        values = c("f0" = "dashed",
                                   "f1" = "dotted",
                                   "f2" = "dotdash"),
                        breaks = c("f0", "f1", "f2")) +
  scale_color_manual(name = "", 
                     values = c("f0" = "black",
                                "f1" = "black",
                                "f2" = "black"),
                     breaks = c("f0", "f1", "f2")) +
  ylab("g(x) / f(x)")

References

Rizzo, Maria L. 2008. Statistical Computing with R. Chapter 5: Monte Carlo Integration and Variance Reduction.