Importance sampling is a variance reduction technique that can improve efficiency relative to the simple Monte Carlo method and can be applied to integrals over unbounded intervals. The idea is that some values of the input random variables contribute more to the estimate of the parameter of interest than others, so sampling from a distribution that overweights these important values reduces the variance of the estimator. Because we sample from a non-uniform distribution with a given density, the importance function, an unweighted sample mean would be a biased estimator; taking a weighted average restores unbiasedness.

A good importance function is easy to simulate from, yields an estimator with finite variance, and has a density that closely resembles the integrand. In other words, we try to choose \(\phi(x)\) such that \(\phi(x) \simeq |g(x)|f(x)\) on the set \(A\) where \(f(x)\) is supported.
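In the examples below the target density is uniform on the integration region, so the recipe reduces to drawing \(X_1, \dots, X_m\) from a candidate importance density \(f\) supported on \(A\) and averaging the weighted values:

\(\hat{\theta} = \frac{1}{m} \sum_{i=1}^{m} \frac{g(X_i)}{f(X_i)}, \text{ which is unbiased since } \int_A \frac{g(x)}{f(x)} f(x) dx = \int_A g(x) dx = \theta\)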
As a first example, consider estimating

\(\theta = \int_{0}^{1} \frac{e^{-x}}{1 + x^2} dx\)

that is, \(\theta = \int_{0}^{1} g(x) dx\) with \(g(x) = e^{-x} / (1 + x^2)\), first by simple Monte Carlo integration:
m <- 10000
x <- runif(m)  # uniform draws on (0, 1)
g <- function(x) {exp(-x) / (1 + x^2)}
(mci_est <- mean(g(x)))  # simple Monte Carlo estimate of theta
## [1] 0.5293148
(mci_var <- var(g(x)))
## [1] 0.06051204
(exact <- integrate(g, 0, 1))
## 0.5247971 with absolute error < 5.8e-15
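Note that mci_var estimates \(\mathrm{Var}[g(U)]\) for a single draw; the standard error of mci_est itself is \(\sqrt{\mathrm{Var}[g(U)] / m}\) (an added computation, not in the original output):

sqrt(mci_var / m)  # standard error of the simple MC estimate, about 0.0025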
We compare five candidate importance functions:

\(f_0(x) = 1, 0 < x < 1\)
\(f_1(x) = e^{-x}, 0 < x < \infty\)
\(f_2(x) = (1 + x^2)^{-1} / \pi, -\infty < x < \infty\)
\(f_3(x) = e^{-x} / (1 - e^{-1}), 0 < x < 1\)
\(f_4(x) = 4(1 + x^2)^{-1} / \pi, 0 < x < 1\)
estimate <- data.frame(importance_function = character(5),
theta_hat = numeric(5),
variance = numeric(5),
stringsAsFactors = FALSE)
g <- function(x) {exp(-x) / (1 + x^2) * (x > 0) * (x < 1)}  # indicators zero out draws outside (0, 1)
f0 <- function(x) 1
f1 <- function(x) exp(-x)
f2 <- function(x) (1 / pi) / (1 + x^2)
f3 <- function(x) exp(-x) / (1 - exp(-1))
f4 <- function(x) 4 / ((1 + x^2) * pi)
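As a quick sanity check (an added step, not part of the original analysis), each candidate should integrate to 1 over its support; f0 is trivially a density on \((0, 1)\):

integrate(f1, 0, Inf)$value    # ~1
integrate(f2, -Inf, Inf)$value # ~1
integrate(f3, 0, 1)$value      # ~1
integrate(f4, 0, 1)$value      # ~1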
#f0
x <- runif(m)
fg <- g(x) / f0(x)
estimate$importance_function[1] <- "f0"
estimate$theta_hat[1] <- mean(fg)
estimate$variance[1] <- var(fg)
#f1
x <- rexp(m, 1)
fg <- g(x) / f1(x)
estimate$importance_function[2] <- "f1"
estimate$theta_hat[2] <- mean(fg)
estimate$variance[2] <- var(fg)
#f2
x <- rcauchy(m)
# g is zero outside (0, 1); move out-of-range Cauchy draws to an arbitrary point where g(x) = 0
i <- c(which(x > 1), which(x < 0))
x[i] <- 2
fg <- g(x) / f2(x)
estimate$importance_function[3] <- "f2"
estimate$theta_hat[3] <- mean(fg)
estimate$variance[3] <- var(fg)
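To sample from \(f_3\) we invert its CDF:

\(F_3(x) = \frac{1 - e^{-x}}{1 - e^{-1}} = u \implies x = -\log(1 - u(1 - e^{-1}))\)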
#f3
u <- runif(m)
x <- - log(1 - u * (1 - exp(-1)))
fg <- g(x) / f3(x)
estimate$importance_function[4] <- "f3"
estimate$theta_hat[4] <- mean(fg)
estimate$variance[4] <- var(fg)
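Similarly, \(f_4\) has CDF \(F_4(x) = \frac{4}{\pi} \arctan(x)\) on \((0, 1)\), so the inverse transform is \(x = \tan(\pi u / 4)\).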
#f4
u <- runif(m)
x <- tan(pi * u / 4)  # inverse CDF of f4
fg <- g(x) / f4(x)
estimate$importance_function[5] <- "f4"
estimate$theta_hat[5] <- mean(fg)
estimate$variance[5] <- var(fg)
knitr::kable(estimate)
| importance_function | theta_hat | variance |
|---|---|---|
| f0 | 0.5223684 | 0.0598330 |
| f1 | 0.5270698 | 0.1757460 |
| f2 | 0.5207145 | 0.9008786 |
| f3 | 0.5228689 | 0.0095242 |
| f4 | 0.5223731 | 0.0197161 |
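Relative to the naive f0, the variance ratio for each candidate follows directly from the table (a quick post-hoc comparison): f3 cuts the variance by roughly a factor of six and f4 by roughly three, while f1 and f2 inflate it.

round(estimate$variance / estimate$variance[1], 2)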
The plots below compare the candidate densities with g and then plot the ratio \(g(x) / f(x)\), the quantity the importance sampling estimator averages; the closer this ratio is to a constant, the smaller the variance.

library(ggplot2)
ggplot(data.frame(x = seq(0.01, 0.99, 0.01)), aes(x)) +
stat_function(fun = g, aes(linetype = "g", color = "g")) +
stat_function(fun = f0, aes(linetype = "f0", color = "f0")) +
stat_function(fun = f1, aes(linetype = "f1", color = "f1")) +
stat_function(fun = f2, aes(linetype = "f2", color = "f2")) +
stat_function(fun = f3, aes(linetype = "f3", color = "f3")) +
stat_function(fun = f4, aes(linetype = "f4", color = "f4")) +
scale_linetype_manual(name = "",
values = c("g" = "solid",
"f0" = "dashed",
"f1" = "1F",
"f2" = "dotted",
"f3" = "dotdash",
"f4" = "longdash"),
breaks = c("g", "f0", "f1", "f2", "f3", "f4")) +
scale_color_manual(name = "",
values = c("g" = "red",
"f0" = "black",
"f1" = "black",
"f2" = "black",
"f3" = "black",
"f4" = "black"),
breaks = c("g", "f0", "f1", "f2", "f3", "f4")) +
ylab("f(x)")
ggplot(data.frame(x = seq(0.01, 0.99, 0.01)), aes(x)) +
stat_function(fun = function(x) g(x) / f0(x), aes(linetype = "f0", color = "f0")) +
stat_function(fun = function(x) g(x) / f1(x), aes(linetype = "f1", color = "f1")) +
stat_function(fun = function(x) g(x) / f2(x), aes(linetype = "f2", color = "f2")) +
stat_function(fun = function(x) g(x) / f3(x), aes(linetype = "f3", color = "f3")) +
stat_function(fun = function(x) g(x) / f4(x), aes(linetype = "f4", color = "f4")) +
scale_linetype_manual(name = "",
values = c("f0" = "dashed",
"f1" = "1F",
"f2" = "dotted",
"f3" = "dotdash",
"f4" = "longdash"),
breaks = c("f0", "f1", "f2", "f3", "f4")) +
scale_color_manual(name = "",
values = c("f0" = "black",
"f1" = "black",
"f2" = "black",
"f3" = "black",
"f4" = "black"),
breaks = c("f0", "f1", "f2", "f3", "f4")) +
ylab("g(x) / f(x)")
\(\Phi(x) = \int_{-\infty}^{x} \frac{1}{\sqrt{2 \pi}} e^{-t^2 / 2} dt\)

For \(x > 0\), write \(\Phi(x) = \frac{1}{2} + \frac{1}{\sqrt{2\pi}} \int_{0}^{x} e^{-t^2 / 2} dt\) and let \(\theta\) denote the remaining integral. Substituting \(y = t/x\), \(dt = x dy\) maps the integral onto \((0, 1)\):

\(\theta = \int_{0}^{1} xe^{-(xy)^2 / 2} dy\)

\(\theta = E_Y[xe^{-(xY)^2/2}]\), for \(Y \sim U(0, 1)\)

so \(\theta\) is estimated by the sample mean of \(x e^{-(x u_i)^2 / 2}\) over uniform draws, and \(\hat{\Phi}(x) = \frac{1}{2} + \hat{\theta} / \sqrt{2\pi}\). (For \(x \le 0\), use symmetry: \(\Phi(x) = 1 - \Phi(-x)\).)
x <- seq(0.1, 2.5, length = 10)
m <- 10000
u <- runif(m)
cdf <- numeric(length(x))
for (i in seq_along(x)) {
  fg <- x[i] * exp(-(u * x[i])^2 / 2)  # sample of the integrand x * exp(-(xY)^2 / 2)
  cdf[i] <- mean(fg) / sqrt(2 * pi) + 0.5
}
knitr::kable(cbind(x, cdf, Phi = pnorm(x)))
| x | cdf | Phi |
|---|---|---|
| 0.1000000 | 0.5398280 | 0.5398278 |
| 0.3666667 | 0.6430736 | 0.6430662 |
| 0.6333333 | 0.7367804 | 0.7367420 |
| 0.9000000 | 0.8160509 | 0.8159399 |
| 1.1666667 | 0.8785699 | 0.8783275 |
| 1.4333333 | 0.9245655 | 0.9241187 |
| 1.7000000 | 0.9561676 | 0.9554345 |
| 1.9666667 | 0.9764927 | 0.9753892 |
| 2.2333333 | 0.9887901 | 0.9872365 |
| 2.5000000 | 0.9958645 | 0.9937903 |
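The error grows with \(x\): the integrand \(x e^{-(xy)^2 / 2}\) concentrates near \(y = 0\) as \(x\) increases, raising the Monte Carlo variance, and because the same uniform sample u is reused for every row, the errors are correlated across rows. A quick look at the absolute error (an added computation):

round(abs(cdf - pnorm(x)), 5)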
Next we estimate

\(\theta = \int_{0}^{1.2} \frac{1}{\sqrt{2\pi}} e^{-x^2 / 2} dx = \Phi(1.2) - \frac{1}{2}\)

comparing three candidate importance functions:

\(f_0(x) = 1/1.2, 0 < x < 1.2\)
\(f_1(x) = - 0.08 x + 0.4, 0 \leq x < 5\)
\(f_2(x) = (1 + x^2)^{-1} / \pi, -\infty < x < \infty\)
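A quick check that \(f_1\) is a valid density on \([0, 5)\):

\(\int_0^5 (-0.08x + 0.4) dx = \left[0.4x - 0.04x^2\right]_0^5 = 2 - 1 = 1\)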
estimate <- data.frame(importance_function = character(3),
theta_hat = numeric(3),
variance = numeric(3),
stringsAsFactors = FALSE)
g <- function(x) {
  (exp(-x^2 / 2) / sqrt(2 * pi)) * (x > 0) * (x < 1.2)  # standard normal density restricted to (0, 1.2)
}
pnorm(1.2) - 0.5  # exact value of theta
## [1] 0.3849303
integrate(g, 0, 1.2)
## 0.3849303 with absolute error < 4.3e-15
#f0
f0 <- function(x) 1
x <- runif(m, 0, 1.2)
fg <- 1.2 * g(x) / f0(x)  # the U(0, 1.2) density is 1/1.2, hence the factor 1.2
estimate$importance_function[1] <- "f0"
estimate$theta_hat[1] <- mean(fg)
estimate$variance[1] <- var(fg)
#f1; use rejection sampling to get the x_i's
f1 <- function(x) (- 0.08 * x + 0.4)
u <- runif(m * 10, 0, 5)  # candidate draws from U(0, 5)
y <- runif(m * 10)        # acceptance thresholds on (0, 1)
# accept each candidate with probability f1(u) (f1 <= 0.4 < 1, so no scaling is needed);
# the acceptance rate is 1/5, so m * 10 candidates comfortably yield at least m draws
x <- u[y < f1(u)][1:m]
# g is zero outside (0, 1.2); move out-of-range draws to an arbitrary point where g(x) = 0
i <- c(which(x > 1.2), which(x < 0))
x[i] <- 2
fg <- g(x) / f1(x)
estimate$importance_function[2] <- "f1"
estimate$theta_hat[2] <- mean(fg)
estimate$variance[2] <- var(fg)
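Rejection sampling works here, but f1 also admits a direct inverse-transform draw, since \(F_1(x) = 0.4x - 0.04x^2\) can be solved in closed form. A minimal sketch (x_alt is illustrative only and not used below):

# inverse transform for f1: solve 0.4 x - 0.04 x^2 = u for x in [0, 5)
x_alt <- 5 * (1 - sqrt(1 - runif(m)))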
#f2
x <- rcauchy(m)
# g is zero outside (0, 1.2); move out-of-range Cauchy draws to an arbitrary point where g(x) = 0
i <- c(which(x > 1.2), which(x < 0))
x[i] <- 2
fg <- g(x) / f2(x)
estimate$importance_function[3] <- "f2"
estimate$theta_hat[3] <- mean(fg)
estimate$variance[3] <- var(fg)
knitr::kable(estimate)
| importance_function | theta_hat | variance |
|---|---|---|
| f0 | 0.3856881 | 0.0058824 |
| f1 | 0.3922011 | 0.2087144 |
| f2 | 0.3828775 | 0.3843178 |
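Here f0 wins decisively: f1 and f2 spread their mass over \((0, 5)\) and \((-\infty, \infty)\), so most draws land outside \((0, 1.2)\) and contribute zero terms, which inflates the variance. The wasted fractions can be computed exactly (an added calculation):

1 - (0.4 * 1.2 - 0.04 * 1.2^2)  # P(f1 draw outside (0, 1.2)), about 0.58
1 - atan(1.2) / pi              # P(f2 draw outside (0, 1.2)), about 0.72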
The plots below repeat the visual comparison on \((0, 1.2)\). Note that the ratio plot cannot show the main weakness of f1 and f2, namely the mass they place outside the interval, where every sampled term is zero.

ggplot(data.frame(x = seq(0.01, 1.19, 0.01)), aes(x)) +
coord_cartesian(ylim = c(0, 1)) +
stat_function(fun = g, aes(linetype = "g", color = "g")) +
stat_function(fun = f0, aes(linetype = "f0", color = "f0")) +
stat_function(fun = f1, aes(linetype = "f1", color = "f1")) +
stat_function(fun = f2, aes(linetype = "f2", color = "f2")) +
scale_linetype_manual(name = "",
values = c("g" = "solid",
"f0" = "dashed",
"f1" = "dotted",
"f2" = "dotdash"),
breaks = c("g", "f0", "f1", "f2")) +
scale_color_manual(name = "",
values = c("g" = "red",
"f0" = "black",
"f1" = "black",
"f2" = "black"),
breaks = c("g", "f0", "f1", "f2")) +
ylab("f(x)")
ggplot(data.frame(x = seq(0.01, 1.19, 0.01)), aes(x)) +
stat_function(fun = function(x) g(x) / f0(x), aes(linetype = "f0", color = "f0")) +
stat_function(fun = function(x) g(x) / f1(x), aes(linetype = "f1", color = "f1")) +
stat_function(fun = function(x) g(x) / f2(x), aes(linetype = "f2", color = "f2")) +
scale_linetype_manual(name = "",
values = c("f0" = "dashed",
"f1" = "dotted",
"f2" = "dotdash"),
breaks = c("f0", "f1", "f2")) +
scale_color_manual(name = "",
values = c("f0" = "black",
"f1" = "black",
"f2" = "black"),
breaks = c("f0", "f1", "f2")) +
ylab("g(x) / f(x)")
Rizzo, Maria L. 2008. *Statistical Computing with R*. Boca Raton: Chapman & Hall/CRC. Chapter 5: Monte Carlo Integration and Variance Reduction.