Data 604 Week 9 Discussion

Rizzo, Maria L. 2008. Statistical Computing with R. Chapter 5: Problem 9, 13, or 14.

5.9) Rayleigh density:

\(f(x) = \frac{x}{\sigma^2}e^{-x^2 / (2 \sigma^2)}\), \(x \geq 0, \sigma > 0\)

\(F(x) = 1 - e^{-x^2 / (2 \sigma^2)}\)

\(F(F^{-1}(u)) = u\)

\(1 - e^{-F^{-1}(u)^2 / (2 \sigma^2)} = u\)

\(e^{-F^{-1}(u)^2 / (2 \sigma^2)} = 1 - u\)

\(-F^{-1}(u)^2 / (2 \sigma^2) = ln(1 - u)\)

\(F^{-1}(u)^2 = -2 \sigma^2 ln(1 - u)\)

\(F^{-1}(u) = \sigma \sqrt{-2 ln(1 - u)}\)

\(F^{-1}(u) = \sigma \sqrt{-2 ln(u)}\)

library(ggplot2)
library(gridExtra)

inv_cdf <- function(x, sigma) sigma * sqrt(-2 * log(x))

rand_rayleigh <- function(n, sigma) {
  u <- runif(n * 2)
  x <- inv_cdf(u, sigma)
  
  u <- runif(n)
  v <- 1 - u
  y <- inv_cdf(c(u, v), sigma)
  var_y <- (1 / 4) *
    (var(inv_cdf(u, sigma)) + var(inv_cdf(v, sigma)) + 
           2 * cov(inv_cdf(u, sigma), inv_cdf(v, sigma)))
 
  return(list(x_mc = x, 
              var_mc = var(x),
              x_anti = y,
              var_anti = var_y))
}

r <- rand_rayleigh(1000, 1)

pdf <- function(x, sigma) {
  (x / sigma^2) * exp(-x^2 / (2 * sigma^2))
}

x <- data.frame(x_mc = r$x_mc, x_anti = r$x_anti)

h1 <- ggplot(x) +
  geom_histogram(aes(x_mc, ..density..), bins = 50) +
  coord_cartesian(ylim = c(0, 0.75)) +
  stat_function(fun = pdf, args = list(sigma = 1), color = "red")

h2 <- ggplot(x) +
  geom_histogram(aes(x_anti, ..density..), bins = 50) +
  coord_cartesian(ylim = c(0, 0.75)) +
  stat_function(fun = pdf, args = list(sigma = 1), color = "red")

grid.arrange(h1, h2, nrow = 1)

knitr::kable(
  data.frame(variance_simple_mc = r$var_mc,
             variance_antithetical = r$var_anti,
             variance_reduction = (r$var_mc - r$var_anti)/ r$var_mc))

variance_simple_mc	variance_antithetical	variance_reduction
0.421436	0.0099342	0.9764279

5.14) Estimate \(\int_{1}^{\infty} \frac{x^2}{\sqrt{2 \pi}} e^{-x^2 / 2} dx\) using importance sampling

g <- function(x) {
  (x^2 / sqrt(2 * pi)) * exp(-x^2 / 2) * (x > 1)
}

int_g <- function(upper) {
  sapply(upper, function(x) integrate(g, 1, x)[['value']])
}

vals <- data.frame(x = seq(1, 10, 0.01))
vals$y <- g(vals$x)
vals$x[which.max(vals$y)]

## [1] 1.41

ggplot(data.frame(x = seq(1.01, 6.99, 0.01)), aes(x)) +
  stat_function(fun = g, aes(color = "red")) +
  stat_function(fun = int_g, aes(color = "blue")) +
  geom_vline(xintercept = vals$x[which.max(vals$y)], linetype = "dashed") +
  coord_cartesian(xlim = c(1, 7)) +
  scale_color_manual(name = "", 
                     values = c("red", "blue"),
                     breaks=c("red", "blue"),
                     labels = c("g(x)", expression(integral(g(x), 1, x))))

Importance functions:

\(f_0(x) = 1/9, 1 < x < 10\)

\(f_1(x) = e^x, 0 < x < \infty\)

\(f_2(x) = \frac{1}{\sqrt{2 \pi}} e^{-(x - 1.41)^2 / 2}, -\infty < x < \infty\)

estimate <- data.frame(importance_function = character(3),
                       theta_hat = numeric(3),
                       variance = numeric(3),
                       stringsAsFactors = FALSE)

m <- 10000

#f0; use inverse transform method to get x_i's
f0 <- function(x) 1 / 9
u <- runif(m)
x <- 9 * u
fg <- g(x) / f0(x)
estimate$importance_function[1] <- "f0"
estimate$theta_hat[1] <- mean(fg)
estimate$variance[1] <- var(fg)

#f1
f1 <- function(x) exp(-x)
x <- rexp(m, 1)
fg <- g(x) / f1(x)
estimate$importance_function[2] <- "f1"
estimate$theta_hat[2] <- mean(fg)
estimate$variance[2] <- var(fg)

#f2; use rejection sampling to get x_i's
f2 <- function(x) (1 / sqrt(2 * pi)) * exp(-(x - 1.41)^2 / 2)
u <- runif(m * 25, -10, 10)
y <- runif(m * 25, 0, 1)
p <- sapply(u, f2)

x <- rep(NA_real_, m)
j <- 1
for (i in 1:length(u)) {
  if(y[i] < p[i]) {
    x[j] <- u[i]
    j <- j + 1
  }
  if (j == m + 1) {
    break
  }
}

length(x[is.na(x)])

## [1] 0

x <- x[!is.na(x)]
fg <- g(x) / f2(x)
estimate$importance_function[3] <- "f2"
estimate$theta_hat[3] <- mean(fg)
estimate$variance[3] <- var(fg)

ggplot(data.frame(x = seq(1.01, 9.99, 0.01)), aes(x)) +
  coord_cartesian(xlim = c(1, 10)) +
  stat_function(fun = g, aes(linetype = "g", color = "g")) +
  stat_function(fun = f0, aes(linetype = "f0", color = "f0")) +
  stat_function(fun = f1, aes(linetype = "f1", color = "f1")) +
  stat_function(fun = f2, aes(linetype = "f2", color = "f2")) +
  scale_linetype_manual(name = "",
                        values = c("g" = "solid",
                                   "f0" = "dashed",
                                   "f1" = "dotted",
                                   "f2" = "dotdash"),
                        breaks = c("g", "f0", "f1", "f2")) +
  scale_color_manual(name = "",
                     values = c("g" = "red",
                                "f0" = "black",
                                "f1" = "black",
                                "f2" = "black"),
                     breaks = c("g", "f0", "f1", "f2")) +
  ylab("f(x)")

ggplot(data.frame(x = seq(1.01, 9.99, 0.01)), aes(x)) +
  coord_cartesian(xlim = c(1, 10)) +
  stat_function(fun = function(x) g(x) / f0(x), aes(linetype = "f0", color = "f0")) +
  stat_function(fun = function(x) g(x) / f1(x), aes(linetype = "f1", color = "f1")) +
  stat_function(fun = function(x) g(x) / f2(x), aes(linetype = "f2", color = "f2")) +
  scale_linetype_manual(name = "",
                        values = c("f0" = "dashed",
                                   "f1" = "dotted",
                                   "f2" = "dotdash"),
                        breaks = c("f0", "f1", "f2")) +
  scale_color_manual(name = "",
                     values = c("f0" = "black",
                                "f1" = "black",
                                "f2" = "black"),
                     breaks = c("f0", "f1", "f2")) +
  ylab("g(x) / f(x)")

integrate(g, 1, 100)

## 0.400626 with absolute error < 1e-07

knitr::kable(estimate)

importance_function	theta_hat	variance
f0	0.4008077	0.6448257
f1	0.4000574	0.3426323
f2	0.4045123	0.0963919

Data 604 Week 9 Discussion

Judd Anderman

October 25, 2017

Rizzo, Maria L. 2008. Statistical Computing with R. Chapter 5: Problem 9, 13, or 14.