user2 — Sep 30, 2013, 5:21 AM
# 4.1, but according to the email, not according the book:
# (I think it would have been nicer looking to set mu to 3, not the sd.)
require(mvtnorm)
Loading required package: mvtnorm
# Set p1=P(distributions with sd=1):
p1 <- 0.8
# Create joint density function:
dmix <- function(x,y) {
p1*dmvnorm(cbind(x,y), mean=c(0,0), sigma=matrix(c(1,0,0,1),2,2)) +
(1-p1)*dmvnorm(cbind(x,y), mean=c(0,0), sigma=matrix(c(3,0,0,3),2,2))
}
# Generate contour plot for this density function
x <- seq(-6,6,0.05)
y <- seq(-6,6,0.05)
Z <- outer(x,y,dmix)
contour(x,y,Z, xlim=c(-6,6), ylim=c(-6,6), nlevels=6)
# the values are summed densities of 0.05*0.05 sized squares; use this for
# actual densities:
# contour(x,y,Z*0.05*0.05, xlim=c(-6,6), ylim=c(-6,6), nlevels=6)
# Generate data (according to Email, not Book) to scatter over the contour
n <- 100
sel <- rbinom(n, 1, p1)
X <- rnorm(n, 0, 1)*sel + rnorm(n, 0, 3)*(1-sel)
Y <- rnorm(n, 0, 1)*sel + rnorm(n, 0, 3)*(1-sel)
par(new=TRUE)
plot(X, Y, xlim=c(-6,6), ylim=c(-6,6))
# Now we create an estimated contour plot using sampled points:
k <- 21 # <-- number of bins for our algorithm
n <- 50000
sel <- rbinom(n, 1, p1)
X <- rnorm(n, 0, 1)*sel + rnorm(n, 0, 3)*(1-sel)
Y <- rnorm(n, 0, 1)*sel + rnorm(n, 0, 3)*(1-sel)
# Create k bins
bins <- seq(-7,7, length=k)
# Calculate bin per observation:
X.int <- findInterval(X, bins, all.inside=TRUE)
Y.int <- findInterval(Y, bins, all.inside=TRUE)
# Determine overlap (create table large enough first):
XY.table <- as.data.frame(table(X.int, Y.int))
Z <- diag(k)*0
Z[cbind(XY.table$X.int, XY.table$Y.int)] <- XY.table$Freq
# Draw our estimated density contour plot, while keeping in mind that bins
# contains min, not mean of bin, so we have to adjust x and y:
contour(x=bins+14/k/2, y=bins+14/k/2, z=Z, xlim=c(-6,6), ylim=c(-6,6),
nlevels=6, add=TRUE, col="red")
# Alternative using lattice:
require(lattice)
Loading required package: lattice
contourplot(Freq ~ bins[X.int] * bins[Y.int], data=XY.table)
# I'm not sure what a "second mode" is, but you can make specific levels visible
# in the contour()-function by setting levels=c(level1, level2, level3)
# ========================
# 4.4: filled contour plot
# Recall that we used:
x <- seq(-6,6,0.1)
y <- seq(-6,6,0.1)
Z <- outer(x,y,dmix)
#contour(x,y,Z, xlim=c(-6,6), ylim=c(-6,6), nlevels=4)
# So now we change this to:
filled.contour(x,y,Z, xlim=c(-6,6), ylim=c(-6,6), nlevels=6)
# note that the values in legend are densities of 0.1*0.1 sized squares
# ========================
# 4.5: surface plot
persp(x,y,Z)
# ========================
# 5.14: Obtain a monte carlo estimate by importance sampling
# We recognise a normal distribution pdf with mu=0, sigma=1 multiplied by x^2.
# We calculate the integration constant to make that a density on [1,infty):
# pnorm(infinity,0,1)-pnorm(1,0,1) =
c <- 1-pnorm(1,0,1)
c
[1] 0.1587
phi <- function(x) pnorm(x, 0, 1)/c
# So now we can estimate the integration by formula:
# (1/n) * sum_n(x^2 * f(X)/phi(X)) = mean(c*X^2) sampled over values from phi
# Generate a sample in an ugly but easy way:
phi.sample <- rnorm(500000, 0, 1)
phi.sample <- phi.sample[which(phi.sample>=1)]
length(phi.sample)
[1] 79520
estimate <- c*mean(phi.sample^2)
estimate
[1] 0.4016
# Real estimate from a smart computer program: 0.400626, so this estimate is
# close enough for a statistical estimation.
# We can estimate the sd of the estimate:
estimate.sd <-sd(phi.sample)/sqrt(length(phi.sample))
estimate.sd
[1] 0.00159