StatComp3.r

user2 — Sep 30, 2013, 5:21 AM

# 4.1, but according to the email, not according the book:
# (I think it would have been nicer looking to set mu to 3, not the sd.)
require(mvtnorm)

Loading required package: mvtnorm


# Set p1=P(distributions with sd=1):
p1 <- 0.8

# Create joint density function:
dmix <- function(x,y) {
  p1*dmvnorm(cbind(x,y), mean=c(0,0), sigma=matrix(c(1,0,0,1),2,2)) +
    (1-p1)*dmvnorm(cbind(x,y), mean=c(0,0), sigma=matrix(c(3,0,0,3),2,2))
}

# Generate contour plot for this density function
x <- seq(-6,6,0.05)
y <- seq(-6,6,0.05)
Z <- outer(x,y,dmix)
contour(x,y,Z, xlim=c(-6,6), ylim=c(-6,6), nlevels=6)
# the values are summed densities of 0.05*0.05 sized squares; use this for 
# actual densities:
# contour(x,y,Z*0.05*0.05, xlim=c(-6,6), ylim=c(-6,6), nlevels=6)


# Generate data (according to Email, not Book) to scatter over the contour
n <- 100
sel <- rbinom(n, 1, p1)
X <- rnorm(n, 0, 1)*sel + rnorm(n, 0, 3)*(1-sel)
Y <- rnorm(n, 0, 1)*sel + rnorm(n, 0, 3)*(1-sel)

par(new=TRUE)
plot(X, Y, xlim=c(-6,6), ylim=c(-6,6))


# Now we create an estimated contour plot using sampled points:
k <- 21  # <-- number of bins for our algorithm
n <- 50000
sel <- rbinom(n, 1, p1)
X <- rnorm(n, 0, 1)*sel + rnorm(n, 0, 3)*(1-sel)
Y <- rnorm(n, 0, 1)*sel + rnorm(n, 0, 3)*(1-sel)

# Create k bins
bins <- seq(-7,7, length=k)

# Calculate bin per observation:
X.int <- findInterval(X, bins, all.inside=TRUE)
Y.int <- findInterval(Y, bins, all.inside=TRUE)

# Determine overlap (create table large enough first):
XY.table <- as.data.frame(table(X.int, Y.int))

Z <- diag(k)*0
Z[cbind(XY.table$X.int, XY.table$Y.int)] <- XY.table$Freq

# Draw our estimated density contour plot, while keeping in mind that bins 
# contains min, not mean of bin, so we have to adjust x and y:
contour(x=bins+14/k/2, y=bins+14/k/2, z=Z, xlim=c(-6,6), ylim=c(-6,6), 
        nlevels=6, add=TRUE, col="red")

# Alternative using lattice:
require(lattice)

Loading required package: lattice

plot of chunk unnamed-chunk-1

contourplot(Freq ~ bins[X.int] * bins[Y.int], data=XY.table)

plot of chunk unnamed-chunk-1


# I'm not sure what a "second mode" is, but you can make specific levels visible
# in the contour()-function by setting levels=c(level1, level2, level3)

# ========================
# 4.4: filled contour plot
# Recall that we used:
x <- seq(-6,6,0.1)
y <- seq(-6,6,0.1)
Z <- outer(x,y,dmix)
#contour(x,y,Z, xlim=c(-6,6), ylim=c(-6,6), nlevels=4)

# So now we change this to:
filled.contour(x,y,Z, xlim=c(-6,6), ylim=c(-6,6), nlevels=6)

plot of chunk unnamed-chunk-1

# note that the values in legend are densities of 0.1*0.1 sized squares


# ========================
# 4.5: surface plot
persp(x,y,Z)

plot of chunk unnamed-chunk-1



# ========================
# 5.14: Obtain a monte carlo estimate by importance sampling
# We recognise a normal distribution pdf with mu=0, sigma=1 multiplied by x^2.
# We calculate the integration constant to make that a density on [1,infty):

# pnorm(infinity,0,1)-pnorm(1,0,1) = 
c <- 1-pnorm(1,0,1)
c

[1] 0.1587


phi <- function(x) pnorm(x, 0, 1)/c
# So now we can estimate the integration by formula:
# (1/n) * sum_n(x^2 * f(X)/phi(X)) = mean(c*X^2) sampled over values from phi

# Generate a sample in an ugly but easy way:
phi.sample <- rnorm(500000, 0, 1)
phi.sample <- phi.sample[which(phi.sample>=1)]
length(phi.sample)

[1] 79520


estimate <- c*mean(phi.sample^2)
estimate

[1] 0.4016


# Real estimate from a smart computer program: 0.400626, so this estimate is 
# close enough for a statistical estimation.

# We can estimate the sd of the estimate:

estimate.sd <-sd(phi.sample)/sqrt(length(phi.sample))
estimate.sd

[1] 0.00159