mcmc2.r

Thomas — Mar 4, 2013, 5:56 AM

# Bayesian statistics, MCMC assignments, Chapter 6
# Note that I still used the lambda.r library for functional programming
# (and sacrificed performance for practice, "clarity", and safety)
suppressPackageStartupMessages({
  require("lambda.r")
  require("geoR")
})

set.seed(10000)
setwd("F:/SkyDrive/Statistics/Bayesian")

data <- read.table("osteoporosis study.txt", header=TRUE)
data_centered <- data
data_centered$BMI <- data_centered$BMI - mean(data_centered$BMI)
n <- length(data$AGE)

# Iterate a step function n times, feeding each state into the next call.
# The type signature below triggers a bug/limitation in lambda.r, so it stays commented out:
# iterate(func, init, n, retain_as_df=TRUE) %::% Function : t : numeric : bool : t
# retain=TRUE -> here we retain all the states in the chain
iterate(func, init, n, TRUE, burn_in, ...) %as% {
  l <- data.frame(init)
  x <- init
  for(i in 1:n) {
    x <- func(x, ...)
    l <- rbind(l, x)  # grows row-by-row: quadratic, but intentionally simple
  }
  l[(burn_in+1):(n+1),]  # drop the first burn_in rows (row 1 is the initial state)
}
# retain=FALSE -> only return the final state of the chain (not used in the final solution)
# Cajo: only the final state?? Then we have no posterior sample to summarize,
#       e.g. to calculate the 25% point of the posterior of b1.
# Reply: I used this variant for debugging, to check whether the chain gets close
#        to a given value (it is faster not to store everything). Calling it with
#        TRUE instead of FALSE returns all states of the chain, which is more
#        useful; that variant is right above this one.
iterate(func, init, n, FALSE, ...) %as% {
  x <- init
  for(i in 1:n) 
    x <- func(x, ...)
  x
}
seal(iterate)
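
# A minimal sanity check of both variants (my addition, not part of the
# assignment): a toy halving step whose result is easy to verify by hand.
stopifnot(iterate(function(x) x/2, 1024, 10, FALSE) == 1)  # 1024/2^10 = 1
# iterate(function(x) x/2, 1024, 10, TRUE, burn_in=5)      # all states after the burn-in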


# Define a state in our MCMC chain (the log-likelihood is optional and
# defaults to -Inf until it is computed)
State(b0=0.1, b1=0.1, s2=0.1, ll=-Inf) %when% { 
  s2 >= 0 
} %as% {
  list(b0=b0, b1=b1, s2=s2, ll=ll)
}
print.State <- function(s) {
  if(s$ll==-Inf)
    cat(sprintf("(b0=%f, b1=%f, s2=%f)\n", s$b0, s$b1, s$s2))
  else
    cat(sprintf("(b0=%f, b1=%f, s2=%f), ll: %f\n", s$b0, s$b1, s$s2, s$ll))
}
seal(State)
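
# Quick demo of the guard (my addition): a negative variance fails the %when%
# clause, so lambda.r refuses to construct the State.
# State(s2=-1)                        # error: no matching pattern
# print(State(b0=1, b1=2, s2=0.25))   # (b0=1.000000, b1=2.000000, s2=0.250000)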


# Define a log-likelihood function on states:
# note: write partial function support for lambda.r
loglikelihood(x, data) %::% State : data.frame : numeric
loglikelihood(x, data) %as% {
  # TBBMC_i ~ N(b0 + b1*BMI_i, s2): the normal deviate is the residual itself
  # (not its square), and dnorm expects the standard deviation, not the variance.
  sum(
    dnorm(data$TBBMC - x$b0 - x$b1*data$BMI, 0, sqrt(x$s2), log=TRUE)
  )
}
seal(loglikelihood)
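
# Sanity check (my addition; relies on the corrected density above): at the OLS
# estimates with the ML variance mean(resid^2), our log-likelihood must match
# logLik() of the fitted linear model.
fit <- lm(TBBMC ~ BMI, data=data)
ols <- State(b0=unname(coef(fit)[1]), b1=unname(coef(fit)[2]), s2=mean(resid(fit)^2))
stopifnot(isTRUE(all.equal(loglikelihood(ols, data), as.numeric(logLik(fit)))))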


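# Full conditionals used in the Gibbs step below, writing y = TBBMC, x = BMI
# (my summary; it assumes flat priors on b0, b1 and p(s2) ~ 1/s2, which is the
# setup these draws correspond to, though the file never states it):
#   b0 | b1, s2, y  ~  N( mean(y - b1*x),            s2/n )
#   b1 | b0, s2, y  ~  N( sum((y - b0)*x)/sum(x^2),  s2/sum(x^2) )
#   s2 | b0, b1, y  ~  Scaled-Inv-chi^2( n, mean((y - b0 - b1*x)^2) )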
# Now define our Gibbs step function: gibbs(State_i) -> State_(i+1)
gibbs(x, data) %::% State : data.frame : State
gibbs(x, data) %as% {
  s_s2 <- mean((data$TBBMC - x$b0 - x$b1*data$BMI)^2)         # scale for s2's conditional
  r_b0 <- mean(data$TBBMC - x$b1*data$BMI)                    # conditional mean of b0
  r_b1 <- sum((data$TBBMC - x$b0)*data$BMI)/sum(data$BMI^2)   # conditional mean of b1

  new_state <- State(
    b0 = rnorm(1, r_b0, sqrt(x$s2/n)),
    b1 = rnorm(1, r_b1, sqrt(x$s2/sum(data$BMI^2))),
    s2 = rinvchisq(1, n, s_s2)
  )
  # Drawing b0 and b1 simultaneously from conditionals that both use the *old*
  # state is not a valid Gibbs sampler; randomly keeping one of the two per
  # transition (a random-scan update) makes it mathematically correct, in a
  # very "efficient" manner (new in this version!).
  if(runif(1)<0.5)
    new_state$b0 <- x$b0
  else
    new_state$b1 <- x$b1

  new_state$ll <- loglikelihood(new_state, data)
  new_state
}
seal(gibbs)


# Define our random-walk Metropolis step function: metropolis(State_i) -> State_(i+1)
metropolis(current, data, c) %::% State : data.frame : numeric : State
metropolis(current, data, c=0.033) %when% {
  c >= 0
} %as% {
  # Propose a new state, calculate log likelihood  
  proposed <- State(
    s2 = exp(rnorm(1, log(current$s2), c^2)),
    b0 = rnorm(1, current$b0, c^2),
    b1 = rnorm(1, current$b1, c^2)
  )
  proposed$ll <- loglikelihood(proposed, data)

  # Acceptance ratio. Note (my reading, not in the original comments): with a
  # symmetric random walk on log(s2), the likelihood-only ratio targets the same
  # posterior as the Gibbs sampler above (flat priors on b0 and b1, p(s2) ~ 1/s2),
  # because the log-scale Jacobian cancels the 1/s2 prior.
  ratio <- min(exp(proposed$ll - current$ll), 1, na.rm=TRUE)
  if(runif(1)<ratio)
    return(proposed)
  return(current)
}
seal(metropolis)
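
# A quick tuning sketch (my addition; `probe` is a hypothetical helper):
# estimate the acceptance rate for a given step size c by counting repeated s2
# values in a short run; a rejected proposal repeats the state exactly.
probe <- function(c, steps=200) {
  chain <- iterate(metropolis, State(b0=2, b1=0, s2=0.1), steps, TRUE,
                   burn_in=0, data=data, c=c)
  mean(chain$s2[-1] != chain$s2[-nrow(chain)])  # fraction of accepted moves
}
# probe(0.5)  # for 1-D random walks, roughly 0.2-0.5 acceptance is a common target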

# ===========
# Exercise 5:
# ===========
extreme <- State(b0=100.0, b1=100.0, s2=0.05)
extreme
(b0=100.000000, b1=100.000000, s2=0.050000)

# Gibbs sampler:
g_e <- iterate(gibbs, extreme, 1000, TRUE, burn_in=0, data=data)
plot(g_e$b0, g_e$b1, type="l")

plot(g_e$b0, type="l")

plot(g_e$b1, type="l")


# Note that these runs use the uncentered data. I did the same on the centered
# data (replace data=data with data=data_centered) and the chain got close to
# the right position within a few steps. Since the question asked me to observe
# the initial monotone behavior, I kept the uncentered data here.
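# For reference (my addition), the centered run mentioned above:
# g_c <- iterate(gibbs, extreme, 1000, TRUE, burn_in=0, data=data_centered)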

# Metropolis sampler
m_e <- iterate(metropolis, extreme, 1000, TRUE, burn_in=0, data=data, c=0.5)
plot(m_e$b0, m_e$b1, type="l")

plot(m_e$b0, type="l")

plot(m_e$b1, type="l")



# ===========
# Exercise 6:
# ===========
# First we generate a long MCMC chain (slow, thanks to the "efficient" implementation)
# Then we convert it to a coda-object in R.
# (if we did this in WinBUGS, we'd read the .ind and .out files using read.coda())
library(coda)
g_e <- iterate(gibbs, State(b0=100.0, b1=100.0, s2=0.05), 10000, TRUE, burn_in=0, data=data_centered)
burnt_in <- g_e[1000:10000,]
burnt_in$sd <- sqrt(burnt_in$s2) # we are told to study the std.dev.
burnt_in <- burnt_in[c("b0", "b1", "sd")] #select interesting parameters

# Now we can do some analytics. Convert to CODA-object
analyt <- mcmc(burnt_in) # codamenu() is interesting
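# Aside (my addition): passing the true starting iteration, mcmc(burnt_in, start=1000),
# would make coda label the iterations after burn-in; the summary below was
# produced without it, hence "Iterations = 1:9001".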
plot(analyt)

summary(analyt)

Iterations = 1:9001
Thinning interval = 1 
Number of chains = 1 
Sample size per chain = 9001 

1. Empirical mean and standard deviation for each variable,
   plus standard error of the mean:

     Mean      SD Naive SE Time-series SE
b0 1.8775 0.01905 2.01e-04       3.51e-04
b1 0.0403 0.00429 4.52e-05       7.94e-05
sd 0.2878 0.01342 1.41e-04       1.41e-04

2. Quantiles for each variable:

     2.5%    25%    50%    75%  97.5%
b0 1.8409 1.8645 1.8775 1.8905 1.9148
b1 0.0319 0.0374 0.0404 0.0432 0.0488
sd 0.2627 0.2786 0.2874 0.2965 0.3156
# The Naive SEs are indeed less than 1% of the estimated means; the largest
# ratio is b1's: 4.52e-05 / 0.0403 ~ 0.11%.

# Some experimenting with convergence testing:
geweke.plot(analyt)

# Convergence seems decent, judging by the Z-scores (geweke.plot marks the
# +/- 1.96 significance bands).
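
# Two complementary coda checks (my addition; both are standard coda functions):
# geweke.diag(analyt)    # the numeric Z-scores behind the plot above
# effectiveSize(analyt)  # effective sample size per parameter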