weibull_test.R

rm(list=ls())

seed <- 382992
set.seed(seed)

library(stats)
library(MCMCpack)

## Loading required package: coda

## Loading required package: MASS

## ##
## ## Markov Chain Monte Carlo Package (MCMCpack)

## ## Copyright (C) 2003-2017 Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park

## ##
## ## Support provided by the U.S. National Science Foundation

## ## (Grants SES-0350646 and SES-0350613)
## ##

library(coda)

# MCMC Settings -----------------------------------------------------------
burn.in <- 5000
n.samples <- 10000
verbose <- 5000 # How often to report progress

# True parameters ---------------------------------------------------------
kappa.tr <- 0.5 # Shape
lambda.tr <- 1000 # Scale
b.tr <- 1/lambda.tr^kappa.tr # Alternative parametrization
eta.tr <- log(b.tr) # Model b on the log scale

# Priors ------------------------------------------------------------------
Q.val <- 1-0.999
cutoff.tr <- qweibull(1-Q.val, shape=kappa.tr, scale=lambda.tr) # Bigger than 99.9% of the survival times in the simulation truth
best.guess <- 10^ceiling(log10(cutoff.tr)) # Some estimate for the true cutoff value for the observations
bounds <- c(0.1*best.guess, 10*best.guess) # Say we can narrow it down to two orders of magnitude
T.1 <- bounds[1]
T.2 <- bounds[2]

hyper <- list() # Hyperparameters
hyper$alpha.kappa <- 0.1
hyper$beta.kappa <- 0.1

# Data --------------------------------------------------------------------
y.uncensored <- rweibull(30, shape=kappa.tr, scale=lambda.tr)
censor.threshold <- quantile(y.uncensored, prob=0.1)
v.vec <- y.uncensored <= censor.threshold # 1 if uncensored, 0 if censored
y.censored <- y.uncensored
y.censored[v.vec == 0] <- censor.threshold

# Likelihood --------------------------------------------------------------
fn.log.lik <- function(params, y.censored, v.vec) {
    eta <- params[1]
    kappa <- params[2]
    if(kappa < 0) return(-Inf)

    sum(v.vec*(eta + log(kappa) + (kappa-1)*log(y.censored)) - exp(eta)*y.censored^kappa)
}

# Posterior ---------------------------------------------------------------
fn.log.post <- function(params, y.censored, v.vec) { # Posterior dist'n, up to proportionality constant
    eta <- params[1]
    kappa <- params[2]
    if(kappa < 0) return(-Inf)

    # Setup values for eta's prior
    b.l <- -log(Q.val)/T.2^kappa
    b.u <- -log(Q.val)/T.1^kappa
    sigma.eta <- (log(b.u) - log(b.l))/6 # The 6 is from 3 SD's from the mean, so most likely values of b are covered
    mu.eta <- (log(b.u) + log(b.l))/2 # Just use the midpoint as the mean

    kappa.prior <- dgamma(kappa, hyper$alpha.kappa, hyper$beta.kappa, log=TRUE)
    eta.prior <- dnorm(eta, mu.eta, sigma.eta, log=TRUE)

    kappa.prior + eta.prior + fn.log.lik(params, y.censored, v.vec)
}

# MLEs --------------------------------------------------------------------
optim.result <- optim(par=c(.1, .1),
                      fn.log.lik,
                      gr=NULL,
                      y.censored=y.censored,
                      v.vec=v.vec,
                      control=list(fnscale=-1))
MLEs <- optim.result$par # Vector of eta.mle and kappa.mle
names(MLEs) <- c("eta", "kappa")

mle.b <- exp(MLEs["eta"])
mle.kappa <- MLEs["kappa"]
mle.lambda <- (1/mle.b)^(1/mle.kappa)
names(mle.b) <- c("b")
names(mle.kappa) <- c("kappa")
names(mle.lambda) <- c("lambda")

# MCMC --------------------------------------------------------------------
# Just use MCMC pack sampler because very lazy
mcmc.obj <- MCMCmetrop1R(fn.log.post,
                         theta.init=MLEs,
                         y.censored=y.censored,
                         v.vec=v.vec,
                         burnin=burn.in,
                         mcmc=n.samples,
                         verbose=verbose,
                         seed=seed)

## MCMCmetrop1R iteration 1 of 15000 
## function value =  -69.83547
## theta = 
##   -4.52054
##    1.73576
## Metropolis acceptance rate = 1.00000
## 
## MCMCmetrop1R iteration 5001 of 15000 
## function value =  -17.83255
## theta = 
##   -2.56815
##    0.37095
## Metropolis acceptance rate = 0.56869
## 
## MCMCmetrop1R iteration 10001 of 15000 
## function value =  -18.13473
## theta = 
##   -2.79442
##    0.37967
## Metropolis acceptance rate = 0.56564
## 
## 
## 
## @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
## The Metropolis acceptance rate was 0.56873
## @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

colnames(mcmc.obj) <- c("eta", "kappa")


# Analysis ----------------------------------------------------------------
b.samps <- exp(mcmc.obj[,"eta"])
kappa.samps <- mcmc.obj[,"kappa"]
lambda.samps <- (1/b.samps)^(1/kappa.samps)
n.cdfs <- 1000
post.preds <- matrix(rweibull(n.samples*n.cdfs, shape=kappa.samps, scale=lambda.samps),
                     nrow=n.samples, ncol=n.cdfs)
ecdfs <- apply(post.preds, 1, ecdf)

### Posterior predictive distribution
y.grid <- seq(qweibull(0.001, shape=kappa.tr, scale=lambda.tr),
              qweibull(0.999, shape=kappa.tr, scale=lambda.tr),
              length.out=500)
cdf.samps <- sapply(ecdfs, FUN=function(f) f(y.grid))
cdf.pointwise <- apply(cdf.samps, 1, quantile, prob=c(0.025, 0.5, 0.975))
cdf.lb <- cdf.pointwise[1,]
cdf.median <- cdf.pointwise[2,]
cdf.ub <- cdf.pointwise[3,]
plot(y.grid,
     pweibull(y.grid, shape=kappa.tr, scale=lambda.tr),
     col="green",
     type='l',
     xlab="y",
     ylab="F(y)",
     main="CDF")
lines(y.grid,
     pweibull(y.grid, shape=mle.kappa, scale=mle.lambda),
     col="red",
     type='l',
     xlab="y",
     ylab="F(y)")
lines(y.grid, cdf.median, col="blue")
lines(y.grid, cdf.lb, col="blue", lty=2)
lines(y.grid, cdf.ub, col="blue", lty=2)
legend("bottomright",
       legend=c("Truth", "Bayes", "MLE"),
       lty=1,
       col=c("green", "blue", "red"),
       cex=0.6)

### Threshold
y.grid2 <- seq(qweibull(0.001, shape=kappa.tr, scale=lambda.tr),
              T.2,
              length.out=1000)
plot(y.grid2,
     pweibull(y.grid2, shape=kappa.tr, scale=lambda.tr),
     col="green",
     type='l',
     xlab="y",
     ylab="F(y)",
     main="CDF")
abline(v=cutoff.tr)
abline(v=T.1, col="blue", lty=2)
abline(v=T.2, col="blue", lty=2)
legend("bottomright",
       legend=c("True cutoff", "Lower/Upper cutoff estimates"),
       col=c("black", "blue"),
       lty=c(1, 2),
       cex=0.6)

### Priors
## eta prior
b.l.prior <- -log(Q.val)/T.2^kappa.tr
b.u.prior <- -log(Q.val)/T.1^kappa.tr
sigma.eta.prior <- (log(b.u.prior) - log(b.l.prior))/6 # The 6 is from 3 SD's from the mean, so most likely values of b are covered
mu.eta.prior <- (log(b.u.prior) + log(b.l.prior))/2 # Just use the midpoint as the mean
curve(dnorm(x, mu.eta.prior, sigma.eta.prior),
      xlim=c(-6, -2),
      xlab=expression(eta),
      ylab=bquote("f(" ~ eta ~ ")"),
      sub="(True val in green)")
abline(v=eta.tr, col="green")

## kappa prior
curve(dgamma(x, hyper$alpha.kappa, hyper$beta.kappa),
      xlab=expression(kappa),
      ylab=bquote("f(" ~ kappa ~ ")"),
      main=bquote(kappa ~ " prior"),
      sub="(True val in green)")
abline(v=kappa.tr, col="green")

### kappa
hist(kappa.samps,
     xlab=expression(kappa),
     freq=FALSE,
     main=bquote("Posterior distribution of " ~ kappa),
     xlim=range(c(kappa.samps, MLEs["kappa"])))
abline(v=kappa.tr, col="green")
abline(v=mean(kappa.samps), col="blue")
abline(v=MLEs["kappa"], col="red")
legend("topright", legend=c("Truth", "Post mean", "MLE"), lty=1, col=c("green", "blue", "red"), cex=0.6)

### b
hist(b.samps, xlab="b", freq=FALSE, main="Posterior distribution of b")
abline(v=b.tr, col="green")
abline(v=mean(b.samps), col="blue")
abline(v=exp(MLEs["eta"]), col="red")
legend("topright", legend=c("Truth", "Posterior mean", "MLE"), lty=1, col=c("green", "blue", "red"), cex=0.6)

### Data
hist(y.uncensored)

hist(y.censored)

### MCMC Diagnostics
plot(mcmc.obj)

weibull_test.R

ks

Wed Jun 28 06:54:01 2017