Theme Song
# install.packages("remotes")
library('BBmisc', 'rmsfuns')
#remotes::install_github("rstudio/sass")
lib('sass')
## sass
## TRUE
/* https://stackoverflow.com/a/66029010/3806250 */
h1 { color: #002C54; }
h2 { color: #2F496E; }
h3 { color: #375E97; }
h4 { color: #556DAC; }
h5 { color: #92AAC7; }
/* ----------------------------------------------------------------- */
/* https://gist.github.com/himynameisdave/c7a7ed14500d29e58149#file-broken-gradient-animation-less */
.hover01 {
/* color: #FFD64D; */
background: linear-gradient(155deg, #EDAE01 0%, #FFEB94 100%);
transition: all 0.45s;
&:hover{
background: linear-gradient(155deg, #EDAE01 20%, #FFEB94 80%);
}
}
.hover02 {
color: #FFD64D;
background: linear-gradient(155deg, #002C54 0%, #4CB5F5 100%);
transition: all 0.45s;
&:hover{
background: linear-gradient(155deg, #002C54 20%, #4CB5F5 80%);
}
}
.hover03 {
color: #FFD64D;
background: linear-gradient(155deg, #A10115 0%, #FF3C5C 100%);
transition: all 0.45s;
&:hover{
background: linear-gradient(155deg, #A10115 20%, #FF3C5C 80%);
}
}
## https://stackoverflow.com/a/36846793/3806250
options(width = 999)
knitr::opts_chunk$set(class.source = 'hover01', class.output = 'hover02', class.error = 'hover03')
if(!suppressPackageStartupMessages(require('BBmisc'))) {
install.packages('BBmisc', dependencies = TRUE, INSTALL_opts = '--no-lock')
}
suppressPackageStartupMessages(require('BBmisc'))
# suppressPackageStartupMessages(require('rmsfuns'))
pkgs <- c('devtools', 'knitr', 'kableExtra', 'tidyr',
'readr', 'lubridate', 'data.table', 'reprex',
'timetk', 'plyr', 'dplyr', 'stringr', 'magrittr',
'tdplyr', 'tidyverse', 'formattable',
'echarts4r', 'paletteer')
suppressAll(lib(pkgs))
# load_pkg(pkgs)
## Set the timezone but not change the datetime
Sys.setenv(TZ = 'Asia/Tokyo')
## options(knitr.table.format = 'html') will set all kableExtra tables to be 'html', otherwise need to set the parameter on every single table.
options(warn = -1, knitr.table.format = 'html')#, digits.secs = 6)
## https://stackoverflow.com/questions/39417003/long-vectors-not-supported-yet-abnor-in-rmd-but-not-in-r-script
knitr::opts_chunk$set(message = FALSE, warning = FALSE)#,
#cache = TRUE, cache.lazy = FALSE)
rm(pkgs)
他の受講生の課題をレビューする
課題の提出、お疲れさまでした!これで、他の受講生がレビューできます。成績を受け取るには、他の受講生の課題もいくつかレビューする必要があります。 成績は5月27日 15:59 JST
までに受け取れるでしょう。
Data on the lifetime (in years) of fuses produced by the ACME Corporation is available in the file fuses.csv
:
In order to characterize the distribution of the lifetimes, it seems reasonable to fit to the data a two-component mixture of the form:
\[ \begin{align} f(x) = w \lambda \exp\left\{ -\lambda x \right\} + (1- \\w) \frac{1}{\sqrt{2\pi} \tau x} \exp\left\{ - \frac{1}{2 \tau^2} \left( \log(x) - \mu \right)^2\right\} \\ \quad\quad\quad x>0. \end{align} \] The first component, which corresponds to an exponential distribution with rate \(\lambda\), is used to model low-quality components with a very short lifetime. The second component, which corresponds to a log-Gaussian distribution, is used to model normal, properly-functioning components.
You are asked to modify the implementation of the MCMC algorithm contained in the Reading “Sample code for MCMC example 1” so that you can fit this two-component mixture distributions instead. You then should run your algorithm for 10,000 iterations after a burn-in period of 1,000 iterations and report your estimates of the posterior means, rounded to two decimal places. Assume the following priors: $∼Uni[0,1], ∼Exp(1), ∼Normal(0,1) $ and \(\tau^2∼IGam(2,1)\).
The code you generate should follow the same structure as “Sample code for MCMC example 1”. In particular, focus on a Gibss sampler that alternates between the full conditionals for \(\omega\), \(\lambda\), \(\mu\), \(\tau^2\) and the latent component indicators \(c_1, \ldots,c_n\). Peer reviewers will be asked to check whether the different pieces of code have been adequately modified to reflect the fact that (1) parameters have been initialized in a reasonable way, (2) each of the two full conditional distributions associated with the sampler are correct, and (2) the numerical values that you obtain are correct. To simplify the peer-review process, assume that component 1 corresponds to the exponential distribution, while component 2 corresponds to the log-Gaussian distribution.
Provide an MCMC algorithm to fit the mixture model
\[ \begin{align} f(x) = w \lambda \exp\left\{ -\lambda x \right\} + (1- \\w) \frac{1}{\sqrt{2\pi} \tau x} \exp\left\{ - \frac{1}{2 \tau^2} \left( \log(x) - \mu \right)^2\right\} \\ \quad\quad\quad x>0. \end{align} \]
## Load package
#if(!suppressAll(require('rinvgamma'))) {
##install.packages('rinvgamma', dependencies = TRUE, INSTALL_opts = '--no-lock')
# devtools::install_github('dkahle/invgamma', force=TRUE)
#}
##function in 'rinvgamma' will be more accurate than 'MCMCpack'
suppressAll(require('rinvgamma'))
#lib('MCMCpack')
#dat <- fread('data/fuses.csv')
#fuses <- dat$V1
fuses <- read_csv('data/fuses.csv', col_names = FALSE)$X1
x <- fuses
logfuses = log(fuses)
## Initialize the parameters
KK <- 2 # number of components
n <- length(fuses) # number of samples
v <- array(0, dim=c(n,KK))
v[,1] <- 0.5 #Assign half weight to first component
v[,2] <- 1-v[,1] #Assign all of the remaining weights to the second component
mean1 <- sum(v[,1]*fuses)/sum(v[, 1]) #mean of the first component
lambda <- 1.0/mean1 #parameter for the first component
mu <- sum(v[,2]*logfuses)/sum(v[,2]) #parameter (mean) of the second component
sigmasquared <- sum(v[,2]*((logfuses-mu)**2))/sum(v[,2]) #parameter (variance) for the second component
sigma <- sqrt(sigmasquared)
w <- mean(v[,1])
#print(paste(lambda, mu, tausquared, tau, w))
#print(paste(lambda, mu, sigmasquared, sigma, w))
paste(lambda, mu, sigmasquared, sigma, w)
## [1] "0.459751008921357 0.570093003536586 0.72941677732671 0.854059001080552 0.5"
# Priors
aa <- rep(1,KK) # Uniform prior on
# conjugate prior for exponential:
alpha <- 2 #For Gamma distribution prior number of obs = alpha-1
beta <- 1 #Sum of observations = beta
# conjugate prior for the normal
eta <- mu # Mean for the prior on mu
tau <- 5 # Standard deviation 5 on the prior for mu_l
dd <- 2 # variance prior
qq <- 1
# Number of iterations of the sampler
rrr <- 6000
burn <- 1000
# Storing the samples
cc.out <- array(0, dim=c(rrr, n))
w.out <- rep(0, rrr)
lambda.out <- rep(0,rrr)
mu.out <- array(0, rrr)
sigma.out <- rep(0, rrr)
logpost <- rep(0, rrr)
# MCMC iterations
for(s in 1:rrr){
# Sample the indicators
cc <- rep(0,n)
for(i in 1:n){
v <- rep(0,KK)
v[1] <- log(w) + dexp(fuses[i], lambda, log=TRUE) #Compute the log of the weights
v[2] <- log(1-w) + dnorm(logfuses[i], mu, sigma, log=TRUE) #Compute the log of the weights
v <- exp(v - max(v))/sum(exp(v - max(v)))
cc[i] <- sample(1:KK, 1, replace=TRUE, prob=v)
}
# Sample the weights
w <- rbeta(1, aa[1] + sum(cc==1), aa[2] + sum(cc==2))
# Sample the lambda
nk <- sum(cc==1)
xsumk <- sum(fuses[cc==1])
alpha.hat <- alpha + nk
beta.hat <- beta + xsumk
lambda <- rgamma(1, shape = alpha.hat, rate = beta.hat)
# Sample mu, the mean for norm
nk <- sum(cc==2)
xsumk <- sum(logfuses[cc==2])
tau2.hat <- 1/(nk/sigma^2 + 1/tau^2)
mu.hat <- tau2.hat*(xsumk/sigma^2 + eta/tau^2)
mu <- rnorm(1, mu.hat, sqrt(tau2.hat))
# Sample the variances
dd.star <- dd + nk/2
qq.star <- qq
for(i in 1:n) {
if (cc[i]==2) {
qq.star <- qq.star + ((logfuses[i] - mu)^2)/2
}
}
sigma <- sqrt(invgamma::rinvgamma(1, dd.star, qq.star))
# Store samples
cc.out[s,] <- cc
w.out[s] <- w
lambda.out[s] <- lambda
mu.out[s] <- mu
sigma.out[s] <- sigma
for(i in 1:n){
if(cc[i]==1){
logpost[s] <- logpost[s] + log(w) + dexp(fuses[i], lambda, log=TRUE)
}else{
logpost[s] <- logpost[s] + log(1-w) + dnorm(logfuses[i], mu, sigma, log=TRUE)
}
}
logpost[s] <- logpost[s] + dbeta(w, aa[1], aa[2],log = T)
logpost[s] <- logpost[s] + dgamma(lambda, shape = alpha, rate = beta, log = T)
logpost[s] <- logpost[s] + dnorm(mu, eta, tau, log = T)
logpost[s] <- logpost[s] + log(invgamma::dinvgamma(sigma^2, dd, 1/qq))
if(s/500==floor(s/500)){
print(paste("s =",s, w, lambda, mu, sigma, logpost[s]))
}
}
## [1] "s = 500 0.108504588091813 3.11441825654982 0.80958249901008 0.409762020407082 -288.561556333677"
## [1] "s = 1000 0.0903282965883995 4.94762689234766 0.805483328447189 0.386775628947854 -286.8014939864"
## [1] "s = 1500 0.0851174197858814 2.58255907273887 0.771697824554507 0.389135583921897 -279.257683958764"
## [1] "s = 2000 0.0747938493546211 2.79403165380384 0.794430404232981 0.383871017224883 -282.953044447016"
## [1] "s = 2500 0.100043151896491 2.98300745323523 0.759813356022746 0.378870929098709 -279.814686097284"
## [1] "s = 3000 0.0632944947504811 2.84399876784586 0.799955051786099 0.376744616859304 -284.180314525997"
## [1] "s = 3500 0.0802270657502171 2.31663550039272 0.767643726645595 0.38101843741715 -280.951343296024"
## [1] "s = 4000 0.0940957686167199 3.04837940757356 0.81242262036078 0.420720398881994 -278.834360642677"
## [1] "s = 4500 0.0918923490785446 2.32187956301159 0.790778322800358 0.376216655438987 -283.966760726816"
## [1] "s = 5000 0.114935057107565 2.61978469443038 0.740607689023302 0.388700278173162 -292.372222795386"
## [1] "s = 5500 0.0932959585281394 2.93001497000789 0.778255624422715 0.398159765409939 -275.354227050558"
## [1] "s = 6000 0.117482226552188 3.73633318374268 0.775951167010476 0.381732127115713 -280.57075901505"
## Plot the logposterior distribution for various samples
par(mfrow=c(1,1))
par(mar=c(4,4,1,1)+0.1)
plot(logpost, type="l", xlab="Iterations", ylab="Log posterior")
w_avg <- sum(w.out[(burn+1):rrr])/(rrr-burn)
lambda_avg <- sum(lambda.out[(burn+1):rrr])/(rrr-burn)
mu_avg <- sum(mu.out[(burn+1):rrr])/(rrr-burn)
sigma_avg <- sum(sigma.out[(burn+1):rrr])/(rrr-burn)
#print(paste(w_avg, lambda_avg, mu_avg, sigma_avg))
dfm <- tibble(
Category = c('weight', 'lambda', 'mean', 'sigma'),
Value = c(w_avg, lambda_avg, mu_avg, sigma_avg))
dfm |>
kbl(caption = 'Summary', escape = FALSE) %>%
row_spec(0, background = 'DimGrey', color = 'gold', bold = TRUE) |>
column_spec(1, background = 'CornflowerBlue') %>%
column_spec(2, background = 'DarkGrey') %>%
kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
kable_material(full_width = FALSE)
Category | Value |
---|---|
weight | 0.0902109 |
lambda | 3.0175754 |
mean | 0.7829672 |
sigma | 0.3839313 |
w_sum = 0
lambda_sum = 0
mu_sum = 0
sigma_sum = 0
counter = 0
for(s in 1:(rrr-burn)){
w_sum = w_sum + w.out[s+burn]
lambda_sum = lambda_sum + lambda.out[s+burn]
mu_sum = mu_sum +mu.out[s+burn]
sigma_sum = sigma_sum + sigma.out[s+burn]
counter = counter + 1
}
#print(paste(counter, w_sum/counter, lambda_sum/counter, mu_sum/counter, sigma_sum/counter))
dfm <- tibble(
Category = c('Counter', 'mean_weight', 'mean_lambda', 'mean', 'mean_sigma'),
Value = c(counter, w_sum/counter, lambda_sum/counter, mu_sum/counter, sigma_sum/counter))
dfm %>%
kbl(caption = 'Summary', escape = FALSE) %>%
row_spec(0, background = 'DimGrey', color = 'gold', bold = TRUE) %>%
column_spec(1, background = 'CornflowerBlue') %>%
column_spec(2, background = 'DarkGrey') %>%
kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
kable_material(full_width = FALSE)
Category | Value |
---|---|
Counter | 5000.0000000 |
mean_weight | 0.0902109 |
mean_lambda | 3.0175754 |
mean | 0.7829672 |
mean_sigma | 0.3839313 |
Are the initial values appropriate?
The starting values of four parameters, \(\omega\), \(\lambda\), \(\mu\) and \(\tau\), need to be specified, and the context of the problem provides some useful clues.
Because the lognormal component corresponds to the “normal” components, and we expect the majority of the observations to be in this class, it makes sense to bias the weights so that \(\omega \le 1/2\). For example, we could use \(\omega = 0.1\), or simply sample a random starting from something like a Beta(1,9) distribution (which has expectation 0.1).
For the same reason, a reasonable starting values for \(\mu\) and \(\tau\) correspond to their maximum likelihood estimators under the simpler log-Gaussian model. Since a random variable follows a log-Gaussian distribution if and only if its logarithm follows a Gaussian distribution, we can use \(\mu = mean(log(x))\) and \(\tau = sd(log(x))\) as our starting values. Alternatively, the values could be sampled from distributions centered around these values in order to make the starting values random.
Because the defective components should have shorter lifespan than normal components, it makes sense to take \(\frac{1}{\lambda}\) (which is the mean of the first component) to be a small fraction of the overall mean of the data (we use \(5%\) of the overall mean, but other similar values would all be reasonable).
Finally, in this case the indicators \(c_1, \ldots, c_n\) could be randomly initialized to either component of the mixture.
In summary, you should expect an initialization such as this one:
w = 0.1
mu = mean(log(x))
tau = sd(log(x))
lambda = 20/mean(x)
cc = sample(1:2, n, TRUE, c(1/2, 1/2))
Is the full conditional for the indicators \(c_1, \ldots, c_n\) correct?
The indicators \(c_1, \ldots, c_n\) are conditionally independent from each other, and we have
\(\Pr(c_i = 1 \mid \cdots) \propto w \lambda \exp\left\{ -\lambda x_i \right\}\) while \(\Pr(c_i = 2 \mid \cdots) \propto (1- \\w) \frac{1}{\sqrt{2\pi} \tau x} \exp\left\{ - \frac{1}{2 \tau^2} \left( \log(x_i) - \mu \right)^2\right\}\)
Hence, the code to sample the indicators might look something like this:
# Full conditional for cc
v = rep(0,2)
for(i in 1:n){
v[1] = log(w) + dexp(x[i], lambda, log=TRUE)
v[2] = log(1-w) + dlnorm(x[i], mu, tau, log=TRUE)
v = exp(v - max(v))/sum(exp(v - max(v)))
cc[i] = sample(1:2, 1, replace=TRUE, prob=v)
}
Please be open-minded when reviewing this code, as there are many ways to accomplish the same thing in R. For example, the calculation could be vectorized to increase efficiency.
Is the full conditional for the weight \(\omega\) correct?
This is a simple one, as its structure is common to all mixture models. Recalling that the prior on \(\omega\) is a uniform distribution on [0,1][0,1], we have
\[ \omega \mid \cdots \sim \text{Beta}\left(m(\mathbf{c})+1, n-m(\mathbf{c})+1\right) \]
where \(m(\mathbf{c})\) is the number of observations that c assigns to component 1. The associated code might look something like this
# Full conditional for w
w = rbeta(1, 1+sum(cc==1), 1+n-sum(cc==1))
or, alternatively,
# Full conditional for w
w = rbeta(1, 1+sum(cc==1), 1+sum(cc==2))
Please remember to be open-minded when reviewing this code, as there are many ways to accomplish the same thing in R.
Is the full conditional for the weight \(\lambda\) correct?
Since the prior is conditionally conjugate, it is easy to see that
\[ \lambda \mid \cdots \sim \text{Gam} \left( 1 + m(\mathbf{c}) , 1 + \sum_{i:c_i = 1} x_i\right) \]
Hence, the code might look something like
# Full conditional for lambda
lambda = rgamma(1, 1 + sum(cc==1), 1 + sum(x[cc==1]))
Is the full conditional for the weight \(\mu\) correct?
The full-conditional for \(\mu\) is a Gaussian distribution,
\[ \mu \mid \cdots \sim \text{N} \left( \frac{\frac{\sum_{i:c_i=1}x_i}{\tau^2} + 0}{\frac{m(\mathbf{c})}{\tau^2} + 1} , \frac{1}{\frac{m(\mathbf{c})}{\tau^2} + 1} \right) \]
The corresponding R code might look like:
# Full conditional for mu
mean.post = (sum(log(x[cc==2]))/tau^2 + 0)/(sum(cc==2)/tau^2 + 1)
std.post = sqrt(1/(sum(cc==2)/tau^2 + 1))
mu = rnorm(1, mean.post, std.post)
Is the full conditional for the weight \(\tau\) correct?
The full conditional for \(\tau^2\) is an inverse Gamma distribution,
\[ \tau^2 \mid \cdots \sim \\\text{IGam}\left( 2 + n - m\left(\mathbf{c}\right), 1+ \frac{1}{2}\sum_{i:c_i = 2} \left\{ \log x_i - \mu \right\}^2 \right) \]
The corresponding R code might look like:
# Full conditional for tau
tau = sqrt(1/rgamma(1, 2 + sum(cc==2), 1 + 0.5*sum((log(x[cc==2]) - mu)^2)))
Provide an MCMC algorithm to fit the mixture model
\[ \begin{align} f(x) = w \lambda \exp\left\{ -\lambda x \right\} + (1- \\w) \frac{1}{\sqrt{2\pi} \tau x} \exp\left\{ - \frac{1}{2 \tau^2} \left( \log(x) - \mu \right)^2\right\} \\ \quad\quad\quad x>0. \end{align} \]
rm(list=ls())
lib('MCMCpack')
## MCMCpack
## TRUE
Fuses <- read.csv(file = "data/fuses.csv")
## Initialize the parameters
# Number of iterations of the sampler
rrr = 6000
burn = 1000
set.seed(81196)
# Actual data
x = Fuses$X1.062163
KK = 2
# nnumber of observations in data
n = nrow(Fuses)
# Storing the samples
cc.out = array(0, dim=c(rrr, n))
w.out = rep(0, rrr)
lambda.out = array(0, dim=c(rrr, 1))
mu.out = array(0, dim = c(rrr,1))
tau.out = array(0, dim = c(rrr, 1))
logpost = rep(0, rrr)
# MCMC iterations
for(s in 1:rrr){
# Sample the indicators
cc = rep(0,n)
w.hat = runif(1)
lambda.hat = 8/mean(x)
mu.hat = mean(log(x))
tausquared.hat = var(log(x))
for(i in 1:n){
v = rep(0,KK)
v[1] = log(w.hat) + dexp(x[i],lambda.hat,log = TRUE)
v[2] = log(1 - w.hat) + dlnorm(x[i], meanlog = mu.hat, sdlog = sqrt(tausquared.hat), log = TRUE)
v = exp(v - max(v))/sum(exp(v - max(v)))
cc[i] = sample(1:KK, 1, replace=TRUE, prob=v)
}
# Sample the weights
w.hat = rbeta(1, 1 + sum(cc==1), 1 + sum(cc==2))
lambda.hat = rgamma(1, sum(cc==1) + 1, sum(x[cc==1]) + 1)
tau_star = 1/(1 + sum(cc==2)/tausquared.hat)
mu_star = tau_star * (sum(x[cc=2])/tausquared.hat)
mu.hat = rnorm(1,mu_star, sqrt(tau_star))
tausquared.hat = rinvgamma(1, 2 + 0.5*sum(cc==2),1 + 0.5*sum((log(x[cc==2])-mu.hat)^2))
# Store samples
cc.out[s,] = cc
w.out[s] = w.hat
lambda.out[s,] = lambda.hat
mu.out[s,] = mu.hat
tau.out[s,] = sqrt(tausquared.hat)
if(s/500==floor(s/500)){
print(paste("s =",s))
}
}
## [1] "s = 500"
## [1] "s = 1000"
## [1] "s = 1500"
## [1] "s = 2000"
## [1] "s = 2500"
## [1] "s = 3000"
## [1] "s = 3500"
## [1] "s = 4000"
## [1] "s = 4500"
## [1] "s = 5000"
## [1] "s = 5500"
## [1] "s = 6000"
Provide you maximum likelihood estimates \(E\left\{\omega \right\}\), \(E\left\{\lambda \right\}\), \(E\left\{\mu \right\}\) and \(E\left\{\tau \right\}\) rounded to two decimal places.
#mean(w.out[burn:rrr]) = 0.12
#mean(lambda.out[burn:rrr]) = 2.28
#mean(mu.out[burn:rrr]) = 0.01
#mean(tau.out[burn:rrr]) = 0.86
cat(
paste(
'w.out =', mean(w.out[burn:rrr]),
'\nlambda.out =', mean(lambda.out[burn:rrr]),
'\nmu.out =', mean(mu.out[burn:rrr]),
'\ntau.out =', mean(tau.out[burn:rrr])
)
)
## w.out = 0.120796241282035
## lambda.out = 2.27705016300445
## mu.out = 0.00668690981016547
## tau.out = 0.885765943211679
Are the initial values appropriate?
The starting values of four parameters, \(\omega\), \(\lambda\), \(\mu\) and \(\tau\), need to be specified, and the context of the problem provides some useful clues.
Because the lognormal component corresponds to the “normal” components, and we expect the majority of the observations to be in this class, it makes sense to bias the weights so that \(\omega \le 1/2\). For example, we could use \(\omega = 0.1\), or simply sample a random starting from something like a Beta(1,9) distribution (which has expectation 0.1).
For the same reason, a reasonable starting values for \(\mu\) and \(\tau\) correspond to their maximum likelihood estimators under the simpler log-Gaussian model. Since a random variable follows a log-Gaussian distribution if and only if its logarithm follows a Gaussian distribution, we can use \(\mu = mean(log(x))\) and \(\tau = sd(log(x))\) as our starting values. Alternatively, the values could be sampled from distributions centered around these values in order to make the starting values random.
Because the defective components should have shorter lifespan than normal components, it makes sense to take \(\frac{1}{\lambda}\) (which is the mean of the first component) to be a small fraction of the overall mean of the data (we use \(5%\) of the overall mean, but other similar values would all be reasonable).
Finally, in this case the indicators \(c_1, \ldots, c_n\) could be randomly initialized to either component of the mixture.
In summary, you should expect an initialization such as this one:
w = 0.1
mu = mean(log(x))
tau = sd(log(x))
lambda = 20/mean(x)
cc = sample(1:2, n, TRUE, c(1/2, 1/2))
Is the full conditional for the indicators \(c_1, \ldots, c_n\) correct?
The indicators \(c_1, \ldots, c_n\) are conditionally independent from each other, and we have
\(\Pr(c_i = 1 \mid \cdots) \propto w \lambda \exp\left\{ -\lambda x_i \right\}\) while \(\Pr(c_i = 2 \mid \cdots) \propto (1- \\w) \frac{1}{\sqrt{2\pi} \tau x} \exp\left\{ - \frac{1}{2 \tau^2} \left( \log(x_i) - \mu \right)^2\right\}\)
Hence, the code to sample the indicators might look something like this:
# Full conditional for cc
v = rep(0,2)
for(i in 1:n){
v[1] = log(w) + dexp(x[i], lambda, log=TRUE)
v[2] = log(1-w) + dlnorm(x[i], mu, tau, log=TRUE)
v = exp(v - max(v))/sum(exp(v - max(v)))
cc[i] = sample(1:2, 1, replace=TRUE, prob=v)
}
Please be open-minded when reviewing this code, as there are many ways to accomplish the same thing in R. For example, the calculation could be vectorized to increase efficiency.
Is the full conditional for the weight \(\omega\) correct?
This is a simple one, as its structure is common to all mixture models. Recalling that the prior on \(\omega\) is a uniform distribution on [0,1][0,1], we have
\[ \omega \mid \cdots \sim \text{Beta}\left(m(\mathbf{c})+1, n-m(\mathbf{c})+1\right) \]
where \(m(\mathbf{c})\) is the number of observations that c assigns to component 1. The associated code might look something like this
# Full conditional for w
w = rbeta(1, 1+sum(cc==1), 1+n-sum(cc==1))
or, alternatively,
# Full conditional for w
w = rbeta(1, 1+sum(cc==1), 1+sum(cc==2))
Please remember to be open-minded when reviewing this code, as there are many ways to accomplish the same thing in R.
Is the full conditional for the weight \(\lambda\) correct?
Since the prior is conditionally conjugate, it is easy to see that
\[ \lambda \mid \cdots \sim \text{Gam} \left( 1 + m(\mathbf{c}) , 1 + \sum_{i:c_i = 1} x_i\right) \]
Hence, the code might look something like
# Full conditional for lambda
lambda = rgamma(1, 1 + sum(cc==1), 1 + sum(x[cc==1]))
Is the full conditional for the weight \(\mu\) correct?
The full-conditional for \(\mu\) is a Gaussian distribution,
\[ \mu \mid \cdots \sim \text{N} \left( \frac{\frac{\sum_{i:c_i=1}x_i}{\tau^2} + 0}{\frac{m(\mathbf{c})}{\tau^2} + 1} , \frac{1}{\frac{m(\mathbf{c})}{\tau^2} + 1} \right) \]
The corresponding R code might look like:
# Full conditional for mu
mean.post = (sum(log(x[cc==2]))/tau^2 + 0)/(sum(cc==2)/tau^2 + 1)
std.post = sqrt(1/(sum(cc==2)/tau^2 + 1))
mu = rnorm(1, mean.post, std.post)
Is the full conditional for the weight \(\tau\) correct?
The full conditional for \(\tau^2\) is an inverse Gamma distribution,
\[ \tau^2 \mid \cdots \sim \\\text{IGam}\left( 2 + n - m\left(\mathbf{c}\right), 1+ \frac{1}{2}\sum_{i:c_i = 2} \left\{ \log x_i - \mu \right\}^2 \right) \]
The corresponding R code might look like:
# Full conditional for tau
tau = sqrt(1/rgamma(1, 2 + sum(cc==2), 1 + 0.5*sum((log(x[cc==2]) - mu)^2)))
Are the posterior means of the parameters generated by the algorithm correct?
The posterior means, rounded to two decimal places, are \(\text{E} \left\{ w \right\} \approx 0.10\), \(\text{E} \left\{ \lambda \right\} \approx 2.29\), \(\text{E} \left\{ \mu \right\} \approx 0.79\) and \(\text{E} \left\{ \tau \right\} \approx 0.38\)
Provide an MCMC algorithm to fit the mixture model
\[ \begin{align} f(x) = w \lambda \exp\left\{ -\lambda x \right\} + (1- \\w) \frac{1}{\sqrt{2\pi} \tau x} \exp\left\{ - \frac{1}{2 \tau^2} \left( \log(x) - \mu \right)^2\right\} \\ \quad\quad\quad x>0. \end{align} \]
x = as.matrix(read.csv("data/fuses.csv",header=FALSE))
## Initialize the parameters
n = length(x)
cc = rep(0, n)
cc = sample(1:2, n, replace=T, prob=c(1/2, 1/2))
mu = mean(log(x))
sigma = sd(log(x))
lambda = 1/mean(x)
w = 0.5
#prior parameters
eta = 0 # Mean 0 for the prior on mu
tau = 1 # Standard deviation 1 on the prior for mu
dd = 2 #ig pror parameter for sigma
qq = 1
## The actual MCMC algorithm starts here
# Number of iterations of the sampler
rrr = 10000
burn = 1000
# Storing the samples
cc.out = array(0, dim=c(rrr, n))
w.out = rep(0, rrr)
lambda.out = array(0, dim=c(rrr))
mu.out = array(0,dim = rrr)
sigma.out = array(0,dim = rrr)
logpost = rep(0, rrr)
# MCMC iterations
for(s in 1:rrr){
# Sample the indicators
for(i in 1:n){
v = rep(0,2)
v[1] = log(w) +dexp(x[i], lambda,log = TRUE)
v[2] = log(1-w) + dlnorm(x[i], mu,sigma, log = TRUE)
v = exp(v - max(v))/sum(exp(v - max(v)))
cc[i] = sample(1:2, 1, replace = TRUE, prob = v)
}
# Sample the weights
w = rbeta(1, 1+ sum(cc==1), 1 + sum(cc==2))
# Sample lambda
lambda = rgamma(1,1+sum(cc==1), 1+sum(x[cc==1]))
#sample log mean
n2 = sum(cc==2)
xsum2 = sum(log(x[cc==2]))
tau2.hat = 1/(n2/sigma^2 + 1/tau^2)
mu.hat = tau2.hat*(xsum2/sigma^2 + eta/tau^2)
mu = rnorm(1, mu.hat, sqrt(tau2.hat))
#sample sd d
dd.star = dd + n2/2
qq.star = qq + sum((log(x[cc==2]) - mu)^2)/2
sigma = sqrt(rinvgamma(1, dd.star, qq.star))
# Store samples
cc.out[s,] = cc
w.out[s] = w
lambda.out[s] = lambda
mu.out[s] = mu
sigma.out[s] = sigma
for(i in 1:n){
if(cc[i]==1){
logpost[s] = logpost[s] + log(w) + dexp(x[i], lambda, log = T)
} else {
logpost[s] = logpost[s] + log(1-w) + dlnorm(x[i], mu, sigma, log = TRUE)
}
}
logpost[s] = logpost[s] + dbeta(w, 1, 1, log = T) + dgamma(lambda, 1, 1, log = T) + dnorm(mu, 0, 1, log = T) + dinvgamma(sigma^2, dd, qq)
if(s/500==floor(s/500)){
print(paste("s =", s))
}
}
## [1] "s = 500"
## [1] "s = 1000"
## [1] "s = 1500"
## [1] "s = 2000"
## [1] "s = 2500"
## [1] "s = 3000"
## [1] "s = 3500"
## [1] "s = 4000"
## [1] "s = 4500"
## [1] "s = 5000"
## [1] "s = 5500"
## [1] "s = 6000"
## [1] "s = 6500"
## [1] "s = 7000"
## [1] "s = 7500"
## [1] "s = 8000"
## [1] "s = 8500"
## [1] "s = 9000"
## [1] "s = 9500"
## [1] "s = 10000"
Provide you maximum likelihood estimates \(E\left\{\omega \right\}\), \(E\left\{\lambda \right\}\), \(E\left\{\mu \right\}\) and \(E\left\{\tau \right\}\) rounded to two decimal places.
#round(mean(w.out),2): 0.1
#round(mean(lambda.out),2): 2.31
#round(mean(mu.out),2) : 0.79
#round(mean(sigma.out),2) : 0.38
cat(
paste(
'w.out =', mean(w.out),
'\nlambda.out =', mean(lambda.out),
'\nmu.out =', mean(mu.out),
'\ntau.out =', mean(tau.out)
)
)
## w.out = 0.100046285424584
## lambda.out = 2.42265546131689
## mu.out = 0.785707802124559
## tau.out = 0.885920609270053
Are the initial values appropriate?
The starting values of four parameters, \(\omega\), \(\lambda\), \(\mu\) and \(\tau\), need to be specified, and the context of the problem provides some useful clues.
Because the lognormal component corresponds to the “normal” components, and we expect the majority of the observations to be in this class, it makes sense to bias the weights so that \(\omega \le 1/2\). For example, we could use \(\omega = 0.1\), or simply sample a random starting from something like a Beta(1,9) distribution (which has expectation 0.1).
For the same reason, a reasonable starting values for \(\mu\) and \(\tau\) correspond to their maximum likelihood estimators under the simpler log-Gaussian model. Since a random variable follows a log-Gaussian distribution if and only if its logarithm follows a Gaussian distribution, we can use \(\mu = mean(log(x))\) and \(\tau = sd(log(x))\) as our starting values. Alternatively, the values could be sampled from distributions centered around these values in order to make the starting values random.
Because the defective components should have shorter lifespan than normal components, it makes sense to take \(\frac{1}{\lambda}\) (which is the mean of the first component) to be a small fraction of the overall mean of the data (we use \(5%\) of the overall mean, but other similar values would all be reasonable).
Finally, in this case the indicators \(c_1, \ldots, c_n\) could be randomly initialized to either component of the mixture.
In summary, you should expect an initialization such as this one:
w = 0.1
mu = mean(log(x))
tau = sd(log(x))
lambda = 20/mean(x)
cc = sample(1:2, n, TRUE, c(1/2, 1/2))
Is the full conditional for the indicators \(c_1, \ldots, c_n\) correct?
The indicators \(c_1, \ldots, c_n\) are conditionally independent from each other, and we have
\(\Pr(c_i = 1 \mid \cdots) \propto w \lambda \exp\left\{ -\lambda x_i \right\}\) while \(\Pr(c_i = 2 \mid \cdots) \propto (1- \\w) \frac{1}{\sqrt{2\pi} \tau x} \exp\left\{ - \frac{1}{2 \tau^2} \left( \log(x_i) - \mu \right)^2\right\}\)
Hence, the code to sample the indicators might look something like this:
# Full conditional for cc
v = rep(0,2)
for(i in 1:n){
v[1] = log(w) + dexp(x[i], lambda, log=TRUE)
v[2] = log(1-w) + dlnorm(x[i], mu, tau, log=TRUE)
v = exp(v - max(v))/sum(exp(v - max(v)))
cc[i] = sample(1:2, 1, replace=TRUE, prob=v)
}
Please be open-minded when reviewing this code, as there are many ways to accomplish the same thing in R. For example, the calculation could be vectorized to increase efficiency.
Is the full conditional for the weight \(\omega\) correct?
This is a simple one, as its structure is common to all mixture models. Recalling that the prior on \(\omega\) is a uniform distribution on [0,1][0,1], we have
\[ \omega \mid \cdots \sim \text{Beta}\left(m(\mathbf{c})+1, n-m(\mathbf{c})+1\right) \]
where \(m(\mathbf{c})\) is the number of observations that c assigns to component 1. The associated code might look something like this
# Full conditional for w
w = rbeta(1, 1+sum(cc==1), 1+n-sum(cc==1))
or, alternatively,
# Full conditional for w
w = rbeta(1, 1+sum(cc==1), 1+sum(cc==2))
Please remember to be open-minded when reviewing this code, as there are many ways to accomplish the same thing in R.
Is the full conditional for the weight \(\lambda\) correct?
Since the prior is conditionally conjugate, it is easy to see that
\[ \lambda \mid \cdots \sim \text{Gam} \left( 1 + m(\mathbf{c}) , 1 + \sum_{i:c_i = 1} x_i\right) \]
Hence, the code might look something like
# Full conditional for lambda
lambda = rgamma(1, 1 + sum(cc==1), 1 + sum(x[cc==1]))
Is the full conditional for the weight \(\mu\) correct?
The full-conditional for \(\mu\) is a Gaussian distribution,
\[ \mu \mid \cdots \sim \text{N} \left( \frac{\frac{\sum_{i:c_i=1}x_i}{\tau^2} + 0}{\frac{m(\mathbf{c})}{\tau^2} + 1} , \frac{1}{\frac{m(\mathbf{c})}{\tau^2} + 1} \right) \]
The corresponding R code might look like:
# Full conditional for mu
mean.post = (sum(log(x[cc==2]))/tau^2 + 0)/(sum(cc==2)/tau^2 + 1)
std.post = sqrt(1/(sum(cc==2)/tau^2 + 1))
mu = rnorm(1, mean.post, std.post)
Is the full conditional for the weight \(\tau\) correct?
The full conditional for \(\tau^2\) is an inverse Gamma distribution,
\[ \tau^2 \mid \cdots \sim \\\text{IGam}\left( 2 + n - m\left(\mathbf{c}\right), 1+ \frac{1}{2}\sum_{i:c_i = 2} \left\{ \log x_i - \mu \right\}^2 \right) \]
The corresponding R code might look like:
# Full conditional for tau
tau = sqrt(1/rgamma(1, 2 + sum(cc==2), 1 + 0.5*sum((log(x[cc==2]) - mu)^2)))
Are the posterior means of the parameters generated by the algorithm correct?
The posterior means, rounded to two decimal places, are \(\text{E} \left\{ w \right\} \approx 0.10\), \(\text{E} \left\{ \lambda \right\} \approx 2.29\), \(\text{E} \left\{ \mu \right\} \approx 0.79\) and \(\text{E} \left\{ \tau \right\} \approx 0.38\)
Provide an MCMC algorithm to fit the mixture model
\[ \begin{align} f(x) = w \lambda \exp\left\{ -\lambda x \right\} + (1- \\w) \frac{1}{\sqrt{2\pi} \tau x} \exp\left\{ - \frac{1}{2 \tau^2} \left( \log(x) - \mu \right)^2\right\} \\ \quad\quad\quad x>0. \end{align} \]
# MCMC iterations
set.seed(1024)
data = read.csv("data/fuses.csv", header = FALSE)
x = data$V1
rm(data)
## Initialize the parameters
w = 0.1
mu = mean(log(x))
sigma = sd(log(x))
lambda = 20/mean(x) #Initial standard deviation
n = length(x)
# Plot the initial guess for the density
KK = 2
aa = rep(1, KK)
eta = 0
tau = 1
dd = 2
qq = 1
phi = 1
rrr = 2000
cc.out = array(0, dim = c(rrr, n))
w.out = rep(0, rrr)
mu.out = rep(0, rrr)
tau.out = rep(0, rrr)
lambda.out = rep(0, rrr)
logpost = rep(0, rrr)
for(s in 1:rrr){
# Sample the indicators
cc = rep(0, n)
v = rep(0, KK)
for(i in 1:n){
v[1] = log(w) + dexp(x[i], lambda, log = TRUE)
v[2] = log(1-w) + dlnorm(x[i], mu, tau, log = TRUE)
v = exp(v - max(v))/sum(exp(v - max(v)))
cc[i] = sample(1:2, 1, replace = TRUE, prob = v)
}
# Sample
w = rbeta(1, 1+sum(cc==1), 1+n-sum(cc==1))
lambda = rgamma(1,1 + sum(cc==1), 1 + sum(x[cc==1]))
mean.post = (sum(log(x[cc==2]))/tau^2 + 0)/(sum(cc==2)/tau^2 + 1)
std.post = sqrt(1/(sum(cc==2)/tau^2 + 1))
mu = rnorm(1, mean.post, std.post)
tau = sqrt(1/rgamma(1, 2 + sum(cc==2), 1 + sum((log(x[cc==2]) - mu)^2)))
# Store samples
cc.out[s,] = cc
w.out[s] = w
lambda.out[s] = lambda
mu.out[s] = mu
tau.out[s] = sigma
}
Provide you maximum likelihood estimates \(E\left\{\omega \right\}\), \(E\left\{\lambda \right\}\), \(E\left\{\mu \right\}\) and \(E\left\{\tau \right\}\) rounded to two decimal places.
#paste('mean cc.out =', mean(cc.out))
#paste('mean w.out =', mean(w.out))
#paste('mean lambda.out =', mean(lambda.out))
#paste('mean mu.out =', mean(mu.out))
#paste('mean tau.out =', mean(tau.out))
cat(
paste(
'cc.out =', mean(cc.out),
'\nw.out =', mean(w.out),
'\nlambda.out =', mean(lambda.out),
'\nmu.out =', mean(mu.out),
'\ntau.out =', mean(tau.out)
)
)
## cc.out = 1.88996
## w.out = 0.112733901985298
## lambda.out = 2.13635414207982
## mu.out = 0.787782429388135
## tau.out = 0.855128580712039
#w:0.10; lambda:2.29; mu: 0.79; tau: 0.38
cat(
paste(
'w =', w,
'\nlambda =', lambda,
'\nmu =', mu,
'\ntau =', tau
)
)
## w = 0.104482563325936
## lambda = 2.00459843854346
## mu = 0.797944727472358
## tau = 0.396397676347267
Are the initial values appropriate?
The starting values of four parameters, \(\omega\), \(\lambda\), \(\mu\) and \(\tau\), need to be specified, and the context of the problem provides some useful clues.
Because the lognormal component corresponds to the “normal” components, and we expect the majority of the observations to be in this class, it makes sense to bias the weights so that \(\omega \le 1/2\). For example, we could use \(\omega = 0.1\), or simply sample a random starting from something like a Beta(1,9) distribution (which has expectation 0.1).
For the same reason, a reasonable starting values for \(\mu\) and \(\tau\) correspond to their maximum likelihood estimators under the simpler log-Gaussian model. Since a random variable follows a log-Gaussian distribution if and only if its logarithm follows a Gaussian distribution, we can use \(\mu = mean(log(x))\) and \(\tau = sd(log(x))\) as our starting values. Alternatively, the values could be sampled from distributions centered around these values in order to make the starting values random.
Because the defective components should have shorter lifespan than normal components, it makes sense to take \(\frac{1}{\lambda}\) (which is the mean of the first component) to be a small fraction of the overall mean of the data (we use \(5%\) of the overall mean, but other similar values would all be reasonable).
Finally, in this case the indicators \(c_1, \ldots, c_n\) could be randomly initialized to either component of the mixture.
In summary, you should expect an initialization such as this one:
w = 0.1
mu = mean(log(x))
tau = sd(log(x))
lambda = 20/mean(x)
cc = sample(1:2, n, TRUE, c(1/2, 1/2))
Is the full conditional for the indicators \(c_1, \ldots, c_n\) correct?
The indicators \(c_1, \ldots, c_n\) are conditionally independent from each other, and we have
\(\Pr(c_i = 1 \mid \cdots) \propto w \lambda \exp\left\{ -\lambda x_i \right\}\) while \(\Pr(c_i = 2 \mid \cdots) \propto (1- \\w) \frac{1}{\sqrt{2\pi} \tau x} \exp\left\{ - \frac{1}{2 \tau^2} \left( \log(x_i) - \mu \right)^2\right\}\)
Hence, the code to sample the indicators might look something like this:
# Full conditional for cc
v = rep(0,2)
for(i in 1:n){
v[1] = log(w) + dexp(x[i], lambda, log=TRUE)
v[2] = log(1-w) + dlnorm(x[i], mu, tau, log=TRUE)
v = exp(v - max(v))/sum(exp(v - max(v)))
cc[i] = sample(1:2, 1, replace=TRUE, prob=v)
}
Please be open-minded when reviewing this code, as there are many ways to accomplish the same thing in R. For example, the calculation could be vectorized to increase efficiency.
Is the full conditional for the weight \(\omega\) correct?
This is a simple one, as its structure is common to all mixture models. Recalling that the prior on \(\omega\) is a uniform distribution on [0,1][0,1], we have
\[ \omega \mid \cdots \sim \text{Beta}\left(m(\mathbf{c})+1, n-m(\mathbf{c})+1\right) \]
where \(m(\mathbf{c})\) is the number of observations that c assigns to component 1. The associated code might look something like this
# Full conditional for w
w = rbeta(1, 1+sum(cc==1), 1+n-sum(cc==1))
or, alternatively,
# Full conditional for w
w = rbeta(1, 1+sum(cc==1), 1+sum(cc==2))
Please remember to be open-minded when reviewing this code, as there are many ways to accomplish the same thing in R.
Is the full conditional for the weight \(\lambda\) correct?
Since the prior is conditionally conjugate, it is easy to see that
\[ \lambda \mid \cdots \sim \text{Gam} \left( 1 + m(\mathbf{c}) , 1 + \sum_{i:c_i = 1} x_i\right) \]
Hence, the code might look something like
# Full conditional for lambda
lambda = rgamma(1, 1 + sum(cc==1), 1 + sum(x[cc==1]))
Is the full conditional for the weight \(\mu\) correct?
The full-conditional for \(\mu\) is a Gaussian distribution,
\[ \mu \mid \cdots \sim \text{N} \left( \frac{\frac{\sum_{i:c_i=1}x_i}{\tau^2} + 0}{\frac{m(\mathbf{c})}{\tau^2} + 1} , \frac{1}{\frac{m(\mathbf{c})}{\tau^2} + 1} \right) \]
The corresponding R code might look like:
# Full conditional for mu
mean.post = (sum(log(x[cc==2]))/tau^2 + 0)/(sum(cc==2)/tau^2 + 1)
std.post = sqrt(1/(sum(cc==2)/tau^2 + 1))
mu = rnorm(1, mean.post, std.post)
Is the full conditional for the weight \(\tau\) correct?
The full conditional for \(\tau^2\) is an inverse Gamma distribution,
\[ \tau^2 \mid \cdots \sim \\\text{IGam}\left( 2 + n - m\left(\mathbf{c}\right), 1+ \frac{1}{2}\sum_{i:c_i = 2} \left\{ \log x_i - \mu \right\}^2 \right) \]
The corresponding R code might look like:
# Full conditional for tau
tau = sqrt(1/rgamma(1, 2 + sum(cc==2), 1 + 0.5*sum((log(x[cc==2]) - mu)^2)))
Are the posterior means of the parameters generated by the algorithm correct?
The posterior means, rounded to two decimal places, are \(\text{E} \left\{ w \right\} \approx 0.10\), \(\text{E} \left\{ \lambda \right\} \approx 2.29\), \(\text{E} \left\{ \mu \right\} \approx 0.79\) and \(\text{E} \left\{ \tau \right\} \approx 0.38\)
Provide an MCMC algorithm to fit the mixture model
\[ \begin{align} f(x) = w \lambda \exp\left\{ -\lambda x \right\} + (1- \\w) \frac{1}{\sqrt{2\pi} \tau x} \exp\left\{ - \frac{1}{2 \tau^2} \left( \log(x) - \mu \right)^2\right\} \\ \quad\quad\quad x>0. \end{align} \]
set.seed(81196) # So that results are reproducible
fuses <- read.csv(file = 'data/fuses.csv', header = FALSE)
head(fuses)
## V1
## 1 1.062163
## 2 1.284631
## 3 2.733352
## 4 2.284062
## 5 3.289444
## 6 3.113181
x <- fuses$V1
n <- NROW(x)
hist(x, breaks = 50)
aa <- c(1,1) #Beta
eta <- 1
tau <- 1
l <- 1 #lambda prior
dd <- 2 #inverse Gamma
qq <- 1 #inverse Gamma
w <- .1 #Assign equal weight to each component to start with
mu <- mean(log(x))
sigma <- sd(log(x))
lambda <- 1/mu #Initial lambda
# Number of iterations of the sampler
rrr <- 10000
burn <- 1000
# Storing the samples
cc.out <- array(0, dim=c(rrr, n))
w.out <- rep(0, rrr)
lambda.out <- rep(0, rrr)
mu.out <- rep(0, rrr)
sigma.out <- rep(0, rrr)
logpost <- rep(0, rrr)
#s=1
# MCMC iterations
for(s in 1:rrr){
# Sample the indicators
cc <- rep(0,n)
for(i in 1:n){
v <- rep(0,2)
v[1] <- log(w) + dexp(x[i], lambda, log=TRUE) #Compute the log of the weights
v[2] <- log(1-w) + dlnorm(x[i], mu, sigma, log=TRUE) #Compute the log of the weights
v <- exp(v - max(v)) / sum(exp(v - max(v)))
cc[i] <- sample(1:2, 1, replace=TRUE, prob=v)
}
# Sample the weights
w <- rbeta(1, aa[1] + sum(cc==1), aa[2] + sum(cc==2))
# Sample the means
lambda <- 1/ mean(x[cc==1])
xsumk <- sum(log(x[cc==2]))
nk <- NROW(x[cc==2])
tau2.hat <- 1/(nk/sigma^2 + 1/tau^2)
mu.hat <- tau2.hat*(xsumk/sigma^2 + eta/tau^2)
mu <- log(rlnorm(1, mu.hat, tau2.hat))
# Sample the variances
dd.star <- dd + n/2
qq.star <- qq + sum((log(x) - mu)^2)/2
sigma <- sqrt(rinvgamma(1, dd.star, qq.star))
# Store samples
cc.out[s,] <- cc
w.out[s] <- w
lambda.out[s] <- lambda
mu.out[s] <- mu
sigma.out[s] <- sigma
for(i in 1:n){
if(cc[i]==1){
logpost[s] <- logpost[s] + log(w) + dexp(x[i], lambda, log=TRUE)
}else{
logpost[s] <- logpost[s] + log(1-w) + dlnorm(x[i], mu, sigma, log=TRUE)
}
}
logpost[s] <- logpost[s] + dbeta(w, aa[1], aa[2],log = T)
logpost[s] <- logpost[s] + dexp(lambda.out[s], lambda, log = T)
logpost[s] <- logpost[s] + dnorm(mu.out[s], mu, sigma, log = T)
logpost[s] <- logpost[s] + log(dinvgamma(sigma.out[s], dd, 1/qq))
if(s/500==floor(s/500)){
print(paste("s =",s))
}
}
## [1] "s = 500"
## [1] "s = 1000"
## [1] "s = 1500"
## [1] "s = 2000"
## [1] "s = 2500"
## [1] "s = 3000"
## [1] "s = 3500"
## [1] "s = 4000"
## [1] "s = 4500"
## [1] "s = 5000"
## [1] "s = 5500"
## [1] "s = 6000"
## [1] "s = 6500"
## [1] "s = 7000"
## [1] "s = 7500"
## [1] "s = 8000"
## [1] "s = 8500"
## [1] "s = 9000"
## [1] "s = 9500"
## [1] "s = 10000"
Provide you maximum likelihood estimates \(E\left\{\omega \right\}\), \(E\left\{\lambda \right\}\), \(E\left\{\mu \right\}\) and \(E\left\{\tau \right\}\) rounded to two decimal places.
#E(w) = 0.06
#E(lambda) = 9.61
#E(mu) = 0.7
#E(tau) = 0.86
cat(
paste(
'w =', mean(w),
'\nlambda =', mean(lambda),
'\nmu =', mean(mu),
'\ntau =', mean(tau)
)
)
## w = 0.0604240927489753
## lambda = 8.14658896375222
## mu = 0.720780146048355
## tau = 1
Are the initial values appropriate?
The starting values of four parameters, \(\omega\), \(\lambda\), \(\mu\) and \(\tau\), need to be specified, and the context of the problem provides some useful clues.
Because the lognormal component corresponds to the “normal” components, and we expect the majority of the observations to be in this class, it makes sense to bias the weights so that \(\omega \le 1/2\). For example, we could use \(\omega = 0.1\), or simply sample a random starting from something like a Beta(1,9) distribution (which has expectation 0.1).
For the same reason, a reasonable starting values for \(\mu\) and \(\tau\) correspond to their maximum likelihood estimators under the simpler log-Gaussian model. Since a random variable follows a log-Gaussian distribution if and only if its logarithm follows a Gaussian distribution, we can use \(\mu = mean(log(x))\) and \(\tau = sd(log(x))\) as our starting values. Alternatively, the values could be sampled from distributions centered around these values in order to make the starting values random.
Because the defective components should have shorter lifespan than normal components, it makes sense to take \(\frac{1}{\lambda}\) (which is the mean of the first component) to be a small fraction of the overall mean of the data (we use \(5%\) of the overall mean, but other similar values would all be reasonable).
Finally, in this case the indicators \(c_1, \ldots, c_n\) could be randomly initialized to either component of the mixture.
In summary, you should expect an initialization such as this one:
w = 0.1
mu = mean(log(x))
tau = sd(log(x))
lambda = 20/mean(x)
cc = sample(1:2, n, TRUE, c(1/2, 1/2))
Is the full conditional for the indicators \(c_1, \ldots, c_n\) correct?
The indicators \(c_1, \ldots, c_n\) are conditionally independent from each other, and we have
\(\Pr(c_i = 1 \mid \cdots) \propto w \lambda \exp\left\{ -\lambda x_i \right\}\) while \(\Pr(c_i = 2 \mid \cdots) \propto (1- \\w) \frac{1}{\sqrt{2\pi} \tau x} \exp\left\{ - \frac{1}{2 \tau^2} \left( \log(x_i) - \mu \right)^2\right\}\)
Hence, the code to sample the indicators might look something like this:
# Full conditional for cc
v = rep(0,2)
for(i in 1:n){
v[1] = log(w) + dexp(x[i], lambda, log=TRUE)
v[2] = log(1-w) + dlnorm(x[i], mu, tau, log=TRUE)
v = exp(v - max(v))/sum(exp(v - max(v)))
cc[i] = sample(1:2, 1, replace=TRUE, prob=v)
}
Please be open-minded when reviewing this code, as there are many ways to accomplish the same thing in R. For example, the calculation could be vectorized to increase efficiency.
Is the full conditional for the weight \(\omega\) correct?
This is a simple one, as its structure is common to all mixture models. Recalling that the prior on \(\omega\) is a uniform distribution on [0,1][0,1], we have
\[ \omega \mid \cdots \sim \text{Beta}\left(m(\mathbf{c})+1, n-m(\mathbf{c})+1\right) \]
where \(m(\mathbf{c})\) is the number of observations that c assigns to component 1. The associated code might look something like this
# Full conditional for w
w = rbeta(1, 1+sum(cc==1), 1+n-sum(cc==1))
or, alternatively,
# Full conditional for w
w = rbeta(1, 1+sum(cc==1), 1+sum(cc==2))
Please remember to be open-minded when reviewing this code, as there are many ways to accomplish the same thing in R.
Is the full conditional for the weight \(\lambda\) correct?
Since the prior is conditionally conjugate, it is easy to see that
\[ \lambda \mid \cdots \sim \text{Gam} \left( 1 + m(\mathbf{c}) , 1 + \sum_{i:c_i = 1} x_i\right) \]
Hence, the code might look something like
# Full conditional for lambda
lambda = rgamma(1, 1 + sum(cc==1), 1 + sum(x[cc==1]))
Is the full conditional for the weight \(\mu\) correct?
The full-conditional for \(\mu\) is a Gaussian distribution,
\[ \mu \mid \cdots \sim \text{N} \left( \frac{\frac{\sum_{i:c_i=1}x_i}{\tau^2} + 0}{\frac{m(\mathbf{c})}{\tau^2} + 1} , \frac{1}{\frac{m(\mathbf{c})}{\tau^2} + 1} \right) \]
The corresponding R code might look like:
# Full conditional for mu
mean.post = (sum(log(x[cc==2]))/tau^2 + 0)/(sum(cc==2)/tau^2 + 1)
std.post = sqrt(1/(sum(cc==2)/tau^2 + 1))
mu = rnorm(1, mean.post, std.post)
Is the full conditional for the weight \(\tau\) correct?
The full conditional for \(\tau^2\) is an inverse Gamma distribution,
\[ \tau^2 \mid \cdots \sim \\\text{IGam}\left( 2 + n - m\left(\mathbf{c}\right), 1+ \frac{1}{2}\sum_{i:c_i = 2} \left\{ \log x_i - \mu \right\}^2 \right) \]
The corresponding R code might look like:
# Full conditional for tau
tau = sqrt(1/rgamma(1, 2 + sum(cc==2), 1 + 0.5*sum((log(x[cc==2]) - mu)^2)))
Are the posterior means of the parameters generated by the algorithm correct?
The posterior means, rounded to two decimal places, are \(\text{E} \left\{ w \right\} \approx 0.10\), \(\text{E} \left\{ \lambda \right\} \approx 2.29\), \(\text{E} \left\{ \mu \right\} \approx 0.79\) and \(\text{E} \left\{ \tau \right\} \approx 0.38\)
0点 No
2点 Yes
0点 No
1点 Yes
Provide an MCMC algorithm to fit the mixture model
\[ \begin{align} f(x) = w \lambda \exp\left\{ -\lambda x \right\} + (1- \\w) \frac{1}{\sqrt{2\pi} \tau x} \exp\left\{ - \frac{1}{2 \tau^2} \left( \log(x) - \mu \right)^2\right\} \\ \quad\quad\quad x>0. \end{align} \]
rm(list=ls())
library(MCMCpack)
Fuses <- read.csv(file = "data/fuses.csv")
## Initialize the parameters
# Number of iterations of the sampler
rrr = 6000
burn = 1000
set.seed(81196)
# Actual data
x = Fuses$X1.062163
KK = 2
# nnumber of observations in data
n = nrow(Fuses)
# Storing the samples
cc.out = array(0, dim=c(rrr, n))
w.out = rep(0, rrr)
lambda.out = array(0, dim=c(rrr, 1))
mu.out = array(0, dim = c(rrr,1))
tau.out = array(0, dim = c(rrr, 1))
logpost = rep(0, rrr)
#Priors
mu = rnorm(1,0,1)
tausquared = rinvgamma(1,2,1)
# Initial values
w.hat = 1/2
mu.hat = mean(log(x))
lambda.hat = 1/mean(x)
tausquared.hat = var(log(x))
# MCMC iterations
for(s in 1:rrr){
# Sample the indicators
for(i in 1:n){
v = rep(0,KK)
v[1] = log(w.hat) + dexp(x[i],lambda.hat,log = TRUE)
v[2] = log(1 - w.hat) + dlnorm(x[i], meanlog = mu.hat, sdlog = sqrt(tausquared.hat), log = TRUE)
v = exp(v - max(v))/sum(exp(v - max(v)))
cc[i] = sample(1:KK, 1, replace=TRUE, prob=v)
}
# Sample the weights
w.hat = rbeta(1, 1 + sum(cc==1), 1 + sum(cc==2))
lambda.hat = rgamma(1, sum(cc==1) + 1, sum(x[cc==1]) + 1)
tau_star = 1/(1 + sum(cc==2)/tausquared.hat)
mu_star = tau_star * (sum(log(x[cc=2]))/tausquared.hat)
mu.hat = rnorm(1,mu_star, sqrt(tau_star))
tausquared.hat = rinvgamma(1, 2 + 0.5*sum(cc==2),1 + 0.5*sum((log(x[cc==2])-mu.hat)^2))
# Store samples
cc.out[s,] = cc
w.out[s] = w.hat
lambda.out[s,] = lambda.hat
mu.out[s,] = mu.hat
tau.out[s,] = sqrt(tausquared.hat)
if(s/500==floor(s/500)){
print(paste("s =",s))
}
}
## Error in eval(expr, envir, enclos): object 'cc' not found
Provide you maximum likelihood estimates \(E\left\{\omega \right\}\), \(E\left\{\lambda \right\}\), \(E\left\{\mu \right\}\) and \(E\left\{\tau \right\}\) rounded to two decimal places.
#mean(w.out[burn:rrr]) = 0.1
#mean(lambda.out[burn:rrr]) = 2.39
#mean(mu.out[burn:rrr]) = 0.79
#mean(tau.out[burn:rrr]) = 0.38
cat(
paste(
'w.out =', mean(w.out[burn:rrr]),
'\nlambda.out =', mean(lambda.out[burn:rrr]),
'\nmu.out =', mean(mu.out[burn:rrr]),
'\ntau.out =', mean(tau.out[burn:rrr])
)
)
## w.out = 0
## lambda.out = 0
## mu.out = 0
## tau.out = 0
Are the initial values appropriate?
The starting values of four parameters, \(\omega\), \(\lambda\), \(\mu\) and \(\tau\), need to be specified, and the context of the problem provides some useful clues.
Because the lognormal component corresponds to the “normal” components, and we expect the majority of the observations to be in this class, it makes sense to bias the weights so that \(\omega \le 1/2\). For example, we could use \(\omega = 0.1\), or simply sample a random starting from something like a Beta(1,9) distribution (which has expectation 0.1).
For the same reason, a reasonable starting values for \(\mu\) and \(\tau\) correspond to their maximum likelihood estimators under the simpler log-Gaussian model. Since a random variable follows a log-Gaussian distribution if and only if its logarithm follows a Gaussian distribution, we can use \(\mu = mean(log(x))\) and \(\tau = sd(log(x))\) as our starting values. Alternatively, the values could be sampled from distributions centered around these values in order to make the starting values random.
Because the defective components should have shorter lifespan than normal components, it makes sense to take \(\frac{1}{\lambda}\) (which is the mean of the first component) to be a small fraction of the overall mean of the data (we use \(5%\) of the overall mean, but other similar values would all be reasonable).
Finally, in this case the indicators \(c_1, \ldots, c_n\) could be randomly initialized to either component of the mixture.
In summary, you should expect an initialization such as this one:
w = 0.1
mu = mean(log(x))
tau = sd(log(x))
lambda = 20/mean(x)
cc = sample(1:2, n, TRUE, c(1/2, 1/2))
Is the full conditional for the indicators \(c_1, \ldots, c_n\) correct?
The indicators \(c_1, \ldots, c_n\) are conditionally independent from each other, and we have
\(\Pr(c_i = 1 \mid \cdots) \propto w \lambda \exp\left\{ -\lambda x_i \right\}\) while \(\Pr(c_i = 2 \mid \cdots) \propto (1- \\w) \frac{1}{\sqrt{2\pi} \tau x} \exp\left\{ - \frac{1}{2 \tau^2} \left( \log(x_i) - \mu \right)^2\right\}\)
Hence, the code to sample the indicators might look something like this:
# Full conditional for cc
v = rep(0,2)
for(i in 1:n){
v[1] = log(w) + dexp(x[i], lambda, log=TRUE)
v[2] = log(1-w) + dlnorm(x[i], mu, tau, log=TRUE)
v = exp(v - max(v))/sum(exp(v - max(v)))
cc[i] = sample(1:2, 1, replace=TRUE, prob=v)
}
Please be open-minded when reviewing this code, as there are many ways to accomplish the same thing in R. For example, the calculation could be vectorized to increase efficiency.
Is the full conditional for the weight \(\omega\) correct?
This is a simple one, as its structure is common to all mixture models. Recalling that the prior on \(\omega\) is a uniform distribution on [0,1][0,1], we have
\[ \omega \mid \cdots \sim \text{Beta}\left(m(\mathbf{c})+1, n-m(\mathbf{c})+1\right) \]
where \(m(\mathbf{c})\) is the number of observations that c assigns to component 1. The associated code might look something like this
# Full conditional for w
w = rbeta(1, 1+sum(cc==1), 1+n-sum(cc==1))
or, alternatively,
# Full conditional for w
w = rbeta(1, 1+sum(cc==1), 1+sum(cc==2))
Please remember to be open-minded when reviewing this code, as there are many ways to accomplish the same thing in R.
Is the full conditional for the weight \(\lambda\) correct?
Since the prior is conditionally conjugate, it is easy to see that
\[ \lambda \mid \cdots \sim \text{Gam} \left( 1 + m(\mathbf{c}) , 1 + \sum_{i:c_i = 1} x_i\right) \]
Hence, the code might look something like
# Full conditional for lambda
lambda = rgamma(1, 1 + sum(cc==1), 1 + sum(x[cc==1]))
Is the full conditional for the weight \(\mu\) correct?
The full-conditional for \(\mu\) is a Gaussian distribution,
\[ \mu \mid \cdots \sim \text{N} \left( \frac{\frac{\sum_{i:c_i=1}x_i}{\tau^2} + 0}{\frac{m(\mathbf{c})}{\tau^2} + 1} , \frac{1}{\frac{m(\mathbf{c})}{\tau^2} + 1} \right) \]
The corresponding R code might look like:
# Full conditional for mu
mean.post = (sum(log(x[cc==2]))/tau^2 + 0)/(sum(cc==2)/tau^2 + 1)
std.post = sqrt(1/(sum(cc==2)/tau^2 + 1))
mu = rnorm(1, mean.post, std.post)
Is the full conditional for the weight \(\tau\) correct?
The full conditional for \(\tau^2\) is an inverse Gamma distribution,
\[ \tau^2 \mid \cdots \sim \\\text{IGam}\left( 2 + n - m\left(\mathbf{c}\right), 1+ \frac{1}{2}\sum_{i:c_i = 2} \left\{ \log x_i - \mu \right\}^2 \right) \]
The corresponding R code might look like:
# Full conditional for tau
tau = sqrt(1/rgamma(1, 2 + sum(cc==2), 1 + 0.5*sum((log(x[cc==2]) - mu)^2)))
Are the posterior means of the parameters generated by the algorithm correct?
The posterior means, rounded to two decimal places, are \(\text{E} \left\{ w \right\} \approx 0.10\), \(\text{E} \left\{ \lambda \right\} \approx 2.29\), \(\text{E} \left\{ \mu \right\} \approx 0.79\) and \(\text{E} \left\{ \tau \right\} \approx 0.38\)
0点 No
2点 Yes
0点 No
1点 Yes
Provide an MCMC algorithm to fit the mixture model
\[ \begin{align} f(x) = w \lambda \exp\left\{ -\lambda x \right\} + (1- \\w) \frac{1}{\sqrt{2\pi} \tau x} \exp\left\{ - \frac{1}{2 \tau^2} \left( \log(x) - \mu \right)^2\right\} \\ \quad\quad\quad x>0. \end{align} \]
Provide you maximum likelihood estimates \(E\left\{\omega \right\}\), \(E\left\{\lambda \right\}\), \(E\left\{\mu \right\}\) and \(E\left\{\tau \right\}\) rounded to two decimal places.
Are the initial values appropriate?
The starting values of four parameters, \(\omega\), \(\lambda\), \(\mu\) and \(\tau\), need to be specified, and the context of the problem provides some useful clues.
Because the lognormal component corresponds to the “normal” components, and we expect the majority of the observations to be in this class, it makes sense to bias the weights so that \(\omega \le 1/2\). For example, we could use \(\omega = 0.1\), or simply sample a random starting from something like a Beta(1,9) distribution (which has expectation 0.1).
For the same reason, a reasonable starting values for \(\mu\) and \(\tau\) correspond to their maximum likelihood estimators under the simpler log-Gaussian model. Since a random variable follows a log-Gaussian distribution if and only if its logarithm follows a Gaussian distribution, we can use \(\mu = mean(log(x))\) and \(\tau = sd(log(x))\) as our starting values. Alternatively, the values could be sampled from distributions centered around these values in order to make the starting values random.
Because the defective components should have shorter lifespan than normal components, it makes sense to take \(\frac{1}{\lambda}\) (which is the mean of the first component) to be a small fraction of the overall mean of the data (we use \(5%\) of the overall mean, but other similar values would all be reasonable).
Finally, in this case the indicators \(c_1, \ldots, c_n\) could be randomly initialized to either component of the mixture.
In summary, you should expect an initialization such as this one:
w = 0.1
mu = mean(log(x))
tau = sd(log(x))
lambda = 20/mean(x)
cc = sample(1:2, n, TRUE, c(1/2, 1/2))
Is the full conditional for the indicators \(c_1, \ldots, c_n\) correct?
The indicators \(c_1, \ldots, c_n\) are conditionally independent from each other, and we have
\(\Pr(c_i = 1 \mid \cdots) \propto w \lambda \exp\left\{ -\lambda x_i \right\}\) while \(\Pr(c_i = 2 \mid \cdots) \propto (1- \\w) \frac{1}{\sqrt{2\pi} \tau x} \exp\left\{ - \frac{1}{2 \tau^2} \left( \log(x_i) - \mu \right)^2\right\}\)
Hence, the code to sample the indicators might look something like this:
# Full conditional for cc
v = rep(0,2)
for(i in 1:n){
v[1] = log(w) + dexp(x[i], lambda, log=TRUE)
v[2] = log(1-w) + dlnorm(x[i], mu, tau, log=TRUE)
v = exp(v - max(v))/sum(exp(v - max(v)))
cc[i] = sample(1:2, 1, replace=TRUE, prob=v)
}
Please be open-minded when reviewing this code, as there are many ways to accomplish the same thing in R. For example, the calculation could be vectorized to increase efficiency.
Is the full conditional for the weight \(\omega\) correct?
This is a simple one, as its structure is common to all mixture models. Recalling that the prior on \(\omega\) is a uniform distribution on [0,1][0,1], we have
\[ \omega \mid \cdots \sim \text{Beta}\left(m(\mathbf{c})+1, n-m(\mathbf{c})+1\right) \]
where \(m(\mathbf{c})\) is the number of observations that c assigns to component 1. The associated code might look something like this
# Full conditional for w
w = rbeta(1, 1+sum(cc==1), 1+n-sum(cc==1))
or, alternatively,
# Full conditional for w
w = rbeta(1, 1+sum(cc==1), 1+sum(cc==2))
Please remember to be open-minded when reviewing this code, as there are many ways to accomplish the same thing in R.
Is the full conditional for the weight \(\lambda\) correct?
Since the prior is conditionally conjugate, it is easy to see that
\[ \lambda \mid \cdots \sim \text{Gam} \left( 1 + m(\mathbf{c}) , 1 + \sum_{i:c_i = 1} x_i\right) \]
Hence, the code might look something like
# Full conditional for lambda
lambda = rgamma(1, 1 + sum(cc==1), 1 + sum(x[cc==1]))
Is the full conditional for the weight \(\mu\) correct?
The full-conditional for \(\mu\) is a Gaussian distribution,
\[ \mu \mid \cdots \sim \text{N} \left( \frac{\frac{\sum_{i:c_i=1}x_i}{\tau^2} + 0}{\frac{m(\mathbf{c})}{\tau^2} + 1} , \frac{1}{\frac{m(\mathbf{c})}{\tau^2} + 1} \right) \]
The corresponding R code might look like:
# Full conditional for mu
mean.post = (sum(log(x[cc==2]))/tau^2 + 0)/(sum(cc==2)/tau^2 + 1)
std.post = sqrt(1/(sum(cc==2)/tau^2 + 1))
mu = rnorm(1, mean.post, std.post)
Is the full conditional for the weight \(\tau\) correct?
The full conditional for \(\tau^2\) is an inverse Gamma distribution,
\[ \tau^2 \mid \cdots \sim \\\text{IGam}\left( 2 + n - m\left(\mathbf{c}\right), 1+ \frac{1}{2}\sum_{i:c_i = 2} \left\{ \log x_i - \mu \right\}^2 \right) \]
The corresponding R code might look like:
# Full conditional for tau
tau = sqrt(1/rgamma(1, 2 + sum(cc==2), 1 + 0.5*sum((log(x[cc==2]) - mu)^2)))
Are the posterior means of the parameters generated by the algorithm correct?
The posterior means, rounded to two decimal places, are \(\text{E} \left\{ w \right\} \approx 0.10\), \(\text{E} \left\{ \lambda \right\} \approx 2.29\), \(\text{E} \left\{ \mu \right\} \approx 0.79\) and \(\text{E} \left\{ \tau \right\} \approx 0.38\)
0点 No
2点 Yes
0点 No
1点 Yes
Provide an MCMC algorithm to fit the mixture model
\[ \begin{align} f(x) = w \lambda \exp\left\{ -\lambda x \right\} + (1- \\w) \frac{1}{\sqrt{2\pi} \tau x} \exp\left\{ - \frac{1}{2 \tau^2} \left( \log(x) - \mu \right)^2\right\} \\ \quad\quad\quad x>0. \end{align} \]
Provide you maximum likelihood estimates \(E\left\{\omega \right\}\), \(E\left\{\lambda \right\}\), \(E\left\{\mu \right\}\) and \(E\left\{\tau \right\}\) rounded to two decimal places.
Are the initial values appropriate?
The starting values of four parameters, \(\omega\), \(\lambda\), \(\mu\) and \(\tau\), need to be specified, and the context of the problem provides some useful clues.
Because the lognormal component corresponds to the “normal” components, and we expect the majority of the observations to be in this class, it makes sense to bias the weights so that \(\omega \le 1/2\). For example, we could use \(\omega = 0.1\), or simply sample a random starting from something like a Beta(1,9) distribution (which has expectation 0.1).
For the same reason, a reasonable starting values for \(\mu\) and \(\tau\) correspond to their maximum likelihood estimators under the simpler log-Gaussian model. Since a random variable follows a log-Gaussian distribution if and only if its logarithm follows a Gaussian distribution, we can use \(\mu = mean(log(x))\) and \(\tau = sd(log(x))\) as our starting values. Alternatively, the values could be sampled from distributions centered around these values in order to make the starting values random.
Because the defective components should have shorter lifespan than normal components, it makes sense to take \(\frac{1}{\lambda}\) (which is the mean of the first component) to be a small fraction of the overall mean of the data (we use \(5%\) of the overall mean, but other similar values would all be reasonable).
Finally, in this case the indicators \(c_1, \ldots, c_n\) could be randomly initialized to either component of the mixture.
In summary, you should expect an initialization such as this one:
w = 0.1
mu = mean(log(x))
tau = sd(log(x))
lambda = 20/mean(x)
cc = sample(1:2, n, TRUE, c(1/2, 1/2))
Is the full conditional for the indicators \(c_1, \ldots, c_n\) correct?
The indicators \(c_1, \ldots, c_n\) are conditionally independent from each other, and we have
\(\Pr(c_i = 1 \mid \cdots) \propto w \lambda \exp\left\{ -\lambda x_i \right\}\) while \(\Pr(c_i = 2 \mid \cdots) \propto (1- \\w) \frac{1}{\sqrt{2\pi} \tau x} \exp\left\{ - \frac{1}{2 \tau^2} \left( \log(x_i) - \mu \right)^2\right\}\)
Hence, the code to sample the indicators might look something like this:
# Full conditional for cc
v = rep(0,2)
for(i in 1:n){
v[1] = log(w) + dexp(x[i], lambda, log=TRUE)
v[2] = log(1-w) + dlnorm(x[i], mu, tau, log=TRUE)
v = exp(v - max(v))/sum(exp(v - max(v)))
cc[i] = sample(1:2, 1, replace=TRUE, prob=v)
}
Please be open-minded when reviewing this code, as there are many ways to accomplish the same thing in R. For example, the calculation could be vectorized to increase efficiency.
Is the full conditional for the weight \(\omega\) correct?
This is a simple one, as its structure is common to all mixture models. Recalling that the prior on \(\omega\) is a uniform distribution on [0,1][0,1], we have
\[ \omega \mid \cdots \sim \text{Beta}\left(m(\mathbf{c})+1, n-m(\mathbf{c})+1\right) \]
where \(m(\mathbf{c})\) is the number of observations that c assigns to component 1. The associated code might look something like this
# Full conditional for w
w = rbeta(1, 1+sum(cc==1), 1+n-sum(cc==1))
or, alternatively,
# Full conditional for w
w = rbeta(1, 1+sum(cc==1), 1+sum(cc==2))
Please remember to be open-minded when reviewing this code, as there are many ways to accomplish the same thing in R.
Is the full conditional for the weight \(\lambda\) correct?
Since the prior is conditionally conjugate, it is easy to see that
\[ \lambda \mid \cdots \sim \text{Gam} \left( 1 + m(\mathbf{c}) , 1 + \sum_{i:c_i = 1} x_i\right) \]
Hence, the code might look something like
# Full conditional for lambda
lambda = rgamma(1, 1 + sum(cc==1), 1 + sum(x[cc==1]))
Is the full conditional for the weight \(\mu\) correct?
The full-conditional for \(\mu\) is a Gaussian distribution,
\[ \mu \mid \cdots \sim \text{N} \left( \frac{\frac{\sum_{i:c_i=1}x_i}{\tau^2} + 0}{\frac{m(\mathbf{c})}{\tau^2} + 1} , \frac{1}{\frac{m(\mathbf{c})}{\tau^2} + 1} \right) \]
The corresponding R code might look like:
# Full conditional for mu
mean.post = (sum(log(x[cc==2]))/tau^2 + 0)/(sum(cc==2)/tau^2 + 1)
std.post = sqrt(1/(sum(cc==2)/tau^2 + 1))
mu = rnorm(1, mean.post, std.post)
Is the full conditional for the weight \(\tau\) correct?
The full conditional for \(\tau^2\) is an inverse Gamma distribution,
\[ \tau^2 \mid \cdots \sim \\\text{IGam}\left( 2 + n - m\left(\mathbf{c}\right), 1+ \frac{1}{2}\sum_{i:c_i = 2} \left\{ \log x_i - \mu \right\}^2 \right) \]
The corresponding R code might look like:
# Full conditional for tau
tau = sqrt(1/rgamma(1, 2 + sum(cc==2), 1 + 0.5*sum((log(x[cc==2]) - mu)^2)))
Are the posterior means of the parameters generated by the algorithm correct?
The posterior means, rounded to two decimal places, are \(\text{E} \left\{ w \right\} \approx 0.10\), \(\text{E} \left\{ \lambda \right\} \approx 2.29\), \(\text{E} \left\{ \mu \right\} \approx 0.79\) and \(\text{E} \left\{ \tau \right\} \approx 0.38\)
Here I noticed the function from rinvgamma
and MCMCpack
get a slight different result where rinvgamma
will be more accurate.
example from MCMCpack
example from rinvgamma
It’s useful to record some information about how your file was created.
suppressMessages(require('dplyr', quietly = TRUE))
suppressMessages(require('magrittr', quietly = TRUE))
suppressMessages(require('formattable', quietly = TRUE))
suppressMessages(require('knitr', quietly = TRUE))
suppressMessages(require('kableExtra', quietly = TRUE))
sys1 <- devtools::session_info()$platform %>%
unlist %>% data.frame(Category = names(.), session_info = .)
rownames(sys1) <- NULL
sys2 <- data.frame(Sys.info()) %>%
dplyr::mutate(Category = rownames(.)) %>% .[2:1]
names(sys2)[2] <- c('Sys.info')
rownames(sys2) <- NULL
if (nrow(sys1) == 9 & nrow(sys2) == 8) {
sys2 %<>% rbind(., data.frame(
Category = 'Current time',
Sys.info = paste(as.character(lubridate::now('Asia/Tokyo')), 'JST🗾')))
} else {
sys1 %<>% rbind(., data.frame(
Category = 'Current time',
session_info = paste(as.character(lubridate::now('Asia/Tokyo')), 'JST🗾')))
}
sys <- cbind(sys1, sys2) %>%
kbl(caption = 'Additional session information:') %>%
kable_styling(bootstrap_options = c('striped', 'hover', 'condensed', 'responsive')) %>%
row_spec(0, background = 'DimGrey', color = 'yellow') %>%
column_spec(1, background = 'CornflowerBlue', color = 'red') %>%
column_spec(2, background = 'grey', color = 'black') %>%
column_spec(3, background = 'CornflowerBlue', color = 'blue') %>%
column_spec(4, background = 'grey', color = 'white') %>%
row_spec(9, bold = T, color = 'yellow', background = '#D7261E')
rm(sys1, sys2)
sys
Category | session_info | Category | Sys.info |
---|---|---|---|
version | R version 4.1.0 (2021-05-18) | sysname | Linux |
os | Ubuntu 20.04.2 LTS | release | 5.8.0-54-generic |
system | x86_64, linux-gnu | version | #61~20.04.1-Ubuntu SMP Thu May 13 00:05:49 UTC 2021 |
ui | X11 | nodename | Scibrokes-Trading |
language | en | machine | x86_64 |
collate | en_US.UTF-8 | login | englianhu |
ctype | en_US.UTF-8 | user | englianhu |
tz | Asia/Tokyo | effective_user | englianhu |
date | 2021-05-26 | Current time | 2021-05-26 05:28:24 JST🗾 |