library("rjags")
library("coda")
library("dplyr")
In this question we know the form of the posterior distribution of the location parameter theta up to a proportionality constant. If we take the uniform prior then then this just looks like the liklihood of the data. In order to find HPD credible set for theta we can sample from this distribution using MCMC. Then we can calculate sample quantiles of any order and thereby find minimum length credible sets of desired probability (.99 or .95).
First we do it for sample size 5, then for sample size 20 and then for sample size 100. In each case the location parameter we set as 5 for generating the data.
To calculate the HPD credible set we just iterate over all possible sets with desired probability and find the one with minimum length. By all possible I mean only a finite number but chosen at the granularity of 6th decimal place.
Generating a sample from the Cauchy distribution using location parameter equal to 5.
n <- 5
sample <- rt(n, 1) + 5
sample
[1] 6.213805 4.613941 4.749778 4.689165 6.395880
Now we sample from the posterior using JAGS. First we set up the sampler and check its output.
mod_string = "model {
for (i in 1:n) {
y[i] ~ dt(theta, 1/1, 1)
}
theta ~ dunif(0, 1e7)
}"
set.seed(50)
y = sample
n = length(y)
data_jags = list(y=y, n=n)
params = c("theta")
inits = function() {
inits = list("theta"= 1.0)
}
mod = jags.model(textConnection(mod_string), data = data_jags, inits = inits)
Compiling model graph
Resolving undeclared variables
Allocating nodes
Graph information:
Observed stochastic nodes: 5
Unobserved stochastic nodes: 1
Total graph size: 11
Initializing model
mod_sim = coda.samples(model=mod, variable.names=params, n.iter = 1000)
plot(mod_sim)
Now we take a sample of 1000 from this posterior distribution for HPD credible set calculation. First we calculate a 95% HPD credible set.
post_sample <- unlist(mod_sim)
ip <- seq(0.000000, 0.050000, 0.000001)
fp <- ip + 0.950000
iq <- quantile(post_sample, probs=ip)
fq <- quantile(post_sample, probs=fp)
lengths <- fq - iq
cat("95% HPD credible set is", sep = "\n")
95% HPD credible set is
cat(sep="\n")
c(iq[which.min(lengths)], fq[which.min(lengths)])
1.7017% 96.7017%
4.129573 6.431083
Next we calculate 99% HPD credible set.
post_sample <- unlist(mod_sim)
ip <- seq(0.000000, 0.010000, 0.000001)
fp <- ip + 0.990000
iq <- quantile(post_sample, probs=ip)
fq <- quantile(post_sample, probs=fp)
lengths <- fq - iq
cat("99% HPD credible set is", sep = "\n")
99% HPD credible set is
cat(sep="\n")
c(iq[which.min(lengths)], fq[which.min(lengths)])
0.2993% 99.2993%
3.950755 6.795079
Generating a sample from the Cauchy distribution using location parameter equal to 5.
n <- 20
sample <- rt(n, 1) + 5
set.seed(50)
y = sample
n = length(y)
data_jags = list(y=y, n=n)
params = c("theta")
inits = function() {
inits = list("theta"= 1.0)
}
mod = jags.model(textConnection(mod_string), data = data_jags, inits = inits)
Compiling model graph
Resolving undeclared variables
Allocating nodes
Graph information:
Observed stochastic nodes: 20
Unobserved stochastic nodes: 1
Total graph size: 26
Initializing model
mod_sim = coda.samples(model=mod, variable.names=params, n.iter = 1000)
post_sample <- unlist(mod_sim)
ip <- seq(0.000000, 0.050000, 0.000001)
fp <- ip + 0.950000
iq <- quantile(post_sample, probs=ip)
fq <- quantile(post_sample, probs=fp)
lengths <- fq - iq
cat("95% HPD credible set is", sep = "\n")
95% HPD credible set is
cat(sep="\n")
c(iq[which.min(lengths)], fq[which.min(lengths)])
3.3984% 98.3984%
4.542225 5.808611
cat(sep="\n")
ip <- seq(0.000000, 0.010000, 0.000001)
fp <- ip + 0.990000
iq <- quantile(post_sample, probs=ip)
fq <- quantile(post_sample, probs=fp)
lengths <- fq - iq
cat("99% HPD credible set is", sep = "\n")
99% HPD credible set is
cat(sep="\n")
c(iq[which.min(lengths)], fq[which.min(lengths)])
0.3003% 99.3003%
4.178186 5.881324
n <- 100
sample <- rt(n, 1) + 5
set.seed(50)
y = sample
n = length(y)
data_jags = list(y=y, n=n)
params = c("theta")
inits = function() {
inits = list("theta"= 1.0)
}
mod = jags.model(textConnection(mod_string), data = data_jags, inits = inits)
Compiling model graph
Resolving undeclared variables
Allocating nodes
Graph information:
Observed stochastic nodes: 100
Unobserved stochastic nodes: 1
Total graph size: 106
Initializing model
mod_sim = coda.samples(model=mod, variable.names=params, n.iter = 1000)
post_sample <- unlist(mod_sim)
ip <- seq(0.000000, 0.050000, 0.000001)
fp <- ip + 0.950000
iq <- quantile(post_sample, probs=ip)
fq <- quantile(post_sample, probs=fp)
lengths <- fq - iq
cat("95% HPD credible set is", sep = "\n")
95% HPD credible set is
cat(sep="\n")
c(iq[which.min(lengths)], fq[which.min(lengths)])
2.9980% 97.9980%
4.818782 5.489182
cat(sep="\n")
ip <- seq(0.000000, 0.010000, 0.000001)
fp <- ip + 0.990000
iq <- quantile(post_sample, probs=ip)
fq <- quantile(post_sample, probs=fp)
lengths <- fq - iq
cat("99% HPD credible set is", sep = "\n")
99% HPD credible set is
cat(sep="\n")
c(iq[which.min(lengths)], fq[which.min(lengths)])
0.0000% 99.0000%
4.616977 5.534947
The HPD credible sets become more accurate and narrower as sample size increases.
Jeffrey’s prior = Beta(0.5, 0.5)
We work with a sample of size 30. For a fixed value of the parameter (calling it p) we draw 100 size 30 samples and for each we construct 95% confidence intervals and 95% HPD credible sets and check how many times these intervals contain the true value of the parameter.
set.seed(123)
n <- 30
p <- 1/8
### frequentist confidence interval
#set.seed(3)
sample_again <- rbinom(100, n, p)
mles <- (1/30)*sample_again
CI_lows <- mles - (1.96)*sqrt(mles*(1-mles)/30)
CI_highs <- mles + (1.96)*sqrt(mles*(1-mles)/30)
#sum(CI_highs==CI_lows) %>% as.character() %>% paste("problem", .) %>% cat("\n")
#min(CI_highs - CI_lows)
sum(CI_highs > p & CI_lows < p) %>% as.character() %>%
paste("Number of times the confidence intervals contains the true value",.) %>% cat(sep = "\n")
Number of times the confidence intervals contains the true value 92
cat(sep = "\n")
### HPD credible set
ip <- seq(0.000000, 0.050000, 0.000001)
fp <- ip + 0.950000
count <- 0
for (a in sample_again) {
iq <- qbeta(ip, 0.5 + a, 30 - a + 0.5)
fq <- qbeta(fp, 0.5 + a, 30 - a + 0.5)
lengths <- fq - iq
L <- iq[which.min(lengths)]
U <- fq[which.min(lengths)]
if (L < p && U > p) {
count <- count + 1
}
}
count %>% as.character() %>%
paste("Number of times the HPD confidence set contains the true value",.) %>% cat(sep = "\n")
Number of times the HPD confidence set contains the true value 91
cat(sep = "\n")
p <- 1/4
### frequentist confidence interval
#set.seed(101)
sample_again <- rbinom(100, n, p)
mles <- (1/30)*sample_again
CI_lows <- mles - (1.96)*sqrt(mles*(1-mles)/30)
CI_highs <- mles + (1.96)*sqrt(mles*(1-mles)/30)
#sum(CI_highs==CI_lows) %>% as.character() %>% paste("problem", .) %>% cat("\n")
#min(CI_highs - CI_lows)
sum(CI_highs > p & CI_lows < p) %>% as.character() %>%
paste("Number of times the confidence intervals contains the true value",.) %>% cat(sep = "\n")
Number of times the confidence intervals contains the true value 96
cat(sep = "\n")
### HPD credible set
ip <- seq(0.000000, 0.050000, 0.000001)
fp <- ip + 0.950000
count <- 0
for (a in sample_again) {
iq <- qbeta(ip, 0.5 + a, 30 - a + 0.5)
fq <- qbeta(fp, 0.5 + a, 30 - a + 0.5)
lengths <- fq - iq
L <- iq[which.min(lengths)]
U <- fq[which.min(lengths)]
if (L < p && U > p) {
count <- count + 1
}
}
count %>% as.character() %>%
paste("Number of times the HPD confidence set contains the true value",.) %>% cat(sep = "\n")
Number of times the HPD confidence set contains the true value 96
cat(sep = "\n")
p <- 1/2
### frequentist confidence interval
#set.seed(11)
sample_again <- rbinom(100, n, p)
mles <- (1/30)*sample_again
CI_lows <- mles - (1.96)*sqrt(mles*(1-mles)/30)
CI_highs <- mles + (1.96)*sqrt(mles*(1-mles)/30)
#sum(CI_highs==CI_lows) %>% as.character() %>% paste("problem", .) %>% cat("\n")
#min(CI_highs - CI_lows)
sum(CI_highs > p & CI_lows < p) %>% as.character() %>%
paste("Number of times the confidence intervals contains the true value",.) %>% cat(sep = "\n")
Number of times the confidence intervals contains the true value 97
cat(sep = "\n")
### HPD credible set
ip <- seq(0.000000, 0.050000, 0.000001)
fp <- ip + 0.950000
count <- 0
for (a in sample_again) {
iq <- qbeta(ip, 0.5 + a, 30 - a + 0.5)
fq <- qbeta(fp, 0.5 + a, 30 - a + 0.5)
lengths <- fq - iq
L <- iq[which.min(lengths)]
U <- fq[which.min(lengths)]
if (L < p && U > p) {
count <- count + 1
}
}
count %>% as.character() %>%
paste("Number of times the HPD confidence set contains the true value",.) %>% cat(sep = "\n")
Number of times the HPD confidence set contains the true value 97
cat(sep = "\n")
p <- 3/4
### frequentist confidence interval
#set.seed(994)
sample_again <- rbinom(100, n, p)
mles <- (1/30)*sample_again
CI_lows <- mles - (1.96)*sqrt(mles*(1-mles)/30)
CI_highs <- mles + (1.96)*sqrt(mles*(1-mles)/30)
#sum(CI_highs==CI_lows) %>% as.character() %>% paste("problem", .) %>% cat("\n")
#min(CI_highs - CI_lows)
sum(CI_highs > p & CI_lows < p) %>% as.character() %>%
paste("Number of times the confidence intervals contains the true value",.) %>% cat(sep = "\n")
Number of times the confidence intervals contains the true value 91
cat(sep = "\n")
### HPD credible set
ip <- seq(0.000000, 0.050000, 0.000001)
fp <- ip + 0.950000
count <- 0
for (a in sample_again) {
iq <- qbeta(ip, 0.5 + a, 30 - a + 0.5)
fq <- qbeta(fp, 0.5 + a, 30 - a + 0.5)
lengths <- fq - iq
L <- iq[which.min(lengths)]
U <- fq[which.min(lengths)]
if (L < p && U > p) {
count <- count + 1
}
}
count %>% as.character() %>%
paste("Number of times the HPD confidence set contains the true value",.) %>% cat(sep = "\n")
Number of times the HPD confidence set contains the true value 91
cat(sep = "\n")
p <- 7/8
### frequentist confidence interval
#set.seed(483)
sample_again <- rbinom(100, n, p)
mles <- (1/30)*sample_again
CI_lows <- mles - (1.96)*sqrt(mles*(1-mles)/30)
CI_highs <- mles + (1.96)*sqrt(mles*(1-mles)/30)
#sum(CI_highs==CI_lows) %>% as.character() %>% paste("problem", .) %>% cat("\n")
#min(CI_highs - CI_lows)
sum(CI_highs > p & CI_lows < p) %>% as.character() %>%
paste("Number of times the confidence intervals contains the true value",.) %>% cat(sep = "\n")
Number of times the confidence intervals contains the true value 90
cat(sep = "\n")
### HPD credible set
ip <- seq(0.000000, 0.050000, 0.000001)
fp <- ip + 0.950000
count <- 0
for (a in sample_again) {
iq <- qbeta(ip, 0.5 + a, 30 - a + 0.5)
fq <- qbeta(fp, 0.5 + a, 30 - a + 0.5)
lengths <- fq - iq
L <- iq[which.min(lengths)]
U <- fq[which.min(lengths)]
if (L < p && U > p) {
count <- count + 1
}
}
count %>% as.character() %>%
paste("Number of times the HPD confidence set contains the true value",.) %>% cat(sep = "\n")
Number of times the HPD confidence set contains the true value 88
cat(sep = "\n")