Inference-Assmt-1.utf8

library("rjags")
library("coda")
library("dplyr")

Question 6

In this question we know the form of the posterior distribution of the location parameter theta up to a proportionality constant. If we take the uniform prior then then this just looks like the liklihood of the data. In order to find HPD credible set for theta we can sample from this distribution using MCMC. Then we can calculate sample quantiles of any order and thereby find minimum length credible sets of desired probability (.99 or .95).

First we do it for sample size 5, then for sample size 20 and then for sample size 100. In each case the location parameter we set as 5 for generating the data.

To calculate the HPD credible set we just iterate over all possible sets with desired probability and find the one with minimum length. By all possible I mean only a finite number but chosen at the granularity of 6th decimal place.

Sample size n = 5

Generating a sample from the Cauchy distribution using location parameter equal to 5.

n <- 5
sample <- rt(n, 1) + 5
sample

[1] 6.213805 4.613941 4.749778 4.689165 6.395880

Now we sample from the posterior using JAGS. First we set up the sampler and check its output.

mod_string = "model {
  for (i in 1:n) {
    y[i] ~ dt(theta, 1/1, 1)
  }
  theta  ~ dunif(0, 1e7)
}"

set.seed(50)
y = sample
n = length(y)

data_jags = list(y=y, n=n)
params = c("theta")

inits = function() {
  inits = list("theta"= 1.0)
}

mod = jags.model(textConnection(mod_string), data = data_jags, inits = inits)

Compiling model graph
   Resolving undeclared variables
   Allocating nodes
Graph information:
   Observed stochastic nodes: 5
   Unobserved stochastic nodes: 1
   Total graph size: 11

Initializing model

mod_sim = coda.samples(model=mod, variable.names=params, n.iter = 1000)
plot(mod_sim)

Now we take a sample of 1000 from this posterior distribution for HPD credible set calculation. First we calculate a 95% HPD credible set.

post_sample <- unlist(mod_sim)

ip <- seq(0.000000, 0.050000, 0.000001)
fp <- ip + 0.950000
iq <- quantile(post_sample, probs=ip)
fq <- quantile(post_sample, probs=fp)

lengths <- fq - iq
cat("95% HPD credible set is", sep = "\n")

95% HPD credible set is

cat(sep="\n")

c(iq[which.min(lengths)], fq[which.min(lengths)])

 1.7017% 96.7017% 
4.129573 6.431083

Next we calculate 99% HPD credible set.

post_sample <- unlist(mod_sim)

ip <- seq(0.000000, 0.010000, 0.000001)
fp <- ip + 0.990000
iq <- quantile(post_sample, probs=ip)
fq <- quantile(post_sample, probs=fp)

lengths <- fq - iq
cat("99% HPD credible set is", sep = "\n")

99% HPD credible set is

cat(sep="\n")

c(iq[which.min(lengths)], fq[which.min(lengths)])

 0.2993% 99.2993% 
3.950755 6.795079

Sample size n = 20

Generating a sample from the Cauchy distribution using location parameter equal to 5.

n <- 20
sample <- rt(n, 1) + 5


set.seed(50)
y = sample
n = length(y)

data_jags = list(y=y, n=n)
params = c("theta")

inits = function() {
  inits = list("theta"= 1.0)
}

mod = jags.model(textConnection(mod_string), data = data_jags, inits = inits)

Compiling model graph
   Resolving undeclared variables
   Allocating nodes
Graph information:
   Observed stochastic nodes: 20
   Unobserved stochastic nodes: 1
   Total graph size: 26

Initializing model

mod_sim = coda.samples(model=mod, variable.names=params, n.iter = 1000)
post_sample <- unlist(mod_sim)

ip <- seq(0.000000, 0.050000, 0.000001)
fp <- ip + 0.950000
iq <- quantile(post_sample, probs=ip)
fq <- quantile(post_sample, probs=fp)

lengths <- fq - iq
cat("95% HPD credible set is", sep = "\n")

95% HPD credible set is

cat(sep="\n")

c(iq[which.min(lengths)], fq[which.min(lengths)])

 3.3984% 98.3984% 
4.542225 5.808611

cat(sep="\n")

ip <- seq(0.000000, 0.010000, 0.000001)
fp <- ip + 0.990000
iq <- quantile(post_sample, probs=ip)
fq <- quantile(post_sample, probs=fp)

lengths <- fq - iq
cat("99% HPD credible set is", sep = "\n")

99% HPD credible set is

cat(sep="\n")

c(iq[which.min(lengths)], fq[which.min(lengths)])

 0.3003% 99.3003% 
4.178186 5.881324

Sample size = 100

n <- 100
sample <- rt(n, 1) + 5


set.seed(50)
y = sample
n = length(y)

data_jags = list(y=y, n=n)
params = c("theta")

inits = function() {
  inits = list("theta"= 1.0)
}

mod = jags.model(textConnection(mod_string), data = data_jags, inits = inits)

Compiling model graph
   Resolving undeclared variables
   Allocating nodes
Graph information:
   Observed stochastic nodes: 100
   Unobserved stochastic nodes: 1
   Total graph size: 106

Initializing model

mod_sim = coda.samples(model=mod, variable.names=params, n.iter = 1000)
post_sample <- unlist(mod_sim)

ip <- seq(0.000000, 0.050000, 0.000001)
fp <- ip + 0.950000
iq <- quantile(post_sample, probs=ip)
fq <- quantile(post_sample, probs=fp)

lengths <- fq - iq
cat("95% HPD credible set is", sep = "\n")

95% HPD credible set is

cat(sep="\n")

c(iq[which.min(lengths)], fq[which.min(lengths)])

 2.9980% 97.9980% 
4.818782 5.489182

cat(sep="\n")

ip <- seq(0.000000, 0.010000, 0.000001)
fp <- ip + 0.990000
iq <- quantile(post_sample, probs=ip)
fq <- quantile(post_sample, probs=fp)

lengths <- fq - iq
cat("99% HPD credible set is", sep = "\n")

99% HPD credible set is

cat(sep="\n")

c(iq[which.min(lengths)], fq[which.min(lengths)])

 0.0000% 99.0000% 
4.616977 5.534947

Conclusion

The HPD credible sets become more accurate and narrower as sample size increases.

Question 2

Jeffrey’s prior = Beta(0.5, 0.5)

We work with a sample of size 30. For a fixed value of the parameter (calling it p) we draw 100 size 30 samples and for each we construct 95% confidence intervals and 95% HPD credible sets and check how many times these intervals contain the true value of the parameter.

set.seed(123)
n <- 30

p = 1/8

p <- 1/8

### frequentist confidence interval

#set.seed(3)
sample_again <- rbinom(100, n, p)
mles <- (1/30)*sample_again
CI_lows <- mles - (1.96)*sqrt(mles*(1-mles)/30)
CI_highs <- mles + (1.96)*sqrt(mles*(1-mles)/30)
#sum(CI_highs==CI_lows) %>% as.character() %>% paste("problem", .) %>% cat("\n")
#min(CI_highs - CI_lows)
sum(CI_highs > p & CI_lows < p) %>% as.character() %>%
paste("Number of times the confidence intervals contains the true value",.) %>% cat(sep = "\n")

Number of times the confidence intervals contains the true value 92

cat(sep = "\n")

### HPD credible set

ip <- seq(0.000000, 0.050000, 0.000001)
fp <- ip + 0.950000
count <- 0
for (a in sample_again) {
  iq <- qbeta(ip, 0.5 + a, 30 - a + 0.5)
  fq <- qbeta(fp, 0.5 + a, 30 - a + 0.5)
  lengths <- fq - iq
  L <- iq[which.min(lengths)]
  U <- fq[which.min(lengths)]
  if (L < p && U > p) {
    count <- count + 1
  }
  }
count %>% as.character() %>%
paste("Number of times the HPD confidence set contains the true value",.) %>% cat(sep = "\n")

Number of times the HPD confidence set contains the true value 91

cat(sep = "\n")

p = 1/4

p <- 1/4

### frequentist confidence interval

#set.seed(101)
sample_again <- rbinom(100, n, p)
mles <- (1/30)*sample_again
CI_lows <- mles - (1.96)*sqrt(mles*(1-mles)/30)
CI_highs <- mles + (1.96)*sqrt(mles*(1-mles)/30)
#sum(CI_highs==CI_lows) %>% as.character() %>% paste("problem", .) %>% cat("\n")
#min(CI_highs - CI_lows)
sum(CI_highs > p & CI_lows < p) %>% as.character() %>%
paste("Number of times the confidence intervals contains the true value",.) %>% cat(sep = "\n")

Number of times the confidence intervals contains the true value 96

cat(sep = "\n")

### HPD credible set

ip <- seq(0.000000, 0.050000, 0.000001)
fp <- ip + 0.950000
count <- 0
for (a in sample_again) {
  iq <- qbeta(ip, 0.5 + a, 30 - a + 0.5)
  fq <- qbeta(fp, 0.5 + a, 30 - a + 0.5)
  lengths <- fq - iq
  L <- iq[which.min(lengths)]
  U <- fq[which.min(lengths)]
  if (L < p && U > p) {
    count <- count + 1
  }
  }
count %>% as.character() %>%
paste("Number of times the HPD confidence set contains the true value",.) %>% cat(sep = "\n")

Number of times the HPD confidence set contains the true value 96

cat(sep = "\n")

p = 1/2

p <- 1/2

### frequentist confidence interval

#set.seed(11)
sample_again <- rbinom(100, n, p)
mles <- (1/30)*sample_again
CI_lows <- mles - (1.96)*sqrt(mles*(1-mles)/30)
CI_highs <- mles + (1.96)*sqrt(mles*(1-mles)/30)
#sum(CI_highs==CI_lows) %>% as.character() %>% paste("problem", .) %>% cat("\n")
#min(CI_highs - CI_lows)
sum(CI_highs > p & CI_lows < p) %>% as.character() %>%
paste("Number of times the confidence intervals contains the true value",.) %>% cat(sep = "\n")

Number of times the confidence intervals contains the true value 97

cat(sep = "\n")

### HPD credible set

ip <- seq(0.000000, 0.050000, 0.000001)
fp <- ip + 0.950000
count <- 0
for (a in sample_again) {
  iq <- qbeta(ip, 0.5 + a, 30 - a + 0.5)
  fq <- qbeta(fp, 0.5 + a, 30 - a + 0.5)
  lengths <- fq - iq
  L <- iq[which.min(lengths)]
  U <- fq[which.min(lengths)]
  if (L < p && U > p) {
    count <- count + 1
  }
  }
count %>% as.character() %>%
paste("Number of times the HPD confidence set contains the true value",.) %>% cat(sep = "\n")

Number of times the HPD confidence set contains the true value 97

cat(sep = "\n")

p = 3/4

p <- 3/4

### frequentist confidence interval

#set.seed(994)
sample_again <- rbinom(100, n, p)
mles <- (1/30)*sample_again
CI_lows <- mles - (1.96)*sqrt(mles*(1-mles)/30)
CI_highs <- mles + (1.96)*sqrt(mles*(1-mles)/30)
#sum(CI_highs==CI_lows) %>% as.character() %>% paste("problem", .) %>% cat("\n")
#min(CI_highs - CI_lows)
sum(CI_highs > p & CI_lows < p) %>% as.character() %>%
paste("Number of times the confidence intervals contains the true value",.) %>% cat(sep = "\n")

Number of times the confidence intervals contains the true value 91

cat(sep = "\n")

### HPD credible set

ip <- seq(0.000000, 0.050000, 0.000001)
fp <- ip + 0.950000
count <- 0
for (a in sample_again) {
  iq <- qbeta(ip, 0.5 + a, 30 - a + 0.5)
  fq <- qbeta(fp, 0.5 + a, 30 - a + 0.5)
  lengths <- fq - iq
  L <- iq[which.min(lengths)]
  U <- fq[which.min(lengths)]
  if (L < p && U > p) {
    count <- count + 1
  }
  }
count %>% as.character() %>%
paste("Number of times the HPD confidence set contains the true value",.) %>% cat(sep = "\n")

Number of times the HPD confidence set contains the true value 91

cat(sep = "\n")

p = 7/8

p <- 7/8

### frequentist confidence interval

#set.seed(483)
sample_again <- rbinom(100, n, p)
mles <- (1/30)*sample_again
CI_lows <- mles - (1.96)*sqrt(mles*(1-mles)/30)
CI_highs <- mles + (1.96)*sqrt(mles*(1-mles)/30)
#sum(CI_highs==CI_lows) %>% as.character() %>% paste("problem", .) %>% cat("\n")
#min(CI_highs - CI_lows)
sum(CI_highs > p & CI_lows < p) %>% as.character() %>%
paste("Number of times the confidence intervals contains the true value",.) %>% cat(sep = "\n")

Number of times the confidence intervals contains the true value 90

cat(sep = "\n")

### HPD credible set

ip <- seq(0.000000, 0.050000, 0.000001)
fp <- ip + 0.950000
count <- 0
for (a in sample_again) {
  iq <- qbeta(ip, 0.5 + a, 30 - a + 0.5)
  fq <- qbeta(fp, 0.5 + a, 30 - a + 0.5)
  lengths <- fq - iq
  L <- iq[which.min(lengths)]
  U <- fq[which.min(lengths)]
  if (L < p && U > p) {
    count <- count + 1
  }
  }
count %>% as.character() %>%
paste("Number of times the HPD confidence set contains the true value",.) %>% cat(sep = "\n")

Number of times the HPD confidence set contains the true value 88

cat(sep = "\n")