library(combinat) #needed for permutations
## Attaching package: 'combinat'
## The following object(s) are masked from 'package:utils':
##
## combn
library(coda) #needed for HPD interval
## Loading required package: lattice
We are asked to plot several examples of the conjugate prior distribution for a Galenshore(a,\( \theta \)) distribution. Since the conjugate prior was determined to be a Galenshore(\( an_0-\frac{n_0}{2}+1 \),\( \sqrt{n_0t_0} \)), we can simply plot various examples of Galenshore distributions.
dgalenshore <- function(x, a, theta) {
# takes numeric vector (elements should be >0) and returns the galenshore
# probability density at each point. Parameters a and theta should all be
# > 0.
if (any(x <= 0) || any(a <= 0) || any(theta <= 0)) {
stop("Invalid parameters or input values. Inputs must be >0")
} else {
return((2/gamma(a)) * theta^(2 * a) * exp(-theta^2 * x^2))
}
}
x <- seq(0.01, 3, 0.01)
plot(x, dgalenshore(x, 1, 1), type = "l", ylab = "Galenshore Density(x)", col = "black",
ylim = c(0, 5), lwd = 2)
lines(x, dgalenshore(x, 1, 1.5), type = "l", col = "red", lwd = 2, lty = 2)
lines(x, dgalenshore(x, 3, 1), type = "l", col = "blue", lwd = 2, lty = 3)
legend("topright", c("Galenshore(1,1)", "Galenshore(1,2)", "Galenshore(3,1)"),
col = c("black", "red", "blue"), lwd = c(2, 2, 2), lty = c(1, 2, 3))
We are asked to analyze the amount of time students from three high schools spent on studying or homework during exams using a normal model with a conjugate prior in which {\( \mu_0=5, \sigma_0^2=4, \kappa_0=1, \nu_0=2 \)}. Note- many of the R commands I used came from the textbook, p. 76-78.
Posterior means and 95% confidence intervals for the mean \( \theta \) and standard deviation \( \sigma \) from each school
# prior
mu0 <- 5
k0 <- 1
s20 <- 4
nu0 <- 2
# read in data
dat <- list()
dat[1] <- read.table("school1.dat")
dat[2] <- read.table("school2.dat")
dat[3] <- read.table("school3.dat")
n <- sapply(dat, length)
ybar <- sapply(dat, mean)
s2 <- sapply(dat, var)
# posterior
kn <- k0 + n
nun <- nu0 + n
mun <- (k0 * mu0 + n * ybar)/kn
s2n <- (nu0 * s20 + (n - 1) * s2 + k0 * n * (ybar - mu0)^2/kn)/nun
# produce a posterior sample for the parameters
s.postsample <- s2.postsample <- theta.postsample <- matrix(0, 10000, 3, dimnames = list(NULL,
c("school1", "school2", "school3")))
for (i in c(1, 2, 3)) {
s2.postsample[, i] <- 1/rgamma(10000, nun[i]/2, s2n[i] * nun[i]/2)
s.postsample[, i] <- sqrt(s2.postsample[, i])
theta.postsample[, i] <- rnorm(10000, mun[i], s.postsample[, i]/sqrt(kn[i]))
}
At this point, we have a simulated posterior distribution of \( \theta \), \( \sigma \), and \( \sigma^2 \) from which we can calculate the posterior means and 95% confidence interval for each school.
# posterior mean and 95% CI for Thetas (the means)
colMeans(theta.postsample)
## school1 school2 school3
## 9.294 6.943 7.814
apply(theta.postsample, 2, function(x) {
quantile(x, c(0.025, 0.975))
})
## school1 school2 school3
## 2.5% 7.769 5.163 6.152
## 97.5% 10.834 8.719 9.443
# posterior mean and 95% CI for the Standard Deviations
colMeans(s.postsample)
## school1 school2 school3
## 3.904 4.397 3.757
apply(s.postsample, 2, function(x) {
quantile(x, c(0.025, 0.975))
})
## school1 school2 school3
## 2.5% 2.992 3.349 2.800
## 97.5% 5.136 5.871 5.139
The posterior probability that \( \theta_i<\theta_j<\theta_k \) for all six permutations {i,j,k} of {1,2,3}. We estimate each of these by ranking the theta variates in each posterior sample, then counting the proportion of total sample is represented by each permutation of (1,2,3).
# determine the ranks of each posterior sample for theta.
theta.ranks <- t(apply(theta.postsample, 1, rank))
rank.probs <- list()
for (p in permn(3)) {
index <- apply(theta.ranks, 1, function(row) {
all(row == p)
})
rank.probs[[paste(p, collapse = ",")]] <- length(theta.ranks[index, 1])/10000 #this is the estimate of the probability of this permutation
}
In the results matrix, the position indicates the school index and the value indicates the theta rank (smallest=1 and largest=3). This can be confusing to read since it does not correspond to the the subscript indexing in the problem definition. I therefore list out each probability with the simulation result to avoid any ambiguity.
\( P(\theta_1<\theta_2<\theta_3) \)
rank.probs[["1,2,3"]]
## [1] 0.0056
\( P(\theta_1<\theta_3<\theta_2) \)
rank.probs[["1,3,2"]]
## [1] 0.0034
\( P(\theta_2<\theta_1<\theta_3) \)
rank.probs[["2,1,3"]]
## [1] 0.0903
\( P(\theta_2<\theta_3<\theta_1) \)
rank.probs[["3,1,2"]]
## [1] 0.6668
\( P(\theta_3<\theta_1<\theta_2) \)
rank.probs[["2,3,1"]]
## [1] 0.0152
\( P(\theta_3<\theta_2<\theta_1) \)
rank.probs[["3,2,1"]]
## [1] 0.2187
It seems clear that school 1 usually has the largest average study time among its students. Schools 2 and 3 have roughly the same levels of study time.
The posterior probability that \( \tilde{Y}_i<\tilde{Y}_j<\tilde{Y}_k \) for all six permutations where \( \tilde{Y}_i \) is a sample from the posterior predictive distribution of school i. Note that the posterior predictive distribution is normal with mean \( \mu_n \) and variance \[ \sigma^2(1+\frac{1}{\kappa_n}) \] (textbook p. 72).
# Generate Posterior Prediction Distribution
prediction.postsample <- matrix(0, 10000, 3, dimnames = list(NULL, c("school1",
"school2", "school3")))
for (i in c(1, 2, 3)) {
prediction.postsample[, i] <- rnorm(10000, mun[i], sqrt(s2.postsample[,
i] * (1 + 1/kn[i]))) #formula from textbook page 72
}
# determine the ranks of each posterior sample for a prediction.
pred.ranks <- t(apply(prediction.postsample, 1, rank))
pred.probs <- list()
for (p in permn(3)) {
index <- apply(pred.ranks, 1, function(row) {
all(row == p)
})
pred.probs[[paste(p, collapse = ",")]] <- length(pred.ranks[index, 1])/10000 #this is the estimate of the probability of this permutation
}
\( P(\tilde{Y}_1<\tilde{Y}_2<\tilde{Y}_3) \)
pred.probs[["1,2,3"]]
## [1] 0.104
\( P(\tilde{Y}_1<\tilde{Y}_3<\tilde{Y}_2) \)
pred.probs[["1,3,2"]]
## [1] 0.0979
\( P(\tilde{Y}_2<\tilde{Y}_1<\tilde{Y}_3) \)
pred.probs[["2,1,3"]]
## [1] 0.187
\( P(\tilde{Y}_2<\tilde{Y}_3<\tilde{Y}_1) \)
pred.probs[["3,1,2"]]
## [1] 0.268
\( P(\tilde{Y}_3<\tilde{Y}_1<\tilde{Y}_2) \)
pred.probs[["2,3,1"]]
## [1] 0.1421
\( P(\tilde{Y}_3<\tilde{Y}_2<\tilde{Y}_1) \)
pred.probs[["3,2,1"]]
## [1] 0.201
Again our prediction is that a student from school 1 would study more, but in this case the conclusion is less clear because we are predicting an individual value rather than a mean value. Even if the mean study time was truly lower in schools 2 and 3 than school 1, we might encounter an exceptionally dedicated student from school 2 with a higher study time than a below-average student from school 1. This is reflected in the “fuzzier” contrasts between the schools' predictive distributions.
Posterior probability that \( \theta_1 \) is bigger than both \( \theta_2 \) and \( \theta_3 \). This can be determined simply by summing the two probabilities \( P(\theta_2<\theta_3<\theta_1) \) and \( P(\theta_3<\theta_2<\theta_1) \).
sum(unlist(rank.probs[c("3,1,2", "3,2,1")]))
## [1] 0.8855
Posterior probability that \( \tilde{Y}_1 \) is bigger than both \( \tilde{Y}_2 \) and \( \tilde{Y}_3 \). This can be determined simply by summing the two probabilities \( P(\tilde{Y}_2<\tilde{Y}_3<\tilde{Y}_1) \) and \( P(\tilde{Y}_3<\tilde{Y}_2<\tilde{Y}_1) \).
sum(unlist(pred.probs[c("3,1,2", "3,2,1")]))
## [1] 0.469
We are asked to find the 90% credible interval and 90% highest posterior density interval for \( \theta \), and to plot the posterior density along with these intervals. In part (b) we determined that the posterior distribution of \( \theta|\vec{y} \) is a Gamma(10,1012).
# 90% Credible Interval (quantile approach)
ci <- qgamma(c(0.05, 0.95), 10, 1012)
ci
## [1] 0.005361 0.015519
# 90% HPD Interval
post.sample <- rgamma(10000, 10, 1012)
th <- as.mcmc(post.sample)
hpd <- HPDinterval(th, prob = 0.9)
hpd
## lower upper
## var1 0.004795 0.01469
## attr(,"Probability")
## [1] 0.9
# Create plot of posterior distribution
theta.support <- seq(0, 0.02, length = 5000)
plot(theta.support, dgamma(theta.support, 10, 1012), type = "l", xlab = expression(theta),
ylab = expression(paste(italic("p("), theta, "|y)")))
lines(x = c(hpd[1], hpd[1], hpd[2], hpd[2]), y = dgamma(c(0, hpd[1], hpd[2],
0), 10, 1012), col = gray(0.5), lwd = 2, lty = 1)
lines(x = c(ci[1], ci[1], ci[2], ci[2]), y = dgamma(c(0, ci[1], ci[2], 0), 10,
1012), col = gray(0), lwd = 2, lty = 2)
legend("topright", c("95% HPD interval", "95% credible interval"), col = c(gray(0.5),
gray(0)), lty = c(1, 2), lwd = c(2, 2), bty = "n")
The posterior predictive distribution for a new observation “z” was found to be: \[ \frac{11(1107)^{11}}{(1107+z)^{12}} \]
pz <- function(z) {
11 * (1107)^11/((1107 + z)^12)
}
curve(pz, 0, 200, lwd = 2, ylab = "p(z|y1,...yn)")