Math 640 Homework 2

Will Townes

library(combinat)  #needed for permutations
## Attaching package: 'combinat'
## The following object(s) are masked from 'package:utils':
## 
## combn
library(coda)  #needed for HPD interval
## Loading required package: lattice

Problem 1a (Problem 3.9a)

We are asked to plot several examples of the conjugate prior distribution for a Galenshore(a,\( \theta \)) distribution. Since the conjugate prior was determined to be a Galenshore(\( an_0-\frac{n_0}{2}+1 \),\( \sqrt{n_0t_0} \)), we can simply plot various examples of Galenshore distributions.

dgalenshore <- function(x, a, theta) {
    # takes numeric vector (elements should be >0) and returns the galenshore
    # probability density at each point. Parameters a and theta should all be
    # > 0.
    if (any(x <= 0) || any(a <= 0) || any(theta <= 0)) {
        stop("Invalid parameters or input values. Inputs must be >0")
    } else {
        return((2/gamma(a)) * theta^(2 * a) * exp(-theta^2 * x^2))
    }
}
x <- seq(0.01, 3, 0.01)
plot(x, dgalenshore(x, 1, 1), type = "l", ylab = "Galenshore Density(x)", col = "black", 
    ylim = c(0, 5), lwd = 2)
lines(x, dgalenshore(x, 1, 1.5), type = "l", col = "red", lwd = 2, lty = 2)
lines(x, dgalenshore(x, 3, 1), type = "l", col = "blue", lwd = 2, lty = 3)
legend("topright", c("Galenshore(1,1)", "Galenshore(1,2)", "Galenshore(3,1)"), 
    col = c("black", "red", "blue"), lwd = c(2, 2, 2), lty = c(1, 2, 3))

plot of chunk unnamed-chunk-2

Problem 2 (Problem 5.1)

We are asked to analyze the amount of time students from three high schools spent on studying or homework during exams using a normal model with a conjugate prior in which {\( \mu_0=5, \sigma_0^2=4, \kappa_0=1, \nu_0=2 \)}. Note- many of the R commands I used came from the textbook, p. 76-78.

Problem 5.1a

Posterior means and 95% confidence intervals for the mean \( \theta \) and standard deviation \( \sigma \) from each school

# prior
mu0 <- 5
k0 <- 1
s20 <- 4
nu0 <- 2
# read in data
dat <- list()
dat[1] <- read.table("school1.dat")
dat[2] <- read.table("school2.dat")
dat[3] <- read.table("school3.dat")
n <- sapply(dat, length)
ybar <- sapply(dat, mean)
s2 <- sapply(dat, var)
# posterior
kn <- k0 + n
nun <- nu0 + n
mun <- (k0 * mu0 + n * ybar)/kn
s2n <- (nu0 * s20 + (n - 1) * s2 + k0 * n * (ybar - mu0)^2/kn)/nun
# produce a posterior sample for the parameters
s.postsample <- s2.postsample <- theta.postsample <- matrix(0, 10000, 3, dimnames = list(NULL, 
    c("school1", "school2", "school3")))
for (i in c(1, 2, 3)) {
    s2.postsample[, i] <- 1/rgamma(10000, nun[i]/2, s2n[i] * nun[i]/2)
    s.postsample[, i] <- sqrt(s2.postsample[, i])
    theta.postsample[, i] <- rnorm(10000, mun[i], s.postsample[, i]/sqrt(kn[i]))
}

At this point, we have a simulated posterior distribution of \( \theta \), \( \sigma \), and \( \sigma^2 \) from which we can calculate the posterior means and 95% confidence interval for each school.

# posterior mean and 95% CI for Thetas (the means)
colMeans(theta.postsample)
## school1 school2 school3 
##   9.294   6.943   7.814
apply(theta.postsample, 2, function(x) {
    quantile(x, c(0.025, 0.975))
})
##       school1 school2 school3
## 2.5%    7.769   5.163   6.152
## 97.5%  10.834   8.719   9.443
# posterior mean and 95% CI for the Standard Deviations
colMeans(s.postsample)
## school1 school2 school3 
##   3.904   4.397   3.757
apply(s.postsample, 2, function(x) {
    quantile(x, c(0.025, 0.975))
})
##       school1 school2 school3
## 2.5%    2.992   3.349   2.800
## 97.5%   5.136   5.871   5.139

Problem 5.1b

The posterior probability that \( \theta_i<\theta_j<\theta_k \) for all six permutations {i,j,k} of {1,2,3}. We estimate each of these by ranking the theta variates in each posterior sample, then counting the proportion of total sample is represented by each permutation of (1,2,3).

# determine the ranks of each posterior sample for theta.
theta.ranks <- t(apply(theta.postsample, 1, rank))
rank.probs <- list()
for (p in permn(3)) {
    index <- apply(theta.ranks, 1, function(row) {
        all(row == p)
    })
    rank.probs[[paste(p, collapse = ",")]] <- length(theta.ranks[index, 1])/10000  #this is the estimate of the probability of this permutation
}

In the results matrix, the position indicates the school index and the value indicates the theta rank (smallest=1 and largest=3). This can be confusing to read since it does not correspond to the the subscript indexing in the problem definition. I therefore list out each probability with the simulation result to avoid any ambiguity.

\( P(\theta_1<\theta_2<\theta_3) \)

rank.probs[["1,2,3"]]
## [1] 0.0056

\( P(\theta_1<\theta_3<\theta_2) \)

rank.probs[["1,3,2"]]
## [1] 0.0034

\( P(\theta_2<\theta_1<\theta_3) \)

rank.probs[["2,1,3"]]
## [1] 0.0903

\( P(\theta_2<\theta_3<\theta_1) \)

rank.probs[["3,1,2"]]
## [1] 0.6668

\( P(\theta_3<\theta_1<\theta_2) \)

rank.probs[["2,3,1"]]
## [1] 0.0152

\( P(\theta_3<\theta_2<\theta_1) \)

rank.probs[["3,2,1"]]
## [1] 0.2187

It seems clear that school 1 usually has the largest average study time among its students. Schools 2 and 3 have roughly the same levels of study time.

Problem 5.1c

The posterior probability that \( \tilde{Y}_i<\tilde{Y}_j<\tilde{Y}_k \) for all six permutations where \( \tilde{Y}_i \) is a sample from the posterior predictive distribution of school i. Note that the posterior predictive distribution is normal with mean \( \mu_n \) and variance \[ \sigma^2(1+\frac{1}{\kappa_n}) \] (textbook p. 72).

# Generate Posterior Prediction Distribution
prediction.postsample <- matrix(0, 10000, 3, dimnames = list(NULL, c("school1", 
    "school2", "school3")))
for (i in c(1, 2, 3)) {
    prediction.postsample[, i] <- rnorm(10000, mun[i], sqrt(s2.postsample[, 
        i] * (1 + 1/kn[i])))  #formula from textbook page 72
}
# determine the ranks of each posterior sample for a prediction.
pred.ranks <- t(apply(prediction.postsample, 1, rank))
pred.probs <- list()
for (p in permn(3)) {
    index <- apply(pred.ranks, 1, function(row) {
        all(row == p)
    })
    pred.probs[[paste(p, collapse = ",")]] <- length(pred.ranks[index, 1])/10000  #this is the estimate of the probability of this permutation
}

\( P(\tilde{Y}_1<\tilde{Y}_2<\tilde{Y}_3) \)

pred.probs[["1,2,3"]]
## [1] 0.104

\( P(\tilde{Y}_1<\tilde{Y}_3<\tilde{Y}_2) \)

pred.probs[["1,3,2"]]
## [1] 0.0979

\( P(\tilde{Y}_2<\tilde{Y}_1<\tilde{Y}_3) \)

pred.probs[["2,1,3"]]
## [1] 0.187

\( P(\tilde{Y}_2<\tilde{Y}_3<\tilde{Y}_1) \)

pred.probs[["3,1,2"]]
## [1] 0.268

\( P(\tilde{Y}_3<\tilde{Y}_1<\tilde{Y}_2) \)

pred.probs[["2,3,1"]]
## [1] 0.1421

\( P(\tilde{Y}_3<\tilde{Y}_2<\tilde{Y}_1) \)

pred.probs[["3,2,1"]]
## [1] 0.201

Again our prediction is that a student from school 1 would study more, but in this case the conclusion is less clear because we are predicting an individual value rather than a mean value. Even if the mean study time was truly lower in schools 2 and 3 than school 1, we might encounter an exceptionally dedicated student from school 2 with a higher study time than a below-average student from school 1. This is reflected in the “fuzzier” contrasts between the schools' predictive distributions.

Problem 5.1d

Posterior probability that \( \theta_1 \) is bigger than both \( \theta_2 \) and \( \theta_3 \). This can be determined simply by summing the two probabilities \( P(\theta_2<\theta_3<\theta_1) \) and \( P(\theta_3<\theta_2<\theta_1) \).

sum(unlist(rank.probs[c("3,1,2", "3,2,1")]))
## [1] 0.8855

Posterior probability that \( \tilde{Y}_1 \) is bigger than both \( \tilde{Y}_2 \) and \( \tilde{Y}_3 \). This can be determined simply by summing the two probabilities \( P(\tilde{Y}_2<\tilde{Y}_3<\tilde{Y}_1) \) and \( P(\tilde{Y}_3<\tilde{Y}_2<\tilde{Y}_1) \).

sum(unlist(pred.probs[c("3,1,2", "3,2,1")]))
## [1] 0.469

Problem 3c

We are asked to find the 90% credible interval and 90% highest posterior density interval for \( \theta \), and to plot the posterior density along with these intervals. In part (b) we determined that the posterior distribution of \( \theta|\vec{y} \) is a Gamma(10,1012).

# 90% Credible Interval (quantile approach)
ci <- qgamma(c(0.05, 0.95), 10, 1012)
ci
## [1] 0.005361 0.015519
# 90% HPD Interval
post.sample <- rgamma(10000, 10, 1012)
th <- as.mcmc(post.sample)
hpd <- HPDinterval(th, prob = 0.9)
hpd
##         lower   upper
## var1 0.004795 0.01469
## attr(,"Probability")
## [1] 0.9
# Create plot of posterior distribution
theta.support <- seq(0, 0.02, length = 5000)
plot(theta.support, dgamma(theta.support, 10, 1012), type = "l", xlab = expression(theta), 
    ylab = expression(paste(italic("p("), theta, "|y)")))
lines(x = c(hpd[1], hpd[1], hpd[2], hpd[2]), y = dgamma(c(0, hpd[1], hpd[2], 
    0), 10, 1012), col = gray(0.5), lwd = 2, lty = 1)
lines(x = c(ci[1], ci[1], ci[2], ci[2]), y = dgamma(c(0, ci[1], ci[2], 0), 10, 
    1012), col = gray(0), lwd = 2, lty = 2)
legend("topright", c("95% HPD interval", "95% credible interval"), col = c(gray(0.5), 
    gray(0)), lty = c(1, 2), lwd = c(2, 2), bty = "n")

plot of chunk unnamed-chunk-21

Problem 3e

The posterior predictive distribution for a new observation “z” was found to be: \[ \frac{11(1107)^{11}}{(1107+z)^{12}} \]

pz <- function(z) {
    11 * (1107)^11/((1107 + z)^12)
}
curve(pz, 0, 200, lwd = 2, ylab = "p(z|y1,...yn)")

plot of chunk unnamed-chunk-22