5.1)

Analyze the data from each of the three schools separately using the normal model with a conjugate prior distribution, where \(\mu_0 = 5, \sigma^2_0 = 4, \kappa_0 = 1, \nu_0 = 2\).
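
For reference, the standard conjugate updates for this model, which the code below implements, are

\[
\mu_n = \frac{\kappa_0 \mu_0 + n \bar{y}}{\kappa_0 + n}, \qquad \kappa_n = \kappa_0 + n, \qquad \nu_n = \nu_0 + n,
\]

\[
\sigma^2_n = \frac{1}{\nu_n}\left[\nu_0 \sigma^2_0 + (n-1)s^2 + \frac{\kappa_0 n}{\kappa_n}(\bar{y} - \mu_0)^2\right],
\]

with \(1/\sigma^2 | y_1, \ldots, y_n \sim \text{gamma}(\nu_n/2,\ \nu_n \sigma^2_n/2)\) and \(\theta | \sigma^2, y_1, \ldots, y_n \sim \text{normal}(\mu_n,\ \sigma^2/\kappa_n)\).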

a)

Compute or approximate the posterior means and 95 percent confidence intervals for the mean \(\theta\) and standard deviation \(\sigma\) from each school.

The posterior mean of \(\theta\) is available in closed form as \(\mu_n\); the intervals (and, below, the posterior mean of \(\sigma\)) are approximated with 10,000 Monte Carlo draws from the joint posterior.

School 1:

one <- scan("school1.dat")
two <- scan("school2.dat")
three <- scan("school3.dat")

mu0 <- 5
s20 <- 4
k0 <- 1
v0 <- 2

#posterior mean for school 1
n <- length(one)
ybar <- mean(one)
s2 <- var(one)

mu_n <- (k0 * mu0 + n * ybar)/(k0 + n)
mu_n
## [1] 9.292308
#95 percent confidence intervals for
#school 1 using the Monte Carlo procedure
set.seed(8675309)

vn <- v0 + n 
kn <- k0 + n

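#posterior scale sigma^2_n: pools the prior variance, the sample
#variance, and a penalty for disagreement between ybar and mu0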
s2n <- (1/vn) * (v0 * s20 + (n-1) * s2 + ((k0 * n )/kn) * (ybar - mu0)^2 )

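#draw sigma^2 from its inverse-gamma marginal posterior, then
#theta given sigma^2, giving draws from the joint posterior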
s2.postsample1 <- 1/rgamma(10000, vn/2, vn * s2n / 2)
theta1.postsample <- rnorm(10000, mu_n, sqrt(s2.postsample1/(n + k0)))

#95 percent confidence interval for theta
quantile(theta1.postsample, c(.025, .975))
##      2.5%     97.5% 
##  7.777865 10.805247
#95 percent confidence interval for sigma
s.postsample <- sqrt(s2.postsample1)
quantile(s.postsample, c(.025, .975))
##     2.5%    97.5% 
## 3.007165 5.178486

School 2:

mu0 <- 5
s20 <- 4
k0 <- 1
v0 <- 2

#posterior mean for school 2
n <- length(two)
ybar <- mean(two)
s2 <- var(two)

mu_n <- (k0 * mu0 + n * ybar)/(k0 + n)
mu_n
## [1] 6.94875
#95 percent confidence intervals for
#school 2 using the Monte Carlo procedure
set.seed(8675309)

vn <- v0 + n 
kn <- k0 + n

s2n <- (1/vn) * (v0 * s20 + (n-1) * s2 + ((k0 * n )/kn) * (ybar - mu0)^2 )

s2.postsample2 <- 1/rgamma(10000, vn/2, vn * s2n / 2)
theta2.postsample <- rnorm(10000, mu_n, sqrt(s2.postsample2/(n + k0)))

#95 percent confidence interval for theta
quantile(theta2.postsample, c(.025, .975))
##     2.5%    97.5% 
## 5.158684 8.760460
#95 percent confidence interval for sigma
s.postsample <- sqrt(s2.postsample2)
quantile(s.postsample, c(.025, .975))
##     2.5%    97.5% 
## 3.345170 5.903717

School 3:

mu0 <- 5
s20 <- 4
k0 <- 1
v0 <- 2

#posterior mean for school 3
n <- length(three)
ybar <- mean(three)
s2 <- var(three)

mu_n <- (k0 * mu0 + n * ybar)/(k0 + n)
mu_n
## [1] 7.812381
#95 percent confidence intervals for
#school 3 using the Monte Carlo procedure
set.seed(8675309)

vn <- v0 + n 
kn <- k0 + n

s2n <- (1/vn) * (v0 * s20 + (n-1) * s2 + ((k0 * n )/kn) * (ybar - mu0)^2 )

s2.postsample3 <- 1/rgamma(10000, vn/2, vn * s2n / 2)
theta3.postsample <- rnorm(10000, mu_n, sqrt(s2.postsample3/(n + k0)))

#95 percent confidence interval for theta
quantile(theta3.postsample, c(.025, .975))
##     2.5%    97.5% 
## 6.167908 9.413423
#95 percent confidence interval for sigma
s.postsample <- sqrt(s2.postsample3)
quantile(s.postsample, c(.025, .975))
##     2.5%    97.5% 
## 2.800167 5.132818
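
The posterior means of \(\sigma\) requested in part a) can be approximated directly from the saved variance draws (a quick sketch; the estimates come straight from the samples above):

#Monte Carlo estimates of E[sigma | y] for schools 1-3
mean(sqrt(s2.postsample1))
mean(sqrt(s2.postsample2))
mean(sqrt(s2.postsample3))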

b)

Approximate the posterior probability that \(\theta_i < \theta_j < \theta_k\) for all six permutations {i,j,k} of {1,2,3}.

theta1 <- theta1.postsample
theta2 <- theta2.postsample
theta3 <- theta3.postsample

#Monte Carlo estimate of P(small < med < large): the fraction of
#joint posterior draws falling in that order
permutation_prob <- function(small, med, large) {
  mean(small < med & med < large)
}

### P(theta1 < theta2 < theta3) 

permutation_prob(theta1, theta2, theta3)
## [1] 0.0056
# the remaining probabilities are labeled by the
# argument order (smallest, middle, largest),
# so separate comments are omitted

permutation_prob(theta2, theta1, theta3)
## [1] 0.0859
permutation_prob(theta1, theta3, theta2)
## [1] 0.0042
permutation_prob(theta3, theta1, theta2)
## [1] 0.0144
permutation_prob(theta3, theta2, theta1)
## [1] 0.2166
permutation_prob(theta2, theta3, theta1)
## [1] 0.6733
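
As a check, the six orderings partition the sample space (ties occur with probability zero), so the six estimates should sum to one, and they do:

sum(c(0.0056, 0.0859, 0.0042, 0.0144, 0.2166, 0.6733))
## [1] 1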

c)

Approximate the posterior probability that \(\tilde{Y}_i < \tilde{Y}_j < \tilde{Y}_k\) for the same six permutations as in part b), where \(\tilde{Y}_i\) is a draw from the posterior predictive distribution of school \(i\).

set.seed(8675309)
sig1 <- sqrt(s2.postsample1)
sig2 <- sqrt(s2.postsample2)
sig3 <- sqrt(s2.postsample3)

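#posterior predictive draws: each simulated observation pairs its own
#(theta, sigma) draw, so parameter uncertainty carries into the
#predictive distribution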
pred1 <- rnorm(10000, theta1, sig1)
pred2 <- rnorm(10000, theta2, sig2)
pred3 <- rnorm(10000, theta3, sig3)


permutation_prob(pred1, pred2, pred3)
## [1] 0.1099
permutation_prob(pred2, pred1, pred3)
## [1] 0.185
permutation_prob(pred1, pred3, pred2)
## [1] 0.1012
permutation_prob(pred3, pred1, pred2)
## [1] 0.1257
permutation_prob(pred3, pred2, pred1)
## [1] 0.2024
permutation_prob(pred2, pred3, pred1)
## [1] 0.2758

d)

The posterior probability that \((\theta_1 > \theta_2) \cap (\theta_1 > \theta_3)\), followed by the posterior probability that \((\tilde{Y}_1 > \tilde{Y}_2) \cap (\tilde{Y}_1 > \tilde{Y}_3)\).

mean(theta1 > theta2 & theta1 > theta3)
## [1] 0.8899
mean(pred1 > pred2 & pred1 > pred3)
## [1] 0.4782

The event is much more probable for the school means than for single future observations: a new observation adds the within-school variability \(\sigma_i\) on top of the uncertainty about \(\theta_i\), so the predictive distributions overlap far more than the posteriors of the \(\theta_i\) do.

5.2)

Compute \(P(\theta_A < \theta_B | y_A, y_B)\) under the prior \(\mu_0 = 75\), \(\sigma^2_0 = 100\) for each \(\kappa_0 = \nu_0 \in \{1, 2, 4, 8, 16, 32, 64\}\), given \(n_A = n_B = 16\), \(\bar{y}_A = 75.2\), \(s_A = 7.3\), \(\bar{y}_B = 77.5\), and \(s_B = 8.1\), and examine how the answer depends on the strength of the prior.

set.seed(8675309)
n <- 16
ya <- 75.2
sa <- 7.3

yb <- 77.5
sb <- 8.1

mu0 <-  75
s20 <- 100
k0 <- c(1,2,4,8,16,32, 64)
v0 <- c(1,2,4,8,16,32, 64)

kn <- k0 + n
vn <- v0 + n

data <- data.frame(k0 = k0, v0 = v0)

#calculate probabilities for each prior setting
probs <- c()
for(i in 1:length(k0)){
  
  #monte carlo sim for A
  muna <- (k0[i] * mu0 + n * ya)/kn[i]
  s2na <- 1/vn[i] * (v0[i] * s20 + (n-1) * sa^2 + (k0[i] * n)/kn[i] * (ya - mu0)^2)
  s.postsample.a <- sqrt(1/rgamma(10000, vn[i]/2, s2na * vn[i]/2))
  theta.postsample.a <- rnorm(10000, muna, s.postsample.a/sqrt(kn[i]))
  
  #monte carlo sim for B
  munb <- (k0[i] * mu0 + n * yb)/kn[i]
  s2nb <- 1/vn[i] * (v0[i] * s20 + (n-1) * sb^2 + (k0[i] * n)/kn[i] * (yb - mu0)^2)
  s.postsample.b <- sqrt(1/rgamma(10000, vn[i]/2, s2nb * vn[i]/2))
  theta.postsample.b <- rnorm(10000, munb, s.postsample.b/sqrt(kn[i]))
  
  #approximate P(theta_A < theta_B) from the paired draws
  probs <- c(probs, mean(theta.postsample.a < theta.postsample.b))
}
data$probabilities <- probs

plot(x = v0, y = probs, xlab = "v0 = k0", ylab = "Probabilities", main = "P(theta_A < theta_B| y_A, y_B)", type = "l")

We can conceptualize \(\kappa_0\) and \(\nu_0\) as prior sample sizes. Someone with a large prior sample size, i.e., strong confidence that the prior mean is 75 and the prior variance is 100, would be less certain that \(\theta_A < \theta_B | y_A, y_B\): a strong prior pulls both posteriors toward the common prior mean, pushing the probability toward 1/2. Someone with a weak prior lets the data dominate and would be correspondingly more certain that \(\theta_A < \theta_B | y_A, y_B\).