Analyze the data from each of the three schools seperately using the normal model with a conjugate prior distribution. where: \(\mu_0 = 5, \sigma^2_0 = 4, \kappa_0 = 1, \nu_0 = 2\)
Compute or approximate the posterior means and 95 percent confidence intervals for mean \(\theta\) and standard deviation \(\sigma\) from each school.
School 1:
one <- scan("school1.dat")
two <- scan("school2.dat")
three <- scan("school3.dat")
mu0 <- 5
s20 <- 4
k0 <- 1
v0 <- 2
#posterior mean for school 1
n <- length(one)
ybar <- mean(one)
s2 <- var(one)
mu_n <- (k0 * mu0 + n * ybar)/(k0 + n)
mu_n
## [1] 9.292308
#95 percent confidence intervals for
#school 1 using monte carlo procedure
set.seed(8675309)
vn <- v0 + n
kn <- k0 + n
s2n <- (1/vn) * (v0 * s20 + (n-1) * s2 + ((k0 * n )/kn) * (ybar - mu0)^2 )
s2.postsample1 <- 1/rgamma(10000, vn/2, vn * s2n / 2)
theta1.postsample <- rnorm(10000, mu_n, sqrt(s2.postsample1/(n + k0)))
#95 percent confidence interval for theta
quantile(theta1.postsample, c(.025, .975))
## 2.5% 97.5%
## 7.777865 10.805247
#95 percent confidence interval for sigma
s.postsample <- sqrt(s2.postsample1)
quantile(s.postsample, c(.025, .975))
## 2.5% 97.5%
## 3.007165 5.178486
School 2:
mu0 <- 5
s20 <- 4
k0 <- 1
v0 <- 2
#posterior mean for school 1
n <- length(two)
ybar <- mean(two)
s2 <- var(two)
mu_n <- (k0 * mu0 + n * ybar)/(k0 + n)
mu_n
## [1] 6.94875
#95 percent confidence intervals for
#school 1 using monte carlo procedure
set.seed(8675309)
vn <- v0 + n
kn <- k0 + n
s2n <- (1/vn) * (v0 * s20 + (n-1) * s2 + ((k0 * n )/kn) * (ybar - mu0)^2 )
s2.postsample2 <- 1/rgamma(10000, vn/2, vn * s2n / 2)
theta2.postsample <- rnorm(10000, mu_n, sqrt(s2.postsample2/(n + k0)))
#95 percent confidence interval for theta
quantile(theta2.postsample, c(.025, .975))
## 2.5% 97.5%
## 5.158684 8.760460
#95 percent confidence interval for sigma
s.postsample <- sqrt(s2.postsample2)
quantile(s.postsample, c(.025, .975))
## 2.5% 97.5%
## 3.345170 5.903717
School 3:
mu0 <- 5
s20 <- 4
k0 <- 1
v0 <- 2
#posterior mean for school 1
n <- length(three)
ybar <- mean(three)
s2 <- var(three)
mu_n <- (k0 * mu0 + n * ybar)/(k0 + n)
mu_n
## [1] 7.812381
#95 percent confidence intervals for
#school 1 using monte carlo procedure
set.seed(8675309)
vn <- v0 + n
kn <- k0 + n
s2n <- (1/vn) * (v0 * s20 + (n-1) * s2 + ((k0 * n )/kn) * (ybar - mu0)^2 )
s2.postsample3 <- 1/rgamma(10000, vn/2, vn * s2n / 2)
theta3.postsample <- rnorm(10000, mu_n, sqrt(s2.postsample3/(n + k0)))
#95 percent confidence interval for theta
quantile(theta3.postsample, c(.025, .975))
## 2.5% 97.5%
## 6.167908 9.413423
#95 percent confidence interval for sigma
s.postsample <- sqrt(s2.postsample3)
quantile(s.postsample, c(.025, .975))
## 2.5% 97.5%
## 2.800167 5.132818
Approximate the posterior probability that \(\theta_i < \theta_j < \theta_k\) for all six permutations {i,j,k} of {1,2,3}.
theta1 <- theta1.postsample
theta2 <- theta2.postsample
theta3 <- theta3.postsample
permutation_prob = function(small, med, large){
sum = 0
for(i in 1:10000){
if(med[i] > small[i]){
if(large[i] > med[i]){
sum = sum + 1
}
}
}
sum/10000
}
### P(theta1 < theta2 < theta3)
permutation_prob(theta1, theta2, theta3)
## [1] 0.0056
# I won't leave a comment labeling the
# other probabilities since the order of
# arguments is sufficient
permutation_prob(theta2, theta1, theta3)
## [1] 0.0859
permutation_prob(theta1, theta3, theta2)
## [1] 0.0042
permutation_prob(theta3, theta1, theta2)
## [1] 0.0144
permutation_prob(theta3, theta2, theta1)
## [1] 0.2166
permutation_prob(theta2, theta3, theta1)
## [1] 0.6733
The posterior probability that \(\tilde{Y_i} < \tilde{Y_j} < \tilde{Y_k}\) for the same six permutations as part b).
set.seed(8675309)
sig1 <- sqrt(s2.postsample1)
sig2 <- sqrt(s2.postsample2)
sig3 <- sqrt(s2.postsample3)
pred1 <- rnorm(10000, theta1, sig1)
pred2 <- rnorm(10000, theta2, sig2)
pred3 <- rnorm(10000, theta3, sig3)
permutation_prob(pred1, pred2, pred3)
## [1] 0.1099
permutation_prob(pred2, pred3, pred1)
## [1] 0.2758
permutation_prob(pred1, pred3, pred2)
## [1] 0.1012
permutation_prob(pred3, pred1, pred2)
## [1] 0.1257
permutation_prob(pred3, pred2, pred1)
## [1] 0.2024
permutation_prob(pred2, pred3, pred1)
## [1] 0.2758
The posterior probability that \((\theta_1 > \theta_2) \cap (\theta_1 > \theta_3)\) followed by the posterior probability that \((\tilde{Y_1} > \tilde{Y_2}) \cap (\tilde{Y_1} > \tilde{Y_3})\)
length(theta1[theta1>theta2 & theta1 > theta3])/10000
## [1] 0.8899
length(pred1[pred1 > pred2 & pred1 > pred3])/10000
## [1] 0.4782
\(P(\theta_A < \theta_B | y_A, y_B)\)
set.seed(8675309)
n <- 16
ya <- 75.2
sa <- 7.3
yb <- 77.5
sb <- 8.1
mu0 <- 75
s20 <- 100
k0 <- c(1,2,4,8,16,32, 64)
v0 <- c(1,2,4,8,16,32, 64)
kn <- k0 + n
vn <- v0 + n
data <- data.frame(k0 = k0, v0 = v0)
#calculate probabilites for each parameter value
probs <- c()
for(i in 1:length(k0)){
#monte carlo sim for A
muna <- (k0[i] * mu0 + n * ya)/kn[i]
s2na <- 1/vn[i] * (v0[i] * s20 + (n-1) * sa^2 + (k0[i] * n)/kn[i] * (ya - mu0)^2)
s.postsample.a <- sqrt(1/rgamma(10000, vn[i]/2, s2na * vn[i]/2))
theta.postsample.a <- rnorm(10000, muna, s.postsample.a/sqrt(kn[i]))
#monte carlo sim for B
munb <- (k0[i] * mu0 + n * yb)/kn[i]
s2nb <- 1/vn[i] * (v0[i] * s20 + (n-1) * sb^2 + (k0[i] * n)/kn[i] * (yb - mu0)^2)
s.postsample.b <- sqrt(1/rgamma(10000, vn[i]/2, s2nb * vn[i]/2))
theta.postsample.b <- rnorm(10000, munb, s.postsample.b/sqrt(kn[i]))
#approximate probability theta_A < theta_B
mean <- mean(theta.postsample.a < theta.postsample.b)
probs <- c(probs, mean)
}
data$probabilites <- probs
plot(x = v0, y = probs, xlab = "v0 = k0", ylab = "Probabilities", main = "P(theta_A < theta_B| y_A, y_B)", type = "l")
We can conceptualize \(\kappa_0\) and \(\nu_0\) as the prior sample size. So if someone has a large prior sample size, i.e.ย they are very certain that the prior mean is 75 and prior variance is 100, they would in turn be less certain that \(\theta_A < \theta_B|y_A, y_B\). However, if someone has a weak prior assumption, they would be more certain that \(\theta_A < \theta_B|y_A, y_B\).