In this case, there are 431 nonpitchers.
raw_data <- read.table("data/Bat.dat")
data <- subset(raw_data[raw_data$AB.1>10 & raw_data$AB.2>10,],Pitcher==0)
# data information
data_hitting_ability.1 <- matrix(0,dim(data)[1],1)
colnames(data_hitting_ability.1) <- c("Batting Average")
for (i in 1:dim(data)[1]) {
data_hitting_ability.1[i,1] <- data$H.1[i]/data$AB.1[i]
}
summary(data_hitting_ability.1)
## Batting Average
## Min. :0.0000
## 1st Qu.:0.1480
## Median :0.1744
## Mean :0.1737
## 3rd Qu.:0.1996
## Max. :0.3846
# monte carlo
posterior_hitting_ability_p2 <- matrix(0,dim(data)[1],1)
colnames(posterior_hitting_ability_p2) <- c("Batting Average")
set.seed(1)
for (i in 1:dim(data)[1]) {
a <- 1
b <- 4
p.mc5000 <- rbeta(5000,data$H.1[i]+a,data$AB.1[i]-data$H.1[i]+b)
posterior_hitting_ability_p2[i,1] <- mean(p.mc5000)
}
data$X <- 0
data$var <- 0
for (i in 1:dim(data)[1]) {
value <- (data$H.1[i]+0.25)/(data$AB.1[i]+0.5)
data$X[i] <- asin(sqrt(value))
data$var[i] <- 1/(4*data$AB.1[i])
}
# weakly informative priors
eta0 <- 1 ; t20 <- 1
mu0 <- 0.4 ; g20 <- 0.01
# starting values
m <- dim(data)[1]
n <- 1
theta <- ybar <- data$X
sigma2 <- data$var
mu <- mean(theta)
tau2 <- var(theta)
# setup MCMC
set.seed(1)
S <- 5000
THETA <- matrix(nrow=S,ncol=m)
MST <- matrix(nrow=S,ncol=2)
# MCMC algorithm
for(s in 1:S)
{
# sample new values of the thetas
for(j in 1:m)
{
vtheta <- 1/(n/sigma2[j]+1/tau2)
etheta <- vtheta*(ybar[j]*n/sigma2[j]+mu/tau2)
theta[j] <- rnorm(1,etheta,sqrt(vtheta))
}
# sample a new value of mu
vmu <- 1/(m/tau2+1/g20)
emu <- vmu*(m*mean(theta)/tau2+mu0/g20)
mu <- rnorm(1,emu,sqrt(vmu))
# sample a new value of tau2
etam <- eta0+m
ss <- eta0*t20+sum((theta-mu)^2)
tau2 <- 1/rgamma(1,etam/2,ss/2)
# store results
THETA[s,] <- theta
MST[s,] <- c(mu,tau2)
}
mcmc <- list(THETA=THETA,MST=MST)
theta.mc5000 <- apply(THETA, 2, mean)
posterior_hitting_ability_p3 <- (sin(theta.mc5000))^2
The information and histograms of our estimates \(\frac{H_{i2}}{N_{i2}}\) from Problem 6.2 (nonpitchers) and Problem 6.3 (nonpitchers) are presented below.
# estimates (Problem 2)
summary(posterior_hitting_ability_p2)
## Batting Average
## Min. :0.03119
## 1st Qu.:0.15042
## Median :0.17545
## Mean :0.17506
## 3rd Qu.:0.19925
## Max. :0.35549
# histogram (Problem 2)
hist(posterior_hitting_ability_p2,main=expression("Posterior "~p[i]),
xlab="Batting Average",breaks=25)
# estimates (Problem 3)
posterior_hitting_ability_p3 <- matrix(posterior_hitting_ability_p3,dim(data)[1],1)
colnames(posterior_hitting_ability_p3) <- c("Batting Average")
summary(posterior_hitting_ability_p3)
## Batting Average
## Min. :0.07462
## 1st Qu.:0.15484
## Median :0.17487
## Mean :0.17422
## 3rd Qu.:0.19386
## Max. :0.28815
# histogram (Problem 3)
hist(posterior_hitting_ability_p3,main=expression("Posterior "~p[i]),
xlab="Batting Average",breaks=25)
The MSE of the estimates from Problem 6.2 (nonpitchers) is slightly larger than the MSE of the estimates from Problem 6.3 (nonpitchers). We believe that the reason for this is because the variance of the estimates from Problem 6.3 (nonpitchers) is smaller than the variance of the estimates from Problem 6.2 (nonpitchers) and the biases of the estimates from Problem 6.2 (nonpitchers) and 6.3 (nonpitchers) are similar based on the information and histograms in Problem 6.4 (nonpitchers).
data_hitting_ability.2 <- matrix(0,dim(data)[1],1)
colnames(data_hitting_ability.2) <- c("Batting Average")
for (i in 1:dim(data)[1]) {
data_hitting_ability.2[i,1] <- data$H.2[i]/data$AB.2[i]
}
#MSE
mse2 <- sum((posterior_hitting_ability_p2-data_hitting_ability.2)^2)/nrow(data)
mse3 <- sum((posterior_hitting_ability_p3-data_hitting_ability.2)^2)/nrow(data)
MSE <- matrix(c(mse2,mse3),1,2)
rownames(MSE) <- c("MSE")
colnames(MSE) <- c("Problem 6.2","Problem 6.3")
MSE
## Problem 6.2 Problem 6.3
## MSE 0.01169603 0.01074855