Problem 11.2

A flat prior is used with the proposal density be multivariate normal.

likelihood <- function(a, b, data = bioassay) {
  # define the inverse of logit()
  logistic <- function(x) 1 / (1 + exp(-x))
  p <- logistic(a + b * data$x)
  prod(p^data$y * (1-p)^(data$n-data$y))
}

post <- function(a, b) likelihood(a, b) * 1

# proposal is bivariate Normal
a <- 1
s <- a * diag(2)

dprop <- function(theta, m, sigma = s) dmvnorm(theta, mean = t(m), sigma = s)
rprop <- function(m, sigma = s) rmvnorm(1, mean = t(m), sigma = s)


acpt <- 0

theta <- data.frame(a = NA, b = NA)
fit <- glm(cbind(y, n - y) ~ x, family = binomial, data = bioassay)
theta[1, ] <- fit$coefficients


for (i in 2:2500) {
  theta.prop <- rprop(theta[i-1, ])
  
  log.r <- log(post(theta.prop[1], theta.prop[2])) - 
    log(post(theta[i-1, 1], theta[i-1, 2])) + 
    log(dprop(theta[i-1, ], theta.prop)) - 
    log(dprop(theta.prop, theta[i-1, ]))
  
  if (runif(1) < exp(log.r)) {
    theta[i, ] <- theta.prop
    if (i > 500) acpt <- acpt + 1
  } else {
    theta[i, ] <- theta[i-1, ]
  }
}

The above algorithm have acceptance rate 0.6355.

plot.ts(theta[-(1:500), ])

Problem 14.1

a) Frequentist results:

fit <- lm(log(radon) ~ . - 1, data = radon)
summary(fit)
## 
## Call:
## lm(formula = log(radon) ~ . - 1, data = radon)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.65335 -0.55722 -0.01608  0.59331  1.85457 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## countyBlueEarth   1.9561     0.2158   9.066 6.19e-11 ***
## countyClay        1.8763     0.2294   8.178 8.14e-10 ***
## countyGoodhue     1.9182     0.2243   8.553 2.71e-10 ***
## ffFirstFloor     -0.3283     0.3155  -1.041    0.305    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7895 on 37 degrees of freedom
## Multiple R-squared:  0.8599, Adjusted R-squared:  0.8447 
## F-statistic: 56.77 on 4 and 37 DF,  p-value: 2.74e-15

Bayesian results are obtained by direct simulation. Get the variance estimate first by sample from gamma distribution:

n <- nrow(radon)
tao <- rgamma(1000, (n-4)/2, sum(fit$residuals^2)/2)
s2 <- 1/tao
summary(sqrt(s2))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.5779  0.7374  0.7984  0.8055  0.8631  1.1600
summary(exp(sqrt(s2)))  # original scale
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.782   2.091   2.222   2.248   2.371   3.189

Sample \(\beta\)’s from multivariate normal distribution:

x <- model.matrix(fit)
vb <- solve(t(x) %*% x)
beta.hat <- vb %*% t(x) %*% log(radon$radon)
sigma <- lapply(s2, function(s2) s2 * vb)
beta <- t(sapply(sigma, rmvnorm, n = 1, mean = beta.hat))
colnames(beta) <- colnames(x)
xtable(summary(beta))
% latex table generated in R 3.1.3 by xtable 1.7-4 package % Sun Mar 15 00:06:43 2015

These are the effects of county and whether basement or first floor. For example, if you want to estimate the mean of radon measurement for county Clay and on the first floor, the bayesian estimate is \(exp(1.8784-0.3313) = 4.697827\).

b) Predict radon measurements from Blue Earth (basement):

new.x1 <- c(1, 0, 0, 0)
pred1 <- beta %*% new.x1  # 1000 point estimates
log.y1 <- rnorm(1000, mean = pred1, sd = sqrt(s2))
quantile(exp(log.y1), c(.025, .975))
##      2.5%     97.5% 
##  1.438168 38.938467

Predict radon measurements from Blue Earth (first floor):

new.x2 <- c(1, 0, 0, 1)
pred2 <- beta %*% new.x2  # 1000 point estimates
log.y2 <- rnorm(1000, mean = pred2, sd = sqrt(s2))
quantile(exp(log.y2), c(.025, .975))
##       2.5%      97.5% 
##  0.9018839 26.6643201