Bristol Surgey example

 model.1 <- "
model {
  for (i in 1:12) {
    y[i]     ~ dbin(theta, n[i])
    res[i]  <- (y[i] - n[i]*theta)/sqrt(n[i]*theta*(1-theta))
    res2[i] <- res[i]*res[i]
  }
  theta      ~ dunif(0, 1)
  X2.obs    <- sum(res2[])   # sum of squared stand. resids
}"

data.hosp <- list(y=c(41,25,24,23,25,42,24,53,26,25,58,31),
                 n=c(143,187,323,122,164,405,239,482,195,177,581,301))

jags.1 <- jags.model(textConnection(model.1),data=data.hosp, n.adapt=1500)

## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 12
##    Unobserved stochastic nodes: 1
##    Total graph size: 102
## 
## Initializing model

test.1 <- coda.samples(jags.1, c('theta','X2.obs'), n.adapt=1500, n.iter=10000)
   summary(test.1)

## 
## Iterations = 1501:11500
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 10000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##           Mean       SD  Naive SE Time-series SE
## X2.obs 59.0302 2.746982 2.747e-02      3.550e-02
## theta   0.1199 0.005587 5.587e-05      6.905e-05
## 
## 2. Quantiles for each variable:
## 
##           2.5%    25%     50%     75%  97.5%
## X2.obs 56.7583 57.064 58.0272 60.0452 66.455
## theta   0.1093  0.116  0.1199  0.1237  0.131

   plot(test.1)

   autocorr.plot(test.1)

test.1 <- coda.samples(jags.1, c('res'), n.adapt=1500, n.iter=10000)
   out <- as.matrix(test.1)
       boxplot(out)

#order plot
       summary(test.1)

## 
## Iterations = 11501:21500
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 10000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##            Mean     SD Naive SE Time-series SE
## res[1]   6.1573 0.3341 0.003341       0.004186
## res[2]   0.5915 0.2503 0.002503       0.003138
## res[3]  -2.5123 0.2618 0.002618       0.003283
## res[4]   2.3445 0.2403 0.002403       0.003012
## res[5]   1.2940 0.2495 0.002495       0.003128
## res[6]  -0.9895 0.3304 0.003304       0.004142
## res[7]  -0.9167 0.2506 0.002506       0.003142
## res[8]  -0.6563 0.3691 0.003691       0.004627
## res[9]   0.5887 0.2553 0.002553       0.003200
## res[10]  0.8852 0.2499 0.002499       0.003132
## res[11] -1.4731 0.3899 0.003899       0.004888
## res[12] -0.8912 0.2841 0.002841       0.003561
## 
## 2. Quantiles for each variable:
## 
##            2.5%     25%     50%     75%    97.5%
## res[1]   5.5079  5.9330  6.1536  6.3764  6.82569
## res[2]   0.1006  0.4242  0.5901  0.7564  1.08812
## res[3]  -3.0290 -2.6867 -2.5126 -2.3391 -1.99609
## res[4]   1.8752  2.1836  2.3426  2.5025  2.82316
## res[5]   0.8054  1.1270  1.2924  1.4583  1.78978
## res[6]  -1.6393 -1.2100 -0.9907 -0.7714 -0.33582
## res[7]  -1.4097 -1.0839 -0.9175 -0.7512 -0.42102
## res[8]  -1.3816 -0.9027 -0.6577 -0.4127  0.07446
## res[9]   0.0880  0.4180  0.5873  0.7569  1.09518
## res[10]  0.3955  0.7181  0.8837  1.0498  1.38121
## res[11] -2.2401 -1.7333 -1.4744 -1.2157 -0.70212
## res[12] -1.4499 -1.0808 -0.8922 -0.7037 -0.32926

       order(summary(test.1)[[2]][,3])

##  [1]  3 11  6  7 12  8  9  2 10  5  4  1

       boxplot(out[,order(summary(test.1)[[2]][,3])])

# Drop first observation if the goal is to estimate the mortality rate

 model.1 <- "
model {
  for (i in 1:11) {
    y[i]     ~ dbin(theta, n[i])
    res[i]  <- (y[i] - n[i]*theta)/sqrt(n[i]*theta*(1-theta))
    res2[i] <- res[i]*res[i]
  }
  theta      ~ dunif(0, 1)
  X2.obs    <- sum(res2[])   # sum of squared stand. resids
}"

data.hosp <- list(y=c(25,24,23,25,42,24,53,26,25,58,31),
                 n=c(187,323,122,164,405,239,482,195,177,581,301))

jags.1 <- jags.model(textConnection(model.1),data=data.hosp, n.adapt=1500)

## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 11
##    Unobserved stochastic nodes: 1
##    Total graph size: 94
## 
## Initializing model

test.1 <- coda.samples(jags.1, c('theta','X2.obs'), n.adapt=1500, n.iter=10000)
   summary(test.1)

## 
## Iterations = 1501:11500
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 10000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##           Mean      SD  Naive SE Time-series SE
## X2.obs 20.5168 1.68894 0.0168894      2.383e-02
## theta   0.1123 0.00556 0.0000556      7.115e-05
## 
## 2. Quantiles for each variable:
## 
##           2.5%     25%     50%    75%   97.5%
## X2.obs 19.3380 19.4577 19.8696 20.899 25.5060
## theta   0.1013  0.1085  0.1122  0.116  0.1232

   plot(test.1)

test.1 <- coda.samples(jags.1, c('res'), n.adapt=1500, n.iter=10000)
   out <- as.matrix(test.1)
       boxplot(out)

#order plot (quantile table, 3rd column)
       boxplot(out[,order(summary(test.1)[[2]][,3])])

Next we fit the hierarchical model

 model.h <- "
model {
  for (i in 1:11) {
    y[i]             ~ dbin(theta[i], n[i])
    logit(theta[i]) <- mu+alpha[i]
          alpha[i]   ~ dnorm(0, tau)
  }
  theta.mean <- exp(mu)/(1+exp(mu))
       mu ~ dnorm(0, 0.001)
      tau ~ dgamma(1,1)
}
"

data.hosp <- list(y=c(25,24,23,25,42,24,53,26,25,58,31),
                 n=c(187,323,122,164,405,239,482,195,177,581,301))

jags.h <- jags.model(textConnection(model.h),data=data.hosp, n.adapt=1500)

## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 11
##    Unobserved stochastic nodes: 13
##    Total graph size: 63
## 
## Initializing model

test.h <- coda.samples(jags.h, c('mu', 'alpha','theta','theta.mean'), n.adapt=1500, n.iter=10000)
   summary(test.h[,13:24])

## 
## Iterations = 1501:11500
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 10000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##               Mean      SD  Naive SE Time-series SE
## theta[1]   0.13114 0.02277 0.0002277      0.0002835
## theta[2]   0.07978 0.01404 0.0001404      0.0001827
## theta[3]   0.17445 0.03196 0.0003196      0.0004101
## theta[4]   0.14659 0.02588 0.0002588      0.0003341
## theta[5]   0.10484 0.01453 0.0001453      0.0001825
## theta[6]   0.10339 0.01832 0.0001832      0.0002321
## theta[7]   0.11035 0.01379 0.0001379      0.0001700
## theta[8]   0.13107 0.02228 0.0002228      0.0002786
## theta[9]   0.13738 0.02421 0.0002421      0.0003155
## theta[10]  0.10080 0.01216 0.0001216      0.0001485
## theta[11]  0.10474 0.01642 0.0001642      0.0002068
## theta.mean 0.11850 0.01885 0.0001885      0.0010597
## 
## 2. Quantiles for each variable:
## 
##               2.5%     25%     50%     75%  97.5%
## theta[1]   0.09012 0.11526 0.13005 0.14551 0.1797
## theta[2]   0.05444 0.07008 0.07896 0.08845 0.1101
## theta[3]   0.11777 0.15186 0.17265 0.19492 0.2428
## theta[4]   0.10113 0.12795 0.14491 0.16338 0.2019
## theta[5]   0.07768 0.09489 0.10430 0.11436 0.1346
## theta[6]   0.07075 0.09021 0.10256 0.11525 0.1417
## theta[7]   0.08506 0.10074 0.10984 0.11944 0.1385
## theta[8]   0.09078 0.11562 0.12983 0.14550 0.1784
## theta[9]   0.09350 0.12047 0.13605 0.15299 0.1882
## theta[10]  0.07786 0.09238 0.10043 0.10869 0.1258
## theta[11]  0.07530 0.09342 0.10381 0.11546 0.1395
## theta.mean 0.08570 0.10544 0.11709 0.12971 0.1599

   plot(test.h)

   autocorr.plot(test.h)

test.h <- coda.samples(jags.h, c('theta'), n.adapt=1500, n.iter=10000)
   out.h <- as.matrix(test.h)

#order plot (hospital specific mortality rate, boxplots show posterior distribution of the thetas)
       boxplot(out.h[,order(summary(test.h)[[2]][,3])],cex.axis=0.7)

#fixed and random effects are generated from the same distribution

Now we fit the independent model

 model.ind <- "
model {
  for (i in 1:11) {
    y[i]             ~ dbin(theta[i], n[i])
    theta[i]         ~ dbeta(1,1)
  }
}
"

data.hosp <- list(y=c(25,24,23,25,42,24,53,26,25,58,31),
                 n=c(187,323,122,164,405,239,482,195,177,581,301))

jags.ind <- jags.model(textConnection(model.ind),data=data.hosp, n.adapt=1500)

## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 11
##    Unobserved stochastic nodes: 11
##    Total graph size: 34
## 
## Initializing model

test.ind <- coda.samples(jags.ind, c('theta'), n.adapt=1500, n.iter=10000)
   summary(test.ind)

## 
## Iterations = 1501:11500
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 10000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##              Mean      SD  Naive SE Time-series SE
## theta[1]  0.13679 0.02488 0.0002488      0.0003192
## theta[2]  0.07699 0.01487 0.0001487      0.0001962
## theta[3]  0.19364 0.03514 0.0003514      0.0004554
## theta[4]  0.15638 0.02842 0.0002842      0.0003713
## theta[5]  0.10582 0.01512 0.0001512      0.0001936
## theta[6]  0.10392 0.01963 0.0001963      0.0002492
## theta[7]  0.11104 0.01418 0.0001418      0.0001766
## theta[8]  0.13756 0.02499 0.0002499      0.0003180
## theta[9]  0.14513 0.02628 0.0002628      0.0003436
## theta[10] 0.10130 0.01254 0.0001254      0.0001590
## theta[11] 0.10544 0.01701 0.0001701      0.0002156
## 
## 2. Quantiles for each variable:
## 
##              2.5%     25%     50%     75%  97.5%
## theta[1]  0.09203 0.11929 0.13554 0.15347 0.1883
## theta[2]  0.04995 0.06642 0.07628 0.08651 0.1083
## theta[3]  0.12939 0.16942 0.19196 0.21623 0.2675
## theta[4]  0.10535 0.13647 0.15464 0.17465 0.2167
## theta[5]  0.07809 0.09527 0.10542 0.11561 0.1369
## theta[6]  0.06816 0.08993 0.10292 0.11673 0.1447
## theta[7]  0.08437 0.10114 0.11061 0.12030 0.1402
## theta[8]  0.09240 0.12004 0.13629 0.15376 0.1901
## theta[9]  0.09718 0.12712 0.14369 0.16183 0.2016
## theta[10] 0.07795 0.09248 0.10086 0.10951 0.1274
## theta[11] 0.07450 0.09344 0.10484 0.11628 0.1412

   plot(test.ind)

   autocorr.plot(test.ind)

   out.i <- as.matrix(test.ind)
       boxplot(out.i)

#order plot
       boxplot(out.i[,order(summary(test.ind)[[2]][,3])],cex.axis=0.5)

# Comparison of hierarchical and Independent Model: Difference between this independent model and hierrarchical model: here the parameters that describe hospital specific mortality rates are independent. In the hierarchical model, they are linked by the variance.

boxplot( cbind(out.i[,order(summary(test.ind)[[2]][,3])],
               out.h[,order(summary(test.h)[[2]][,3])]),cex.lab=1.2, las=2)
 abline(h=0.075)
 abline(h=0.1)
 abline(h=0.2)

Distribution of ranks: using 11 columns and look at rank in each row. Create row table rank with 1000 iterations. Here, we removed MCM formatting by using as.matrix(). We are deriving the posterior distribution of each ranking, by ranking the estimates at each iteration of the Gibbs Sampling and generate the posterior distributions of the ranks.

#hierarchical model
test.h <- coda.samples(jags.h, c('theta'), n.adapt=1500, n.iter=1000)
   out.h <- as.matrix(test.h)
rank(out.h[1,])

##  theta[1]  theta[2]  theta[3]  theta[4]  theta[5]  theta[6]  theta[7] 
##         6         1        11         9         7         3         5 
##  theta[8]  theta[9] theta[10] theta[11] 
##         8        10         4         2

rank.out.h <- c()
  for(i in 1:nrow(out.h)){
    rank.out.h <- rbind(rank.out.h, rank(out.h[i,]))}
#to compute the 95%CI of ranks
t(apply(rank.out.h, 2,quantile, probs=c(0.025, 0.5, 0.975)))

##           2.5% 50% 97.5%
## theta[1]     2   8    11
## theta[2]     1   1     5
## theta[3]     7  11    11
## theta[4]     4   9    11
## theta[5]     1   4     8
## theta[6]     1   4     9
## theta[7]     1   5     9
## theta[8]     2   8    11
## theta[9]     3   9    11
## theta[10]    1   4     8
## theta[11]    1   4     9

  boxplot(rank.out.h,cex.axis=0.75)

    order.column <- order(apply(rank.out.h,2,median))
      boxplot(rank.out.h[,order.column],cex.axis=0.75,las=2)

# independent model
      test.ind <- coda.samples(jags.ind, c('theta'), n.adapt=1500, n.iter=10000)
           out.i <- as.matrix(test.ind)
rank.out.i <- c()
  for(i in 1:nrow(out.i)){
    rank.out.i <- rbind(rank.out.i, rank(out.i[i,]))}
#to compute the 95%CI of ranks
t(apply(rank.out.i, 2,quantile, probs=c(0.025, 0.5, 0.975)))

##           2.5% 50% 97.5%
## theta[1]     3   8    11
## theta[2]     1   1     5
## theta[3]     7  11    11
## theta[4]     4   9    11
## theta[5]     1   4     8
## theta[6]     1   4     9
## theta[7]     2   5     9
## theta[8]     3   8    11
## theta[9]     3   9    11
## theta[10]    1   4     7
## theta[11]    1   4     9

  boxplot(rank.out.i,cex.axis=0.75)

    order.column <- order(apply(rank.out.i,2,median))
      boxplot(rank.out.i[,order.column],cex.axis=0.75,las=2)

Bayesian Statistics: Hierarchical Models In-class R Script Practice

Michelle C. Hsu

February 15, 2018

Bristol Surgey example

Next we fit the hierarchical model

Now we fit the independent model