Markov Chain Monte Carlo simulations from Bayesian inference with rjags

1 Heterogenity of variance in jellyfish dataset
2 Markov Chain Monte Carlo simulations from Bayesian inference.
Reference

library(rjags)
library(tidyverse)
library(rjags)
library(ggmcmc)
library(polspline)
library(propagate)
library(multcomp)
library(DT)

1 Heterogenity of variance in jellyfish dataset

Dataset of JellyFish

mydatajf = readRDS(file = "mydatajf.rds")
mydatajf[, 3] = as.factor(mydatajf[, 3])
head(mydatajf, 30)

##    jellyfishsize jellyfishvolume   location
## 1         122.10              39 Location_6
## 2         100.10              21 Location_6
## 3         100.70              23 Location_6
## 4         102.30              22 Location_6
## 5          94.90              20 Location_6
## 6         116.90              22 Location_6
## 7          94.90              21 Location_6
## 8          91.50              18 Location_6
## 9          94.30              21 Location_6
## 10         85.60              15 Location_6
## 11        113.50              36 Location_6
## 12        111.50              32 Location_6
## 13        106.00              33 Location_6
## 14        115.50              35 Location_6
## 15        101.10              28 Location_6
## 16         97.40              22 Location_6
## 17         89.20              17 Location_6
## 18        121.10              35 Location_6
## 19         95.80              22 Location_6
## 20         94.70              30 Location_6
## 21        108.60              31 Location_6
## 22        100.20              19 Location_6
## 23        107.20              30 Location_6
## 24        112.60              29 Location_6
## 25        103.30              24 Location_6
## 26        118.90              27 Location_3
## 27         61.90               5 Location_3
## 28        124.70              29 Location_3
## 29        132.55              35 Location_3
## 30         91.10              17 Location_3

1.1 boxplot : Location Vs jellyfishsize

p0 <- ggplot(mydatajf,aes(x = location, y = jellyfishsize, fill = location))
p0_box<-p0+ geom_boxplot()
p0_box+ scale_fill_brewer(palette="Dark2") + theme_minimal() + labs(title= "Plot: Location Vs  jellyfishsize")

1.2 boxplot : Location Vs jellyfishvolume

library(ggplot2)

theme_set(theme_bw())
p1 <- ggplot(mydatajf,aes(x = location, y = jellyfishvolume, fill=location))
p1_box<-p1+ geom_boxplot()
p1_box + scale_fill_brewer(palette="RdBu") + theme_minimal()+ labs(title= "Plot of Location Vs  jellyfishvolume")

1.3 Classical One way ANOVA and multiple comparisons:jellyfishsize

model01<-lm(data=mydatajf,jellyfishsize~location)
anova(model01)

## Analysis of Variance Table
## 
## Response: jellyfishsize
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## location    5   5525    1105  6.1732 4.579e-05 ***
## Residuals 107  19153     179                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(model01)

## 
## Call:
## lm(formula = jellyfishsize ~ location, data = mydatajf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -44.906  -8.340   1.031   9.231  30.550 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         100.769      2.624  38.405  < 2e-16 ***
## locationLocation_2    8.467      3.748   2.259   0.0259 *  
## locationLocation_3    6.037      5.409   1.116   0.2669    
## locationLocation_4   -3.619      5.409  -0.669   0.5049    
## locationLocation_5   18.697      3.925   4.763 6.02e-06 ***
## locationLocation_6    2.471      3.748   0.659   0.5111    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.38 on 107 degrees of freedom
## Multiple R-squared:  0.2239, Adjusted R-squared:  0.1876 
## F-statistic: 6.173 on 5 and 107 DF,  p-value: 4.579e-05

plot(glht(model01, linfct = mcp(location = "Tukey")))

summary(glht(model01, linfct = mcp(location = "Tukey")))

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: lm(formula = jellyfishsize ~ location, data = mydatajf)
## 
## Linear Hypotheses:
##                              Estimate Std. Error t value Pr(>|t|)    
## Location_2 - Location_1 == 0    8.467      3.748   2.259  0.21205    
## Location_3 - Location_1 == 0    6.037      5.409   1.116  0.86859    
## Location_4 - Location_1 == 0   -3.619      5.409  -0.669  0.98415    
## Location_5 - Location_1 == 0   18.697      3.925   4.763  < 0.001 ***
## Location_6 - Location_1 == 0    2.471      3.748   0.659  0.98517    
## Location_3 - Location_2 == 0   -2.430      5.435  -0.447  0.99756    
## Location_4 - Location_2 == 0  -12.086      5.435  -2.224  0.22704    
## Location_5 - Location_2 == 0   10.231      3.960   2.583  0.10544    
## Location_6 - Location_2 == 0   -5.996      3.784  -1.584  0.60000    
## Location_4 - Location_3 == 0   -9.656      6.690  -1.443  0.69113    
## Location_5 - Location_3 == 0   12.660      5.559   2.278  0.20441    
## Location_6 - Location_3 == 0   -3.566      5.435  -0.656  0.98548    
## Location_5 - Location_4 == 0   22.317      5.559   4.015  0.00138 ** 
## Location_6 - Location_4 == 0    6.090      5.435   1.121  0.86661    
## Location_6 - Location_5 == 0  -16.227      3.960  -4.097  0.00107 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

1.4 Classical One way ANOVA and multiple comparisons:jellyfishvolume

model02<-lm(data=mydatajf,jellyfishvolume~location)
anova(model02)

## Analysis of Variance Table
## 
## Response: jellyfishvolume
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## location    5 3814.7  762.94   10.71 2.314e-08 ***
## Residuals 107 7622.4   71.24                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(model02)

## 
## Call:
## lm(formula = jellyfishvolume ~ location, data = mydatajf)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -20.2500  -4.8000   0.4231   5.5238  20.4231 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          23.577      1.655  14.244  < 2e-16 ***
## locationLocation_2    3.743      2.364   1.583    0.116    
## locationLocation_3    1.673      3.412   0.490    0.625    
## locationLocation_4   -2.202      3.412  -0.645    0.520    
## locationLocation_5   15.899      2.476   6.421 3.82e-09 ***
## locationLocation_6    2.223      2.364   0.940    0.349    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.44 on 107 degrees of freedom
## Multiple R-squared:  0.3335, Adjusted R-squared:  0.3024 
## F-statistic: 10.71 on 5 and 107 DF,  p-value: 2.314e-08

plot(glht(model02, linfct = mcp(location = "Tukey")))

summary(glht(model02, linfct = mcp(location = "Tukey")))

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: lm(formula = jellyfishvolume ~ location, data = mydatajf)
## 
## Linear Hypotheses:
##                              Estimate Std. Error t value Pr(>|t|)    
## Location_2 - Location_1 == 0    3.743      2.364   1.583  0.60080    
## Location_3 - Location_1 == 0    1.673      3.412   0.490  0.99622    
## Location_4 - Location_1 == 0   -2.202      3.412  -0.645  0.98654    
## Location_5 - Location_1 == 0   15.899      2.476   6.421  < 1e-04 ***
## Location_6 - Location_1 == 0    2.223      2.364   0.940  0.93208    
## Location_3 - Location_2 == 0   -2.070      3.428  -0.604  0.99005    
## Location_4 - Location_2 == 0   -5.945      3.428  -1.734  0.50157    
## Location_5 - Location_2 == 0   12.156      2.498   4.866  < 1e-04 ***
## Location_6 - Location_2 == 0   -1.520      2.387  -0.637  0.98733    
## Location_4 - Location_3 == 0   -3.875      4.220  -0.918  0.93831    
## Location_5 - Location_3 == 0   14.226      3.507   4.057  0.00125 ** 
## Location_6 - Location_3 == 0    0.550      3.428   0.160  0.99998    
## Location_5 - Location_4 == 0   18.101      3.507   5.162  < 1e-04 ***
## Location_6 - Location_4 == 0    4.425      3.428   1.291  0.78198    
## Location_6 - Location_5 == 0  -13.676      2.498  -5.474  < 1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

2 Markov Chain Monte Carlo simulations from Bayesian inference.

parameters are highly non-linear
needs full posterior
\[\begin{align} P(\theta|D) = \frac{P(D|\theta)P(\theta)}{P(D)} = \frac{P(D|\theta)P(\theta)}{\int_{\theta}P(D|\theta)P(\theta) d\theta} \end{align}\]
dataset of JellyFish

2.1 Pooled variance model using JAGS : jellyfishsize

data=list(y=mydatajf$jellyfishsize,
          ind=as.numeric(mydatajf$location),
          N=length(mydatajf$jellyfishsize),
          p=length(levels(mydatajf$location)),
          overall_mean=mean(mydatajf$jellyfishsize))

pooled_var="
  model {
      #######  Likelihood
      for (i in 1:N) {                    # Loop through observations
        mu[i]<-Beta[ind[i]]               # The expected values are just the group means
        y[i] ~ dnorm(mu[i],tau)           # Values treated as from a single normal
       
      }
     ############## Uninformative priors
    for (j in 1:p) {
     Beta[j]~dnorm(0,0.0001)
   
     Effect[j]<-Beta[j]-overall_mean  ### Calculate difference from overall mean
     ################### Calculate pair wise differences 
      for (n in 1:(j-1)){
        Difbeta[n,j]<-Beta[n]-Beta[j]
      }
    }
    
    tau ~ dgamma(scale, rate) ## Prior for normal distribution precision.
    scale ~ dunif(0, 1)       ### Hyper parameters for tau.
    rate ~ dunif(0, 1)
    
  }
"

model=jags.model(textConnection(pooled_var),data=data)

## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 113
##    Unobserved stochastic nodes: 9
##    Total graph size: 262
## 
## Initializing model

update(model,n.iter=1000)
output=coda.samples(model=model,variable.names=c("Difbeta","Effect"),n.iter=100000,thin=10)

ggsout <- ggs(output) 
mtout <- filter(ggsout,grepl("Difbeta",Parameter))
ggs_caterpillar(mtout) + geom_vline(xintercept = 0,col="red")

mtout<-filter(ggsout,grepl("Effect",Parameter))
ggs_caterpillar(mtout) +geom_vline(xintercept = 0,col="red")

2.2 Independent variances model: jellyfishsize

data=list(y=mydatajf$jellyfishsize,
          ind=as.numeric(mydatajf$location),
          N=length(mydatajf$jellyfishsize),
          p=length(levels(mydatajf$location)),
          overall_mean=mean(mydatajf$jellyfishsize))


ind_var="
  model {
      ### Likeihood
      for (i in 1:N) {                   ## Loop through observations
        mu[i]<-Beta[ind[i]]              ## Now set an independent tau for each group
        y[i] ~ dnorm(mu[i],tau[ind[i]])
       
      }
    
    for (j in 1:p) {
     Beta[j]~dnorm(0,0.0001)            ## Set up a prior for each grous
     tau[j] ~ dgamma(scale, rate)
      Effect[j]<-Beta[j]-overall_mean 
      for (n in 1:(j-1)){
        Difbeta[n,j]<-Beta[n]-Beta[j]
      }
    }
   
    scale ~ dunif(0, 1)
    rate ~ dunif(0, 1)
    
  }
"

model_ind =jags.model(textConnection(ind_var),data=data)

## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 113
##    Unobserved stochastic nodes: 14
##    Total graph size: 267
## 
## Initializing model

update(model_ind,n.iter=1000)
output_ind=coda.samples(model=model_ind,variable.names=c("Beta"),n.iter=100000,thin=10)
ggs_ind <-ggs(output_ind) 
mtout_ind<-filter(ggs_ind,grepl("Beta",Parameter))
ggs_caterpillar(mtout_ind) +geom_vline(xintercept = 0,col="red")

2.3 Random effects model: jellyfishsize

Edwards (2020)
Ellison (2004)

data=list(y=mydatajf$jellyfishsize,
          ind=as.numeric(mydatajf$location),
          N=length(mydatajf$jellyfishsize),
          p=length(levels(mydatajf$location)),
          overall_mean=mean(mydatajf$jellyfishsize))

rand_mod="
  model {
      ### Likeihood
      for (i in 1:N) {                  ## Loop through observations
        mu[i]<-mu_r+Beta[ind[i]]        ## This time Beta is added to an overall mean
        y[i] ~ dnorm(mu[i],tau[ind[i]])  ## Set an independent tau for each group agan. A                                              pooled variance model would also work  here
       
      }
    
    for (j in 1:p) {
     Beta[j]~dnorm(0,tau_r)            ## A single tau represents the variance between group                                        #   means
     tau[j] ~ dgamma(scale, rate)
      for (n in 1:(j-1)){
        Difbeta[n,j]<-Beta[n]-Beta[j]
      }
    }
   
    scale ~ dunif(0, 1)
    rate ~ dunif(0, 1)
    tau_r ~ dgamma(scale,rate)
    sigma_r <- 1/sqrt(tau_r)
    mu_r ~ dnorm(0,0.00001)     ## Prior for the overall mean
   
  }"

model_random =jags.model(textConnection(rand_mod),data=data)

## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 113
##    Unobserved stochastic nodes: 16
##    Total graph size: 270
## 
## Initializing model

update(model_random,n.iter=1000)
output_random =coda.samples(model=model_random,variable.names=c("sigma_r","mu_r","Difbeta","Beta"),n.iter=100000,thin=10)

ggs_random <-ggs(output_random ) 
mtout_random <-filter(ggs_random,grepl("Beta",Parameter))
ggs_caterpillar(mtout_random ) +geom_vline(xintercept = 0,col="red")

mtout_random <-filter(ggs_random,grepl("Beta",Parameter))
ggplot(data=mtout_random,aes(x=mtout_random$value,col=Parameter,fill=Parameter)) + geom_density(alpha=0.2)

ggs_random <-ggs(output_random ) 
mtout_random <-filter(ggs_random,grepl("Difbeta",Parameter))
ggs_caterpillar(mtout_random ) +geom_vline(xintercept = 0,col="red")

Reference

Edwards, Don. 2020. “Comment: The First Data Analysis Should Be Journalistic.” Ecological Applications 6 (4): 1090–4. www.jstor.org/stable/2269593.

Ellison, Aaron M. 2004. “Bayesian Inference in Ecology.” Ecology Letters 7 (6): 509–20.

DK WC

2020-01-28