Markov Chain Monte Carlo simulations from Bayesian inference with rjags
Markov Chain Monte Carlo simulations from Bayesian inference with rjags
library(rjags)
library(tidyverse)
library(rjags)
library(ggmcmc)
library(polspline)
library(propagate)
library(multcomp)
library(DT)1 Heterogenity of variance in jellyfish dataset
- Dataset of JellyFish
mydatajf = readRDS(file = "mydatajf.rds")
mydatajf[, 3] = as.factor(mydatajf[, 3])
head(mydatajf, 30)## jellyfishsize jellyfishvolume location
## 1 122.10 39 Location_6
## 2 100.10 21 Location_6
## 3 100.70 23 Location_6
## 4 102.30 22 Location_6
## 5 94.90 20 Location_6
## 6 116.90 22 Location_6
## 7 94.90 21 Location_6
## 8 91.50 18 Location_6
## 9 94.30 21 Location_6
## 10 85.60 15 Location_6
## 11 113.50 36 Location_6
## 12 111.50 32 Location_6
## 13 106.00 33 Location_6
## 14 115.50 35 Location_6
## 15 101.10 28 Location_6
## 16 97.40 22 Location_6
## 17 89.20 17 Location_6
## 18 121.10 35 Location_6
## 19 95.80 22 Location_6
## 20 94.70 30 Location_6
## 21 108.60 31 Location_6
## 22 100.20 19 Location_6
## 23 107.20 30 Location_6
## 24 112.60 29 Location_6
## 25 103.30 24 Location_6
## 26 118.90 27 Location_3
## 27 61.90 5 Location_3
## 28 124.70 29 Location_3
## 29 132.55 35 Location_3
## 30 91.10 17 Location_3
1.1 boxplot : Location Vs jellyfishsize
p0 <- ggplot(mydatajf,aes(x = location, y = jellyfishsize, fill = location))
p0_box<-p0+ geom_boxplot()
p0_box+ scale_fill_brewer(palette="Dark2") + theme_minimal() + labs(title= "Plot: Location Vs jellyfishsize")1.2 boxplot : Location Vs jellyfishvolume
library(ggplot2)
theme_set(theme_bw())
p1 <- ggplot(mydatajf,aes(x = location, y = jellyfishvolume, fill=location))
p1_box<-p1+ geom_boxplot()
p1_box + scale_fill_brewer(palette="RdBu") + theme_minimal()+ labs(title= "Plot of Location Vs jellyfishvolume") 1.3 Classical One way ANOVA and multiple comparisons:jellyfishsize
## Analysis of Variance Table
##
## Response: jellyfishsize
## Df Sum Sq Mean Sq F value Pr(>F)
## location 5 5525 1105 6.1732 4.579e-05 ***
## Residuals 107 19153 179
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call:
## lm(formula = jellyfishsize ~ location, data = mydatajf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -44.906 -8.340 1.031 9.231 30.550
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 100.769 2.624 38.405 < 2e-16 ***
## locationLocation_2 8.467 3.748 2.259 0.0259 *
## locationLocation_3 6.037 5.409 1.116 0.2669
## locationLocation_4 -3.619 5.409 -0.669 0.5049
## locationLocation_5 18.697 3.925 4.763 6.02e-06 ***
## locationLocation_6 2.471 3.748 0.659 0.5111
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.38 on 107 degrees of freedom
## Multiple R-squared: 0.2239, Adjusted R-squared: 0.1876
## F-statistic: 6.173 on 5 and 107 DF, p-value: 4.579e-05
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: lm(formula = jellyfishsize ~ location, data = mydatajf)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## Location_2 - Location_1 == 0 8.467 3.748 2.259 0.21205
## Location_3 - Location_1 == 0 6.037 5.409 1.116 0.86859
## Location_4 - Location_1 == 0 -3.619 5.409 -0.669 0.98415
## Location_5 - Location_1 == 0 18.697 3.925 4.763 < 0.001 ***
## Location_6 - Location_1 == 0 2.471 3.748 0.659 0.98517
## Location_3 - Location_2 == 0 -2.430 5.435 -0.447 0.99756
## Location_4 - Location_2 == 0 -12.086 5.435 -2.224 0.22704
## Location_5 - Location_2 == 0 10.231 3.960 2.583 0.10544
## Location_6 - Location_2 == 0 -5.996 3.784 -1.584 0.60000
## Location_4 - Location_3 == 0 -9.656 6.690 -1.443 0.69113
## Location_5 - Location_3 == 0 12.660 5.559 2.278 0.20441
## Location_6 - Location_3 == 0 -3.566 5.435 -0.656 0.98548
## Location_5 - Location_4 == 0 22.317 5.559 4.015 0.00138 **
## Location_6 - Location_4 == 0 6.090 5.435 1.121 0.86661
## Location_6 - Location_5 == 0 -16.227 3.960 -4.097 0.00107 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
1.4 Classical One way ANOVA and multiple comparisons:jellyfishvolume
## Analysis of Variance Table
##
## Response: jellyfishvolume
## Df Sum Sq Mean Sq F value Pr(>F)
## location 5 3814.7 762.94 10.71 2.314e-08 ***
## Residuals 107 7622.4 71.24
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call:
## lm(formula = jellyfishvolume ~ location, data = mydatajf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.2500 -4.8000 0.4231 5.5238 20.4231
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.577 1.655 14.244 < 2e-16 ***
## locationLocation_2 3.743 2.364 1.583 0.116
## locationLocation_3 1.673 3.412 0.490 0.625
## locationLocation_4 -2.202 3.412 -0.645 0.520
## locationLocation_5 15.899 2.476 6.421 3.82e-09 ***
## locationLocation_6 2.223 2.364 0.940 0.349
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.44 on 107 degrees of freedom
## Multiple R-squared: 0.3335, Adjusted R-squared: 0.3024
## F-statistic: 10.71 on 5 and 107 DF, p-value: 2.314e-08
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: lm(formula = jellyfishvolume ~ location, data = mydatajf)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## Location_2 - Location_1 == 0 3.743 2.364 1.583 0.60080
## Location_3 - Location_1 == 0 1.673 3.412 0.490 0.99622
## Location_4 - Location_1 == 0 -2.202 3.412 -0.645 0.98654
## Location_5 - Location_1 == 0 15.899 2.476 6.421 < 1e-04 ***
## Location_6 - Location_1 == 0 2.223 2.364 0.940 0.93208
## Location_3 - Location_2 == 0 -2.070 3.428 -0.604 0.99005
## Location_4 - Location_2 == 0 -5.945 3.428 -1.734 0.50157
## Location_5 - Location_2 == 0 12.156 2.498 4.866 < 1e-04 ***
## Location_6 - Location_2 == 0 -1.520 2.387 -0.637 0.98733
## Location_4 - Location_3 == 0 -3.875 4.220 -0.918 0.93831
## Location_5 - Location_3 == 0 14.226 3.507 4.057 0.00125 **
## Location_6 - Location_3 == 0 0.550 3.428 0.160 0.99998
## Location_5 - Location_4 == 0 18.101 3.507 5.162 < 1e-04 ***
## Location_6 - Location_4 == 0 4.425 3.428 1.291 0.78198
## Location_6 - Location_5 == 0 -13.676 2.498 -5.474 < 1e-04 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
2 Markov Chain Monte Carlo simulations from Bayesian inference.
- parameters are highly non-linear
- needs full posterior
- \[\begin{align} P(\theta|D) = \frac{P(D|\theta)P(\theta)}{P(D)} = \frac{P(D|\theta)P(\theta)}{\int_{\theta}P(D|\theta)P(\theta) d\theta} \end{align}\]
- dataset of JellyFish
2.1 Pooled variance model using JAGS : jellyfishsize
data=list(y=mydatajf$jellyfishsize,
ind=as.numeric(mydatajf$location),
N=length(mydatajf$jellyfishsize),
p=length(levels(mydatajf$location)),
overall_mean=mean(mydatajf$jellyfishsize))
pooled_var="
model {
####### Likelihood
for (i in 1:N) { # Loop through observations
mu[i]<-Beta[ind[i]] # The expected values are just the group means
y[i] ~ dnorm(mu[i],tau) # Values treated as from a single normal
}
############## Uninformative priors
for (j in 1:p) {
Beta[j]~dnorm(0,0.0001)
Effect[j]<-Beta[j]-overall_mean ### Calculate difference from overall mean
################### Calculate pair wise differences
for (n in 1:(j-1)){
Difbeta[n,j]<-Beta[n]-Beta[j]
}
}
tau ~ dgamma(scale, rate) ## Prior for normal distribution precision.
scale ~ dunif(0, 1) ### Hyper parameters for tau.
rate ~ dunif(0, 1)
}
"## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 113
## Unobserved stochastic nodes: 9
## Total graph size: 262
##
## Initializing model
update(model,n.iter=1000)
output=coda.samples(model=model,variable.names=c("Difbeta","Effect"),n.iter=100000,thin=10)
ggsout <- ggs(output)
mtout <- filter(ggsout,grepl("Difbeta",Parameter))
ggs_caterpillar(mtout) + geom_vline(xintercept = 0,col="red")mtout<-filter(ggsout,grepl("Effect",Parameter))
ggs_caterpillar(mtout) +geom_vline(xintercept = 0,col="red")2.2 Independent variances model: jellyfishsize
data=list(y=mydatajf$jellyfishsize,
ind=as.numeric(mydatajf$location),
N=length(mydatajf$jellyfishsize),
p=length(levels(mydatajf$location)),
overall_mean=mean(mydatajf$jellyfishsize))
ind_var="
model {
### Likeihood
for (i in 1:N) { ## Loop through observations
mu[i]<-Beta[ind[i]] ## Now set an independent tau for each group
y[i] ~ dnorm(mu[i],tau[ind[i]])
}
for (j in 1:p) {
Beta[j]~dnorm(0,0.0001) ## Set up a prior for each grous
tau[j] ~ dgamma(scale, rate)
Effect[j]<-Beta[j]-overall_mean
for (n in 1:(j-1)){
Difbeta[n,j]<-Beta[n]-Beta[j]
}
}
scale ~ dunif(0, 1)
rate ~ dunif(0, 1)
}
"## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 113
## Unobserved stochastic nodes: 14
## Total graph size: 267
##
## Initializing model
update(model_ind,n.iter=1000)
output_ind=coda.samples(model=model_ind,variable.names=c("Beta"),n.iter=100000,thin=10)
ggs_ind <-ggs(output_ind)
mtout_ind<-filter(ggs_ind,grepl("Beta",Parameter))
ggs_caterpillar(mtout_ind) +geom_vline(xintercept = 0,col="red")2.3 Random effects model: jellyfishsize
- Edwards (2020)
- Ellison (2004)
data=list(y=mydatajf$jellyfishsize,
ind=as.numeric(mydatajf$location),
N=length(mydatajf$jellyfishsize),
p=length(levels(mydatajf$location)),
overall_mean=mean(mydatajf$jellyfishsize))
rand_mod="
model {
### Likeihood
for (i in 1:N) { ## Loop through observations
mu[i]<-mu_r+Beta[ind[i]] ## This time Beta is added to an overall mean
y[i] ~ dnorm(mu[i],tau[ind[i]]) ## Set an independent tau for each group agan. A pooled variance model would also work here
}
for (j in 1:p) {
Beta[j]~dnorm(0,tau_r) ## A single tau represents the variance between group # means
tau[j] ~ dgamma(scale, rate)
for (n in 1:(j-1)){
Difbeta[n,j]<-Beta[n]-Beta[j]
}
}
scale ~ dunif(0, 1)
rate ~ dunif(0, 1)
tau_r ~ dgamma(scale,rate)
sigma_r <- 1/sqrt(tau_r)
mu_r ~ dnorm(0,0.00001) ## Prior for the overall mean
}"## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 113
## Unobserved stochastic nodes: 16
## Total graph size: 270
##
## Initializing model
update(model_random,n.iter=1000)
output_random =coda.samples(model=model_random,variable.names=c("sigma_r","mu_r","Difbeta","Beta"),n.iter=100000,thin=10)
ggs_random <-ggs(output_random )
mtout_random <-filter(ggs_random,grepl("Beta",Parameter))
ggs_caterpillar(mtout_random ) +geom_vline(xintercept = 0,col="red")mtout_random <-filter(ggs_random,grepl("Beta",Parameter))
ggplot(data=mtout_random,aes(x=mtout_random$value,col=Parameter,fill=Parameter)) + geom_density(alpha=0.2)ggs_random <-ggs(output_random )
mtout_random <-filter(ggs_random,grepl("Difbeta",Parameter))
ggs_caterpillar(mtout_random ) +geom_vline(xintercept = 0,col="red")Reference
Edwards, Don. 2020. “Comment: The First Data Analysis Should Be Journalistic.” Ecological Applications 6 (4): 1090–4. www.jstor.org/stable/2269593.
Ellison, Aaron M. 2004. “Bayesian Inference in Ecology.” Ecology Letters 7 (6): 509–20.
2020-01-28