rm(list = ls())
suppressWarnings(suppressMessages( {
library(brms)
library(beeswarm)
library(vioplot)
library(glmmTMB)
library(car)
library(cowplot)
library(ggplot2)
library(tidyverse)
library(bayesplot)
} ) )Experimental treatment with edaravone in a mouse model of spinocerebellar ataxia 1
Analysis of behaviour
This page shows R code for the study Sucha et al. (2023, International Journal of Molecular Science).
Citation:
Sucha M, Benediktova S, Tichanek F, Jedlicka J, Kapl S, Jelinkova D, Purkartova Z, Tuma J, Kuncova J, Cendelin J. Experimental Treatment with Edaravone in a Mouse Model of Spinocerebellar Ataxia 1. International Journal of Molecular Sciences. 2023; 24(13):10689. https://doi.org/10.3390/ijms241310689
GitHub page: https://github.com/filip-tichanek/edaravonSCA1
1 Upload of packages
2 Data import and wrangling
## import of data
load("source_data/myEnvironment.RData")
## defining outcome variable 'fst_prop', indicating proportion in immobile state
## during forced swimming test. As the immobility in registred only if it took
## over 2 seconds, the two seconds were added. The variable ('immobile_min') originally
## represented the total immobility time in minutes.
## Now it is just proportion.
behav$fst_prop<-( (behav$immobile_min+(2/60)) /8)
## defining the variable 'sugar_prop' that will be used as outcome variable
## in the analysis of sucrose prefference
behav$sugar_prop <- behav$sugar_water_ratio / (behav$sugar_water_ratio+1)
## defining 'of_distance' variable, that is sum of distance walked
## over 10 minutes (in raw table, the distance is per minute) and is in
## units of meters (originally it was in centimeters)
behav$of_distance<-rowSums(behav[,c(8:17)])/100
summary(behav[, c('fst_prop', 'sugar_prop', 'of_distance')]) fst_prop sugar_prop of_distance
Min. :0.004167 Min. :0.1865 Min. :19.50
1st Qu.:0.139990 1st Qu.:0.6109 1st Qu.:32.72
Median :0.284500 Median :0.7335 Median :43.13
Mean :0.295127 Mean :0.6895 Mean :45.18
3rd Qu.:0.412958 3rd Qu.:0.7935 3rd Qu.:54.27
Max. :0.812688 Max. :0.8864 Max. :84.13
3 Immobility in forced swim test
3.1 Data exploration
plot((behav$immobile_min)~behav$group,las=2,xlab="",
col=cola) plot(behav$immobile_min~behav$group_sex,las=2,xlab="",
col=cola[c(1,1,2,2,3,3,4,4)]) hist(behav$fst_prop)According to plots, male sex may increases the immobility. The sex-difference seems homogeneous across groups (NO sex interaction needed to include). Dispersion is not noticeably different among the groups.
3.2 Modelling
We will try fit the model using Gaussian as well as beta regression (as the outcome - the proportion of the immobility state - is bounded to 0 and 1, beta regression may be more natural). We will fit both and will check posterior predictive check (PPC) to explore how well the both models fit the data.
As there is indication of that male sex may be associated with an increaed immobility, we will fit two versions of model - one ignoring the sex effect, the second including the sex.
3.2.1 Prior specification
Lets look what should be prior for genotype effect, utilizing previous information from Tichanek et al.
Firstly, what should be prior for and intercept if we used Gaussian model?
## re-setting reference group
behav$group<-relevel(behav$group,ref='ctrl.sca')
## defining the variable 'fst_immobility' in previously published data
young_scirep$fst_immobility <- ( ( 2+rowSums(young_scirep[,c(100:105)]) ) /360)
## mean immobility proportion in SCA1 mice
mean(young_scirep[young_scirep$genotype=="tg+",]$fst_immobility)[1] 0.5163481
## mean immobility proportion in our current data, acrros all groups
mean(behav$fst_prop)[1] 0.2951266
## standard deviation of the immobility proportion x 5 and 1.2
sd(behav$fst_prop)*c(5, 1.2)[1] 0.9655930 0.2317423
sd(young_scirep[young_scirep$genotype=="tg+",]$fst_immobility) * 1.2[1] 0.2498246
Mu for intercept will be set to the value between 0.51 and 0.29, i.e. 0.4
Sigma for the intercept will be set to SD of the fst_prop x 5, i.e. 0.97
How to set prior for the genotype effect?
## Model based on data of Tichanek et al.
model_fst<-glmmTMB(fst_immobility~genotype,data=young_scirep)
## model results
fixef(model_fst)
Conditional model:
(Intercept) genotypew
0.5163 -0.2571
## What is reccomented prior?
prios(model_fst) Estimate
-0.1498014 0.2247021
Previous data indicates that WT genotype is related to reduced immobility and our prior specification will reflects it. However, prior will be conservative and will be kept to influence estimation in small extent. Thus, prior will cover both zero effect as well as effect estimated with previous data. We will use the value calculated above, i.e. mu = 0.15, sigma = 0.225
For edaravone effect, we will use zero-effect-centered prior with sigma = 1.2 x SD of the outcome, i.e. 0.23
The same prior as for edaravone effect will be used also for the effect of male sex.
As we will fit 2 Gaussian models (one WITH sex effect, second ignoring the sex), we will define two priors.
# prior01 - without sex
prior01 <- c(set_prior("student_t(3,0.4,0.97)", class = "b", coef = "Intercept"),
set_prior("normal(-0.15,0.225)", class = "b", coef = "groupctrl.wt"),
set_prior("normal(-0.15,0.225)", class = "b", coef = "groupeda.wt"),
set_prior("normal(0,0.23)" , class = "b", coef = "groupeda.sca"))
# prior02 - with sex
prior02 <- c(set_prior("student_t(3,0.4,0.97)", class = "b", coef = "Intercept"),
set_prior("normal(-0.15,0.225)", class = "b", coef = "groupctrl.wt"),
set_prior("normal(-0.15,0.225)", class = "b", coef = "groupeda.wt"),
set_prior("normal(0,0.23)" , class = "b", coef = "groupeda.sca"),
set_prior("normal(0,0.23)" , class = "b", coef = "sex_num"))Similarly, we will define priors for beta models, using prios function and previous data, and using SD in the outcome variables
## 'mu' for prior of intercept
mean(c(logit(mean(young_scirep[young_scirep$genotype=="tg+",]$fst_immobility)),
logit(mean(behav$fst_prop))))[1] -0.402599
## 'sigma' for prior of intercept
sd(logit(behav$fst_prop))*5[1] 7.147216
## model with data from Tichanek et al. to get prior for genotype effect
model_of<-glmmTMB(fst_immobility~genotype,data=young_scirep,
family=beta_family(link="logit"))
summary(model_of) Family: beta ( logit )
Formula: fst_immobility ~ genotype
Data: young_scirep
AIC BIC logLik deviance df.resid
-25.4 -19.4 15.7 -31.4 52
Dispersion parameter for beta family (): 4.05
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.05222 0.16005 0.326 0.744
genotypew -1.21502 0.25299 -4.803 1.57e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
prios(model_of) Estimate
-0.7191515 1.0787273
Defining the priors for beta regression models
# without sex
prior03 <- c(set_prior("student_t(3,-0.4,7.14)", class = "b", coef = "Intercept"),
set_prior("normal(-0.72,1.08)", class = "b", coef = "groupctrl.wt"),
set_prior("normal(-0.72,1.08)", class = "b", coef = "groupeda.wt"),
set_prior("normal(0,1.2)" , class = "b", coef = "groupeda.sca"))
# sex included
prior04 <- c(set_prior("student_t(3,-0.4,7.14)", class = "b", coef = "Intercept"),
set_prior("normal(-0.72,1.08)", class = "b", coef = "groupctrl.wt"),
set_prior("normal(-0.72,1.08)", class = "b", coef = "groupeda.wt"),
set_prior("normal(0,1.2)" , class = "b", coef = "groupeda.sca"),
set_prior("normal(0,1.2)" , class = "b", coef = "sex_num"))3.2.2 Fitting models ignoring sex
Now we have priors and can fit the models.
As we defined prior for Intercept term, we will include the formula 0 + Intercept. We will use 8000 samples (from which 2000 will ‘warmup’ for mapping of probability landscape) per each of 2 chains. To reduce risk of convergence issues, we will increase ‘adapt_delta’ to 0.99 (default is 0.8)
At first, we will fit models ignoring the effect of sex and will firstlu decide whether to use Gaussian or beta model
## Gaussian model without sex factor
fst_model_01 <- run_model(
brm(fst_prop ~ 0+ Intercept+group,
data = behav,
prior = prior01,
family = "Gaussian", save_pars = save_pars(all = TRUE),
iter=8000, warmup=2000,chains=2,cores=1,seed=17,
control = list(adapt_delta = 0.99)),
'output/brm/fst_model_01', reuse = TRUE)
fst_model_02 <- run_model(
brm(fst_prop ~0+Intercept+group,
data=behav, prior = prior03,
family=Beta(link="logit", link_phi="log"),
save_pars = save_pars(all = TRUE),
iter=8000, warmup=2000,chains=2,cores=1,seed=17,
control = list(adapt_delta = 0.99)),
'output/brm/fst_model_02', reuse = TRUE)3.2.3 Diagnostics of models ignoring sex
We mean here diagnostics of convergence and sufficient size of posterior samples. We will use:
plotting trace of MCMC chains. Both chains should be mixed, the curves should be random without noticeable autocorrelations.
Rhat from model summary: 1 indicates convergence
Bulk_ESS and Tail_ESS from summary output: should be > 1000-2000, idealy more for paramters of interest
## trace plots
mcmc_trace(fst_model_01)mcmc_trace(fst_model_02)summary(fst_model_01) Family: gaussian
Links: mu = identity; sigma = identity
Formula: fst_prop ~ 0 + Intercept + group
Data: behav (Number of observations: 80)
Draws: 2 chains, each with iter = 8000; warmup = 2000; thin = 1;
total post-warmup draws = 12000
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 0.40 0.04 0.32 0.48 1.00 3761 5572
groupctrl.wt -0.17 0.05 -0.28 -0.06 1.00 4810 5778
groupeda.wt -0.19 0.06 -0.30 -0.08 1.00 4393 6355
groupeda.sca -0.06 0.05 -0.17 0.05 1.00 4847 6149
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.18 0.02 0.16 0.21 1.00 7071 6983
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
summary(fst_model_01) Family: gaussian
Links: mu = identity; sigma = identity
Formula: fst_prop ~ 0 + Intercept + group
Data: behav (Number of observations: 80)
Draws: 2 chains, each with iter = 8000; warmup = 2000; thin = 1;
total post-warmup draws = 12000
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 0.40 0.04 0.32 0.48 1.00 3761 5572
groupctrl.wt -0.17 0.05 -0.28 -0.06 1.00 4810 5778
groupeda.wt -0.19 0.06 -0.30 -0.08 1.00 4393 6355
groupeda.sca -0.06 0.05 -0.17 0.05 1.00 4847 6149
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.18 0.02 0.16 0.21 1.00 7071 6983
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
Convergence of both models is fine
3.2.4 Posterior predictive checks
data density: the figure shows density of the outcome variable (thick line) and densities of artificial data from simulations based on Bayesian model estimates (slimmer lines). The thick line should not deviate much from the slimmer lines (in that case, model does not predict data well and does not fit the data appropriately).
However, if model includes random effect(s) (providing shrinkage), simulated and actual data may differ.
pla<-pp_check(fst_model_01,type='dens_overlay',ndraws = 50)
plb<-pp_check(fst_model_02,type='dens_overlay',ndraws = 50)
plot_grid(pla, plb, labels=c("A", "B"), ncol = 1, nrow = 2)Here, both Gaussian (A) and beta (B) models seems fine as they predict data well.
grouped data density shows the same as previous, but per group. This is useful for evaluation potential group-specific shapes of data distribution (e.g. different spread of outcome)
pla<-pp_check(fst_model_01,type='dens_overlay_grouped',ndraws = 50,group='group')
plb<-pp_check(fst_model_02,type='dens_overlay_grouped',ndraws = 50,group='group')
plot_grid(pla, plb, labels=c("A", "B"), ncol = 1, nrow = 2)Both models seem fine
scatter plot shows average predictions (x-axis) vs. data. For Gaussian models, moving along X-axis should not change spread of datapoints
pla<-pp_check(fst_model_01,type='scatter_avg') Using all posterior draws for ppc type 'scatter_avg' by default.
plb<-pp_check(fst_model_02,type='scatter_avg') Using all posterior draws for ppc type 'scatter_avg' by default.
plot_grid(pla, plb, labels=c("A", "B"), ncol = 1, nrow = 2)Seems fine
max, min shows predicted vs. actual minimal and maximal value of outcome. Dark blue point should be among the light-blue points
pla<-pp_check(fst_model_01,type="stat_2d", stat = c("max", "min"),ndraws=200)
plb<-pp_check(fst_model_02,type="stat_2d", stat = c("max", "min"),ndraws=200)
plot_grid(pla, plb, labels=c("A", "B"), ncol = 1, nrow = 2)We see that Gaussian model predicts smaller (negative) minimal values of the proportion. As the outcome is proportion which may start from zero, the prediction of negative values does not make sense. Beta regression does better work here
mean, sd shows predicted vs. actual mean and SD of the outcome
pla<-pp_check(fst_model_01,type="stat_2d", stat = c("mean", "sd"),ndraws=200)
plb<-pp_check(fst_model_02,type="stat_2d", stat = c("mean", "sd"),ndraws=200)
plot_grid(pla, plb, labels=c("A", "B"), ncol = 1, nrow = 2)Both models predict mean and SD well.
Are there too influential observations? All pareto estimates should have k < 0.7
par(mfrow=c(1,2))
plot(loo(fst_model_01));loo(fst_model_01)
Computed from 12000 by 80 log-likelihood matrix
Estimate SE
elpd_loo 21.3 5.4
p_loo 4.4 0.7
looic -42.6 10.9
------
Monte Carlo SE of elpd_loo is 0.0.
All Pareto k estimates are good (k < 0.5).
See help('pareto-k-diagnostic') for details.
plot(loo(fst_model_02));loo(fst_model_02)
Computed from 12000 by 80 log-likelihood matrix
Estimate SE
elpd_loo 28.6 5.3
p_loo 5.2 1.3
looic -57.3 10.6
------
Monte Carlo SE of elpd_loo is 0.0.
All Pareto k estimates are good (k < 0.5).
See help('pareto-k-diagnostic') for details.
Seems fine
3.2.5 Model including sex
According to PPC, we can see that beta regression model the data more realistically and we will thus use beta regression model.
However, we observed that male sex may be associated with higher magnitude of immobility in the Forced Swimming Test. Should we include the sex factor to the model? We will use leave-one-out cross-validation (LOO-CV) to estimate whether such model provides more accurate out-of-sample prediction
fst_model_03 <- run_model(
brm(fst_prop ~0+Intercept+group+sex_num,
data=behav, prior = prior04,
family=Beta(link="logit", link_phi="log"),
save_pars = save_pars(all = TRUE),
iter=8000, warmup=2000,chains=2,cores=1,seed=17,
control = list(adapt_delta = 0.99)),
'output/brm/fst_model_03', reuse = TRUE)
mcmc_trace(fst_model_03)summary(fst_model_03) Family: beta
Links: mu = logit; phi = identity
Formula: fst_prop ~ 0 + Intercept + group + sex_num
Data: behav (Number of observations: 80)
Draws: 2 chains, each with iter = 8000; warmup = 2000; thin = 1;
total post-warmup draws = 12000
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -0.38 0.18 -0.74 -0.03 1.00 4301 6277
groupctrl.wt -1.00 0.27 -1.53 -0.47 1.00 5185 7459
groupeda.wt -0.87 0.27 -1.41 -0.35 1.00 5272 6645
groupeda.sca -0.37 0.26 -0.88 0.15 1.00 5253 6924
sex_num 0.29 0.20 -0.10 0.69 1.00 8590 7965
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
phi 4.79 0.71 3.50 6.28 1.00 7624 7732
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
Convergence of the model and effective size of samples are fine. Conclusions drawn by both beta models (ignoring vs. including sex) are basically the same. The decision thus have minimal practical implication.
Lets compare the models using the LOO-CV
fst_model_02 <- add_criterion(fst_model_02, criterion = "loo")
fst_model_03 <- add_criterion(fst_model_03, criterion = "loo")
loo_compare(fst_model_02,fst_model_03) elpd_diff se_diff
fst_model_03 0.0 0.0
fst_model_02 -0.1 1.6
No practical difference (including sex had slightly better score, but standard error of the difference is very high and the both models are practically the same. Inclusion of sex is not helpful. We will use model ‘fst_model_03’, ie beta model ignoring sex.
3.3 Posterior draws extraction
To visualize and summarize model, we will extract posterior samples of the between-groups difference.
## data frame of posterior samples
post_fix<-as.data.frame(fst_model_02, variable = c("b_Intercept","b_groupctrl.wt",
"b_groupeda.wt","b_groupeda.sca"))
## re-naming the Intercept (= SCA_0 group)
names(post_fix)[1]<-"sca_ctrl"
## obtaining posterior samples/draws for group-specific proportion of immobility
post_fix$wt_ctrl<-post_fix$sca_ctrl+post_fix$b_groupctrl.wt
post_fix$wt_eda<-post_fix$sca_ctrl+post_fix$b_groupeda.wt
post_fix$sca_eda<-post_fix$sca_ctrl+post_fix$b_groupeda.sca
## differnce between WT_0 and WT_E group
post_fix$wt_contrast<-post_fix$wt_eda-post_fix$wt_ctrl
## obtaining median estimate and bound of 95% credible intervals
groupquant<-sapply(post_fix[,c(5,6,1,7)],function(p) quantile(p, probs = c(0.025,0.975,0.5)))
## the selected model is beta model with logit link. Lets to rescale prediction
## back to proportions
groupquant<-inv.logit(groupquant)3.4 Data and model visualization
Next, we will visualize data and model. First plot will show data points and estimated prediction and 95% credible interval for the group prediction. The second will show posterior distribution of between-groups differences in the estimated proportion of time spent in immobility.
As we used beta model, we will show the effect (between-groups differences) in odds ratio, implying how many times are odds for being immobile in the given time different in one group compared to the second group. Thus, zero effect is OR = 1.
The second plot (posterior distributions) will include information about 95% credible interval about effect size and probability of direction (PD, i.e. probability that the effect goes in particular direction - is positive or negative). The higher PD, the higher estimated probability that there is difference between the groups.
## ordering of groups
behav$group<-factor(behav$group,levels=c('ctrl.wt','eda.wt','ctrl.sca','eda.sca'))
## setting spacing of plots
par(mfrow=c(1,2))
par(mar=c(2,3,0,0))
par(mgp=c(2,0.6,0))
## setting range of Y-axis
range<-c(0,1);scal<-range[2]-range[1]
## setting range of X-axis
xrange<-c(0.5,4.5);xscal=xrange[2]-xrange[1]
## plot
plot(NULL,xlim=c(xrange[1],xrange[2]),ylim=c(range[1],range[2]),xlab="",
ylab="Proportion of immobility",las=1, axes=FALSE,cex.lab=1)
## adding grey background with white lines
rect(xrange[1],range[2],xrange[2],range[1],col="grey92",border=NA)
x<-range[1]
repeat{
lines(c(xrange[1],xrange[2]),c(x,x),col="white",lwd=0.7)
x=x+0.1;if(x>range[2]){break}}
x<-1
repeat{
lines(c(x,x),c(range[1],range[2]),col="white",lwd=0.7)
x=x+1;if(x>4.5){break}}
## showing distribution of data
vioplot(behav$fst_prop~behav$group,col=colb,at=c(1:4),add=T,
border=cola,axes=FALSE,drawRect=F,lwd=1,wex=0.8)
## showing individual data points
beeswarm(behav$fst_prop~behav$group,col=colc,at=c(1:4),add=T,pwpch=(16.5+behav$sex_num) )
## showing group-specific predictions and 95% CI
tp<-groupquant[3,]
xx<-1;wid=0.25
repeat{
lines(c(xx-wid,xx+wid),c(tp[xx],tp[xx]),lwd=2,col="black");
lines(c(xx,xx),c(groupquant[1,xx],groupquant[2,xx]),lwd=1.7,col="black")
xx<-xx+1
if(xx>4){break}}
## providing axis
tckk=-0.02
axis(2,las=2,cex.axis=1.1,at=seq(range[1],range[2],by=0.2),
labels=c(rep("",length(seq(range[1],range[2],by=0.2)))),pos=xrange[1],tck=tckk)
axis(2,las=2,cex.axis=1.1,at=seq(range[1],range[2],by=0.2),pos=xrange[1],tck=tckk)
axis(side=1,las=1,cex.axis=1.1,at=c(seq(1,4,by=1)),
labels=c(rep("",length(seq(1,4,by=1)))),pos=range[1],tck=tckk)
lines(c(xrange[1],xrange[2]),c(range[1],range[1]))
## labelling the plot
text(c(1:4),c(rep(range[1]-0.046*scal,4)),xpd=T,cex=1.1,col=cola,
c("Wt","Wt", "SCA1", "SCA1"))
ypo<-c(range[1]-0.1*scal,range[1]-0.1*scal)
text(c(1:4),c(ypo[1],ypo[2],ypo[1],ypo[2]),xpd=T,cex=1.1,col=cola,
c("0","edv", "0", "edv"))
ypos<-0.9;xpos=1.4
points(xpos,ypos,pch=16,cex=1.4,col=rgb(0.4,0.4,0.4,alpha=0.6))
text(xpos+0.19*xscal,ypos,"Females",col="grey40",cex=1.2)
points(xpos,ypos-0.08*scal,pch=17,cex=1.4,col=rgb(0.4,0.4,0.4,alpha=0.6))
text(xpos+0.19*xscal,ypos-0.08*scal,"Males",col="grey40",cex=1.2)
## setting spacing pf the 2nd plot
par(mar=c(2,0,0,0))
## making data frame of effect sizes (groups differences)
dif<-data.frame(post_fix$wt_contrast,post_fix$b_groupeda.sca,
post_fix$b_groupctrl.wt)
## rescaling the effect to Odds Ratio (from log(OR), i.e. log-odds)
dif<-exp(dif)
## x-label and tickmarks
yla<-"Odds ratio for immobility"
tckk=-0.018
ste<-seq(0.2,2.8,by=0.2)
## drawing the posterior distributions
mons_poste(dif,1)
## labelling the posterior distribution plots
zpos=seq(0,1,1/3)
xx=1;ind=0.3;xpol<-2.1
text(xpol,zpos[xx]+zpos[2]*ind,
"Wt: ed x 0 ",cex=1)
xx=xx+1
text(xpol,zpos[xx]+zpos[2]*ind,
"SCA1: ed x 0 ",cex=1)
xx=xx+1
text(xpol,zpos[xx]+zpos[2]*ind,
"0: Wt x SCA1",cex=1)3.5 Conclusion
It is clear that SCA1 mice (on saline) have clearly larger immobility, but the effect of edaravone is not clear (is uncertain) and likely span from edaravone treated SCA1 reaching 0.42 times to 1.17 times larger immobility
4 Sucrose preference
4.1 Data exploration
par(mar=c(5,3.5,1,1))
par(mgp=c(1.9,0.7,0))
par(mfrow=c(3,1))
plot((behav$sugar_prop)~behav$group,las=2,xlab="",
col=cola)
plot((behav$sugar_prop)~behav$group_sex,las=2,xlab="",
col=cola[c(1,1,2,2,3,3,4,4)])
hist(behav$sugar_prop)In contrast to FSR, there is no apparent and consistent effect of sex on the sucrose preference. The distribution seems as could be expected for proportions concentrated close to 0 or 1 (heavy tail in the direction toward 0.5).
There is no need to try to approximate the data with Gaussian distribution, beta regression is clear choice here.
4.2 Modelling
4.2.1 Prior specification
Due to methodological differences from sucrose test in Tichanek et al. and here, the previous results will not be used here for prior specification
behav$group<-relevel(behav$group,ref='ctrl.sca')
logit(mean(behav[behav$genotype=="sca",]$sugar_prop))[1] 0.7560386
logit(mean(behav$sugar_prop))[1] 0.7975844
sd(logit(behav$sugar_prop))*5[1] 3.49443
Prior specification
prior01 <- c(set_prior("student_t(3,0.76,3.5)", class = "b", coef = "Intercept"),
set_prior("normal(0,2)", class = "b", coef = "groupctrl.wt"),
set_prior("normal(0,2)", class = "b", coef = "groupeda.wt"),
set_prior("normal(0,1.2)" , class = "b", coef = "groupeda.sca"))4.2.2 Model fitting and diagnostics
sugar_model_01 <- run_model(
brm(sugar_prop ~0+Intercept+group,
data=behav, prior = prior01,
family=Beta(link="logit", link_phi="log"),
save_pars = save_pars(all = TRUE),
iter=8000, warmup=2000,chains=2,cores=1,seed=17,
control = list(adapt_delta = 0.99)),
'output/brm/sugar_model_01', reuse = TRUE)
mcmc_trace(sugar_model_01)summary(sugar_model_01) Family: beta
Links: mu = logit; phi = identity
Formula: sugar_prop ~ 0 + Intercept + group
Data: behav (Number of observations: 80)
Draws: 2 chains, each with iter = 8000; warmup = 2000; thin = 1;
total post-warmup draws = 12000
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 0.87 0.15 0.58 1.17 1.00 3651 5221
groupctrl.wt -0.03 0.21 -0.44 0.38 1.00 4391 6377
groupeda.wt -0.08 0.21 -0.49 0.33 1.00 4285 6284
groupeda.sca -0.28 0.21 -0.68 0.12 1.00 4420 6172
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
phi 9.94 1.55 7.17 13.27 1.00 6950 7110
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
Convergence is fine, ESS are sufficient
4.2.3 PPC
Quick version of PPC
pp_check(sugar_model_01,type='dens_overlay',ndraws = 50)pp_check(sugar_model_01,type='dens_overlay_grouped',ndraws = 50,group='group') pp_check(sugar_model_01,type='scatter_avg')Using all posterior draws for ppc type 'scatter_avg' by default.
pp_check(sugar_model_01,type="stat_2d", stat = c("max", "min"),ndraws=200)pp_check(sugar_model_01,type="stat_2d", stat = c("mean", "sd"),ndraws=200)plot(loo(sugar_model_01));loo(sugar_model_01)
Computed from 12000 by 80 log-likelihood matrix
Estimate SE
elpd_loo 42.1 8.5
p_loo 5.4 1.4
looic -84.3 16.9
------
Monte Carlo SE of elpd_loo is 0.0.
All Pareto k estimates are good (k < 0.5).
See help('pareto-k-diagnostic') for details.
Diagnostics is not so nice as in previous example. E.g., max values are smaller than generally expected, especially when taking account low minimal values. So, the high range of the proportion seems to be sharply restricted but the low range seems to be unexpectedly low. This reduces reliability of the model.
Thus, if there was some suggestions of plausible effect, non-parametric estimation (based on non-parametric bootstrap) may provide useful validation. Here the effects seem negligible so no validation is necessary.
4.3 Posterior draws extraction
post_fix<-as.data.frame(sugar_model_01, variable = c("b_Intercept","b_groupctrl.wt",
"b_groupeda.wt","b_groupeda.sca"))
names(post_fix)[1]<-"sca_ctrl"
post_fix$wt_ctrl<-post_fix$sca_ctrl+post_fix$b_groupctrl.wt
post_fix$wt_eda<-post_fix$sca_ctrl+post_fix$b_groupeda.wt
post_fix$sca_eda<-post_fix$sca_ctrl+post_fix$b_groupeda.sca
post_fix$wt_contrast<-post_fix$wt_eda-post_fix$wt_ctrl
groupquant<-sapply(post_fix[,c(5,6,1,7)],function(p) quantile(p, probs = c(0.025,0.975,0.5)))
groupquant<-inv.logit(groupquant)4.4 Data and model vizualisation
behav$group<-factor(behav$group,levels=c('ctrl.wt','eda.wt','ctrl.sca','eda.sca'))
par(mfrow=c(1,2))
par(mar=c(2,3,0,0))
par(mgp=c(2,0.6,0))
range<-c(0,1);scal<-range[2]-range[1]
xrange<-c(0.5,4.5);xscal=xrange[2]-xrange[1]
plot(NULL,xlim=c(xrange[1],xrange[2]),ylim=c(range[1],range[2]),xlab="",
ylab="Relative sugar consumption (proportion)",las=1, axes=FALSE,cex.lab=1)
rect(xrange[1],range[2],xrange[2],range[1],col="grey92",border=NA)
x<-range[1]
repeat{
lines(c(xrange[1],xrange[2]),c(x,x),col="white",lwd=0.7)
x=x+0.1;if(x>range[2]){break}}
x<-1
repeat{
lines(c(x,x),c(range[1],range[2]),col="white",lwd=0.7)
x=x+1;if(x>4.5){break}}
vioplot(behav$sugar_prop~behav$group,col=colb,at=c(1:4),add=T,
border=cola,axes=FALSE,drawRect=F,lwd=1,wex=0.8)
beeswarm(behav$sugar_prop~behav$group,col=colc,at=c(1:4),add=T,pwpch=(16.5+behav$sex_num) )
tp<-groupquant[3,]
xx<-1;wid=0.25
repeat{
lines(c(xx-wid,xx+wid),c(tp[xx],tp[xx]),lwd=2,col="black");
lines(c(xx,xx),c(groupquant[1,xx],groupquant[2,xx]),lwd=1.7,col="black")
xx<-xx+1
if(xx>4){break}}
tckk=-0.02
axis(2,las=2,cex.axis=1.1,at=seq(range[1],range[2],by=0.2),
labels=c(rep("",length(seq(range[1],range[2],by=0.2)))),pos=xrange[1],tck=tckk)
axis(2,las=2,cex.axis=1.1,at=seq(range[1],range[2],by=0.2),pos=xrange[1],tck=tckk)
axis(side=1,las=1,cex.axis=1.1,at=c(seq(1,4,by=1)),
labels=c(rep("",length(seq(1,4,by=1)))),pos=range[1],tck=tckk)
lines(c(xrange[1],xrange[2]),c(range[1],range[1]))
text(c(1:4),c(rep(range[1]-0.046*scal,4)),xpd=T,cex=1.1,col=cola,
c("Wt","Wt", "SCA1", "SCA1"))
ypo<-c(range[1]-0.1*scal,range[1]-0.1*scal)
text(c(1:4),c(ypo[1],ypo[2],ypo[1],ypo[2]),xpd=T,cex=1.1,col=cola,
c("0","edv", "0", "edv"))
ypos<-0.14;xpos=1.4
points(xpos,ypos,pch=16,cex=1.4,col=rgb(0.4,0.4,0.4,alpha=0.6))
text(xpos+0.19*xscal,ypos,"Females",col="grey40",cex=1.2)
points(xpos,ypos-0.08*scal,pch=17,cex=1.4,col=rgb(0.4,0.4,0.4,alpha=0.6))
text(xpos+0.19*xscal,ypos-0.08*scal,"Males",col="grey40",cex=1.2)
# probability distributions
par(mar=c(2,0,0,0))
dif<-data.frame(post_fix$wt_contrast,post_fix$b_groupeda.sca,
post_fix$b_groupctrl.wt)
dif<-exp(dif)
yla<-"Odds ratio for relative sugar consumption"
tckk=-0.018
ste<-seq(0.2,2.2,by=0.2)
mons_poste(dif,1)
zpos=seq(0,1,1/3)
xx=1;ind=0.3;xpol<-2.1
text(xpol,zpos[xx]+zpos[2]*ind,
"Wt: ed x 0 ",cex=1)
xx=xx+1
text(xpol,zpos[xx]+zpos[2]*ind,
"SCA1: ed x 0 ",cex=1)
xx=xx+1
text(xpol,zpos[xx]+zpos[2]*ind,
"0: Wt x SCA1",cex=1)5 Distance walked in open field
5.1 Data exploration
## set spacing
par(mar=c(5,3.5,1,1))
par(mgp=c(1.9,0.7,0))
par(mfrow=c(3,1))
## plotting
plot(behav$of_distance~behav$group,las=2,xlab="",
col=cola)
plot(behav$of_distance~behav$group_sex,las=2,xlab="",
col=cola[c(1,1,2,2,3,3,4,4)])
hist(behav$of_distance, 20)No apparent effect of sex. Distribution seems to be symmetric and could be potentially approximated via Gaussian distribution. Alternatively, distance walked may be modelled with Gamma distribution
5.2 Modelling
5.2.1 Prior specification
Exploring data to get prior for gamma model
behav$group<-relevel(behav$group,ref='ctrl.sca')
log(mean(young_scirep[young_scirep$genotype=="tg+",]$of.distance)/100)[1] 3.579207
log(mean(behav$of_distance))[1] 3.810714
sd(log(behav$of_distance))*5[1] 1.774265
model_pri<-glmmTMB((of.distance/100)~genotype,family=Gamma(link="log"),data=young_scirep)
summary(model_pri) Family: Gamma ( log )
Formula: (of.distance/100) ~ genotype
Data: young_scirep
AIC BIC logLik deviance df.resid
453.9 459.9 -223.9 447.9 52
Dispersion estimate for Gamma family (sigma^2): 0.12
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 3.57921 0.06312 56.71 < 2e-16 ***
genotypew 0.38907 0.09362 4.16 3.24e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
prios(model_pri) Estimate
0.2055787 0.3083680
Setting the prior for gamma model
prior01 <- c(set_prior("student_t(3, 3.7, 1.77)", class = "b", coef = "Intercept"),
set_prior("normal(0.2,0.31)", class = "b", coef = "groupctrl.wt"),
set_prior("normal(0.2,0.31)", class = "b", coef = "groupeda.wt"),
set_prior("normal(0,1.2)" , class = "b", coef = "groupeda.sca"))Exploring data to get prior for Gaussian model
sd(behav[behav$genotype=="sca",]$of_distance)*c(5, 1.2)[1] 59.90019 14.37604
sd(behav$of_distance)*c(5, 1.2)[1] 77.94031 18.70568
mean(c(mean(young_scirep[young_scirep$genotype=="tg+",]$of.distance/100),
mean(behav$of_distance)))[1] 40.5139
model_pri<-glmmTMB((of.distance/100)~genotype,data=young_scirep)
summary(model_pri) Family: gaussian ( identity )
Formula: (of.distance/100) ~ genotype
Data: young_scirep
AIC BIC logLik deviance df.resid
459.2 465.2 -226.6 453.2 52
Dispersion estimate for gaussian family (sigma^2): 222
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 35.845 2.718 13.186 < 2e-16 ***
genotypew 17.048 4.032 4.228 2.36e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
prios(model_pri) Estimate
9.145387 13.718081
Prior for Gaussian model
prior02 <- c(set_prior("student_t(3, 40.5, 60)", class = "b", coef = "Intercept"),
set_prior("normal(9.1, 13.7)", class = "b", coef = "groupctrl.wt"),
set_prior("normal(9.1, 13.7)", class = "b", coef = "groupeda.wt"),
set_prior("normal(0, 14.4)" , class = "b", coef = "groupeda.sca"))5.3 Fitting models and checking convergence and ESS
of_dist_model_01 <- run_model(
brm(of_distance ~0+Intercept+group,
data=behav, prior = prior01,
family=Gamma(link="log"),
save_pars = save_pars(all = TRUE),
iter=8000, warmup=2000,chains=2,cores=1,seed=17,
control = list(adapt_delta = 0.99)),
'output/brm/of_dist_model_01', reuse = TRUE)
of_dist_model_02 <- run_model(
brm(of_distance~0+Intercept+group,
data=behav, prior = prior02,
family=gaussian(),
save_pars = save_pars(all = TRUE),
iter=8000, warmup=2000,chains=2,cores=1,seed=17,
control = list(adapt_delta = 0.99)),
'output/brm/of_dist_model_02', reuse = TRUE)
mcmc_trace(of_dist_model_01)mcmc_trace(of_dist_model_02)summary(of_dist_model_01) Family: gamma
Links: mu = log; shape = identity
Formula: of_distance ~ 0 + Intercept + group
Data: behav (Number of observations: 80)
Draws: 2 chains, each with iter = 8000; warmup = 2000; thin = 1;
total post-warmup draws = 12000
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 3.58 0.06 3.46 3.71 1.00 3968 5799
groupctrl.wt 0.40 0.09 0.23 0.57 1.00 4820 6583
groupeda.wt 0.40 0.09 0.23 0.57 1.00 4636 6453
groupeda.sca 0.04 0.09 -0.13 0.22 1.00 4714 6283
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
shape 11.80 1.86 8.48 15.81 1.00 7235 7319
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
summary(of_dist_model_02) Family: gaussian
Links: mu = identity; sigma = identity
Formula: of_distance ~ 0 + Intercept + group
Data: behav (Number of observations: 80)
Draws: 2 chains, each with iter = 8000; warmup = 2000; thin = 1;
total post-warmup draws = 12000
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 36.17 2.80 30.71 41.71 1.00 3801 5160
groupctrl.wt 17.48 3.93 9.74 25.12 1.00 4274 5606
groupeda.wt 17.29 3.93 9.40 24.92 1.00 4828 6770
groupeda.sca 1.24 3.93 -6.46 9.05 1.00 4655 6255
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 13.23 1.09 11.30 15.55 1.00 7418 6896
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
Convergence and ESS are fine for both models.
5.4 PPC
pla<-pp_check(of_dist_model_01,type='dens_overlay',ndraws = 50)
plb<-pp_check(of_dist_model_02,type='dens_overlay',ndraws = 50)
plot_grid(pla, plb, labels=c("A", "B"), ncol = 1, nrow = 2)pla<-pp_check(of_dist_model_01,type='dens_overlay_grouped',ndraws = 50,group='group')
plb<-pp_check(of_dist_model_02,type='dens_overlay_grouped',ndraws = 50,group='group')
plot_grid(pla, plb, labels=c("A", "B"), ncol = 1, nrow = 2)pla<-pp_check(of_dist_model_01,type='scatter_avg') Using all posterior draws for ppc type 'scatter_avg' by default.
plb<-pp_check(of_dist_model_02,type='scatter_avg') Using all posterior draws for ppc type 'scatter_avg' by default.
plot_grid(pla, plb, labels=c("A", "B"), ncol = 1, nrow = 2)pla<-pp_check(of_dist_model_01,type="stat_2d", stat = c("max", "min"),ndraws=200)
plb<-pp_check(of_dist_model_02,type="stat_2d", stat = c("max", "min"),ndraws=200)
plot_grid(pla, plb, labels=c("A", "B"), ncol = 1, nrow = 2)pla<-pp_check(of_dist_model_01,type="stat_2d", stat = c("mean", "sd"),ndraws=200)
plb<-pp_check(of_dist_model_02,type="stat_2d", stat = c("mean", "sd"),ndraws=200)
plot_grid(pla, plb, labels=c("A", "B"), ncol = 1, nrow = 2)### Are there too influential observations?
par(mfrow=c(1,2))
plot(loo(of_dist_model_01));loo(of_dist_model_01)
Computed from 12000 by 80 log-likelihood matrix
Estimate SE
elpd_loo -318.9 6.9
p_loo 4.7 0.9
looic 637.8 13.7
------
Monte Carlo SE of elpd_loo is 0.0.
All Pareto k estimates are good (k < 0.5).
See help('pareto-k-diagnostic') for details.
plot(loo(of_dist_model_02));loo(of_dist_model_02)
Computed from 12000 by 80 log-likelihood matrix
Estimate SE
elpd_loo -322.1 7.0
p_loo 4.8 0.9
looic 644.3 13.9
------
Monte Carlo SE of elpd_loo is 0.0.
All Pareto k estimates are good (k < 0.5).
See help('pareto-k-diagnostic') for details.
In both models, no observation has overly high influence. Gamma model seems to have slightly better PPC (at least in terms of predicted minimal value where Gaussian model sometimes predicts negative distance walked)
5.5 Posterior draws extraction and visualization
Posterior draws extraction. Note the we will use exponent of posterior prediction as we used log-link. Then, we will get fold-difference (fold-change) as an effect size, indicating how many time walked one group larger distance than the second group
post_fix<-as.data.frame(of_dist_model_01, variable = c("b_Intercept","b_groupctrl.wt",
"b_groupeda.wt","b_groupeda.sca"))
names(post_fix)[1]<-"sca_ctrl"
post_fix$wt_ctrl<-post_fix$sca_ctrl+post_fix$b_groupctrl.wt
post_fix$wt_eda<-post_fix$sca_ctrl+post_fix$b_groupeda.wt
post_fix$sca_eda<-post_fix$sca_ctrl+post_fix$b_groupeda.sca
post_fix$wt_contrast<-post_fix$wt_eda-post_fix$wt_ctrl
groupquant<-sapply(post_fix[,c(5,6,1,7)],function(p) quantile(p, probs = c(0.025,0.975,0.5)))
groupquant<-exp(groupquant)Visualization
behav$group<-factor(behav$group,levels=c('ctrl.wt','eda.wt','ctrl.sca','eda.sca'))
par(mfrow=c(1,2))
par(mar=c(2,3,0,0))
par(mgp=c(2,0.6,0))
range<-c(0,100);scal<-range[2]-range[1]
xrange<-c(0.5,4.5);xscal=xrange[2]-xrange[1]
plot(NULL,xlim=c(xrange[1],xrange[2]),ylim=c(range[1],range[2]),xlab="",
ylab="Distance moved (m)",las=1, axes=FALSE,cex.lab=1)
rect(xrange[1],range[2],xrange[2],range[1],col="grey92",border=NA)
x<-range[1]
repeat{
lines(c(xrange[1],xrange[2]),c(x,x),col="white",lwd=0.7)
x=x+10;if(x>range[2]){break}}
x<-1
repeat{
lines(c(x,x),c(range[1],range[2]),col="white",lwd=0.7)
x=x+1;if(x>4.5){break}}
vioplot(behav$of_distance~behav$group,col=colb,at=c(1:4),add=T,
border=cola,axes=FALSE,drawRect=F,lwd=1,wex=0.8)
beeswarm(behav$of_distance~behav$group,col=colc,at=c(1:4),add=T,pwpch=(16.5+behav$sex_num) )
tp<-(groupquant[3,])
xx<-1;wid=0.25
repeat{
lines(c(xx-wid,xx+wid),c(tp[xx],tp[xx]),lwd=2,col="black");
lines(c(xx,xx),c(groupquant[1,xx],groupquant[2,xx]),lwd=1.7,col="black")
xx<-xx+1
if(xx>4){break}}
tckk=-0.02
axis(2,las=2,cex.axis=1.1,at=seq(range[1],range[2],by=20),
labels=c(rep("",length(seq(range[1],range[2],by=20)))),pos=xrange[1],tck=tckk)
axis(2,las=2,cex.axis=1.1,at=seq(range[1],range[2],by=20),pos=xrange[1],tck=tckk)
axis(side=1,las=1,cex.axis=1.1,at=c(seq(1,4,by=1)),
labels=c(rep("",length(seq(1,4,by=1)))),pos=range[1],tck=tckk)
lines(c(xrange[1],xrange[2]),c(range[1],range[1]))
text(c(1:4),c(rep(range[1]-0.046*scal,4)),xpd=T,cex=1.1,col=cola,
c("Wt","Wt", "SCA1", "SCA1"))
ypo<-c(range[1]-0.1*scal,range[1]-0.1*scal)
text(c(1:4),c(ypo[1],ypo[2],ypo[1],ypo[2]),xpd=T,cex=1.1,col=cola,
c("0","edv", "0", "edv"))
ypos<-17;xpos=1.4
points(xpos,ypos,pch=16,cex=1.4,col=rgb(0.4,0.4,0.4,alpha=0.6))
text(xpos+0.19*xscal,ypos,"Females",col="grey40",cex=1.2)
points(xpos,ypos-0.08*scal,pch=17,cex=1.4,col=rgb(0.4,0.4,0.4,alpha=0.6))
text(xpos+0.19*xscal,ypos-0.08*scal,"Males",col="grey40",cex=1.2)
### probability distributions --------
par(mar=c(2,0,0,0))
dif<-data.frame(post_fix$wt_contrast,post_fix$b_groupeda.sca,
post_fix$b_groupctrl.wt)
dif<-exp(dif)
yla<-"Fold-difference in distance moved"
tckk=-0.018
ste<-seq(0.2,2.2,by=0.2)
mons_poste(dif,1)
zpos=seq(0,1,1/3)
xx=1;ind=0.3;xpol<-2
text(xpol,zpos[xx]+zpos[2]*ind,
"Wt: ed x 0 ",cex=1)
xx=xx+1
text(xpol,zpos[xx]+zpos[2]*ind,
"SCA1: ed x 0 ",cex=1)
xx=xx+1
text(xpol,zpos[xx]+zpos[2]*ind,
"0: Wt x SCA1",cex=1)6 Thigmotaxis in open filed per distance
6.1 Data exploration
## set spacing
par(mar=c(5,3.5,1,1))
par(mgp=c(1.9,0.7,0))
par(mfrow=c(3,1))
## plotting
plot(behav$thigmo_propdist ~ behav$group,las=2,xlab="",
col=cola)
plot(behav$thigmo_propdist ~ behav$group_sex,las=2,xlab="",
col=cola[c(1,1,2,2,3,3,4,4)])
hist(behav$thigmo_propdist, 20)No apparent effect of sex. Distribution looks relatively similar across the groups and symmetrical. We will fit both beta regression (the outcome is proportion from 0 to 1) or Gaussian models
6.2 Modelling
6.2.1 Prior specification
As the study Tichanek et al. used different size of the zone along the walls (5 vs. 7 cm), prior for intercept will be established according to data only
Beta model
behav$group<-relevel(behav$group,ref='ctrl.sca')
logit(mean(behav$thigmo_propdist))[1] 0.7182175
sd(logit(behav[behav$genotype=="sca",]$thigmo_propdist))*c(5, 1.2)[1] 2.0370066 0.4888816
sd(logit(behav$thigmo_propdist))*c(5, 1.2)[1] 2.0122953 0.4829509
model_pri<-glmmTMB(of.distance_on_edges_narrow~genotype,
family=beta_family(link="logit"),data=young_scirep)
summary(model_pri) Family: beta ( logit )
Formula: of.distance_on_edges_narrow ~ genotype
Data: young_scirep
AIC BIC logLik deviance df.resid
-133.8 -127.8 69.9 -139.8 52
Dispersion parameter for beta family (): 35.5
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.43709 0.07606 18.894 < 2e-16 ***
genotypew -0.48881 0.10516 -4.648 3.35e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
prios(model_pri) Estimate
-0.2826973 0.4240460
Defining prior for beta model
prior01 <- c(set_prior("student_t(3, 0.71, 2)", class = "b", coef = "Intercept"),
set_prior("normal(-0.28, 0.42)", class = "b", coef = "groupctrl.wt"),
set_prior("normal(-0.28, 0.42)", class = "b", coef = "groupeda.wt"),
set_prior("normal(0, 1.2)" , class = "b", coef = "groupeda.sca"))Priors for Gaussian model
mean(behav$thigmo_propdist)[1] 0.6722144
sd((behav[behav$genotype=="sca",]$thigmo_propdist))*c(5, 1.2)[1] 0.40806301 0.09793512
sd((behav$thigmo_propdist))*c(5, 1.2)[1] 0.4225520 0.1014125
model_pri<-glmmTMB(of.distance_on_edges_narrow~genotype,data=young_scirep)
summary(model_pri) Family: gaussian ( identity )
Formula: of.distance_on_edges_narrow ~ genotype
Data: young_scirep
AIC BIC logLik deviance df.resid
-132.7 -126.7 69.3 -138.7 52
Dispersion estimate for gaussian family (sigma^2): 0.0047
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.80832 0.01252 64.56 < 2e-16 ***
genotypew -0.08811 0.01857 -4.74 2.09e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
prios(model_pri) Estimate
-0.05170907 0.07756361
Defining the prior for Gaussian model
prior02 <- c(set_prior("student_t(3,0.67,0.41)", class = "b", coef = "Intercept"),
set_prior("normal(-0.052,0.08)", class = "b", coef = "groupctrl.wt"),
set_prior("normal(-0.052,0.08)", class = "b", coef = "groupeda.wt"),
set_prior("normal(0,0.1)" , class = "b", coef = "groupeda.sca"))6.2.2 Fitting models and convergence and ESS check
of_thigmo_model_01 <- run_model(
brm(thigmo_propdist ~0+Intercept+group,
data=behav, prior = prior01,
family=Beta(link="logit", link_phi="log"),
save_pars = save_pars(all = TRUE),
iter=8000, warmup=2000,chains=2,cores=1,seed=17,
control = list(adapt_delta = 0.99)),
'output/brm/of_thigmo_model_01', reuse = TRUE)
of_thigmo_model_02 <- run_model(
brm(thigmo_propdist~0+Intercept+group,
data=behav, prior = prior02,
family=gaussian(),
save_pars = save_pars(all = TRUE),
iter=8000, warmup=2000,chains=2,cores=1,seed=17,
control = list(adapt_delta = 0.99)),
'output/brm/of_thigmo_model_02', reuse = TRUE)
mcmc_trace(of_thigmo_model_01)mcmc_trace(of_thigmo_model_02)summary(of_thigmo_model_01) Family: beta
Links: mu = logit; phi = identity
Formula: thigmo_propdist ~ 0 + Intercept + group
Data: behav (Number of observations: 80)
Draws: 2 chains, each with iter = 8000; warmup = 2000; thin = 1;
total post-warmup draws = 12000
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 0.77 0.08 0.61 0.93 1.00 4264 4931
groupctrl.wt -0.13 0.11 -0.35 0.09 1.00 4930 5811
groupeda.wt -0.24 0.11 -0.45 -0.03 1.00 4837 5764
groupeda.sca 0.19 0.12 -0.03 0.42 1.00 4962 6159
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
phi 34.10 5.38 24.45 45.48 1.00 6350 6426
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
summary(of_thigmo_model_02) Family: gaussian
Links: mu = identity; sigma = identity
Formula: thigmo_propdist ~ 0 + Intercept + group
Data: behav (Number of observations: 80)
Draws: 2 chains, each with iter = 8000; warmup = 2000; thin = 1;
total post-warmup draws = 12000
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 0.68 0.02 0.65 0.72 1.00 4017 6156
groupctrl.wt -0.03 0.02 -0.08 0.02 1.00 4983 7241
groupeda.wt -0.05 0.02 -0.10 -0.01 1.00 4873 6445
groupeda.sca 0.04 0.02 -0.01 0.08 1.00 4968 7196
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.08 0.01 0.07 0.09 1.00 7321 6719
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
Convergence and ESS of both models are OK
6.2.3 PPC
pla<-pp_check(of_thigmo_model_01,type='dens_overlay',ndraws = 50)
plb<-pp_check(of_thigmo_model_02,type='dens_overlay',ndraws = 50)
plot_grid(pla, plb, labels=c("A", "B"), ncol = 1, nrow = 2)pla<-pp_check(of_thigmo_model_01,type='dens_overlay_grouped',ndraws = 50,group='group')
plb<-pp_check(of_thigmo_model_02,type='dens_overlay_grouped',ndraws = 50,group='group')
plot_grid(pla, plb, labels=c("A", "B"), ncol = 1, nrow = 2)pla<-pp_check(of_thigmo_model_01,type='scatter_avg') Using all posterior draws for ppc type 'scatter_avg' by default.
plb<-pp_check(of_thigmo_model_02,type='scatter_avg') Using all posterior draws for ppc type 'scatter_avg' by default.
plot_grid(pla, plb, labels=c("A", "B"), ncol = 1, nrow = 2)pla<-pp_check(of_thigmo_model_01,type="stat_2d", stat = c("max", "min"),ndraws=200)
plb<-pp_check(of_thigmo_model_02,type="stat_2d", stat = c("max", "min"),ndraws=200)
plot_grid(pla, plb, labels=c("A", "B"), ncol = 1, nrow = 2)pla<-pp_check(of_thigmo_model_01,type="stat_2d", stat = c("mean", "sd"),ndraws=200)
plb<-pp_check(of_thigmo_model_02,type="stat_2d", stat = c("mean", "sd"),ndraws=200)
plot_grid(pla, plb, labels=c("A", "B"), ncol = 1, nrow = 2)### Are there too influential observations?
par(mfrow=c(1,2))
plot(loo(of_thigmo_model_01));loo(of_thigmo_model_01)
Computed from 12000 by 80 log-likelihood matrix
Estimate SE
elpd_loo 87.0 6.6
p_loo 4.8 1.0
looic -174.1 13.3
------
Monte Carlo SE of elpd_loo is 0.0.
All Pareto k estimates are good (k < 0.5).
See help('pareto-k-diagnostic') for details.
plot(loo(of_thigmo_model_02));loo(of_thigmo_model_02)
Computed from 12000 by 80 log-likelihood matrix
Estimate SE
elpd_loo 86.6 7.1
p_loo 4.9 1.2
looic -173.1 14.3
------
Monte Carlo SE of elpd_loo is 0.0.
All Pareto k estimates are good (k < 0.5).
See help('pareto-k-diagnostic') for details.
Both models seem OK. Beta model is more natural for proportional data and thus will be preferred
6.3 Posterior draws extraction and visualization
Posterior samples extraction
post_fix<-as.data.frame(of_thigmo_model_01, variable = c("b_Intercept","b_groupctrl.wt",
"b_groupeda.wt","b_groupeda.sca"))
names(post_fix)[1]<-"sca_ctrl"
post_fix$wt_ctrl<-post_fix$sca_ctrl+post_fix$b_groupctrl.wt
post_fix$wt_eda<-post_fix$sca_ctrl+post_fix$b_groupeda.wt
post_fix$sca_eda<-post_fix$sca_ctrl+post_fix$b_groupeda.sca
post_fix$wt_contrast<-post_fix$wt_eda-post_fix$wt_ctrl
groupquant<-sapply(post_fix[,c(5,6,1,7)],function(p) quantile(p, probs = c(0.025,0.975,0.5)))
groupquant<-inv.logit(groupquant)Plotting
behav$group<-factor(behav$group,levels=c('ctrl.wt','eda.wt','ctrl.sca','eda.sca'))
par(mfrow=c(1,2))
par(mar=c(2,3,0,0))
par(mgp=c(2,0.6,0))
range<-c(0,1);scal<-range[2]-range[1]
xrange<-c(0.5,4.5);xscal=xrange[2]-xrange[1]
plot(NULL,xlim=c(xrange[1],xrange[2]),ylim=c(range[1],range[2]),xlab="",
ylab="Thigmotaxis (proportion per distance)",las=1, axes=FALSE,cex.lab=1)
rect(xrange[1],range[2],xrange[2],range[1],col="grey92",border=NA)
x<-range[1]
repeat{
lines(c(xrange[1],xrange[2]),c(x,x),col="white",lwd=0.7)
x=x+0.1;if(x>range[2]){break}}
x<-1
repeat{
lines(c(x,x),c(range[1],range[2]),col="white",lwd=0.7)
x=x+1;if(x>4.5){break}}
vioplot(behav$thigmo_propdist~behav$group,col=colb,at=c(1:4),add=T,
border=cola,axes=FALSE,drawRect=F,lwd=1,wex=0.8)
beeswarm(behav$thigmo_propdist~behav$group,col=colc,at=c(1:4),add=T,pwpch=(16.5+behav$sex_num) )
tp<-(groupquant[3,])
xx<-1;wid=0.25
repeat{
lines(c(xx-wid,xx+wid),c(tp[xx],tp[xx]),lwd=2,col="black");
lines(c(xx,xx),c(groupquant[1,xx],groupquant[2,xx]),lwd=1.7,col="black")
xx<-xx+1
if(xx>4){break}}
tckk=-0.02
axis(2,las=2,cex.axis=1.1,at=seq(range[1],range[2],by=0.2),
labels=c(rep("",length(seq(range[1],range[2],by=0.2)))),pos=xrange[1],tck=tckk)
axis(2,las=2,cex.axis=1.1,at=seq(range[1],range[2],by=0.2),pos=xrange[1],tck=tckk)
axis(side=1,las=1,cex.axis=1.1,at=c(seq(1,4,by=1)),
labels=c(rep("",length(seq(1,4,by=1)))),pos=range[1],tck=tckk)
lines(c(xrange[1],xrange[2]),c(range[1],range[1]))
text(c(1:4),c(rep(range[1]-0.046*scal,4)),xpd=T,cex=1.1,col=cola,
c("Wt","Wt", "SCA1", "SCA1"))
ypo<-c(range[1]-0.1*scal,range[1]-0.1*scal)
text(c(1:4),c(ypo[1],ypo[2],ypo[1],ypo[2]),xpd=T,cex=1.1,col=cola,
c("0","edv", "0", "edv"))
ypos<-0.2;xpos=1.4
points(xpos,ypos,pch=16,cex=1.4,col=rgb(0.4,0.4,0.4,alpha=0.6))
text(xpos+0.19*xscal,ypos,"Females",col="grey40",cex=1.2)
points(xpos,ypos-0.08*scal,pch=17,cex=1.4,col=rgb(0.4,0.4,0.4,alpha=0.6))
text(xpos+0.19*xscal,ypos-0.08*scal,"Males",col="grey40",cex=1.2)
### probability distributions --------
par(mar=c(2,0,0,0))
dif<-data.frame(post_fix$wt_contrast,post_fix$b_groupeda.sca,
post_fix$b_groupctrl.wt)
dif<-exp(dif)
yla<-"Odds ratio for thigmotaxic distance"
tckk=-0.018
ste<-seq(0.2,2.2,by=0.2)
mons_poste(dif,1)
zpos=seq(0,1,1/3)
xx=1;ind=0.3;xpol<-1.77
text(xpol,zpos[xx]+zpos[2]*ind,
"Wt: ed x 0 ",cex=1)
xx=xx+1
text(xpol,zpos[xx]+zpos[2]*ind,
"SCA1: ed x 0 ",cex=1)
xx=xx+1
text(xpol,zpos[xx]+zpos[2]*ind,
"0: Wt x SCA1",cex=1)7 Thigmotaxis in OF per time
7.1 Data exploration
## set spacing
par(mar=c(5,3.5,1,1))
par(mgp=c(1.9,0.7,0))
par(mfrow=c(3,1))
## plotting
plot(behav$thigmo_proptime ~ behav$group,las=2,xlab="",
col=cola)
plot(behav$thigmo_proptime ~ behav$group_sex,las=2,xlab="",
col=cola[c(1,1,2,2,3,3,4,4)])
hist(behav$thigmo_proptime, 20)No apparent effect of sex. Data distribution has tail around the 0.5 value, expectable for data of proportion concentrated close to 0-1 bound. Thus, beta regression is indicated.
7.2 Modelling
7.2.1 Prior specification
In the study Tichanek et al., time thigmotaxis was not main outcome and was confounded with distance walked. The previous data will not be used for prior
behav$group<-relevel(behav$group,ref='ctrl.sca')
logit(mean(behav$thigmo_proptime))[1] 1.13492
sd(logit(behav$thigmo_proptime))*5[1] 2.894561
Setting prior
prior01 <- c(set_prior("student_t(3,1.13, 2.89)", class = "b", coef = "Intercept"),
set_prior("normal(0, 2)", class = "b", coef = "groupctrl.wt"),
set_prior("normal(0, 2)", class = "b", coef = "groupeda.wt"),
set_prior("normal(0, 1.2)" , class = "b", coef = "groupeda.sca"))7.2.2 Model fit and diagnostics
of_thigmo_time_model_01<-run_model(
brm(thigmo_proptime ~0+Intercept+group,
data=behav, prior = prior01,
family=Beta(link="logit", link_phi="log"),
save_pars = save_pars(all = TRUE),
iter=8000, warmup=2000,chains=2,cores=1,seed=17,
control = list(adapt_delta = 0.99)),
'output/brm/of_thigmo_time_model_01', reuse = TRUE)
mcmc_trace(of_thigmo_time_model_01)summary(of_thigmo_time_model_01) Family: beta
Links: mu = logit; phi = identity
Formula: thigmo_proptime ~ 0 + Intercept + group
Data: behav (Number of observations: 80)
Draws: 2 chains, each with iter = 8000; warmup = 2000; thin = 1;
total post-warmup draws = 12000
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 1.09 0.12 0.86 1.32 1.00 3316 5084
groupctrl.wt 0.00 0.17 -0.32 0.34 1.00 4076 5473
groupeda.wt -0.02 0.17 -0.36 0.31 1.00 4083 5731
groupeda.sca 0.22 0.17 -0.11 0.57 1.00 3951 6202
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
phi 17.41 2.80 12.39 23.28 1.00 6795 7222
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
Model converged and ESS are sufficient
7.2.3 PPC
pp_check(of_thigmo_time_model_01,type='dens_overlay',ndraws = 50)pp_check(of_thigmo_time_model_01,type='dens_overlay_grouped',ndraws = 50,group='group') pp_check(of_thigmo_time_model_01,type='scatter_avg') Using all posterior draws for ppc type 'scatter_avg' by default.
pp_check(of_thigmo_time_model_01,type="stat_2d", stat = c("max", "min"),ndraws=200)pp_check(of_thigmo_time_model_01,type="stat_2d", stat = c("mean", "sd"),ndraws=200)Model predicts the data well
7.3 Posterior draws extraction and vizualisation
Posterior extraction
post_fix<-as.data.frame(of_thigmo_time_model_01, variable = c("b_Intercept","b_groupctrl.wt",
"b_groupeda.wt","b_groupeda.sca"))
names(post_fix)[1]<-"sca_ctrl"
post_fix$wt_ctrl<-post_fix$sca_ctrl+post_fix$b_groupctrl.wt
post_fix$wt_eda<-post_fix$sca_ctrl+post_fix$b_groupeda.wt
post_fix$sca_eda<-post_fix$sca_ctrl+post_fix$b_groupeda.sca
post_fix$wt_contrast<-post_fix$wt_eda-post_fix$wt_ctrl
summary(post_fix) sca_ctrl b_groupctrl.wt b_groupeda.wt b_groupeda.sca
Min. :0.5749 Min. :-0.638285 Min. :-0.72352 Min. :-0.5474
1st Qu.:1.0055 1st Qu.:-0.109775 1st Qu.:-0.13585 1st Qu.: 0.1029
Median :1.0828 Median : 0.001793 Median :-0.02158 Median : 0.2205
Mean :1.0853 Mean : 0.003648 Mean :-0.02241 Mean : 0.2206
3rd Qu.:1.1642 3rd Qu.: 0.114611 3rd Qu.: 0.09074 3rd Qu.: 0.3354
Max. :1.5748 Max. : 0.665536 Max. : 0.65074 Max. : 0.9281
wt_ctrl wt_eda sca_eda wt_contrast
Min. :0.6423 Min. :0.5208 Min. :0.7265 Min. :-0.72352
1st Qu.:1.0066 1st Qu.:0.9816 1st Qu.:1.2181 1st Qu.:-0.13826
Median :1.0871 Median :1.0599 Median :1.3023 Median :-0.02629
Mean :1.0889 Mean :1.0628 Mean :1.3058 Mean :-0.02606
3rd Qu.:1.1692 3rd Qu.:1.1410 3rd Qu.:1.3914 3rd Qu.: 0.08764
Max. :1.6426 Max. :1.5603 Max. :1.8698 Max. : 0.63216
groupquant<-sapply(post_fix[,c(5,6,1,7)],function(p) quantile(p, probs = c(0.025,0.975,0.5)))
groupquant<-inv.logit(groupquant)Vizualisation
## ordering of groups
behav$group<-factor(behav$group,levels=c('ctrl.wt','eda.wt','ctrl.sca','eda.sca'))
## plot spacing
par(mfrow=c(1,2))
par(mar=c(2,3,0,0))
par(mgp=c(2,0.6,0))
## scales setting
range<-c(0,1);scal<-range[2]-range[1]
xrange<-c(0.5,4.5);xscal=xrange[2]-xrange[1]
## plottting
plot(NULL,xlim=c(xrange[1],xrange[2]),ylim=c(range[1],range[2]),xlab="",
ylab="Thigmotaxis (proportion per time)",las=1, axes=FALSE,cex.lab=1)
## theming
rect(xrange[1],range[2],xrange[2],range[1],col="grey92",border=NA)
x<-range[1]
repeat{
lines(c(xrange[1],xrange[2]),c(x,x),col="white",lwd=0.7)
x=x+0.1;if(x>range[2]){break}}
x<-1
repeat{
lines(c(x,x),c(range[1],range[2]),col="white",lwd=0.7)
x=x+1;if(x>4.5){break}}
## data distribution across groups and showing datapoints
vioplot(behav$thigmo_proptime~behav$group,col=colb,at=c(1:4),add=T,
border=cola,axes=FALSE,drawRect=F,lwd=1,wex=0.8)
beeswarm(behav$thigmo_proptime~behav$group,col=colc,at=c(1:4),add=T,pwpch=(16.5+behav$sex_num) )
## group-specific prediction and CI
tp<-(groupquant[3,])
xx<-1;wid=0.25
repeat{
lines(c(xx-wid,xx+wid),c(tp[xx],tp[xx]),lwd=2,col="black");
lines(c(xx,xx),c(groupquant[1,xx],groupquant[2,xx]),lwd=1.7,col="black")
xx<-xx+1
if(xx>4){break}}
## axes
tckk=-0.02
axis(2,las=2,cex.axis=1.1,at=seq(range[1],range[2],by=0.2),
labels=c(rep("",length(seq(range[1],range[2],by=0.2)))),pos=xrange[1],tck=tckk)
axis(2,las=2,cex.axis=1.1,at=seq(range[1],range[2],by=0.2),pos=xrange[1],tck=tckk)
axis(side=1,las=1,cex.axis=1.1,at=c(seq(1,4,by=1)),
labels=c(rep("",length(seq(1,4,by=1)))),pos=range[1],tck=tckk)
lines(c(xrange[1],xrange[2]),c(range[1],range[1]))
## labelling
text(c(1:4),c(rep(range[1]-0.046*scal,4)),xpd=T,cex=1.1,col=cola,
c("Wt","Wt", "SCA1", "SCA1"))
ypo<-c(range[1]-0.1*scal,range[1]-0.1*scal)
text(c(1:4),c(ypo[1],ypo[2],ypo[1],ypo[2]),xpd=T,cex=1.1,col=cola,
c("0","edv", "0", "edv"))
ypos<-0.2;xpos=1.4
points(xpos,ypos,pch=16,cex=1.4,col=rgb(0.4,0.4,0.4,alpha=0.6))
text(xpos+0.19*xscal,ypos,"Females",col="grey40",cex=1.2)
points(xpos,ypos-0.08*scal,pch=17,cex=1.4,col=rgb(0.4,0.4,0.4,alpha=0.6))
text(xpos+0.19*xscal,ypos-0.08*scal,"Males",col="grey40",cex=1.2)
## probability distributions
par(mar=c(2,0,0,0))
dif<-data.frame(post_fix$wt_contrast,post_fix$b_groupeda.sca,
post_fix$b_groupctrl.wt)
dif<-exp(dif)
yla<-"Odds ratio for thigmotaxic time"
tckk=-0.018
ste<-seq(0.2,4,by=0.4)
mons_poste(dif,1)
zpos=seq(0,1,1/3)
xx=1;ind=0.3;xpol<-2.2
text(xpol,zpos[xx]+zpos[2]*ind,
"Wt: ed x 0 ",cex=1)
xx=xx+1
text(xpol,zpos[xx]+zpos[2]*ind,
"SCA1: ed x 0 ",cex=1)
xx=xx+1
text(xpol,zpos[xx]+zpos[2]*ind,
"0: Wt x SCA1",cex=1)