rm(list = ls())
suppressWarnings(suppressMessages( {
library(brms)
library(beeswarm)
library(vioplot)
library(glmmTMB)
library(car)
library(cowplot)
library(ggplot2)
library(tidyverse)
library(bayesplot)
library(gtsummary)
library(vegan)
library(knitr)
library(ggplot2)
} ) )Experimental treatment with edaravone in a mouse model of spinocerebellar ataxia 1
Calbindin immunofluorescence intensity
This page shows R code for the study Sucha et al. (2023, International Journal of Molecular Science).
Citation:
Sucha M, Benediktova S, Tichanek F, Jedlicka J, Kapl S, Jelinkova D, Purkartova Z, Tuma J, Kuncova J, Cendelin J. Experimental Treatment with Edaravone in a Mouse Model of Spinocerebellar Ataxia 1. International Journal of Molecular Sciences. 2023; 24(13):10689. https://doi.org/10.3390/ijms241310689
GitHub page: https://github.com/filip-tichanek/edaravonSCA1
1 Upload of packages
2 Data upload and wrangling
## Data upload
load("source_data/myEnvironment.RData")
## Defining new variables
calb <- calb %>% rename(reg = CbRegion.vermis_hemispheres.)
calb$group_region <- interaction(calb$reg, calb$group)
levels(calb$group_region)[1] "hemispheres.ctrl.wt" "vermis.ctrl.wt" "hemispheres.ctrl.sca"
[4] "vermis.ctrl.sca" "hemispheres.eda.sca" "vermis.eda.sca"
#calb$reg <- ifelse(calb$reg == 'hemispheres', 0.5, -0.5)
calb$mean_gran <- scale(calb$mean_gran)
calb$group_region <- interaction(calb$group, calb$reg)
## re-ordering factor levels
calb$group<-relevel(calb$group,ref='ctrl.sca')
## data summary
summary(calb) id treatment genotype sex slice order
15 : 80 ctrl:596 sca:580 f:443 Min. : 1.000 A:140
29 : 78 eda :282 wt :298 m:435 1st Qu.: 4.000 B:129
9 : 77 Median : 8.000 C:162
36 : 77 Mean : 7.548 D:161
20 : 76 3rd Qu.:11.000 E:150
11 : 73 Max. :15.000 F:136
(Other):417
reg mean area slice_id
hemispheres:610 Min. :13.97 Min. : 46036 29.1 : 6
vermis :268 1st Qu.:30.03 1st Qu.:192086 15.2 : 6
Median :35.36 Median :297077 19.2 : 6
Mean :36.53 Mean :312565 29.2 : 6
3rd Qu.:41.88 3rd Qu.:419212 13.3 : 6
Max. :62.38 Max. :868452 15.3 : 6
(Other):842
group sex_num group_sex mean_gran.V1
ctrl.sca:298 Min. :-0.500000 ctrl.wt.f :151 Min. :-2.428296
ctrl.wt :298 1st Qu.:-0.500000 ctrl.wt.m :147 1st Qu.:-0.733368
eda.sca :282 Median :-0.500000 ctrl.sca.f:156 Median :-0.054692
Mean :-0.004556 ctrl.sca.m:142 Mean : 0.000000
3rd Qu.: 0.500000 eda.sca.f :136 3rd Qu.: 0.624205
Max. : 0.500000 eda.sca.m :146 Max. : 3.802883
NA's :10
group_region
ctrl.wt.hemispheres :210
ctrl.sca.hemispheres:205
eda.sca.hemispheres :195
ctrl.wt.vermis : 88
ctrl.sca.vermis : 93
eda.sca.vermis : 87
3 Data exploration
Does brain regions differ in the calbindin immunofluorescence intensity (Calb IF) in molecular layer of the cerebellum? Does sexes differ?
## setting spacing
par(mfrow=c(3,2))
par(mar=c(7,3.5,1,1))
par(mgp=c(2,0.6,0))
plot(calb$mean ~ calb$group_region,
col=cola[c(1,1,3,3,4,4)],cex.axis=0.8, las=2, xlab='')
plot(calb$mean ~ calb$group_sex,
col=cola[c(1,1,3,3,4,4)],cex.axis=0.8, las=2, xlab='')
plot(calb$mean_gran ~ calb$group_region,
col=cola[c(1,1,3,3,4,4)],cex.axis=0.8, las=2, xlab='')
plot(calb$mean_gran ~ calb$group_sex,
col=cola[c(1,1,3,3,4,4)],cex.axis=0.8, las=2, xlab='')
hist(calb[calb$reg == 'vermis',]$mean,20)
hist(calb[calb$reg == 'hemispheres',]$mean,20)Calb IF seems smaller in vermis and in males. The same patters are also seen in granular layers (GL). Distribution is slightly asymmetric and cannot go belong 0. Thus, gamma regression will be compared with Gaussian model
Is there correlation between GL and ML calb IF within individuals?
calb %>% ggplot(aes(x = mean_gran, y=mean, col=reg)) +
geom_point(alpha = 0.9)+
facet_wrap(~id) +
labs(x = "calb IF in GL", y = "calb IF in ML")Warning: Removed 10 rows containing missing values (`geom_point()`).
The correlation is seen in most individuals. Both covariates that vary across individuals (calb IF in GL and region) will thus be included as a covariate. Due to small sample size, we will not include sex in default, but the model with sex will be compared with this without sex.
4 Modelling
We will use gamma regression, with above-mentioned covariates. As there are multiple measurements per slice and region (4 for hemisphere), we will used hierarchical model with random intercepts of slice nested in individual
4.1 Prior specification
Summary statistics for prior
## gamma
log(mean(calb$mean))[1] 3.598145
sd(log(calb$mean))*5[1] 1.244363
## Gaussian
mean(calb$mean)[1] 36.53042
sd(calb$mean)*c(5, 2, 1.2)[1] 44.44635 17.77854 10.66713
sd(calb$mean_gran, na.rm=TRUE)*1.2[1] 1.2
Setting priors
calb$group<-relevel(calb$group,ref='ctrl.sca')
## Gaussian
prior01 <- c(set_prior("student_t(3, 37, 44)", class = "b", coef = "Intercept"),
set_prior("normal(0,18)", class = "b", coef = "groupctrl.wt"),
set_prior("normal(0, 11)" , class = "b", coef = "groupeda.sca"),
set_prior("normal(0, 11)", class = "b", coef = "regvermis"),
set_prior("normal(0, 5.5)", class = "b", coef = "mean_gran"))
## gamma
prior2 <- c(set_prior("student_t(3, 3.6, 1.24)", class = "b", coef = "Intercept"),
set_prior("normal(0, 2)", class = "b", coef = "groupctrl.wt"),
set_prior("normal(0, 1.2)", class = "b", coef = "groupeda.sca"),
set_prior("normal(0 ,1.2)", class = "b", coef = "regvermis"),
set_prior("normal(0, 1.2)", class = "b", coef = "mean_gran"))4.2 Models fitting and diagnostics
## Gaussian
calb_model1 <- run_model(
brm(mean~0+Intercept+group+reg + mean_gran + (1|id/slice_id),
data=calb, prior = prior01,
family=gaussian(),
save_pars = save_pars(all = TRUE),
iter=8000, warmup=2000,chains=2,cores=2,seed=17,
control = list(adapt_delta = 0.99)),
'output/brm/calb_model1', reuse = TRUE)
## Gamma
calb_model2 <- run_model(
brm(mean ~0+Intercept+group+reg+ mean_gran + (1|id/slice_id),
data=calb, prior = prior2,
family=Gamma(link="log"),
save_pars = save_pars(all = TRUE),
iter=8000, warmup=2000,chains=2,cores=2,seed=17,
control = list(adapt_delta = 0.99)),
'output/brm/calb_model2', reuse = TRUE)
## chains mixing and convergence
mcmc_trace(calb_model1, pars = c('b_Intercept',
'b_groupctrl.wt',
'b_groupeda.sca'))mcmc_trace(calb_model2, pars = c('b_Intercept',
'b_groupctrl.wt',
'b_groupeda.sca'))## model summary
summary(calb_model1) Family: gaussian
Links: mu = identity; sigma = identity
Formula: mean ~ 0 + Intercept + group + reg + mean_gran + (1 | id/slice_id)
Data: calb (Number of observations: 868)
Draws: 2 chains, each with iter = 8000; warmup = 2000; thin = 1;
total post-warmup draws = 12000
Group-Level Effects:
~id (Number of levels: 12)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 8.04 1.97 5.16 12.73 1.00 4465 6934
~id:slice_id (Number of levels: 168)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 1.34 0.28 0.73 1.85 1.00 2855 2857
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 36.00 3.80 28.52 43.59 1.00 3011 4955
groupctrl.wt 4.27 5.35 -6.50 14.70 1.00 3642 5494
groupeda.sca 1.13 4.95 -8.60 10.94 1.00 3820 5755
regvermis -3.90 0.35 -4.59 -3.23 1.00 16240 9706
mean_gran 1.30 0.20 0.91 1.68 1.00 13563 9507
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 4.35 0.12 4.13 4.60 1.00 6804 8215
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
summary(calb_model2) Family: gamma
Links: mu = log; shape = identity
Formula: mean ~ 0 + Intercept + group + reg + mean_gran + (1 | id/slice_id)
Data: calb (Number of observations: 868)
Draws: 2 chains, each with iter = 8000; warmup = 2000; thin = 1;
total post-warmup draws = 12000
Group-Level Effects:
~id (Number of levels: 12)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.23 0.07 0.14 0.40 1.00 3252 4928
~id:slice_id (Number of levels: 168)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.03 0.01 0.00 0.05 1.00 1888 2514
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 3.56 0.12 3.33 3.80 1.00 2217 3565
groupctrl.wt 0.12 0.17 -0.22 0.46 1.00 2420 3834
groupeda.sca 0.03 0.17 -0.31 0.36 1.00 2369 3566
regvermis -0.12 0.01 -0.14 -0.10 1.00 14103 9449
mean_gran 0.04 0.01 0.03 0.05 1.00 13372 9395
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
shape 57.26 3.11 51.45 63.63 1.00 6199 7982
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
Both models converged well.
4.3 PPC
pla<-pp_check(calb_model1,type='dens_overlay',ndraws = 50)
plb<-pp_check(calb_model2,type='dens_overlay',ndraws = 50)
plot_grid(pla, plb, labels=c("A", "B"), ncol = 1, nrow = 2)pla<-pp_check(calb_model1,type='dens_overlay_grouped',ndraws = 50,group='group')
plb<-pp_check(calb_model2,type='dens_overlay_grouped',ndraws = 50,group='group')
plot_grid(pla, plb, labels=c("A", "B"), ncol = 1, nrow = 2)pla<-pp_check(calb_model1,type='scatter_avg') Using all posterior draws for ppc type 'scatter_avg' by default.
plb<-pp_check(calb_model2,type='scatter_avg') Using all posterior draws for ppc type 'scatter_avg' by default.
plot_grid(pla, plb, labels=c("A", "B"), ncol = 1, nrow = 2)pla<-pp_check(calb_model1,type="stat_2d", stat = c("max", "min"),ndraws=200)
plb<-pp_check(calb_model2,type="stat_2d", stat = c("max", "min"),ndraws=200)
plot_grid(pla, plb, labels=c("A", "B"), ncol = 1, nrow = 2)pla<-pp_check(calb_model1,type="stat_2d", stat = c("mean", "sd"),ndraws=200)
plb<-pp_check(calb_model2,type="stat_2d", stat = c("mean", "sd"),ndraws=200)
plot_grid(pla, plb, labels=c("A", "B"), ncol = 1, nrow = 2)### Are there too influential observations?
par(mfrow=c(1,2))
plot(loo(calb_model1));loo(calb_model1)
Computed from 12000 by 868 log-likelihood matrix
Estimate SE
elpd_loo -2542.2 21.0
p_loo 65.1 3.1
looic 5084.3 42.0
------
Monte Carlo SE of elpd_loo is 0.1.
All Pareto k estimates are good (k < 0.5).
See help('pareto-k-diagnostic') for details.
plot(loo(calb_model2));loo(calb_model2)
Computed from 12000 by 868 log-likelihood matrix
Estimate SE
elpd_loo -2597.7 23.1
p_loo 44.2 2.4
looic 5195.4 46.1
------
Monte Carlo SE of elpd_loo is 0.1.
All Pareto k estimates are good (k < 0.5).
See help('pareto-k-diagnostic') for details.
No serious issues detected. Gaussian model shows suprisingly better properties and fits the data better. Gaussian likelihood thus will be used.
What about inclusion of sex?
## prior
prior03 <- c(set_prior("student_t(3, 37, 44)", class = "b", coef = "Intercept"),
set_prior("normal(0,18)", class = "b", coef = "groupctrl.wt"),
set_prior("normal(0, 11)" , class = "b", coef = "groupeda.sca"),
set_prior("normal(0, 11)", class = "b", coef = "regvermis"),
set_prior("normal(0, 5.5)", class = "b", coef = "mean_gran"),
set_prior("normal(0, 11)", class = "b", coef = "sex_num"))
## model fit
calb_model3 <- run_model(
brm(mean~0+Intercept+group+reg + mean_gran +
sex_num + (1|id/slice_id),
data=calb, prior = prior03,
family=gaussian(),
save_pars = save_pars(all = TRUE),
iter=8000, warmup=2000,chains=2,cores=2,seed=17,
control = list(adapt_delta = 0.99)),
'output/brm/calb_model3', reuse = TRUE)
summary(calb_model3) Family: gaussian
Links: mu = identity; sigma = identity
Formula: mean ~ 0 + Intercept + group + reg + mean_gran + sex_num + (1 | id/slice_id)
Data: calb (Number of observations: 868)
Draws: 2 chains, each with iter = 8000; warmup = 2000; thin = 1;
total post-warmup draws = 12000
Group-Level Effects:
~id (Number of levels: 12)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 5.02 1.43 3.00 8.48 1.00 4217 6240
~id:slice_id (Number of levels: 168)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 1.34 0.29 0.72 1.87 1.00 2802 3254
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 35.85 2.48 30.99 40.88 1.00 4511 5985
groupctrl.wt 4.59 3.57 -2.68 11.58 1.00 4359 5414
groupeda.sca 1.41 3.37 -5.23 8.21 1.00 4926 6063
regvermis -4.04 0.36 -4.74 -3.34 1.00 17890 10043
mean_gran 1.26 0.20 0.86 1.65 1.00 14628 10471
sex_num -10.17 2.99 -15.79 -3.94 1.00 5241 6103
Family Specific Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 4.35 0.12 4.12 4.59 1.00 8165 7650
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
pp_check(calb_model3,type='dens_overlay',ndraws = 50)pp_check(calb_model3,type='dens_overlay_grouped',ndraws = 50,group='group') pp_check(calb_model3,type='scatter_avg') Using all posterior draws for ppc type 'scatter_avg' by default.
pp_check(calb_model3,type="stat_2d", stat = c("max", "min"),ndraws=200)pp_check(calb_model3,type="stat_2d", stat = c("mean", "sd"),ndraws=200)Rhat , ESS and PPC are OK
Lets to compare the predictive accuracy of model with sex
4.4 Models comparison
calb_model1 <- add_criterion(calb_model1, criterion = "loo")
calb_model3 <- add_criterion(calb_model3, criterion = "loo")
loo_compare(calb_model1, calb_model3) elpd_diff se_diff
calb_model1 0.0 0.0
calb_model3 -0.2 0.5
There is not evidence of improved accuracy via inclusion of sex, possibly due to inclusion of the covariate Calbindin IF - GL which was also different for males and females and arelady provides some adjustment. Model without sex will be used as the final model.
4.5 Posterior draws extraction
post_fix_calb <- as.data.frame(calb_model1, variable = c("b_Intercept","b_groupctrl.wt",
"b_groupeda.sca"), )
names(post_fix_calb)[1]<-"sca_ctrl"
post_fix_calb$wt_ctrl<-post_fix_calb$sca_ctrl+post_fix_calb$b_groupctrl.wt
post_fix_calb$sca_eda<-post_fix_calb$sca_ctrl+post_fix_calb$b_groupeda.sca
summary(post_fix_calb) sca_ctrl b_groupctrl.wt b_groupeda.sca wt_ctrl
Min. :13.62 Min. :-19.9196 Min. :-20.032 Min. :20.48
1st Qu.:33.55 1st Qu.: 0.8546 1st Qu.: -2.039 1st Qu.:37.73
Median :35.95 Median : 4.2531 Median : 1.130 Median :40.25
Mean :36.00 Mean : 4.2696 Mean : 1.129 Mean :40.27
3rd Qu.:38.42 3rd Qu.: 7.7336 3rd Qu.: 4.382 3rd Qu.:42.78
Max. :53.62 Max. : 29.0853 Max. : 22.791 Max. :56.71
sca_eda
Min. :21.41
1st Qu.:34.68
Median :37.14
Mean :37.13
3rd Qu.:39.53
Max. :54.66
groupquant_calb <- sapply(post_fix_calb[,c(4,1,5)],
function(p) quantile(p, probs = c(0.025,0.975,0.5)))5 Data and model visualization
In data visualization, individual points represent average per subject. Violin plot (area indicating data distribution) was, in contrast, constructed on the basis of individual data points (non-averaged)
suppressWarnings(suppressMessages( {
range<-c(0,70);scal<-range[2]-range[1]
par(mfrow=c(1,2))
par(mar=c(2,3,0,0))
par(mgp=c(2,0.6,0))
calb$group<-factor(calb$group,levels=c('ctrl.wt','ctrl.sca','eda.sca'))
xrange<-c(0.5,3.5);xscal=xrange[2]-xrange[1]
plot(NULL,xlim=c(xrange[1],xrange[2]),ylim=c(range[1],range[2]),xlab="",
ylab="Calbindin IF in Cb-ML",las=1, axes=FALSE,cex.lab=1.1)
rect(xrange[1],range[2],xrange[2],range[1],col="grey92",border=NA)
x<-range[1]
repeat{
lines(c(xrange[1],xrange[2]),c(x,x),col="white",lwd=0.7)
x=x+10;if(x>range[2]){break}}
x<-1
repeat{
lines(c(x,x),c(range[1],range[2]),col="white",lwd=0.7)
x=x+1;if(x>3.5){break}}
vioplot(calb$mean~calb$group,col=colb[-2],at=c(1:4),add=T,
border=cola[-2],axes=FALSE,drawRect=F,lwd=1,wex=0.8)
calb_sum <- calb %>% group_by(id, group, sex_num) %>% summarize(mean = mean(mean))
beeswarm(calb_sum$mean~calb_sum$group,col=cola[-2],at=c(1:3),
add=T,pwpch=(16.5+calb_sum$sex_num), cex=1.2 )
tp<-(groupquant_calb[3,])
xx<-1;wid=0.25
repeat{
lines(c(xx-wid,xx+wid),c(tp[xx],tp[xx]),lwd=2,col="black");
lines(c(xx,xx),c(groupquant_calb[1,xx],groupquant_calb[2,xx]),lwd=1.7,col="black")
xx<-xx+1
if(xx>3){break}}
tckk=-0.02
axis(2,las=2,cex.axis=1.1,at=seq(range[1],range[2],by=10),
labels=c(rep("",length(seq(range[1],range[2],by=10)))),pos=xrange[1],tck=tckk)
axis(2,las=2,cex.axis=1.1,at=seq(range[1],range[2],by=10),pos=xrange[1],tck=tckk)
axis(side=1,las=1,cex.axis=1.1,at=c(seq(1,4,by=1)),
labels=c(rep("",length(seq(1,4,by=1)))),pos=range[1],tck=tckk)
lines(c(xrange[1],xrange[2]),c(range[1],range[1]))
text(c(1:3),c(rep(range[1]-0.046*scal,3)),xpd=T,cex=1.1,col=cola[-2],
c("Wt", "SCA1", "SCA1"))
ypo<-c(range[1]-0.1*scal,range[1]-0.1*scal)
text(c(1:4),c(ypo[1],ypo[2],ypo[1],ypo[2]),xpd=T,cex=1.1,col=cola[-2],
c("0", "0", "edv"))
ypos<-12; xpos=1
points(xpos,ypos,pch=16,cex=1.4,col=rgb(0.4,0.4,0.4,alpha=0.6))
text(xpos+0.23*xscal,ypos,"Females",col="grey40",cex=1.2)
points(xpos,ypos-0.08*scal,pch=17,cex=1.4,col=rgb(0.4,0.4,0.4,alpha=0.6))
text(xpos+0.23*xscal,ypos-0.08*scal,"Males",col="grey40",cex=1.2)
### probability distributions --------
par(mar=c(2.2,1,0,0))
dif<-data.frame(post_fix_calb$b_groupeda.sca,
post_fix_calb$b_groupctrl.wt)
dif<-(dif)
yla<-"Difference in Calb IF"
tckk=-0.018
ste<-seq(-30, 30,by=10)
mons_poste(dif,0)
zpos=seq(-0.05,1,1/2)
xx=1;ind=0.6;xpol<- -15
text(xpol,zpos[xx]+zpos[2]*ind,
"SCA1: ed x 0 ",cex=1)
xx=xx+1
text(xpol,zpos[xx]+zpos[2]*ind,
"0: Wt x SCA1",cex=1)
} ) )