library(foreach)
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1     ✔ purrr   0.2.4
## ✔ tibble  1.4.2     ✔ dplyr   0.7.4
## ✔ tidyr   0.8.0     ✔ stringr 1.3.0
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ purrr::accumulate() masks foreach::accumulate()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ purrr::when()       masks foreach::when()
library(rstan)
## Loading required package: StanHeaders
## rstan (Version 2.17.3, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## 
## Attaching package: 'rstan'
## The following object is masked from 'package:tidyr':
## 
##     extract
if("package:org.Hs.eg.db" %in% search()) detach("package:org.Hs.eg.db", unload=TRUE, force=TRUE)
if("package:hgu133plus2.db" %in% search()) detach("package:hgu133plus2.db", unload=TRUE, force=TRUE)
if("package:hgu95av2.db" %in% search()) detach("package:hgu95av2.db", unload=TRUE, force=TRUE)
if("package:illuminaHumanv4.db" %in% search()) detach("package:illuminaHumanv4.db", unload=TRUE, force=TRUE)
set.seed(123)

The numerical generative process - simulated data

simulate_data = function(delta_magnitude = 4){

# number of genes
n_genes = 100

# number of changing genes
changing_genes = round(n_genes * 0.3)

# Gene true value values baseline
baseline = rnbinom(n_genes, mu = 1000, size = 5)

# Gene true value values baseline
treated = c(baseline[1:(n_genes-changing_genes-1)],  baseline[(n_genes-changing_genes) : n_genes] * delta_magnitude)

# numbe rof samples
n_samples = 30

# Covariates
cov = sample(0:1, size = n_samples, replace = T)

# Produce tissue samples
y_real = foreach(n = 1:n_samples, .combine=bind_cols) %do% {
    tibble(
        switch(
            (cov[n] == 0) + 1,
            rnbinom(n_genes, mu = treated, size = 1000),
            rnbinom(n_genes, mu = baseline, size = 1000)
        ) 
    )
} %>%
    setNames(sprintf("s%s", 1:n_samples)) %>%
    mutate_if(is.numeric, as.integer)

# produce probability values
y_prob = foreach(n = 1:n_samples, .combine=bind_cols) %do% { y_real[,n]/sum(y_real[,n]) } %>% as_tibble()

# sample from multinomial
y_observed = foreach(n = 1:n_samples, .combine=bind_cols) %do% { 
    tibble(as.numeric(rmultinom(n = 1, prob = y_prob %>% pull(n), size = 10000000)) )  
} %>%
    setNames(colnames(y_real)) %>%
    mutate_if(is.numeric, as.integer)

    list(
        G =                           nrow(y_observed),
        T =                           ncol(y_observed),
        F =                           n_genes - changing_genes,
        y =                           t(y_observed),
        cond =                        cov,
        y_prob =                      y_prob,
        baseline =                    baseline,
        treated =                     treated
    )

} 

Simple model

I have done few tests on the new gene expression model. Let’s start defining the simulated data from the numerical generating process.

The data simulation

m1 = "
    data {
      int<lower = 0> G;                   // all genes
      int<lower = 0> T;                   // tube
      int<lower = 0> y[T, G];             // RNA-seq counts
    }
    parameters {
        simplex[G] phi;
    }
    model {
      for (t in 1:T) y[t] ~ multinomial(phi);
    }
"
data = simulate_data (1)
    
fit = rstan:::stan(
    model_code = m1, 
    data= data,
    iter=                             500,
    chains = 4,
    cores = 4
)
## In file included from /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/config.hpp:39,
##                  from /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/math/tools/config.hpp:13,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core/var.hpp:7,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core/gevv_vvv_vari.hpp:5,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core.hpp:12,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/mat.hpp:4,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math.hpp:4,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/src/stan/model/model_header.hpp:4,
##                  from file14ec86446eebc.cpp:8:
## /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/config/compiler/gcc.hpp:186:1: warning: "BOOST_NO_CXX11_RVALUE_REFERENCES" redefined
## <command-line>: warning: this is the location of the previous definition

The inference as expected is successful

plot(summary(fit, "phi")$summary[,1], data$baseline/sum(data$baseline))

Inserting softmax(alpha)

m2 = "data {
      int<lower = 0> G;                   // all genes
      int<lower = 0> T;                   // tube
      int<lower = 0> y[T, G];             // RNA-seq counts
    }
    parameters {
      vector[G] alpha;  // control log relative expression
    }
    transformed parameters{
        vector[G] phi;
      for(t in 1:T) phi = softmax( alpha ); 
    }
    model {
      for (t in 1:T) y[t] ~ multinomial(phi);
      alpha ~ normal(0, 5);    // log relative expression in control
      sum(alpha) ~ normal(0, 0.0001 * G);  // soft identification of control
    }
    "
data = simulate_data (1)
    
fit = rstan:::stan(
    model_code = m2, 
    data= data,
    iter=   500,
    chains = 4,
    cores = 4
)
## In file included from /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/config.hpp:39,
##                  from /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/math/tools/config.hpp:13,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core/var.hpp:7,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core/gevv_vvv_vari.hpp:5,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core.hpp:12,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/mat.hpp:4,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math.hpp:4,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/src/stan/model/model_header.hpp:4,
##                  from file17539634c0959.cpp:8:
## /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/config/compiler/gcc.hpp:186:1: warning: "BOOST_NO_CXX11_RVALUE_REFERENCES" redefined
## <command-line>: warning: this is the location of the previous definition
plot(summary(fit, "phi")$summary[,1], data$baseline/sum(data$baseline))

traceplot(fit, sprintf("alpha[%s]", 1:10) )

rstan::summary(fit, "alpha")$summary
##                   mean      se_mean           sd        2.5%         25%
## alpha[1]    0.02524969 1.956917e-05 0.0006188316  0.02406258  0.02483113
## alpha[2]    0.19116635 1.779566e-05 0.0005627482  0.18999891  0.19083819
## alpha[3]    1.06569769 1.117117e-05 0.0003532633  1.06500227  1.06544890
## alpha[4]   -0.01283520 1.932730e-05 0.0006111829 -0.01407415 -0.01324483
## alpha[5]   -0.02107187 1.953132e-05 0.0006176344 -0.02225096 -0.02150341
## alpha[6]   -0.11666816 2.001209e-05 0.0006328379 -0.11787438 -0.11709446
## alpha[7]    0.37716742 1.756481e-05 0.0005554480  0.37610491  0.37679030
## alpha[8]    0.12497996 1.798258e-05 0.0005686590  0.12387976  0.12456415
## alpha[9]   -0.76345940 2.652987e-05 0.0008389481 -0.76512291 -0.76402425
## alpha[10]  -0.37875721 2.243371e-05 0.0007094163 -0.38018903 -0.37924074
## alpha[11]   0.66116250 1.528383e-05 0.0004833170  0.66025488  0.66082801
## alpha[12]  -0.27941300 2.260234e-05 0.0007147488 -0.28078146 -0.27987337
## alpha[13]   0.61720783 1.466971e-05 0.0004638968  0.61631937  0.61688857
## alpha[14]   0.18635191 1.778618e-05 0.0005624484  0.18528742  0.18599932
## alpha[15]   0.44799536 1.501076e-05 0.0004746820  0.44705692  0.44769595
## alpha[16]   0.11606460 1.841776e-05 0.0005824206  0.11495822  0.11563898
## alpha[17]  -0.47639174 2.340557e-05 0.0007401492 -0.47786731 -0.47689390
## alpha[18]  -0.73155078 2.758270e-05 0.0008722415 -0.73324778 -0.73217021
## alpha[19]  -0.13423272 2.029852e-05 0.0006418956 -0.13544723 -0.13470495
## alpha[20]   0.57902478 1.463176e-05 0.0004626970  0.57811822  0.57872882
## alpha[21]  -0.10878464 2.027031e-05 0.0006410036 -0.10999651 -0.10924168
## alpha[22]  -0.33859134 2.193556e-05 0.0006936634 -0.33991527 -0.33908451
## alpha[23]   0.65264840 1.370799e-05 0.0004334846  0.65180144  0.65235395
## alpha[24]  -0.53612288 2.526775e-05 0.0007990365 -0.53768274 -0.53662758
## alpha[25]  -0.19185037 2.167437e-05 0.0006854037 -0.19315679 -0.19230985
## alpha[26]   0.10714181 1.881674e-05 0.0005950377  0.10602078  0.10672611
## alpha[27]   0.66121436 1.446143e-05 0.0004573105  0.66029737  0.66091876
## alpha[28]   0.34255413 1.644000e-05 0.0005198784  0.34154035  0.34218072
## alpha[29]   0.48835988 1.565808e-05 0.0004951519  0.48738986  0.48803723
## alpha[30]  -0.10747656 2.104026e-05 0.0006653514 -0.10877826 -0.10794323
## alpha[31]   0.31885432 1.547368e-05 0.0004893207  0.31785591  0.31853476
## alpha[32]  -1.27228912 3.934643e-05 0.0010942311 -1.27435525 -1.27303163
## alpha[33]   0.40090617 1.592053e-05 0.0005034513  0.39998006  0.40056361
## alpha[34]   0.13301902 1.740739e-05 0.0005504701  0.13188268  0.13265854
## alpha[35]   0.21649680 1.782392e-05 0.0005636419  0.21541976  0.21612798
## alpha[36]  -0.47164575 2.492000e-05 0.0007880395 -0.47323113 -0.47217239
## alpha[37]  -0.30771745 2.352735e-05 0.0007440002 -0.30922045 -0.30819679
## alpha[38]   0.56035304 1.493053e-05 0.0004721447  0.55945548  0.56004414
## alpha[39]  -0.17946576 2.032884e-05 0.0006428545 -0.18074134 -0.17989966
## alpha[40]  -0.12658675 2.106146e-05 0.0006660219 -0.12788633 -0.12702253
## alpha[41]  -0.32762930 2.290712e-05 0.0007243867 -0.32900547 -0.32812344
## alpha[42]   0.15416654 1.814920e-05 0.0005739281  0.15308347  0.15377956
## alpha[43]  -0.37620448 2.373592e-05 0.0007505958 -0.37767490 -0.37672331
## alpha[44]  -0.19545293 2.093166e-05 0.0006619171 -0.19678048 -0.19586226
## alpha[45]   0.72111984 1.337190e-05 0.0004228566  0.72033780  0.72081046
## alpha[46]  -0.07842809 1.982092e-05 0.0006267926 -0.07960479 -0.07885374
## alpha[47]  -0.44520492 2.337930e-05 0.0007393182 -0.44668067 -0.44571742
## alpha[48]  -0.10321084 2.010858e-05 0.0006358892 -0.10448635 -0.10363183
## alpha[49]  -0.10893012 2.098341e-05 0.0006635538 -0.11021243 -0.10937881
## alpha[50]   0.31950986 1.738654e-05 0.0005498106  0.31847330  0.31911666
## alpha[51]  -0.45986229 2.590380e-05 0.0008191502 -0.46147772 -0.46040723
## alpha[52]  -0.59944629 2.454292e-05 0.0007761154 -0.60105271 -0.59994095
## alpha[53]   0.28437536 1.702432e-05 0.0005383562  0.28331300  0.28402480
## alpha[54]  -0.30443349 2.274675e-05 0.0007193155 -0.30581099 -0.30491238
## alpha[55]  -0.52805129 2.660056e-05 0.0008411836 -0.52970845 -0.52865314
## alpha[56]   0.36267994 1.758815e-05 0.0005561860  0.36153997  0.36230486
## alpha[57]  -0.79271994 2.875428e-05 0.0009092903 -0.79457544 -0.79333184
## alpha[58]   0.28712657 1.709024e-05 0.0005404410  0.28606097  0.28676716
## alpha[59]   0.07204108 1.762214e-05 0.0005572610  0.07097873  0.07166208
## alpha[60]   0.07784833 1.849997e-05 0.0005850206  0.07669257  0.07745217
## alpha[61]   0.27507504 1.724316e-05 0.0005452765  0.27405674  0.27470982
## alpha[62]   0.28847707 1.701164e-05 0.0005379553  0.28745579  0.28813821
## alpha[63]  -0.21760783 2.191055e-05 0.0006928723 -0.21890460 -0.21807921
## alpha[64]  -0.50902504 2.362210e-05 0.0007469964 -0.51054630 -0.50953966
## alpha[65]   0.06267133 1.826197e-05 0.0005774941  0.06151965  0.06228815
## alpha[66]   0.75275085 1.318233e-05 0.0004168618  0.75191825  0.75246949
## alpha[67]   0.29052968 1.694077e-05 0.0005357142  0.28950475  0.29016508
## alpha[68]   0.06649792 1.893457e-05 0.0005987638  0.06538037  0.06605204
## alpha[69]   0.15230817 1.891226e-05 0.0005980583  0.15110605  0.15192411
## alpha[70]  -0.09493079 2.149813e-05 0.0006798306 -0.09620675 -0.09539014
## alpha[71]   0.19593517 1.766433e-05 0.0005585952  0.19486570  0.19553542
## alpha[72]  -0.68908910 2.613262e-05 0.0008263861 -0.69066871 -0.68965362
## alpha[73]  -0.28918742 2.169634e-05 0.0006860984 -0.29050097 -0.28966873
## alpha[74]   0.38477468 1.579546e-05 0.0004994963  0.38385255  0.38442796
## alpha[75]   0.68299313 1.378459e-05 0.0004359071  0.68213959  0.68272178
## alpha[76]  -0.79375133 2.906612e-05 0.0009191516 -0.79566455 -0.79436388
## alpha[77]   0.41794536 1.471313e-05 0.0004652701  0.41699458  0.41763862
## alpha[78]   0.55453872 1.437606e-05 0.0004546108  0.55363802  0.55424697
## alpha[79]  -0.60020717 2.669668e-05 0.0008442232 -0.60184560 -0.60075386
## alpha[80]   0.11457855 1.917308e-05 0.0006063060  0.11333643  0.11417691
## alpha[81]   0.16806224 1.864242e-05 0.0005895250  0.16688874  0.16764159
## alpha[82]   0.36351379 1.655722e-05 0.0005235852  0.36243033  0.36316735
## alpha[83]  -0.64079662 2.671866e-05 0.0008449182 -0.64242048 -0.64140971
## alpha[84]   0.34075931 1.603566e-05 0.0005070922  0.33975845  0.34040880
## alpha[85]   0.09067529 1.885268e-05 0.0005961741  0.08947994  0.09027933
## alpha[86]  -0.89040825 2.878007e-05 0.0009101057 -0.89220742 -0.89104909
## alpha[87]  -0.52885951 2.535470e-05 0.0008017859 -0.53052107 -0.52937865
## alpha[88]   0.37704155 1.690583e-05 0.0005346092  0.37600376  0.37665312
## alpha[89]  -0.27926772 2.293627e-05 0.0007253085 -0.28069037 -0.27974744
## alpha[90]  -0.68450357 2.846914e-05 0.0009002732 -0.68622350 -0.68511312
## alpha[91]   0.59113080 1.433024e-05 0.0004531621  0.59024493  0.59082001
## alpha[92]  -1.00527879 3.094061e-05 0.0009784281 -1.00702679 -1.00595353
## alpha[93]   0.09396809 1.823144e-05 0.0005765289  0.09281868  0.09359463
## alpha[94]   0.20390545 1.808665e-05 0.0005719502  0.20276251  0.20354046
## alpha[95]  -1.02268249 3.124798e-05 0.0009881479 -1.02462267 -1.02329622
## alpha[96]  -0.24740405 2.113496e-05 0.0006683460 -0.24867646 -0.24787302
## alpha[97]   0.40547113 1.556418e-05 0.0004921825  0.40451530  0.40514987
## alpha[98]   0.58547375 1.490188e-05 0.0004712388  0.58457167  0.58513123
## alpha[99]   0.99092123 1.173862e-05 0.0003712077  0.99016713  0.99067698
## alpha[100]  0.36489762 1.635058e-05 0.0005170508  0.36387141  0.36454582
##                    50%         75%       97.5%    n_eff      Rhat
## alpha[1]    0.02525057  0.02569447  0.02640102 1000.000 0.9987812
## alpha[2]    0.19115419  0.19150747  0.19228313 1000.000 0.9981230
## alpha[3]    1.06569667  1.06594043  1.06636459 1000.000 0.9997694
## alpha[4]   -0.01282983 -0.01243108 -0.01164024 1000.000 0.9983475
## alpha[5]   -0.02106708 -0.02064450 -0.01992993 1000.000 0.9982180
## alpha[6]   -0.11666593 -0.11623945 -0.11537581 1000.000 0.9981007
## alpha[7]    0.37718596  0.37753254  0.37823094 1000.000 0.9982498
## alpha[8]    0.12498183  0.12539437  0.12603425 1000.000 0.9968329
## alpha[9]   -0.76346743 -0.76288215 -0.76173139 1000.000 0.9961981
## alpha[10]  -0.37877676 -0.37824559 -0.37737936 1000.000 0.9967997
## alpha[11]   0.66116782  0.66149871  0.66209834 1000.000 1.0006124
## alpha[12]  -0.27942985 -0.27894040 -0.27805020 1000.000 0.9985780
## alpha[13]   0.61722235  0.61750137  0.61814756 1000.000 0.9975485
## alpha[14]   0.18634019  0.18673801  0.18747208 1000.000 0.9983633
## alpha[15]   0.44800617  0.44831314  0.44892763 1000.000 0.9980324
## alpha[16]   0.11607834  0.11645838  0.11719243 1000.000 0.9998572
## alpha[17]  -0.47638849 -0.47590193 -0.47501332 1000.000 0.9979153
## alpha[18]  -0.73153709 -0.73097105 -0.72987328 1000.000 1.0039562
## alpha[19]  -0.13419623 -0.13376268 -0.13306001 1000.000 0.9986981
## alpha[20]   0.57901175  0.57932598  0.57988743 1000.000 0.9983843
## alpha[21]  -0.10878502 -0.10834917 -0.10751128 1000.000 0.9969376
## alpha[22]  -0.33859324 -0.33812525 -0.33728424 1000.000 0.9999378
## alpha[23]   0.65264760  0.65295763  0.65348946 1000.000 0.9990245
## alpha[24]  -0.53611034 -0.53558277 -0.53459862 1000.000 1.0026897
## alpha[25]  -0.19184945 -0.19138380 -0.19060451 1000.000 0.9999845
## alpha[26]   0.10713175  0.10756080  0.10823929 1000.000 0.9996126
## alpha[27]   0.66121845  0.66155194  0.66202799 1000.000 0.9999404
## alpha[28]   0.34255449  0.34291371  0.34356679 1000.000 0.9982350
## alpha[29]   0.48836570  0.48867966  0.48934289 1000.000 0.9970428
## alpha[30]  -0.10746483 -0.10705114 -0.10613171 1000.000 0.9991306
## alpha[31]   0.31884511  0.31917622  0.31985525 1000.000 0.9982068
## alpha[32]  -1.27226110 -1.27151206 -1.27019537  773.406 0.9983632
## alpha[33]   0.40089882  0.40125490  0.40191437 1000.000 0.9991022
## alpha[34]   0.13303651  0.13337756  0.13406094 1000.000 0.9987876
## alpha[35]   0.21647207  0.21686782  0.21766806 1000.000 0.9974124
## alpha[36]  -0.47163453 -0.47112263 -0.47015780 1000.000 0.9973741
## alpha[37]  -0.30771997 -0.30721634 -0.30631263 1000.000 0.9967554
## alpha[38]   0.56035836  0.56066197  0.56127506 1000.000 0.9977654
## alpha[39]  -0.17945785 -0.17900919 -0.17822706 1000.000 0.9989422
## alpha[40]  -0.12657381 -0.12614402 -0.12525020 1000.000 0.9967103
## alpha[41]  -0.32760943 -0.32713868 -0.32622642 1000.000 0.9978191
## alpha[42]   0.15416398  0.15454040  0.15525090 1000.000 0.9972019
## alpha[43]  -0.37620307 -0.37568055 -0.37475918 1000.000 0.9970913
## alpha[44]  -0.19546136 -0.19501757 -0.19417971 1000.000 0.9985576
## alpha[45]   0.72111698  0.72139743  0.72196621 1000.000 1.0022599
## alpha[46]  -0.07842988 -0.07802382 -0.07719566 1000.000 0.9970522
## alpha[47]  -0.44519311 -0.44470704 -0.44374692 1000.000 0.9965404
## alpha[48]  -0.10320516 -0.10281788 -0.10197686 1000.000 0.9987610
## alpha[49]  -0.10894537 -0.10849256 -0.10765789 1000.000 0.9966972
## alpha[50]   0.31950632  0.31986867  0.32061107 1000.000 0.9990267
## alpha[51]  -0.45986271 -0.45927793 -0.45834129 1000.000 0.9973963
## alpha[52]  -0.59943865 -0.59896384 -0.59794789 1000.000 1.0001995
## alpha[53]   0.28437356  0.28473480  0.28541888 1000.000 0.9997964
## alpha[54]  -0.30443427 -0.30394136 -0.30300020 1000.000 0.9980747
## alpha[55]  -0.52804817 -0.52744874 -0.52642224 1000.000 0.9973994
## alpha[56]   0.36269517  0.36305227  0.36371572 1000.000 1.0009757
## alpha[57]  -0.79271505 -0.79211020 -0.79091143 1000.000 1.0003696
## alpha[58]   0.28712254  0.28748043  0.28814220 1000.000 0.9973661
## alpha[59]   0.07203558  0.07240026  0.07316946 1000.000 0.9994912
## alpha[60]   0.07785183  0.07826640  0.07891412 1000.000 0.9974587
## alpha[61]   0.27505660  0.27544031  0.27610887 1000.000 0.9996219
## alpha[62]   0.28847347  0.28883589  0.28957747 1000.000 1.0010084
## alpha[63]  -0.21762360 -0.21715089 -0.21620284 1000.000 0.9967753
## alpha[64]  -0.50903301 -0.50850996 -0.50756084 1000.000 0.9972078
## alpha[65]   0.06266523  0.06305032  0.06376131 1000.000 0.9999446
## alpha[66]   0.75275107  0.75303778  0.75351469 1000.000 0.9991093
## alpha[67]   0.29052343  0.29088530  0.29152370 1000.000 0.9978729
## alpha[68]   0.06651104  0.06693030  0.06770267 1000.000 0.9986744
## alpha[69]   0.15231062  0.15268908  0.15344444 1000.000 0.9970646
## alpha[70]  -0.09492877 -0.09445209 -0.09361371 1000.000 0.9979414
## alpha[71]   0.19592020  0.19632447  0.19702555 1000.000 1.0024334
## alpha[72]  -0.68909640 -0.68854384 -0.68748113 1000.000 0.9989257
## alpha[73]  -0.28919106 -0.28867172 -0.28787139 1000.000 0.9991498
## alpha[74]   0.38477454  0.38511646  0.38576871 1000.000 0.9984879
## alpha[75]   0.68297071  0.68328313  0.68385213 1000.000 0.9974123
## alpha[76]  -0.79374708 -0.79314403 -0.79195905 1000.000 0.9973938
## alpha[77]   0.41793390  0.41825437  0.41884762 1000.000 0.9968963
## alpha[78]   0.55454887  0.55484566  0.55539158 1000.000 0.9980076
## alpha[79]  -0.60019818 -0.59965878 -0.59861841 1000.000 0.9966944
## alpha[80]   0.11457623  0.11498960  0.11573755 1000.000 1.0010579
## alpha[81]   0.16806367  0.16849195  0.16920960 1000.000 0.9993025
## alpha[82]   0.36350727  0.36387309  0.36452817 1000.000 1.0000080
## alpha[83]  -0.64077973 -0.64024259 -0.63906284 1000.000 1.0016332
## alpha[84]   0.34078509  0.34111175  0.34175189 1000.000 0.9999979
## alpha[85]   0.09067083  0.09106623  0.09186995 1000.000 1.0007762
## alpha[86]  -0.89039603 -0.88982624 -0.88863884 1000.000 0.9982137
## alpha[87]  -0.52884051 -0.52832879 -0.52728436 1000.000 1.0062513
## alpha[88]   0.37704303  0.37741164  0.37803941 1000.000 0.9973887
## alpha[89]  -0.27925916 -0.27879251 -0.27780807 1000.000 0.9987065
## alpha[90]  -0.68451652 -0.68390485 -0.68275910 1000.000 0.9983984
## alpha[91]   0.59113628  0.59144037  0.59200176 1000.000 0.9996465
## alpha[92]  -1.00528954 -1.00463508 -1.00335662 1000.000 0.9988724
## alpha[93]   0.09398067  0.09437121  0.09502525 1000.000 0.9979717
## alpha[94]   0.20390239  0.20426773  0.20501658 1000.000 0.9986221
## alpha[95]  -1.02270636 -1.02205936 -1.02068993 1000.000 1.0013118
## alpha[96]  -0.24739249 -0.24694154 -0.24614332 1000.000 0.9974788
## alpha[97]   0.40547806  0.40578979  0.40642550 1000.000 0.9979955
## alpha[98]   0.58547466  0.58580888  0.58635368 1000.000 0.9964880
## alpha[99]   0.99092724  0.99117697  0.99159433 1000.000 0.9987098
## alpha[100]  0.36491268  0.36523835  0.36591100 1000.000 1.0008076

Modelling asymetry bias

data = simulate_data (4)

Here the bias caused by the asymetry (only increase) of the changes of expression

plot(data$y_prob %>% pull(which(data$cond==0)[1]), data$y_prob %>% pull(which(data$cond==1)[1]) )
abline(0,1)

With multiplying by a adjusted factor 2 we can re-establish the baseline

plot(data$y_prob %>% pull(which(data$cond==0)[1]), (data$y_prob %>% pull(which(data$cond==1)[1])) * 2 )
abline(0,1)

m3 = "data {
      int<lower = 0> F;                   // fixed genes
      int<lower = 0> G;                   // all genes
      int<lower = 0> T;                   // tube
      int<lower = 0> y[T, G];             // RNA-seq counts
      int<lower=0, upper=1> cond[T];            // condition per tube
}
    parameters {
      vector[G] alpha[2];  // control log relative expression

    }
    transformed parameters{
        vector[G] phi_raw[T];
        vector[G] phi[T];

        for(t in 1:T) 
            for(g in 1:G) phi_raw[t,g] =  alpha[cond[t] + 1 , g];
        
    for(t in 1:T) phi[t] = softmax( phi_raw[t] );
    }
    model {
      for (t in 1:T) y[t] ~ multinomial(phi[t]);
      for(i in 1:2) alpha[i] ~ normal(0, 5);    // log relative expression in control
      for(i in 1:2) sum(alpha[i]) ~ normal(0, 0.0001 * G);  // soft identification of control
    }
    "
fit3 = rstan:::stan(
    model_code = m3, 
    data= data,
    iter=   500,
    chains = 4,
    cores = 4
)
## In file included from /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/config.hpp:39,
##                  from /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/math/tools/config.hpp:13,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core/var.hpp:7,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core/gevv_vvv_vari.hpp:5,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core.hpp:12,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/mat.hpp:4,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math.hpp:4,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/src/stan/model/model_header.hpp:4,
##                  from file4e1057df37b2.cpp:8:
## /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/config/compiler/gcc.hpp:186:1: warning: "BOOST_NO_CXX11_RVALUE_REFERENCES" redefined
## <command-line>: warning: this is the location of the previous definition
head(summary(fit, "alpha")$summary, n=10)
##                  mean      se_mean           sd        2.5%         25%
## alpha[1]   0.02524969 1.956917e-05 0.0006188316  0.02406258  0.02483113
## alpha[2]   0.19116635 1.779566e-05 0.0005627482  0.18999891  0.19083819
## alpha[3]   1.06569769 1.117117e-05 0.0003532633  1.06500227  1.06544890
## alpha[4]  -0.01283520 1.932730e-05 0.0006111829 -0.01407415 -0.01324483
## alpha[5]  -0.02107187 1.953132e-05 0.0006176344 -0.02225096 -0.02150341
## alpha[6]  -0.11666816 2.001209e-05 0.0006328379 -0.11787438 -0.11709446
## alpha[7]   0.37716742 1.756481e-05 0.0005554480  0.37610491  0.37679030
## alpha[8]   0.12497996 1.798258e-05 0.0005686590  0.12387976  0.12456415
## alpha[9]  -0.76345940 2.652987e-05 0.0008389481 -0.76512291 -0.76402425
## alpha[10] -0.37875721 2.243371e-05 0.0007094163 -0.38018903 -0.37924074
##                   50%         75%       97.5% n_eff      Rhat
## alpha[1]   0.02525057  0.02569447  0.02640102  1000 0.9987812
## alpha[2]   0.19115419  0.19150747  0.19228313  1000 0.9981230
## alpha[3]   1.06569667  1.06594043  1.06636459  1000 0.9997694
## alpha[4]  -0.01282983 -0.01243108 -0.01164024  1000 0.9983475
## alpha[5]  -0.02106708 -0.02064450 -0.01992993  1000 0.9982180
## alpha[6]  -0.11666593 -0.11623945 -0.11537581  1000 0.9981007
## alpha[7]   0.37718596  0.37753254  0.37823094  1000 0.9982498
## alpha[8]   0.12498183  0.12539437  0.12603425  1000 0.9968329
## alpha[9]  -0.76346743 -0.76288215 -0.76173139  1000 0.9961981
## alpha[10] -0.37877676 -0.37824559 -0.37737936  1000 0.9967997

The model shows the uncorrected bias

alpha_table = as.data.frame(rstan::summary(fit3, "alpha")$summary) %>% 
        tibble::as_tibble(rownames="par") %>%
        tidyr::separate(par, c("dummy", "sample", "gene", "dummy2"), "\\[|,|\\]") %>%
        dplyr::mutate(gene = as.integer(gene), sample = as.integer(sample)) %>%
        dplyr::select(-dummy, -dummy2) %>%
        dplyr::select(1:2, mean) %>%
        tidyr::spread(1,3)

plot(alpha_table %>% pull(2), alpha_table %>% pull(3) )
abline(0,1)

While manually adding + log(1.5) seem to fix the problem

plot(alpha_table %>% pull(2), alpha_table %>% pull(3) + log(1.5) )
abline(0,1)

Let’s try to replicate it with Stan. In this simple model, I assume that delta for FIXED genes (70% of genes) is simply 0

m4 = "data {
      int<lower = 0> F;                   // fixed genes
      int<lower = 0> G;                   // all genes
      int<lower = 0> T;                   // tube
      int<lower = 0> y[T, G];             // RNA-seq counts
      int<lower=0, upper=1> cond[T];            // condition per tube
}
    parameters {
      vector[G] alpha;  // control log relative expression
      real epsilon;
            vector[G-F] delta;  //  rate of change

    }
    transformed parameters{
        vector[G] phi_raw[T];
        vector[G] phi[T];

        for(t in 1:T) {

            for(f in 1:F)      phi_raw[t,f] =  alpha[f] + ( epsilon * cond[t] ) + ( 0            * cond[t] ) ;
            for(g in (F+1):G)  phi_raw[t,g] =  alpha[g] + ( epsilon * cond[t] ) + ( delta[g - F] * cond[t] );

        }

    for(t in 1:T) phi[t] = softmax( phi_raw[t] );
    }
    model {
      for (t in 1:T) y[t] ~ multinomial(phi[t]);
      alpha ~ normal(0, 5);    // log relative expression in control
      sum(alpha) ~ normal(0, 0.0001 * G);  // soft identification of control

      delta ~ normal(0,1);
      epsilon ~ normal(0,1);
    }
    "
fit4 = rstan:::stan(
    model_code = m4, 
    data= data,
    iter=   500,
    chains = 4,
    cores = 4
)
## In file included from /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/config.hpp:39,
##                  from /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/math/tools/config.hpp:13,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core/var.hpp:7,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core/gevv_vvv_vari.hpp:5,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core.hpp:12,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/mat.hpp:4,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math.hpp:4,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/src/stan/model/model_header.hpp:4,
##                  from file4b9d1939ef3c.cpp:8:
## /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/config/compiler/gcc.hpp:186:1: warning: "BOOST_NO_CXX11_RVALUE_REFERENCES" redefined
## <command-line>: warning: this is the location of the previous definition
head(summary(fit4, "alpha")$summary, n=10)
##                  mean      se_mean           sd        2.5%         25%
## alpha[1]   0.36430832 1.838529e-05 0.0005813940  0.36318497  0.36392199
## alpha[2]   0.07680065 2.218122e-05 0.0007014318  0.07542596  0.07632993
## alpha[3]  -0.09418932 2.278780e-05 0.0007206134 -0.09561129 -0.09464528
## alpha[4]   0.28748073 1.921231e-05 0.0006075465  0.28631782  0.28706897
## alpha[5]  -0.38676026 2.728681e-05 0.0008628846 -0.38842306 -0.38735434
## alpha[6]  -1.02342736 3.553382e-05 0.0011107599 -1.02564544 -1.02414810
## alpha[7]  -0.33536020 2.535598e-05 0.0008018264 -0.33687881 -0.33593351
## alpha[8]  -0.08527592 2.287974e-05 0.0007235210 -0.08662599 -0.08576925
## alpha[9]  -0.04110035 2.381949e-05 0.0007532383 -0.04265013 -0.04158295
## alpha[10] -0.10646207 2.365233e-05 0.0007479524 -0.10789287 -0.10697376
##                   50%         75%       97.5%     n_eff      Rhat
## alpha[1]   0.36431029  0.36467530  0.36546795 1000.0000 0.9984356
## alpha[2]   0.07679316  0.07728106  0.07817713 1000.0000 0.9976178
## alpha[3]  -0.09417792 -0.09373485 -0.09274634 1000.0000 0.9983248
## alpha[4]   0.28747444  0.28788826  0.28862345 1000.0000 0.9983265
## alpha[5]  -0.38678942 -0.38615986 -0.38509822 1000.0000 0.9999230
## alpha[6]  -1.02344508 -1.02267509 -1.02133891  977.1395 0.9970713
## alpha[7]  -0.33537197 -0.33481211 -0.33382590 1000.0000 1.0028884
## alpha[8]  -0.08528195 -0.08479673 -0.08387415 1000.0000 0.9977019
## alpha[9]  -0.04110123 -0.04060290 -0.03967852 1000.0000 1.0009313
## alpha[10] -0.10643689 -0.10597595 -0.10504452 1000.0000 0.9988490

As you can see the epsilon factor that would work log(1.5) is not detected. What am I doing wrong is not clear to me

summary(fit4, "epsilon")$summary
##              mean   se_mean        sd     2.5%        25%        50%
## epsilon 0.1554633 0.2403415 0.8774096 -1.86533 -0.4261631 0.09048492
##               75%    97.5%    n_eff     Rhat
## epsilon 0.7732695 1.751431 13.32745 1.220202
traceplot(fit4, "epsilon")

phi_raw_table = as.data.frame(rstan::summary(fit4, "phi_raw")$summary) %>% 
        tibble::as_tibble(rownames="par") %>%
        tidyr::separate(par, c("dummy", "sample", "gene", "dummy2"), "\\[|,|\\]") %>%
        dplyr::mutate(gene = as.integer(gene), sample = as.integer(sample)) %>%
        dplyr::select(-dummy, -dummy2) %>%
        dplyr::select(1:2, mean) %>%
        tidyr::spread(1,3)

plot(phi_raw_table %>% pull(which(data$cond==0)[1]+1), phi_raw_table %>% pull(which(data$cond==1)[1]+1) )
abline(0,1)
abline(log(4),1)