library(foreach)
library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1     ✔ purrr   0.2.4
## ✔ tibble  1.4.2     ✔ dplyr   0.7.4
## ✔ tidyr   0.8.0     ✔ stringr 1.3.0
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ purrr::accumulate() masks foreach::accumulate()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ purrr::when()       masks foreach::when()
library(rstan)
## Loading required package: StanHeaders
## rstan (Version 2.17.3, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## 
## Attaching package: 'rstan'
## The following object is masked from 'package:tidyr':
## 
##     extract
if("package:org.Hs.eg.db" %in% search()) detach("package:org.Hs.eg.db", unload=TRUE, force=TRUE)
if("package:hgu133plus2.db" %in% search()) detach("package:hgu133plus2.db", unload=TRUE, force=TRUE)
if("package:hgu95av2.db" %in% search()) detach("package:hgu95av2.db", unload=TRUE, force=TRUE)
if("package:illuminaHumanv4.db" %in% search()) detach("package:illuminaHumanv4.db", unload=TRUE, force=TRUE)
set.seed(123)

The numerical generative process - simulated data

simulate_data = function(delta_magnitude = 4){

# number of genes
n_genes = 100

# number of changing genes
changing_genes = round(n_genes * 0.3)

# Gene true value values baseline
baseline = rnbinom(n_genes, mu = 1000, size = 5)

# Gene true value values baseline
treated = c(baseline[1:(n_genes-changing_genes-1)],  baseline[(n_genes-changing_genes) : n_genes] * delta_magnitude)

# numbe rof samples
n_samples = 30

# Covariates
cov = sample(0:1, size = n_samples, replace = T)

# Produce tissue samples
y_real = foreach(n = 1:n_samples, .combine=bind_cols) %do% {
    tibble(
        switch(
            (cov[n] == 0) + 1,
            rnbinom(n_genes, mu = treated, size = 1000),
            rnbinom(n_genes, mu = baseline, size = 1000)
        ) 
    )
} %>%
    setNames(sprintf("s%s", 1:n_samples)) %>%
    mutate_if(is.numeric, as.integer)

# produce probability values
y_prob = foreach(n = 1:n_samples, .combine=bind_cols) %do% { y_real[,n]/sum(y_real[,n]) } %>% as_tibble()

# sample from multinomial
y_observed = foreach(n = 1:n_samples, .combine=bind_cols) %do% { 
    tibble(as.numeric(rmultinom(n = 1, prob = y_prob %>% pull(n), size = 10000000)) )  
} %>%
    setNames(colnames(y_real)) %>%
    mutate_if(is.numeric, as.integer)

    list(
        G =                           nrow(y_observed),
        T =                           ncol(y_observed),
        F =                           n_genes - changing_genes,
        y =                           t(y_observed),
        cond =                        cov,
        y_prob =                      y_prob,
        baseline =                    baseline,
        treated =                     treated
    )

} 

Let’s implement Bob suggestion

mBob = "data {
      int<lower = 0> F;                   // fixed genes
      int<lower = 0> G;                   // all genes
      int<lower = 0> T;                   // tube
      int<lower = 0> y[T, G];             // RNA-seq counts
      int<lower=0, upper=1> cond[T];            // condition per tube
}
parameters {
  vector[G] phi_control;
  vector[G - F] delta_treatment;
}
transformed parameters{
  // I took the liberty of moving the transformed vars to locals
  vector[G] phi_treatment;
    for(f in 1:F) phi_treatment[f] = phi_control[f];
  for(g in (F + 1) : G) phi_treatment[g] = phi_control[g] + delta_treatment[g - F]; 
}
model {

  for (t in 1:T)
  if(cond[t] == 0)  y[t] ~ multinomial(softmax(phi_control ));
    else y[t] ~ multinomial(softmax(phi_treatment ));
  delta_treatment ~ cauchy(0, 0.1);  // keep most small but allow some large
  phi_control ~ normal(0, 2);
  //sum(phi_control) ~ normal(0, 0.0001 * G);  // soft identification of control, does not change results for some reason

}

"

Data structure:

data_2 = simulate_data(4)
data_2$F = 30
fitBob= rstan:::stan(
    model_code = mBob, 
    data= data_2,
    iter=   500,
    chains = 4,
    cores = 4, refresh = 0
)
## In file included from /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/config.hpp:39,
##                  from /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/math/tools/config.hpp:13,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core/var.hpp:7,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core/gevv_vvv_vari.hpp:5,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core.hpp:12,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/mat.hpp:4,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math.hpp:4,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/src/stan/model/model_header.hpp:4,
##                  from file1eeb625174c2e.cpp:8:
## /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/config/compiler/gcc.hpp:186:1: warning: "BOOST_NO_CXX11_RVALUE_REFERENCES" redefined
## <command-line>: warning: this is the location of the previous definition

UNknown Fixed genes, delta is close to 0 but different from 0

#head(summary(fitBob, "alpha")$summary, n=10)
head(summary(fitBob, "delta_treatment")$summary, n=10)
##                             mean      se_mean          sd         2.5%
## delta_treatment[1]  -0.009419010 5.541442e-05 0.001703646 -0.012817185
## delta_treatment[2]  -0.012720232 4.966017e-05 0.001570392 -0.015898698
## delta_treatment[3]   0.020138649 5.022169e-05 0.001588149  0.017172888
## delta_treatment[4]   0.004614408 4.403311e-05 0.001392449  0.001883552
## delta_treatment[5]  -0.001056983 4.329805e-05 0.001369205 -0.003664755
## delta_treatment[6]  -0.019571470 4.711952e-05 0.001490050 -0.022480083
## delta_treatment[7]   0.016711625 4.683899e-05 0.001481179  0.013822281
## delta_treatment[8]   0.003516560 3.718508e-05 0.001175896  0.001091076
## delta_treatment[9]  -0.013165691 3.560418e-05 0.001125903 -0.015446939
## delta_treatment[10] -0.012169559 4.378688e-05 0.001384663 -0.015012484
##                              25%          50%           75%        97.5%
## delta_treatment[1]  -0.010606028 -0.009361279 -0.0082611836 -0.006326001
## delta_treatment[2]  -0.013744203 -0.012716645 -0.0116718208 -0.009723637
## delta_treatment[3]   0.019113233  0.020154762  0.0212049891  0.023279508
## delta_treatment[4]   0.003594960  0.004627687  0.0055792462  0.007366394
## delta_treatment[5]  -0.002029114 -0.001050552 -0.0001028852  0.001592940
## delta_treatment[6]  -0.020564551 -0.019555748 -0.0185943408 -0.016713367
## delta_treatment[7]   0.015716653  0.016723664  0.0176689499  0.019603512
## delta_treatment[8]   0.002732101  0.003530824  0.0042867127  0.005849873
## delta_treatment[9]  -0.013878210 -0.013171720 -0.0124008956 -0.010837968
## delta_treatment[10] -0.013022187 -0.012158462 -0.0112984622 -0.009385551
##                        n_eff      Rhat
## delta_treatment[1]   945.177 1.0056086
## delta_treatment[2]  1000.000 0.9998347
## delta_treatment[3]  1000.000 0.9999461
## delta_treatment[4]  1000.000 0.9991611
## delta_treatment[5]  1000.000 0.9997065
## delta_treatment[6]  1000.000 0.9986268
## delta_treatment[7]  1000.000 0.9976128
## delta_treatment[8]  1000.000 0.9990607
## delta_treatment[9]  1000.000 0.9977530
## delta_treatment[10] 1000.000 0.9972104
plot(summary(fitBob, "phi_control")$summary[,1], summary(fitBob, "phi_treatment")$summary[,1] )
points(summary(fitBob, "phi_control")$summary[,1] %>% head(n=data_2$F), summary(fitBob, "phi_treatment")$summary[,1] %>% head(n=data_2$F), col="red" )
abline(0,1)
abline(log(4),1)

The although the fold change of +4 was detected without loss, the 30 UNknown fixed genes alhtough they have a small delta, such delta is different from 0. I could put an hard theshold in this case, after the model, but would be great to do it within the model.

For this reason I tried

I tried to add some uncertanty, avoiding fixing to 0 the delta of fixed genes but giving a tight prior ~cauchy(0, 0.001)

mBob_2 = "data {
      int<lower = 0> F;                   // fixed genes
      int<lower = 0> G;                   // all genes
      int<lower = 0> T;                   // tube
      int<lower = 0> y[T, G];             // RNA-seq counts
      int<lower=0, upper=1> cond[T];            // condition per tube
}
parameters {
  vector[G] phi_control;
  vector[G] delta_treatment;
}
transformed parameters{
  // I took the liberty of moving the transformed vars to locals
  vector[G] phi_treatment;
  for(g in 1 : G) phi_treatment[g] = phi_control[g] + delta_treatment[g]; 
}
model {

  for (t in 1:T){
      if(cond[t] == 0)  y[t] ~ multinomial(softmax(phi_control ));
        else y[t] ~ multinomial(softmax(phi_treatment ));
    }
    for(f in 1:F) delta_treatment[f] ~ cauchy(0, 0.001);  // keep most small but allow some large
  for(g in (F + 1) : G) delta_treatment[g] ~ cauchy(0, 0.1);  // keep most small but allow some large

  phi_control ~ normal(0, 2);
  sum(phi_control) ~ normal(0, 0.0001 * G);  // soft identification of control

}

"
fitBob_2= rstan:::stan(
    model_code = mBob_2, 
    data= data_2,
    iter=   500,
    chains = 4,
    cores = 4, refresh = 0
)
## In file included from /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/config.hpp:39,
##                  from /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/math/tools/config.hpp:13,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core/var.hpp:7,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core/gevv_vvv_vari.hpp:5,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core.hpp:12,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/mat.hpp:4,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math.hpp:4,
##                  from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/src/stan/model/model_header.hpp:4,
##                  from file1eeb61c6a179e.cpp:8:
## /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/config/compiler/gcc.hpp:186:1: warning: "BOOST_NO_CXX11_RVALUE_REFERENCES" redefined
## <command-line>: warning: this is the location of the previous definition
#head(summary(fitBob, "alpha")$summary, n=10)

Fixed KNOWN genes

head(summary(fitBob_2, "delta_treatment")$summary, n=10)
##                           mean    se_mean         sd       2.5%        25%
## delta_treatment[1]  -0.3689235 0.04893774 0.06974052 -0.4872950 -0.3999930
## delta_treatment[2]  -0.3868813 0.04902884 0.06989178 -0.5054049 -0.4189431
## delta_treatment[3]  -0.3820139 0.04896709 0.06977711 -0.4999795 -0.4111258
## delta_treatment[4]  -0.3921944 0.04897412 0.06980355 -0.5112060 -0.4212327
## delta_treatment[5]  -0.3592685 0.04899700 0.06982013 -0.4772249 -0.3885198
## delta_treatment[6]  -0.4112459 0.04897147 0.06978147 -0.5294539 -0.4406242
## delta_treatment[7]  -0.3793153 0.04895318 0.06977123 -0.4973365 -0.4094277
## delta_treatment[8]  -0.3685113 0.04897647 0.06978948 -0.4862600 -0.3977611
## delta_treatment[9]  -0.3653712 0.04898550 0.06982065 -0.4833000 -0.3945125
## delta_treatment[10] -0.4073094 0.04900817 0.06985273 -0.5255561 -0.4377042
##                            50%        75%      97.5%    n_eff     Rhat
## delta_treatment[1]  -0.3460253 -0.3277713 -0.2828100 2.030872 17.77200
## delta_treatment[2]  -0.3641414 -0.3442901 -0.2994867 2.032118 16.80802
## delta_treatment[3]  -0.3592091 -0.3414025 -0.2960072 2.030567 18.44642
## delta_treatment[4]  -0.3696703 -0.3505512 -0.3058603 2.031524 17.02388
## delta_treatment[5]  -0.3366982 -0.3185653 -0.2730300 2.030591 18.48903
## delta_treatment[6]  -0.3882972 -0.3701718 -0.3254051 2.030457 18.05432
## delta_treatment[7]  -0.3570995 -0.3384736 -0.2928930 2.031379 17.36520
## delta_treatment[8]  -0.3464586 -0.3279278 -0.2822403 2.030509 18.42086
## delta_treatment[9]  -0.3424122 -0.3245217 -0.2787180 2.031574 17.33032
## delta_treatment[10] -0.3861030 -0.3654703 -0.3207096 2.031561 16.88582

Fixed UNknown genes

summary(fitBob_2, "delta_treatment")$summary[(data_2$F +1) :(data_2$F +1 + 10),]
##                           mean    se_mean         sd       2.5%        25%
## delta_treatment[31] -0.3885820 0.04897892 0.06979473 -0.5068794 -0.4186471
## delta_treatment[32] -0.3918513 0.04897994 0.06980324 -0.5100675 -0.4218535
## delta_treatment[33] -0.3590262 0.04901896 0.06985769 -0.4774584 -0.3892294
## delta_treatment[34] -0.3744505 0.04898779 0.06981006 -0.4922661 -0.4041540
## delta_treatment[35] -0.3801921 0.04899221 0.06981724 -0.4982832 -0.4100414
## delta_treatment[36] -0.3987731 0.04897538 0.06979261 -0.5169041 -0.4280118
## delta_treatment[37] -0.3624008 0.04900641 0.06984108 -0.4804646 -0.3915885
## delta_treatment[38] -0.3755570 0.04897932 0.06979834 -0.4933337 -0.4048170
## delta_treatment[39] -0.3922842 0.04900347 0.06983149 -0.5102217 -0.4220549
## delta_treatment[40] -0.3912393 0.04903017 0.06987661 -0.5090773 -0.4213341
## delta_treatment[41] -0.3994141 0.04896288 0.06976940 -0.5172621 -0.4290697
##                            50%        75%      97.5%    n_eff     Rhat
## delta_treatment[31] -0.3653905 -0.3479205 -0.3024043 2.030612 17.26419
## delta_treatment[32] -0.3691825 -0.3505061 -0.3062816 2.031022 17.82832
## delta_treatment[33] -0.3356390 -0.3184737 -0.2721395 2.030955 17.86015
## delta_treatment[34] -0.3518643 -0.3338610 -0.2877892 2.030768 17.97807
## delta_treatment[35] -0.3580911 -0.3398188 -0.2939413 2.030819 18.05155
## delta_treatment[36] -0.3769094 -0.3580713 -0.3126150 2.030782 17.93860
## delta_treatment[37] -0.3391643 -0.3210341 -0.2762616 2.031029 17.85327
## delta_treatment[38] -0.3525088 -0.3354811 -0.2893455 2.030788 18.32955
## delta_treatment[39] -0.3695465 -0.3520082 -0.3060701 2.030715 18.45803
## delta_treatment[40] -0.3698285 -0.3505570 -0.3047565 2.031126 17.98815
## delta_treatment[41] -0.3771828 -0.3582887 -0.3133307 2.030467 17.98886

Variable genes

tail(summary(fitBob_2, "delta_treatment")$summary, n=10)
##                           mean    se_mean         sd      2.5%       25%
## delta_treatment[91]  0.9911721 0.04896042 0.06977988 0.8735638 0.9619049
## delta_treatment[92]  1.0034303 0.04897062 0.06976708 0.8855493 0.9744061
## delta_treatment[93]  1.0312562 0.04900547 0.06982847 0.9135402 1.0024694
## delta_treatment[94]  1.0099449 0.04898705 0.06980064 0.8923705 0.9804915
## delta_treatment[95]  1.0266959 0.04898907 0.06980849 0.9088894 0.9968032
## delta_treatment[96]  0.9864867 0.04894907 0.06975316 0.8687848 0.9574264
## delta_treatment[97]  1.0144847 0.04897601 0.06979241 0.8966596 0.9857311
## delta_treatment[98]  1.0258554 0.04897463 0.06980451 0.9077713 0.9968529
## delta_treatment[99]  0.9678669 0.04903139 0.06988010 0.8494227 0.9383497
## delta_treatment[100] 1.0083933 0.04896804 0.06979499 0.8901786 0.9787896
##                            50%      75%    97.5%    n_eff     Rhat
## delta_treatment[91]  1.0143031 1.030918 1.077872 2.031282 18.19614
## delta_treatment[92]  1.0261784 1.044145 1.088920 2.029690 18.27706
## delta_treatment[93]  1.0540287 1.071412 1.117507 2.030373 18.51811
## delta_treatment[94]  1.0327895 1.049946 1.095834 2.030282 18.72588
## delta_treatment[95]  1.0494625 1.066695 1.112752 2.030570 18.54377
## delta_treatment[96]  1.0088826 1.026231 1.072525 2.030668 18.62772
## delta_treatment[97]  1.0373953 1.055220 1.100695 2.030718 18.23688
## delta_treatment[98]  1.0483675 1.067981 1.111885 2.031536 17.55206
## delta_treatment[99]  0.9900874 1.008826 1.054303 2.031228 17.51867
## delta_treatment[100] 1.0317293 1.049903 1.094547 2.031529 17.76014

In this case, did not work. I also tried with even more stringent prior to see what happen. ~ normal(0, 0.01).

I imagine now that deta is not defined anymore, eventhough the prior is tight.