library(foreach)
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1 ✔ purrr 0.2.4
## ✔ tibble 1.4.2 ✔ dplyr 0.7.4
## ✔ tidyr 0.8.0 ✔ stringr 1.3.0
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ purrr::accumulate() masks foreach::accumulate()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::when() masks foreach::when()
library(rstan)
## Loading required package: StanHeaders
## rstan (Version 2.17.3, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
##
## Attaching package: 'rstan'
## The following object is masked from 'package:tidyr':
##
## extract
if("package:org.Hs.eg.db" %in% search()) detach("package:org.Hs.eg.db", unload=TRUE, force=TRUE)
if("package:hgu133plus2.db" %in% search()) detach("package:hgu133plus2.db", unload=TRUE, force=TRUE)
if("package:hgu95av2.db" %in% search()) detach("package:hgu95av2.db", unload=TRUE, force=TRUE)
if("package:illuminaHumanv4.db" %in% search()) detach("package:illuminaHumanv4.db", unload=TRUE, force=TRUE)
set.seed(123)
simulate_data = function(delta_magnitude = 4){
# number of genes
n_genes = 100
# number of changing genes
changing_genes = round(n_genes * 0.3)
# Gene true value values baseline
baseline = rnbinom(n_genes, mu = 1000, size = 5)
# Gene true value values baseline
treated = c(baseline[1:(n_genes-changing_genes-1)], baseline[(n_genes-changing_genes) : n_genes] * delta_magnitude)
# numbe rof samples
n_samples = 30
# Covariates
cov = sample(0:1, size = n_samples, replace = T)
# Produce tissue samples
y_real = foreach(n = 1:n_samples, .combine=bind_cols) %do% {
tibble(
switch(
(cov[n] == 0) + 1,
rnbinom(n_genes, mu = treated, size = 1000),
rnbinom(n_genes, mu = baseline, size = 1000)
)
)
} %>%
setNames(sprintf("s%s", 1:n_samples)) %>%
mutate_if(is.numeric, as.integer)
# produce probability values
y_prob = foreach(n = 1:n_samples, .combine=bind_cols) %do% { y_real[,n]/sum(y_real[,n]) } %>% as_tibble()
# sample from multinomial
y_observed = foreach(n = 1:n_samples, .combine=bind_cols) %do% {
tibble(as.numeric(rmultinom(n = 1, prob = y_prob %>% pull(n), size = 10000000)) )
} %>%
setNames(colnames(y_real)) %>%
mutate_if(is.numeric, as.integer)
list(
G = nrow(y_observed),
T = ncol(y_observed),
F = n_genes - changing_genes,
y = t(y_observed),
cond = cov,
y_prob = y_prob,
baseline = baseline,
treated = treated
)
}
I have done few tests on the new gene expression model. Let’s start defining the simulated data from the numerical generating process.
The data simulation
m1 = "
data {
int<lower = 0> G; // all genes
int<lower = 0> T; // tube
int<lower = 0> y[T, G]; // RNA-seq counts
}
parameters {
simplex[G] phi;
}
model {
for (t in 1:T) y[t] ~ multinomial(phi);
}
"
data = simulate_data (1)
fit = rstan:::stan(
model_code = m1,
data= data,
iter= 500,
chains = 4,
cores = 4
)
## In file included from /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/config.hpp:39,
## from /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/math/tools/config.hpp:13,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core/var.hpp:7,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core/gevv_vvv_vari.hpp:5,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core.hpp:12,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/mat.hpp:4,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math.hpp:4,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/src/stan/model/model_header.hpp:4,
## from file14ec86446eebc.cpp:8:
## /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/config/compiler/gcc.hpp:186:1: warning: "BOOST_NO_CXX11_RVALUE_REFERENCES" redefined
## <command-line>: warning: this is the location of the previous definition
The inference as expected is successful
plot(summary(fit, "phi")$summary[,1], data$baseline/sum(data$baseline))
m2 = "data {
int<lower = 0> G; // all genes
int<lower = 0> T; // tube
int<lower = 0> y[T, G]; // RNA-seq counts
}
parameters {
vector[G] alpha; // control log relative expression
}
transformed parameters{
vector[G] phi;
for(t in 1:T) phi = softmax( alpha );
}
model {
for (t in 1:T) y[t] ~ multinomial(phi);
alpha ~ normal(0, 5); // log relative expression in control
sum(alpha) ~ normal(0, 0.0001 * G); // soft identification of control
}
"
data = simulate_data (1)
fit = rstan:::stan(
model_code = m2,
data= data,
iter= 500,
chains = 4,
cores = 4
)
## In file included from /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/config.hpp:39,
## from /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/math/tools/config.hpp:13,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core/var.hpp:7,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core/gevv_vvv_vari.hpp:5,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core.hpp:12,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/mat.hpp:4,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math.hpp:4,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/src/stan/model/model_header.hpp:4,
## from file17539634c0959.cpp:8:
## /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/config/compiler/gcc.hpp:186:1: warning: "BOOST_NO_CXX11_RVALUE_REFERENCES" redefined
## <command-line>: warning: this is the location of the previous definition
plot(summary(fit, "phi")$summary[,1], data$baseline/sum(data$baseline))
traceplot(fit, sprintf("alpha[%s]", 1:10) )
rstan::summary(fit, "alpha")$summary
## mean se_mean sd 2.5% 25%
## alpha[1] 0.02524969 1.956917e-05 0.0006188316 0.02406258 0.02483113
## alpha[2] 0.19116635 1.779566e-05 0.0005627482 0.18999891 0.19083819
## alpha[3] 1.06569769 1.117117e-05 0.0003532633 1.06500227 1.06544890
## alpha[4] -0.01283520 1.932730e-05 0.0006111829 -0.01407415 -0.01324483
## alpha[5] -0.02107187 1.953132e-05 0.0006176344 -0.02225096 -0.02150341
## alpha[6] -0.11666816 2.001209e-05 0.0006328379 -0.11787438 -0.11709446
## alpha[7] 0.37716742 1.756481e-05 0.0005554480 0.37610491 0.37679030
## alpha[8] 0.12497996 1.798258e-05 0.0005686590 0.12387976 0.12456415
## alpha[9] -0.76345940 2.652987e-05 0.0008389481 -0.76512291 -0.76402425
## alpha[10] -0.37875721 2.243371e-05 0.0007094163 -0.38018903 -0.37924074
## alpha[11] 0.66116250 1.528383e-05 0.0004833170 0.66025488 0.66082801
## alpha[12] -0.27941300 2.260234e-05 0.0007147488 -0.28078146 -0.27987337
## alpha[13] 0.61720783 1.466971e-05 0.0004638968 0.61631937 0.61688857
## alpha[14] 0.18635191 1.778618e-05 0.0005624484 0.18528742 0.18599932
## alpha[15] 0.44799536 1.501076e-05 0.0004746820 0.44705692 0.44769595
## alpha[16] 0.11606460 1.841776e-05 0.0005824206 0.11495822 0.11563898
## alpha[17] -0.47639174 2.340557e-05 0.0007401492 -0.47786731 -0.47689390
## alpha[18] -0.73155078 2.758270e-05 0.0008722415 -0.73324778 -0.73217021
## alpha[19] -0.13423272 2.029852e-05 0.0006418956 -0.13544723 -0.13470495
## alpha[20] 0.57902478 1.463176e-05 0.0004626970 0.57811822 0.57872882
## alpha[21] -0.10878464 2.027031e-05 0.0006410036 -0.10999651 -0.10924168
## alpha[22] -0.33859134 2.193556e-05 0.0006936634 -0.33991527 -0.33908451
## alpha[23] 0.65264840 1.370799e-05 0.0004334846 0.65180144 0.65235395
## alpha[24] -0.53612288 2.526775e-05 0.0007990365 -0.53768274 -0.53662758
## alpha[25] -0.19185037 2.167437e-05 0.0006854037 -0.19315679 -0.19230985
## alpha[26] 0.10714181 1.881674e-05 0.0005950377 0.10602078 0.10672611
## alpha[27] 0.66121436 1.446143e-05 0.0004573105 0.66029737 0.66091876
## alpha[28] 0.34255413 1.644000e-05 0.0005198784 0.34154035 0.34218072
## alpha[29] 0.48835988 1.565808e-05 0.0004951519 0.48738986 0.48803723
## alpha[30] -0.10747656 2.104026e-05 0.0006653514 -0.10877826 -0.10794323
## alpha[31] 0.31885432 1.547368e-05 0.0004893207 0.31785591 0.31853476
## alpha[32] -1.27228912 3.934643e-05 0.0010942311 -1.27435525 -1.27303163
## alpha[33] 0.40090617 1.592053e-05 0.0005034513 0.39998006 0.40056361
## alpha[34] 0.13301902 1.740739e-05 0.0005504701 0.13188268 0.13265854
## alpha[35] 0.21649680 1.782392e-05 0.0005636419 0.21541976 0.21612798
## alpha[36] -0.47164575 2.492000e-05 0.0007880395 -0.47323113 -0.47217239
## alpha[37] -0.30771745 2.352735e-05 0.0007440002 -0.30922045 -0.30819679
## alpha[38] 0.56035304 1.493053e-05 0.0004721447 0.55945548 0.56004414
## alpha[39] -0.17946576 2.032884e-05 0.0006428545 -0.18074134 -0.17989966
## alpha[40] -0.12658675 2.106146e-05 0.0006660219 -0.12788633 -0.12702253
## alpha[41] -0.32762930 2.290712e-05 0.0007243867 -0.32900547 -0.32812344
## alpha[42] 0.15416654 1.814920e-05 0.0005739281 0.15308347 0.15377956
## alpha[43] -0.37620448 2.373592e-05 0.0007505958 -0.37767490 -0.37672331
## alpha[44] -0.19545293 2.093166e-05 0.0006619171 -0.19678048 -0.19586226
## alpha[45] 0.72111984 1.337190e-05 0.0004228566 0.72033780 0.72081046
## alpha[46] -0.07842809 1.982092e-05 0.0006267926 -0.07960479 -0.07885374
## alpha[47] -0.44520492 2.337930e-05 0.0007393182 -0.44668067 -0.44571742
## alpha[48] -0.10321084 2.010858e-05 0.0006358892 -0.10448635 -0.10363183
## alpha[49] -0.10893012 2.098341e-05 0.0006635538 -0.11021243 -0.10937881
## alpha[50] 0.31950986 1.738654e-05 0.0005498106 0.31847330 0.31911666
## alpha[51] -0.45986229 2.590380e-05 0.0008191502 -0.46147772 -0.46040723
## alpha[52] -0.59944629 2.454292e-05 0.0007761154 -0.60105271 -0.59994095
## alpha[53] 0.28437536 1.702432e-05 0.0005383562 0.28331300 0.28402480
## alpha[54] -0.30443349 2.274675e-05 0.0007193155 -0.30581099 -0.30491238
## alpha[55] -0.52805129 2.660056e-05 0.0008411836 -0.52970845 -0.52865314
## alpha[56] 0.36267994 1.758815e-05 0.0005561860 0.36153997 0.36230486
## alpha[57] -0.79271994 2.875428e-05 0.0009092903 -0.79457544 -0.79333184
## alpha[58] 0.28712657 1.709024e-05 0.0005404410 0.28606097 0.28676716
## alpha[59] 0.07204108 1.762214e-05 0.0005572610 0.07097873 0.07166208
## alpha[60] 0.07784833 1.849997e-05 0.0005850206 0.07669257 0.07745217
## alpha[61] 0.27507504 1.724316e-05 0.0005452765 0.27405674 0.27470982
## alpha[62] 0.28847707 1.701164e-05 0.0005379553 0.28745579 0.28813821
## alpha[63] -0.21760783 2.191055e-05 0.0006928723 -0.21890460 -0.21807921
## alpha[64] -0.50902504 2.362210e-05 0.0007469964 -0.51054630 -0.50953966
## alpha[65] 0.06267133 1.826197e-05 0.0005774941 0.06151965 0.06228815
## alpha[66] 0.75275085 1.318233e-05 0.0004168618 0.75191825 0.75246949
## alpha[67] 0.29052968 1.694077e-05 0.0005357142 0.28950475 0.29016508
## alpha[68] 0.06649792 1.893457e-05 0.0005987638 0.06538037 0.06605204
## alpha[69] 0.15230817 1.891226e-05 0.0005980583 0.15110605 0.15192411
## alpha[70] -0.09493079 2.149813e-05 0.0006798306 -0.09620675 -0.09539014
## alpha[71] 0.19593517 1.766433e-05 0.0005585952 0.19486570 0.19553542
## alpha[72] -0.68908910 2.613262e-05 0.0008263861 -0.69066871 -0.68965362
## alpha[73] -0.28918742 2.169634e-05 0.0006860984 -0.29050097 -0.28966873
## alpha[74] 0.38477468 1.579546e-05 0.0004994963 0.38385255 0.38442796
## alpha[75] 0.68299313 1.378459e-05 0.0004359071 0.68213959 0.68272178
## alpha[76] -0.79375133 2.906612e-05 0.0009191516 -0.79566455 -0.79436388
## alpha[77] 0.41794536 1.471313e-05 0.0004652701 0.41699458 0.41763862
## alpha[78] 0.55453872 1.437606e-05 0.0004546108 0.55363802 0.55424697
## alpha[79] -0.60020717 2.669668e-05 0.0008442232 -0.60184560 -0.60075386
## alpha[80] 0.11457855 1.917308e-05 0.0006063060 0.11333643 0.11417691
## alpha[81] 0.16806224 1.864242e-05 0.0005895250 0.16688874 0.16764159
## alpha[82] 0.36351379 1.655722e-05 0.0005235852 0.36243033 0.36316735
## alpha[83] -0.64079662 2.671866e-05 0.0008449182 -0.64242048 -0.64140971
## alpha[84] 0.34075931 1.603566e-05 0.0005070922 0.33975845 0.34040880
## alpha[85] 0.09067529 1.885268e-05 0.0005961741 0.08947994 0.09027933
## alpha[86] -0.89040825 2.878007e-05 0.0009101057 -0.89220742 -0.89104909
## alpha[87] -0.52885951 2.535470e-05 0.0008017859 -0.53052107 -0.52937865
## alpha[88] 0.37704155 1.690583e-05 0.0005346092 0.37600376 0.37665312
## alpha[89] -0.27926772 2.293627e-05 0.0007253085 -0.28069037 -0.27974744
## alpha[90] -0.68450357 2.846914e-05 0.0009002732 -0.68622350 -0.68511312
## alpha[91] 0.59113080 1.433024e-05 0.0004531621 0.59024493 0.59082001
## alpha[92] -1.00527879 3.094061e-05 0.0009784281 -1.00702679 -1.00595353
## alpha[93] 0.09396809 1.823144e-05 0.0005765289 0.09281868 0.09359463
## alpha[94] 0.20390545 1.808665e-05 0.0005719502 0.20276251 0.20354046
## alpha[95] -1.02268249 3.124798e-05 0.0009881479 -1.02462267 -1.02329622
## alpha[96] -0.24740405 2.113496e-05 0.0006683460 -0.24867646 -0.24787302
## alpha[97] 0.40547113 1.556418e-05 0.0004921825 0.40451530 0.40514987
## alpha[98] 0.58547375 1.490188e-05 0.0004712388 0.58457167 0.58513123
## alpha[99] 0.99092123 1.173862e-05 0.0003712077 0.99016713 0.99067698
## alpha[100] 0.36489762 1.635058e-05 0.0005170508 0.36387141 0.36454582
## 50% 75% 97.5% n_eff Rhat
## alpha[1] 0.02525057 0.02569447 0.02640102 1000.000 0.9987812
## alpha[2] 0.19115419 0.19150747 0.19228313 1000.000 0.9981230
## alpha[3] 1.06569667 1.06594043 1.06636459 1000.000 0.9997694
## alpha[4] -0.01282983 -0.01243108 -0.01164024 1000.000 0.9983475
## alpha[5] -0.02106708 -0.02064450 -0.01992993 1000.000 0.9982180
## alpha[6] -0.11666593 -0.11623945 -0.11537581 1000.000 0.9981007
## alpha[7] 0.37718596 0.37753254 0.37823094 1000.000 0.9982498
## alpha[8] 0.12498183 0.12539437 0.12603425 1000.000 0.9968329
## alpha[9] -0.76346743 -0.76288215 -0.76173139 1000.000 0.9961981
## alpha[10] -0.37877676 -0.37824559 -0.37737936 1000.000 0.9967997
## alpha[11] 0.66116782 0.66149871 0.66209834 1000.000 1.0006124
## alpha[12] -0.27942985 -0.27894040 -0.27805020 1000.000 0.9985780
## alpha[13] 0.61722235 0.61750137 0.61814756 1000.000 0.9975485
## alpha[14] 0.18634019 0.18673801 0.18747208 1000.000 0.9983633
## alpha[15] 0.44800617 0.44831314 0.44892763 1000.000 0.9980324
## alpha[16] 0.11607834 0.11645838 0.11719243 1000.000 0.9998572
## alpha[17] -0.47638849 -0.47590193 -0.47501332 1000.000 0.9979153
## alpha[18] -0.73153709 -0.73097105 -0.72987328 1000.000 1.0039562
## alpha[19] -0.13419623 -0.13376268 -0.13306001 1000.000 0.9986981
## alpha[20] 0.57901175 0.57932598 0.57988743 1000.000 0.9983843
## alpha[21] -0.10878502 -0.10834917 -0.10751128 1000.000 0.9969376
## alpha[22] -0.33859324 -0.33812525 -0.33728424 1000.000 0.9999378
## alpha[23] 0.65264760 0.65295763 0.65348946 1000.000 0.9990245
## alpha[24] -0.53611034 -0.53558277 -0.53459862 1000.000 1.0026897
## alpha[25] -0.19184945 -0.19138380 -0.19060451 1000.000 0.9999845
## alpha[26] 0.10713175 0.10756080 0.10823929 1000.000 0.9996126
## alpha[27] 0.66121845 0.66155194 0.66202799 1000.000 0.9999404
## alpha[28] 0.34255449 0.34291371 0.34356679 1000.000 0.9982350
## alpha[29] 0.48836570 0.48867966 0.48934289 1000.000 0.9970428
## alpha[30] -0.10746483 -0.10705114 -0.10613171 1000.000 0.9991306
## alpha[31] 0.31884511 0.31917622 0.31985525 1000.000 0.9982068
## alpha[32] -1.27226110 -1.27151206 -1.27019537 773.406 0.9983632
## alpha[33] 0.40089882 0.40125490 0.40191437 1000.000 0.9991022
## alpha[34] 0.13303651 0.13337756 0.13406094 1000.000 0.9987876
## alpha[35] 0.21647207 0.21686782 0.21766806 1000.000 0.9974124
## alpha[36] -0.47163453 -0.47112263 -0.47015780 1000.000 0.9973741
## alpha[37] -0.30771997 -0.30721634 -0.30631263 1000.000 0.9967554
## alpha[38] 0.56035836 0.56066197 0.56127506 1000.000 0.9977654
## alpha[39] -0.17945785 -0.17900919 -0.17822706 1000.000 0.9989422
## alpha[40] -0.12657381 -0.12614402 -0.12525020 1000.000 0.9967103
## alpha[41] -0.32760943 -0.32713868 -0.32622642 1000.000 0.9978191
## alpha[42] 0.15416398 0.15454040 0.15525090 1000.000 0.9972019
## alpha[43] -0.37620307 -0.37568055 -0.37475918 1000.000 0.9970913
## alpha[44] -0.19546136 -0.19501757 -0.19417971 1000.000 0.9985576
## alpha[45] 0.72111698 0.72139743 0.72196621 1000.000 1.0022599
## alpha[46] -0.07842988 -0.07802382 -0.07719566 1000.000 0.9970522
## alpha[47] -0.44519311 -0.44470704 -0.44374692 1000.000 0.9965404
## alpha[48] -0.10320516 -0.10281788 -0.10197686 1000.000 0.9987610
## alpha[49] -0.10894537 -0.10849256 -0.10765789 1000.000 0.9966972
## alpha[50] 0.31950632 0.31986867 0.32061107 1000.000 0.9990267
## alpha[51] -0.45986271 -0.45927793 -0.45834129 1000.000 0.9973963
## alpha[52] -0.59943865 -0.59896384 -0.59794789 1000.000 1.0001995
## alpha[53] 0.28437356 0.28473480 0.28541888 1000.000 0.9997964
## alpha[54] -0.30443427 -0.30394136 -0.30300020 1000.000 0.9980747
## alpha[55] -0.52804817 -0.52744874 -0.52642224 1000.000 0.9973994
## alpha[56] 0.36269517 0.36305227 0.36371572 1000.000 1.0009757
## alpha[57] -0.79271505 -0.79211020 -0.79091143 1000.000 1.0003696
## alpha[58] 0.28712254 0.28748043 0.28814220 1000.000 0.9973661
## alpha[59] 0.07203558 0.07240026 0.07316946 1000.000 0.9994912
## alpha[60] 0.07785183 0.07826640 0.07891412 1000.000 0.9974587
## alpha[61] 0.27505660 0.27544031 0.27610887 1000.000 0.9996219
## alpha[62] 0.28847347 0.28883589 0.28957747 1000.000 1.0010084
## alpha[63] -0.21762360 -0.21715089 -0.21620284 1000.000 0.9967753
## alpha[64] -0.50903301 -0.50850996 -0.50756084 1000.000 0.9972078
## alpha[65] 0.06266523 0.06305032 0.06376131 1000.000 0.9999446
## alpha[66] 0.75275107 0.75303778 0.75351469 1000.000 0.9991093
## alpha[67] 0.29052343 0.29088530 0.29152370 1000.000 0.9978729
## alpha[68] 0.06651104 0.06693030 0.06770267 1000.000 0.9986744
## alpha[69] 0.15231062 0.15268908 0.15344444 1000.000 0.9970646
## alpha[70] -0.09492877 -0.09445209 -0.09361371 1000.000 0.9979414
## alpha[71] 0.19592020 0.19632447 0.19702555 1000.000 1.0024334
## alpha[72] -0.68909640 -0.68854384 -0.68748113 1000.000 0.9989257
## alpha[73] -0.28919106 -0.28867172 -0.28787139 1000.000 0.9991498
## alpha[74] 0.38477454 0.38511646 0.38576871 1000.000 0.9984879
## alpha[75] 0.68297071 0.68328313 0.68385213 1000.000 0.9974123
## alpha[76] -0.79374708 -0.79314403 -0.79195905 1000.000 0.9973938
## alpha[77] 0.41793390 0.41825437 0.41884762 1000.000 0.9968963
## alpha[78] 0.55454887 0.55484566 0.55539158 1000.000 0.9980076
## alpha[79] -0.60019818 -0.59965878 -0.59861841 1000.000 0.9966944
## alpha[80] 0.11457623 0.11498960 0.11573755 1000.000 1.0010579
## alpha[81] 0.16806367 0.16849195 0.16920960 1000.000 0.9993025
## alpha[82] 0.36350727 0.36387309 0.36452817 1000.000 1.0000080
## alpha[83] -0.64077973 -0.64024259 -0.63906284 1000.000 1.0016332
## alpha[84] 0.34078509 0.34111175 0.34175189 1000.000 0.9999979
## alpha[85] 0.09067083 0.09106623 0.09186995 1000.000 1.0007762
## alpha[86] -0.89039603 -0.88982624 -0.88863884 1000.000 0.9982137
## alpha[87] -0.52884051 -0.52832879 -0.52728436 1000.000 1.0062513
## alpha[88] 0.37704303 0.37741164 0.37803941 1000.000 0.9973887
## alpha[89] -0.27925916 -0.27879251 -0.27780807 1000.000 0.9987065
## alpha[90] -0.68451652 -0.68390485 -0.68275910 1000.000 0.9983984
## alpha[91] 0.59113628 0.59144037 0.59200176 1000.000 0.9996465
## alpha[92] -1.00528954 -1.00463508 -1.00335662 1000.000 0.9988724
## alpha[93] 0.09398067 0.09437121 0.09502525 1000.000 0.9979717
## alpha[94] 0.20390239 0.20426773 0.20501658 1000.000 0.9986221
## alpha[95] -1.02270636 -1.02205936 -1.02068993 1000.000 1.0013118
## alpha[96] -0.24739249 -0.24694154 -0.24614332 1000.000 0.9974788
## alpha[97] 0.40547806 0.40578979 0.40642550 1000.000 0.9979955
## alpha[98] 0.58547466 0.58580888 0.58635368 1000.000 0.9964880
## alpha[99] 0.99092724 0.99117697 0.99159433 1000.000 0.9987098
## alpha[100] 0.36491268 0.36523835 0.36591100 1000.000 1.0008076
data = simulate_data (4)
Here the bias caused by the asymetry (only increase) of the changes of expression
plot(data$y_prob %>% pull(which(data$cond==0)[1]), data$y_prob %>% pull(which(data$cond==1)[1]) )
abline(0,1)
With multiplying by a adjusted factor 2 we can re-establish the baseline
plot(data$y_prob %>% pull(which(data$cond==0)[1]), (data$y_prob %>% pull(which(data$cond==1)[1])) * 2 )
abline(0,1)
m3 = "data {
int<lower = 0> F; // fixed genes
int<lower = 0> G; // all genes
int<lower = 0> T; // tube
int<lower = 0> y[T, G]; // RNA-seq counts
int<lower=0, upper=1> cond[T]; // condition per tube
}
parameters {
vector[G] alpha[2]; // control log relative expression
}
transformed parameters{
vector[G] phi_raw[T];
vector[G] phi[T];
for(t in 1:T)
for(g in 1:G) phi_raw[t,g] = alpha[cond[t] + 1 , g];
for(t in 1:T) phi[t] = softmax( phi_raw[t] );
}
model {
for (t in 1:T) y[t] ~ multinomial(phi[t]);
for(i in 1:2) alpha[i] ~ normal(0, 5); // log relative expression in control
for(i in 1:2) sum(alpha[i]) ~ normal(0, 0.0001 * G); // soft identification of control
}
"
fit3 = rstan:::stan(
model_code = m3,
data= data,
iter= 500,
chains = 4,
cores = 4
)
## In file included from /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/config.hpp:39,
## from /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/math/tools/config.hpp:13,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core/var.hpp:7,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core/gevv_vvv_vari.hpp:5,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core.hpp:12,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/mat.hpp:4,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math.hpp:4,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/src/stan/model/model_header.hpp:4,
## from file4e1057df37b2.cpp:8:
## /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/config/compiler/gcc.hpp:186:1: warning: "BOOST_NO_CXX11_RVALUE_REFERENCES" redefined
## <command-line>: warning: this is the location of the previous definition
head(summary(fit, "alpha")$summary, n=10)
## mean se_mean sd 2.5% 25%
## alpha[1] 0.02524969 1.956917e-05 0.0006188316 0.02406258 0.02483113
## alpha[2] 0.19116635 1.779566e-05 0.0005627482 0.18999891 0.19083819
## alpha[3] 1.06569769 1.117117e-05 0.0003532633 1.06500227 1.06544890
## alpha[4] -0.01283520 1.932730e-05 0.0006111829 -0.01407415 -0.01324483
## alpha[5] -0.02107187 1.953132e-05 0.0006176344 -0.02225096 -0.02150341
## alpha[6] -0.11666816 2.001209e-05 0.0006328379 -0.11787438 -0.11709446
## alpha[7] 0.37716742 1.756481e-05 0.0005554480 0.37610491 0.37679030
## alpha[8] 0.12497996 1.798258e-05 0.0005686590 0.12387976 0.12456415
## alpha[9] -0.76345940 2.652987e-05 0.0008389481 -0.76512291 -0.76402425
## alpha[10] -0.37875721 2.243371e-05 0.0007094163 -0.38018903 -0.37924074
## 50% 75% 97.5% n_eff Rhat
## alpha[1] 0.02525057 0.02569447 0.02640102 1000 0.9987812
## alpha[2] 0.19115419 0.19150747 0.19228313 1000 0.9981230
## alpha[3] 1.06569667 1.06594043 1.06636459 1000 0.9997694
## alpha[4] -0.01282983 -0.01243108 -0.01164024 1000 0.9983475
## alpha[5] -0.02106708 -0.02064450 -0.01992993 1000 0.9982180
## alpha[6] -0.11666593 -0.11623945 -0.11537581 1000 0.9981007
## alpha[7] 0.37718596 0.37753254 0.37823094 1000 0.9982498
## alpha[8] 0.12498183 0.12539437 0.12603425 1000 0.9968329
## alpha[9] -0.76346743 -0.76288215 -0.76173139 1000 0.9961981
## alpha[10] -0.37877676 -0.37824559 -0.37737936 1000 0.9967997
The model shows the uncorrected bias
alpha_table = as.data.frame(rstan::summary(fit3, "alpha")$summary) %>%
tibble::as_tibble(rownames="par") %>%
tidyr::separate(par, c("dummy", "sample", "gene", "dummy2"), "\\[|,|\\]") %>%
dplyr::mutate(gene = as.integer(gene), sample = as.integer(sample)) %>%
dplyr::select(-dummy, -dummy2) %>%
dplyr::select(1:2, mean) %>%
tidyr::spread(1,3)
plot(alpha_table %>% pull(2), alpha_table %>% pull(3) )
abline(0,1)
While manually adding + log(1.5) seem to fix the problem
plot(alpha_table %>% pull(2), alpha_table %>% pull(3) + log(1.5) )
abline(0,1)
Let’s try to replicate it with Stan. In this simple model, I assume that delta for FIXED genes (70% of genes) is simply 0
m4 = "data {
int<lower = 0> F; // fixed genes
int<lower = 0> G; // all genes
int<lower = 0> T; // tube
int<lower = 0> y[T, G]; // RNA-seq counts
int<lower=0, upper=1> cond[T]; // condition per tube
}
parameters {
vector[G] alpha; // control log relative expression
real epsilon;
vector[G-F] delta; // rate of change
}
transformed parameters{
vector[G] phi_raw[T];
vector[G] phi[T];
for(t in 1:T) {
for(f in 1:F) phi_raw[t,f] = alpha[f] + ( epsilon * cond[t] ) + ( 0 * cond[t] ) ;
for(g in (F+1):G) phi_raw[t,g] = alpha[g] + ( epsilon * cond[t] ) + ( delta[g - F] * cond[t] );
}
for(t in 1:T) phi[t] = softmax( phi_raw[t] );
}
model {
for (t in 1:T) y[t] ~ multinomial(phi[t]);
alpha ~ normal(0, 5); // log relative expression in control
sum(alpha) ~ normal(0, 0.0001 * G); // soft identification of control
delta ~ normal(0,1);
epsilon ~ normal(0,1);
}
"
fit4 = rstan:::stan(
model_code = m4,
data= data,
iter= 500,
chains = 4,
cores = 4
)
## In file included from /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/config.hpp:39,
## from /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/math/tools/config.hpp:13,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core/var.hpp:7,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core/gevv_vvv_vari.hpp:5,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/core.hpp:12,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math/rev/mat.hpp:4,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/stan/math.hpp:4,
## from /wehisan/home/allstaff/m/mangiola.s/R/x86_64-pc-linux-gnu-library/3.4/StanHeaders/include/src/stan/model/model_header.hpp:4,
## from file4b9d1939ef3c.cpp:8:
## /wehisan/general/system/bioinf-software/bioinfsoftware/R/R-3.4.3/lib64/R/library/BH/include/boost/config/compiler/gcc.hpp:186:1: warning: "BOOST_NO_CXX11_RVALUE_REFERENCES" redefined
## <command-line>: warning: this is the location of the previous definition
head(summary(fit4, "alpha")$summary, n=10)
## mean se_mean sd 2.5% 25%
## alpha[1] 0.36430832 1.838529e-05 0.0005813940 0.36318497 0.36392199
## alpha[2] 0.07680065 2.218122e-05 0.0007014318 0.07542596 0.07632993
## alpha[3] -0.09418932 2.278780e-05 0.0007206134 -0.09561129 -0.09464528
## alpha[4] 0.28748073 1.921231e-05 0.0006075465 0.28631782 0.28706897
## alpha[5] -0.38676026 2.728681e-05 0.0008628846 -0.38842306 -0.38735434
## alpha[6] -1.02342736 3.553382e-05 0.0011107599 -1.02564544 -1.02414810
## alpha[7] -0.33536020 2.535598e-05 0.0008018264 -0.33687881 -0.33593351
## alpha[8] -0.08527592 2.287974e-05 0.0007235210 -0.08662599 -0.08576925
## alpha[9] -0.04110035 2.381949e-05 0.0007532383 -0.04265013 -0.04158295
## alpha[10] -0.10646207 2.365233e-05 0.0007479524 -0.10789287 -0.10697376
## 50% 75% 97.5% n_eff Rhat
## alpha[1] 0.36431029 0.36467530 0.36546795 1000.0000 0.9984356
## alpha[2] 0.07679316 0.07728106 0.07817713 1000.0000 0.9976178
## alpha[3] -0.09417792 -0.09373485 -0.09274634 1000.0000 0.9983248
## alpha[4] 0.28747444 0.28788826 0.28862345 1000.0000 0.9983265
## alpha[5] -0.38678942 -0.38615986 -0.38509822 1000.0000 0.9999230
## alpha[6] -1.02344508 -1.02267509 -1.02133891 977.1395 0.9970713
## alpha[7] -0.33537197 -0.33481211 -0.33382590 1000.0000 1.0028884
## alpha[8] -0.08528195 -0.08479673 -0.08387415 1000.0000 0.9977019
## alpha[9] -0.04110123 -0.04060290 -0.03967852 1000.0000 1.0009313
## alpha[10] -0.10643689 -0.10597595 -0.10504452 1000.0000 0.9988490
As you can see the epsilon factor that would work log(1.5) is not detected. What am I doing wrong is not clear to me
summary(fit4, "epsilon")$summary
## mean se_mean sd 2.5% 25% 50%
## epsilon 0.1554633 0.2403415 0.8774096 -1.86533 -0.4261631 0.09048492
## 75% 97.5% n_eff Rhat
## epsilon 0.7732695 1.751431 13.32745 1.220202
traceplot(fit4, "epsilon")
phi_raw_table = as.data.frame(rstan::summary(fit4, "phi_raw")$summary) %>%
tibble::as_tibble(rownames="par") %>%
tidyr::separate(par, c("dummy", "sample", "gene", "dummy2"), "\\[|,|\\]") %>%
dplyr::mutate(gene = as.integer(gene), sample = as.integer(sample)) %>%
dplyr::select(-dummy, -dummy2) %>%
dplyr::select(1:2, mean) %>%
tidyr::spread(1,3)
plot(phi_raw_table %>% pull(which(data$cond==0)[1]+1), phi_raw_table %>% pull(which(data$cond==1)[1]+1) )
abline(0,1)
abline(log(4),1)