The following code generates vectors of four numbers \((e_0, e_X, e_Y, e_{X+Y})\) corresponding to a profile of interest (see Appendix below with all profiles)
packages = c("R.utils", "limSolve", "Biobase",
"limma")
package_check = lapply(packages, FUN = function(x) {
if (!require(x, character.only = TRUE)) {
install.packages(x, dependencies = TRUE)
library(x, character.only = TRUE)
}
})
sourceDirectory("../utils")
#this file encodes the system of constraints satisfied by the group mean expression corresponding to control, X, Y, X+Y and additive level
load("../data/constraints_vector")
#reading profile definitions
prof_codes = read.table("../data/profile_codes_v2.txt",header = TRUE,sep = "\t")
#range of expression
exp_min = 2 #min of expression value
exp_max = 16 #max of expression value
#index of simulated profile (appendix 1 below shows all profiles)
prof_index = 3
#n. of instances being generated
n_times = 10
min_signal = 2.5 #minimum signal, defined as the minimum non-zero difference of all pairwise comparisons between the conditions 0, X, Y, X+Y)
Here we compute several instances of profile 3:
profile_means = compute_profile_means(prof_codes, prof_index, n_times,
exp_min, exp_max,
constraints_vector, min_signal)
head(profile_means)
## 0 X Y YX min_signal
## [1,] 11.662060 11.662060 11.662060 14.52489 2.862831
## [2,] 7.064994 7.064994 7.064994 14.92770 7.862702
## [3,] 3.124769 3.124769 3.124769 15.18644 12.061669
## [4,] 7.624017 7.624017 7.624017 13.92908 6.305068
## [5,] 8.645269 8.645269 8.645269 11.30650 2.661235
## [6,] 8.621091 8.621091 8.621091 12.88407 4.262981
For example, let us visualize the first row:
setPowerPointStyle()
barplot(profile_means[1,1:4], ylab = 'simulated expression',
col = 'white', names.arg =c("0", "X", "Y", "Y+X"))
After computing the group means for a profile of interest, we add random noise. We assume normal distributions centered at the group mean values computed above. To simulate variable levels of noise, we set the parameter signal_to_noise, which is the ratio between the minimum non-zero signal of a profile (the 5th column of the vector profile_means) and the standard deviation of the normal distribution. The number signal_to_noise corresponds to the minimum effect size considering all possible pairwise comparisons among 0, X, Y, X+Y.
source("../utils/simulate_from_means.R")
source("../utils/setPowerPointStyle.R")
setPowerPointStyle()
samples = 4 #number of samples for each condition (assuming a balanced design)
signal_to_noise = 4 #minimum effect size
#experimental design of a combinatorial treatment
design = factor(c(rep("0", samples), rep("X", samples),
rep("Y", samples), rep("Y+X", samples)))
simulated_values = simulate_from_means(profile_means[1,], samples,
signal_to_noise, exp_min, exp_max)
names(simulated_values) = design
boxplot(simulated_values ~ design, ylab = 'simulated expression', col = 'gray')
stripchart(simulated_values ~ design, vertical = TRUE,
method = "jitter", add = TRUE, pch = 20, col = 'black',cex = 1.5)
Let’s simulate the same profile with more noise. This is done by lowering the parameter signal_to_noise, for example from 4 (previous case) to 2.
source("../utils/setPowerPointStyle.R")
setPowerPointStyle()
signal_to_noise = 2
simulated_values = simulate_from_means(profile_means[1,], samples,
signal_to_noise, exp_min, exp_max)
names(simulated_values) = design
boxplot(simulated_values ~ design, ylab = 'simulated expression', col = 'gray')
stripchart(simulated_values ~ design, vertical = TRUE,
method = "jitter", add = TRUE, pch = 20, col = 'black',cex = 1.5)
Here we generate all the 123 profiles.
source("../utils/setPowerPointStyle.R")
setPowerPointStyle()
n_times = 5
exp_min = 2
exp_max = 16
min_signal = 0.5
cols = c(A = 'gray', N = 'red', P = 'blue')
for (prof_index in 1:nrow(prof_codes)){
x = compute_profile_means(prof_codes, prof_index, n_times, exp_min,
exp_max, constraints_vector, min_signal)[,1:4]
colnames(x)=c("0","X","Y","Y+X")
#compute additive level
add = x[1,1] + (x[1,2] - x[1,1]) + (x[1,3] - x[1,1])
#adjust for visualization purpose
if (add<exp_min) add = 0.1
if (add>exp_max) add = max(x[1,]) + 1.5
if (prof_index == 2) add = x[1,1]
type = as.character(prof_codes[prof_index, 'type'])
barplot(x[1,], ylab='', yaxt='n', ann = FALSE, col = cols[type],
main = paste('Profile', prof_index), ylim=c(0, max(x[1,]) + 1.6))
abline(h = add, col = "black", lty = 2, lwd=3)
}
#pdf('myplot.pdf')
#png(filename="myfile.png", res=150, width = 1000, height = 1000)
pdf('all_profiles.pdf',width = 30, height = 20)
par(mfrow = c(12, 11), mar = c(2,2,2,2))
for (prof_index in 1:123){
x = compute_profile_means(prof_codes, prof_index, n_times, exp_min,
exp_max, constraints_vector, min_signal)[,1:4]
names(x) = NULL
type = as.character(prof_codes[prof_index, 'type'])
par(lwd = 4)
barplot(x[1,], xlab = '', ylab = '', yaxt = 'n', xaxt="n",
ann = FALSE, col = 'white',
main = paste('Profile', prof_index), cex.main = 2.8)
box(col = cols[type], lwd = 4)
}
dev.off()
visualize_all_profile_groups()
sessionInfo()
## R version 3.4.0 (2017-04-21)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Sierra 10.12.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] limma_3.34.9 Biobase_2.38.0 BiocGenerics_0.24.0
## [4] limSolve_1.5.5.2 R.utils_2.6.0 R.oo_1.22.0
## [7] R.methodsS3_1.7.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.1 quadprog_1.5-5 digest_0.6.18 rprojroot_1.2
## [5] MASS_7.3-47 backports_1.1.0 magrittr_1.5 evaluate_0.10.1
## [9] stringi_1.1.7 rmarkdown_1.6 tools_3.4.0 stringr_1.3.0
## [13] xfun_0.6 yaml_2.2.0 compiler_3.4.0 htmltools_0.3.6
## [17] knitr_1.22 lpSolve_5.6.13