Computing mean expression values corresponding to a given profile

The following code generates vectors of four numbers \((e_0, e_X, e_Y, e_{X+Y})\) corresponding to a profile of interest (see Appendix below with all profiles)

packages = c("R.utils", "limSolve", "Biobase",  
             "limma")

package_check = lapply(packages, FUN = function(x) {
  if (!require(x, character.only = TRUE)) {
    install.packages(x, dependencies = TRUE)
    library(x, character.only = TRUE)
    }
  })

sourceDirectory("../utils")

#this file encodes the system of constraints satisfied by the group mean expression corresponding to control, X, Y, X+Y and additive level
load("../data/constraints_vector")

#reading profile definitions
prof_codes = read.table("../data/profile_codes_v2.txt",header = TRUE,sep = "\t") 

#range of expression
exp_min = 2 #min of expression value
exp_max = 16 #max of expression value

#index of simulated profile (appendix 1 below shows all profiles)
prof_index = 3 

#n. of instances being generated 
n_times = 10

min_signal = 2.5 #minimum signal, defined as the minimum non-zero difference of all pairwise comparisons between the conditions 0, X, Y, X+Y)

Here we compute several instances of profile 3:

profile_means = compute_profile_means(prof_codes, prof_index, n_times, 
                                      exp_min, exp_max, 
                                      constraints_vector, min_signal)

head(profile_means)
##              0         X         Y       YX min_signal
## [1,] 11.662060 11.662060 11.662060 14.52489   2.862831
## [2,]  7.064994  7.064994  7.064994 14.92770   7.862702
## [3,]  3.124769  3.124769  3.124769 15.18644  12.061669
## [4,]  7.624017  7.624017  7.624017 13.92908   6.305068
## [5,]  8.645269  8.645269  8.645269 11.30650   2.661235
## [6,]  8.621091  8.621091  8.621091 12.88407   4.262981

For example, let us visualize the first row:

setPowerPointStyle()

barplot(profile_means[1,1:4], ylab = 'simulated expression', 
        col = 'white', names.arg =c("0", "X", "Y", "Y+X"))

Simulating random noise around the mean values

After computing the group means for a profile of interest, we add random noise. We assume normal distributions centered at the group mean values computed above. To simulate variable levels of noise, we set the parameter signal_to_noise, which is the ratio between the minimum non-zero signal of a profile (the 5th column of the vector profile_means) and the standard deviation of the normal distribution. The number signal_to_noise corresponds to the minimum effect size considering all possible pairwise comparisons among 0, X, Y, X+Y.

source("../utils/simulate_from_means.R")
source("../utils/setPowerPointStyle.R")
setPowerPointStyle()

samples = 4 #number of samples for each condition (assuming a balanced design)

signal_to_noise = 4 #minimum effect size

#experimental design of a combinatorial treatment
design = factor(c(rep("0", samples), rep("X", samples), 
                  rep("Y", samples), rep("Y+X", samples)))

simulated_values = simulate_from_means(profile_means[1,], samples,
                                       signal_to_noise, exp_min, exp_max)
names(simulated_values) = design

boxplot(simulated_values ~ design, ylab = 'simulated expression', col = 'gray')

stripchart(simulated_values ~ design, vertical = TRUE,
   method = "jitter", add = TRUE, pch = 20, col = 'black',cex = 1.5)

Let’s simulate the same profile with more noise. This is done by lowering the parameter signal_to_noise, for example from 4 (previous case) to 2.

source("../utils/setPowerPointStyle.R")
setPowerPointStyle()

signal_to_noise = 2

simulated_values = simulate_from_means(profile_means[1,], samples,
                                       signal_to_noise, exp_min, exp_max)

names(simulated_values) = design

boxplot(simulated_values ~ design, ylab = 'simulated expression', col = 'gray')

stripchart(simulated_values ~ design, vertical = TRUE,
   method = "jitter", add = TRUE, pch = 20, col = 'black',cex = 1.5)

Appendix 1: Generating all profiles

Here we generate all the 123 profiles.

source("../utils/setPowerPointStyle.R")
setPowerPointStyle()

n_times = 5 
exp_min = 2 
exp_max = 16 
min_signal = 0.5 

cols = c(A = 'gray', N = 'red', P = 'blue')

for (prof_index in 1:nrow(prof_codes)){
  
  x = compute_profile_means(prof_codes, prof_index, n_times, exp_min,
                            exp_max, constraints_vector, min_signal)[,1:4]

  colnames(x)=c("0","X","Y","Y+X")
  
  #compute additive level
  add = x[1,1] + (x[1,2] - x[1,1]) + (x[1,3] - x[1,1])
  
  #adjust for visualization purpose
  if (add<exp_min) add = 0.1
  if (add>exp_max) add = max(x[1,]) + 1.5
  
  if (prof_index == 2) add = x[1,1]
    
  type = as.character(prof_codes[prof_index, 'type'])
    
  barplot(x[1,], ylab='', yaxt='n', ann = FALSE, col = cols[type], 
          main = paste('Profile', prof_index), ylim=c(0, max(x[1,]) + 1.6))
  
  abline(h = add, col = "black", lty = 2, lwd=3)
 
   
}

#pdf('myplot.pdf')
#png(filename="myfile.png", res=150, width = 1000, height = 1000) 
pdf('all_profiles.pdf',width = 30, height = 20)
par(mfrow = c(12, 11), mar = c(2,2,2,2))

for (prof_index in 1:123){
  
  x = compute_profile_means(prof_codes, prof_index, n_times, exp_min,
                            exp_max, constraints_vector, min_signal)[,1:4]

  names(x) = NULL
  type = as.character(prof_codes[prof_index, 'type'])
  par(lwd = 4)
  barplot(x[1,], xlab = '', ylab = '', yaxt = 'n', xaxt="n",
          ann = FALSE, col = 'white',
          main = paste('Profile', prof_index), cex.main = 2.8)
box(col = cols[type], lwd = 4)
}
dev.off()

Appendix 2: Generating all profile groups

visualize_all_profile_groups()

sessionInfo()
## R version 3.4.0 (2017-04-21)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Sierra 10.12.6
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] limma_3.34.9        Biobase_2.38.0      BiocGenerics_0.24.0
## [4] limSolve_1.5.5.2    R.utils_2.6.0       R.oo_1.22.0        
## [7] R.methodsS3_1.7.1  
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.1      quadprog_1.5-5  digest_0.6.18   rprojroot_1.2  
##  [5] MASS_7.3-47     backports_1.1.0 magrittr_1.5    evaluate_0.10.1
##  [9] stringi_1.1.7   rmarkdown_1.6   tools_3.4.0     stringr_1.3.0  
## [13] xfun_0.6        yaml_2.2.0      compiler_3.4.0  htmltools_0.3.6
## [17] knitr_1.22      lpSolve_5.6.13