Examining the FPR and number of genes at a nominal FDR cutoff with a 3 x 3 design, 3 replicates each (total n=27), over 1000 genes where all genes are null. We’ll use alpha of 0.01 to look at false positive rate where the target for uniform p-values should be alpha * n, and nominal FDR cutoff of 5%.

n <- 1000 # num genes per simulation
alpha <- .01 # alpha on raw p-value
nom.fdr <- .05 # see how we do for nominal FDR
coldata <- expand.grid(x=1:3,y=1:3,z=1:3)
head(coldata)
##   x y z
## 1 1 1 1
## 2 2 1 1
## 3 3 1 1
## 4 1 2 1
## 5 2 2 1
## 6 3 2 1

This chunk run on a cluster:

# using DESeq2 v1.20.0
# takes ~600 s using 10 clusters
suppressPackageStartupMessages(library(DESeq2))
library(pbapply)
set.seed(1)
out <- pbsapply(seq_len(1000), function(i) {
  set.seed(i)
  dds <- makeExampleDESeqDataSet(n=n, m=27)
  for (var in c("x","y","z"))
    colData(dds)[[var]] <- factor(coldata[[var]])
  design(dds) <- ~x + y + z
  dds <- DESeq(dds, quiet=TRUE)
  res <- results(dds, alpha=nom.fdr) # this is 'z' 3 vs 1 comparison
  c(sum(res$pvalue < alpha, na.rm=TRUE),
    sum(res$padj < nom.fdr, na.rm=TRUE))
}, cl=10)
dim(out)
#save(out, file="out.rda")
load("out.rda")

A histogram for p-value and adj p-value:

hists <- function(out) {
  par(mfrow=c(1,2))
  hist(out[1,],breaks=0:20 * 2 - .5,col="grey",xlab="",
       main=paste0("p-value < ",alpha,", n=",n))
  abline(v=alpha*n, col="red", lwd=3)
  text(30,100,paste0("mean: ",round(mean(out[1,]))," / ",n))
  hist(out[2,], breaks=0:20 - .5, col="grey",xlab="",
       main=paste0("adj p-value < ",nom.fdr,", n=",n))
  text(10,150,paste0("mean: ",round(mean(out[2,]),1)," / ",n))
}
hists(out)