Examining the FPR and number of genes at a nominal FDR cutoff with a 3 x 3 design, 3 replicates each (total n=27), over 1000 genes where all genes are null. We’ll use alpha of 0.01 to look at false positive rate where the target for uniform p-values should be alpha * n, and nominal FDR cutoff of 5%.
n <- 1000 # num genes per simulation
alpha <- .01 # alpha on raw p-value
nom.fdr <- .05 # see how we do for nominal FDR
coldata <- expand.grid(x=1:3,y=1:3,z=1:3)
head(coldata)
## x y z
## 1 1 1 1
## 2 2 1 1
## 3 3 1 1
## 4 1 2 1
## 5 2 2 1
## 6 3 2 1
This chunk run on a cluster:
# using DESeq2 v1.20.0
# takes ~600 s using 10 clusters
suppressPackageStartupMessages(library(DESeq2))
library(pbapply)
set.seed(1)
out <- pbsapply(seq_len(1000), function(i) {
set.seed(i)
dds <- makeExampleDESeqDataSet(n=n, m=27)
for (var in c("x","y","z"))
colData(dds)[[var]] <- factor(coldata[[var]])
design(dds) <- ~x + y + z
dds <- DESeq(dds, quiet=TRUE)
res <- results(dds, alpha=nom.fdr) # this is 'z' 3 vs 1 comparison
c(sum(res$pvalue < alpha, na.rm=TRUE),
sum(res$padj < nom.fdr, na.rm=TRUE))
}, cl=10)
dim(out)
#save(out, file="out.rda")
load("out.rda")
A histogram for p-value and adj p-value:
hists <- function(out) {
par(mfrow=c(1,2))
hist(out[1,],breaks=0:20 * 2 - .5,col="grey",xlab="",
main=paste0("p-value < ",alpha,", n=",n))
abline(v=alpha*n, col="red", lwd=3)
text(30,100,paste0("mean: ",round(mean(out[1,]))," / ",n))
hist(out[2,], breaks=0:20 - .5, col="grey",xlab="",
main=paste0("adj p-value < ",nom.fdr,", n=",n))
text(10,150,paste0("mean: ",round(mean(out[2,]),1)," / ",n))
}
hists(out)