Li Y, Ge X, Peng F, Li W, Li JJ. Exaggerated false positives by popular differential expression methods when analyzing human population samples. Genome Biol 23, 79 (2022). https://doi.org/10.1186/s13059-022-02648-4
suppressWarnings(library(edgeR, quietly = T))
Read count matrix file: each row is a gene and each column is a sample; the first column is the gene name; the first row is the sample name.
Condition labels file: one row with condition labels corresponding to each sample in the read count matrix file.
You can download the example files from https://github.com/xihuimeijing/DEGs_Analysis_FDR/tree/main/RMarkdown_Wilcoxon/examples
readCount<-read.table(file="examples/examples.countMatrix.tsv", header = T, row.names = 1, stringsAsFactors = F,check.names = F)
conditions<-read.table(file="examples/examples.conditions.tsv", header = F)
conditions<-factor(t(conditions))
y <- DGEList(counts=readCount,group=conditions)
##Remove rows consistently have zero or very low counts
keep <- filterByExpr(y)
y <- y[keep,keep.lib.sizes=FALSE]
##Perform TMM normalization and transfer to CPM (Counts Per Million)
y <- calcNormFactors(y,method="TMM")
count_norm=cpm(y)
count_norm<-as.data.frame(count_norm)
pvalues <- sapply(1:nrow(count_norm),function(i){
data<-cbind.data.frame(gene=as.numeric(t(count_norm[i,])),conditions)
p=wilcox.test(gene~conditions, data)$p.value
return(p)
})
fdr=p.adjust(pvalues,method = "fdr")
conditionsLevel<-levels(conditions)
dataCon1=count_norm[,c(which(conditions==conditionsLevel[1]))]
dataCon2=count_norm[,c(which(conditions==conditionsLevel[2]))]
foldChanges=log2(rowMeans(dataCon2)/rowMeans(dataCon1))
outRst<-data.frame(log2foldChange=foldChanges, pValues=pvalues, FDR=fdr)
rownames(outRst)=rownames(count_norm)
outRst=na.omit(outRst)
fdrThres=0.05
write.table(outRst[outRst$FDR<fdrThres,], file="examples/examples.WilcoxonTest.rst.tsv",sep="\t", quote=F,row.names = T,col.names = T)